Whamcloud - gitweb
LU-1330 obdclass: split client-server mount routines
author Liu Xuezhao <xuezhao.liu@emc.com>
Mon, 17 Dec 2012 15:31:26 +0000 (23:31 +0800)
committer Oleg Drokin <oleg.drokin@intel.com>
Mon, 1 Apr 2013 16:38:35 +0000 (12:38 -0400)
Move server side mount routines to obd_mount_server.c.  Const correct
several server_name2xxx type functions.

Signed-off-by: Liu Xuezhao <xuezhao.liu@emc.com>
Signed-off-by: John L. Hammond <john.hammond@intel.com>
Change-Id: I8abdb6fdd0411f2e75f6fb6ee4ff8502e50ef213
Reviewed-on: http://review.whamcloud.com/2672
Tested-by: Hudson
Reviewed-by: Nathaniel Clark <nathaniel.l.clark@intel.com>
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Peng Tao <bergwolf@gmail.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/doc/Makefile.am
lustre/include/lustre_disk.h
lustre/include/obd_class.h
lustre/mgs/mgs_llog.c
lustre/obdclass/Makefile.in
lustre/obdclass/lprocfs_status.c
lustre/obdclass/obd_mount.c
lustre/obdclass/obd_mount_server.c [new file with mode: 0644]

index 475aabc..6511ea7 100644 (file)
@@ -48,7 +48,7 @@ PS2PDF = ps2pdf
 TEXEXPAND = texexpand
 SUFFIXES = .lin .lyx .pdf .ps .sgml .html .txt .tex .fig .eps .dvi
 
 TEXEXPAND = texexpand
 SUFFIXES = .lin .lyx .pdf .ps .sgml .html .txt .tex .fig .eps .dvi
 
-MANFILES = lustre.7 lfs.1 mount.lustre.8 mkfs.lustre.8 tunefs.lustre.8 lctl.8 \
+MANFILES = lustre.7 lfs.1 mount.lustre.8 lctl.8 \
        llverdev.8 llbackup.8 llapi_quotactl.3 llobdstat.8 llstat.8 \
        plot-llstat.8 l_getgroups.8 lst.8 routerstat.8 lshowmount.8 \
        ll_recover_lost_found_objs.8 llog_reader.8 llapi_file_open.3 \
        llverdev.8 llbackup.8 llapi_quotactl.3 llobdstat.8 llstat.8 \
        plot-llstat.8 l_getgroups.8 lst.8 routerstat.8 lshowmount.8 \
        ll_recover_lost_found_objs.8 llog_reader.8 llapi_file_open.3 \
@@ -56,6 +56,10 @@ MANFILES = lustre.7 lfs.1 mount.lustre.8 mkfs.lustre.8 tunefs.lustre.8 lctl.8 \
        lustre_rsync.8 lfs_migrate.1 lhbadm.8 ldev.8 ldev.conf.5 nids.5 \
        lfs-hsm.1 llapi_hsm_state_get.3 llapi_hsm_state_set.3
 
        lustre_rsync.8 lfs_migrate.1 lhbadm.8 ldev.8 ldev.conf.5 nids.5 \
        lfs-hsm.1 llapi_hsm_state_get.3 llapi_hsm_state_set.3
 
+if SERVER
+MANFILES += mkfs.lustre.8 tunefs.lustre.8
+endif
+
 if UTILS
 man_MANS = $(MANFILES)
 endif
 if UTILS
 man_MANS = $(MANFILES)
 endif
index b1c030a..819513f 100644 (file)
@@ -172,12 +172,13 @@ struct lustre_disk_data {
 /*8192*/char       ldd_params[4096];     /* key=value pairs */
 };
 
 /*8192*/char       ldd_params[4096];     /* key=value pairs */
 };
 
+
 #define IS_MDT(data)    ((data)->lsi_flags & LDD_F_SV_TYPE_MDT)
 #define IS_OST(data)    ((data)->lsi_flags & LDD_F_SV_TYPE_OST)
 #define IS_MGS(data)    ((data)->lsi_flags & LDD_F_SV_TYPE_MGS)
 #define IS_SERVER(data) ((data)->lsi_flags & (LDD_F_SV_TYPE_MGS | \
 #define IS_MDT(data)    ((data)->lsi_flags & LDD_F_SV_TYPE_MDT)
 #define IS_OST(data)    ((data)->lsi_flags & LDD_F_SV_TYPE_OST)
 #define IS_MGS(data)    ((data)->lsi_flags & LDD_F_SV_TYPE_MGS)
 #define IS_SERVER(data) ((data)->lsi_flags & (LDD_F_SV_TYPE_MGS | \
-       LDD_F_SV_TYPE_MDT | LDD_F_SV_TYPE_OST))
-#define MT_STR(data)   mt_str((data)->ldd_mount_type)
+                        LDD_F_SV_TYPE_MDT | LDD_F_SV_TYPE_OST))
+#define MT_STR(data)    mt_str((data)->ldd_mount_type)
 
 /* Make the mdt/ost server obd name based on the filesystem name */
 static inline int server_make_name(__u32 flags, __u16 index, char *fs,
 
 /* Make the mdt/ost server obd name based on the filesystem name */
 static inline int server_make_name(__u32 flags, __u16 index, char *fs,
@@ -199,11 +200,6 @@ static inline int server_make_name(__u32 flags, __u16 index, char *fs,
         return 0;
 }
 
         return 0;
 }
 
-/* Get the index from the obd name */
-int server_name2index(char *svname, __u32 *idx, char **endptr);
-int server_name2svname(char *label, char *svname, char **endptr, size_t svsize);
-
-
 /****************** mount command *********************/
 
 /* The lmd is only used internally by Lustre; mount simply passes
 /****************** mount command *********************/
 
 /* The lmd is only used internally by Lustre; mount simply passes
@@ -536,27 +532,35 @@ struct los_ondisk {
 /****************** prototypes *********************/
 
 #ifdef __KERNEL__
 /****************** prototypes *********************/
 
 #ifdef __KERNEL__
-
 /* obd_mount.c */
 /* obd_mount.c */
+int server_name2fsname(const char *svname, char *fsname, const char **endptr);
+int server_name2index(const char *svname, __u32 *idx, const char **endptr);
+int server_name2svname(const char *label, char *svname, const char **endptr,
+                      size_t svsize);
+
+int lustre_put_lsi(struct super_block *sb);
+int lustre_start_simple(char *obdname, char *type, char *uuid,
+                       char *s1, char *s2, char *s3, char *s4);
+int lustre_start_mgc(struct super_block *sb);
 void lustre_register_client_fill_super(int (*cfs)(struct super_block *sb,
 void lustre_register_client_fill_super(int (*cfs)(struct super_block *sb,
-                                                  struct vfsmount *mnt));
+                                                 struct vfsmount *mnt));
 void lustre_register_kill_super_cb(void (*cfs)(struct super_block *sb));
 void lustre_register_kill_super_cb(void (*cfs)(struct super_block *sb));
-
-
 int lustre_common_put_super(struct super_block *sb);
 int lustre_common_put_super(struct super_block *sb);
-struct lustre_mount_info *server_find_mount_locked(const char *name);
+
+# ifdef HAVE_SERVER_SUPPORT
+/* obd_mount_server.c */
+int server_fill_super(struct super_block *sb);
 struct lustre_mount_info *server_get_mount(const char *name);
 struct lustre_mount_info *server_get_mount_2(const char *name);
 int server_put_mount(const char *name, struct vfsmount *mnt);
 int server_put_mount_2(const char *name, struct vfsmount *mnt);
 struct mgs_target_info;
 struct lustre_mount_info *server_get_mount(const char *name);
 struct lustre_mount_info *server_get_mount_2(const char *name);
 int server_put_mount(const char *name, struct vfsmount *mnt);
 int server_put_mount_2(const char *name, struct vfsmount *mnt);
 struct mgs_target_info;
-int server_mti_print(char *title, struct mgs_target_info *mti);
+int server_mti_print(const char *title, struct mgs_target_info *mti);
 void server_calc_timeout(struct lustre_sb_info *lsi, struct obd_device *obd);
 void server_calc_timeout(struct lustre_sb_info *lsi, struct obd_device *obd);
+# endif
 
 
-/* mgc_request.c */
 int mgc_fsname2resid(char *fsname, struct ldlm_res_id *res_id, int type);
 int mgc_fsname2resid(char *fsname, struct ldlm_res_id *res_id, int type);
-
-#endif
+#endif /* __KERNEL__ */
 
 /** @} disk */
 
 
 /** @} disk */
 
index 575955d..ec00115 100644 (file)
@@ -2279,11 +2279,12 @@ struct lwp_register_item {
 extern int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c);
 
 /* obd_mount.c */
 extern int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c);
 
 /* obd_mount.c */
-int server_name2fsname(char *svname, char *fsname, char **endptr);
-int lustre_register_lwp_item(char *lwpname, struct obd_export **exp,
+#ifdef HAVE_SERVER_SUPPORT
+int lustre_register_lwp_item(const char *lwpname, struct obd_export **exp,
                             register_lwp_cb cb_func, void *cb_data);
 void lustre_deregister_lwp_item(struct obd_export **exp);
                             register_lwp_cb cb_func, void *cb_data);
 void lustre_deregister_lwp_item(struct obd_export **exp);
-int tgt_name2lwpname(char *tgt_name, char *lwp_name);
+int tgt_name2lwpname(const char *tgt_name, char *lwp_name);
+#endif /* HAVE_SERVER_SUPPORT */
 
 /* sysctl.c */
 extern void obd_sysctl_init (void);
 
 /* sysctl.c */
 extern void obd_sysctl_init (void);
index 99a58c5..5e9a9fd 100644 (file)
@@ -3790,13 +3790,14 @@ static void print_lustre_cfg(struct lustre_cfg *lcfg)
 int mgs_setparam(const struct lu_env *env, struct mgs_device *mgs,
                 struct lustre_cfg *lcfg, char *fsname)
 {
 int mgs_setparam(const struct lu_env *env, struct mgs_device *mgs,
                 struct lustre_cfg *lcfg, char *fsname)
 {
-        struct fs_db *fsdb;
-        struct mgs_target_info *mti;
+       struct fs_db *fsdb;
+       struct mgs_target_info *mti;
         char *devname, *param;
         char *devname, *param;
-        char *ptr, *tmp;
-        __u32 index;
-        int rc = 0;
-        ENTRY;
+       char *ptr;
+       const char *tmp;
+       __u32 index;
+       int rc = 0;
+       ENTRY;
 
         print_lustre_cfg(lcfg);
 
 
         print_lustre_cfg(lcfg);
 
index c3e1507..d67d3dc 100644 (file)
@@ -19,6 +19,8 @@ obdclass-all-objs += acl.o idmap.o
 obdclass-all-objs += md_local_object.o md_attrs.o linkea.o
 obdclass-all-objs += lu_ucred.o
 
 obdclass-all-objs += md_local_object.o md_attrs.o linkea.o
 obdclass-all-objs += lu_ucred.o
 
+@SERVER_TRUE@obdclass-all-objs += obd_mount_server.o
+
 obdclass-objs := $(obdclass-linux-objs) $(obdclass-all-objs)
 
 EXTRA_PRE_CFLAGS := -I@LINUX@/fs -I@LDISKFS_DIR@ -I@LDISKFS_DIR@/ldiskfs
 obdclass-objs := $(obdclass-linux-objs) $(obdclass-all-objs)
 
 EXTRA_PRE_CFLAGS := -I@LINUX@/fs -I@LDISKFS_DIR@ -I@LDISKFS_DIR@/ldiskfs
index 117eb50..a966ed3 100644 (file)
@@ -2681,6 +2681,7 @@ int lprocfs_obd_wr_recovery_time_hard(struct file *file, const char *buffer,
 }
 EXPORT_SYMBOL(lprocfs_obd_wr_recovery_time_hard);
 
 }
 EXPORT_SYMBOL(lprocfs_obd_wr_recovery_time_hard);
 
+#ifdef HAVE_SERVER_SUPPORT
 int lprocfs_obd_rd_mntdev(char *page, char **start, off_t off,
                           int count, int *eof, void *data)
 {
 int lprocfs_obd_rd_mntdev(char *page, char **start, off_t off,
                           int count, int *eof, void *data)
 {
@@ -2697,6 +2698,7 @@ int lprocfs_obd_rd_mntdev(char *page, char **start, off_t off,
        return snprintf(page, count, "%s\n", dev_name);
 }
 EXPORT_SYMBOL(lprocfs_obd_rd_mntdev);
        return snprintf(page, count, "%s\n", dev_name);
 }
 EXPORT_SYMBOL(lprocfs_obd_rd_mntdev);
+#endif
 
 int lprocfs_obd_rd_max_pages_per_rpc(char *page, char **start, off_t off,
                                      int count, int *eof, void *data)
 
 int lprocfs_obd_rd_max_pages_per_rpc(char *page, char **start, off_t off,
                                      int count, int *eof, void *data)
index d6b4662..6cb33aa 100644 (file)
  *
  * lustre/obdclass/obd_mount.c
  *
  *
  * lustre/obdclass/obd_mount.c
  *
- * Client/server mount routines
+ * Client mount routines
  *
  * Author: Nathan Rutman <nathan@clusterfs.com>
  */
 
 
 #define DEBUG_SUBSYSTEM S_CLASS
  *
  * Author: Nathan Rutman <nathan@clusterfs.com>
  */
 
 
 #define DEBUG_SUBSYSTEM S_CLASS
-#define D_MOUNT D_SUPER|D_CONFIG /*|D_WARNING */
+#define D_MOUNT (D_SUPER|D_CONFIG/*|D_WARNING */)
 #define PRINT_CMD CDEBUG
 #define PRINT_CMD CDEBUG
-#define PRINT_MASK D_SUPER|D_CONFIG
 
 #include <obd.h>
 #include <lvfs.h>
 
 #include <obd.h>
 #include <lvfs.h>
 #include <lustre_log.h>
 #include <lustre_disk.h>
 #include <lustre_param.h>
 #include <lustre_log.h>
 #include <lustre_disk.h>
 #include <lustre_param.h>
-#ifdef HAVE_KERNEL_LOCKED
-#include <linux/smp_lock.h>
-#endif
 
 static int (*client_fill_super)(struct super_block *sb,
 
 static int (*client_fill_super)(struct super_block *sb,
-                                struct vfsmount *mnt) = NULL;
-static void (*kill_super_cb)(struct super_block *sb) = NULL;
-
-/*********** mount lookup *********/
-
-DEFINE_MUTEX(lustre_mount_info_lock);
-static CFS_LIST_HEAD(server_mount_info_list);
-
-static struct lustre_mount_info *server_find_mount(const char *name)
-{
-        cfs_list_t *tmp;
-        struct lustre_mount_info *lmi;
-        ENTRY;
-
-        cfs_list_for_each(tmp, &server_mount_info_list) {
-                lmi = cfs_list_entry(tmp, struct lustre_mount_info,
-                                     lmi_list_chain);
-                if (strcmp(name, lmi->lmi_name) == 0)
-                        RETURN(lmi);
-        }
-        RETURN(NULL);
-}
-
-/* we must register an obd for a mount before we call the setup routine.
-   *_setup will call lustre_get_mount to get the mnt struct
-   by obd_name, since we can't pass the pointer to setup. */
-static int server_register_mount(const char *name, struct super_block *sb,
-                          struct vfsmount *mnt)
-{
-        struct lustre_mount_info *lmi;
-        char *name_cp;
-        ENTRY;
-
-        LASSERT(sb);
-
-        OBD_ALLOC(lmi, sizeof(*lmi));
-        if (!lmi)
-                RETURN(-ENOMEM);
-        OBD_ALLOC(name_cp, strlen(name) + 1);
-        if (!name_cp) {
-                OBD_FREE(lmi, sizeof(*lmi));
-                RETURN(-ENOMEM);
-        }
-        strcpy(name_cp, name);
-
-       mutex_lock(&lustre_mount_info_lock);
-
-        if (server_find_mount(name)) {
-               mutex_unlock(&lustre_mount_info_lock);
-                OBD_FREE(lmi, sizeof(*lmi));
-                OBD_FREE(name_cp, strlen(name) + 1);
-                CERROR("Already registered %s\n", name);
-                RETURN(-EEXIST);
-        }
-        lmi->lmi_name = name_cp;
-        lmi->lmi_sb = sb;
-        lmi->lmi_mnt = mnt;
-        cfs_list_add(&lmi->lmi_list_chain, &server_mount_info_list);
-
-       mutex_unlock(&lustre_mount_info_lock);
-
-       CDEBUG(D_MOUNT, "reg_mnt %p from %s\n", lmi->lmi_mnt, name);
-
-        RETURN(0);
-}
-
-/* when an obd no longer needs a mount */
-static int server_deregister_mount(const char *name)
-{
-        struct lustre_mount_info *lmi;
-        ENTRY;
-
-       mutex_lock(&lustre_mount_info_lock);
-        lmi = server_find_mount(name);
-        if (!lmi) {
-               mutex_unlock(&lustre_mount_info_lock);
-                CERROR("%s not registered\n", name);
-                RETURN(-ENOENT);
-        }
-
-       CDEBUG(D_MOUNT, "dereg_mnt %p from %s\n", lmi->lmi_mnt, name);
-
-        OBD_FREE(lmi->lmi_name, strlen(lmi->lmi_name) + 1);
-        cfs_list_del(&lmi->lmi_list_chain);
-        OBD_FREE(lmi, sizeof(*lmi));
-       mutex_unlock(&lustre_mount_info_lock);
-
-        RETURN(0);
-}
-
-/* obd's look up a registered mount using their obdname. This is just
-   for initial obd setup to find the mount struct.  It should not be
-   called every time you want to mntget. */
-struct lustre_mount_info *server_get_mount(const char *name)
-{
-        struct lustre_mount_info *lmi;
-        struct lustre_sb_info *lsi;
-        ENTRY;
-
-       mutex_lock(&lustre_mount_info_lock);
-        lmi = server_find_mount(name);
-       mutex_unlock(&lustre_mount_info_lock);
-        if (!lmi) {
-                CERROR("Can't find mount for %s\n", name);
-                RETURN(NULL);
-        }
-        lsi = s2lsi(lmi->lmi_sb);
-
-        cfs_atomic_inc(&lsi->lsi_mounts);
-
-       CDEBUG(D_MOUNT, "get_mnt %p from %s, refs=%d\n", lmi->lmi_mnt,
-              name, cfs_atomic_read(&lsi->lsi_mounts));
-
-        RETURN(lmi);
-}
-EXPORT_SYMBOL(server_get_mount);
-
-/*
- * Used by mdt to get mount_info from obdname.
- * There are no blocking when using the mount_info.
- * Do not use server_get_mount for this purpose.
- */
-struct lustre_mount_info *server_get_mount_2(const char *name)
-{
-        struct lustre_mount_info *lmi;
-        ENTRY;
-
-       mutex_lock(&lustre_mount_info_lock);
-        lmi = server_find_mount(name);
-       mutex_unlock(&lustre_mount_info_lock);
-        if (!lmi)
-                CERROR("Can't find mount for %s\n", name);
-
-        RETURN(lmi);
-}
-EXPORT_SYMBOL(server_get_mount_2);
-
-static int lustre_put_lsi(struct super_block *sb);
-
-/* to be called from obd_cleanup methods */
-int server_put_mount(const char *name, struct vfsmount *mnt)
-{
-        struct lustre_mount_info *lmi;
-        struct lustre_sb_info *lsi;
-        ENTRY;
-
-       mutex_lock(&lustre_mount_info_lock);
-        lmi = server_find_mount(name);
-       mutex_unlock(&lustre_mount_info_lock);
-        if (!lmi) {
-                CERROR("Can't find mount for %s\n", name);
-                RETURN(-ENOENT);
-        }
-        lsi = s2lsi(lmi->lmi_sb);
-
-       CDEBUG(D_MOUNT, "put_mnt %p from %s, refs=%d\n",
-              lmi->lmi_mnt, name, cfs_atomic_read(&lsi->lsi_mounts));
+                               struct vfsmount *mnt);
 
 
-       if (lustre_put_lsi(lmi->lmi_sb))
-               CDEBUG(D_MOUNT, "Last put of mnt %p from %s\n",
-                      lmi->lmi_mnt, name);
-
-        /* this obd should never need the mount again */
-        server_deregister_mount(name);
-
-        RETURN(0);
-}
-EXPORT_SYMBOL(server_put_mount);
-
-/* Corresponding to server_get_mount_2 */
-int server_put_mount_2(const char *name, struct vfsmount *mnt)
-{
-        ENTRY;
-        RETURN(0);
-}
-EXPORT_SYMBOL(server_put_mount_2);
+static void (*kill_super_cb)(struct super_block *sb);
 
 /**************** config llog ********************/
 
 
 /**************** config llog ********************/
 
@@ -362,8 +185,8 @@ EXPORT_SYMBOL(do_lcfg);
 /** Call class_attach and class_setup.  These methods in turn call
  * obd type-specific methods.
  */
 /** Call class_attach and class_setup.  These methods in turn call
  * obd type-specific methods.
  */
-static int lustre_start_simple(char *obdname, char *type, char *uuid,
-                              char *s1, char *s2, char *s3, char *s4)
+int lustre_start_simple(char *obdname, char *type, char *uuid,
+                       char *s1, char *s2, char *s3, char *s4)
 {
        int rc;
        CDEBUG(D_MOUNT, "Starting obd %s (typ=%s)\n", obdname, type);
 {
        int rc;
        CDEBUG(D_MOUNT, "Starting obd %s (typ=%s)\n", obdname, type);
@@ -371,7 +194,7 @@ static int lustre_start_simple(char *obdname, char *type, char *uuid,
        rc = do_lcfg(obdname, 0, LCFG_ATTACH, type, uuid, 0, 0);
        if (rc) {
                CERROR("%s attach error %d\n", obdname, rc);
        rc = do_lcfg(obdname, 0, LCFG_ATTACH, type, uuid, 0, 0);
        if (rc) {
                CERROR("%s attach error %d\n", obdname, rc);
-               return(rc);
+               return rc;
        }
        rc = do_lcfg(obdname, 0, LCFG_SETUP, s1, s2, s3, s4);
        if (rc) {
        }
        rc = do_lcfg(obdname, 0, LCFG_SETUP, s1, s2, s3, s4);
        if (rc) {
@@ -381,66 +204,6 @@ static int lustre_start_simple(char *obdname, char *type, char *uuid,
        return rc;
 }
 
        return rc;
 }
 
-/* Set up a MGS to serve startup logs */
-static int server_start_mgs(struct super_block *sb)
-{
-        struct lustre_sb_info    *lsi = s2lsi(sb);
-        struct vfsmount          *mnt = lsi->lsi_srv_mnt;
-        struct lustre_mount_info *lmi;
-        int    rc = 0;
-        ENTRY;
-
-        /* It is impossible to have more than 1 MGS per node, since
-           MGC wouldn't know which to connect to */
-        lmi = server_find_mount(LUSTRE_MGS_OBDNAME);
-        if (lmi) {
-                lsi = s2lsi(lmi->lmi_sb);
-                LCONSOLE_ERROR_MSG(0x15d, "The MGS service was already started"
-                                  " from server\n");
-                RETURN(-EALREADY);
-        }
-
-        CDEBUG(D_CONFIG, "Start MGS service %s\n", LUSTRE_MGS_OBDNAME);
-
-        rc = server_register_mount(LUSTRE_MGS_OBDNAME, sb, mnt);
-
-        if (!rc) {
-                rc = lustre_start_simple(LUSTRE_MGS_OBDNAME, LUSTRE_MGS_NAME,
-                                        LUSTRE_MGS_OBDNAME, 0, 0,
-                                        lsi->lsi_osd_obdname, 0);
-                /* Do NOT call server_deregister_mount() here. This leads to
-                 * inability cleanup cleanly and free lsi and other stuff when
-                 * mgs calls server_put_mount() in error handling case. -umka */
-        }
-
-        if (rc)
-                LCONSOLE_ERROR_MSG(0x15e, "Failed to start MGS '%s' (%d). "
-                                   "Is the 'mgs' module loaded?\n",
-                                   LUSTRE_MGS_OBDNAME, rc);
-        RETURN(rc);
-}
-
-static int server_stop_mgs(struct super_block *sb)
-{
-        struct obd_device *obd;
-        int rc;
-        ENTRY;
-
-        CDEBUG(D_MOUNT, "Stop MGS service %s\n", LUSTRE_MGS_OBDNAME);
-
-        /* There better be only one MGS */
-        obd = class_name2obd(LUSTRE_MGS_OBDNAME);
-        if (!obd) {
-                CDEBUG(D_CONFIG, "mgs %s not running\n", LUSTRE_MGS_OBDNAME);
-                RETURN(-EALREADY);
-        }
-
-        /* The MGS should always stop when we say so */
-        obd->obd_force = 1;
-        rc = class_manual_cleanup(obd);
-        RETURN(rc);
-}
-
 DEFINE_MUTEX(mgc_start_lock);
 
 /** Set up a mgc obd to process startup logs
 DEFINE_MUTEX(mgc_start_lock);
 
 /** Set up a mgc obd to process startup logs
@@ -449,7 +212,7 @@ DEFINE_MUTEX(mgc_start_lock);
  *
  * \retval 0 success, otherwise error code
  */
  *
  * \retval 0 success, otherwise error code
  */
-static int lustre_start_mgc(struct super_block *sb)
+int lustre_start_mgc(struct super_block *sb)
 {
         struct obd_connect_data *data = NULL;
         struct lustre_sb_info *lsi = s2lsi(sb);
 {
         struct obd_connect_data *data = NULL;
         struct lustre_sb_info *lsi = s2lsi(sb);
@@ -622,7 +385,7 @@ static int lustre_start_mgc(struct super_block *sb)
 
         /* Start the MGC */
         rc = lustre_start_simple(mgcname, LUSTRE_MGC_NAME,
 
         /* Start the MGC */
         rc = lustre_start_simple(mgcname, LUSTRE_MGC_NAME,
-                                 (char *)uuid->uuid, LUSTRE_MGS_OBDNAME,
+                                (char *)uuid->uuid, LUSTRE_MGS_OBDNAME,
                                 niduuid, 0, 0);
         OBD_FREE_PTR(uuid);
         if (rc)
                                 niduuid, 0, 0);
         OBD_FREE_PTR(uuid);
         if (rc)
@@ -710,1209 +473,82 @@ out_free:
        mutex_unlock(&mgc_start_lock);
 
         if (data)
        mutex_unlock(&mgc_start_lock);
 
         if (data)
-                OBD_FREE_PTR(data);
-        if (mgcname)
-                OBD_FREE(mgcname, len);
-        if (niduuid)
-                OBD_FREE(niduuid, len + 2);
-        RETURN(rc);
-}
-
-static int lustre_stop_mgc(struct super_block *sb)
-{
-        struct lustre_sb_info *lsi = s2lsi(sb);
-        struct obd_device *obd;
-        char *niduuid = 0, *ptr = 0;
-        int i, rc = 0, len = 0;
-        ENTRY;
-
-        if (!lsi)
-                RETURN(-ENOENT);
-        obd = lsi->lsi_mgc;
-        if (!obd)
-                RETURN(-ENOENT);
-        lsi->lsi_mgc = NULL;
-
-       mutex_lock(&mgc_start_lock);
-        LASSERT(cfs_atomic_read(&obd->u.cli.cl_mgc_refcount) > 0);
-        if (!cfs_atomic_dec_and_test(&obd->u.cli.cl_mgc_refcount)) {
-                /* This is not fatal, every client that stops
-                   will call in here. */
-                CDEBUG(D_MOUNT, "mgc still has %d references.\n",
-                       cfs_atomic_read(&obd->u.cli.cl_mgc_refcount));
-                GOTO(out, rc = -EBUSY);
-        }
-
-        /* The MGC has no recoverable data in any case.
-         * force shotdown set in umount_begin */
-        obd->obd_no_recov = 1;
-
-        if (obd->u.cli.cl_mgc_mgsexp) {
-                /* An error is not fatal, if we are unable to send the
-                   disconnect mgs ping evictor cleans up the export */
-                rc = obd_disconnect(obd->u.cli.cl_mgc_mgsexp);
-                if (rc)
-                        CDEBUG(D_MOUNT, "disconnect failed %d\n", rc);
-        }
-
-        /* Save the obdname for cleaning the nid uuids, which are
-           obdname_XX */
-        len = strlen(obd->obd_name) + 6;
-        OBD_ALLOC(niduuid, len);
-        if (niduuid) {
-                strcpy(niduuid, obd->obd_name);
-                ptr = niduuid + strlen(niduuid);
-        }
-
-        rc = class_manual_cleanup(obd);
-        if (rc)
-                GOTO(out, rc);
-
-        /* Clean the nid uuids */
-        if (!niduuid)
-                GOTO(out, rc = -ENOMEM);
-
-        for (i = 0; i < lsi->lsi_lmd->lmd_mgs_failnodes; i++) {
-                sprintf(ptr, "_%x", i);
-                rc = do_lcfg(LUSTRE_MGC_OBDNAME, 0, LCFG_DEL_UUID,
-                             niduuid, 0, 0, 0);
-                if (rc)
-                        CERROR("del MDC UUID %s failed: rc = %d\n",
-                               niduuid, rc);
-        }
-out:
-        if (niduuid)
-                OBD_FREE(niduuid, len);
-
-        /* class_import_put will get rid of the additional connections */
-       mutex_unlock(&mgc_start_lock);
-        RETURN(rc);
-}
-
-/* Since there's only one mgc per node, we have to change it's fs to get
-   access to the right disk. */
-static int server_mgc_set_fs(struct obd_device *mgc, struct super_block *sb)
-{
-        struct lustre_sb_info *lsi = s2lsi(sb);
-        int rc;
-        ENTRY;
-
-        CDEBUG(D_MOUNT, "Set mgc disk for %s\n", lsi->lsi_lmd->lmd_dev);
-
-        /* cl_mgc_sem in mgc insures we sleep if the mgc_fs is busy */
-        rc = obd_set_info_async(NULL, mgc->obd_self_export,
-                                sizeof(KEY_SET_FS), KEY_SET_FS,
-                                sizeof(*sb), sb, NULL);
-        if (rc) {
-                CERROR("can't set_fs %d\n", rc);
-        }
-
-        RETURN(rc);
-}
-
-static int server_mgc_clear_fs(struct obd_device *mgc)
-{
-        int rc;
-        ENTRY;
-
-        CDEBUG(D_MOUNT, "Unassign mgc disk\n");
-
-        rc = obd_set_info_async(NULL, mgc->obd_self_export,
-                                sizeof(KEY_CLEAR_FS), KEY_CLEAR_FS,
-                                0, NULL, NULL);
-        RETURN(rc);
-}
-
-/** Get the fsname ("lustre") from the server name ("lustre-OST003F").
- * @param [in] svname server name including type and index
- * @param [out] fsname Buffer to copy filesystem name prefix into.
- *  Must have at least 'strlen(fsname) + 1' chars.
- * @param [out] endptr if endptr isn't NULL it is set to end of fsname
- * rc < 0  on error
- */
-int server_name2fsname(char *svname, char *fsname, char **endptr)
-{
-       char *dash = strrchr(svname, '-');
-       if (!dash) {
-               dash = strrchr(svname, ':');
-               if (!dash)
-                       return -EINVAL;
-       }
-
-       /* interpret <fsname>-MDTXXXXX-mdc as mdt, the better way is to pass
-        * in the fsname, then determine the server index */
-       if (!strcmp(LUSTRE_MDC_NAME, dash + 1)) {
-               dash--;
-               for (; dash > svname && *dash != '-' && *dash != ':'; dash--)
-                       ;
-               if (dash == svname)
-                       return -EINVAL;
-       }
-
-       if (fsname != NULL) {
-               strncpy(fsname, svname, dash - svname);
-               fsname[dash - svname] = '\0';
-       }
-
-       if (endptr != NULL)
-               *endptr = dash;
-
-       return 0;
-}
-EXPORT_SYMBOL(server_name2fsname);
-
-static int is_mdc_device(char *devname)
-{
-       char *ptr;
-       ptr = strrchr(devname, '-');
-       if (ptr != NULL && strcmp(ptr, "-mdc") == 0)
-               return 1;
-       return 0;
-}
-
-static int inline tgt_is_mdt0(char *tgtname)
-{
-       __u32 idx;
-       int   type;
-
-       type = server_name2index(tgtname, &idx, NULL);
-       if (type != LDD_F_SV_TYPE_MDT)
-               return 0;
-
-       return (idx == 0) ? 1 :0;
-}
-
-static int inline is_mdc_for_mdt0(char *devname)
-{
-       char   *ptr;
-
-       if (!is_mdc_device(devname))
-               return 0;
-
-       ptr = strrchr(devname, '-');
-       if (ptr == NULL)
-               return 0;
-
-       *ptr = 0;
-       if (tgt_is_mdt0(devname)) {
-               *ptr = '-';
-               return 1;
-       }
-       *ptr = '-';
-       return 0;
-}
-
-/**
- * Convert OST/MDT name(fsname-OSTxxxx) to a lwp name
- * (fsname-MDT0000-lwp-OSTxxxx)
- **/
-int tgt_name2lwpname(char *svname, char *lwpname)
-{
-       char    *fsname, *tgt;
-       int      rc;
-       ENTRY;
-
-       OBD_ALLOC(fsname, MTI_NAME_MAXLEN);
-       if (fsname == NULL)
-               RETURN(-ENOMEM);
-
-       rc = server_name2fsname(svname, fsname, &tgt);
-       if (rc != 0) {
-               CERROR("%s: failed to get fsname from svname. %d\n",
-                      svname, rc);
-               GOTO(cleanup, rc);
-       }
-
-       if (*tgt != '-' && *tgt != ':') {
-               CERROR("%s: invalid svname name!\n", svname);
-               GOTO(cleanup, rc = -EINVAL);
-       }
-
-       tgt++;
-       if (strncmp(tgt, "OST", 3) != 0 && strncmp(tgt, "MDT", 3) != 0) {
-               CERROR("%s is not an OST or MDT target!\n", svname);
-               GOTO(cleanup, rc = -EINVAL);
-       }
-       sprintf(lwpname, "%s-MDT0000-%s-%s", fsname, LUSTRE_LWP_NAME, tgt);
-cleanup:
-       if (fsname != NULL)
-               OBD_FREE(fsname, MTI_NAME_MAXLEN);
-       RETURN(rc);
-}
-EXPORT_SYMBOL(tgt_name2lwpname);
-
-static CFS_LIST_HEAD(lwp_register_list);
-DEFINE_MUTEX(lwp_register_list_lock);
-
-int lustre_register_lwp_item(char *lwpname, struct obd_export **exp,
-                            register_lwp_cb cb_func, void *cb_data)
-{
-       struct obd_device        *lwp;
-       struct lwp_register_item *lri;
-       ENTRY;
-
-       LASSERTF(strlen(lwpname) < MTI_NAME_MAXLEN, "lwpname is too long %s\n",
-                lwpname);
-       LASSERT(exp != NULL && *exp == NULL);
-
-       OBD_ALLOC_PTR(lri);
-       if (lri == NULL)
-               RETURN(-ENOMEM);
-
-       mutex_lock(&lwp_register_list_lock);
-
-       lwp = class_name2obd(lwpname);
-       if (lwp != NULL && lwp->obd_set_up == 1) {
-               struct obd_uuid *uuid;
-
-               OBD_ALLOC_PTR(uuid);
-               if (uuid == NULL) {
-                       mutex_unlock(&lwp_register_list_lock);
-                       OBD_FREE_PTR(lri);
-                       RETURN(-ENOMEM);
-               }
-               memcpy(uuid->uuid, lwpname, strlen(lwpname));
-               *exp = cfs_hash_lookup(lwp->obd_uuid_hash, uuid);
-               OBD_FREE_PTR(uuid);
-       }
-
-       memcpy(lri->lri_name, lwpname, strlen(lwpname));
-       lri->lri_exp = exp;
-       lri->lri_cb_func = cb_func;
-       lri->lri_cb_data = cb_data;
-       CFS_INIT_LIST_HEAD(&lri->lri_list);
-       cfs_list_add(&lri->lri_list, &lwp_register_list);
-
-       if (*exp != NULL && cb_func != NULL)
-               cb_func(cb_data);
-
-       mutex_unlock(&lwp_register_list_lock);
-       RETURN(0);
-}
-EXPORT_SYMBOL(lustre_register_lwp_item);
-
-void lustre_deregister_lwp_item(struct obd_export **exp)
-{
-       struct lwp_register_item *lri, *tmp;
-
-       mutex_lock(&lwp_register_list_lock);
-       cfs_list_for_each_entry_safe(lri, tmp, &lwp_register_list, lri_list) {
-               if (exp == lri->lri_exp) {
-                       if (*exp)
-                               class_export_put(*exp);
-                       cfs_list_del(&lri->lri_list);
-                       OBD_FREE_PTR(lri);
-                       break;
-               }
-       }
-       mutex_unlock(&lwp_register_list_lock);
-}
-EXPORT_SYMBOL(lustre_deregister_lwp_item);
-
-static void lustre_notify_lwp_list(struct obd_export *exp)
-{
-       struct lwp_register_item *lri, *tmp;
-       LASSERT(exp != NULL);
-
-       mutex_lock(&lwp_register_list_lock);
-       cfs_list_for_each_entry_safe(lri, tmp, &lwp_register_list, lri_list) {
-               if (strcmp(exp->exp_obd->obd_name, lri->lri_name))
-                       continue;
-               if (*lri->lri_exp != NULL)
-                       continue;
-               *lri->lri_exp = class_export_get(exp);
-               if (lri->lri_cb_func != NULL)
-                       lri->lri_cb_func(lri->lri_cb_data);
-       }
-       mutex_unlock(&lwp_register_list_lock);
-}
-
-/**
- * Connect to the lwp device \a lwp and hand the new export to the items
- * waiting on the lwp register list.
- *
- * A temporary lu_env with its own session context is built for the
- * obd_connect() call and torn down before returning. The lwp's obd_name is
- * used as the connect uuid so that the export can be found by lwp name
- * later.
- *
- * \param[in] lwp      lwp device to connect to
- *
- * \retval 0           export obtained and passed to lustre_notify_lwp_list()
- * \retval -ENOMEM     connect data or uuid could not be allocated
- * \retval -EINVAL     lwp name (plus its NUL) does not fit into an obd_uuid
- * \retval negative    error returned by lu_env_init() or obd_connect()
- */
-static int lustre_lwp_connect(struct obd_device *lwp)
-{
-       struct lu_env            env;
-       struct lu_context        session_ctx;
-       struct obd_export       *exp;
-       struct obd_uuid         *uuid = NULL;
-       struct obd_connect_data *data = NULL;
-       size_t                   namelen;
-       int                      rc;
-       ENTRY;
-
-       rc = lu_env_init(&env, lwp->obd_lu_dev->ld_type->ldt_ctx_tags);
-       if (rc != 0)
-               RETURN(rc);
-
-       lu_context_init(&session_ctx, LCT_SESSION);
-       session_ctx.lc_thread = NULL;
-       lu_context_enter(&session_ctx);
-       env.le_ses = &session_ctx;
-
-       OBD_ALLOC_PTR(data);
-       if (data == NULL)
-               GOTO(out, rc = -ENOMEM);
-
-       data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_INDEX;
-       data->ocd_version = LUSTRE_VERSION_CODE;
-       data->ocd_connect_flags |= OBD_CONNECT_MDS_MDS | OBD_CONNECT_FID |
-                                  OBD_CONNECT_AT | OBD_CONNECT_LRU_RESIZE |
-                                  OBD_CONNECT_FULL20 | OBD_CONNECT_LVB_TYPE |
-                                  OBD_CONNECT_LIGHTWEIGHT |
-                                  OBD_CONNECT_PINGLESS;
-       OBD_ALLOC_PTR(uuid);
-       if (uuid == NULL)
-               GOTO(out, rc = -ENOMEM);
-
-       /* '>=' rather than '>': the name has to fit together with its
-        * terminating NUL, otherwise the uuid would be left unterminated */
-       namelen = strlen(lwp->obd_name);
-       if (namelen >= sizeof(uuid->uuid)) {
-               CERROR("%s: Too long lwp name %s, max_size is %d\n",
-                      lwp->obd_name, lwp->obd_name, (int)sizeof(uuid->uuid));
-               GOTO(out, rc = -EINVAL);
-       }
-
-       /* Use lwp name as the uuid, so we find the export by lwp name later.
-        * The NUL is copied explicitly instead of relying on the buffer
-        * having been zeroed by the allocator. */
-       memcpy(uuid->uuid, lwp->obd_name, namelen + 1);
-       rc = obd_connect(&env, &exp, lwp, uuid, data, NULL);
-       if (rc != 0)
-               CERROR("%s: connect failed: rc = %d\n", lwp->obd_name, rc);
-       else
-               lustre_notify_lwp_list(exp);
-
-out:
-       if (data != NULL)
-               OBD_FREE_PTR(data);
-       if (uuid != NULL)
-               OBD_FREE_PTR(uuid);
-
-       lu_env_fini(&env);
-       lu_context_exit(&session_ctx);
-       lu_context_fini(&session_ctx);
-
-       RETURN(rc);
-}
-
-/**
- * lwp is used by slaves (Non-MDT0 targets) to manage the connection
- * to MDT0.
- *
- * Registers the nid carried by \a lcfg under the uuid found in its
- * buffer 1, starts an lwp device named by tgt_name2lwpname() with the uuid
- * "<lwpname>_UUID", and connects to it.
- *
- * \param[in] lcfg     config record; buffer 1 is the uuid string and
- *                     lcfg_nid the nid to register
- * \param[in] lsi      superblock info of the target being mounted
- *
- * \retval 0              lwp device started and connected
- * \retval -ENOMEM        a name buffer could not be allocated
- * \retval -ENAMETOOLONG  "<lwpname>_UUID" does not fit in MTI_NAME_MAXLEN
- * \retval negative       error from one of the setup steps
- **/
-static int lustre_lwp_setup(struct lustre_cfg *lcfg, struct lustre_sb_info *lsi)
-{
-       struct obd_device       *obd;
-       char                    *lwpname = NULL;
-       char                    *lwpuuid = NULL;
-       int                      rc;
-       ENTRY;
-
-       rc = class_add_uuid(lustre_cfg_string(lcfg, 1),
-                           lcfg->lcfg_nid);
-       if (rc) {
-               CERROR("%s: Can't add uuid: rc =%d\n", lsi->lsi_svname, rc);
-               GOTO(out, rc);
-       }
-
-       OBD_ALLOC(lwpname, MTI_NAME_MAXLEN);
-       if (lwpname == NULL)
-               GOTO(out, rc = -ENOMEM);
-
-       rc = tgt_name2lwpname(lsi->lsi_svname, lwpname);
-       if (rc != 0) {
-               CERROR("%s: failed to generate lwp name. %d\n",
-                      lsi->lsi_svname, rc);
-               GOTO(out, rc);
-       }
-
-       OBD_ALLOC(lwpuuid, MTI_NAME_MAXLEN);
-       if (lwpuuid == NULL)
-               GOTO(out, rc = -ENOMEM);
-
-       /* Bounded formatting: lwpname lives in a buffer of the same size,
-        * so appending "_UUID" could otherwise run past the end of lwpuuid */
-       if (snprintf(lwpuuid, MTI_NAME_MAXLEN, "%s_UUID", lwpname) >=
-           MTI_NAME_MAXLEN) {
-               CERROR("%s: lwp uuid is too long\n", lwpname);
-               GOTO(out, rc = -ENAMETOOLONG);
-       }
-
-       rc = lustre_start_simple(lwpname, LUSTRE_LWP_NAME,
-                                lwpuuid, lustre_cfg_string(lcfg, 1),
-                                0, 0, 0);
-       if (rc) {
-               CERROR("%s: setup up failed: rc %d\n", lwpname, rc);
-               GOTO(out, rc);
-       }
-
-       obd = class_name2obd(lwpname);
-       LASSERT(obd != NULL);
-
-       rc = lustre_lwp_connect(obd);
-       if (rc != 0)
-               CERROR("%s: connect failed: rc = %d\n", lwpname, rc);
-out:
-       if (lwpname != NULL)
-               OBD_FREE(lwpname, MTI_NAME_MAXLEN);
-       if (lwpuuid != NULL)
-               OBD_FREE(lwpuuid, MTI_NAME_MAXLEN);
-
-       RETURN(rc);
-}
-
-/*
- * Find the lwp device that belongs to the target described by \a lsi.
- *
- * *lwpname is allocated (MTI_NAME_MAXLEN bytes) and filled in by
- * tgt_name2lwpname(). If \a logname is not NULL, *logname is allocated as
- * well and set to "<fsname>-client", with the fsname extracted from
- * lsi_svname by server_name2fsname() (the *lwpname buffer is used as
- * scratch space for it before the lwp name is written).
- *
- * Returns the device found by class_name2obd(), ERR_PTR(-ENOENT) if no such
- * device exists, or another ERR_PTR() when a step fails.
- *
- * The caller is responsible for freeing *lwpname and *logname: they stay
- * allocated on success and on -ENOENT, and are freed and reset to NULL here
- * only when a step fails (rc != 0).
- */
-static struct obd_device *lustre_find_lwp(struct lustre_sb_info *lsi,
-                                         char **lwpname, char **logname)
-{
-       struct obd_device       *lwp;
-       int                      rc = 0;
-       ENTRY;
-
-       LASSERT(lwpname != NULL);
-       LASSERT(IS_OST(lsi) || IS_MDT(lsi));
-
-       OBD_ALLOC(*lwpname, MTI_NAME_MAXLEN);
-       if (*lwpname == NULL)
-               RETURN(ERR_PTR(-ENOMEM));
-
-       if (logname != NULL) {
-               OBD_ALLOC(*logname, MTI_NAME_MAXLEN);
-               if (*logname == NULL)
-                       GOTO(out, rc = -ENOMEM);
-               rc = server_name2fsname(lsi->lsi_svname, *lwpname, NULL);
-               if (rc != 0) {
-                       CERROR("%s: failed to get fsname from svname. %d\n",
-                              lsi->lsi_svname, rc);
-                       GOTO(out, rc = -EINVAL);
-               }
-               sprintf(*logname, "%s-client", *lwpname);
-       }
-
-       rc = tgt_name2lwpname(lsi->lsi_svname, *lwpname);
-       if (rc != 0) {
-               CERROR("%s: failed to generate lwp name. %d\n",
-                      lsi->lsi_svname, rc);
-               GOTO(out, rc = -EINVAL);
-       }
-
-       lwp = class_name2obd(*lwpname);
-
-out:
-       if (rc != 0) {
-               if (*lwpname != NULL) {
-                       OBD_FREE(*lwpname, MTI_NAME_MAXLEN);
-                       *lwpname = NULL;
-               }
-               if (logname != NULL && *logname != NULL) {
-                       OBD_FREE(*logname, MTI_NAME_MAXLEN);
-                       *logname = NULL;
-               }
-               lwp = ERR_PTR(rc);
-       }
-
-       RETURN(lwp != NULL ? lwp : ERR_PTR(-ENOENT));
-}
-
-/**
- * Add another connection to the lwp device of the target described by
- * \a lsi.
- *
- * Builds an LCFG_ADD_CONN record addressed to the lwp device, carrying
- * buffer 1 of \a cfg as its buffer 1, and passes it to class_add_conn().
- *
- * \param[in] cfg      config record whose buffer 1 is forwarded
- * \param[in] lsi      superblock info of the target
- *
- * \retval 0           connection added
- * \retval -ENOMEM     the buffers or the config record could not be allocated
- * \retval negative    error from lustre_find_lwp() or class_add_conn()
- */
-static int lustre_lwp_add_conn(struct lustre_cfg *cfg,
-                               struct lustre_sb_info *lsi)
-{
-       struct lustre_cfg_bufs *bufs = NULL;
-       struct lustre_cfg      *lcfg = NULL;
-       char                   *lwpname = NULL;
-       struct obd_device      *lwp;
-       int                     rc;
-       ENTRY;
-
-       lwp = lustre_find_lwp(lsi, &lwpname, NULL);
-       if (IS_ERR(lwp)) {
-               CERROR("%s: can't find lwp device.\n", lsi->lsi_svname);
-               GOTO(out, rc = PTR_ERR(lwp));
-       }
-       LASSERT(lwpname != NULL);
-
-       OBD_ALLOC_PTR(bufs);
-       if (bufs == NULL)
-               GOTO(out, rc = -ENOMEM);
-
-       lustre_cfg_bufs_reset(bufs, lwpname);
-       lustre_cfg_bufs_set_string(bufs, 1,
-                                  lustre_cfg_string(cfg, 1));
-
-       /* The record must be checked before use. NOTE(review): other callers
-        * in this file test lustre_cfg_new() against NULL; an ERR_PTR result
-        * is handled too in case the helper reports failure that way. */
-       lcfg = lustre_cfg_new(LCFG_ADD_CONN, bufs);
-       if (lcfg == NULL)
-               GOTO(out, rc = -ENOMEM);
-       if (IS_ERR(lcfg)) {
-               rc = PTR_ERR(lcfg);
-               lcfg = NULL;    /* nothing to free at 'out' */
-               GOTO(out, rc);
-       }
-
-       rc = class_add_conn(lwp, lcfg);
-       if (rc)
-               CERROR("%s: can't add conn: rc = %d\n", lwpname, rc);
-
-out:
-       if (bufs != NULL)
-               OBD_FREE_PTR(bufs);
-       if (lcfg != NULL)
-               lustre_cfg_free(lcfg);
-       if (lwpname != NULL)
-               OBD_FREE(lwpname, MTI_NAME_MAXLEN);
-       RETURN(rc);
-}
-
-/**
- * Retrieve MDT nids from the client log, then start the lwp device.
- * there are only two scenarios which would include mdt nid.
- * 1.
- * marker   5 (flags=0x01, v2.1.54.0) lustre-MDT0000  'add mdc' xxx-
- * add_uuid  nid=192.168.122.162@tcp(0x20000c0a87aa2)  0:  1:192.168.122.162@tcp
- * attach    0:lustre-MDT0000-mdc  1:mdc  2:lustre-clilmv_UUID
- * setup     0:lustre-MDT0000-mdc  1:lustre-MDT0000_UUID  2:192.168.122.162@tcp
- * add_uuid  nid=192.168.172.1@tcp(0x20000c0a8ac01)  0:  1:192.168.172.1@tcp
- * add_conn  0:lustre-MDT0000-mdc  1:192.168.172.1@tcp
- * modify_mdc_tgts add 0:lustre-clilmv  1:lustre-MDT0000_UUID xxxx
- * marker   5 (flags=0x02, v2.1.54.0) lustre-MDT0000  'add mdc' xxxx-
- * 2.
- * marker   7 (flags=0x01, v2.1.54.0) lustre-MDT0000  'add failnid' xxxx-
- * add_uuid  nid=192.168.122.2@tcp(0x20000c0a87a02)  0:  1:192.168.122.2@tcp
- * add_conn  0:lustre-MDT0000-mdc  1:192.168.122.2@tcp
- * marker   7 (flags=0x02, v2.1.54.0) lustre-MDT0000  'add failnid' xxxx-
- *
- * \a data is the config_llog_instance of the log being processed; its
- * cfg_flags carry the marker state from one record to the next:
- * - a start marker for MDT0 commented "add mdc" or "add failnid" sets
- *   CFG_F_MARKER ("add failnid" additionally sets CFG_F_SKIP); the end
- *   marker resets cfg_flags to 0. Markers flagged CM_SKIP or CM_EXCLUDE,
- *   or not targeting MDT0, are ignored;
- * - add_uuid with exactly CFG_F_MARKER starts the lwp through
- *   lustre_lwp_setup() and then sets CFG_F_SKIP; with
- *   CFG_F_MARKER | CFG_F_SKIP it only registers the nid through
- *   class_add_uuid();
- * - add_conn addressed to an MDT0 mdc (is_mdc_for_mdt0()) is forwarded to
- *   lustre_lwp_add_conn().
- * Records carrying any other command are ignored.
- *
- * Returns 0 on success, -EINVAL for a record that is not OBD_CFG_REC, or
- * the error of the step that failed.
-**/
-static int client_lwp_config_process(const struct lu_env *env,
-                                    struct llog_handle *handle,
-                                    struct llog_rec_hdr *rec, void *data)
-{
-       struct config_llog_instance *clli = data;
-       int                          cfg_len = rec->lrh_len;
-       char                        *cfg_buf = (char *) (rec + 1);
-       struct lustre_cfg           *lcfg = NULL;
-       struct lustre_sb_info       *lsi;
-       int                          rc = 0, swab = 0;
-       ENTRY;
-
-       if (rec->lrh_type != OBD_CFG_REC) {
-               CERROR("Unknown llog record type %#x encountered\n",
-                      rec->lrh_type);
-               RETURN(-EINVAL);
-       }
-
-       LASSERT(clli->cfg_sb != NULL);
-       lsi = s2lsi(clli->cfg_sb);
-
-       lcfg = (struct lustre_cfg *)cfg_buf;
-       if (lcfg->lcfg_version == __swab32(LUSTRE_CFG_VERSION)) {
-               lustre_swab_lustre_cfg(lcfg);
-               swab = 1;
-       }
-
-       rc = lustre_cfg_sanity_check(cfg_buf, cfg_len);
-       if (rc)
-               GOTO(out, rc);
-
-       switch (lcfg->lcfg_command) {
-       case LCFG_MARKER: {
-               struct cfg_marker *marker = lustre_cfg_buf(lcfg, 1);
-
-               lustre_swab_cfg_marker(marker, swab,
-                                      LUSTRE_CFG_BUFLEN(lcfg, 1));
-               if (marker->cm_flags & CM_SKIP ||
-                   marker->cm_flags & CM_EXCLUDE)
-                       GOTO(out, rc = 0);
-
-               if (!tgt_is_mdt0(marker->cm_tgtname))
-                       GOTO(out, rc = 0);
-
-               if(!strncmp(marker->cm_comment, "add mdc", 7) ||
-                  !strncmp(marker->cm_comment, "add failnid", 11)) {
-                       if (marker->cm_flags & CM_START) {
-                               clli->cfg_flags = CFG_F_MARKER;
-                               /* This hack tells apart an ADD_UUID that
-                                * comes from an "add mdc" record from one
-                                * that comes from an "add failnid" record. */
-                               if (!strncmp(marker->cm_comment,
-                                            "add failnid", 11))
-                                       clli->cfg_flags |= CFG_F_SKIP;
-                       } else if (marker->cm_flags & CM_END) {
-                               clli->cfg_flags = 0;
-                       }
-               }
-               break;
-       }
-       case LCFG_ADD_UUID: {
-               if (clli->cfg_flags == CFG_F_MARKER) {
-                       rc = lustre_lwp_setup(lcfg, lsi);
-                       /* XXX: process only the first nid as
-                        * we don't need another instance of lwp */
-                       clli->cfg_flags |= CFG_F_SKIP;
-               } else if (clli->cfg_flags == (CFG_F_MARKER | CFG_F_SKIP)) {
-                       rc = class_add_uuid(lustre_cfg_string(lcfg, 1),
-                                       lcfg->lcfg_nid);
-                       if (rc)
-                               CERROR("%s: Fail to add uuid, rc:%d\n",
-                                      lsi->lsi_svname, rc);
-               }
-               break;
-       }
-       case LCFG_ADD_CONN: {
-               if (is_mdc_for_mdt0(lustre_cfg_string(lcfg, 0)))
-                       rc = lustre_lwp_add_conn(lcfg, lsi);
-               break;
-       }
-       default:
-               break;
-       }
-out:
-       RETURN(rc);
-}
-
-/**
- * Disconnect the lwp of the target mounted on \a sb.
- *
- * The "<fsname>-client" config log is ended first. If an lwp device exists,
- * an LCFG_CLEANUP record is then passed to its ldo_process_config() method
- * to disconnect the import.
- *
- * \retval 0           on success, or if the lwp device was never started
- * \retval negative    errno on failure
- **/
-static int lustre_disconnect_lwp(struct super_block *sb)
-{
-       struct lustre_sb_info           *lsi = s2lsi(sb);
-       struct config_llog_instance     *llog_cfg = NULL;
-       struct lustre_cfg_bufs          *cfg_bufs = NULL;
-       struct lustre_cfg               *cleanup = NULL;
-       struct obd_device               *lwp;
-       struct lu_device                *ludev;
-       char                            *lwpname = NULL;
-       char                            *logname = NULL;
-       bool                             missing;
-       int                              rc;
-       ENTRY;
-
-       lwp = lustre_find_lwp(lsi, &lwpname, &logname);
-       missing = IS_ERR(lwp);
-       /* a missing device is tolerated, any other lookup error is fatal */
-       if (missing && PTR_ERR(lwp) != -ENOENT)
-               GOTO(out, rc = PTR_ERR(lwp));
-
-       LASSERT(lwpname != NULL);
-       LASSERT(logname != NULL);
-
-       OBD_ALLOC_PTR(llog_cfg);
-       if (llog_cfg == NULL)
-               GOTO(out, rc = -ENOMEM);
-
-       /* the config log is ended even when no lwp device was found */
-       llog_cfg->cfg_instance = sb;
-       rc = lustre_end_log(sb, logname, llog_cfg);
-       if (rc != 0) {
-               CERROR("%s: Can't end config log %s.\n", lwpname, logname);
-               GOTO(out, rc);
-       }
-
-       if (missing) {
-               CDEBUG(D_CONFIG, "%s: lwp device wasn't started.\n",
-                      lsi->lsi_svname);
-               GOTO(out, rc = 0);
-       }
-
-       OBD_ALLOC_PTR(cfg_bufs);
-       if (cfg_bufs == NULL)
-               GOTO(out, rc = -ENOMEM);
-
-       lustre_cfg_bufs_reset(cfg_bufs, lwp->obd_name);
-       lustre_cfg_bufs_set_string(cfg_bufs, 1, NULL);
-       cleanup = lustre_cfg_new(LCFG_CLEANUP, cfg_bufs);
-       if (cleanup == NULL)
-               GOTO(out, rc = -ENOMEM);
-
-       /* Disconnect the import. NULL is passed as the env argument since
-        * it will not be used. */
-       ludev = lwp->obd_lu_dev;
-       rc = ludev->ld_ops->ldo_process_config(NULL, ludev, cleanup);
-out:
-       if (cleanup != NULL)
-               lustre_cfg_free(cleanup);
-       if (cfg_bufs != NULL)
-               OBD_FREE_PTR(cfg_bufs);
-       if (llog_cfg != NULL)
-               OBD_FREE_PTR(llog_cfg);
-       if (lwpname != NULL)
-               OBD_FREE(lwpname, MTI_NAME_MAXLEN);
-       if (logname != NULL)
-               OBD_FREE(logname, MTI_NAME_MAXLEN);
-       RETURN(rc);
-}
-
-/**
- * Stop the lwp for an OST/MDT target.
- *
- * The device is looked up with lustre_find_lwp() and cleaned up with
- * obd_force set. A failed lookup is only logged (at D_CONFIG for -ENOENT,
- * at D_ERROR otherwise) and reported as success.
- **/
-static int lustre_stop_lwp(struct super_block *sb)
-{
-       struct lustre_sb_info   *lsi = s2lsi(sb);
-       struct obd_device       *lwp_dev;
-       char                    *name = NULL;
-       int                      rc = 0;
-       ENTRY;
-
-       lwp_dev = lustre_find_lwp(lsi, &name, NULL);
-       if (IS_ERR(lwp_dev)) {
-               int mask = PTR_ERR(lwp_dev) == -ENOENT ? D_CONFIG : D_ERROR;
-
-               CDEBUG(mask, "%s: lwp wasn't started.\n", lsi->lsi_svname);
-               GOTO(out, rc = 0);
-       }
-
-       lwp_dev->obd_force = 1;
-       rc = class_manual_cleanup(lwp_dev);
-
-out:
-       if (name != NULL)
-               OBD_FREE(name, MTI_NAME_MAXLEN);
-       RETURN(rc);
-}
-
-/**
- * Start the lwp(fsname-MDT0000-lwp-OSTxxxx) for an OST or MDT target,
- * which would be used to establish connection from OST to MDT0.
- *
- * Nothing is done when the lwp device already exists. When the lookup
- * reports -ENOENT, the "<fsname>-client" config log is processed with
- * client_lwp_config_process() as the per-record callback. Any other
- * lookup error is returned as is.
- **/
-static int lustre_start_lwp(struct super_block *sb)
-{
-       struct lustre_sb_info       *lsi = s2lsi(sb);
-       struct config_llog_instance *llog_cfg = NULL;
-       struct obd_device           *found;
-       char                        *devname = NULL;
-       char                        *clilog = NULL;
-       int                          rc;
-       ENTRY;
-
-       found = lustre_find_lwp(lsi, &devname, &clilog);
-
-       /* the lwp device is already started */
-       if (found != NULL && !IS_ERR(found))
-               GOTO(out, rc = 0);
-
-       /* only "no such device" means there is something left to start */
-       if (PTR_ERR(found) != -ENOENT)
-               GOTO(out, rc = PTR_ERR(found));
-
-       LASSERT(devname != NULL);
-       LASSERT(clilog != NULL);
-
-       OBD_ALLOC_PTR(llog_cfg);
-       if (llog_cfg == NULL)
-               GOTO(out, rc = -ENOMEM);
-
-       llog_cfg->cfg_callback = client_lwp_config_process;
-       llog_cfg->cfg_instance = sb;
-
-       rc = lustre_process_log(sb, clilog, llog_cfg);
-out:
-       if (devname != NULL)
-               OBD_FREE(devname, MTI_NAME_MAXLEN);
-       if (clilog != NULL)
-               OBD_FREE(clilog, MTI_NAME_MAXLEN);
-       if (llog_cfg != NULL)
-               OBD_FREE_PTR(llog_cfg);
-       RETURN(rc);
-}
-
-DEFINE_MUTEX(server_start_lock);
-
-/*
- * Stop MDS/OSS if nobody is using them.
- *
- * \a lsiflags says whether the target being stopped was an MDT or an OST
- * (either one or neither). The matching service device is cleaned up, with
- * obd_force set, when its target type is unknown or has a zero typ_refcnt.
- * Runs under server_start_lock. Returns the result of
- * class_manual_cleanup(), or 0 if nothing had to be stopped.
- */
-static int server_stop_servers(int lsiflags)
-{
-       struct obd_device *service = NULL;
-       struct obd_type *tgt_type = NULL;
-       int rc = 0;
-       ENTRY;
-
-       mutex_lock(&server_start_lock);
-
-       /* if this was an MDT, and there are no more MDT's, clean up the MDS */
-       if (lsiflags & LDD_F_SV_TYPE_MDT) {
-               service = class_name2obd(LUSTRE_MDS_OBDNAME);
-               if (service != NULL)
-                       tgt_type = class_search_type(LUSTRE_MDT_NAME);
-       }
-
-       /* if this was an OST, and there are no more OST's, clean up the OSS */
-       if (lsiflags & LDD_F_SV_TYPE_OST) {
-               service = class_name2obd(LUSTRE_OSS_OBDNAME);
-               if (service != NULL)
-                       tgt_type = class_search_type(LUSTRE_OST_NAME);
-       }
-
-       if (service != NULL &&
-           (tgt_type == NULL || tgt_type->typ_refcnt == 0)) {
-               service->obd_force = 1;
-               /* obd_fail doesn't mean much on a server obd */
-               rc = class_manual_cleanup(service);
-       }
-
-       mutex_unlock(&server_start_lock);
-
-       RETURN(rc);
-}
-
-/* Print \a title followed by the server name, fs name, uuid, config version
- * and flags of \a mti through PRINT_CMD. Always returns 0. */
-int server_mti_print(char *title, struct mgs_target_info *mti)
-{
-       PRINT_CMD(PRINT_MASK, "mti %s\n", title);
-       PRINT_CMD(PRINT_MASK, "server: %s\n", mti->mti_svname);
-       PRINT_CMD(PRINT_MASK, "fs:     %s\n", mti->mti_fsname);
-       PRINT_CMD(PRINT_MASK, "uuid:   %s\n", mti->mti_uuid);
-       PRINT_CMD(PRINT_MASK, "ver: %d  flags: %#x\n",
-                 mti->mti_config_ver, mti->mti_flags);
-
-       return 0;
-}
-
-/**
- * Get service name (svname) from string
- *
- * \a label is split by server_name2fsname(); the text following the '-'
- * that ends the fsname is copied into \a svname.
- *
- * \param[in]  label   string to parse
- * \param[out] svname  buffer receiving the service name
- * \param[out] endptr  if not NULL, set to the end of the fsname, i.e. the
- *                     '-' separator inside \a label
- * \param[in]  svsize  size of \a svname in bytes
- *
- * \retval 0        on success
- * \retval -EINVAL  no '-' follows the fsname (previously a bare -1)
- * \retval -E2BIG   the service name does not fit in \a svsize bytes
- * \retval negative error from server_name2fsname()
- */
-int server_name2svname(char *label, char *svname, char **endptr, size_t svsize)
-{
-       int rc;
-       char *dash;
-
-       /* We use server_name2fsname() just for parsing */
-       rc = server_name2fsname(label, NULL, &dash);
-       if (rc != 0)
-               return rc;
-
-       if (*dash != '-')
-               return -EINVAL;
-
-       /* honour the documented output parameter, which used to be ignored */
-       if (endptr != NULL)
-               *endptr = dash;
-
-       if (strlcpy(svname, dash + 1, svsize) >= svsize)
-               return -E2BIG;
-
-       return 0;
-}
-EXPORT_SYMBOL(server_name2svname);
-
-
-/*
- * Get the index from the obd name.
- *
- * \a svname is split by server_name2fsname(); what follows the '-' must
- * start with "MDT" or "OST", followed either by "all" or by a hexadecimal
- * index.
- *
- * idx:    receives the parsed index; may be NULL; left untouched for "all"
- * endptr: if not NULL, set to the end of the name (just past "all", or
- *         where simple_strtoul() stopped parsing)
- *
- * rc = server type (LDD_F_SV_TYPE_MDT or LDD_F_SV_TYPE_OST, or'ed with
- *      LDD_F_SV_ALL for "all"), or
- * rc < 0 on error
- */
-int server_name2index(char *svname, __u32 *idx, char **endptr)
-{
-       unsigned long index;
-       int rc;
-       char *dash;
-
-       /* We use server_name2fsname() just for parsing */
-       rc = server_name2fsname(svname, NULL, &dash);
-       if (rc != 0)
-               return rc;
-
-       if (*dash != '-')
-               return -EINVAL;
-
-       dash++;
-
-       if (strncmp(dash, "MDT", 3) == 0)
-               rc = LDD_F_SV_TYPE_MDT;
-       else if (strncmp(dash, "OST", 3) == 0)
-               rc = LDD_F_SV_TYPE_OST;
-       else
-               return -EINVAL;
-
-       dash += 3;
-
-       if (strcmp(dash, "all") == 0) {
-               /* keep the endptr contract on this path as well */
-               if (endptr != NULL)
-                       *endptr = dash + 3;
-               return rc | LDD_F_SV_ALL;
-       }
-
-       index = simple_strtoul(dash, endptr, 16);
-       /* callers interested only in the server type may pass idx == NULL */
-       if (idx != NULL)
-               *idx = index;
-
-       return rc;
-}
-EXPORT_SYMBOL(server_name2index);
-
-/*
- * Generate data for registration.
- *
- * Fills \a mti from \a lsi: the service name, the local nids accepted by
- * the mount parameters (the loop stops once MTI_NIDS_MAX are stored), the
- * Lustre version, the fsname, the target index, the LDD flags and the mount
- * parameter string.
- *
- * Returns 0 on success, -EINVAL if \a lsi is not a server, -E2BIG if the
- * service name or the parameters do not fit into \a mti, or the error of
- * server_name2fsname() / server_name2index().
- *
- * Every exit goes through RETURN() so that it pairs with ENTRY.
- */
-static int server_lsi2mti(struct lustre_sb_info *lsi,
-                         struct mgs_target_info *mti)
-{
-       lnet_process_id_t id;
-       int rc, i = 0;
-       ENTRY;
-
-       if (!IS_SERVER(lsi))
-               RETURN(-EINVAL);
-
-       if (strlcpy(mti->mti_svname, lsi->lsi_svname, sizeof(mti->mti_svname))
-           >= sizeof(mti->mti_svname))
-               RETURN(-E2BIG);
-
-       mti->mti_nid_count = 0;
-       while (LNetGetId(i++, &id) != -ENOENT) {
-               if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND)
-                       continue;
-
-               /* server use --servicenode param, only allow specified
-                * nids be registered */
-               if ((lsi->lsi_lmd->lmd_flags & LMD_FLG_NO_PRIMNODE) != 0 &&
-                   class_match_nid(lsi->lsi_lmd->lmd_params,
-                                   PARAM_FAILNODE, id.nid) < 1)
-                       continue;
-
-               /* match specified network */
-               if (!class_match_net(lsi->lsi_lmd->lmd_params,
-                                    PARAM_NETWORK, LNET_NIDNET(id.nid)))
-                       continue;
-
-               mti->mti_nids[mti->mti_nid_count] = id.nid;
-               mti->mti_nid_count++;
-               if (mti->mti_nid_count >= MTI_NIDS_MAX) {
-                       CWARN("Only using first %d nids for %s\n",
-                             mti->mti_nid_count, mti->mti_svname);
-                       break;
-               }
-       }
-
-       mti->mti_lustre_ver = LUSTRE_VERSION_CODE;
-       mti->mti_config_ver = 0;
-
-       rc = server_name2fsname(lsi->lsi_svname, mti->mti_fsname, NULL);
-       if (rc != 0)
-               RETURN(rc);
-
-       rc = server_name2index(lsi->lsi_svname, &mti->mti_stripe_index, NULL);
-       if (rc < 0)
-               RETURN(rc);
-       /* Orion requires index to be set */
-       LASSERT(!(rc & LDD_F_NEED_INDEX));
-       /* keep only LDD flags */
-       mti->mti_flags = lsi->lsi_flags & LDD_F_MASK;
-       if (mti->mti_flags & (LDD_F_WRITECONF | LDD_F_VIRGIN))
-               mti->mti_flags |= LDD_F_UPDATE;
-
-       /* compare the size_t result directly, as for mti_svname above,
-        * instead of narrowing it to a signed int first */
-       if (strlcpy(mti->mti_params, lsi->lsi_lmd->lmd_params,
-                   sizeof(mti->mti_params)) >= sizeof(mti->mti_params))
-               RETURN(-E2BIG);
-
-       RETURN(0);
-}
-
-/*
- * Register an old or new target with the MGS. If needed MGS will construct
- * startup logs and assign index.
- *
- * The target description is built by server_lsi2mti(), tagged with
- * LDD_F_OPC_REG and sent through the MGC export with the
- * KEY_REGISTER_TARGET key.
- *
- * Returns 0 on success. A failed request is also reported as 0 (and logged
- * as non-fatal) when the MGS did not set LDD_F_ERROR and neither
- * LDD_F_NEED_INDEX nor LDD_F_UPDATE is set in lsi_flags. Otherwise a
- * negative errno is returned: -EINVAL if \a lsi is not a server, -ENOMEM if
- * the target info cannot be allocated, or the error of the failing step.
- */
-static int server_register_target(struct lustre_sb_info *lsi)
-{
-        struct obd_device *mgc = lsi->lsi_mgc;
-        struct mgs_target_info *mti = NULL;
-        bool writeconf;
-        int rc;
-        ENTRY;
-
-        LASSERT(mgc);
-
-       if (!IS_SERVER(lsi))
-                RETURN(-EINVAL);
-
-        OBD_ALLOC_PTR(mti);
-        if (!mti)
-                RETURN(-ENOMEM);
-
-       rc = server_lsi2mti(lsi, mti);
-        if (rc)
-                GOTO(out, rc);
-
-        CDEBUG(D_MOUNT, "Registration %s, fs=%s, %s, index=%04x, flags=%#x\n",
-               mti->mti_svname, mti->mti_fsname,
-               libcfs_nid2str(mti->mti_nids[0]), mti->mti_stripe_index,
-               mti->mti_flags);
-
-        /* if writeconf is true, the registration must succeed */
-       writeconf = !!(lsi->lsi_flags & (LDD_F_NEED_INDEX | LDD_F_UPDATE));
-        mti->mti_flags |= LDD_F_OPC_REG;
-
-        /* Register the target */
-        /* FIXME use mgc_process_config instead */
-        rc = obd_set_info_async(NULL, mgc->u.cli.cl_mgc_mgsexp,
-                                sizeof(KEY_REGISTER_TARGET), KEY_REGISTER_TARGET,
-                                sizeof(*mti), mti, NULL);
-        if (rc) {
-                if (mti->mti_flags & LDD_F_ERROR) {
-                        LCONSOLE_ERROR_MSG(0x160,
-                                "The MGS is refusing to allow this "
-                                "server (%s) to start. Please see messages"
-                               " on the MGS node.\n", lsi->lsi_svname);
-                } else if (writeconf) {
-                        LCONSOLE_ERROR_MSG(0x15f,
-                                "Communication to the MGS return error %d. "
-                                "Is the MGS running?\n", rc);
-                } else {
-                        CERROR("Cannot talk to the MGS: %d, not fatal\n", rc);
-                        /* reset the error code for non-fatal error. */
-                        rc = 0;
-                }
-                GOTO(out, rc);
-        }
-
-out:
-        if (mti)
-                OBD_FREE_PTR(mti);
-        RETURN(rc);
-}
-
-/**
- * Notify the MGS that this target is ready.
- * Used by IR - if the MGS receives this message, it will notify clients.
- *
- * The target description built by server_lsi2mti() is tagged with the
- * instance number of \a obd and LDD_F_OPC_READY, then sent through the MGC
- * export with the KEY_REGISTER_TARGET key. If the request succeeds and the
- * reply carries LDD_F_IR_CAPABLE without LDD_F_ERROR, LDD_F_IR_CAPABLE is
- * recorded in lsi_flags.
- */
-static int server_notify_target(struct super_block *sb, struct obd_device *obd)
-{
-       struct lustre_sb_info  *lsi = s2lsi(sb);
-       struct obd_device      *mgc = lsi->lsi_mgc;
-       struct mgs_target_info *tgt_info = NULL;
-       int                     rc;
-       ENTRY;
-
-       LASSERT(mgc);
-
-       if (!IS_SERVER(lsi))
-               RETURN(-EINVAL);
-
-       OBD_ALLOC_PTR(tgt_info);
-       if (tgt_info == NULL)
-               RETURN(-ENOMEM);
-
-       rc = server_lsi2mti(lsi, tgt_info);
-       if (rc != 0)
-               GOTO(out, rc);
-
-       tgt_info->mti_instance = obd->u.obt.obt_instance;
-       tgt_info->mti_flags |= LDD_F_OPC_READY;
-
-       /* FIXME use mgc_process_config instead */
-       rc = obd_set_info_async(NULL, mgc->u.cli.cl_mgc_mgsexp,
-                               sizeof(KEY_REGISTER_TARGET),
-                               KEY_REGISTER_TARGET,
-                               sizeof(*tgt_info), tgt_info, NULL);
-
-       /* Imperative recovery: did the MGS tell us to use IR? */
-       if (rc == 0 && (tgt_info->mti_flags & LDD_F_ERROR) == 0 &&
-           (tgt_info->mti_flags & LDD_F_IR_CAPABLE) != 0)
-               lsi->lsi_flags |= LDD_F_IR_CAPABLE;
-
-out:
-       if (tgt_info != NULL)
-               OBD_FREE_PTR(tgt_info);
-       RETURN(rc);
-}
-
-/** Start server targets: MDTs and OSTs
- */
-static int server_start_targets(struct super_block *sb, struct vfsmount *mnt)
-{
-        struct obd_device *obd;
-        struct lustre_sb_info *lsi = s2lsi(sb);
-        struct config_llog_instance cfg;
-       struct lu_env env;
-       struct lu_device *dev;
-        int rc;
-        ENTRY;
-
-       CDEBUG(D_MOUNT, "starting target %s\n", lsi->lsi_svname);
-
-       if (IS_MDT(lsi)) {
-               /* make sure the MDS is started */
-               mutex_lock(&server_start_lock);
-               obd = class_name2obd(LUSTRE_MDS_OBDNAME);
-               if (!obd) {
-                       rc = lustre_start_simple(LUSTRE_MDS_OBDNAME,
-                                                LUSTRE_MDS_NAME,
-                                                LUSTRE_MDS_OBDNAME"_uuid",
-                                                0, 0, 0, 0);
-                       if (rc) {
-                               mutex_unlock(&server_start_lock);
-                               CERROR("failed to start MDS: %d\n", rc);
-                               RETURN(rc);
-                       }
-               }
-               mutex_unlock(&server_start_lock);
-       }
-
-        /* If we're an OST, make sure the global OSS is running */
-       if (IS_OST(lsi)) {
-                /* make sure OSS is started */
-               mutex_lock(&server_start_lock);
-                obd = class_name2obd(LUSTRE_OSS_OBDNAME);
-                if (!obd) {
-                        rc = lustre_start_simple(LUSTRE_OSS_OBDNAME,
-                                                 LUSTRE_OSS_NAME,
-                                                 LUSTRE_OSS_OBDNAME"_uuid",
-                                                0, 0, 0, 0);
-                        if (rc) {
-                               mutex_unlock(&server_start_lock);
-                                CERROR("failed to start OSS: %d\n", rc);
-                                RETURN(rc);
-                        }
-                }
-               mutex_unlock(&server_start_lock);
-        }
-
-        /* Set the mgc fs to our server disk.  This allows the MGC to
-         * read and write configs locally, in case it can't talk to the MGS. */
-       if (lsi->lsi_srv_mnt) {
-               rc = server_mgc_set_fs(lsi->lsi_mgc, sb);
-               if (rc)
-                       GOTO(out_stop_service, rc);
-       }
-
-        /* Register with MGS */
-       rc = server_register_target(lsi);
-        if (rc)
-                GOTO(out_mgc, rc);
-
-        /* Let the target look up the mount using the target's name
-           (we can't pass the sb or mnt through class_process_config.) */
-       rc = server_register_mount(lsi->lsi_svname, sb, mnt);
-        if (rc)
-                GOTO(out_mgc, rc);
-
-       /* Start targets using the llog named for the target */
-       memset(&cfg, 0, sizeof(cfg));
-       cfg.cfg_callback = class_config_llog_handler;
-       rc = lustre_process_log(sb, lsi->lsi_svname, &cfg);
-       if (rc) {
-               CERROR("failed to start server %s: %d\n",
-                      lsi->lsi_svname, rc);
-               /* Do NOT call server_deregister_mount() here. Doing so would
-                * make it impossible to find the mount at cleanup time and
-                * would leak @lsi and other state. -umka */
-               GOTO(out_mgc, rc);
-       }
-
-       obd = class_name2obd(lsi->lsi_svname);
-       if (!obd) {
-               CERROR("no server named %s was started\n", lsi->lsi_svname);
-               GOTO(out_mgc, rc = -ENXIO);
-       }
-
-       if (IS_OST(lsi) || IS_MDT(lsi)) {
-               rc = lustre_start_lwp(sb);
-               if (rc) {
-                       CERROR("%s: failed to start LWP: %d\n",
-                              lsi->lsi_svname, rc);
-                       GOTO(out_mgc, rc);
-               }
-       }
-
-       server_notify_target(sb, obd);
+                OBD_FREE_PTR(data);
+        if (mgcname)
+                OBD_FREE(mgcname, len);
+        if (niduuid)
+                OBD_FREE(niduuid, len + 2);
+        RETURN(rc);
+}
 
 
-       /* calculate recovery timeout, do it after lustre_process_log */
-       server_calc_timeout(lsi, obd);
+static int lustre_stop_mgc(struct super_block *sb)
+{
+        struct lustre_sb_info *lsi = s2lsi(sb);
+        struct obd_device *obd;
+        char *niduuid = 0, *ptr = 0;
+        int i, rc = 0, len = 0;
+        ENTRY;
 
 
-       /* log has been fully processed */
-       obd_notify(obd, NULL, OBD_NOTIFY_CONFIG, (void *)CONFIG_LOG);
+        if (!lsi)
+                RETURN(-ENOENT);
+        obd = lsi->lsi_mgc;
+        if (!obd)
+                RETURN(-ENOENT);
+        lsi->lsi_mgc = NULL;
 
 
-       /* log has been fully processed, let clients connect */
-       dev = obd->obd_lu_dev;
-       if (dev && dev->ld_ops->ldo_prepare) {
-               rc = lu_env_init(&env, dev->ld_type->ldt_ctx_tags);
-               if (rc == 0) {
-                       struct lu_context  session_ctx;
+       mutex_lock(&mgc_start_lock);
+        LASSERT(cfs_atomic_read(&obd->u.cli.cl_mgc_refcount) > 0);
+        if (!cfs_atomic_dec_and_test(&obd->u.cli.cl_mgc_refcount)) {
+                /* This is not fatal, every client that stops
+                   will call in here. */
+                CDEBUG(D_MOUNT, "mgc still has %d references.\n",
+                       cfs_atomic_read(&obd->u.cli.cl_mgc_refcount));
+                GOTO(out, rc = -EBUSY);
+        }
 
 
-                       lu_context_init(&session_ctx, LCT_SESSION);
-                       session_ctx.lc_thread = NULL;
-                       lu_context_enter(&session_ctx);
-                       env.le_ses = &session_ctx;
+        /* The MGC has no recoverable data in any case.
+         * Forced shutdown is set in umount_begin. */
+        obd->obd_no_recov = 1;
 
 
-                       rc = dev->ld_ops->ldo_prepare(&env, NULL, dev);
+        if (obd->u.cli.cl_mgc_mgsexp) {
+                /* An error is not fatal, if we are unable to send the
+                   disconnect mgs ping evictor cleans up the export */
+                rc = obd_disconnect(obd->u.cli.cl_mgc_mgsexp);
+                if (rc)
+                        CDEBUG(D_MOUNT, "disconnect failed %d\n", rc);
+        }
 
 
-                       lu_env_fini(&env);
-                       lu_context_exit(&session_ctx);
-                       lu_context_fini(&session_ctx);
-               }
-       }
+        /* Save the obdname for cleaning the nid uuids, which are
+           obdname_XX */
+        len = strlen(obd->obd_name) + 6;
+        OBD_ALLOC(niduuid, len);
+        if (niduuid) {
+                strcpy(niduuid, obd->obd_name);
+                ptr = niduuid + strlen(niduuid);
+        }
 
 
-       /* abort recovery only on the complete stack:
-        * many devices can be involved */
-       if ((lsi->lsi_lmd->lmd_flags & LMD_FLG_ABORT_RECOV) &&
-           (OBP(obd, iocontrol))) {
-               obd_iocontrol(OBD_IOC_ABORT_RECOVERY, obd->obd_self_export, 0,
-                             NULL, NULL);
-       }
+        rc = class_manual_cleanup(obd);
+        if (rc)
+                GOTO(out, rc);
 
 
-out_mgc:
-        /* Release the mgc fs for others to use */
-       if (lsi->lsi_srv_mnt)
-               server_mgc_clear_fs(lsi->lsi_mgc);
+        /* Clean the nid uuids */
+        if (!niduuid)
+                GOTO(out, rc = -ENOMEM);
 
 
-out_stop_service:
-       if (rc != 0)
-               server_stop_servers(lsi->lsi_flags);
+        for (i = 0; i < lsi->lsi_lmd->lmd_mgs_failnodes; i++) {
+                sprintf(ptr, "_%x", i);
+                rc = do_lcfg(LUSTRE_MGC_OBDNAME, 0, LCFG_DEL_UUID,
+                             niduuid, 0, 0, 0);
+                if (rc)
+                        CERROR("del MDC UUID %s failed: rc = %d\n",
+                               niduuid, rc);
+        }
+out:
+        if (niduuid)
+                OBD_FREE(niduuid, len);
 
 
+        /* class_import_put will get rid of the additional connections */
+       mutex_unlock(&mgc_start_lock);
         RETURN(rc);
 }
 
         RETURN(rc);
 }
 
@@ -1994,7 +630,7 @@ static int lustre_free_lsi(struct super_block *sb)
 
 /* The lsi has one reference for every server that is using the disk -
    e.g. MDT, MGS, and potentially MGC */
 
 /* The lsi has one reference for every server that is using the disk -
    e.g. MDT, MGS, and potentially MGC */
-static int lustre_put_lsi(struct super_block *sb)
+int lustre_put_lsi(struct super_block *sb)
 {
         struct lustre_sb_info *lsi = s2lsi(sb);
         ENTRY;
 {
         struct lustre_sb_info *lsi = s2lsi(sb);
         ENTRY;
@@ -2014,476 +650,109 @@ static int lustre_put_lsi(struct super_block *sb)
         RETURN(0);
 }
 
         RETURN(0);
 }
 
-static int lsi_prepare(struct lustre_sb_info *lsi)
-{
-       __u32 index;
-       int rc;
-       ENTRY;
-
-       LASSERT(lsi);
-       LASSERT(lsi->lsi_lmd);
-
-       /* The server name is given as a mount line option */
-       if (lsi->lsi_lmd->lmd_profile == NULL) {
-               LCONSOLE_ERROR("Can't determine server name\n");
-               RETURN(-EINVAL);
-       }
-
-       if (strlen(lsi->lsi_lmd->lmd_profile) >= sizeof(lsi->lsi_svname))
-               RETURN(-ENAMETOOLONG);
-
-       strcpy(lsi->lsi_svname, lsi->lsi_lmd->lmd_profile);
-
-       /* Determine osd type */
-       if (lsi->lsi_lmd->lmd_osd_type != NULL) {
-               if (strlen(lsi->lsi_lmd->lmd_osd_type) >=
-                          sizeof(lsi->lsi_osd_type))
-                       RETURN(-ENAMETOOLONG);
-
-               strcpy(lsi->lsi_osd_type, lsi->lsi_lmd->lmd_osd_type);
-       } else {
-               strcpy(lsi->lsi_osd_type, LUSTRE_OSD_LDISKFS_NAME);
-       }
-
-       /* XXX: a temp. solution for components using fsfilt
-        *      to be removed in one of the subsequent patches */
-       if (!strcmp(lsi->lsi_lmd->lmd_osd_type, "osd-ldiskfs")) {
-               strcpy(lsi->lsi_fstype, "ldiskfs");
-       } else {
-               strcpy(lsi->lsi_fstype, lsi->lsi_lmd->lmd_osd_type);
-       }
-
-       /* Determine server type */
-       rc = server_name2index(lsi->lsi_svname, &index, NULL);
-       if (rc < 0) {
-               if (lsi->lsi_lmd->lmd_flags & LMD_FLG_MGS) {
-                       /* Assume we're a bare MGS */
-                       rc = 0;
-                       lsi->lsi_lmd->lmd_flags |= LMD_FLG_NOSVC;
-               } else {
-                       LCONSOLE_ERROR("Can't determine server type of '%s'\n",
-                                      lsi->lsi_svname);
-                       RETURN(rc);
-               }
-       }
-       lsi->lsi_flags |= rc;
-
-       /* Add mount line flags that used to be in ldd:
-        * writeconf, mgs, anything else?
-        */
-       lsi->lsi_flags |= (lsi->lsi_lmd->lmd_flags & LMD_FLG_WRITECONF) ?
-               LDD_F_WRITECONF : 0;
-       lsi->lsi_flags |= (lsi->lsi_lmd->lmd_flags & LMD_FLG_VIRGIN) ?
-               LDD_F_VIRGIN : 0;
-       lsi->lsi_flags |= (lsi->lsi_lmd->lmd_flags & LMD_FLG_MGS) ?
-               LDD_F_SV_TYPE_MGS : 0;
-       lsi->lsi_flags |= (lsi->lsi_lmd->lmd_flags & LMD_FLG_NO_PRIMNODE) ?
-               LDD_F_NO_PRIMNODE : 0;
-
-       RETURN(0);
-}
-
-/*************** server mount ******************/
-
-/** Start the shutdown of servers at umount.
+/** Get the fsname ("lustre") from the server name ("lustre-OST003F").
+ * @param [in] svname server name including type and index
+ * @param [out] fsname Buffer to copy filesystem name prefix into.
+ *  Must hold the fsname prefix of svname plus a terminating NUL; a buffer
+ *  of strlen(svname) chars is always large enough.
+ * @param [out] endptr if endptr isn't NULL it is set to end of fsname
+ * rc < 0  on error
  */
  */
-static void server_put_super(struct super_block *sb)
+int server_name2fsname(const char *svname, char *fsname, const char **endptr)
 {
 {
-        struct lustre_sb_info *lsi = s2lsi(sb);
-        struct obd_device     *obd;
-        char *tmpname, *extraname = NULL;
-        int tmpname_sz;
-        int lsiflags = lsi->lsi_flags;
-        ENTRY;
-
-       LASSERT(IS_SERVER(lsi));
-
-       tmpname_sz = strlen(lsi->lsi_svname) + 1;
-        OBD_ALLOC(tmpname, tmpname_sz);
-       memcpy(tmpname, lsi->lsi_svname, tmpname_sz);
-        CDEBUG(D_MOUNT, "server put_super %s\n", tmpname);
-       if (IS_MDT(lsi) && (lsi->lsi_lmd->lmd_flags & LMD_FLG_NOSVC))
-                snprintf(tmpname, tmpname_sz, "MGS");
-
-       /* disconnect the lwp first to drain off the inflight request */
-       if (IS_OST(lsi) || IS_MDT(lsi)) {
-               int     rc;
-
-               rc = lustre_disconnect_lwp(sb);
-               if (rc && rc != ETIMEDOUT)
-                       CERROR("%s: failed to disconnect lwp. (rc=%d)\n",
-                              tmpname, rc);
+       const char *dash = strrchr(svname, '-');
+       if (!dash) {
+               dash = strrchr(svname, ':');
+               if (!dash)
+                       return -EINVAL;
        }
 
        }
 
-        /* Stop the target */
-        if (!(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOSVC) &&
-           (IS_MDT(lsi) || IS_OST(lsi))) {
-                struct lustre_profile *lprof = NULL;
-
-                /* tell the mgc to drop the config log */
-               lustre_end_log(sb, lsi->lsi_svname, NULL);
-
-                /* COMPAT_146 - profile may get deleted in mgc_cleanup.
-                   If there are any setup/cleanup errors, save the lov
-                   name for safety cleanup later. */
-               lprof = class_get_profile(lsi->lsi_svname);
-                if (lprof && lprof->lp_dt) {
-                        OBD_ALLOC(extraname, strlen(lprof->lp_dt) + 1);
-                        strcpy(extraname, lprof->lp_dt);
-                }
-
-               obd = class_name2obd(lsi->lsi_svname);
-                if (obd) {
-                        CDEBUG(D_MOUNT, "stopping %s\n", obd->obd_name);
-                       if (lsiflags & LSI_UMOUNT_FAILOVER)
-                                obd->obd_fail = 1;
-                        /* We can't seem to give an error return code
-                         * to .put_super, so we better make sure we clean up! */
-                        obd->obd_force = 1;
-                        class_manual_cleanup(obd);
-                } else {
-                       CERROR("no obd %s\n", lsi->lsi_svname);
-                       server_deregister_mount(lsi->lsi_svname);
-                }
-        }
-
-        /* If they wanted the mgs to stop separately from the mdt, they
-           should have put it on a different device. */
-       if (IS_MGS(lsi)) {
-                /* if MDS start with --nomgs, don't stop MGS then */
-                if (!(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOMGS))
-                        server_stop_mgs(sb);
-        }
+       /* interpret <fsname>-MDTXXXXX-mdc as mdt, the better way is to pass
+        * in the fsname, then determine the server index */
+       if (!strcmp(LUSTRE_MDC_NAME, dash + 1)) {
+               dash--;
+               for (; dash > svname && *dash != '-' && *dash != ':'; dash--)
+                       ;
+               if (dash == svname)
+                       return -EINVAL;
+       }
 
 
-       if (IS_OST(lsi) || IS_MDT(lsi)) {
-               if (lustre_stop_lwp(sb) < 0)
-                       CERROR("%s: failed to stop lwp!\n", tmpname);
+       if (fsname != NULL) {
+               strncpy(fsname, svname, dash - svname);
+               fsname[dash - svname] = '\0';
        }
 
        }
 
-        /* Clean the mgc and sb */
-        lustre_common_put_super(sb);
-
-       /* wait till all in-progress cleanups are done
-        * specifically we're interested in ofd cleanup
-        * as it pins OSS */
-       obd_zombie_barrier();
-
-       /* Stop the servers (MDS, OSS) if no longer needed.  We must wait
-          until the target is really gone so that our type refcount check
-          is right. */
-       server_stop_servers(lsiflags);
-
-       /* In case of startup or cleanup err, stop related obds */
-       if (extraname) {
-               obd = class_name2obd(extraname);
-                if (obd) {
-                        CWARN("Cleaning orphaned obd %s\n", extraname);
-                        obd->obd_force = 1;
-                        class_manual_cleanup(obd);
-                }
-                OBD_FREE(extraname, strlen(extraname) + 1);
-        }
+       if (endptr != NULL)
+               *endptr = dash;
 
 
-        LCONSOLE_WARN("server umount %s complete\n", tmpname);
-        OBD_FREE(tmpname, tmpname_sz);
-        EXIT;
+       return 0;
 }
 }
+EXPORT_SYMBOL(server_name2fsname);
 
 
-/** Called only for 'umount -f'
+/**
+ * Get service name (svname) from string
+ * rc < 0 on error
+ * NOTE: endptr is currently unused; this function never writes through it
  */
  */
-#ifdef HAVE_UMOUNTBEGIN_VFSMOUNT
-static void server_umount_begin(struct vfsmount *vfsmnt, int flags)
-{
-        struct super_block *sb = vfsmnt->mnt_sb;
-#else
-static void server_umount_begin(struct super_block *sb)
-{
-#endif
-        struct lustre_sb_info *lsi = s2lsi(sb);
-        ENTRY;
-
-#ifdef HAVE_UMOUNTBEGIN_VFSMOUNT
-        if (!(flags & MNT_FORCE)) {
-                EXIT;
-                return;
-        }
-#endif
-
-        CDEBUG(D_MOUNT, "umount -f\n");
-        /* umount = failover
-           umount -f = force
-           no third way to do non-force, non-failover */
-        lsi->lsi_flags &= ~LSI_UMOUNT_FAILOVER;
-        EXIT;
-}
-
-static int server_statfs(struct dentry *dentry, struct kstatfs *buf)
+int server_name2svname(const char *label, char *svname, const char **endptr,
+                      size_t svsize)
 {
 {
-       struct super_block *sb = dentry->d_sb;
-       struct lustre_sb_info *lsi = s2lsi(sb);
-       struct obd_statfs statfs;
        int rc;
        int rc;
-       ENTRY;
-
-       if (lsi->lsi_dt_dev) {
-               rc = dt_statfs(NULL, lsi->lsi_dt_dev, &statfs);
-               if (rc == 0) {
-                       statfs_unpack(buf, &statfs);
-                       buf->f_type = sb->s_magic;
-                       RETURN(0);
-               }
-       }
-
-       /* just return 0 */
-       buf->f_type = sb->s_magic;
-       buf->f_bsize = sb->s_blocksize;
-       buf->f_blocks = 1;
-       buf->f_bfree = 0;
-       buf->f_bavail = 0;
-       buf->f_files = 1;
-       buf->f_ffree = 0;
-       buf->f_namelen = NAME_MAX;
-       RETURN(0);
-}
-
-/** The operations we support directly on the superblock:
- * mount, umount, and df.
- */
-static struct super_operations server_ops =
-{
-        .put_super      = server_put_super,
-        .umount_begin   = server_umount_begin, /* umount -f */
-        .statfs         = server_statfs,
-};
-
-#define log2(n) ffz(~(n))
-#define LUSTRE_SUPER_MAGIC 0x0BD00BD1
-
-static int server_fill_super_common(struct super_block *sb)
-{
-        struct inode *root = 0;
-        ENTRY;
-
-        CDEBUG(D_MOUNT, "Server sb, dev=%d\n", (int)sb->s_dev);
+       const const char *dash;
 
 
-        sb->s_blocksize = 4096;
-        sb->s_blocksize_bits = log2(sb->s_blocksize);
-        sb->s_magic = LUSTRE_SUPER_MAGIC;
-        sb->s_maxbytes = 0; /* we don't allow file IO on server mountpoints */
-        sb->s_flags |= MS_RDONLY;
-        sb->s_op = &server_ops;
-
-        root = new_inode(sb);
-        if (!root) {
-                CERROR("Can't make root inode\n");
-                RETURN(-EIO);
-        }
-
-       /* returns -EIO for every operation */
-       /* make_bad_inode(root); -- badness - can't umount */
-       /* apparently we need to be a directory for the mount to finish */
-       root->i_mode = S_IFDIR;
-
-       sb->s_root = d_make_root(root);
-       if (!sb->s_root) {
-               CERROR("%s: can't make root dentry\n", sb->s_id);
-               RETURN(-EIO);
-       }
-
-       RETURN(0);
-}
-
-static int osd_start(struct lustre_sb_info *lsi, unsigned long mflags)
-{
-       struct lustre_mount_data *lmd = lsi->lsi_lmd;
-       struct obd_device        *obd;
-       struct dt_device_param    p;
-       char                      flagstr[16];
-       int                       rc;
-       ENTRY;
-
-       CDEBUG(D_MOUNT,
-              "Attempting to start %s, type=%s, lsifl=%x, mountfl=%lx\n",
-              lsi->lsi_svname, lsi->lsi_osd_type, lsi->lsi_flags, mflags);
-
-       sprintf(lsi->lsi_osd_obdname, "%s-osd", lsi->lsi_svname);
-       strcpy(lsi->lsi_osd_uuid, lsi->lsi_osd_obdname);
-       strcat(lsi->lsi_osd_uuid, "_UUID");
-       sprintf(flagstr, "%lu:%lu", mflags, (unsigned long) lmd->lmd_flags);
-
-       obd = class_name2obd(lsi->lsi_osd_obdname);
-       if (obd == NULL) {
-               rc = lustre_start_simple(lsi->lsi_osd_obdname,
-                               lsi->lsi_osd_type,
-                               lsi->lsi_osd_uuid, lmd->lmd_dev,
-                               flagstr, lsi->lsi_lmd->lmd_opts,
-                               lsi->lsi_svname);
-               if (rc)
-                       GOTO(out, rc);
-               obd = class_name2obd(lsi->lsi_osd_obdname);
-               LASSERT(obd);
-       }
-
-       rc = obd_connect(NULL, &lsi->lsi_osd_exp, obd, &obd->obd_uuid, NULL, NULL);
-       if (rc) {
-               obd->obd_force = 1;
-               class_manual_cleanup(obd);
-               lsi->lsi_dt_dev = NULL;
-       }
-
-       /* XXX: to keep support old components relying on lsi_srv_mnt
-        *      we get this info from OSD just started */
-       LASSERT(obd->obd_lu_dev);
-       lsi->lsi_dt_dev = lu2dt_dev(obd->obd_lu_dev);
-       LASSERT(lsi->lsi_dt_dev);
+       /* We use server_name2fsname() just for parsing */
+       rc = server_name2fsname(label, NULL, &dash);
+       if (rc != 0)
+               return rc;
 
 
-       dt_conf_get(NULL, lsi->lsi_dt_dev, &p);
+       if (*dash != '-')
+               return -1;
 
 
-       lsi->lsi_srv_mnt = p.ddp_mnt;
+       if (strlcpy(svname, dash + 1, svsize) >= svsize)
+               return -E2BIG;
 
 
-out:
-       RETURN(rc);
+       return 0;
 }
 }
+EXPORT_SYMBOL(server_name2svname);
 
 
-/** Fill in the superblock info for a Lustre server.
- * Mount the device with the correct options.
- * Read the on-disk config file.
- * Start the services.
- */
-static int server_fill_super(struct super_block *sb)
-{
-        struct lustre_sb_info *lsi = s2lsi(sb);
-        int rc;
-        ENTRY;
-
-       rc = lsi_prepare(lsi);
-       if (rc)
-               RETURN(rc);
-
-       /* Start low level OSD */
-       rc = osd_start(lsi, sb->s_flags);
-       if (rc) {
-               CERROR("Unable to start osd on %s: %d\n",
-                      lsi->lsi_lmd->lmd_dev, rc);
-                lustre_put_lsi(sb);
-                RETURN(rc);
-       }
-
-       CDEBUG(D_MOUNT, "Found service %s on device %s\n",
-              lsi->lsi_svname, lsi->lsi_lmd->lmd_dev);
-
-       if (class_name2obd(lsi->lsi_svname)) {
-                LCONSOLE_ERROR_MSG(0x161, "The target named %s is already "
-                                   "running. Double-mount may have compromised"
-                                   " the disk journal.\n",
-                                  lsi->lsi_svname);
-                lustre_put_lsi(sb);
-                RETURN(-EALREADY);
-        }
-
-        /* Start MGS before MGC */
-       if (IS_MGS(lsi) && !(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOMGS)){
-                rc = server_start_mgs(sb);
-                if (rc)
-                        GOTO(out_mnt, rc);
-        }
-
-        /* Start MGC before servers */
-        rc = lustre_start_mgc(sb);
-        if (rc)
-                GOTO(out_mnt, rc);
-
-        /* Set up all obd devices for service */
-        if (!(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOSVC) &&
-                       (IS_OST(lsi) || IS_MDT(lsi))) {
-               rc = server_start_targets(sb, lsi->lsi_srv_mnt);
-                if (rc < 0) {
-                        CERROR("Unable to start targets: %d\n", rc);
-                        GOTO(out_mnt, rc);
-                }
-        /* FIXME overmount client here,
-           or can we just start a client log and client_fill_super on this sb?
-           We need to make sure server_put_super gets called too - ll_put_super
-           calls lustre_common_put_super; check there for LSI_SERVER flag,
-           call s_p_s if so.
-           Probably should start client from new thread so we can return.
-           Client will not finish until all servers are connected.
-           Note - MGS-only server does NOT get a client, since there is no
-           lustre fs associated - the MGS is for all lustre fs's */
-        }
-
-        rc = server_fill_super_common(sb);
-        if (rc)
-                GOTO(out_mnt, rc);
-
-        RETURN(0);
-out_mnt:
-        /* We jump here in case of failure while starting targets or MGS.
-         * In this case we can't just put @mnt and have to do real cleanup
-         * with stoping targets, etc. */
-        server_put_super(sb);
-        return rc;
-}
 
 
-/*
- * Calculate timeout value for a target.
- */
-void server_calc_timeout(struct lustre_sb_info *lsi, struct obd_device *obd)
+/* Get the index from the obd name.
+   rc = server type, or
+   rc < 0  on error
+   if endptr isn't NULL it is set to end of name */
+int server_name2index(const char *svname, __u32 *idx, const char **endptr)
 {
 {
-        struct lustre_mount_data *lmd;
-        int soft = 0;
-        int hard = 0;
-        int factor = 0;
-       bool has_ir = !!(lsi->lsi_flags & LDD_F_IR_CAPABLE);
-        int min = OBD_RECOVERY_TIME_MIN;
-
-       LASSERT(IS_SERVER(lsi));
-
-        lmd = lsi->lsi_lmd;
-        if (lmd) {
-                soft   = lmd->lmd_recovery_time_soft;
-                hard   = lmd->lmd_recovery_time_hard;
-                has_ir = has_ir && !(lmd->lmd_flags & LMD_FLG_NOIR);
-                obd->obd_no_ir = !has_ir;
-        }
-
-        if (soft == 0)
-                soft = OBD_RECOVERY_TIME_SOFT;
-        if (hard == 0)
-                hard = OBD_RECOVERY_TIME_HARD;
+       unsigned long index;
+       int rc;
+       const char *dash;
 
 
-        /* target may have ir_factor configured. */
-        factor = OBD_IR_FACTOR_DEFAULT;
-        if (obd->obd_recovery_ir_factor)
-                factor = obd->obd_recovery_ir_factor;
+       /* We use server_name2fsname() just for parsing */
+       rc = server_name2fsname(svname, NULL, &dash);
+       if (rc != 0)
+               return rc;
 
 
-        if (has_ir) {
-                int new_soft = soft;
-                int new_hard = hard;
+       if (*dash != '-')
+               return -EINVAL;
 
 
-                /* adjust timeout value by imperative recovery */
+       dash++;
 
 
-                new_soft = (soft * factor) / OBD_IR_FACTOR_MAX;
-                new_hard = (hard * factor) / OBD_IR_FACTOR_MAX;
+       if (strncmp(dash, "MDT", 3) == 0)
+               rc = LDD_F_SV_TYPE_MDT;
+       else if (strncmp(dash, "OST", 3) == 0)
+               rc = LDD_F_SV_TYPE_OST;
+       else
+               return -EINVAL;
 
 
-                /* make sure the timeout is not too short */
-                new_soft = max(min, new_soft);
-                new_hard = max(new_soft, new_hard);
+       dash += 3;
 
 
-                LCONSOLE_INFO("%s: Imperative Recovery enabled, recovery "
-                              "window shrunk from %d-%d down to %d-%d\n",
-                              obd->obd_name, soft, hard, new_soft, new_hard);
+       if (strcmp(dash, "all") == 0)
+               return rc | LDD_F_SV_ALL;
 
 
-                soft = new_soft;
-                hard = new_hard;
-        }
+       index = simple_strtoul(dash, (char **)endptr, 16);
+       *idx = index;
 
 
-        /* we're done */
-        obd->obd_recovery_timeout   = max(obd->obd_recovery_timeout, soft);
-        obd->obd_recovery_time_hard = hard;
-        obd->obd_recovery_ir_factor = factor;
+       return rc;
 }
 }
-EXPORT_SYMBOL(server_calc_timeout);
+EXPORT_SYMBOL(server_name2index);
 
 /*************** mount common between server and client ***************/
 
 
 /*************** mount common between server and client ***************/
 
@@ -2515,29 +784,29 @@ EXPORT_SYMBOL(lustre_common_put_super);
 
 static void lmd_print(struct lustre_mount_data *lmd)
 {
 
 static void lmd_print(struct lustre_mount_data *lmd)
 {
-        int i;
+       int i;
 
 
-        PRINT_CMD(PRINT_MASK, "  mount data:\n");
-        if (lmd_is_client(lmd))
-                PRINT_CMD(PRINT_MASK, "profile: %s\n", lmd->lmd_profile);
-        PRINT_CMD(PRINT_MASK, "device:  %s\n", lmd->lmd_dev);
-        PRINT_CMD(PRINT_MASK, "flags:   %x\n", lmd->lmd_flags);
+       PRINT_CMD(D_MOUNT, "  mount data:\n");
+       if (lmd_is_client(lmd))
+               PRINT_CMD(D_MOUNT, "profile: %s\n", lmd->lmd_profile);
+       PRINT_CMD(D_MOUNT, "device:  %s\n", lmd->lmd_dev);
+       PRINT_CMD(D_MOUNT, "flags:   %x\n", lmd->lmd_flags);
 
 
-        if (lmd->lmd_opts)
-                PRINT_CMD(PRINT_MASK, "options: %s\n", lmd->lmd_opts);
+       if (lmd->lmd_opts)
+               PRINT_CMD(D_MOUNT, "options: %s\n", lmd->lmd_opts);
 
 
-        if (lmd->lmd_recovery_time_soft)
-                PRINT_CMD(PRINT_MASK, "recovery time soft: %d\n",
-                          lmd->lmd_recovery_time_soft);
+       if (lmd->lmd_recovery_time_soft)
+               PRINT_CMD(D_MOUNT, "recovery time soft: %d\n",
+                         lmd->lmd_recovery_time_soft);
 
 
-        if (lmd->lmd_recovery_time_hard)
-                PRINT_CMD(PRINT_MASK, "recovery time hard: %d\n",
-                          lmd->lmd_recovery_time_hard);
+       if (lmd->lmd_recovery_time_hard)
+               PRINT_CMD(D_MOUNT, "recovery time hard: %d\n",
+                         lmd->lmd_recovery_time_hard);
 
 
-        for (i = 0; i < lmd->lmd_exclude_count; i++) {
-                PRINT_CMD(PRINT_MASK, "exclude %d:  OST%04x\n", i,
-                          lmd->lmd_exclude[i]);
-        }
+       for (i = 0; i < lmd->lmd_exclude_count; i++) {
+               PRINT_CMD(D_MOUNT, "exclude %d:  OST%04x\n", i,
+                         lmd->lmd_exclude[i]);
+       }
 }
 
 /* Is this server on the exclusion list */
 }
 
 /* Is this server on the exclusion list */
@@ -2567,9 +836,9 @@ int lustre_check_exclusion(struct super_block *sb, char *svname)
 }
 
 /* mount -v  -o exclude=lustre-OST0001:lustre-OST0002 -t lustre ... */
 }
 
 /* mount -v  -o exclude=lustre-OST0001:lustre-OST0002 -t lustre ... */
-static int lmd_make_exclusion(struct lustre_mount_data *lmd, char *ptr)
+static int lmd_make_exclusion(struct lustre_mount_data *lmd, const char *ptr)
 {
 {
-        char *s1 = ptr, *s2;
+       const char *s1 = ptr, *s2;
         __u32 index, *exclude_list;
         int rc = 0, devmax;
         ENTRY;
         __u32 index, *exclude_list;
         int rc = 0, devmax;
         ENTRY;
@@ -2935,10 +1204,10 @@ int lustre_fill_super(struct super_block *sb, void *data, int silent)
                 RETURN(-ENOMEM);
         lmd = lsi->lsi_lmd;
 
                 RETURN(-ENOMEM);
         lmd = lsi->lsi_lmd;
 
-        /*
-         * Disable lockdep during mount, because mount locking patterns are
-         * `special'.
-         */
+       /*
+        * Disable lockdep during mount, because mount locking patterns are
+        * `special'.
+        */
        lockdep_off();
 
         /*
        lockdep_off();
 
         /*
@@ -2972,12 +1241,18 @@ int lustre_fill_super(struct super_block *sb, void *data, int silent)
                         /* c_f_s will call lustre_common_put_super on failure */
                 }
         } else {
                         /* c_f_s will call lustre_common_put_super on failure */
                 }
         } else {
-                CDEBUG(D_MOUNT, "Mounting server from %s\n", lmd->lmd_dev);
-                rc = server_fill_super(sb);
-                /* s_f_s calls lustre_start_mgc after the mount because we need
-                   the MGS nids which are stored on disk.  Plus, we may
-                   need to start the MGS first. */
-                /* s_f_s will call server_put_super on failure */
+#ifdef HAVE_SERVER_SUPPORT
+               CDEBUG(D_MOUNT, "Mounting server from %s\n", lmd->lmd_dev);
+               rc = server_fill_super(sb);
+               /* s_f_s calls lustre_start_mgc after the mount because we need
+                  the MGS nids which are stored on disk.  Plus, we may
+                  need to start the MGS first. */
+               /* s_f_s will call server_put_super on failure */
+#else
+               CERROR("This is client-side-only module, "
+                      "cannot handle server mount.\n");
+               rc = -EINVAL;
+#endif
         }
 
         /* If error happens in fill_super() call, @lsi will be killed there.
         }
 
         /* If error happens in fill_super() call, @lsi will be killed there.
@@ -2992,7 +1267,7 @@ out:
                        lmd->lmd_dev);
         }
        lockdep_on();
                        lmd->lmd_dev);
         }
        lockdep_on();
-        return rc;
+       return rc;
 }
 
 
 }
 
 
@@ -3064,5 +1339,3 @@ int lustre_unregister_fs(void)
 {
         return unregister_filesystem(&lustre_fs_type);
 }
 {
         return unregister_filesystem(&lustre_fs_type);
 }
-
-EXPORT_SYMBOL(server_mti_print);
diff --git a/lustre/obdclass/obd_mount_server.c b/lustre/obdclass/obd_mount_server.c
new file mode 100644 (file)
index 0000000..86d19d7
--- /dev/null
@@ -0,0 +1,1785 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/obdclass/obd_mount_server.c
+ *
+ * Server mount routines
+ *
+ * Author: Nathan Rutman <nathan@clusterfs.com>
+ */
+
+
+#define DEBUG_SUBSYSTEM S_CLASS
+#define D_MOUNT (D_SUPER | D_CONFIG /* | D_WARNING */)
+#define PRINT_CMD CDEBUG
+#define PRINT_MASK (D_SUPER | D_CONFIG)
+
+#include <obd.h>
+#include <lvfs.h>
+#include <lustre_fsfilt.h>
+#include <obd_class.h>
+#include <lustre/lustre_user.h>
+#include <linux/version.h>
+#include <lustre_log.h>
+#include <lustre_disk.h>
+#include <lustre_param.h>
+#ifdef HAVE_KERNEL_LOCKED
+#include <linux/smp_lock.h>
+#endif
+
+/*********** mount lookup *********
+ *
+ * server_mount_info_list holds one struct lustre_mount_info per registered
+ * obd name, linked through lmi_list_chain and matched on lmi_name.
+ * lustre_mount_info_lock is the mutex the register/deregister/get/put
+ * helpers in this section take around lookups in, and changes to, that
+ * list.
+ */
+
+DEFINE_MUTEX(lustre_mount_info_lock);
+static CFS_LIST_HEAD(server_mount_info_list);
+
+/* Look up a registered mount by exact name.
+ * Walks server_mount_info_list and returns the first entry whose lmi_name
+ * compares equal to \a name, or NULL when there is none.  No lock is taken
+ * here. */
+static struct lustre_mount_info *server_find_mount(const char *name)
+{
+       struct lustre_mount_info *found = NULL;
+       cfs_list_t *pos;
+       ENTRY;
+
+       cfs_list_for_each(pos, &server_mount_info_list) {
+               struct lustre_mount_info *cur;
+
+               cur = cfs_list_entry(pos, struct lustre_mount_info,
+                                    lmi_list_chain);
+               if (strcmp(name, cur->lmi_name) != 0)
+                       continue;
+               found = cur;
+               break;
+       }
+       RETURN(found);
+}
+
+/* Register a mount under an obd name.
+ *
+ * We must register an obd for a mount before we call the setup routine;
+ * setup then finds the mount by obd name (see server_get_mount()), since
+ * we can't pass the pointer to setup.
+ *
+ * \param name  obd name to register; a private copy is kept in the entry
+ * \param sb    superblock of the mount, asserted to be non-NULL
+ * \param mnt   vfsmount recorded in the entry
+ *
+ * \retval 0        entry added to server_mount_info_list
+ * \retval -ENOMEM  allocating the entry or the name copy failed
+ * \retval -EEXIST  \a name is already registered
+ */
+static int server_register_mount(const char *name, struct super_block *sb,
+                                struct vfsmount *mnt)
+{
+       struct lustre_mount_info *lmi;
+       char *name_cp;
+       size_t name_sz;
+       ENTRY;
+
+       LASSERT(sb);
+
+       /* allocate before taking the lock; both are released on -EEXIST */
+       name_sz = strlen(name) + 1;
+       OBD_ALLOC(lmi, sizeof(*lmi));
+       if (!lmi)
+               RETURN(-ENOMEM);
+       OBD_ALLOC(name_cp, name_sz);
+       if (!name_cp) {
+               OBD_FREE(lmi, sizeof(*lmi));
+               RETURN(-ENOMEM);
+       }
+       memcpy(name_cp, name, name_sz);
+
+       mutex_lock(&lustre_mount_info_lock);
+
+       if (server_find_mount(name)) {
+               mutex_unlock(&lustre_mount_info_lock);
+               OBD_FREE(lmi, sizeof(*lmi));
+               OBD_FREE(name_cp, name_sz);
+               CERROR("Already registered %s\n", name);
+               RETURN(-EEXIST);
+       }
+       lmi->lmi_name = name_cp;
+       lmi->lmi_sb = sb;
+       lmi->lmi_mnt = mnt;
+       cfs_list_add(&lmi->lmi_list_chain, &server_mount_info_list);
+
+       mutex_unlock(&lustre_mount_info_lock);
+
+       /* Once the lock is dropped the entry may be deregistered and freed
+        * by another thread, so log the caller's mnt (the value just stored)
+        * rather than reading it back through lmi. */
+       CDEBUG(D_MOUNT, "reg_mnt %p from %s\n", mnt, name);
+
+       RETURN(0);
+}
+
+/* Drop the registration for \a name once its obd no longer needs the mount.
+ * Returns 0 after unlinking and freeing the entry, or -ENOENT when \a name
+ * is not registered. */
+static int server_deregister_mount(const char *name)
+{
+       struct lustre_mount_info *entry;
+       ENTRY;
+
+       mutex_lock(&lustre_mount_info_lock);
+       entry = server_find_mount(name);
+       if (entry == NULL) {
+               mutex_unlock(&lustre_mount_info_lock);
+               CERROR("%s not registered\n", name);
+               RETURN(-ENOENT);
+       }
+
+       CDEBUG(D_MOUNT, "dereg_mnt %p from %s\n", entry->lmi_mnt, name);
+
+       /* unlink, then release the name copy and the entry itself; all of
+        * it happens with the list lock held */
+       cfs_list_del(&entry->lmi_list_chain);
+       OBD_FREE(entry->lmi_name, strlen(entry->lmi_name) + 1);
+       OBD_FREE(entry, sizeof(*entry));
+       mutex_unlock(&lustre_mount_info_lock);
+
+       RETURN(0);
+}
+
+/* obd's look up a registered mount using their obdname. This is just
+ * for initial obd setup to find the mount struct.  It should not be
+ * called every time you want to mntget.
+ *
+ * On success the lsi_mounts count of the lustre_sb_info behind the entry's
+ * superblock is incremented.  The entry is dereferenced and the count is
+ * bumped while lustre_mount_info_lock is still held, so a concurrent
+ * server_deregister_mount() cannot free the entry while it is being read
+ * here.
+ *
+ * \param name  obd name the mount was registered under
+ *
+ * \retval the registered entry, or NULL if \a name is not registered
+ */
+struct lustre_mount_info *server_get_mount(const char *name)
+{
+       struct lustre_mount_info *lmi;
+       struct lustre_sb_info *lsi;
+       ENTRY;
+
+       mutex_lock(&lustre_mount_info_lock);
+       lmi = server_find_mount(name);
+       if (!lmi) {
+               mutex_unlock(&lustre_mount_info_lock);
+               CERROR("Can't find mount for %s\n", name);
+               RETURN(NULL);
+       }
+       lsi = s2lsi(lmi->lmi_sb);
+
+       cfs_atomic_inc(&lsi->lsi_mounts);
+
+       CDEBUG(D_MOUNT, "get_mnt %p from %s, refs=%d\n", lmi->lmi_mnt,
+              name, cfs_atomic_read(&lsi->lsi_mounts));
+       mutex_unlock(&lustre_mount_info_lock);
+
+       RETURN(lmi);
+}
+EXPORT_SYMBOL(server_get_mount);
+
+/*
+ * Lookup-only variant, used by mdt to get mount_info from obdname.
+ * It only searches the registered mounts under lustre_mount_info_lock and
+ * hands back the entry (or NULL, after logging an error); unlike
+ * server_get_mount() it does not touch lsi_mounts.
+ * Do not use server_get_mount for this purpose.
+ */
+struct lustre_mount_info *server_get_mount_2(const char *name)
+{
+       struct lustre_mount_info *found;
+       ENTRY;
+
+       mutex_lock(&lustre_mount_info_lock);
+       found = server_find_mount(name);
+       mutex_unlock(&lustre_mount_info_lock);
+
+       if (found != NULL)
+               RETURN(found);
+
+       CERROR("Can't find mount for %s\n", name);
+       RETURN(NULL);
+}
+EXPORT_SYMBOL(server_get_mount_2);
+
+/* to be called from obd_cleanup methods
+ *
+ * Drops one lsi reference for the superblock registered under \a name via
+ * lustre_put_lsi() and then removes the registration itself.
+ *
+ * The superblock and vfsmount pointers are copied out of the entry while
+ * lustre_mount_info_lock is held; the entry is not touched again after the
+ * lock is dropped, since nothing here keeps it from being freed by a
+ * concurrent server_deregister_mount().
+ *
+ * \param name  obd name the mount was registered under
+ * \param mnt   not used by this function
+ *
+ * \retval 0        reference dropped and registration removed
+ * \retval -ENOENT  \a name is not registered
+ */
+int server_put_mount(const char *name, struct vfsmount *mnt)
+{
+       struct lustre_mount_info *lmi;
+       struct lustre_sb_info *lsi;
+       struct super_block *sb = NULL;
+       struct vfsmount *lmi_mnt = NULL;
+       ENTRY;
+
+       mutex_lock(&lustre_mount_info_lock);
+       lmi = server_find_mount(name);
+       if (lmi) {
+               sb = lmi->lmi_sb;
+               lmi_mnt = lmi->lmi_mnt;
+       }
+       mutex_unlock(&lustre_mount_info_lock);
+       if (!lmi) {
+               CERROR("Can't find mount for %s\n", name);
+               RETURN(-ENOENT);
+       }
+       lsi = s2lsi(sb);
+
+       CDEBUG(D_MOUNT, "put_mnt %p from %s, refs=%d\n",
+              lmi_mnt, name, cfs_atomic_read(&lsi->lsi_mounts));
+
+       if (lustre_put_lsi(sb))
+               CDEBUG(D_MOUNT, "Last put of mnt %p from %s\n",
+                      lmi_mnt, name);
+
+       /* this obd should never need the mount again; the helper takes
+        * lustre_mount_info_lock itself, so it must be called unlocked */
+       server_deregister_mount(name);
+
+       RETURN(0);
+}
+EXPORT_SYMBOL(server_put_mount);
+
+/* Corresponding to server_get_mount_2
+ *
+ * server_get_mount_2() takes no reference, so there is nothing to release
+ * here: neither argument is used and the function always returns 0.
+ */
+int server_put_mount_2(const char *name, struct vfsmount *mnt)
+{
+       ENTRY;
+       RETURN(0);
+}
+EXPORT_SYMBOL(server_put_mount_2);
+
+/*
+ * Set up a MGS to serve startup logs.
+ *
+ * Registers the mount of \a sb under LUSTRE_MGS_OBDNAME and starts the MGS
+ * obd on top of this superblock's osd device.
+ *
+ * Returns 0 on success, -EALREADY if a mount is already registered under
+ * the MGS name, or the error from server_register_mount() /
+ * lustre_start_simple().
+ */
+static int server_start_mgs(struct super_block *sb)
+{
+       struct lustre_sb_info    *lsi = s2lsi(sb);
+       struct vfsmount   *mnt = lsi->lsi_srv_mnt;
+       int    rc;
+       ENTRY;
+
+       /* It is impossible to have more than 1 MGS per node, since
+          MGC wouldn't know which to connect to */
+       if (server_find_mount(LUSTRE_MGS_OBDNAME) != NULL) {
+               /* lsi must keep describing \a sb; do not re-point it at
+                * the already registered mount */
+               LCONSOLE_ERROR_MSG(0x15d, "The MGS service was already started"
+                                  " from server\n");
+               RETURN(-EALREADY);
+       }
+
+       CDEBUG(D_CONFIG, "Start MGS service %s\n", LUSTRE_MGS_OBDNAME);
+
+       rc = server_register_mount(LUSTRE_MGS_OBDNAME, sb, mnt);
+
+       if (!rc) {
+               rc = lustre_start_simple(LUSTRE_MGS_OBDNAME, LUSTRE_MGS_NAME,
+                                        LUSTRE_MGS_OBDNAME, 0, 0,
+                                        lsi->lsi_osd_obdname, 0);
+               /* Do NOT call server_deregister_mount() here. This leads to
+                * inability cleanup cleanly and free lsi and other stuff when
+                * mgs calls server_put_mount() in error handling case. -umka */
+       }
+
+       if (rc)
+               LCONSOLE_ERROR_MSG(0x15e, "Failed to start MGS '%s' (%d). "
+                                  "Is the 'mgs' module loaded?\n",
+                                  LUSTRE_MGS_OBDNAME, rc);
+       RETURN(rc);
+}
+
+/*
+ * Force-stop the MGS obd.  \a sb is not used.
+ *
+ * Returns -EALREADY when no obd named LUSTRE_MGS_OBDNAME exists, otherwise
+ * the result of class_manual_cleanup().
+ */
+static int server_stop_mgs(struct super_block *sb)
+{
+       struct obd_device *mgs;
+       int rc;
+       ENTRY;
+
+       CDEBUG(D_MOUNT, "Stop MGS service %s\n", LUSTRE_MGS_OBDNAME);
+
+       /* There better be only one MGS */
+       mgs = class_name2obd(LUSTRE_MGS_OBDNAME);
+       if (mgs == NULL) {
+               CDEBUG(D_CONFIG, "mgs %s not running\n", LUSTRE_MGS_OBDNAME);
+               RETURN(-EALREADY);
+       }
+
+       /* The MGS should always stop when we say so */
+       mgs->obd_force = 1;
+
+       rc = class_manual_cleanup(mgs);
+       RETURN(rc);
+}
+
+/* Since there's only one mgc per node, we have to change its fs to get
+   access to the right disk.
+   Sends KEY_SET_FS together with the super block \a sb to the mgc's own
+   export and returns the result of obd_set_info_async(), logging an error
+   when it is non-zero. */
+static int server_mgc_set_fs(struct obd_device *mgc, struct super_block *sb)
+{
+       struct lustre_sb_info *lsi = s2lsi(sb);
+       int rc;
+       ENTRY;
+
+       CDEBUG(D_MOUNT, "Set mgc disk for %s\n", lsi->lsi_lmd->lmd_dev);
+
+       /* cl_mgc_sem in mgc ensures we sleep if the mgc_fs is busy */
+       rc = obd_set_info_async(NULL, mgc->obd_self_export,
+                               sizeof(KEY_SET_FS), KEY_SET_FS,
+                               sizeof(*sb), sb, NULL);
+       if (rc != 0)
+               CERROR("can't set_fs %d\n", rc);
+
+       RETURN(rc);
+}
+
+/*
+ * Counterpart of server_mgc_set_fs(): send KEY_CLEAR_FS (with no value)
+ * to the mgc's own export.  Returns the result of obd_set_info_async().
+ */
+static int server_mgc_clear_fs(struct obd_device *mgc)
+{
+       int err;
+       ENTRY;
+
+       CDEBUG(D_MOUNT, "Unassign mgc disk\n");
+
+       err = obd_set_info_async(NULL, mgc->obd_self_export,
+                                sizeof(KEY_CLEAR_FS), KEY_CLEAR_FS,
+                                0, NULL, NULL);
+       RETURN(err);
+}
+
+/*
+ * Return 1 if the text from the last '-' in \a devname to the end of the
+ * string is exactly "-mdc", 0 otherwise (including when there is no '-').
+ */
+static int is_mdc_device(const char *devname)
+{
+       const char *suffix = strrchr(devname, '-');
+
+       if (suffix == NULL)
+               return 0;
+
+       return strcmp(suffix, "-mdc") == 0 ? 1 : 0;
+}
+
+/*
+ * Return non-zero only if server_name2index() reports \a tgtname as an MDT
+ * target (LDD_F_SV_TYPE_MDT) whose index is 0.
+ */
+static inline int tgt_is_mdt0(const char *tgtname)
+{
+       __u32 index;
+
+       if (server_name2index(tgtname, &index, NULL) != LDD_F_SV_TYPE_MDT)
+               return 0;
+
+       return index == 0;
+}
+
+/*
+ * Return 1 if \a devname is an mdc device name ("<target>-mdc") whose
+ * <target> part names MDT index 0, 0 otherwise.
+ *
+ * NOTE(review): the name is temporarily truncated in place at its last '-'
+ * even though the parameter is const-qualified, so callers must pass a
+ * writable buffer.
+ */
+static inline int is_mdc_for_mdt0(const char *devname)
+{
+       char *dash;
+       int   is_mdt0;
+
+       if (!is_mdc_device(devname))
+               return 0;
+
+       dash = strrchr(devname, '-');
+       if (dash == NULL)
+               return 0;
+
+       /* cut off "-mdc", test the target name, then restore the '-' */
+       *dash = 0;
+       is_mdt0 = tgt_is_mdt0(devname);
+       *dash = '-';
+
+       return is_mdt0 ? 1 : 0;
+}
+
+/**
+ * Convert OST/MDT name(fsname-OSTxxxx) to a lwp name
+ * (fsname-MDT0000-lwp-OSTxxxx)
+ *
+ * \param svname   target name; the part after the fsname must start with
+ *                 '-' or ':' followed by "OST" or "MDT"
+ * \param lwpname  output buffer, written with an unbounded sprintf().
+ *                 Callers in this file pass MTI_NAME_MAXLEN-byte buffers.
+ *                 NOTE(review): confirm every caller sizes it sufficiently.
+ *
+ * \retval 0 on success, -ENOMEM if the temporary fsname buffer cannot be
+ *         allocated, the error from server_name2fsname(), or -EINVAL for
+ *         a malformed or non-OST/MDT name
+ **/
+int tgt_name2lwpname(const char *svname, char *lwpname)
+{
+       char            *fsname;
+       const char      *tgt;
+       int             rc;
+       ENTRY;
+
+       OBD_ALLOC(fsname, MTI_NAME_MAXLEN);
+       if (fsname == NULL)
+               RETURN(-ENOMEM);
+
+       rc = server_name2fsname(svname, fsname, &tgt);
+       if (rc != 0) {
+               CERROR("%s: failed to get fsname from svname. %d\n",
+                      svname, rc);
+               GOTO(cleanup, rc);
+       }
+
+       if (*tgt != '-' && *tgt != ':') {
+               CERROR("%s: invalid svname name!\n", svname);
+               GOTO(cleanup, rc = -EINVAL);
+       }
+
+       tgt++; /* step over the separator to the "OSTxxxx"/"MDTxxxx" part */
+       if (strncmp(tgt, "OST", 3) != 0 && strncmp(tgt, "MDT", 3) != 0) {
+               CERROR("%s is not an OST or MDT target!\n", svname);
+               GOTO(cleanup, rc = -EINVAL);
+       }
+       sprintf(lwpname, "%s-MDT0000-%s-%s", fsname, LUSTRE_LWP_NAME, tgt);
+cleanup:
+       if (fsname != NULL)
+               OBD_FREE(fsname, MTI_NAME_MAXLEN);
+       RETURN(rc);
+}
+EXPORT_SYMBOL(tgt_name2lwpname);
+
+static CFS_LIST_HEAD(lwp_register_list); /* lwp_register_item entries */
+DEFINE_MUTEX(lwp_register_list_lock); /* held around all list accesses */
+
+/*
+ * Register interest in the export of the lwp device named \a lwpname.
+ *
+ * A lwp_register_item is added to lwp_register_list.  If the lwp device
+ * already exists and is set up, its export is looked up immediately (by
+ * using the lwp name as the uuid) and stored in \a *exp; otherwise
+ * lustre_notify_lwp_list() fills \a *exp in once the lwp connects.
+ * \a cb_func (if not NULL) is called with \a cb_data, under
+ * lwp_register_list_lock, as soon as \a *exp is set.
+ *
+ * \a exp must be non-NULL and \a *exp must be NULL on entry.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, or -EINVAL when the
+ * lwp device exists but its name does not fit into an obd_uuid.
+ */
+int lustre_register_lwp_item(const char *lwpname, struct obd_export **exp,
+                            register_lwp_cb cb_func, void *cb_data)
+{
+       struct obd_device        *lwp;
+       struct lwp_register_item *lri;
+       size_t                    namelen;
+       ENTRY;
+
+       LASSERTF(strlen(lwpname) < MTI_NAME_MAXLEN, "lwpname is too long %s\n",
+                lwpname);
+       LASSERT(exp != NULL && *exp == NULL);
+
+       namelen = strlen(lwpname);
+
+       OBD_ALLOC_PTR(lri);
+       if (lri == NULL)
+               RETURN(-ENOMEM);
+
+       mutex_lock(&lwp_register_list_lock);
+
+       lwp = class_name2obd(lwpname);
+       if (lwp != NULL && lwp->obd_set_up == 1) {
+               struct obd_uuid *uuid;
+
+               /* The assertion above only bounds the name against
+                * MTI_NAME_MAXLEN; the copy below targets uuid->uuid, so
+                * check against that buffer too (leaving room for the
+                * terminating NUL), as lustre_lwp_connect() does. */
+               if (namelen >= sizeof(uuid->uuid)) {
+                       mutex_unlock(&lwp_register_list_lock);
+                       CERROR("%s: Too long lwp name, max_size is %d\n",
+                              lwpname, (int)sizeof(uuid->uuid));
+                       OBD_FREE_PTR(lri);
+                       RETURN(-EINVAL);
+               }
+
+               OBD_ALLOC_PTR(uuid);
+               if (uuid == NULL) {
+                       mutex_unlock(&lwp_register_list_lock);
+                       OBD_FREE_PTR(lri);
+                       RETURN(-ENOMEM);
+               }
+               /* copy the terminating NUL as well */
+               memcpy(uuid->uuid, lwpname, namelen + 1);
+               *exp = cfs_hash_lookup(lwp->obd_uuid_hash, uuid);
+               OBD_FREE_PTR(uuid);
+       }
+
+       /* length bounded by the LASSERTF above; termination presumably
+        * relies on OBD_ALLOC_PTR() returning zeroed memory - TODO confirm */
+       memcpy(lri->lri_name, lwpname, namelen);
+       lri->lri_exp = exp;
+       lri->lri_cb_func = cb_func;
+       lri->lri_cb_data = cb_data;
+       CFS_INIT_LIST_HEAD(&lri->lri_list);
+       cfs_list_add(&lri->lri_list, &lwp_register_list);
+
+       if (*exp != NULL && cb_func != NULL)
+               cb_func(cb_data);
+
+       mutex_unlock(&lwp_register_list_lock);
+       RETURN(0);
+}
+EXPORT_SYMBOL(lustre_register_lwp_item);
+
+/*
+ * Remove the registration whose export slot is \a exp from
+ * lwp_register_list.  If the slot currently holds an export, it is
+ * released with class_export_put().  Only the first matching item is
+ * removed; nothing happens if there is no match.
+ */
+void lustre_deregister_lwp_item(struct obd_export **exp)
+{
+       struct lwp_register_item *item, *next;
+
+       mutex_lock(&lwp_register_list_lock);
+       cfs_list_for_each_entry_safe(item, next, &lwp_register_list, lri_list) {
+               if (item->lri_exp != exp)
+                       continue;
+
+               if (*exp != NULL)
+                       class_export_put(*exp);
+               cfs_list_del(&item->lri_list);
+               OBD_FREE_PTR(item);
+               break;
+       }
+       mutex_unlock(&lwp_register_list_lock);
+}
+EXPORT_SYMBOL(lustre_deregister_lwp_item);
+
+/*
+ * Hand the freshly connected export \a exp to every registered item that
+ * was waiting for it: items whose name equals the obd name of \a exp and
+ * whose export slot is still NULL get their own reference
+ * (class_export_get()) and have their callback, if any, invoked.
+ */
+static void lustre_notify_lwp_list(struct obd_export *exp)
+{
+       struct lwp_register_item *item, *next;
+       LASSERT(exp != NULL);
+
+       mutex_lock(&lwp_register_list_lock);
+       cfs_list_for_each_entry_safe(item, next, &lwp_register_list, lri_list) {
+               /* skip other lwps and slots that are already filled */
+               if (strcmp(exp->exp_obd->obd_name, item->lri_name) != 0 ||
+                   *item->lri_exp != NULL)
+                       continue;
+
+               *item->lri_exp = class_export_get(exp);
+               if (item->lri_cb_func != NULL)
+                       item->lri_cb_func(item->lri_cb_data);
+       }
+       mutex_unlock(&lwp_register_list_lock);
+}
+
+/*
+ * Connect the lwp device \a lwp, using its obd name as the client uuid so
+ * that the export can later be found by lwp name, and on success pass the
+ * new export to lustre_notify_lwp_list().
+ *
+ * Returns 0 on success, the error from lu_env_init() or obd_connect(),
+ * -ENOMEM on allocation failure, or -EINVAL if the obd name (plus its
+ * terminating NUL) does not fit into an obd_uuid.
+ */
+static int lustre_lwp_connect(struct obd_device *lwp)
+{
+       struct lu_env            env;
+       struct lu_context        session_ctx;
+       struct obd_export       *exp;
+       struct obd_uuid         *uuid = NULL;
+       struct obd_connect_data *data = NULL;
+       int                      rc;
+       ENTRY;
+
+       /* log has been fully processed, let clients connect */
+       rc = lu_env_init(&env, lwp->obd_lu_dev->ld_type->ldt_ctx_tags);
+       if (rc != 0)
+               RETURN(rc);
+
+       lu_context_init(&session_ctx, LCT_SESSION);
+       session_ctx.lc_thread = NULL;
+       lu_context_enter(&session_ctx);
+       env.le_ses = &session_ctx;
+
+       OBD_ALLOC_PTR(data);
+       if (data == NULL)
+               GOTO(out, rc = -ENOMEM);
+
+       data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_INDEX;
+       data->ocd_version = LUSTRE_VERSION_CODE;
+       data->ocd_connect_flags |= OBD_CONNECT_MDS_MDS | OBD_CONNECT_FID |
+               OBD_CONNECT_AT | OBD_CONNECT_LRU_RESIZE |
+               OBD_CONNECT_FULL20 | OBD_CONNECT_LVB_TYPE |
+               OBD_CONNECT_LIGHTWEIGHT |
+               OBD_CONNECT_PINGLESS;
+       OBD_ALLOC_PTR(uuid);
+       if (uuid == NULL)
+               GOTO(out, rc = -ENOMEM);
+
+       /* ">=" rather than ">": a name of exactly sizeof(uuid->uuid) bytes
+        * would fill the buffer and leave the uuid without a NUL. */
+       if (strlen(lwp->obd_name) >= sizeof(uuid->uuid)) {
+               CERROR("%s: Too long lwp name %s, max_size is %d\n",
+                      lwp->obd_name, lwp->obd_name, (int)sizeof(uuid->uuid));
+               GOTO(out, rc = -EINVAL);
+       }
+
+       /* Use lwp name as the uuid, so we find the export by lwp name later */
+       memcpy(uuid->uuid, lwp->obd_name, strlen(lwp->obd_name));
+       rc = obd_connect(&env, &exp, lwp, uuid, data, NULL);
+       if (rc != 0)
+               CERROR("%s: connect failed: rc = %d\n", lwp->obd_name, rc);
+       else
+               lustre_notify_lwp_list(exp);
+
+out:
+       if (data != NULL)
+               OBD_FREE_PTR(data);
+       if (uuid != NULL)
+               OBD_FREE_PTR(uuid);
+
+       lu_env_fini(&env);
+       lu_context_exit(&session_ctx);
+       lu_context_fini(&session_ctx);
+
+       RETURN(rc);
+}
+
+/**
+ * lwp is used by slaves (Non-MDT0 targets) to manage the connection
+ * to MDT0.
+ *
+ * Adds the nid carried by \a lcfg under the uuid in its string 1, starts a
+ * lwp obd named after this target (see tgt_name2lwpname()) with the uuid
+ * "<lwpname>_UUID", and connects it.
+ *
+ * \retval 0 on success, -ENOMEM on allocation failure, -E2BIG if the lwp
+ *         uuid does not fit in MTI_NAME_MAXLEN bytes, or the error from
+ *         class_add_uuid(), tgt_name2lwpname(), lustre_start_simple() or
+ *         lustre_lwp_connect()
+ **/
+static int lustre_lwp_setup(struct lustre_cfg *lcfg, struct lustre_sb_info *lsi)
+{
+       struct obd_device       *obd;
+       char                    *lwpname = NULL;
+       char                    *lwpuuid = NULL;
+       int                      rc;
+       ENTRY;
+
+       rc = class_add_uuid(lustre_cfg_string(lcfg, 1),
+                           lcfg->lcfg_nid);
+       if (rc) {
+               CERROR("%s: Can't add uuid: rc =%d\n", lsi->lsi_svname, rc);
+               GOTO(out, rc);
+       }
+
+       OBD_ALLOC(lwpname, MTI_NAME_MAXLEN);
+       if (lwpname == NULL)
+               GOTO(out, rc = -ENOMEM);
+
+       rc = tgt_name2lwpname(lsi->lsi_svname, lwpname);
+       if (rc != 0) {
+               CERROR("%s: failed to generate lwp name. %d\n",
+                      lsi->lsi_svname, rc);
+               GOTO(out, rc);
+       }
+
+       OBD_ALLOC(lwpuuid, MTI_NAME_MAXLEN);
+       if (lwpuuid == NULL)
+               GOTO(out, rc = -ENOMEM);
+
+       /* lwpname may use up to MTI_NAME_MAXLEN - 1 characters, so appending
+        * "_UUID" must be bounded by the size of the uuid buffer. */
+       if (snprintf(lwpuuid, MTI_NAME_MAXLEN, "%s_UUID", lwpname) >=
+           MTI_NAME_MAXLEN) {
+               CERROR("%s: lwp uuid is too long\n", lwpname);
+               GOTO(out, rc = -E2BIG);
+       }
+
+       rc = lustre_start_simple(lwpname, LUSTRE_LWP_NAME,
+                                lwpuuid, lustre_cfg_string(lcfg, 1),
+                                0, 0, 0);
+       if (rc) {
+               CERROR("%s: setup up failed: rc %d\n", lwpname, rc);
+               GOTO(out, rc);
+       }
+
+       obd = class_name2obd(lwpname);
+       LASSERT(obd != NULL);
+
+       rc = lustre_lwp_connect(obd);
+       if (rc != 0)
+               CERROR("%s: connect failed: rc = %d\n", lwpname, rc);
+out:
+       if (lwpname != NULL)
+               OBD_FREE(lwpname, MTI_NAME_MAXLEN);
+       if (lwpuuid != NULL)
+               OBD_FREE(lwpuuid, MTI_NAME_MAXLEN);
+
+       RETURN(rc);
+}
+
+/* the caller is responsible for memory free
+ *
+ * Builds the lwp device name for the target in \a lsi and looks that
+ * device up.  *lwpname, and *logname when \a logname is not NULL, are
+ * allocated here with MTI_NAME_MAXLEN bytes each; *logname receives
+ * "<fsname>-client".
+ *
+ * Returns the obd device if it exists.  If the names were built but no
+ * such device exists, ERR_PTR(-ENOENT) is returned and the name buffers
+ * stay allocated for the caller to use and free.  On any other failure
+ * the buffers are freed and reset to NULL here and ERR_PTR(-ENOMEM) or
+ * ERR_PTR(-EINVAL) is returned.
+ */
+static struct obd_device *lustre_find_lwp(struct lustre_sb_info *lsi,
+                                         char **lwpname, char **logname)
+{
+       struct obd_device       *lwp;
+       int                      rc = 0;
+       ENTRY;
+
+       LASSERT(lwpname != NULL);
+       LASSERT(IS_OST(lsi) || IS_MDT(lsi));
+
+       OBD_ALLOC(*lwpname, MTI_NAME_MAXLEN);
+       if (*lwpname == NULL)
+               RETURN(ERR_PTR(-ENOMEM));
+
+       if (logname != NULL) {
+               OBD_ALLOC(*logname, MTI_NAME_MAXLEN);
+               if (*logname == NULL)
+                       GOTO(out, rc = -ENOMEM);
+               /* *lwpname is borrowed as scratch space for the fsname; it
+                * is overwritten with the real lwp name further down */
+               rc = server_name2fsname(lsi->lsi_svname, *lwpname, NULL);
+               if (rc != 0) {
+                       CERROR("%s: failed to get fsname from svname. %d\n",
+                              lsi->lsi_svname, rc);
+                       GOTO(out, rc = -EINVAL);
+               }
+               sprintf(*logname, "%s-client", *lwpname);
+       }
+
+       rc = tgt_name2lwpname(lsi->lsi_svname, *lwpname);
+       if (rc != 0) {
+               CERROR("%s: failed to generate lwp name. %d\n",
+                      lsi->lsi_svname, rc);
+               GOTO(out, rc = -EINVAL);
+       }
+
+       lwp = class_name2obd(*lwpname); /* NULL if no such device exists */
+
+out:
+       if (rc != 0) {
+               if (*lwpname != NULL) {
+                       OBD_FREE(*lwpname, MTI_NAME_MAXLEN);
+                       *lwpname = NULL;
+               }
+               if (logname != NULL && *logname != NULL) {
+                       OBD_FREE(*logname, MTI_NAME_MAXLEN);
+                       *logname = NULL;
+               }
+               lwp = ERR_PTR(rc);
+       }
+
+       RETURN(lwp != NULL ? lwp : ERR_PTR(-ENOENT));
+}
+
+/*
+ * Add one more connection to the lwp device of the target in \a lsi.
+ *
+ * A LCFG_ADD_CONN config is built for the lwp device, reusing string 1 of
+ * the incoming record \a cfg, and handed to class_add_conn().
+ *
+ * Returns 0 on success, the error from lustre_find_lwp() (including
+ * -ENOENT when the lwp device does not exist), -ENOMEM on allocation
+ * failure, or the error from class_add_conn().
+ */
+static int lustre_lwp_add_conn(struct lustre_cfg *cfg,
+                              struct lustre_sb_info *lsi)
+{
+       struct lustre_cfg_bufs *bufs = NULL;
+       struct lustre_cfg      *lcfg = NULL;
+       char                   *lwpname = NULL;
+       struct obd_device      *lwp;
+       int                     rc;
+       ENTRY;
+
+       lwp = lustre_find_lwp(lsi, &lwpname, NULL);
+       if (IS_ERR(lwp)) {
+               CERROR("%s: can't find lwp device.\n", lsi->lsi_svname);
+               GOTO(out, rc = PTR_ERR(lwp));
+       }
+       LASSERT(lwpname != NULL);
+
+       OBD_ALLOC_PTR(bufs);
+       if (bufs == NULL)
+               GOTO(out, rc = -ENOMEM);
+
+       lustre_cfg_bufs_reset(bufs, lwpname);
+       lustre_cfg_bufs_set_string(bufs, 1,
+                                  lustre_cfg_string(cfg, 1));
+
+       lcfg = lustre_cfg_new(LCFG_ADD_CONN, bufs);
+       /* same check as in lustre_disconnect_lwp(): never hand a failed
+        * allocation on to the config handler */
+       if (lcfg == NULL)
+               GOTO(out, rc = -ENOMEM);
+
+       rc = class_add_conn(lwp, lcfg);
+       if (rc)
+               CERROR("%s: can't add conn: rc = %d\n", lwpname, rc);
+
+out:
+       if (bufs != NULL)
+               OBD_FREE_PTR(bufs);
+       if (lcfg != NULL)
+               lustre_cfg_free(lcfg);
+       if (lwpname != NULL)
+               OBD_FREE(lwpname, MTI_NAME_MAXLEN);
+       RETURN(rc);
+}
+
+/**
+ * Retrieve MDT nids from the client log, then start the lwp device.
+ * there are only two scenarios which would include mdt nid.
+ * 1.
+ * marker   5 (flags=0x01, v2.1.54.0) lustre-MDT0000  'add mdc' xxx-
+ * add_uuid  nid=192.168.122.162@tcp(0x20000c0a87aa2)  0:  1:192.168.122.162@tcp
+ * attach    0:lustre-MDT0000-mdc  1:mdc  2:lustre-clilmv_UUID
+ * setup     0:lustre-MDT0000-mdc  1:lustre-MDT0000_UUID  2:192.168.122.162@tcp
+ * add_uuid  nid=192.168.172.1@tcp(0x20000c0a8ac01)  0:  1:192.168.172.1@tcp
+ * add_conn  0:lustre-MDT0000-mdc  1:192.168.172.1@tcp
+ * modify_mdc_tgts add 0:lustre-clilmv  1:lustre-MDT0000_UUID xxxx
+ * marker   5 (flags=0x02, v2.1.54.0) lustre-MDT0000  'add mdc' xxxx-
+ * 2.
+ * marker   7 (flags=0x01, v2.1.54.0) lustre-MDT0000  'add failnid' xxxx-
+ * add_uuid  nid=192.168.122.2@tcp(0x20000c0a87a02)  0:  1:192.168.122.2@tcp
+ * add_conn  0:lustre-MDT0000-mdc  1:192.168.122.2@tcp
+ * marker   7 (flags=0x02, v2.1.54.0) lustre-MDT0000  'add failnid' xxxx-
+ *
+ * This is the per-record callback installed by lustre_start_lwp().
+ * \a data is the config_llog_instance of the log being processed; its
+ * cfg_flags field tracks which marker section the current record is in.
+ * Only LCFG_MARKER, LCFG_ADD_UUID and LCFG_ADD_CONN records are acted on;
+ * all other commands are ignored.
+ *
+ * \retval 0 on success or for ignored records, -EINVAL for a record that
+ *         is not OBD_CFG_REC, otherwise the error from the sanity check
+ *         or from the lwp/uuid helper that handled the record
+ **/
+static int client_lwp_config_process(const struct lu_env *env,
+                                    struct llog_handle *handle,
+                                    struct llog_rec_hdr *rec, void *data)
+{
+       struct config_llog_instance *clli = data;
+       int                          cfg_len = rec->lrh_len;
+       char                        *cfg_buf = (char *) (rec + 1);
+       struct lustre_cfg           *lcfg = NULL;
+       struct lustre_sb_info       *lsi;
+       int                          rc = 0, swab = 0;
+       ENTRY;
+
+       if (rec->lrh_type != OBD_CFG_REC) {
+               CERROR("Unknown llog record type %#x encountered\n",
+                      rec->lrh_type);
+               RETURN(-EINVAL);
+       }
+
+       LASSERT(clli->cfg_sb != NULL);
+       lsi = s2lsi(clli->cfg_sb);
+
+       lcfg = (struct lustre_cfg *)cfg_buf;
+       if (lcfg->lcfg_version == __swab32(LUSTRE_CFG_VERSION)) {
+               lustre_swab_lustre_cfg(lcfg); /* record has the other byte order */
+               swab = 1;
+       }
+
+       rc = lustre_cfg_sanity_check(cfg_buf, cfg_len);
+       if (rc)
+               GOTO(out, rc);
+
+       switch (lcfg->lcfg_command) {
+       case LCFG_MARKER: {
+               struct cfg_marker *marker = lustre_cfg_buf(lcfg, 1);
+
+               lustre_swab_cfg_marker(marker, swab,
+                                      LUSTRE_CFG_BUFLEN(lcfg, 1));
+               if (marker->cm_flags & CM_SKIP ||
+                   marker->cm_flags & CM_EXCLUDE)
+                       GOTO(out, rc = 0);
+
+               if (!tgt_is_mdt0(marker->cm_tgtname)) /* only MDT0 matters */
+                       GOTO(out, rc = 0);
+
+               if (!strncmp(marker->cm_comment, "add mdc", 7) ||
+                   !strncmp(marker->cm_comment, "add failnid", 11)) {
+                       if (marker->cm_flags & CM_START) {
+                               clli->cfg_flags = CFG_F_MARKER;
+                               /* This hack is to differentiate the
+                                * ADD_UUID is come from "add mdc" record
+                                * or from "add failnid" record. */
+                               if (!strncmp(marker->cm_comment,
+                                            "add failnid", 11))
+                                       clli->cfg_flags |= CFG_F_SKIP;
+                       } else if (marker->cm_flags & CM_END) {
+                               clli->cfg_flags = 0;
+                       }
+               }
+               break;
+       }
+       case LCFG_ADD_UUID: {
+               if (clli->cfg_flags == CFG_F_MARKER) {
+                       rc = lustre_lwp_setup(lcfg, lsi);
+                       /* XXX: process only the first nid as
+                        * we don't need another instance of lwp */
+                       clli->cfg_flags |= CFG_F_SKIP;
+               } else if (clli->cfg_flags == (CFG_F_MARKER | CFG_F_SKIP)) {
+                       rc = class_add_uuid(lustre_cfg_string(lcfg, 1),
+                                           lcfg->lcfg_nid);
+                       if (rc)
+                               CERROR("%s: Fail to add uuid, rc:%d\n",
+                                      lsi->lsi_svname, rc);
+               }
+               break;
+       }
+       case LCFG_ADD_CONN: {
+               if (is_mdc_for_mdt0(lustre_cfg_string(lcfg, 0)))
+                       rc = lustre_lwp_add_conn(lcfg, lsi);
+               break;
+       }
+       default:
+               break;
+       }
+out:
+       RETURN(rc);
+}
+
+/*
+ * End the "<fsname>-client" config log of the target on \a sb and, if its
+ * lwp device exists, send that device a LCFG_CLEANUP config so that its
+ * import gets disconnected.
+ *
+ * Returns 0 on success (also when the lwp device was never started),
+ * -ENOMEM on allocation failure, or the error from lustre_find_lwp(),
+ * lustre_end_log() or the device's ldo_process_config() method.
+ */
+static int lustre_disconnect_lwp(struct super_block *sb)
+{
+       struct lustre_sb_info           *lsi = s2lsi(sb);
+       struct config_llog_instance     *cfg = NULL;
+       struct lustre_cfg_bufs          *bufs = NULL;
+       struct lustre_cfg               *lcfg = NULL;
+       char                            *logname = NULL;
+       char                            *lwpname = NULL;
+       struct obd_device               *lwp;
+       int                              rc;
+       ENTRY;
+
+       lwp = lustre_find_lwp(lsi, &lwpname, &logname);
+       /* a missing device (-ENOENT) is tolerated: the log still has to be
+        * ended; every other error is fatal */
+       if (IS_ERR(lwp) && PTR_ERR(lwp) != -ENOENT)
+               GOTO(out, rc = PTR_ERR(lwp));
+
+       LASSERT(lwpname != NULL);
+       LASSERT(logname != NULL);
+
+       OBD_ALLOC_PTR(cfg);
+       if (cfg == NULL)
+               GOTO(out, rc = -ENOMEM);
+
+       /* end log first */
+       cfg->cfg_instance = sb;
+       rc = lustre_end_log(sb, logname, cfg);
+       if (rc != 0) {
+               CERROR("%s: Can't end config log %s.\n", lwpname, logname);
+               GOTO(out, rc);
+       }
+
+       /* any error pointer that got this far can only be -ENOENT */
+       if (IS_ERR(lwp)) {
+               CDEBUG(D_CONFIG, "%s: lwp device wasn't started.\n",
+                      lsi->lsi_svname);
+               GOTO(out, rc = 0);
+       }
+
+       OBD_ALLOC_PTR(bufs);
+       if (bufs == NULL)
+               GOTO(out, rc = -ENOMEM);
+
+       lustre_cfg_bufs_reset(bufs, lwp->obd_name);
+       lustre_cfg_bufs_set_string(bufs, 1, NULL);
+       lcfg = lustre_cfg_new(LCFG_CLEANUP, bufs);
+       if (lcfg == NULL)
+               GOTO(out, rc = -ENOMEM);
+
+       /* Disconnect import first. NULL is passed for the '@env', since
+        * it will not be used. */
+       rc = lwp->obd_lu_dev->ld_ops->ldo_process_config(NULL, lwp->obd_lu_dev,
+                                                        lcfg);
+out:
+       if (lcfg != NULL)
+               lustre_cfg_free(lcfg);
+       if (bufs != NULL)
+               OBD_FREE_PTR(bufs);
+       if (cfg != NULL)
+               OBD_FREE_PTR(cfg);
+       if (logname != NULL)
+               OBD_FREE(logname, MTI_NAME_MAXLEN);
+       if (lwpname != NULL)
+               OBD_FREE(lwpname, MTI_NAME_MAXLEN);
+       RETURN(rc);
+}
+
+/**
+ * Stop the lwp for an OST/MDT target.
+ *
+ * If the lwp device of the target on \a sb cannot be found this is only
+ * logged and 0 is returned; otherwise the device is force-cleaned and the
+ * result of class_manual_cleanup() is returned.
+ **/
+static int lustre_stop_lwp(struct super_block *sb)
+{
+       struct lustre_sb_info   *lsi = s2lsi(sb);
+       char                    *name = NULL;
+       struct obd_device       *dev = NULL;
+       int                      rc = 0;
+       ENTRY;
+
+       dev = lustre_find_lwp(lsi, &name, NULL);
+       if (IS_ERR(dev)) {
+               /* "not found" is expected; anything else is an error */
+               CDEBUG(PTR_ERR(dev) == -ENOENT ? D_CONFIG : D_ERROR,
+                      "%s: lwp wasn't started.\n", lsi->lsi_svname);
+               GOTO(out, rc = 0);
+       }
+
+       dev->obd_force = 1;
+       rc = class_manual_cleanup(dev);
+
+out:
+       if (name != NULL)
+               OBD_FREE(name, MTI_NAME_MAXLEN);
+       RETURN(rc);
+}
+
+/**
+ * Start the lwp(fsname-MDT0000-lwp-OSTxxxx) for an OST or MDT target,
+ * which would be used to establish connection from OST to MDT0.
+ *
+ * If the lwp device does not exist yet, the "<fsname>-client" config log
+ * is processed with client_lwp_config_process() as the record callback.
+ *
+ * \retval 0 if the device already exists or the log was processed,
+ *         -ENOMEM on allocation failure, otherwise the error from
+ *         lustre_find_lwp() or lustre_process_log()
+ **/
+static int lustre_start_lwp(struct super_block *sb)
+{
+       struct lustre_sb_info       *lsi = s2lsi(sb);
+       struct config_llog_instance *cfg = NULL;
+       struct obd_device           *lwp;
+       char                        *lwpname = NULL;
+       char                        *logname = NULL;
+       int                          rc;
+       ENTRY;
+
+       lwp = lustre_find_lwp(lsi, &lwpname, &logname);
+
+       /* the lwp device already started */
+       if (lwp && !IS_ERR(lwp))
+               GOTO(out, rc = 0);
+
+       if (PTR_ERR(lwp) != -ENOENT) /* only "not found" lets us go on */
+               GOTO(out, rc = PTR_ERR(lwp));
+
+       LASSERT(lwpname != NULL);
+       LASSERT(logname != NULL);
+
+       OBD_ALLOC_PTR(cfg);
+       if (cfg == NULL)
+               GOTO(out, rc = -ENOMEM);
+
+       cfg->cfg_callback = client_lwp_config_process;
+       cfg->cfg_instance = sb;
+
+       rc = lustre_process_log(sb, logname, cfg);
+out:
+       if (lwpname != NULL)
+               OBD_FREE(lwpname, MTI_NAME_MAXLEN);
+       if (logname != NULL)
+               OBD_FREE(logname, MTI_NAME_MAXLEN);
+       if (cfg != NULL)
+               OBD_FREE_PTR(cfg);
+       RETURN(rc);
+}
+
+DEFINE_MUTEX(server_start_lock); /* held while starting/stopping the MDS and OSS obds */
+
+/*
+ * Stop MDS/OSS if nobody is using them.
+ *
+ * \a lsiflags tells which kind of target was just stopped
+ * (LDD_F_SV_TYPE_MDT and/or LDD_F_SV_TYPE_OST).  The matching service obd
+ * is force-cleaned when its target type is not registered or has a zero
+ * typ_refcnt.
+ *
+ * Returns 0 if nothing had to be stopped, otherwise the result of
+ * class_manual_cleanup().
+ */
+static int server_stop_servers(int lsiflags)
+{
+       struct obd_device *obd = NULL;
+       struct obd_type *type = NULL;
+       int rc = 0;
+       ENTRY;
+
+       mutex_lock(&server_start_lock);
+
+       /* Either an MDT or an OST or neither  */
+       /* if this was an MDT, and there are no more MDT's, clean up the MDS */
+       if (lsiflags & LDD_F_SV_TYPE_MDT) {
+               obd = class_name2obd(LUSTRE_MDS_OBDNAME);
+               if (obd != NULL)
+                       type = class_search_type(LUSTRE_MDT_NAME);
+       }
+
+       /* if this was an OST, and there are no more OST's, clean up the OSS */
+       if (lsiflags & LDD_F_SV_TYPE_OST) {
+               obd = class_name2obd(LUSTRE_OSS_OBDNAME);
+               if (obd != NULL)
+                       type = class_search_type(LUSTRE_OST_NAME);
+       }
+
+       if (obd != NULL && (type == NULL || type->typ_refcnt == 0)) {
+               obd->obd_force = 1;
+               /* obd_fail doesn't mean much on a server obd */
+               /* rc is still 0 here, so report the cleanup result as is
+                * instead of dropping it */
+               rc = class_manual_cleanup(obd);
+       }
+
+       mutex_unlock(&server_start_lock);
+
+       RETURN(rc);
+}
+
+/*
+ * Print the server name, fs name, uuid, config version and flags of
+ * \a mti via PRINT_CMD, preceded by a line carrying \a title.
+ * Always returns 0.
+ */
+int server_mti_print(const char *title, struct mgs_target_info *mti)
+{
+       /* header line */
+       PRINT_CMD(PRINT_MASK, "mti %s\n", title);
+
+       /* identity */
+       PRINT_CMD(PRINT_MASK, "server: %s\n", mti->mti_svname);
+       PRINT_CMD(PRINT_MASK, "fs:     %s\n", mti->mti_fsname);
+       PRINT_CMD(PRINT_MASK, "uuid:   %s\n", mti->mti_uuid);
+
+       /* configuration state */
+       PRINT_CMD(PRINT_MASK, "ver: %d  flags: %#x\n", mti->mti_config_ver,
+                 mti->mti_flags);
+
+       return 0;
+}
+EXPORT_SYMBOL(server_mti_print);
+
+/*
+ * Generate data for registration.
+ *
+ * Fills \a mti from \a lsi: the server name, the local nids that pass the
+ * failnode/network filters (at most MTI_NIDS_MAX of them), the Lustre
+ * version, the fs name and target index parsed from the server name, the
+ * LDD flags and the mount parameters.
+ *
+ * Returns 0 on success, -EINVAL if \a lsi is not a server, -E2BIG if the
+ * server name or the parameters do not fit, or the error from
+ * server_name2fsname() / server_name2index().
+ */
+static int server_lsi2mti(struct lustre_sb_info *lsi,
+                         struct mgs_target_info *mti)
+{
+       lnet_process_id_t id;
+       int rc, i = 0;
+       ENTRY;
+
+       if (!IS_SERVER(lsi))
+               RETURN(-EINVAL);
+
+       if (strlcpy(mti->mti_svname, lsi->lsi_svname, sizeof(mti->mti_svname))
+           >= sizeof(mti->mti_svname))
+               RETURN(-E2BIG);
+
+       mti->mti_nid_count = 0;
+       while (LNetGetId(i++, &id) != -ENOENT) {
+               if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND)
+                       continue;
+
+               /* server use --servicenode param, only allow specified
+                * nids be registered */
+               if ((lsi->lsi_lmd->lmd_flags & LMD_FLG_NO_PRIMNODE) != 0 &&
+                   class_match_nid(lsi->lsi_lmd->lmd_params,
+                                   PARAM_FAILNODE, id.nid) < 1)
+                       continue;
+
+               /* match specified network */
+               if (!class_match_net(lsi->lsi_lmd->lmd_params,
+                                    PARAM_NETWORK, LNET_NIDNET(id.nid)))
+                       continue;
+
+               mti->mti_nids[mti->mti_nid_count] = id.nid;
+               mti->mti_nid_count++;
+               if (mti->mti_nid_count >= MTI_NIDS_MAX) {
+                       CWARN("Only using first %d nids for %s\n",
+                             mti->mti_nid_count, mti->mti_svname);
+                       break;
+               }
+       }
+
+       mti->mti_lustre_ver = LUSTRE_VERSION_CODE;
+       mti->mti_config_ver = 0;
+
+       /* Every exit below goes through RETURN() so that it pairs with the
+        * ENTRY above. */
+       rc = server_name2fsname(lsi->lsi_svname, mti->mti_fsname, NULL);
+       if (rc != 0)
+               RETURN(rc);
+
+       rc = server_name2index(lsi->lsi_svname, &mti->mti_stripe_index, NULL);
+       if (rc < 0)
+               RETURN(rc);
+       /* Orion requires index to be set */
+       LASSERT(!(rc & LDD_F_NEED_INDEX));
+       /* keep only LDD flags */
+       mti->mti_flags = lsi->lsi_flags & LDD_F_MASK;
+       if (mti->mti_flags & (LDD_F_WRITECONF | LDD_F_VIRGIN))
+               mti->mti_flags |= LDD_F_UPDATE;
+
+       /* same truncation test as for mti_svname above */
+       if (strlcpy(mti->mti_params, lsi->lsi_lmd->lmd_params,
+                   sizeof(mti->mti_params)) >= sizeof(mti->mti_params))
+               RETURN(-E2BIG);
+
+       RETURN(0);
+}
+
+/* Register an old or new target with the MGS. If needed MGS will construct
+   startup logs and assign index.
+   The registration data is built by server_lsi2mti() and sent to the MGS
+   export of this target's mgc under KEY_REGISTER_TARGET.
+   Returns 0 on success, -EINVAL if \a lsi is not a server, -ENOMEM on
+   allocation failure, or the error from server_lsi2mti() or from the
+   registration.  A registration error is reset to 0 (not fatal) unless
+   the MGS flagged LDD_F_ERROR or lsi_flags has LDD_F_NEED_INDEX or
+   LDD_F_UPDATE set. */
+static int server_register_target(struct lustre_sb_info *lsi)
+{
+       struct obd_device *mgc = lsi->lsi_mgc;
+       struct mgs_target_info *mti = NULL;
+       bool writeconf;
+       int rc;
+       ENTRY;
+
+       LASSERT(mgc);
+
+       if (!IS_SERVER(lsi))
+               RETURN(-EINVAL);
+
+       OBD_ALLOC_PTR(mti);
+       if (!mti)
+               RETURN(-ENOMEM);
+
+       rc = server_lsi2mti(lsi, mti);
+       if (rc)
+               GOTO(out, rc);
+
+       CDEBUG(D_MOUNT, "Registration %s, fs=%s, %s, index=%04x, flags=%#x\n",
+              mti->mti_svname, mti->mti_fsname,
+              libcfs_nid2str(mti->mti_nids[0]), mti->mti_stripe_index,
+              mti->mti_flags);
+
+       /* if write_conf is true, the registration must succeed */
+       writeconf = !!(lsi->lsi_flags & (LDD_F_NEED_INDEX | LDD_F_UPDATE));
+       mti->mti_flags |= LDD_F_OPC_REG;
+
+       /* Register the target */
+       /* FIXME use mgc_process_config instead */
+       rc = obd_set_info_async(NULL, mgc->u.cli.cl_mgc_mgsexp,
+                               sizeof(KEY_REGISTER_TARGET),
+                               KEY_REGISTER_TARGET,
+                               sizeof(*mti), mti, NULL);
+       if (rc) {
+               if (mti->mti_flags & LDD_F_ERROR) {
+                       LCONSOLE_ERROR_MSG(0x160,
+                               "%s: the MGS refuses to allow this server "
+                               "to start: rc = %d. Please see messages on "
+                               "the MGS.\n", lsi->lsi_svname, rc);
+               } else if (writeconf) {
+                       LCONSOLE_ERROR_MSG(0x15f,
+                               "%s: cannot register this server with the MGS: "
+                               "rc = %d. Is the MGS running?\n",
+                               lsi->lsi_svname, rc);
+               } else {
+                       CERROR("%s: error registering with the MGS: rc = %d "
+                              "(not fatal)\n", lsi->lsi_svname, rc);
+                       /* reset the error code for non-fatal error. */
+                       rc = 0;
+               }
+               GOTO(out, rc);
+       }
+
+out:
+       if (mti)
+               OBD_FREE_PTR(mti);
+       RETURN(rc);
+}
+
+/**
+ * Notify the MGS that this target is ready.
+ * Used by IR - if the MGS receives this message, it will notify clients.
+ *
+ * The message is the registration data from server_lsi2mti() with the
+ * instance number of \a obd and the LDD_F_OPC_READY flag added, sent under
+ * KEY_REGISTER_TARGET.  If the call succeeds and the returned flags carry
+ * LDD_F_IR_CAPABLE without LDD_F_ERROR, LDD_F_IR_CAPABLE is set in
+ * lsi_flags.
+ *
+ * \retval 0 on success, -EINVAL if \a sb is not a server mount, -ENOMEM
+ *         on allocation failure, otherwise the error from server_lsi2mti()
+ *         or obd_set_info_async()
+ */
+static int server_notify_target(struct super_block *sb, struct obd_device *obd)
+{
+       struct lustre_sb_info *lsi = s2lsi(sb);
+       struct obd_device *mgc = lsi->lsi_mgc;
+       struct mgs_target_info *mti = NULL;
+       int rc;
+       ENTRY;
+
+       LASSERT(mgc);
+
+       if (!(IS_SERVER(lsi)))
+               RETURN(-EINVAL);
+
+       OBD_ALLOC_PTR(mti);
+       if (!mti)
+               RETURN(-ENOMEM);
+       rc = server_lsi2mti(lsi, mti);
+       if (rc)
+               GOTO(out, rc);
+
+       mti->mti_instance = obd->u.obt.obt_instance;
+       mti->mti_flags |= LDD_F_OPC_READY;
+
+       /* FIXME use mgc_process_config instead */
+       rc = obd_set_info_async(NULL, mgc->u.cli.cl_mgc_mgsexp,
+                               sizeof(KEY_REGISTER_TARGET),
+                               KEY_REGISTER_TARGET,
+                               sizeof(*mti), mti, NULL);
+
+       /* Imperative recovery: if the mgs informs us to use IR? */
+       if (!rc && !(mti->mti_flags & LDD_F_ERROR) &&
+           (mti->mti_flags & LDD_F_IR_CAPABLE))
+               lsi->lsi_flags |= LDD_F_IR_CAPABLE;
+
+out:
+       if (mti)
+               OBD_FREE_PTR(mti);
+       RETURN(rc);
+
+}
+
+/** Start server targets: MDTs and OSTs
+ *
+ * Steps, in the order performed below:
+ *  - for an MDT / OST, make sure the MDS / OSS device exists, creating it
+ *    with lustre_start_simple() under server_start_lock if it does not;
+ *  - if a server mount is known, hand it to the MGC (server_mgc_set_fs());
+ *  - register the target with the MGS and record the mount under the
+ *    target's name;
+ *  - process the configuration llog named after the target;
+ *  - start the LWP, notify the MGS that the target is ready, compute the
+ *    recovery timeouts, notify the obd and run its ldo_prepare() method;
+ *  - optionally abort recovery when LMD_FLG_ABORT_RECOV was given.
+ *
+ * \param sb   server superblock
+ * \param mnt  mount recorded under the target's name
+ *
+ * \retval 0 on success, otherwise the error of the failing step (-ENXIO
+ *         when no obd named after the target exists after log processing).
+ *         On any non-zero result reached through the labels below,
+ *         server_stop_servers() is called.
+ */
+static int server_start_targets(struct super_block *sb, struct vfsmount *mnt)
+{
+       struct obd_device *obd;
+       struct lustre_sb_info *lsi = s2lsi(sb);
+       struct config_llog_instance cfg;
+       struct lu_env env;
+       struct lu_device *dev;
+       int rc;
+       ENTRY;
+
+       CDEBUG(D_MOUNT, "starting target %s\n", lsi->lsi_svname);
+
+       if (IS_MDT(lsi)) {
+               /* make sure the MDS is started */
+               mutex_lock(&server_start_lock);
+               obd = class_name2obd(LUSTRE_MDS_OBDNAME);
+               if (!obd) {
+                       rc = lustre_start_simple(LUSTRE_MDS_OBDNAME,
+                                                LUSTRE_MDS_NAME,
+                                                LUSTRE_MDS_OBDNAME"_uuid",
+                                                0, 0, 0, 0);
+                       if (rc) {
+                               /* returns directly: nothing set up yet that
+                                * the labels at the bottom would undo */
+                               mutex_unlock(&server_start_lock);
+                               CERROR("failed to start MDS: %d\n", rc);
+                               RETURN(rc);
+                       }
+               }
+               mutex_unlock(&server_start_lock);
+       }
+
+       /* If we're an OST, make sure the global OSS is running */
+       if (IS_OST(lsi)) {
+               /* make sure OSS is started */
+               mutex_lock(&server_start_lock);
+               obd = class_name2obd(LUSTRE_OSS_OBDNAME);
+               if (!obd) {
+                       rc = lustre_start_simple(LUSTRE_OSS_OBDNAME,
+                                                LUSTRE_OSS_NAME,
+                                                LUSTRE_OSS_OBDNAME"_uuid",
+                                                0, 0, 0, 0);
+                       if (rc) {
+                               mutex_unlock(&server_start_lock);
+                               CERROR("failed to start OSS: %d\n", rc);
+                               RETURN(rc);
+                       }
+               }
+               mutex_unlock(&server_start_lock);
+       }
+
+       /* Set the mgc fs to our server disk.  This allows the MGC to
+        * read and write configs locally, in case it can't talk to the MGS. */
+       if (lsi->lsi_srv_mnt) {
+               rc = server_mgc_set_fs(lsi->lsi_mgc, sb);
+               if (rc)
+                       GOTO(out_stop_service, rc);
+       }
+
+       /* Register with MGS */
+       rc = server_register_target(lsi);
+       if (rc)
+               GOTO(out_mgc, rc);
+
+       /* Let the target look up the mount using the target's name
+          (we can't pass the sb or mnt through class_process_config.) */
+       rc = server_register_mount(lsi->lsi_svname, sb, mnt);
+       if (rc)
+               GOTO(out_mgc, rc);
+
+       /* Start targets using the llog named for the target */
+       memset(&cfg, 0, sizeof(cfg));
+       cfg.cfg_callback = class_config_llog_handler;
+       rc = lustre_process_log(sb, lsi->lsi_svname, &cfg);
+       if (rc) {
+               CERROR("failed to start server %s: %d\n",
+                      lsi->lsi_svname, rc);
+               /* Do NOT call server_deregister_mount() here. This makes it
+                * impossible to find mount later in cleanup time and leaves
+                * @lsi and other stuff leaked. -umka */
+               GOTO(out_mgc, rc);
+       }
+
+       /* the llog is expected to have created an obd named after the target */
+       obd = class_name2obd(lsi->lsi_svname);
+       if (!obd) {
+               CERROR("no server named %s was started\n", lsi->lsi_svname);
+               GOTO(out_mgc, rc = -ENXIO);
+       }
+
+       if (IS_OST(lsi) || IS_MDT(lsi)) {
+               rc = lustre_start_lwp(sb);
+               if (rc) {
+                       CERROR("%s: failed to start LWP: %d\n",
+                              lsi->lsi_svname, rc);
+                       GOTO(out_mgc, rc);
+               }
+       }
+
+       /* NOTE(review): the return value is ignored, so a failed "ready"
+        * notification does not fail the mount - confirm this is intended */
+       server_notify_target(sb, obd);
+
+       /* calculate recovery timeout, do it after lustre_process_log */
+       server_calc_timeout(lsi, obd);
+
+       /* log has been fully processed */
+       obd_notify(obd, NULL, OBD_NOTIFY_CONFIG, (void *)CONFIG_LOG);
+
+       /* log has been fully processed, let clients connect */
+       dev = obd->obd_lu_dev;
+       if (dev && dev->ld_ops->ldo_prepare) {
+               rc = lu_env_init(&env, dev->ld_type->ldt_ctx_tags);
+               if (rc == 0) {
+                       struct lu_context  session_ctx;
+
+                       /* give the env a session context for the call */
+                       lu_context_init(&session_ctx, LCT_SESSION);
+                       session_ctx.lc_thread = NULL;
+                       lu_context_enter(&session_ctx);
+                       env.le_ses = &session_ctx;
+
+                       rc = dev->ld_ops->ldo_prepare(&env, NULL, dev);
+
+                       lu_env_fini(&env);
+                       lu_context_exit(&session_ctx);
+                       lu_context_fini(&session_ctx);
+               }
+               /* rc from lu_env_init()/ldo_prepare() is not reset below,
+                * so a failure here becomes the function's result */
+       }
+
+       /* abort recovery only on the complete stack:
+        * many devices can be involved */
+       if ((lsi->lsi_lmd->lmd_flags & LMD_FLG_ABORT_RECOV) &&
+           (OBP(obd, iocontrol))) {
+               obd_iocontrol(OBD_IOC_ABORT_RECOVERY, obd->obd_self_export, 0,
+                             NULL, NULL);
+       }
+
+       /* success falls through both labels; only rc != 0 stops servers */
+out_mgc:
+       /* Release the mgc fs for others to use */
+       if (lsi->lsi_srv_mnt)
+               server_mgc_clear_fs(lsi->lsi_mgc);
+
+out_stop_service:
+       if (rc != 0)
+               server_stop_servers(lsi->lsi_flags);
+
+       RETURN(rc);
+}
+
+/**
+ * Fill in the server fields of \a lsi from the parsed mount data.
+ *
+ * Copies the server name (mount option "profile") and the OSD type into
+ * \a lsi, derives the legacy fstype string, determines the server type
+ * from the server name and translates mount-line flags into LDD_F_* flags.
+ *
+ * \param[in,out] lsi  superblock info; lsi->lsi_lmd must be set
+ *
+ * \retval 0              on success
+ * \retval -EINVAL        no server name was given
+ * \retval -ENAMETOOLONG  server name or OSD type does not fit in \a lsi
+ * \retval negative       error from server_name2index() when the mount is
+ *                        not flagged as an MGS
+ */
+static int lsi_prepare(struct lustre_sb_info *lsi)
+{
+       __u32 index;
+       int rc;
+       ENTRY;
+
+       LASSERT(lsi);
+       LASSERT(lsi->lsi_lmd);
+
+       /* The server name is given as a mount line option */
+       if (lsi->lsi_lmd->lmd_profile == NULL) {
+               LCONSOLE_ERROR("Can't determine server name\n");
+               RETURN(-EINVAL);
+       }
+
+       if (strlen(lsi->lsi_lmd->lmd_profile) >= sizeof(lsi->lsi_svname))
+               RETURN(-ENAMETOOLONG);
+
+       strcpy(lsi->lsi_svname, lsi->lsi_lmd->lmd_profile);
+
+       /* Determine osd type */
+       if (lsi->lsi_lmd->lmd_osd_type != NULL) {
+               if (strlen(lsi->lsi_lmd->lmd_osd_type) >=
+                   sizeof(lsi->lsi_osd_type))
+                       RETURN(-ENAMETOOLONG);
+
+               strcpy(lsi->lsi_osd_type, lsi->lsi_lmd->lmd_osd_type);
+       } else {
+               strcpy(lsi->lsi_osd_type, LUSTRE_OSD_LDISKFS_NAME);
+       }
+
+       /* XXX: a temp. solution for components using fsfilt
+        *      to be removed in one of the subsequent patches.
+        * Work from lsi_osd_type, which is always populated by now;
+        * lmd_osd_type is NULL when the default OSD type was applied. */
+       if (!strcmp(lsi->lsi_osd_type, "osd-ldiskfs"))
+               strcpy(lsi->lsi_fstype, "ldiskfs");
+       else
+               strcpy(lsi->lsi_fstype, lsi->lsi_osd_type);
+
+       /* Determine server type */
+       rc = server_name2index(lsi->lsi_svname, &index, NULL);
+       if (rc < 0) {
+               if (lsi->lsi_lmd->lmd_flags & LMD_FLG_MGS) {
+                       /* Assume we're a bare MGS */
+                       rc = 0;
+                       lsi->lsi_lmd->lmd_flags |= LMD_FLG_NOSVC;
+               } else {
+                       LCONSOLE_ERROR("Can't determine server type of '%s'\n",
+                                      lsi->lsi_svname);
+                       RETURN(rc);
+               }
+       }
+       /* a non-negative result is OR-ed in as the server type flags */
+       lsi->lsi_flags |= rc;
+
+       /* Add mount line flags that used to be in ldd:
+        * writeconf, mgs, anything else?
+        */
+       lsi->lsi_flags |= (lsi->lsi_lmd->lmd_flags & LMD_FLG_WRITECONF) ?
+               LDD_F_WRITECONF : 0;
+       lsi->lsi_flags |= (lsi->lsi_lmd->lmd_flags & LMD_FLG_VIRGIN) ?
+               LDD_F_VIRGIN : 0;
+       lsi->lsi_flags |= (lsi->lsi_lmd->lmd_flags & LMD_FLG_MGS) ?
+               LDD_F_SV_TYPE_MGS : 0;
+       lsi->lsi_flags |= (lsi->lsi_lmd->lmd_flags & LMD_FLG_NO_PRIMNODE) ?
+               LDD_F_NO_PRIMNODE : 0;
+
+       RETURN(0);
+}
+
+/*************** server mount ******************/
+
+/** Start the shutdown of servers at umount.
+ *
+ * Teardown order, as performed below: disconnect the LWP, drop the config
+ * log and clean up the target obd, stop the MGS (unless mounted with
+ * nomgs), stop the LWP, clean the MGC and superblock, wait for in-progress
+ * cleanups, stop the MDS/OSS servers if no longer needed, and finally
+ * clean up an orphaned obd named by the profile's lp_dt, if any.
+ *
+ * The function cannot report errors, so allocation failures only degrade
+ * logging (placeholder name) or skip the orphan cleanup.
+ *
+ * \param sb  server superblock being unmounted
+ */
+static void server_put_super(struct super_block *sb)
+{
+       struct lustre_sb_info *lsi = s2lsi(sb);
+       struct obd_device     *obd;
+       char *tmpname, *extraname = NULL;
+       const char *name;
+       int tmpname_sz;
+       /* sampled up front: still needed after lustre_common_put_super() */
+       int lsiflags = lsi->lsi_flags;
+       ENTRY;
+
+       LASSERT(IS_SERVER(lsi));
+
+       /* private copy of the server name for messages */
+       tmpname_sz = strlen(lsi->lsi_svname) + 1;
+       OBD_ALLOC(tmpname, tmpname_sz);
+       if (tmpname != NULL)
+               memcpy(tmpname, lsi->lsi_svname, tmpname_sz);
+       /* the name is only used for logging; never dereference NULL */
+       name = tmpname != NULL ? tmpname : "<unknown>";
+       CDEBUG(D_MOUNT, "server put_super %s\n", name);
+       if (tmpname != NULL &&
+           IS_MDT(lsi) && (lsi->lsi_lmd->lmd_flags & LMD_FLG_NOSVC))
+               snprintf(tmpname, tmpname_sz, "MGS");
+
+       /* disconnect the lwp first to drain off the inflight request */
+       if (IS_OST(lsi) || IS_MDT(lsi)) {
+               int     rc;
+
+               rc = lustre_disconnect_lwp(sb);
+               /* errors are negative errno values */
+               if (rc && rc != -ETIMEDOUT)
+                       CERROR("%s: failed to disconnect lwp. (rc=%d)\n",
+                              name, rc);
+       }
+
+       /* Stop the target */
+       if (!(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOSVC) &&
+           (IS_MDT(lsi) || IS_OST(lsi))) {
+               struct lustre_profile *lprof = NULL;
+
+               /* tell the mgc to drop the config log */
+               lustre_end_log(sb, lsi->lsi_svname, NULL);
+
+               /* COMPAT_146 - profile may get deleted in mgc_cleanup.
+                  If there are any setup/cleanup errors, save the lov
+                  name for safety cleanup later. */
+               lprof = class_get_profile(lsi->lsi_svname);
+               if (lprof && lprof->lp_dt) {
+                       OBD_ALLOC(extraname, strlen(lprof->lp_dt) + 1);
+                       /* best effort: without memory the orphan cleanup
+                        * at the end is simply skipped */
+                       if (extraname != NULL)
+                               strcpy(extraname, lprof->lp_dt);
+               }
+
+               obd = class_name2obd(lsi->lsi_svname);
+               if (obd) {
+                       CDEBUG(D_MOUNT, "stopping %s\n", obd->obd_name);
+                       if (lsiflags & LSI_UMOUNT_FAILOVER)
+                               obd->obd_fail = 1;
+                       /* We can't seem to give an error return code
+                        * to .put_super, so we better make sure we clean up! */
+                       obd->obd_force = 1;
+                       class_manual_cleanup(obd);
+               } else {
+                       CERROR("no obd %s\n", lsi->lsi_svname);
+                       server_deregister_mount(lsi->lsi_svname);
+               }
+       }
+
+       /* If they wanted the mgs to stop separately from the mdt, they
+          should have put it on a different device. */
+       if (IS_MGS(lsi)) {
+               /* if MDS start with --nomgs, don't stop MGS then */
+               if (!(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOMGS))
+                       server_stop_mgs(sb);
+       }
+
+       if (IS_OST(lsi) || IS_MDT(lsi)) {
+               if (lustre_stop_lwp(sb) < 0)
+                       CERROR("%s: failed to stop lwp!\n", name);
+       }
+
+       /* Clean the mgc and sb */
+       lustre_common_put_super(sb);
+
+       /* wait till all in-progress cleanups are done
+        * specifically we're interested in ofd cleanup
+        * as it pins OSS */
+       obd_zombie_barrier();
+
+       /* Stop the servers (MDS, OSS) if no longer needed.  We must wait
+          until the target is really gone so that our type refcount check
+          is right. */
+       server_stop_servers(lsiflags);
+
+       /* In case of startup or cleanup err, stop related obds */
+       if (extraname) {
+               obd = class_name2obd(extraname);
+               if (obd) {
+                       CWARN("Cleaning orphaned obd %s\n", extraname);
+                       obd->obd_force = 1;
+                       class_manual_cleanup(obd);
+               }
+               OBD_FREE(extraname, strlen(extraname) + 1);
+       }
+
+       LCONSOLE_WARN("server umount %s complete\n", name);
+       if (tmpname != NULL)
+               OBD_FREE(tmpname, tmpname_sz);
+       EXIT;
+}
+
+/** Called only for 'umount -f'
+ *
+ * Clears LSI_UMOUNT_FAILOVER in the superblock info so that the later
+ * server_put_super() does not mark the target obd as failing over.
+ */
+static void server_umount_begin(struct super_block *sb)
+{
+       struct lustre_sb_info *sbi = s2lsi(sb);
+       ENTRY;
+
+       CDEBUG(D_MOUNT, "umount -f\n");
+       /* Only two modes exist: a plain umount means failover, a forced
+        * umount means force.  There is no non-force, non-failover mode. */
+       sbi->lsi_flags &= ~LSI_UMOUNT_FAILOVER;
+       EXIT;
+}
+
+/**
+ * statfs handler for a server mountpoint.
+ *
+ * When a dt device is attached and dt_statfs() succeeds, its numbers are
+ * unpacked into \a buf; otherwise a minimal placeholder is reported (one
+ * block, one file, nothing free).  f_type is the superblock magic in
+ * either case.
+ *
+ * \retval 0 always
+ */
+static int server_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+       struct super_block *sb = dentry->d_sb;
+       struct lustre_sb_info *lsi = s2lsi(sb);
+       struct obd_statfs osfs;
+       ENTRY;
+
+       if (lsi->lsi_dt_dev != NULL &&
+           dt_statfs(NULL, lsi->lsi_dt_dev, &osfs) == 0) {
+               statfs_unpack(buf, &osfs);
+       } else {
+               /* no usable device statistics: report a placeholder
+                * and still return 0 */
+               buf->f_bsize = sb->s_blocksize;
+               buf->f_blocks = 1;
+               buf->f_bfree = 0;
+               buf->f_bavail = 0;
+               buf->f_files = 1;
+               buf->f_ffree = 0;
+               buf->f_namelen = NAME_MAX;
+       }
+
+       /* set last so it holds on both paths */
+       buf->f_type = sb->s_magic;
+       RETURN(0);
+}
+
+/** The operations we support directly on the superblock:
+ * mount, umount, and df.
+ *
+ * Installed as sb->s_op by server_fill_super_common().
+ */
+static struct super_operations server_ops = {
+       /* umount: stops the target and related services */
+       .put_super      = server_put_super,
+       .umount_begin   = server_umount_begin, /* umount -f */
+       /* df: device statistics if available, else a placeholder */
+       .statfs         = server_statfs,
+};
+
+#define log2(n) ffz(~(n))
+#define LUSTRE_SUPER_MAGIC 0x0BD00BD1
+
+/**
+ * Set up the generic superblock fields and the root dentry of a server
+ * mountpoint.
+ *
+ * The superblock is marked read-only with a 4096-byte block size and
+ * s_maxbytes of 0, server_ops is installed, and a bare directory inode is
+ * created to act as the root.
+ *
+ * \retval 0     on success
+ * \retval -EIO  the root inode or root dentry could not be created
+ */
+static int server_fill_super_common(struct super_block *sb)
+{
+       struct inode *root_inode;
+       ENTRY;
+
+       CDEBUG(D_MOUNT, "Server sb, dev=%d\n", (int)sb->s_dev);
+
+       sb->s_op = &server_ops;
+       sb->s_magic = LUSTRE_SUPER_MAGIC;
+       sb->s_flags |= MS_RDONLY;
+       /* we don't allow file IO on server mountpoints */
+       sb->s_maxbytes = 0;
+       sb->s_blocksize = 4096;
+       sb->s_blocksize_bits = log2(sb->s_blocksize);
+
+       root_inode = new_inode(sb);
+       if (root_inode == NULL) {
+               CERROR("Can't make root inode\n");
+               RETURN(-EIO);
+       }
+
+       /* make_bad_inode() would return -EIO for every operation, but it
+        * was found to make the mount impossible to umount, so it is not
+        * used.  Apparently the root has to be a directory for the mount
+        * to finish. */
+       root_inode->i_mode = S_IFDIR;
+
+       sb->s_root = d_make_root(root_inode);
+       if (sb->s_root == NULL) {
+               CERROR("%s: can't make root dentry\n", sb->s_id);
+               RETURN(-EIO);
+       }
+
+       RETURN(0);
+}
+
+/**
+ * Start the low level OSD device for a server mount and connect to it.
+ *
+ * The OSD obd is named "<svname>-osd" with uuid "<svname>-osd_UUID".  It
+ * is created with lustre_start_simple() unless an obd of that name already
+ * exists, then connected via obd_connect().  On success the dt device and
+ * the mount reported by dt_conf_get() are cached in \a lsi.
+ *
+ * \param[in,out] lsi     superblock info of the server being mounted
+ * \param[in]     mflags  mount flags, passed to the OSD as a string
+ *
+ * \retval 0 on success
+ * \retval error from lustre_start_simple() or obd_connect(); after a
+ *         failed connect the obd is cleaned up and lsi->lsi_dt_dev is NULL
+ */
+static int osd_start(struct lustre_sb_info *lsi, unsigned long mflags)
+{
+       struct lustre_mount_data *lmd = lsi->lsi_lmd;
+       struct obd_device        *obd;
+       struct dt_device_param    p;
+       /* two unsigned longs in decimal (up to 20 digits each), ':', NUL */
+       char                      flagstr[2 * 20 + 2];
+       int                       rc;
+       ENTRY;
+
+       CDEBUG(D_MOUNT,
+              "Attempting to start %s, type=%s, lsifl=%x, mountfl=%lx\n",
+              lsi->lsi_svname, lsi->lsi_osd_type, lsi->lsi_flags, mflags);
+
+       sprintf(lsi->lsi_osd_obdname, "%s-osd", lsi->lsi_svname);
+       strcpy(lsi->lsi_osd_uuid, lsi->lsi_osd_obdname);
+       strcat(lsi->lsi_osd_uuid, "_UUID");
+       snprintf(flagstr, sizeof(flagstr), "%lu:%lu", mflags,
+                (unsigned long) lmd->lmd_flags);
+
+       obd = class_name2obd(lsi->lsi_osd_obdname);
+       if (obd == NULL) {
+               rc = lustre_start_simple(lsi->lsi_osd_obdname,
+                                        lsi->lsi_osd_type,
+                                        lsi->lsi_osd_uuid, lmd->lmd_dev,
+                                        flagstr, lsi->lsi_lmd->lmd_opts,
+                                        lsi->lsi_svname);
+               if (rc)
+                       GOTO(out, rc);
+               obd = class_name2obd(lsi->lsi_osd_obdname);
+               LASSERT(obd);
+       }
+
+       rc = obd_connect(NULL, &lsi->lsi_osd_exp,
+                        obd, &obd->obd_uuid, NULL, NULL);
+       if (rc) {
+               obd->obd_force = 1;
+               class_manual_cleanup(obd);
+               lsi->lsi_dt_dev = NULL;
+               /* the obd has just been cleaned up: do not touch it again
+                * and keep lsi_dt_dev cleared */
+               GOTO(out, rc);
+       }
+
+       /* XXX: to keep support old components relying on lsi_srv_mnt
+        *      we get this info from OSD just started */
+       LASSERT(obd->obd_lu_dev);
+       lsi->lsi_dt_dev = lu2dt_dev(obd->obd_lu_dev);
+       LASSERT(lsi->lsi_dt_dev);
+
+       dt_conf_get(NULL, lsi->lsi_dt_dev, &p);
+
+       lsi->lsi_srv_mnt = p.ddp_mnt;
+
+out:
+       RETURN(rc);
+}
+
+/** Fill in the superblock info for a Lustre server.
+ * Mount the device with the correct options.
+ * Read the on-disk config file.
+ * Start the services.
+ *
+ * Failures before any service is started (preparing lsi, starting the
+ * OSD, double-mount detection) drop the lsi with lustre_put_lsi();
+ * later failures go through server_put_super() for a full teardown.
+ *
+ * \param sb  superblock being filled
+ *
+ * \retval 0          on success
+ * \retval -EALREADY  a target of the same name is already running
+ * \retval other      error of the failing setup step
+ */
+int server_fill_super(struct super_block *sb)
+{
+       struct lustre_sb_info *lsi = s2lsi(sb);
+       int rc;
+       ENTRY;
+
+       rc = lsi_prepare(lsi);
+       if (rc) {
+               /* drop the lsi like the other early failure paths below */
+               lustre_put_lsi(sb);
+               RETURN(rc);
+       }
+
+       /* Start low level OSD */
+       rc = osd_start(lsi, sb->s_flags);
+       if (rc) {
+               CERROR("Unable to start osd on %s: %d\n",
+                      lsi->lsi_lmd->lmd_dev, rc);
+               lustre_put_lsi(sb);
+               RETURN(rc);
+       }
+
+       CDEBUG(D_MOUNT, "Found service %s on device %s\n",
+              lsi->lsi_svname, lsi->lsi_lmd->lmd_dev);
+
+       if (class_name2obd(lsi->lsi_svname)) {
+               LCONSOLE_ERROR_MSG(0x161, "The target named %s is already "
+                                  "running. Double-mount may have compromised"
+                                  " the disk journal.\n",
+                                  lsi->lsi_svname);
+               lustre_put_lsi(sb);
+               RETURN(-EALREADY);
+       }
+
+       /* Start MGS before MGC */
+       if (IS_MGS(lsi) && !(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOMGS)) {
+               rc = server_start_mgs(sb);
+               if (rc)
+                       GOTO(out_mnt, rc);
+       }
+
+       /* Start MGC before servers */
+       rc = lustre_start_mgc(sb);
+       if (rc)
+               GOTO(out_mnt, rc);
+
+       /* Set up all obd devices for service */
+       if (!(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOSVC) &&
+           (IS_OST(lsi) || IS_MDT(lsi))) {
+               rc = server_start_targets(sb, lsi->lsi_srv_mnt);
+               if (rc < 0) {
+                       CERROR("Unable to start targets: %d\n", rc);
+                       GOTO(out_mnt, rc);
+               }
+               /* FIXME overmount client here, or can we just start a
+                * client log and client_fill_super on this sb?  We
+                * need to make sure server_put_super gets called too
+                * - ll_put_super calls lustre_common_put_super; check
+                * there for LSI_SERVER flag, call s_p_s if so.
+                *
+                * Probably should start client from new thread so we
+                * can return.  Client will not finish until all
+                * servers are connected.  Note - MGS-only server does
+                * NOT get a client, since there is no lustre fs
+                * associated - the MGS is for all lustre fs's */
+       }
+
+       rc = server_fill_super_common(sb);
+       if (rc)
+               GOTO(out_mnt, rc);
+
+       RETURN(0);
+out_mnt:
+       /* We jump here in case of failure while starting targets or MGS.
+        * In this case we can't just put @mnt and have to do real cleanup
+        * with stopping targets, etc. */
+       server_put_super(sb);
+       /* RETURN pairs with the ENTRY above */
+       RETURN(rc);
+}
+
+/*
+ * Calculate timeout value for a target.
+ *
+ * The soft and hard recovery windows come from the mount data, falling
+ * back to OBD_RECOVERY_TIME_SOFT / OBD_RECOVERY_TIME_HARD when unset.  If
+ * imperative recovery is usable (LDD_F_IR_CAPABLE set and not disabled by
+ * LMD_FLG_NOIR), both windows are scaled by factor / OBD_IR_FACTOR_MAX.
+ * The soft window is kept at or above OBD_RECOVERY_TIME_MIN and the hard
+ * window at or above the soft one.  Results are stored in \a obd; the
+ * recovery timeout is only ever raised, never lowered.
+ */
+void server_calc_timeout(struct lustre_sb_info *lsi, struct obd_device *obd)
+{
+       struct lustre_mount_data *lmd;
+       bool ir_on = (lsi->lsi_flags & LDD_F_IR_CAPABLE) != 0;
+       int lower = OBD_RECOVERY_TIME_MIN;
+       int soft = 0;
+       int hard = 0;
+       int factor;
+
+       LASSERT(IS_SERVER(lsi));
+
+       lmd = lsi->lsi_lmd;
+       if (lmd != NULL) {
+               soft = lmd->lmd_recovery_time_soft;
+               hard = lmd->lmd_recovery_time_hard;
+               /* the mount line may switch IR off */
+               if (lmd->lmd_flags & LMD_FLG_NOIR)
+                       ir_on = false;
+               obd->obd_no_ir = !ir_on;
+       }
+
+       /* unset values fall back to the defaults */
+       if (soft == 0)
+               soft = OBD_RECOVERY_TIME_SOFT;
+       if (hard == 0)
+               hard = OBD_RECOVERY_TIME_HARD;
+
+       /* target may have ir_factor configured. */
+       if (obd->obd_recovery_ir_factor)
+               factor = obd->obd_recovery_ir_factor;
+       else
+               factor = OBD_IR_FACTOR_DEFAULT;
+
+       if (ir_on) {
+               /* adjust timeout value by imperative recovery */
+               int ir_soft = (soft * factor) / OBD_IR_FACTOR_MAX;
+               int ir_hard = (hard * factor) / OBD_IR_FACTOR_MAX;
+
+               /* make sure the timeout is not too short */
+               if (ir_soft < lower)
+                       ir_soft = lower;
+               if (ir_hard < ir_soft)
+                       ir_hard = ir_soft;
+
+               LCONSOLE_INFO("%s: Imperative Recovery enabled, recovery "
+                             "window shrunk from %d-%d down to %d-%d\n",
+                             obd->obd_name, soft, hard, ir_soft, ir_hard);
+
+               soft = ir_soft;
+               hard = ir_hard;
+       }
+
+       /* we're done */
+       obd->obd_recovery_timeout   = max(obd->obd_recovery_timeout, soft);
+       obd->obd_recovery_time_hard = hard;
+       obd->obd_recovery_ir_factor = factor;
+}
+EXPORT_SYMBOL(server_calc_timeout);