/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
 * vim:expandtab:shiftwidth=8:tabstop=8:
 *
 *  lustre/obdclass/obd_mount.c
 *  Client/server mount routines
 *
 *  Copyright (c) 2006 Cluster File Systems, Inc.
 *   Author: Nathan Rutman <nathan@clusterfs.com>
 *
 *   This file is part of Lustre, http://www.lustre.org/
 *
 *   Lustre is free software; you can redistribute it and/or
 *   modify it under the terms of version 2 of the GNU General Public
 *   License as published by the Free Software Foundation.
 *
 *   Lustre is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with Lustre; if not, write to the Free Software
 *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */


/* Defined before the includes below; presumably consumed by the debug macros
   those headers provide -- TODO confirm. */
#define DEBUG_SUBSYSTEM S_CLASS
/* Debug mask passed to CDEBUG by the mount code in this file.
   NOTE(review): the expansion is not parenthesized, so it is only safe where
   the consuming macro parenthesizes its mask argument. */
#define D_MOUNT D_SUPER|D_CONFIG /*|D_WARNING */
/* Print routine and mask used by server_mti_print (and by the compiled-out
   ldd_print). */
#define PRINT_CMD CDEBUG
#define PRINT_MASK D_SUPER|D_CONFIG

#include <obd.h>
#include <lvfs.h>
#include <lustre_fsfilt.h>
#include <obd_class.h>
#include <lustre/lustre_user.h>
#include <linux/version.h>
#include <lustre_log.h>
#include <lustre_disk.h>
#include <lustre_param.h>

/* Pointer to the client's fill_super routine.  Starts out NULL; whatever
   assigns it is not visible in this part of the file. */
static int (*client_fill_super)(struct super_block *sb) = NULL;

/*********** mount lookup *********/

/* Taken by the register/deregister/get/put routines below around their
   accesses to server_mount_info_list. */
DECLARE_MUTEX(lustre_mount_info_lock);
/* List of registered server mounts (struct lustre_mount_info), linked through
   lmi_list_chain. */
struct list_head server_mount_info_list = LIST_HEAD_INIT(server_mount_info_list);

/* Look up a registered mount by obd name.
 *
 * Walks server_mount_info_list and returns the first entry whose lmi_name
 * equals \a name, or NULL when there is none.  No lock is taken here; most
 * callers in this file hold lustre_mount_info_lock around the call. */
static struct lustre_mount_info *server_find_mount(const char *name)
{
        struct lustre_mount_info *found = NULL;
        struct list_head *pos;
        ENTRY;

        list_for_each(pos, &server_mount_info_list) {
                struct lustre_mount_info *entry;

                entry = list_entry(pos, struct lustre_mount_info,
                                   lmi_list_chain);
                if (!strcmp(name, entry->lmi_name)) {
                        found = entry;
                        break;
                }
        }

        RETURN(found);
}

/* we must register an obd for a mount before we call the setup routine.
   *_setup will call lustre_get_mount to get the mnt struct
   by obd_name, since we can't pass the pointer to setup. */
/* Record the (sb, mnt) pair under obd name \a name.
 *
 * A private copy of the name is stored in the new list entry.
 * Returns 0 on success, -ENOMEM if either allocation fails, or -EEXIST if
 * an entry with the same name is already on the list. */
static int server_register_mount(const char *name, struct super_block *sb,
                          struct vfsmount *mnt)
{
        struct lustre_mount_info *entry;
        size_t name_size = strlen(name) + 1;
        char *name_copy;
        ENTRY;

        LASSERT(mnt);
        LASSERT(sb);

        /* Do both allocations before taking the list lock. */
        OBD_ALLOC(entry, sizeof(*entry));
        if (entry == NULL)
                RETURN(-ENOMEM);

        OBD_ALLOC(name_copy, name_size);
        if (name_copy == NULL) {
                OBD_FREE(entry, sizeof(*entry));
                RETURN(-ENOMEM);
        }
        memcpy(name_copy, name, name_size);

        down(&lustre_mount_info_lock);

        if (server_find_mount(name) != NULL) {
                /* Duplicate name: throw away what we just built. */
                up(&lustre_mount_info_lock);
                OBD_FREE(name_copy, name_size);
                OBD_FREE(entry, sizeof(*entry));
                CERROR("Already registered %s\n", name);
                RETURN(-EEXIST);
        }

        entry->lmi_mnt = mnt;
        entry->lmi_sb = sb;
        entry->lmi_name = name_copy;
        list_add(&entry->lmi_list_chain, &server_mount_info_list);

        up(&lustre_mount_info_lock);

        CDEBUG(D_MOUNT, "reg_mnt %p from %s, vfscount=%d\n",
               entry->lmi_mnt, name,
               atomic_read(&entry->lmi_mnt->mnt_count));

        RETURN(0);
}

/* when an obd no longer needs a mount */
/* Remove the list entry registered under \a name and free it together with
 * its name copy.
 *
 * Returns 0 on success or -ENOENT when no such entry is registered. */
static int server_deregister_mount(const char *name)
{
        struct lustre_mount_info *entry;
        ENTRY;

        down(&lustre_mount_info_lock);

        entry = server_find_mount(name);
        if (entry == NULL) {
                up(&lustre_mount_info_lock);
                CERROR("%s not registered\n", name);
                RETURN(-ENOENT);
        }

        CDEBUG(D_MOUNT, "dereg_mnt %p from %s, vfscount=%d\n",
               entry->lmi_mnt, name,
               atomic_read(&entry->lmi_mnt->mnt_count));

        /* Unlink first, then release the name and the entry itself. */
        list_del(&entry->lmi_list_chain);
        OBD_FREE(entry->lmi_name, strlen(entry->lmi_name) + 1);
        OBD_FREE(entry, sizeof(*entry));

        up(&lustre_mount_info_lock);

        RETURN(0);
}

/* obd's look up a registered mount using their obdname. This is just
   for initial obd setup to find the mount struct.  It should not be
   called every time you want to mntget. */
/* Find the mount registered under \a name and take a reference on it.
 *
 * On success the vfsmount gets an extra mntget() and the superblock's
 * lsi_mounts counter is incremented; the matching release is
 * server_put_mount().  Returns the list entry, or NULL if \a name is not
 * registered. */
struct lustre_mount_info *server_get_mount(const char *name)
{
        struct lustre_mount_info *lmi;
        struct lustre_sb_info *lsi;
        ENTRY;

        down(&lustre_mount_info_lock);
        lmi = server_find_mount(name);
        if (!lmi) {
                up(&lustre_mount_info_lock);
                CERROR("Can't find mount for %s\n", name);
                RETURN(NULL);
        }

        /* Take the references while the list lock is still held:
           server_deregister_mount() frees the entry under this same lock,
           so dereferencing lmi after up() could touch freed memory. */
        lsi = s2lsi(lmi->lmi_sb);
        mntget(lmi->lmi_mnt);
        atomic_inc(&lsi->lsi_mounts);

        CDEBUG(D_MOUNT, "get_mnt %p from %s, refs=%d, vfscount=%d\n",
               lmi->lmi_mnt, name, atomic_read(&lsi->lsi_mounts),
               atomic_read(&lmi->lmi_mnt->mnt_count));
        up(&lustre_mount_info_lock);

        RETURN(lmi);
}

/*
 * Used by mdt to get mount_info from obdname.
 * There is no blocking when using the mount_info.
 * Do not use server_get_mount for this purpose.
 */
struct lustre_mount_info *server_get_mount_2(const char *name)
{
        struct lustre_mount_info *lmi;
        ENTRY;

        down(&lustre_mount_info_lock);
        lmi = server_find_mount(name);
        up(&lustre_mount_info_lock);
        if (!lmi)
                CERROR("Can't find mount for %s\n", name);

        RETURN(lmi);
}

/* mntput() \a mnt, releasing the big kernel lock around the call when it is
 * held on entry and re-taking it afterwards. */
static void unlock_mntput(struct vfsmount *mnt)
{
        int had_bkl = kernel_locked();

        if (had_bkl)
                unlock_kernel();

        mntput(mnt);

        if (had_bkl)
                lock_kernel();
}

static int lustre_put_lsi(struct super_block *sb);

/* to be called from obd_cleanup methods */
int server_put_mount(const char *name, struct vfsmount *mnt)
{
        struct lustre_mount_info *lmi;
        struct lustre_sb_info *lsi;
        int count = atomic_read(&mnt->mnt_count) - 1;
        ENTRY;

        /* This might be the last one, can't deref after this */
        unlock_mntput(mnt);

        down(&lustre_mount_info_lock);
        lmi = server_find_mount(name);
        up(&lustre_mount_info_lock);
        if (!lmi) {
                CERROR("Can't find mount for %s\n", name);
                RETURN(-ENOENT);
        }
        lsi = s2lsi(lmi->lmi_sb);
        LASSERT(lmi->lmi_mnt == mnt);

        CDEBUG(D_MOUNT, "put_mnt %p from %s, refs=%d, vfscount=%d\n",
               lmi->lmi_mnt, name, atomic_read(&lsi->lsi_mounts), count);

        if (lustre_put_lsi(lmi->lmi_sb)) {
                CDEBUG(D_MOUNT, "Last put of mnt %p from %s, vfscount=%d\n",
                       lmi->lmi_mnt, name, count);
                /* last mount is the One True Mount */
                if (count > 1)
                        CERROR("%s: mount busy, vfscount=%d!\n", name, count);
        }

        /* this obd should never need the mount again */
        server_deregister_mount(name);

        RETURN(0);
}

/* Corresponding to server_get_mount_2 */
/* Counterpart of server_get_mount_2().  That lookup takes no references, so
 * there is nothing to release: both arguments are ignored and 0 is
 * returned. */
int server_put_mount_2(const char *name, struct vfsmount *mnt)
{
        int rc = 0;
        ENTRY;

        RETURN(rc);
}

/******* mount helper utilities *********/

/* Compiled out.  Debug helper that prints the fields of a lustre_disk_data
   through PRINT_CMD/PRINT_MASK, one field per line. */
#if 0
static void ldd_print(struct lustre_disk_data *ldd)
{
        PRINT_CMD(PRINT_MASK, "  disk data:\n");
        PRINT_CMD(PRINT_MASK, "server:  %s\n", ldd->ldd_svname);
        PRINT_CMD(PRINT_MASK, "uuid:    %s\n", (char *)ldd->ldd_uuid);
        PRINT_CMD(PRINT_MASK, "fs:      %s\n", ldd->ldd_fsname);
        PRINT_CMD(PRINT_MASK, "index:   %04x\n", ldd->ldd_svindex);
        PRINT_CMD(PRINT_MASK, "config:  %d\n", ldd->ldd_config_ver);
        PRINT_CMD(PRINT_MASK, "flags:   %#x\n", ldd->ldd_flags);
        PRINT_CMD(PRINT_MASK, "diskfs:  %s\n", MT_STR(ldd));
        PRINT_CMD(PRINT_MASK, "options: %s\n", ldd->ldd_mount_opts);
        PRINT_CMD(PRINT_MASK, "params:  %s\n", ldd->ldd_params);
        PRINT_CMD(PRINT_MASK, "comment: %s\n", ldd->ldd_userdata);
}
#endif

/* Read MOUNT_DATA_FILE into \a ldd and validate it.
 *
 * The file is opened inside \a mount_ctxt.  Its size must equal
 * sizeof(*ldd), the magic must be LDD_MAGIC, and no incompat/rocompat
 * feature bits outside the supported masks may be set.
 *
 * Returns 0 on success, the filp_open() error if the file cannot be opened,
 * or -EINVAL for any size, read or validation failure. */
static int ldd_parse(struct lvfs_run_ctxt *mount_ctxt,
                           struct lustre_disk_data *ldd)
{
        struct lvfs_run_ctxt saved;
        struct file *filp;
        unsigned long size;
        loff_t pos = 0;
        int rc;
        ENTRY;

        push_ctxt(&saved, mount_ctxt, NULL);

        filp = filp_open(MOUNT_DATA_FILE, O_RDONLY, 0644);
        if (IS_ERR(filp)) {
                rc = PTR_ERR(filp);
                CERROR("cannot open %s: rc = %d\n", MOUNT_DATA_FILE, rc);
                GOTO(out, rc);
        }

        /* The on-disk record has a fixed size; anything else is rejected. */
        size = i_size_read(filp->f_dentry->d_inode);
        CDEBUG(D_MOUNT, "Have %s, size %lu\n", MOUNT_DATA_FILE, size);
        if (size != sizeof(*ldd)) {
                CERROR("disk data size does not match: see %lu expect "LPSZ"\n",
                       size, sizeof(*ldd));
                GOTO(out_close, rc = -EINVAL);
        }

        rc = lustre_fread(filp, ldd, size, &pos);
        if (rc != size) {
                CERROR("error reading %s: read %d of %lu\n",
                       MOUNT_DATA_FILE, rc, size);
                GOTO(out_close, rc = -EINVAL);
        }
        rc = 0;

        if (ldd->ldd_magic != LDD_MAGIC) {
                /* FIXME add swabbing support */
                CERROR("Bad magic in %s: %x!=%x\n", MOUNT_DATA_FILE,
                       ldd->ldd_magic, LDD_MAGIC);
                GOTO(out_close, rc = -EINVAL);
        }

        /* Refuse feature bits that are not in the supported masks. */
        if (ldd->ldd_feature_incompat & ~LDD_INCOMPAT_SUPP) {
                CERROR("%s: unsupported incompat filesystem feature(s) %x\n",
                       ldd->ldd_svname,
                       ldd->ldd_feature_incompat & ~LDD_INCOMPAT_SUPP);
                GOTO(out_close, rc = -EINVAL);
        }
        if (ldd->ldd_feature_rocompat & ~LDD_ROCOMPAT_SUPP) {
                CERROR("%s: unsupported read-only filesystem feature(s) %x\n",
                       ldd->ldd_svname,
                       ldd->ldd_feature_rocompat & ~LDD_ROCOMPAT_SUPP);
                /* Do something like remount filesystem read-only */
                GOTO(out_close, rc = -EINVAL);
        }

out_close:
        filp_close(filp, 0);
out:
        pop_ctxt(&saved, mount_ctxt, NULL);
        RETURN(rc);
}

/* Write \a ldd back to MOUNT_DATA_FILE.
 *
 * ldd_config_ver is incremented before the write is attempted, so the
 * in-memory copy is bumped even if the write then fails.  The file is
 * opened inside \a mount_ctxt and the whole struct is written at offset 0.
 *
 * Returns 0 on success, the filp_open() error if the file cannot be opened,
 * or -EINVAL on a short or failed write. */
static int ldd_write(struct lvfs_run_ctxt *mount_ctxt,
                     struct lustre_disk_data *ldd)
{
        struct lvfs_run_ctxt saved;
        struct file *file;
        loff_t off = 0;
        unsigned long len = sizeof(struct lustre_disk_data);
        int rc = 0;
        ENTRY;

        LASSERT(ldd->ldd_magic == LDD_MAGIC);

        ldd->ldd_config_ver++;

        push_ctxt(&saved, mount_ctxt, NULL);

        file = filp_open(MOUNT_DATA_FILE, O_RDWR, 0644);
        if (IS_ERR(file)) {
                rc = PTR_ERR(file);
                CERROR("cannot open %s: rc = %d\n", MOUNT_DATA_FILE, rc);
                GOTO(out, rc);
        }

        rc = lustre_fwrite(file, ldd, len, &off);
        if (rc != len) {
                /* This is the write path: report bytes written, not read. */
                CERROR("error writing %s: wrote %d of %lu\n",
                       MOUNT_DATA_FILE, rc, len);
                GOTO(out_close, rc = -EINVAL);
        }

        rc = 0;

out_close:
        filp_close(file, 0);
out:
        pop_ctxt(&saved, mount_ctxt, NULL);
        RETURN(rc);
}


/**************** config llog ********************/

/* Get a config log from the MGS and process it.
   This func is called for both clients and servers.
   Continue to process new statements appended to the logs
   (whenever the config lock is revoked) until lustre_end_log
   is called. */
/* Ask the MGC attached to \a sb to start processing config log \a logname.
 *
 * Builds an LCFG_LOG_START command carrying the log name, a copy of \a cfg
 * and the superblock pointer, and hands it to obd_process_config() on
 * lsi->lsi_mgc.  Both the MGC and \a cfg must be non-NULL (asserted).
 *
 * Returns the obd_process_config() result; any failure is reported on the
 * console, with an extra version-compatibility hint for -EINVAL. */
int lustre_process_log(struct super_block *sb, char *logname,
                     struct config_llog_instance *cfg)
{
        struct lustre_cfg *lcfg;
        struct lustre_cfg_bufs bufs;
        struct lustre_sb_info *lsi = s2lsi(sb);
        struct obd_device *mgc = lsi->lsi_mgc;
        int rc;
        ENTRY;

        LASSERT(mgc);
        LASSERT(cfg);

        /* mgc_process_config */
        lustre_cfg_bufs_reset(&bufs, mgc->obd_name);
        lustre_cfg_bufs_set_string(&bufs, 1, logname);
        lustre_cfg_bufs_set(&bufs, 2, cfg, sizeof(*cfg));
        lustre_cfg_bufs_set(&bufs, 3, &sb, sizeof(sb));
        lcfg = lustre_cfg_new(LCFG_LOG_START, &bufs);
        rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
        lustre_cfg_free(lcfg);

        /* The literals below are concatenated, so each fragment needs its
           own separating space ("'%s' " rather than "'%s'"). */
        if (rc == -EINVAL)
                LCONSOLE_ERROR_MSG(0x15b, "%s: The configuration from log '%s' "
                                   "failed from the MGS (%d).  Make sure this "
                                   "client and the MGS are running compatible "
                                   "versions of Lustre.\n",
                                   mgc->obd_name, logname, rc);

        /* The generic message is printed for every failure, -EINVAL
           included. */
        if (rc)
                LCONSOLE_ERROR_MSG(0x15c, "%s: The configuration from log '%s' "
                                   "failed (%d). This may be the result of "
                                   "communication errors between this node and "
                                   "the MGS, a bad configuration, or other "
                                   "errors. See the syslog for more "
                                   "information.\n", mgc->obd_name, logname,
                                   rc);

        /* class_obd_list(); */
        RETURN(rc);
}

/* Stop watching this config log for updates */
int lustre_end_log(struct super_block *sb, char *logname,
                       struct config_llog_instance *cfg)
{
        struct lustre_cfg *lcfg;
        struct lustre_cfg_bufs bufs;
        struct lustre_sb_info *lsi = s2lsi(sb);
        struct obd_device *mgc = lsi->lsi_mgc;
        int rc;
        ENTRY;

        if (!mgc)
                RETURN(-ENOENT);

        /* mgc_process_config */
        lustre_cfg_bufs_reset(&bufs, mgc->obd_name);
        lustre_cfg_bufs_set_string(&bufs, 1, logname);
        if (cfg)
                lustre_cfg_bufs_set(&bufs, 2, cfg, sizeof(*cfg));
        lcfg = lustre_cfg_new(LCFG_LOG_END, &bufs);
        rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
        lustre_cfg_free(lcfg);
        RETURN(rc);
}

/**************** obd start *******************/

/* Build a lustre_cfg command and run it through class_process_config().
 *
 * \a cfgname becomes buffer 0 (via lustre_cfg_bufs_reset); each non-NULL
 * string among \a s1..\a s4 is stored in buffer 1..4 respectively.  \a nid
 * is stored in lcfg_nid.  Returns the class_process_config() result. */
int do_lcfg(char *cfgname, lnet_nid_t nid, int cmd,
            char *s1, char *s2, char *s3, char *s4)
{
        char *args[] = { s1, s2, s3, s4 };
        struct lustre_cfg_bufs bufs;
        struct lustre_cfg *lcfg = NULL;
        int idx;
        int rc;

        CDEBUG(D_TRACE, "lcfg %s %#x %s %s %s %s\n", cfgname,
               cmd, s1, s2, s3, s4);

        lustre_cfg_bufs_reset(&bufs, cfgname);
        /* Buffer slots are 1-based; absent strings leave their slot unset. */
        for (idx = 0; idx < 4; idx++) {
                if (args[idx])
                        lustre_cfg_bufs_set_string(&bufs, idx + 1, args[idx]);
        }

        lcfg = lustre_cfg_new(cmd, &bufs);
        lcfg->lcfg_nid = nid;
        rc = class_process_config(lcfg);
        lustre_cfg_free(lcfg);

        return rc;
}

static int lustre_start_simple(char *obdname, char *type, char *uuid,
                               char *s1, char *s2)
{
        int rc;
        CDEBUG(D_MOUNT, "Starting obd %s (typ=%s)\n", obdname, type);

        rc = do_lcfg(obdname, 0, LCFG_ATTACH, type, uuid, 0, 0);
        if (rc) {
                CERROR("%s attach error %d\n", obdname, rc);
                return(rc);
        }
        rc = do_lcfg(obdname, 0, LCFG_SETUP, s1, s2, 0, 0);
        if (rc) {
                CERROR("%s setup error %d\n", obdname, rc);
                do_lcfg(obdname, 0, LCFG_DETACH, 0, 0, 0, 0);
        }
        return rc;
}

/* Set up a MGS to serve startup logs */
static int server_start_mgs(struct super_block *sb)
{
        struct lustre_sb_info    *lsi = s2lsi(sb);
        struct vfsmount          *mnt = lsi->lsi_srv_mnt;
        struct lustre_mount_info *lmi;
        int    rc = 0;
        ENTRY;
        LASSERT(mnt);

        /* It is impossible to have more than 1 MGS per node, since
           MGC wouldn't know which to connect to */
        lmi = server_find_mount(LUSTRE_MGS_OBDNAME);
        if (lmi) {
                lsi = s2lsi(lmi->lmi_sb);
                LCONSOLE_ERROR_MSG(0x15d, "The MGS service was already started"
                                   " from server %s\n",
                                   lsi->lsi_ldd->ldd_svname);
                RETURN(-EALREADY);
        }

        CDEBUG(D_CONFIG, "Start MGS service %s\n", LUSTRE_MGS_OBDNAME);

        rc = server_register_mount(LUSTRE_MGS_OBDNAME, sb, mnt);

        if (!rc &&
            ((rc = lustre_start_simple(LUSTRE_MGS_OBDNAME, LUSTRE_MGS_NAME,
                                       LUSTRE_MGS_OBDNAME, 0, 0))))
                server_deregister_mount(LUSTRE_MGS_OBDNAME);

        if (rc)
                LCONSOLE_ERROR_MSG(0x15e, "Failed to start MGS '%s' (%d). "
                                   "Is the 'mgs' module loaded?\n",
                                   LUSTRE_MGS_OBDNAME, rc);
        RETURN(rc);
}

static int server_stop_mgs(struct super_block *sb)
{
        struct obd_device *obd;
        int rc;
        ENTRY;

        CDEBUG(D_MOUNT, "Stop MGS service %s\n", LUSTRE_MGS_OBDNAME);

        /* There better be only one MGS */
        obd = class_name2obd(LUSTRE_MGS_OBDNAME);
        if (!obd) {
                CDEBUG(D_CONFIG, "mgs %s not running\n", LUSTRE_MGS_OBDNAME);
                RETURN(-EALREADY);
        }

        /* The MGS should always stop when we say so */
        obd->obd_force = 1;
        rc = class_manual_cleanup(obd);
        RETURN(rc);
}

/* Held by lustre_start_mgc() and lustre_stop_mgc() around MGC lookup,
   creation, refcounting and teardown. */
DECLARE_MUTEX(mgc_start_lock);

/* Set up a mgcobd to process startup logs */
/* Set up a mgcobd to process startup logs.
 *
 * The MGC obd name is LUSTRE_MGC_OBDNAME followed by the first usable MGS
 * nid (from mgsnode= params or local nids on a server, from the mount
 * device string on a client).  If an obd with that name already exists its
 * cl_mgc_refcount is bumped and it is reused; otherwise the MGS nids are
 * added as uuids, a new MGC is started, failover connections are added and
 * the MGC is connected to the MGS.
 *
 * On the "out" path lsi->lsi_mgc is set to the obd, including when
 * obd_connect() failed.  Returns 0 or a negative errno. */
static int lustre_start_mgc(struct super_block *sb)
{
        struct lustre_handle mgc_conn = {0, };
        struct obd_connect_data ocd = { 0 };
        struct lustre_sb_info *lsi = s2lsi(sb);
        struct obd_device *obd;
        struct obd_export *exp;
        struct obd_uuid *uuid;
        class_uuid_t uuidc;
        lnet_nid_t nid;
        char *mgcname, *niduuid;
        char *ptr;
        int recov_bk;
        int rc = 0, i = 0, j, len;
        ENTRY;

        LASSERT(lsi->lsi_lmd);

        /* Find the first non-lo MGS nid for our MGC name */
        if (lsi->lsi_flags & LSI_SERVER) {
                ptr = lsi->lsi_ldd->ldd_params;
                /* Use mgsnode= nids */
                if ((class_find_param(ptr, PARAM_MGSNODE, &ptr) == 0) &&
                    (class_parse_nid(ptr, &nid, &ptr) == 0)) {
                        i++;
                } else if (IS_MGS(lsi->lsi_ldd)) {
                        lnet_process_id_t id;
                        /* NOTE(review): i is post-incremented by the loop
                           condition even when no non-lo nid is found, so the
                           "i == 0" check below cannot fire on this branch
                           and nid may be left unset -- confirm. */
                        while ((rc = LNetGetId(i++, &id)) != -ENOENT) {
                                if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND)
                                        continue;
                                nid = id.nid;
                                i++;
                                break;
                        }
                }
        } else { /* client */
                /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
                ptr = lsi->lsi_lmd->lmd_dev;
                if (class_parse_nid(ptr, &nid, &ptr) == 0)
                        i++;
        }
        if (i == 0) {
                CERROR("No valid MGS nids found.\n");
                RETURN(-EINVAL);
        }

        /* niduuid is "<mgcname>_<hex index>"; len + 2 is exactly enough
           while the index prints as a single hex digit. */
        len = strlen(LUSTRE_MGC_OBDNAME) + strlen(libcfs_nid2str(nid)) + 1;
        OBD_ALLOC(mgcname, len);
        OBD_ALLOC(niduuid, len + 2);
        /* mgc_start_lock has not been taken yet, so bail out past the
           mutex_up() in the common exit path. */
        if (!mgcname || !niduuid)
                GOTO(out_nolock, rc = -ENOMEM);
        sprintf(mgcname, "%s%s", LUSTRE_MGC_OBDNAME, libcfs_nid2str(nid));

        mutex_down(&mgc_start_lock);

        obd = class_name2obd(mgcname);
        if (obd) {
                /* Re-using an existing MGC */
                atomic_inc(&obd->u.cli.cl_mgc_refcount);

                recov_bk = 0;
                /* If we are restarting the MGS, don't try to keep the MGC's
                   old connection, or registration will fail. */
                if ((lsi->lsi_flags & LSI_SERVER) && IS_MGS(lsi->lsi_ldd)) {
                        CDEBUG(D_MOUNT, "New MGS with live MGC\n");
                        recov_bk = 1;
                }

                /* Try all connections, but only once (again).
                   We don't want to block another target from starting
                   (using its local copy of the log), but we do want to connect
                   if at all possible. */
                recov_bk++;
                CDEBUG(D_MOUNT, "%s: Set MGC reconnect %d\n", mgcname,recov_bk);
                rc = obd_set_info_async(obd->obd_self_export,
                                        strlen(KEY_INIT_RECOV_BACKUP),
                                        KEY_INIT_RECOV_BACKUP,
                                        sizeof(recov_bk), &recov_bk, NULL);
                /* The set_info result is deliberately discarded. */
                GOTO(out, rc = 0);
        }

        CDEBUG(D_MOUNT, "Start MGC '%s'\n", mgcname);

        /* Add the primary nids for the MGS */
        i = 0;
        sprintf(niduuid, "%s_%x", mgcname, i);
        if (lsi->lsi_flags & LSI_SERVER) {
                ptr = lsi->lsi_ldd->ldd_params;
                if (IS_MGS(lsi->lsi_ldd)) {
                        /* Use local nids (including LO) */
                        lnet_process_id_t id;
                        while ((rc = LNetGetId(i++, &id)) != -ENOENT) {
                                rc = do_lcfg(mgcname, id.nid,
                                             LCFG_ADD_UUID, niduuid, 0,0,0);
                        }
                } else {
                        /* Use mgsnode= nids */
                        if (class_find_param(ptr, PARAM_MGSNODE, &ptr) != 0) {
                                CERROR("No MGS nids given.\n");
                                GOTO(out_free, rc = -EINVAL);
                        }
                        while (class_parse_nid(ptr, &nid, &ptr) == 0) {
                                rc = do_lcfg(mgcname, nid,
                                             LCFG_ADD_UUID, niduuid, 0,0,0);
                                i++;
                        }
                }
        } else { /* client */
                /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
                ptr = lsi->lsi_lmd->lmd_dev;
                while (class_parse_nid(ptr, &nid, &ptr) == 0) {
                        rc = do_lcfg(mgcname, nid,
                                     LCFG_ADD_UUID, niduuid, 0,0,0);
                        i++;
                        /* Stop at the first failover nid */
                        if (*ptr == ':')
                                break;
                }
        }
        if (i == 0) {
                CERROR("No valid MGS nids found.\n");
                GOTO(out_free, rc = -EINVAL);
        }
        lsi->lsi_lmd->lmd_mgs_failnodes = 1;

        /* Random uuid for MGC allows easier reconnects */
        OBD_ALLOC_PTR(uuid);
        /* The uuid is written to right below; don't do that through a
           failed allocation. */
        if (!uuid)
                GOTO(out_free, rc = -ENOMEM);
        ll_generate_random_uuid(uuidc);
        class_uuid_unparse(uuidc, uuid);

        /* Start the MGC */
        rc = lustre_start_simple(mgcname, LUSTRE_MGC_NAME,
                                 (char *)uuid->uuid, LUSTRE_MGS_OBDNAME,
                                 niduuid);
        OBD_FREE_PTR(uuid);
        if (rc)
                GOTO(out_free, rc);

        /* Add any failover MGS nids */
        i = 1;
        while ((*ptr == ':' ||
                class_find_param(ptr, PARAM_MGSNODE, &ptr) == 0)) {
                /* New failover node */
                sprintf(niduuid, "%s_%x", mgcname, i);
                j = 0;
                while (class_parse_nid(ptr, &nid, &ptr) == 0) {
                        j++;
                        rc = do_lcfg(mgcname, nid,
                                     LCFG_ADD_UUID, niduuid, 0,0,0);
                        if (*ptr == ':')
                                break;
                }
                if (j > 0) {
                        rc = do_lcfg(mgcname, 0, LCFG_ADD_CONN,
                                     niduuid, 0, 0, 0);
                        i++;
                } else {
                        /* at ":/fsname" */
                        break;
                }
        }
        /* Remembered so lustre_stop_mgc() knows how many nid uuids to
           delete. */
        lsi->lsi_lmd->lmd_mgs_failnodes = i;

        obd = class_name2obd(mgcname);
        if (!obd) {
                CERROR("Can't find mgcobd %s\n", mgcname);
                GOTO(out_free, rc = -ENOTCONN);
        }

        /* Keep a refcount of servers/clients who started with "mount",
           so we know when we can get rid of the mgc. */
        atomic_set(&obd->u.cli.cl_mgc_refcount, 1);

        /* Try all connections, but only once. */
        recov_bk = 1;
        rc = obd_set_info_async(obd->obd_self_export,
                                strlen(KEY_INIT_RECOV_BACKUP),
                                KEY_INIT_RECOV_BACKUP,
                                sizeof(recov_bk), &recov_bk, NULL);
        if (rc)
                /* nonfatal */
                CERROR("can't set %s %d\n", KEY_INIT_RECOV_BACKUP, rc);
        /* We connect to the MGS at setup, and don't disconnect until cleanup */
        rc = obd_connect(NULL, &mgc_conn, obd, &(obd->obd_uuid), &ocd);
        if (rc) {
                CERROR("connect failed %d\n", rc);
                GOTO(out, rc);
        }

        exp = class_conn2export(&mgc_conn);
        obd->u.cli.cl_mgc_mgsexp = exp;

out:
        /* Keep the mgc info in the sb. Note that many lsi's can point
           to the same mgc.*/
        lsi->lsi_mgc = obd;
out_free:
        mutex_up(&mgc_start_lock);
out_nolock:
        if (mgcname)
                OBD_FREE(mgcname, len);
        if (niduuid)
                OBD_FREE(niduuid, len + 2);
        RETURN(rc);
}

/* Drop this superblock's use of its MGC and tear the MGC down when the last
 * user goes away.
 *
 * lsi->lsi_mgc is cleared unconditionally.  If other users remain
 * (cl_mgc_refcount does not reach zero) -EBUSY is returned and the obd is
 * left alone.  Otherwise the MGS export is disconnected, the obd is cleaned
 * up with force, and the nid uuids "<obdname>_<hex i>" for
 * i < lmd_mgs_failnodes are deleted.
 *
 * Returns 0, -ENOENT when there is no lsi or no MGC, -EBUSY as above,
 * -ENOMEM if the uuid name buffer could not be allocated, or the error from
 * class_manual_cleanup() / the last LCFG_DEL_UUID. */
static int lustre_stop_mgc(struct super_block *sb)
{
        struct lustre_sb_info *lsi = s2lsi(sb);
        struct obd_device *obd;
        /* niduuid starts out NULL because the -EBUSY path reaches "out"
           before the buffer is allocated. */
        char *niduuid = NULL, *ptr = NULL;
        int i, rc = 0, len = 0;
        ENTRY;

        if (!lsi)
                RETURN(-ENOENT);
        obd = lsi->lsi_mgc;
        if (!obd)
                RETURN(-ENOENT);

        lsi->lsi_mgc = NULL;
        mutex_down(&mgc_start_lock);
        if (!atomic_dec_and_test(&obd->u.cli.cl_mgc_refcount)) {
                /* This is not fatal, every client that stops
                   will call in here. */
                CDEBUG(D_MOUNT, "mgc still has %d references.\n",
                       atomic_read(&obd->u.cli.cl_mgc_refcount));
                GOTO(out, rc = -EBUSY);
        }

        /* MGC must always stop */
        obd->obd_force = 1;
        /* client_disconnect_export uses the no_recov flag to decide whether it
           should disconnect or just invalidate.  (The MGC has no
           recoverable data in any case.) */
        obd->obd_no_recov = 1;

        if (obd->u.cli.cl_mgc_mgsexp)
                obd_disconnect(obd->u.cli.cl_mgc_mgsexp);

        /* Save the obdname for cleaning the nid uuids, which are
           obdname_XX */
        len = strlen(obd->obd_name) + 6;
        OBD_ALLOC(niduuid, len);
        if (niduuid) {
                strcpy(niduuid, obd->obd_name);
                ptr = niduuid + strlen(niduuid);
        }

        rc = class_manual_cleanup(obd);
        if (rc)
                GOTO(out, rc);

        /* Clean the nid uuids */
        /* Leave through "out" so mgc_start_lock is always released. */
        if (!niduuid)
                GOTO(out, rc = -ENOMEM);
        for (i = 0; i < lsi->lsi_lmd->lmd_mgs_failnodes; i++) {
                sprintf(ptr, "_%x", i);
                rc = do_lcfg(LUSTRE_MGC_OBDNAME, 0, LCFG_DEL_UUID,
                             niduuid, 0, 0, 0);
                if (rc)
                        CERROR("del MDC UUID %s failed: rc = %d\n",
                               niduuid, rc);
        }
        /* class_import_put will get rid of the additional connections */

out:
        /* Single release point: the buffer is freed on every path that
           allocated it, including a failed class_manual_cleanup(). */
        if (niduuid)
                OBD_FREE(niduuid, len);
        mutex_up(&mgc_start_lock);
        RETURN(rc);
}

/* Since there's only one mgc per node, we have to change its fs to get
   access to the right disk. */
/* Point \a mgc at the disk behind \a sb.
 *
 * Sends a "set_fs" set_info to the MGC's self export with the superblock as
 * the value (value length is sizeof(*sb)).  Returns the set_info result and
 * logs an error when it is non-zero. */
static int server_mgc_set_fs(struct obd_device *mgc, struct super_block *sb)
{
        struct lustre_sb_info *lsi = s2lsi(sb);
        int rc;
        ENTRY;

        CDEBUG(D_MOUNT, "Set mgc disk for %s\n", lsi->lsi_lmd->lmd_dev);

        /* cl_mgc_sem in mgc ensures we sleep if the mgc_fs is busy */
        rc = obd_set_info_async(mgc->obd_self_export,
                                sizeof("set_fs") - 1, "set_fs",
                                sizeof(*sb), sb, NULL);
        if (rc != 0)
                CERROR("can't set_fs %d\n", rc);

        RETURN(rc);
}

/* Undo server_mgc_set_fs(): send a "clear_fs" set_info (with no value) to
 * the MGC's self export and return its result. */
static int server_mgc_clear_fs(struct obd_device *mgc)
{
        ENTRY;

        CDEBUG(D_MOUNT, "Unassign mgc disk\n");

        RETURN(obd_set_info_async(mgc->obd_self_export,
                                  sizeof("clear_fs") - 1, "clear_fs",
                                  0, NULL, NULL));
}

/* Held by server_stop_servers() while it looks up and cleans up the MDS/OSS
   obd; other users, if any, are outside the visible part of the file. */
DECLARE_MUTEX(server_start_lock);

/* Stop MDS/OSS if nobody is using them */
/* Stop the MDS/OSS service obd if nobody is using it.
 *
 * \a lddflags selects which service to look at (LDD_F_SV_TYPE_MDT and/or
 * LDD_F_SV_TYPE_OST); \a lsiflags is not used.  The service obd is force
 * cleaned up when its obd type cannot be found or has a zero typ_refcnt.
 * Returns 0 when nothing was stopped, else the class_manual_cleanup()
 * result. */
static int server_stop_servers(int lddflags, int lsiflags)
{
        struct obd_device *svc = NULL;
        struct obd_type *svc_type = NULL;
        int rc = 0;
        ENTRY;

        mutex_down(&server_start_lock);

        /* Either an MDT or an OST or neither  */
        /* if this was an MDT, and there are no more MDT's, clean up the MDS */
        if (lddflags & LDD_F_SV_TYPE_MDT) {
                svc = class_name2obd(LUSTRE_MDS_OBDNAME);
                /*FIXME pre-rename, should eventually be LUSTRE_MDT_NAME*/
                if (svc != NULL)
                        svc_type = class_search_type(LUSTRE_MDS_NAME);
        }
        /* if this was an OST, and there are no more OST's, clean up the OSS */
        if (lddflags & LDD_F_SV_TYPE_OST) {
                svc = class_name2obd(LUSTRE_OSS_OBDNAME);
                if (svc != NULL)
                        svc_type = class_search_type(LUSTRE_OST_NAME);
        }

        if (svc != NULL && (svc_type == NULL || svc_type->typ_refcnt == 0)) {
                svc->obd_force = 1;
                /* obd_fail doesn't mean much on a server obd */
                rc = class_manual_cleanup(svc);
        }

        mutex_up(&server_start_lock);

        RETURN(rc);
}

/* Dump the identifying fields of \a mti through PRINT_CMD, preceded by a
 * line carrying \a title.  Always returns 0. */
int server_mti_print(char *title, struct mgs_target_info *mti)
{
        PRINT_CMD(PRINT_MASK, "mti %s\n", title);

        /* names */
        PRINT_CMD(PRINT_MASK, "server: %s\n", mti->mti_svname);
        PRINT_CMD(PRINT_MASK, "fs:     %s\n", mti->mti_fsname);
        PRINT_CMD(PRINT_MASK, "uuid:   %s\n", mti->mti_uuid);

        /* config version and flags share one line */
        PRINT_CMD(PRINT_MASK, "ver: %d  flags: %#x\n",
                  mti->mti_config_ver, mti->mti_flags);

        return 0;
}

/* Replace the security parameters in mti->mti_params with the ones given on
 * the mount line.
 *
 * When \a lmd carries neither lmd_sec_mdt nor lmd_sec_cli the parameter
 * string is left untouched.  Otherwise every space-separated token that
 * starts with PARAM_SEC_RPC_MDT or PARAM_SEC_RPC_CLI is removed, and a new
 * " <param><value>" token is appended for each option present in \a lmd.
 *
 * Returns 0, or -ENOMEM if an appended option would not fit. */
static
int mti_set_sec_opts(struct mgs_target_info *mti, struct lustre_mount_data *lmd)
{
        char *cur, *next;

        /* Nothing on the mount line: the on-disk params stand as they are.
         * This assumes that any change of the on-disk data by tune2fs
         * leads to the server rewriting the log. */
        if (!lmd->lmd_sec_mdt && !lmd->lmd_sec_cli)
                return 0;

        /* Pass 1: drop existing sec options, one token at a time. */
        for (cur = mti->mti_params; *cur != '\0'; ) {
                int is_sec;

                while (*cur == ' ')
                        cur++;

                is_sec = !strncmp(cur, PARAM_SEC_RPC_MDT,
                                  sizeof(PARAM_SEC_RPC_MDT) - 1) ||
                         !strncmp(cur, PARAM_SEC_RPC_CLI,
                                  sizeof(PARAM_SEC_RPC_CLI) - 1);

                next = strchr(cur, ' ');
                if (next == NULL) {
                        /* Last token: cut it off if it has to go. */
                        if (is_sec)
                                *cur = '\0';
                        break;
                }
                next++;

                if (!is_sec) {
                        cur = next;
                        continue;
                }
                /* Close the gap; cur now points at the following token. */
                memmove(cur, next, strlen(next) + 1);
        }

        /* Pass 2: append sec options from lmd */
        /* FIXME add flag LDD_F_UPDATE after mountconf start supporting
         * log updating.
         */
        if (lmd->lmd_sec_mdt != NULL) {
                if (strlen(mti->mti_params) + strlen(lmd->lmd_sec_mdt) +
                    sizeof(PARAM_SEC_RPC_MDT) + 1 >= sizeof(mti->mti_params)) {
                        CERROR("security params too big for mti\n");
                        return -ENOMEM;
                }
                strcat(mti->mti_params, " "PARAM_SEC_RPC_MDT);
                strcat(mti->mti_params, lmd->lmd_sec_mdt);
        }

        if (lmd->lmd_sec_cli != NULL) {
                if (strlen(mti->mti_params) + strlen(lmd->lmd_sec_cli) +
                    sizeof(PARAM_SEC_RPC_CLI) + 1 >= sizeof(mti->mti_params)) {
                        CERROR("security params too big for mti\n");
                        return -ENOMEM;
                }
                strcat(mti->mti_params, " "PARAM_SEC_RPC_CLI);
                strcat(mti->mti_params, lmd->lmd_sec_cli);
        }

        return 0;
}

/* Fill in the mgs_target_info that is sent to the MGS at registration.
 *
 * The names, flags, index, uuid and params are taken from the on-disk data
 * (lsi_ldd), the nid list from LNET (loopback nids are skipped and at most
 * MTI_NIDS_MAX nids are used), and the security options from the mount
 * data (lsi_lmd) via mti_set_sec_opts().
 *
 * Returns 0 on success, -EINVAL if sb is not a server mount, -ENOMEM if the
 * on-disk params do not fit into the mti, or the result of
 * mti_set_sec_opts(). */
static int server_sb2mti(struct super_block *sb, struct mgs_target_info *mti)
{
        struct lustre_sb_info    *lsi = s2lsi(sb);
        struct lustre_disk_data  *ldd = lsi->lsi_ldd;
        struct lustre_mount_data *lmd = lsi->lsi_lmd;
        lnet_process_id_t         id;
        int i = 0;
        ENTRY;

        if (!(lsi->lsi_flags & LSI_SERVER))
                RETURN(-EINVAL);

        /* strncpy() leaves the destination unterminated when the source
         * fills it completely; the names are later used as strings, so
         * terminate them explicitly. */
        strncpy(mti->mti_fsname, ldd->ldd_fsname,
                sizeof(mti->mti_fsname));
        mti->mti_fsname[sizeof(mti->mti_fsname) - 1] = '\0';
        strncpy(mti->mti_svname, ldd->ldd_svname,
                sizeof(mti->mti_svname));
        mti->mti_svname[sizeof(mti->mti_svname) - 1] = '\0';

        /* Collect our nids, ignoring the loopback network */
        mti->mti_nid_count = 0;
        while (LNetGetId(i++, &id) != -ENOENT) {
                if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND)
                        continue;
                mti->mti_nids[mti->mti_nid_count] = id.nid;
                mti->mti_nid_count++;
                if (mti->mti_nid_count >= MTI_NIDS_MAX) {
                        CWARN("Only using first %d nids for %s\n",
                              mti->mti_nid_count, mti->mti_svname);
                        break;
                }
        }

        mti->mti_lustre_ver = LUSTRE_VERSION_CODE;
        mti->mti_config_ver = 0;
        mti->mti_flags = ldd->ldd_flags;
        mti->mti_stripe_index = ldd->ldd_svindex;
        memcpy(mti->mti_uuid, ldd->ldd_uuid, sizeof(mti->mti_uuid));

        /* The params must fit *including* the terminating NUL, because
         * mti_set_sec_opts() below walks mti_params as a C string. */
        if (strlen(ldd->ldd_params) >= sizeof(mti->mti_params)) {
                CERROR("params too big for mti\n");
                RETURN(-ENOMEM);
        }
        memcpy(mti->mti_params, ldd->ldd_params, sizeof(mti->mti_params));

        RETURN(mti_set_sec_opts(mti, lmd));
}

/* Register this target with the MGS, whether or not the MGS has seen it
   before.  If needed the MGS constructs the startup logs and assigns an
   index; when it asks for the on-disk data to be rewritten, the new index
   and server name are written back and the disk label is updated.
   Returns 0 on success or a negative errno. */
int server_register_target(struct super_block *sb)
{
        struct lustre_sb_info *lsi = s2lsi(sb);
        struct obd_device *mgc = lsi->lsi_mgc;
        struct lustre_disk_data *ldd = lsi->lsi_ldd;
        struct mgs_target_info *mti = NULL;
        int rc;
        ENTRY;

        LASSERT(mgc);

        if ((lsi->lsi_flags & LSI_SERVER) == 0)
                RETURN(-EINVAL);

        OBD_ALLOC_PTR(mti);
        if (mti == NULL)
                RETURN(-ENOMEM);

        /* Describe ourselves for the MGS */
        rc = server_sb2mti(sb, mti);
        if (rc != 0)
                GOTO(out, rc);

        CDEBUG(D_MOUNT, "Registration %s, fs=%s, %s, index=%04x, flags=%#x\n",
               mti->mti_svname, mti->mti_fsname,
               libcfs_nid2str(mti->mti_nids[0]), mti->mti_stripe_index,
               mti->mti_flags);

        /* Send the registration request */
        /* FIXME use mgc_process_config instead */
        rc = obd_set_info_async(mgc->u.cli.cl_mgc_mgsexp,
                                strlen("register_target"), "register_target",
                                sizeof(*mti), mti, NULL);
        if (rc != 0) {
                CERROR("registration with the MGS failed (%d)\n", rc);
                GOTO(out, rc);
        }

        /* Our flags always follow what the MGS handed back, except for the
           rewrite request itself, which is handled just below */
        ldd->ldd_flags = mti->mti_flags & ~LDD_F_REWRITE_LDD;

        /* The MGS wants our on-disk data changed (so far this only means
           the index) */
        if (mti->mti_flags & LDD_F_REWRITE_LDD) {
                struct super_block *disk_sb;
                char *new_label;
                int label_rc;

                CDEBUG(D_MOUNT, "Changing on-disk index from %#x to %#x "
                       "for %s\n", ldd->ldd_svindex, mti->mti_stripe_index,
                       mti->mti_svname);
                ldd->ldd_svindex = mti->mti_stripe_index;
                strncpy(ldd->ldd_svname, mti->mti_svname,
                        sizeof(ldd->ldd_svname));
                /* or ldd_make_sv_name(ldd); */
                ldd_write(&mgc->obd_lvfs_ctxt, ldd);

                disk_sb = lsi->lsi_srv_mnt->mnt_sb;
                label_rc = fsfilt_set_label(mgc, disk_sb, mti->mti_svname);
                if (label_rc != 0)
                        CERROR("Label set error %d\n", label_rc);

                new_label = fsfilt_get_label(mgc, disk_sb);
                if (new_label != NULL)
                        CDEBUG(D_MOUNT, "Disk label changed to %s\n",
                               new_label);

                /* Push the new ldd out to disk */
                fsfilt_sync(mgc, disk_sb);
        }

out:
        /* mti is always allocated by the time this label is reached */
        OBD_FREE_PTR(mti);
        RETURN(rc);
}

/* Start the target that lives on this server disk.
 *
 * Steps, in order: make sure the global OSS device exists when this is an
 * OST, point the MGC at our disk, register with the MGS, register the
 * mount under the target's name, and process the target's config log.
 * The MGC fs is always released again before returning.  Once the log has
 * been processed successfully, recovery is aborted if the mount asked for
 * it, and the target is notified that its config log is complete.
 *
 * Returns 0 on success, a negative errno on failure, or -ENXIO if the log
 * was processed but no obd with the target's name exists afterwards. */
static int server_start_targets(struct super_block *sb, struct vfsmount *mnt)
{
        struct obd_device *obd;
        struct lustre_sb_info *lsi = s2lsi(sb);
        struct config_llog_instance cfg;
        int rc;
        ENTRY;

        CDEBUG(D_MOUNT, "starting target %s\n", lsi->lsi_ldd->ldd_svname);

#if 0
        /* If we're an MDT, make sure the global MDS is running */
        if (lsi->lsi_ldd->ldd_flags & LDD_F_SV_TYPE_MDT) {
                /* make sure the MDS is started */
                mutex_down(&server_start_lock);
                obd = class_name2obd(LUSTRE_MDS_OBDNAME);
                if (!obd) {
                        rc = lustre_start_simple(LUSTRE_MDS_OBDNAME,
                    /* FIXME pre-rename, should eventually be LUSTRE_MDS_NAME */
                                                 LUSTRE_MDT_NAME,
                                                 LUSTRE_MDS_OBDNAME"_uuid",
                                                 0, 0);
                        if (rc) {
                                mutex_up(&server_start_lock);
                                CERROR("failed to start MDS: %d\n", rc);
                                RETURN(rc);
                        }
                }
                mutex_up(&server_start_lock);
        }
#endif

        /* If we're an OST, make sure the global OSS is running */
        if (lsi->lsi_ldd->ldd_flags & LDD_F_SV_TYPE_OST) {
                /* make sure OSS is started; the lookup and the start are
                   both done with server_start_lock held */
                mutex_down(&server_start_lock);
                obd = class_name2obd(LUSTRE_OSS_OBDNAME);
                if (!obd) {
                        rc = lustre_start_simple(LUSTRE_OSS_OBDNAME,
                                                 LUSTRE_OSS_NAME,
                                                 LUSTRE_OSS_OBDNAME"_uuid",
                                                 0, 0);
                        if (rc) {
                                mutex_up(&server_start_lock);
                                CERROR("failed to start OSS: %d\n", rc);
                                RETURN(rc);
                        }
                }
                mutex_up(&server_start_lock);
        }

        /* Set the mgc fs to our server disk.  This allows the MGC
           to read and write configs locally. */
        rc = server_mgc_set_fs(lsi->lsi_mgc, sb);
        if (rc)
                RETURN(rc);

        /* Register with MGS.  A failure is only fatal when the on-disk
           flags say registration is required (index needed, update or
           1.4 upgrade pending) ... */
        rc = server_register_target(sb);
        if (rc && (lsi->lsi_ldd->ldd_flags &
                   (LDD_F_NEED_INDEX | LDD_F_UPDATE | LDD_F_UPGRADE14))){
                CERROR("Required registration failed for %s: %d\n",
                       lsi->lsi_ldd->ldd_svname, rc);
                if (rc == -EIO) {
                        LCONSOLE_ERROR_MSG(0x15f, "Communication error with "
                                           "the MGS.  Is the MGS running?\n");
                }
                GOTO(out_mgc, rc);
        }
        /* ... or when the registration call returned -EINVAL.  Any other
           registration error is ignored and startup continues. */
        if (rc == -EINVAL) {
                LCONSOLE_ERROR_MSG(0x160, "The MGS is refusing to allow this "
                                   "server (%s) to start. Please see messages"
                                   " on the MGS node.\n",
                                   lsi->lsi_ldd->ldd_svname);
                GOTO(out_mgc, rc);
        }

        /* Let the target look up the mount using the target's name
           (we can't pass the sb or mnt through class_process_config.) */
        rc = server_register_mount(lsi->lsi_ldd->ldd_svname, sb, mnt);
        if (rc)
                GOTO(out_mgc, rc);

        /* Start targets using the llog named for the target */
        memset(&cfg, 0, sizeof(cfg));
        rc = lustre_process_log(sb, lsi->lsi_ldd->ldd_svname, &cfg);
        if (rc) {
                CERROR("failed to start server %s: %d\n",
                       lsi->lsi_ldd->ldd_svname, rc);
                GOTO(out_mgc, rc);
        }

out_mgc:
        /* Release the mgc fs for others to use (done on success and on
           every failure after server_mgc_set_fs succeeded) */
        server_mgc_clear_fs(lsi->lsi_mgc);

        if (!rc) {
                /* The config log should have created an obd named after
                   the target */
                obd = class_name2obd(lsi->lsi_ldd->ldd_svname);
                if (!obd) {
                        CERROR("no server named %s was started\n",
                               lsi->lsi_ldd->ldd_svname);
                        RETURN(-ENXIO);
                }

                /* Abort recovery if the mount flags asked for it and the
                   obd provides an iocontrol method */
                if ((lsi->lsi_lmd->lmd_flags & LMD_FLG_ABORT_RECOV) &&
                    (OBP(obd, iocontrol))) {
                        obd_iocontrol(OBD_IOC_ABORT_RECOVERY,
                                      obd->obd_self_export, 0, NULL, NULL);
                }

                /* log has been fully processed */
                obd_notify(obd, NULL, OBD_NOTIFY_CONFIG, (void *)CONFIG_LOG);
        }

        RETURN(rc);
}

/***************** lustre superblock **************/

struct lustre_sb_info *lustre_init_lsi(struct super_block *sb)
{
        struct lustre_sb_info *lsi = NULL;
        ENTRY;

        OBD_ALLOC(lsi, sizeof(*lsi));
        if (!lsi)
                RETURN(NULL);
        OBD_ALLOC(lsi->lsi_lmd, sizeof(*lsi->lsi_lmd));
        if (!lsi->lsi_lmd) {
                OBD_FREE(lsi, sizeof(*lsi));
                RETURN(NULL);
        }

        lsi->lsi_lmd->lmd_exclude_count = 0;
        s2lsi_nocast(sb) = lsi;
        /* we take 1 extra ref for our setup */
        atomic_set(&lsi->lsi_mounts, 1);

        /* Default umount style */
        lsi->lsi_flags = LSI_UMOUNT_FAILOVER;

        lsi->lsi_lmd->lmd_nllu = NOBODY_UID;
        lsi->lsi_lmd->lmd_nllg = NOBODY_GID;
        RETURN(lsi);
}

/* Free the lustre_sb_info attached to sb along with everything it owns:
   the on-disk data, the mount data with its strings and exclusion list,
   and the lsi itself.  The sb's lsi pointer is cleared afterwards.
   Does nothing when sb has no lsi.  Always returns 0. */
static int lustre_free_lsi(struct super_block *sb)
{
        struct lustre_sb_info *lsi = s2lsi(sb);
        ENTRY;

        if (lsi == NULL)
                RETURN(0);

        CDEBUG(D_MOUNT, "Freeing lsi\n");

        /* someone didn't call server_put_mount. */
        LASSERT(atomic_read(&lsi->lsi_mounts) == 0);

        if (lsi->lsi_ldd)
                OBD_FREE(lsi->lsi_ldd, sizeof(*lsi->lsi_ldd));

        if (lsi->lsi_lmd) {
                /* Each string was allocated as strlen + 1 bytes */
                if (lsi->lsi_lmd->lmd_dev) {
                        OBD_FREE(lsi->lsi_lmd->lmd_dev,
                                 strlen(lsi->lsi_lmd->lmd_dev) + 1);
                }
                if (lsi->lsi_lmd->lmd_profile) {
                        OBD_FREE(lsi->lsi_lmd->lmd_profile,
                                 strlen(lsi->lsi_lmd->lmd_profile) + 1);
                }
                if (lsi->lsi_lmd->lmd_sec_mdt) {
                        OBD_FREE(lsi->lsi_lmd->lmd_sec_mdt,
                                 strlen(lsi->lsi_lmd->lmd_sec_mdt) + 1);
                }
                if (lsi->lsi_lmd->lmd_sec_cli) {
                        OBD_FREE(lsi->lsi_lmd->lmd_sec_cli,
                                 strlen(lsi->lsi_lmd->lmd_sec_cli) + 1);
                }
                if (lsi->lsi_lmd->lmd_opts) {
                        OBD_FREE(lsi->lsi_lmd->lmd_opts,
                                 strlen(lsi->lsi_lmd->lmd_opts) + 1);
                }
                /* The exclusion list only exists when the count is set */
                if (lsi->lsi_lmd->lmd_exclude_count) {
                        OBD_FREE(lsi->lsi_lmd->lmd_exclude,
                                 sizeof(lsi->lsi_lmd->lmd_exclude[0]) *
                                 lsi->lsi_lmd->lmd_exclude_count);
                }
                OBD_FREE(lsi->lsi_lmd, sizeof(*lsi->lsi_lmd));
        }

        LASSERT(lsi->lsi_llsbi == NULL);
        OBD_FREE(lsi, sizeof(*lsi));
        s2lsi_nocast(sb) = NULL;

        RETURN(0);
}

/* Drop one reference on the sb's lsi.  There is one reference for every
   server using the disk - e.g. MDT, MGS, and potentially MGC.
   Returns 1 if this was the last reference and the lsi was freed,
   0 otherwise. */
static int lustre_put_lsi(struct super_block *sb)
{
        struct lustre_sb_info *lsi = s2lsi(sb);
        ENTRY;

        LASSERT(lsi);

        CDEBUG(D_MOUNT, "put %p %d\n", sb, atomic_read(&lsi->lsi_mounts));

        /* Still referenced by somebody else: nothing more to do */
        if (!atomic_dec_and_test(&lsi->lsi_mounts))
                RETURN(0);

        lustre_free_lsi(sb);
        RETURN(1);
}

/*************** server mount ******************/

/* Kernel mount using mount options in MOUNT_DATA_FILE.
 *
 * The device named in the mount data is first pre-mounted (as ldiskfs,
 * falling back to ldiskfs2 or ext3 depending on the kernel version) only
 * so that ldd_parse() can read the on-disk data; that mount is dropped
 * again right away.  The device is then mounted for real with the fs type
 * given by MT_STR(ldd) and an option string built from the on-disk mount
 * options plus any mount-line options.
 *
 * On success the vfsmount is returned and the parsed ldd is stored in
 * lsi->lsi_ldd.  On failure the ldd is freed, lsi->lsi_ldd is set to NULL
 * and an ERR_PTR() is returned. */
static struct vfsmount *server_kernel_mount(struct super_block *sb)
{
        struct lvfs_run_ctxt mount_ctxt;
        struct lustre_sb_info *lsi = s2lsi(sb);
        struct lustre_disk_data *ldd;
        struct lustre_mount_data *lmd = lsi->lsi_lmd;
        struct vfsmount *mnt;
        char *options = NULL;
        unsigned long page, s_flags;
        int rc;
        ENTRY;

        OBD_ALLOC(ldd, sizeof(*ldd));
        if (!ldd)
                RETURN(ERR_PTR(-ENOMEM));

        /* In the past, we have always used flags = 0.
           Note ext3/ldiskfs can't be mounted ro. */
        s_flags = sb->s_flags;

        /* Pre-mount ldiskfs to read the MOUNT_DATA_FILE */
        CDEBUG(D_MOUNT, "Pre-mount ldiskfs %s\n", lmd->lmd_dev);
        mnt = ll_kern_mount("ldiskfs", s_flags, lmd->lmd_dev, 0);
        if (IS_ERR(mnt)) {
                /* remember the ldiskfs error; it is the one returned (and
                   reported first) if the 2.6 fallback fails as well */
                rc = PTR_ERR(mnt);
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
                /* 2.6 kernels: if ldiskfs fails, try ldiskfs2 */
                mnt = ll_kern_mount("ldiskfs2", s_flags, lmd->lmd_dev, 0);
                if (IS_ERR(mnt)) {
                        int rc2 = PTR_ERR(mnt);
                        CERROR("premount %s:%#lx ldiskfs failed: %d, ldiskfs2 "
                               "failed: %d.  Is the ldiskfs module available?\n",
                               lmd->lmd_dev, s_flags, rc, rc2);
                        GOTO(out_free, rc);
                }
#else
                /* 2.4 kernels: if ldiskfs fails, try ext3 */
                mnt = ll_kern_mount("ext3", s_flags, lmd->lmd_dev, 0);
                if (IS_ERR(mnt)) {
                        rc = PTR_ERR(mnt);
                        CERROR("premount ext3 failed: rc = %d\n", rc);
                        GOTO(out_free, rc);
                }
#endif
        }

        /* Build a run context rooted at the pre-mounted fs for ldd_parse */
        OBD_SET_CTXT_MAGIC(&mount_ctxt);
        mount_ctxt.pwdmnt = mnt;
        mount_ctxt.pwd = mnt->mnt_root;
        mount_ctxt.fs = get_ds();

        /* Read the on-disk data, then drop the pre-mount whether or not
           parsing worked */
        rc = ldd_parse(&mount_ctxt, ldd);
        unlock_mntput(mnt);

        if (rc) {
                CERROR("premount parse options failed: rc = %d\n", rc);
                GOTO(out_free, rc);
        }

        /* Done with our pre-mount, now do the real mount. */

        /* Glom up mount options */
        page = __get_free_page(GFP_KERNEL);
        if (!page)
                GOTO(out_free, rc = -ENOMEM);

        /* Start from the on-disk options; 2 bytes are held back for a
           separating ',' and the terminating NUL */
        options = (char *)page;
        memset(options, 0, CFS_PAGE_SIZE);
        strncpy(options, ldd->ldd_mount_opts, CFS_PAGE_SIZE - 2);

        /* Add in any mount-line options */
        if (lmd->lmd_opts && (*(lmd->lmd_opts) != 0)) {
                /* len is computed before the ',' is appended, which is
                   what the held-back bytes are for */
                int len = CFS_PAGE_SIZE - strlen(options) - 2;
                if (*options != 0)
                        strcat(options, ",");
                strncat(options, lmd->lmd_opts, len);
        }

        /* Special permanent mount flags */
        if (IS_OST(ldd))
            s_flags |= MS_NOATIME | MS_NODIRATIME;

        CDEBUG(D_MOUNT, "kern_mount: %s %s %s\n",
               MT_STR(ldd), lmd->lmd_dev, options);
        mnt = ll_kern_mount(MT_STR(ldd), s_flags, lmd->lmd_dev,
                            (void *)options);
        /* the option page is no longer needed once the mount call returns */
        free_page(page);
        if (IS_ERR(mnt)) {
                rc = PTR_ERR(mnt);
                CERROR("ll_kern_mount failed: rc = %d\n", rc);
                GOTO(out_free, rc);
        }

        lsi->lsi_ldd = ldd;   /* freed at lsi cleanup */
        CDEBUG(D_SUPER, "%s: mnt = %p\n", lmd->lmd_dev, mnt);
        RETURN(mnt);

out_free:
        OBD_FREE(ldd, sizeof(*ldd));
        lsi->lsi_ldd = NULL;
        RETURN(ERR_PTR(rc));
}

/* Wait until we hold the only remaining reference on mnt, polling every
   5 seconds for up to 120 seconds.  A console warning is printed before
   each wait, and an error is logged if the mount is still busy when the
   time is up. */
static void server_wait_finished(struct vfsmount *mnt)
{
        wait_queue_head_t   waitq;
        struct l_wait_info  lwi;
        int                 secs_left;

        init_waitqueue_head(&waitq);

        for (secs_left = 120;
             atomic_read(&mnt->mnt_count) > 1 && secs_left > 0;
             secs_left -= 5) {
                LCONSOLE_WARN("Mount still busy with %d refs, waiting for "
                              "%d secs...\n",
                              atomic_read(&mnt->mnt_count), secs_left);

                /* Nothing ever wakes this queue and the condition is
                   constant false, so this just waits out the timeout */
                lwi = LWI_TIMEOUT(5 * HZ, NULL, NULL);
                l_wait_event(waitq, 0, &lwi);
        }

        if (atomic_read(&mnt->mnt_count) <= 1)
                return;

        CERROR("Mount %p is still busy (%d refs), giving up.\n",
               mnt, atomic_read(&mnt->mnt_count));
}

/* put_super operation for server superblocks: stop the target, stop the
 * MGC/MGS if this disk hosts an MGS, drop our references on the MGC and
 * the lsi, wait for the underlying mount to become idle, release it, and
 * finally stop the shared servers if they are no longer needed.
 *
 * lustre_common_put_super() drops an lsi reference and may thereby free
 * the lsi and its ldd, so everything needed afterwards (the flags, the
 * vfsmount and the server name) is copied up front. */
static void server_put_super(struct super_block *sb)
{
        struct lustre_sb_info *lsi = s2lsi(sb);
        struct obd_device     *obd;
        struct vfsmount       *mnt = lsi->lsi_srv_mnt;
        char *tmpname, *extraname = NULL;
        int tmpname_sz;
        int lddflags = lsi->lsi_ldd->ldd_flags;
        int lsiflags = lsi->lsi_flags;
        int rc;
        ENTRY;

        LASSERT(lsiflags & LSI_SERVER);

        /* Keep a private copy of the server name for the final console
           message.  The copy is only a convenience, so an allocation
           failure must not stop the cleanup. */
        tmpname_sz = strlen(lsi->lsi_ldd->ldd_svname) + 1;
        OBD_ALLOC(tmpname, tmpname_sz);
        if (tmpname)
                memcpy(tmpname, lsi->lsi_ldd->ldd_svname, tmpname_sz);
        CDEBUG(D_MOUNT, "server put_super %s\n", lsi->lsi_ldd->ldd_svname);

        /* Stop the target */
        if (IS_MDT(lsi->lsi_ldd) || IS_OST(lsi->lsi_ldd)) {
                struct lustre_profile *lprof = NULL;

                /* tell the mgc to drop the config log */
                lustre_end_log(sb, lsi->lsi_ldd->ldd_svname, NULL);

                /* COMPAT_146 - profile may get deleted in mgc_cleanup.
                   If there are any setup/cleanup errors, save the lov
                   name for safety cleanup later. */
                lprof = class_get_profile(lsi->lsi_ldd->ldd_svname);
                if (lprof && lprof->lp_dt) {
                        OBD_ALLOC(extraname, strlen(lprof->lp_dt) + 1);
                        /* without the copy the orphan cleanup at the end
                           is simply skipped */
                        if (extraname)
                                strcpy(extraname, lprof->lp_dt);
                }

                obd = class_name2obd(lsi->lsi_ldd->ldd_svname);
                if (obd) {
                        CDEBUG(D_MOUNT, "stopping %s\n", obd->obd_name);
                        if (lsi->lsi_flags & LSI_UMOUNT_FORCE)
                                obd->obd_force = 1;
                        if (lsi->lsi_flags & LSI_UMOUNT_FAILOVER)
                                obd->obd_fail = 1;
                        /* We can't seem to give an error return code
                           to .put_super, so we better make sure we clean up! */
                        obd->obd_force = 1;
                        class_manual_cleanup(obd);
                } else {
                        CERROR("no obd %s\n", lsi->lsi_ldd->ldd_svname);
                        server_deregister_mount(lsi->lsi_ldd->ldd_svname);
                }
        }

        /* If they wanted the mgs to stop separately from the mdt, they
           should have put it on a different device. */
        if (IS_MGS(lsi->lsi_ldd)) {
                /* stop the mgc before the mgs so the connection gets cleaned
                   up */
                lustre_stop_mgc(sb);
                server_stop_mgs(sb);
        }

        /* Clean the mgc and sb.  lsi must not be touched after this. */
        rc = lustre_common_put_super(sb);
        /* FIXME how can I report a failure to umount? */

        /* Wait for the targets to really clean up - can't exit (and let the
           sb get destroyed) while the mount is still in use */
        server_wait_finished(mnt);

        /* drop the One True Mount */
        unlock_mntput(mnt);

        /* Stop the servers (MDS, OSS) if no longer needed.  We must wait
           until the target is really gone so that our type refcount check
           is right. */
        server_stop_servers(lddflags, lsiflags);

        /* In case of startup or cleanup err, stop related obds */
        if (extraname) {
                obd = class_name2obd(extraname);
                if (obd) {
                        CWARN("Cleaning orphaned obd %s\n", extraname);
                        obd->obd_force = 1;
                        class_manual_cleanup(obd);
                }
                OBD_FREE(extraname, strlen(extraname) + 1);
        }

        LCONSOLE_WARN("server umount %s complete\n",
                      tmpname ? tmpname : "?");
        if (tmpname)
                OBD_FREE(tmpname, tmpname_sz);
        EXIT;
}

/* umount_begin operation for server superblocks.  Switches the umount
   style recorded in lsi_flags from failover to force.  The prototype
   depends on HAVE_UMOUNTBEGIN_VFSMOUNT; in the vfsmount variant nothing is
   changed unless MNT_FORCE is set in flags. */
#ifdef HAVE_UMOUNTBEGIN_VFSMOUNT
static void server_umount_begin(struct vfsmount *vfsmnt, int flags)
{
        struct super_block *sb = vfsmnt->mnt_sb;
#else
static void server_umount_begin(struct super_block *sb)
{
#endif
        struct lustre_sb_info *lsi = s2lsi(sb);
        ENTRY;

#ifdef HAVE_UMOUNTBEGIN_VFSMOUNT
        /* not a forced umount: keep the current umount style */
        if (!(flags & MNT_FORCE)) {
                EXIT;
                return;
        }
#endif

        CDEBUG(D_MOUNT, "umount -f\n");
        /* umount = failover
           umount -f = force
           no third way to do non-force, non-failover */
        lsi->lsi_flags &= ~LSI_UMOUNT_FAILOVER;
        lsi->lsi_flags |= LSI_UMOUNT_FORCE;
        EXIT;
}

/* statfs operation for server superblocks.  The request is forwarded to
   the statfs method of the underlying server mount (lsi_srv_mnt) when
   there is one; if that succeeds only f_type is overridden with our own
   magic.  Otherwise buf is filled with fixed placeholder values.
   Always returns 0.  The prototype depends on HAVE_STATFS_DENTRY_PARAM. */
#ifndef HAVE_STATFS_DENTRY_PARAM
static int server_statfs (struct super_block *sb, struct kstatfs *buf)
{
#else
static int server_statfs (struct dentry *dentry, struct kstatfs *buf)
{
        struct super_block *sb = dentry->d_sb;
#endif
        struct vfsmount *mnt = s2lsi(sb)->lsi_srv_mnt;
        ENTRY;

        if (mnt && mnt->mnt_sb && mnt->mnt_sb->s_op->statfs) {
                /* ask the backing filesystem, using whichever calling
                   convention this kernel has */
#ifdef HAVE_STATFS_DENTRY_PARAM
                int rc = mnt->mnt_sb->s_op->statfs(mnt->mnt_root, buf);
#else
                int rc = mnt->mnt_sb->s_op->statfs(mnt->mnt_sb, buf);
#endif
                if (!rc) {
                        buf->f_type = sb->s_magic;
                        RETURN(0);
                }
                /* on error fall through to the placeholder values */
        }

        /* just return 0 */
        buf->f_type = sb->s_magic;
        buf->f_bsize = sb->s_blocksize;
        buf->f_blocks = 1;
        buf->f_bfree = 0;
        buf->f_bavail = 0;
        buf->f_files = 1;
        buf->f_ffree = 0;
        buf->f_namelen = NAME_MAX;
        RETURN(0);
}

/* Super operations for server superblocks; installed as sb->s_op by
   server_fill_super_common(). */
static struct super_operations server_ops =
{
        .put_super      = server_put_super,
        .umount_begin   = server_umount_begin, /* umount -f */
        .statfs         = server_statfs,
};

/* Used to derive s_blocksize_bits from s_blocksize.
   NOTE(review): presumably ffz() returns the index of the first zero bit,
   which makes this the base-2 log of a power of two - confirm. */
#define log2(n) ffz(~(n))
/* Stored in sb->s_magic of server superblocks */
#define LUSTRE_SUPER_MAGIC 0x0BD00BD1

static int server_fill_super_common(struct super_block *sb)
{
        struct inode *root = 0;
        ENTRY;

        CDEBUG(D_MOUNT, "Server sb, dev=%d\n", (int)sb->s_dev);

        sb->s_blocksize = 4096;
        sb->s_blocksize_bits = log2(sb->s_blocksize);
        sb->s_magic = LUSTRE_SUPER_MAGIC;
        sb->s_maxbytes = 0; //PAGE_CACHE_MAXBYTES;
        sb->s_flags |= MS_RDONLY;
        sb->s_op = &server_ops;

        root = new_inode(sb);
        if (!root) {
                CERROR("Can't make root inode\n");
                RETURN(-EIO);
        }

        /* returns -EIO for every operation */
        /* make_bad_inode(root); -- badness - can't umount */
        /* apparently we need to be a directory for the mount to finish */
        root->i_mode = S_IFDIR;

        sb->s_root = d_alloc_root(root);
        if (!sb->s_root) {
                CERROR("Can't make root dentry\n");
                iput(root);
                RETURN(-EIO);
        }

        RETURN(0);
}

/* Fill in a server superblock: mount the backing device, refuse to start
 * if a target with the same name is already running, start the MGS (when
 * this disk hosts one) and the MGC, start the OST/MDT target unless the
 * mount flags contain LMD_FLG_NOSVC, and finally set up the generic sb
 * fields.
 *
 * Returns 0 on success or a negative errno.  Failures before any service
 * was started drop the lsi reference directly; later failures are cleaned
 * up through server_put_super(). */
static int server_fill_super(struct super_block *sb)
{
        struct lustre_sb_info *lsi = s2lsi(sb);
        struct vfsmount *mnt;
        int rc;
        ENTRY;

        /* the One True Mount */
        mnt = server_kernel_mount(sb);
        if (IS_ERR(mnt)) {
                rc = PTR_ERR(mnt);
                CERROR("Unable to mount device %s: %d\n",
                      lsi->lsi_lmd->lmd_dev, rc);
                lustre_put_lsi(sb);
                GOTO(out, rc);
        }
        lsi->lsi_srv_mnt = mnt;

        /* server_kernel_mount() stores the parsed on-disk data on success */
        LASSERT(lsi->lsi_ldd);
        CDEBUG(D_MOUNT, "Found service %s for fs '%s' on device %s\n",
               lsi->lsi_ldd->ldd_svname, lsi->lsi_ldd->ldd_fsname,
               lsi->lsi_lmd->lmd_dev);

        /* An obd with our server name already exists: bail out, undoing
           only the mount and our lsi reference */
        if (class_name2obd(lsi->lsi_ldd->ldd_svname)) {
                LCONSOLE_ERROR_MSG(0x161, "The target named %s is already "
                                   "running. Double-mount may have compromised"
                                   " the disk journal.\n",
                                   lsi->lsi_ldd->ldd_svname);
                unlock_mntput(mnt);
                lustre_put_lsi(sb);
                GOTO(out, rc = -EALREADY);
        }

        /* start MGS before MGC */
        if (IS_MGS(lsi->lsi_ldd)) {
                rc = server_start_mgs(sb);
                if (rc)
                        GOTO(out_mnt, rc);
        }

        rc = lustre_start_mgc(sb);
        if (rc)
                GOTO(out_mnt, rc);

        /* Set up all obd devices for service */
        if (!(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOSVC) &&
                (IS_OST(lsi->lsi_ldd) || IS_MDT(lsi->lsi_ldd))) {
                rc = server_start_targets(sb, mnt);
                if (rc < 0) {
                        CERROR("Unable to start targets: %d\n", rc);
                        GOTO(out_mnt, rc);
                }
        /* FIXME overmount client here,
           or can we just start a client log and client_fill_super on this sb?
           We need to make sure server_put_super gets called too - ll_put_super
           calls lustre_common_put_super; check there for LSI_SERVER flag,
           call s_p_s if so.
           Probably should start client from new thread so we can return.
           Client will not finish until all servers are connected.
           Note - MGS-only server does NOT get a client, since there is no
           lustre fs associated - the MGS is for all lustre fs's */
        }

        rc = server_fill_super_common(sb);
        if (rc)
                GOTO(out_mnt, rc);

        LCONSOLE_WARN("Server %s on device %s has started\n",
                      lsi->lsi_ldd->ldd_svname, lsi->lsi_lmd->lmd_dev);

        RETURN(0);

out_mnt:
        /* full teardown: stops whatever was started and drops the mount */
        server_put_super(sb);
out:
        RETURN(rc);
}

/* Extract the server type and index from an obd name.  The type tag (MDT or
   OST) is expected right after the first '-' in svname and the hexadecimal
   index right after the tag.
   Returns the server type (LDD_F_SV_TYPE_MDT or LDD_F_SV_TYPE_OST) with
   *idx set to the index, or -EINVAL if svname has no '-' or an unknown
   type tag.  On success, if endptr isn't NULL it is set to the end of the
   parsed name. */
int server_name2index(char *svname, __u32 *idx, char **endptr)
{
        char *dash = strchr(svname, '-');
        char *tag;
        int svtype;

        if (dash == NULL)
                return -EINVAL;

        tag = dash + 1;
        if (strncmp(tag, "MDT", 3) == 0)
                svtype = LDD_F_SV_TYPE_MDT;
        else if (strncmp(tag, "OST", 3) == 0)
                svtype = LDD_F_SV_TYPE_OST;
        else
                return -EINVAL;

        /* the hex index follows the three-letter tag */
        *idx = simple_strtoul(tag + 3, endptr, 16);
        return svtype;
}

/*************** mount common between server and client ***************/

/* Common umount: drop this sb's reference on the MGC and then on the lsi.
   An MGC stop error other than -ENOENT or -EBUSY is returned immediately,
   without dropping the lsi reference.  Otherwise the result of the MGC
   stop (0, -ENOENT or -EBUSY) is returned after the lsi has been put. */
int lustre_common_put_super(struct super_block *sb)
{
        int rc;
        ENTRY;

        CDEBUG(D_MOUNT, "dropping sb %p\n", sb);

        /* Drop a ref to the MGC */
        rc = lustre_stop_mgc(sb);
        switch (rc) {
        case 0:
        case -ENOENT:
                break;
        case -EBUSY:
                /* BUSY just means that there's some other obd that
                   needs the mgc.  Let him clean it up. */
                CDEBUG(D_MOUNT, "MGC still in use\n");
                break;
        default:
                CERROR("Can't stop MGC: %d\n", rc);
                RETURN(rc);
        }

        /* Drop a ref to the mounted disk */
        lustre_put_lsi(sb);
        RETURN(rc);
}

/* Debugging helper, currently compiled out: dumps the contents of a
   lustre_mount_data through PRINT_CMD. */
#if 0
static void lmd_print(struct lustre_mount_data *lmd)
{
        int i;

        PRINT_CMD(PRINT_MASK, "  mount data:\n");
        /* the profile is only printed for client mounts */
        if (lmd_is_client(lmd))
                PRINT_CMD(PRINT_MASK, "profile: %s\n", lmd->lmd_profile);
        PRINT_CMD(PRINT_MASK, "device:  %s\n", lmd->lmd_dev);
        PRINT_CMD(PRINT_MASK, "flags:   %x\n", lmd->lmd_flags);
        /* optional strings are skipped when unset */
        if (lmd->lmd_sec_mdt)
                PRINT_CMD(PRINT_MASK, "sec_mdt: %s\n", lmd->lmd_sec_mdt);
        if (lmd->lmd_sec_cli)
                PRINT_CMD(PRINT_MASK, "sec_cli: %s\n", lmd->lmd_sec_cli);
        if (lmd->lmd_opts)
                PRINT_CMD(PRINT_MASK, "options: %s\n", lmd->lmd_opts);
        /* one line per excluded OST index */
        for (i = 0; i < lmd->lmd_exclude_count; i++) {
                PRINT_CMD(PRINT_MASK, "exclude %d:  OST%04x\n", i,
                          lmd->lmd_exclude[i]);
        }
}
#endif

/* Tell whether the server named svname is on the exclusion list of this
   mount.  Only OSTs can be excluded; any name that does not parse as an
   OST is never excluded.  Returns 1 if excluded, 0 otherwise. */
int lustre_check_exclusion(struct super_block *sb, char *svname)
{
        struct lustre_sb_info *lsi = s2lsi(sb);
        struct lustre_mount_data *lmd = lsi->lsi_lmd;
        __u32 ost_idx;
        int slot;
        ENTRY;

        /* Only exclude OSTs */
        if (server_name2index(svname, &ost_idx, NULL) != LDD_F_SV_TYPE_OST)
                RETURN(0);

        CDEBUG(D_MOUNT, "Check exclusion %s (%d) in %d of %s\n", svname,
               ost_idx, lmd->lmd_exclude_count, lmd->lmd_dev);

        /* look for the index in the list */
        for (slot = 0; slot < lmd->lmd_exclude_count; slot++) {
                if (lmd->lmd_exclude[slot] == ost_idx)
                        break;
        }
        if (slot >= lmd->lmd_exclude_count)
                RETURN(0);

        CWARN("Excluding %s (on exclusion list)\n", svname);
        RETURN(1);
}

/* mount -v  -o exclude=lustre-OST0001:lustre-OST0002 -t lustre ... */
/* Parse an exclude= option into the exclusion list of lmd.
 *
 * ptr points at the '=' of the option; server names are separated by ':'
 * and the list ends at ',', ' ' or the end of the string.  Names that are
 * not OSTs are ignored.
 *
 * The indices parsed by this call are counted separately from any list
 * already stored in lmd, so a repeated exclude= option cannot index past
 * the temporary array.  When at least one index is parsed, the new list
 * replaces a previously stored one (the last exclude= option wins);
 * otherwise lmd is left untouched.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, or the negative
 * error from server_name2index() if a name cannot be parsed (indices
 * parsed before the bad name are still stored). */
static int lmd_make_exclusion(struct lustre_mount_data *lmd, char *ptr)
{
        char *s1 = ptr, *s2;
        __u32 index, *exclude_list, *new_exclude;
        int rc = 0, devmax, count = 0;
        ENTRY;

        /* The shortest an ost name can be is 8 chars: -OST0000.
           We don't actually know the fsname at this time, so in fact
           a user could specify any fsname. */
        devmax = strlen(ptr) / 8 + 1;

        /* temp storage until we figure out how many we have */
        OBD_ALLOC(exclude_list, sizeof(index) * devmax);
        if (!exclude_list)
                RETURN(-ENOMEM);

        /* we enter this fn pointing at the '=' */
        while (*s1 && *s1 != ' ' && *s1 != ',') {
                s1++;
                rc = server_name2index(s1, &index, &s2);
                if (rc < 0) {
                        CERROR("Can't parse server name '%s'\n", s1);
                        break;
                }
                if (rc == LDD_F_SV_TYPE_OST)
                        exclude_list[count++] = index;
                else
                        CDEBUG(D_MOUNT, "ignoring exclude %.7s\n", s1);
                s1 = s2;
                /* now we are pointing at ':' (next exclude)
                   or ',' (end of excludes) */
                if (count >= devmax)
                        break;
        }
        if (rc >= 0) /* non-err */
                rc = 0;

        if (count) {
                /* permanent, freed in lustre_free_lsi */
                OBD_ALLOC(new_exclude, sizeof(index) * count);
                if (new_exclude) {
                        memcpy(new_exclude, exclude_list,
                               sizeof(index) * count);
                        /* release the list of an earlier exclude= option
                           before installing the new one */
                        if (lmd->lmd_exclude_count)
                                OBD_FREE(lmd->lmd_exclude,
                                         sizeof(lmd->lmd_exclude[0]) *
                                         lmd->lmd_exclude_count);
                        lmd->lmd_exclude = new_exclude;
                        lmd->lmd_exclude_count = count;
                } else {
                        /* any existing list stays as it was */
                        rc = -ENOMEM;
                }
        }
        OBD_FREE(exclude_list, sizeof(index) * devmax);
        RETURN(rc);
}

/* Replace the string *set with a newly allocated, NUL-terminated copy of
   the first length bytes of opts.  A previous string is freed first.
   Returns 0 on success or -ENOMEM if the copy cannot be allocated. */
static int lmd_set_sec_opts(char **set, char *opts, int length)
{
        char *copy;

        /* drop whatever was stored before */
        if (*set != NULL)
                OBD_FREE(*set, strlen(*set) + 1);

        OBD_ALLOC(*set, length + 1);
        copy = *set;
        if (!copy)
                return -ENOMEM;

        memcpy(copy, opts, length);
        copy[length] = '\0';

        return 0;
}

/* Parse one security mount option starting at ptr and store its value in
 * lmd.
 *
 *   sec_mdt=<value>  sets lmd_sec_mdt
 *   sec_cli=<value>  sets lmd_sec_cli
 *   sec=<value>      sets whichever of the two has not been set yet
 *
 * The value extends up to the next ',' or the end of the string.
 *
 * Returns 0 on success, -EINVAL if ptr does not start with one of the
 * option names above, or the error from lmd_set_sec_opts() (-ENOMEM) if
 * the value cannot be stored. */
static
int lmd_parse_sec_opts(struct lustre_mount_data *lmd, char *ptr)
{
        char  *tail;
        char **set = NULL;
        int    length;
        int    rc;

        /* check peer name */
        if (strncmp(ptr, "sec_mdt=", 8) == 0) {
                set = &lmd->lmd_sec_mdt;
                ptr += 8;
        } else if (strncmp(ptr, "sec_cli=", 8) == 0) {
                set = &lmd->lmd_sec_cli;
                ptr += 8;
        } else if (strncmp(ptr, "sec=", 4) == 0) {
                /* leave 'set' be null */
                ptr += 4;
        } else {
                CERROR("invalid security options: %s\n", ptr);
                return -EINVAL;
        }

        /* the value ends at the next option separator, if any */
        tail = strchr(ptr, ',');
        if (tail == NULL)
                length = strlen(ptr);
        else
                length = tail - ptr;

        /* an explicit peer was named: set exactly that one */
        if (set)
                return lmd_set_sec_opts(set, ptr, length);

        /* plain "sec=": only fill in what has not been given explicitly,
           and report the real reason if storing the value fails */
        if (lmd->lmd_sec_mdt == NULL) {
                rc = lmd_set_sec_opts(&lmd->lmd_sec_mdt, ptr, length);
                if (rc)
                        return rc;
        }

        if (lmd->lmd_sec_cli == NULL) {
                rc = lmd_set_sec_opts(&lmd->lmd_sec_cli, ptr, length);
                if (rc)
                        return rc;
        }

        return 0;
}

/*
 * Parse the mount option string 'options' into 'lmd'.
 *
 * Options handled here (abort_recov, nosvc, exclude=, nllu=, nllg= and
 * anything starting with "sec") are recorded in 'lmd' and cut out of
 * 'options' in place.  "device=" is mandatory and must be the last
 * option; the string is terminated right before it, and whatever is left
 * is saved in lmd_opts.  A device name containing ':' marks a client
 * mount and yields the profile "<fsname>-client", e.g.
 *
 *   mount -v -t lustre uml1:uml2:/lustre-client /mnt/lustre
 *
 * Returns 0 on success, -EINVAL for missing, old-style or unparsable
 * options, and -ENOMEM if a string copy cannot be allocated.  Strings
 * already stored in 'lmd' are not released here on failure.
 */
static int lmd_parse(char *options, struct lustre_mount_data *lmd)
{
        char *s1, *s2, *devname = NULL;
        struct lustre_mount_data *raw = (struct lustre_mount_data *)options;
        size_t len;
        int rc = 0;
        ENTRY;

        LASSERT(lmd);
        if (!options) {
                LCONSOLE_ERROR_MSG(0x162, "Missing mount data: check that "
                                   "/sbin/mount.lustre is installed.\n");
                RETURN(-EINVAL);
        }

        /* Options should be a string - try to detect old lmd data */
        if ((raw->lmd_magic & 0xffffff00) == (LMD_MAGIC & 0xffffff00)) {
                LCONSOLE_ERROR_MSG(0x163, "You're using an old version of "
                                   "/sbin/mount.lustre.  Please install "
                                   "version %s\n", LUSTRE_VERSION_STRING);
                RETURN(-EINVAL);
        }
        lmd->lmd_magic = LMD_MAGIC;

        /* Set default flags here */

        s1 = options;
        while (*s1) {
                /* non-zero when the current option was consumed here and
                   must be removed from the string */
                int clear = 0;
                /* Skip whitespace and extra commas */
                while (*s1 == ' ' || *s1 == ',')
                        s1++;

                /* Client options are parsed in ll_options: eg. flock,
                   user_xattr, acl */

                /* Parse non-ldiskfs options here. Rather than modifying
                   ldiskfs, we just zero these out here */
                if (strncmp(s1, "abort_recov", 11) == 0) {
                        lmd->lmd_flags |= LMD_FLG_ABORT_RECOV;
                        clear++;
                } else if (strncmp(s1, "nosvc", 5) == 0) {
                        lmd->lmd_flags |= LMD_FLG_NOSVC;
                        clear++;
                /* ost exclusion list */
                } else if (strncmp(s1, "exclude=", 8) == 0) {
                        /* lmd_make_exclusion expects to start at the '=' */
                        rc = lmd_make_exclusion(lmd, s1 + 7);
                        if (rc)
                                goto invalid;
                        clear++;
                } else if (strncmp(s1, "nllu=", 5) == 0) {
                        lmd->lmd_nllu = simple_strtoul(s1 + 5, NULL, 10);
                        clear++;
                } else if (strncmp(s1, "nllg=", 5) == 0) {
                        lmd->lmd_nllg = simple_strtoul(s1 + 5, NULL, 10);
                        clear++;
                } else if (strncmp(s1, "sec", 3) == 0) {
                        rc = lmd_parse_sec_opts(lmd, s1);
                        if (rc)
                                goto invalid;
                        clear++;
                }
                /* Linux 2.4 doesn't pass the device, so we stuck it at the
                   end of the options. */
                else if (strncmp(s1, "device=", 7) == 0) {
                        devname = s1 + 7;
                        /* terminate options right before device.  device
                           must be the last one. */
                        *s1 = '\0';
                        break;
                }

                /* Find next opt */
                s2 = strchr(s1, ',');
                if (s2 == NULL) {
                        if (clear)
                                *s1 = '\0';
                        break;
                }
                s2++;
                if (clear)
                        /* close the gap; s1 now points at the next option */
                        memmove(s1, s2, strlen(s2) + 1);
                else
                        s1 = s2;
        }

        if (!devname) {
                LCONSOLE_ERROR_MSG(0x164, "Can't find the device name "
                                   "(need mount option 'device=...')\n");
                goto invalid;
        }

        s1 = strrchr(devname, ':');
        if (s1) {
                /* OR the client bit in: a plain assignment would throw
                   away the flags collected from the options above */
                lmd->lmd_flags |= LMD_FLG_CLIENT;
                /* Remove leading /s from fsname */
                while (*++s1 == '/') ;
                /* Freed in lustre_free_lsi */
                /* +8: room for "-client" and the terminating NUL */
                OBD_ALLOC(lmd->lmd_profile, strlen(s1) + 8);
                if (!lmd->lmd_profile)
                        RETURN(-ENOMEM);
                sprintf(lmd->lmd_profile, "%s-client", s1);
        }

        /* Freed in lustre_free_lsi */
        OBD_ALLOC(lmd->lmd_dev, strlen(devname) + 1);
        if (!lmd->lmd_dev)
                RETURN(-ENOMEM);
        strcpy(lmd->lmd_dev, devname);

        /* Save mount options: strip trailing separators first.  Work with
           an index so that no pointer below 'options' is ever formed, even
           when nothing precedes "device=". */
        len = strlen(options);
        while (len > 0 &&
               (options[len - 1] == ',' || options[len - 1] == ' '))
                options[--len] = '\0';
        if (len > 0) {
                /* Freed in lustre_free_lsi */
                OBD_ALLOC(lmd->lmd_opts, len + 1);
                if (!lmd->lmd_opts)
                        RETURN(-ENOMEM);
                strcpy(lmd->lmd_opts, options);
        }

        RETURN(rc);

invalid:
        CERROR("Bad mount options %s\n", options);
        RETURN(-EINVAL);
}


/*
 * Common mount entry point for both client and server mounts.
 *
 * Allocates the lustre_sb_info for 'sb', parses the option string 'data'
 * into its lustre_mount_data, and then hands off to the registered client
 * fill_super routine (after starting the MGC) or to server_fill_super().
 * 'silent' is not used here.
 *
 * Returns 0 on success or a negative errno: -ENOMEM if the lsi cannot be
 * allocated, -EINVAL if the options cannot be parsed, -ENODEV if no
 * client fill_super routine has been registered, otherwise whatever the
 * MGC start or the client/server fill_super routine returned.
 */
int lustre_fill_super(struct super_block *sb, void *data, int silent)
{
        struct lustre_mount_data *lmd;
        struct lustre_sb_info *lsi;
        int drop_lsi = 0;
        int rc;
        ENTRY;

        CDEBUG(D_MOUNT|D_VFSTRACE, "VFS Op: sb %p\n", sb);

        lsi = lustre_init_lsi(sb);
        if (!lsi)
                RETURN(-ENOMEM);
        lmd = lsi->lsi_lmd;

        /* Figure out the lmd from the mount options */
        if (lmd_parse((char *)data, lmd)) {
                lustre_put_lsi(sb);
                RETURN(-EINVAL);
        }

        if (lmd_is_client(lmd)) {
                CDEBUG(D_MOUNT, "Mounting client %s\n", lmd->lmd_profile);
                if (!client_fill_super) {
                        LCONSOLE_ERROR_MSG(0x165, "Nothing registered for "
                                           "client mount! Is the 'lustre' "
                                           "module loaded?\n");
                        /* nobody else will release the lsi on this path */
                        drop_lsi = 1;
                        rc = -ENODEV;
                } else {
                        rc = lustre_start_mgc(sb);
                        if (rc) {
                                lustre_stop_mgc(sb);
                                /* c_f_s never ran, so its cleanup did not
                                   either: drop the lsi ourselves */
                                drop_lsi = 1;
                                goto out;
                        }
                        /* Connect and start */
                        /* (should always be ll_fill_super) */
                        rc = (*client_fill_super)(sb);
                        /* c_f_s will call lustre_common_put_super on failure */
                }
        } else {
                CDEBUG(D_MOUNT, "Mounting server from %s\n", lmd->lmd_dev);
                lsi->lsi_flags |= LSI_SERVER;
                rc = server_fill_super(sb);
                /* s_f_s calls lustre_start_mgc after the mount because we need
                   the MGS nids which are stored on disk.  Plus, we may
                   need to start the MGS first. */
                /* s_f_s will call server_put_super on failure */
        }

out:
        if (rc){
                /* the lsi (and with it lmd) may already be gone if the
                   fill_super routine cleaned up after itself */
                CERROR("Unable to mount %s (%d)\n",
                       s2lsi(sb) ? lmd->lmd_dev : "", rc);
        } else {
                CDEBUG(D_SUPER, "mount %s complete\n", lmd->lmd_dev);
        }
        /* release only after the message above has used lmd->lmd_dev;
           same call as on the lmd_parse failure path */
        if (drop_lsi)
                lustre_put_lsi(sb);
        RETURN(rc);
}


/*
 * Record the client-side fill_super routine used by lustre_fill_super()
 * for client mounts.  ll_fill_super cannot be referenced by name because
 * it lives in a module that must be loaded after this one, so that module
 * hands us the pointer instead.
 */
void
lustre_register_client_fill_super(int (*cfs)(struct super_block *sb))
{
        client_fill_super = cfs;
}

/***************** FS registration ******************/

/*
 * The VFS mount interface differs between kernel versions, so the entry
 * point and the file_system_type definition are selected at compile time:
 *   2.5 .. 2.6.17 : get_sb returning a super_block pointer
 *   2.6.18 and up : get_sb returning int and taking the vfsmount
 *   2.4           : read_super
 * All variants end up in lustre_fill_super().
 */
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
/* 2.5 and later */
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18))
/* get_sb hook for kernels before 2.6.18; 'devname' is not used, the
   device is carried inside the option string 'data' instead. */
struct super_block * lustre_get_sb(struct file_system_type *fs_type,
                               int flags, const char *devname, void * data)
{
        /* calls back in fill super */
        /* we could append devname= onto options (*data) here,
           but 2.4 doesn't get devname.  So we do it in mount_lustre.c */
        return get_sb_nodev(fs_type, flags, data, lustre_fill_super);
}
#else
/* get_sb hook for 2.6.18 and later: same as above, but the result of
   get_sb_nodev() is an int and the vfsmount 'mnt' is passed through. */
int lustre_get_sb(struct file_system_type *fs_type,
                               int flags, const char *devname, void * data,
                               struct vfsmount *mnt)
{
        /* calls back in fill super */
        /* we could append devname= onto options (*data) here,
           but 2.4 doesn't get devname.  So we do it in mount_lustre.c */
        return get_sb_nodev(fs_type, flags, data, lustre_fill_super, mnt);
}
#endif

/* Filesystem type "lustre" for 2.5+ kernels; handed to
   register_filesystem() by lustre_register_fs(). */
struct file_system_type lustre_fs_type = {
        .owner        = THIS_MODULE,
        .name         = "lustre",
        .get_sb       = lustre_get_sb,
        .kill_sb      = kill_anon_super,
        .fs_flags     = FS_BINARY_MOUNTDATA,
};

#else
/* 2.4 */
/* read_super hook: runs lustre_fill_super() and translates its result
   into the 2.4 convention (the super_block on success, NULL on error). */
static struct super_block *lustre_read_super(struct super_block *sb,
                                             void *data, int silent)
{
        int rc;
        ENTRY;

        rc = lustre_fill_super(sb, data, silent);
        if (rc)
                RETURN(NULL);
        RETURN(sb);
}

/* Filesystem type "lustre" for 2.4 kernels. */
static struct file_system_type lustre_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "lustre",
        .fs_flags       = FS_NFSEXP_FSID,
        .read_super     = lustre_read_super,
};
#endif

/* Register the "lustre" filesystem type with the VFS; returns the result
   of register_filesystem() unchanged. */
int lustre_register_fs(void)
{
        int rc;

        rc = register_filesystem(&lustre_fs_type);
        return rc;
}

/* Remove the "lustre" filesystem type from the VFS; returns the result of
   unregister_filesystem() unchanged. */
int lustre_unregister_fs(void)
{
        int rc;

        rc = unregister_filesystem(&lustre_fs_type);
        return rc;
}

/* Mount-related entry points exported for use by other kernel modules */
EXPORT_SYMBOL(lustre_register_client_fill_super);
EXPORT_SYMBOL(lustre_common_put_super);
EXPORT_SYMBOL(lustre_process_log);
EXPORT_SYMBOL(lustre_end_log);
EXPORT_SYMBOL(server_get_mount);
EXPORT_SYMBOL(server_get_mount_2);
EXPORT_SYMBOL(server_put_mount);
EXPORT_SYMBOL(server_put_mount_2);
EXPORT_SYMBOL(server_register_target);
EXPORT_SYMBOL(server_name2index);
EXPORT_SYMBOL(server_mti_print);
EXPORT_SYMBOL(do_lcfg);