lustre/obdclass/obd_mount.c

   1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
   2  * vim:expandtab:shiftwidth=8:tabstop=8:
   3  *
   4  *  lustre/obdclass/obd_mount.c
   5  *  Client/server mount routines
   6  *
   7  *  Copyright (c) 2006 Cluster File Systems, Inc.
   8  *   Author: Nathan Rutman <nathan@clusterfs.com>
   9  *
  10  *   This file is part of Lustre, http://www.lustre.org/
  11  *
  12  *   Lustre is free software; you can redistribute it and/or
  13  *   modify it under the terms of version 2 of the GNU General Public
  14  *   License as published by the Free Software Foundation.
  15  *
  16  *   Lustre is distributed in the hope that it will be useful,
  17  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19  *   GNU General Public License for more details.
  20  *
  21  *   You should have received a copy of the GNU General Public License
  22  *   along with Lustre; if not, write to the Free Software
  23  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  24  */
  25
  26
  27 #define DEBUG_SUBSYSTEM S_MGMT
  28 #define D_MOUNT D_SUPER|D_CONFIG|D_WARNING
  29 #define PRINT_CMD LCONSOLE
  30 #define PRINT_MASK D_WARNING
  31
  32 #include <linux/obd.h>
  33 #include <linux/lvfs.h>
  34 #include <linux/lustre_fsfilt.h>
  35 #include <linux/obd_class.h>
  36 #include <lustre/lustre_user.h>
  37 #include <linux/version.h>
  38 #include <linux/lustre_log.h>
  39 #include <linux/lustre_disk.h>
  40 #include <linux/lustre_ver.h>
  41
  42 static int (*client_fill_super)(struct super_block *sb) = NULL;
  43
  44
  45 /*********** mount lookup *********/
  46
  47 DECLARE_MUTEX(lustre_mount_info_lock);
  48 struct list_head server_mount_info_list = LIST_HEAD_INIT(server_mount_info_list);
  49
  50 static struct lustre_mount_info *server_find_mount(char *name)
  51 {
  52         struct list_head *tmp;
  53         struct lustre_mount_info *lmi;
  54
  55         list_for_each(tmp, &server_mount_info_list) {
  56                 lmi = list_entry(tmp, struct lustre_mount_info, lmi_list_chain);
  57                 if (strcmp(name, lmi->lmi_name) == 0)
  58                         return(lmi);
  59         }
  60         return(NULL);
  61 }
  62
  63 /* we must register an obd for a mount before we call the setup routine.
  64    *_setup will call lustre_get_mount to get the mnt struct
  65    by obd_name, since we can't pass the pointer to setup. */
  66 static int server_register_mount(char *name, struct super_block *sb,
  67                           struct vfsmount *mnt)
  68 {
  69         struct lustre_mount_info *lmi;
  70         char *name_cp;
  71         ENTRY;
  72
  73         LASSERT(mnt);
  74         LASSERT(sb);
  75
  76         OBD_ALLOC(lmi, sizeof(*lmi));
  77         if (!lmi)
  78                 RETURN(-ENOMEM);
  79         OBD_ALLOC(name_cp, strlen(name) + 1);
  80         if (!name_cp) {
  81                 OBD_FREE(lmi, sizeof(*lmi));
  82                 RETURN(-ENOMEM);
  83         }
  84         strcpy(name_cp, name);
  85
  86         down(&lustre_mount_info_lock);
  87
  88         if (server_find_mount(name)) {
  89                 up(&lustre_mount_info_lock);
  90                 OBD_FREE(lmi, sizeof(*lmi));
  91                 OBD_FREE(name_cp, strlen(name) + 1);
  92                 CERROR("Already registered %s\n", name);
  93                 RETURN(-EEXIST);
  94         }
  95         lmi->lmi_name = name_cp;
  96         lmi->lmi_sb = sb;
  97         lmi->lmi_mnt = mnt;
  98         list_add(&lmi->lmi_list_chain, &server_mount_info_list);
  99
 100         up(&lustre_mount_info_lock);
 101
 102         CDEBUG(D_MOUNT, "reg_mnt %p from %s, vfscount=%d\n",
 103                lmi->lmi_mnt, name, atomic_read(&lmi->lmi_mnt->mnt_count));
 104
 105         RETURN(0);
 106 }
 107
 108 /* when an obd no longer needs a mount */
 109 static int server_deregister_mount(char *name)
 110 {
 111         struct lustre_mount_info *lmi;
 112         ENTRY;
 113
 114         down(&lustre_mount_info_lock);
 115         lmi = server_find_mount(name);
 116         if (!lmi) {
 117                 up(&lustre_mount_info_lock);
 118                 CERROR("%s not registered\n", name);
 119                 RETURN(-ENOENT);
 120         }
 121
 122         CDEBUG(D_MOUNT, "dereg_mnt %p from %s, vfscount=%d\n",
 123                lmi->lmi_mnt, name, atomic_read(&lmi->lmi_mnt->mnt_count));
 124
 125         OBD_FREE(lmi->lmi_name, strlen(lmi->lmi_name) + 1);
 126         list_del(&lmi->lmi_list_chain);
 127         OBD_FREE(lmi, sizeof(*lmi));
 128         up(&lustre_mount_info_lock);
 129
 130         RETURN(0);
 131 }
 132
 133 /* Deregister anyone referencing the mnt. Everyone should have
 134    put_mount in *_cleanup, but this is a catch-all in case of err... */
 135 static void server_deregister_mount_all(struct vfsmount *mnt)
 136 {
 137         struct list_head *tmp, *n;
 138         struct lustre_mount_info *lmi;
 139
 140         if (!mnt)
 141                 return;
 142
 143         down(&lustre_mount_info_lock);
 144         list_for_each_safe(tmp, n, &server_mount_info_list) {
 145                 lmi = list_entry(tmp, struct lustre_mount_info, lmi_list_chain);
 146                 if (lmi->lmi_mnt == mnt) {
 147                         CERROR("Deregister failsafe %s\n", lmi->lmi_name);
 148                         OBD_FREE(lmi->lmi_name, strlen(lmi->lmi_name) + 1);
 149                         list_del(&lmi->lmi_list_chain);
 150                         OBD_FREE(lmi, sizeof(*lmi));
 151                 }
 152         }
 153         up(&lustre_mount_info_lock);
 154 }
 155
 156 /* obd's look up a registered mount using their name. This is just
 157    for initial obd setup to find the mount struct.  It should not be
 158    called every time you want to mntget. */
 159 struct lustre_mount_info *server_get_mount(char *name)
 160 {
 161         struct lustre_mount_info *lmi;
 162         struct lustre_sb_info *lsi;
 163         ENTRY;
 164
 165         down(&lustre_mount_info_lock);
 166
 167         lmi = server_find_mount(name);
 168         if (!lmi) {
 169                 up(&lustre_mount_info_lock);
 170                 CERROR("Can't find mount for %s\n", name);
 171                 RETURN(NULL);
 172         }
 173         lsi = s2lsi(lmi->lmi_sb);
 174         mntget(lmi->lmi_mnt);
 175         atomic_inc(&lsi->lsi_mounts);
 176
 177         up(&lustre_mount_info_lock);
 178
 179         CDEBUG(D_MOUNT, "get_mnt %p from %s, refs=%d, vfscount=%d\n",
 180                lmi->lmi_mnt, name, atomic_read(&lsi->lsi_mounts),
 181                atomic_read(&lmi->lmi_mnt->mnt_count));
 182
 183         RETURN(lmi);
 184 }
 185
 186 static void unlock_mntput(struct vfsmount *mnt)
 187 {
 188         if (kernel_locked()) {
 189                 unlock_kernel();
 190                 mntput(mnt);
 191                 lock_kernel();
 192         } else {
 193                 mntput(mnt);
 194         }
 195 }
 196
 197 static int lustre_put_lsi(struct super_block *sb);
 198
 199 /* to be called from obd_cleanup methods */
 200 int server_put_mount(char *name, struct vfsmount *mnt)
 201 {
 202         struct lustre_mount_info *lmi;
 203         struct lustre_sb_info *lsi;
 204         ENTRY;
 205
 206         down(&lustre_mount_info_lock);
 207         lmi = server_find_mount(name);
 208         if (!lmi) {
 209                 up(&lustre_mount_info_lock);
 210                 CERROR("Can't find mount for %s\n", name);
 211                 RETURN(-ENOENT);
 212         }
 213         lsi = s2lsi(lmi->lmi_sb);
 214         LASSERT(lmi->lmi_mnt == mnt);
 215         unlock_mntput(lmi->lmi_mnt);
 216
 217         CDEBUG(D_MOUNT, "put_mnt %p from %s, refs=%d, vfscount=%d\n",
 218                lmi->lmi_mnt, name, atomic_read(&lsi->lsi_mounts),
 219                atomic_read(&lmi->lmi_mnt->mnt_count));
 220
 221         if (lustre_put_lsi(lmi->lmi_sb)) {
 222                 CDEBUG(D_MOUNT, "Last put of mnt %p from %s, vfscount=%d\n",
 223                        lmi->lmi_mnt, name,
 224                        atomic_read(&lmi->lmi_mnt->mnt_count));
 225                 /* last mount is the One True Mount */
 226                 if (atomic_read(&lmi->lmi_mnt->mnt_count) > 1)
 227                         CERROR("%s: mount busy, vfscount=%d!\n", name,
 228                                atomic_read(&lmi->lmi_mnt->mnt_count));
 229         }
 230         up(&lustre_mount_info_lock);
 231
 232         /* this obd should never need the mount again */
 233         server_deregister_mount(name);
 234
 235         RETURN(0);
 236 }
 237
 238
 239 /******* mount helper utilities *********/
 240
 241 static void ldd_print(struct lustre_disk_data *ldd)
 242 {
 243         int i;
 244
 245         PRINT_CMD(PRINT_MASK, "  disk data:\n");
 246         PRINT_CMD(PRINT_MASK, "config:  %d\n", ldd->ldd_config_ver);
 247         PRINT_CMD(PRINT_MASK, "fs:      %s\n", ldd->ldd_fsname);
 248         PRINT_CMD(PRINT_MASK, "server:  %s\n", ldd->ldd_svname);
 249         PRINT_CMD(PRINT_MASK, "index:   %04x\n", ldd->ldd_svindex);
 250         PRINT_CMD(PRINT_MASK, "flags:   %#x\n", ldd->ldd_flags);
 251         PRINT_CMD(PRINT_MASK, "diskfs:  %s\n", MT_STR(ldd));
 252         PRINT_CMD(PRINT_MASK, "options: %s\n", ldd->ldd_mount_opts);
 253         if (!ldd->ldd_mgsnid_count)
 254                 PRINT_CMD(PRINT_MASK, "no MGS nids\n");
 255         else for (i = 0; i < ldd->ldd_mgsnid_count; i++) {
 256                 PRINT_CMD(PRINT_MASK, "mgs nid %d:  %s\n", i,
 257                        libcfs_nid2str(ldd->ldd_mgsnid[i]));
 258         }
 259         if (!ldd->ldd_failnid_count)
 260                 PRINT_CMD(PRINT_MASK, "no failover nids\n");
 261         else for (i = 0; i < ldd->ldd_failnid_count; i++) {
 262                 PRINT_CMD(PRINT_MASK, "failover nid %d:  %s\n", i,
 263                           libcfs_nid2str(ldd->ldd_failnid[i]));
 264         }
 265 }
 266
 267 static int ldd_parse(struct lvfs_run_ctxt *mount_ctxt,
 268                            struct lustre_disk_data *ldd)
 269 {
 270         struct lvfs_run_ctxt saved;
 271         struct file *file;
 272         loff_t off = 0;
 273         unsigned long len;
 274         int rc;
 275         ENTRY;
 276
 277         push_ctxt(&saved, mount_ctxt, NULL);
 278
 279         file = filp_open(MOUNT_DATA_FILE, O_RDONLY, 0644);
 280         if (IS_ERR(file)) {
 281                 rc = PTR_ERR(file);
 282                 CERROR("cannot open %s: rc = %d\n", MOUNT_DATA_FILE, rc);
 283                 GOTO(out, rc);
 284         }
 285
 286         len = file->f_dentry->d_inode->i_size;
 287         CDEBUG(D_MOUNT, "Have %s, size %lu\n", MOUNT_DATA_FILE, len);
 288         if (len != sizeof(*ldd)) {
 289                 CERROR("disk data size does not match: see %lu expect %u\n",
 290                        len, sizeof(*ldd));
 291                 GOTO(out_close, rc = -EINVAL);
 292         }
 293
 294         rc = lustre_fread(file, ldd, len, &off);
 295         if (rc != len) {
 296                 CERROR("error reading %s: read %d of %lu\n",
 297                        MOUNT_DATA_FILE, rc, len);
 298                 GOTO(out_close, rc = -EINVAL);
 299         }
 300         rc = 0;
 301
 302         if (ldd->ldd_magic != LDD_MAGIC) {
 303                 /* FIXME add swabbing support */
 304                 CERROR("Bad magic in %s: %x!=%x\n", MOUNT_DATA_FILE,
 305                        ldd->ldd_magic, LDD_MAGIC);
 306                 GOTO(out_close, rc = -EINVAL);
 307         }
 308
 309         if (ldd->ldd_feature_incompat & ~LDD_INCOMPAT_SUPP) {
 310                 CERROR("%s: unsupported incompat filesystem feature(s) %x\n",
 311                        ldd->ldd_svname,
 312                        ldd->ldd_feature_incompat & ~LDD_INCOMPAT_SUPP);
 313                 GOTO(out_close, rc = -EINVAL);
 314         }
 315         if (ldd->ldd_feature_rocompat & ~LDD_ROCOMPAT_SUPP) {
 316                 CERROR("%s: unsupported read-only filesystem feature(s) %x\n",
 317                        ldd->ldd_svname,
 318                        ldd->ldd_feature_rocompat & ~LDD_ROCOMPAT_SUPP);
 319                 /* Do something like remount filesystem read-only */
 320                 GOTO(out_close, rc = -EINVAL);
 321         }
 322
 323         ldd_print(ldd);
 324
 325 out_close:
 326         filp_close(file, 0);
 327 out:
 328         pop_ctxt(&saved, mount_ctxt, NULL);
 329         RETURN(rc);
 330 }
 331
 332 static int ldd_write(struct lvfs_run_ctxt *mount_ctxt,
 333                      struct lustre_disk_data *ldd)
 334 {
 335         struct lvfs_run_ctxt saved;
 336         struct file *file;
 337         loff_t off = 0;
 338         unsigned long len = sizeof(struct lustre_disk_data);
 339         int rc = 0;
 340         ENTRY;
 341
 342         LASSERT(ldd->ldd_magic == LDD_MAGIC);
 343
 344         ldd->ldd_config_ver++;
 345
 346         push_ctxt(&saved, mount_ctxt, NULL);
 347
 348         file = filp_open(MOUNT_DATA_FILE, O_RDWR, 0644);
 349         if (IS_ERR(file)) {
 350                 rc = PTR_ERR(file);
 351                 CERROR("cannot open %s: rc = %d\n", MOUNT_DATA_FILE, rc);
 352                 GOTO(out, rc);
 353         }
 354
 355         rc = lustre_fwrite(file, ldd, len, &off);
 356         if (rc != len) {
 357                 CERROR("error writing %s: read %d of %lu\n",
 358                        MOUNT_DATA_FILE, rc, len);
 359                 GOTO(out_close, rc = -EINVAL);
 360         }
 361
 362         rc = 0;
 363         ldd_print(ldd);
 364
 365 out_close:
 366         filp_close(file, 0);
 367 out:
 368         pop_ctxt(&saved, mount_ctxt, NULL);
 369         RETURN(rc);
 370 }
 371
 372
 373 /**************** config llog ********************/
 374
 375 /* Get a config log from the MGS and process it.
 376    This func is called for both clients and servers.
 377    Continue to process new statements appended to the logs
 378    (whenever the config lock is revoked) until lustre_end_log
 379    is called. */
 380 int lustre_process_log(struct super_block *sb, char *logname,
 381                      struct config_llog_instance *cfg)
 382 {
 383         struct lustre_cfg *lcfg;
 384         struct lustre_cfg_bufs bufs;
 385         struct lustre_sb_info *lsi = s2lsi(sb);
 386         struct obd_device *mgc = lsi->lsi_mgc;
 387         int rc;
 388         ENTRY;
 389
 390         LASSERT(mgc);
 391         LASSERT(cfg);
 392
 393         /* mgc_process_config */
 394         lustre_cfg_bufs_reset(&bufs, mgc->obd_name);
 395         lustre_cfg_bufs_set_string(&bufs, 1, logname);
 396         lustre_cfg_bufs_set(&bufs, 2, cfg, sizeof(*cfg));
 397         lustre_cfg_bufs_set(&bufs, 3, &sb, sizeof(sb));
 398         lcfg = lustre_cfg_new(LCFG_LOG_START, &bufs);
 399         rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
 400         lustre_cfg_free(lcfg);
 401
 402         if (rc)
 403                 LCONSOLE_ERROR("%s: The configuration '%s' could not be read "
 404                                "(%d), mount will fail.\n",
 405                                mgc->obd_name, logname, rc);
 406
 407         class_obd_list();
 408         RETURN(rc);
 409 }
 410
 411 /* Stop watching this config log for updates */
 412 int lustre_end_log(struct super_block *sb, char *logname,
 413                        struct config_llog_instance *cfg)
 414 {
 415         struct lustre_cfg *lcfg;
 416         struct lustre_cfg_bufs bufs;
 417         struct lustre_sb_info *lsi = s2lsi(sb);
 418         struct obd_device *mgc = lsi->lsi_mgc;
 419         int rc;
 420         ENTRY;
 421
 422         LASSERT(mgc);
 423
 424         /* mgc_process_config */
 425         lustre_cfg_bufs_reset(&bufs, mgc->obd_name);
 426         lustre_cfg_bufs_set_string(&bufs, 1, logname);
 427         if (cfg)
 428                 lustre_cfg_bufs_set(&bufs, 2, cfg, sizeof(*cfg));
 429         lcfg = lustre_cfg_new(LCFG_LOG_END, &bufs);
 430         rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
 431         lustre_cfg_free(lcfg);
 432         RETURN(rc);
 433 }
 434
 435 /**************** obd start *******************/
 436
 437 static int do_lcfg(char *cfgname, lnet_nid_t nid, int cmd,
 438                    char *s1, char *s2, char *s3, char *s4)
 439 {
 440         struct lustre_cfg_bufs bufs;
 441         struct lustre_cfg    * lcfg = NULL;
 442         int rc;
 443
 444         CDEBUG(D_TRACE, "lcfg %s %#x %s %s %s %s\n", cfgname,
 445                cmd, s1, s2, s3, s4);
 446
 447         lustre_cfg_bufs_reset(&bufs, cfgname);
 448         if (s1)
 449                 lustre_cfg_bufs_set_string(&bufs, 1, s1);
 450         if (s2)
 451                 lustre_cfg_bufs_set_string(&bufs, 2, s2);
 452         if (s3)
 453                 lustre_cfg_bufs_set_string(&bufs, 3, s3);
 454         if (s4)
 455                 lustre_cfg_bufs_set_string(&bufs, 4, s4);
 456
 457         lcfg = lustre_cfg_new(cmd, &bufs);
 458         lcfg->lcfg_nid = nid;
 459         rc = class_process_config(lcfg);
 460         lustre_cfg_free(lcfg);
 461         return(rc);
 462 }
 463
 464 static int lustre_start_simple(char *obdname, char *type, char *uuid,
 465                                char *s1, char *s2)
 466 {
 467         int rc;
 468         CDEBUG(D_MOUNT, "Starting obd %s (typ=%s)\n", obdname, type);
 469
 470         rc = do_lcfg(obdname, 0, LCFG_ATTACH, type, uuid, 0, 0);
 471         if (rc) {
 472                 CERROR("%s attach error %d\n", obdname, rc);
 473                 return(rc);
 474         }
 475         rc = do_lcfg(obdname, 0, LCFG_SETUP, s1, s2, 0, 0);
 476         if (rc) {
 477                 CERROR("%s setup error %d\n", obdname, rc);
 478                 do_lcfg(obdname, 0, LCFG_DETACH, 0, 0, 0, 0);
 479         }
 480         return rc;
 481 }
 482
 483 /* Set up a MGS to serve startup logs */
 484 static int server_start_mgs(struct super_block *sb)
 485 {
 486         struct lustre_sb_info    *lsi = s2lsi(sb);
 487         struct vfsmount          *mnt = lsi->lsi_srv_mnt;
 488         struct lustre_mount_info *lmi;
 489         int    rc = 0;
 490         ENTRY;
 491         LASSERT(mnt);
 492
 493         /* It is impossible to have more than 1 MGS per node, since
 494            MGC wouldn't know which to connect to */
 495         lmi = server_find_mount(LUSTRE_MGS_OBDNAME);
 496         if (lmi) {
 497                 lsi = s2lsi(lmi->lmi_sb);
 498                 LCONSOLE_ERROR("The MGS service was already started from "
 499                                "server %s\n", lsi->lsi_ldd->ldd_svname);
 500                 RETURN(-EALREADY);
 501         }
 502
 503         CDEBUG(D_CONFIG, "Start MGS service %s\n", LUSTRE_MGS_OBDNAME);
 504
 505         rc = server_register_mount(LUSTRE_MGS_OBDNAME, sb, mnt);
 506
 507         if (!rc &&
 508             ((rc = lustre_start_simple(LUSTRE_MGS_OBDNAME, LUSTRE_MGS_NAME,
 509                                        LUSTRE_MGS_OBDNAME, 0, 0))))
 510                 server_deregister_mount(LUSTRE_MGS_OBDNAME);
 511
 512         if (rc)
 513                 LCONSOLE_ERROR("Failed to start MGS '%s' (%d).  Is the 'mgs' "
 514                                "module loaded?\n", LUSTRE_MGS_OBDNAME, rc);
 515
 516         RETURN(rc);
 517 }
 518
 519 static int server_stop_mgs(struct super_block *sb)
 520 {
 521         struct obd_device *obd;
 522         int rc;
 523         ENTRY;
 524
 525         CDEBUG(D_MOUNT, "Stop MGS service %s\n", LUSTRE_MGS_OBDNAME);
 526
 527         /* There better be only one MGS */
 528         obd = class_name2obd(LUSTRE_MGS_OBDNAME);
 529         if (!obd) {
 530                 CDEBUG(D_CONFIG, "mgs %s not running\n", LUSTRE_MGS_OBDNAME);
 531                 RETURN(-EALREADY);
 532         }
 533
 534         /* The MGS should always stop when we say so */
 535         obd->obd_force = 1;
 536         rc = class_manual_cleanup(obd);
 537         RETURN(rc);
 538 }
 539
 540 /* Set up a mgcobd to process startup logs */
 541 static int lustre_start_mgc(struct super_block *sb)
 542 {
 543         struct lustre_handle mgc_conn = {0, };
 544         struct obd_connect_data ocd = { 0 };
 545         struct lustre_sb_info *lsi = s2lsi(sb);
 546         struct obd_device *obd;
 547         struct obd_export *exp;
 548         struct obd_uuid *uuid;
 549         class_uuid_t uuidc;
 550         lnet_nid_t nid;
 551         int recov_bk;
 552         int rc = 0, i;
 553         ENTRY;
 554
 555         LASSERT(lsi->lsi_lmd);
 556
 557         obd = class_name2obd(LUSTRE_MGC_OBDNAME);
 558         if (obd) {
 559                 atomic_inc(&obd->u.cli.cl_mgc_refcount);
 560                 /* FIXME There's only one MGC, but users could give different
 561                    MGS nids on the mount line.  So now do we add new MGS uuids
 562                    or not?  If there's truly one MGS per site, the MGS uuids
 563                    _should_ all be the same. Maybe check here?
 564                 */
 565
 566                 /* Try all connections, but only once (again).
 567                    We don't want to block another target from starting
 568                    (using its local copy of the log), but we do want to connect
 569                    if at all possible. */
 570                 CDEBUG(D_MOUNT, "Set MGS reconnect\n");
 571                 recov_bk = 1;
 572                 rc = obd_set_info(obd->obd_self_export,
 573                                   strlen(KEY_INIT_RECOV_BACKUP),
 574                                   KEY_INIT_RECOV_BACKUP,
 575                                   sizeof(recov_bk), &recov_bk);
 576                 GOTO(out, rc = 0);
 577         }
 578
 579         if (lsi->lsi_lmd->lmd_mgsnid_count == 0) {
 580                 LCONSOLE_ERROR("No NIDs for the MGS were given.\n");
 581                 RETURN(-EINVAL);
 582         }
 583
 584         CDEBUG(D_MOUNT, "Start MGC '%s'\n", LUSTRE_MGC_OBDNAME);
 585
 586         /* Add the first uuid for the MGS */
 587         nid = lsi->lsi_lmd->lmd_mgsnid[0];
 588         rc = do_lcfg(LUSTRE_MGC_OBDNAME, nid, LCFG_ADD_UUID,
 589                      libcfs_nid2str(nid), 0,0,0);
 590         if (rc < 0)
 591                 RETURN(rc);
 592
 593         /* Generate a unique uuid for each MGC */
 594         OBD_ALLOC_PTR(uuid);
 595 #if 0
 596         /* use the 1st non-loopback nid */
 597         lnet_process_id_t id;
 598         i = 0;
 599         while ((rc = LNetGetId(i++, &id)) != -ENOENT) {
 600                 if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND)
 601                         continue;
 602                 break;
 603         }
 604         sprintf(uuid->uuid, "mgc_"LPX64, id.nid);
 605 #else
 606         /* random makes reconnect easier */
 607         class_generate_random_uuid(uuidc);
 608         class_uuid_unparse(uuidc, uuid);
 609 #endif
 610         CDEBUG(D_MOUNT, "generated uuid: %s\n", uuid->uuid);
 611
 612         /* Start the MGC */
 613         rc = lustre_start_simple(LUSTRE_MGC_OBDNAME, LUSTRE_MGC_NAME,
 614                                  (char *)uuid->uuid, LUSTRE_MGS_OBDNAME,
 615                                  libcfs_nid2str(nid));
 616         OBD_FREE_PTR(uuid);
 617         if (rc)
 618                 RETURN(rc);
 619
 620         /* Add the redundant MGS nids */
 621         for (i = 1; i < lsi->lsi_lmd->lmd_mgsnid_count; i++) {
 622                 nid = lsi->lsi_lmd->lmd_mgsnid[i];
 623                 rc = do_lcfg(LUSTRE_MGC_OBDNAME, nid, LCFG_ADD_UUID,
 624                              libcfs_nid2str(nid), 0, 0, 0);
 625                 if (rc) {
 626                         CERROR("Add uuid for %s failed %d\n",
 627                                libcfs_nid2str(nid), rc);
 628                         continue;
 629                 }
 630                 rc = do_lcfg(LUSTRE_MGC_OBDNAME, 0, LCFG_ADD_CONN,
 631                              libcfs_nid2str(nid), 0, 0, 0);
 632                 if (rc)
 633                         CERROR("Add conn for %s failed %d\n",
 634                                libcfs_nid2str(nid), rc);
 635         }
 636
 637         obd = class_name2obd(LUSTRE_MGC_OBDNAME);
 638         if (!obd) {
 639                 CERROR("Can't find mgcobd %s\n", LUSTRE_MGC_OBDNAME);
 640                 RETURN(-ENOTCONN);
 641         }
 642
 643         /* Try all connections, but only once. */
 644         recov_bk = 1;
 645         rc = obd_set_info(obd->obd_self_export,
 646                           strlen(KEY_INIT_RECOV_BACKUP), KEY_INIT_RECOV_BACKUP,
 647                           sizeof(recov_bk), &recov_bk);
 648         if (rc)
 649                 /* nonfatal */
 650                 CERROR("can't set %s %d\n", KEY_INIT_RECOV_BACKUP, rc);
 651
 652         /* FIXME add ACL support? */
 653         //ocd.ocd_connect_flags = OBD_CONNECT_ACL;
 654
 655         /* We connect to the MGS at setup, and don't disconnect until cleanup */
 656         rc = obd_connect(&mgc_conn, obd, &(obd->obd_uuid), &ocd);
 657         if (rc) {
 658                 CERROR("connect failed %d\n", rc);
 659                 GOTO(out, rc);
 660         }
 661
 662         exp = class_conn2export(&mgc_conn);
 663         obd->u.cli.cl_mgc_mgsexp = exp;
 664
 665         /* And keep a refcount of servers/clients who started with "mount",
 666            so we know when we can get rid of the mgc. */
 667         atomic_set(&obd->u.cli.cl_mgc_refcount, 1);
 668
 669 out:
 670         /* Keep the mgc info in the sb. Note that many lsi's can point
 671            to the same mgc.*/
 672         lsi->lsi_mgc = obd;
 673         RETURN(rc);
 674 }
 675
 676 static int lustre_stop_mgc(struct super_block *sb)
 677 {
 678         struct lustre_sb_info *lsi = s2lsi(sb);
 679         struct obd_device *obd;
 680         lnet_nid_t nid;
 681         int i, rc;
 682         ENTRY;
 683
 684         if (!lsi)
 685                 RETURN(-ENOENT);
 686         obd = lsi->lsi_mgc;
 687         if (!obd)
 688                 RETURN(-ENOENT);
 689
 690         lsi->lsi_mgc = NULL;
 691         if (!atomic_dec_and_test(&obd->u.cli.cl_mgc_refcount)) {
 692                 /* This is not fatal, every client that stops
 693                    will call in here. */
 694                 CDEBUG(D_MOUNT, "mgc still has %d references.\n",
 695                        atomic_read(&obd->u.cli.cl_mgc_refcount));
 696                 RETURN(-EBUSY);
 697         }
 698
 699         /* MGC must always stop */
 700         obd->obd_force = 1;
 701         /* client_disconnect_export uses the no_recov flag to decide whether it
 702            should disconnect or just invalidate.  (The MGC has no
 703            recoverable data in any case.)
 704            Without no_recov, we wait for locks to be dropped, so if the
 705            MGS is down, we might wait for an obd timeout.  With no-recov,
 706            if the MGS is up, we don't tell it we're disconnecting, so
 707            we must wait until the MGS evicts the dead client before the
 708            client can reconnect. So it's either slow disconnect, or a
 709            slow reconnect. This could probably be fixed on the server side
 710            by ignoring handle mismatches in target_handle_reconnect. */
 711         if (lsi->lsi_flags & LSI_UMOUNT_FORCE) {
 712                 /* FIXME maybe always set this? */
 713                 obd->obd_no_recov = 1;
 714         }
 715
 716         if (obd->u.cli.cl_mgc_mgsexp)
 717                 obd_disconnect(obd->u.cli.cl_mgc_mgsexp);
 718
 719         rc = class_manual_cleanup(obd);
 720         if (rc)
 721                 RETURN(rc);
 722
 723         /* class_add_uuid adds a nid even if the same uuid exists; we might
 724            delete any copy here.  So they all better match. */
 725         for (i = 0; i < lsi->lsi_lmd->lmd_mgsnid_count; i++) {
 726                 nid = lsi->lsi_lmd->lmd_mgsnid[i];
 727                 rc = do_lcfg(obd->obd_name, nid, LCFG_DEL_UUID,
 728                               libcfs_nid2str(nid), 0, 0, 0);
 729                 if (rc)
 730                         CERROR("del MDC UUID %s failed: rc = %d\n",
 731                                libcfs_nid2str(nid), rc);
 732         }
 733         /* class_import_put will get rid of the additional connections */
 734
 735         RETURN(0);
 736 }
 737
 738 /* Since there's only one mgc per node, we have to change it's fs to get
 739    access to the right disk. */
 740 static int server_mgc_set_fs(struct obd_device *mgc, struct super_block *sb)
 741 {
 742         struct lustre_sb_info *lsi = s2lsi(sb);
 743         int rc;
 744         ENTRY;
 745
 746         CDEBUG(D_MOUNT, "Set mgc disk for %s\n", lsi->lsi_lmd->lmd_dev);
 747
 748         /* cl_mgc_sem in mgc insures we sleep if the mgc_fs is busy */
 749         rc = obd_set_info(mgc->obd_self_export,
 750                           strlen("set_fs"), "set_fs",
 751                           sizeof(*sb), sb);
 752         if (rc) {
 753                 CERROR("can't set_fs %d\n", rc);
 754         }
 755
 756         RETURN(rc);
 757 }
 758
 759 static int server_mgc_clear_fs(struct obd_device *mgc)
 760 {
 761         int rc;
 762         ENTRY;
 763
 764         CDEBUG(D_MOUNT, "Unassign mgc disk\n");
 765
 766         rc = obd_set_info(mgc->obd_self_export,
 767                           strlen("clear_fs"), "clear_fs", 0, NULL);
 768         RETURN(rc);
 769 }
 770
 771 /* Stop MDS/OSS if nobody is using them */
 772 static int server_stop_servers(int lddflags, int lsiflags)
 773 {
 774         struct obd_device *obd = NULL;
 775         struct obd_type *type;
 776         int rc = 0;
 777         ENTRY;
 778
 779         /* Either an MDT or an OST or neither  */
 780
 781         /* if this was an MDT, and there are no more MDT's, clean up the MDS */
 782         if ((lddflags & LDD_F_SV_TYPE_MDT) && (obd = class_name2obd("MDS"))) {
 783                 //FIXME pre-rename, should eventually be LUSTRE_MDT_NAME
 784                 type = class_search_type(LUSTRE_MDS_NAME);
 785         }
 786         /* if this was an OST, and there are no more OST's, clean up the OSS */
 787         if ((lddflags & LDD_F_SV_TYPE_OST) && (obd = class_name2obd("OSS"))) {
 788                 type = class_search_type(LUSTRE_OST_NAME);
 789         }
 790
 791         if (obd && (!type || !type->typ_refcnt)) {
 792                 int err;
 793                 obd->obd_force = 1;
 794                 /* obd_fail doesn't mean much on a server obd */
 795                 err = class_manual_cleanup(obd);
 796                 if (!rc)
 797                         rc = err;
 798         }
 799
 800         RETURN(rc);
 801 }
 802
 803 int server_mti_print(char *title, struct mgs_target_info *mti)
 804 {
 805         PRINT_CMD(PRINT_MASK, "mti %s\n", title);
 806         PRINT_CMD(PRINT_MASK, "server: %s\n", mti->mti_svname);
 807         PRINT_CMD(PRINT_MASK, "fs:     %s\n", mti->mti_fsname);
 808         PRINT_CMD(PRINT_MASK, "uuid:   %s\n", mti->mti_uuid);
 809         PRINT_CMD(PRINT_MASK, "ver: %d  flags: %#x\n",
 810                   mti->mti_config_ver, mti->mti_flags);
 811         return(0);
 812 }
 813
 814 static int server_sb2mti(struct super_block *sb, struct mgs_target_info *mti)
 815 {
 816         struct lustre_sb_info   *lsi = s2lsi(sb);
 817         struct lustre_disk_data *ldd = lsi->lsi_ldd;
 818         lnet_process_id_t        id;
 819         int i = 0;
 820         ENTRY;
 821
 822         if (!mti)
 823                 RETURN(-ENOMEM);
 824         if (!(lsi->lsi_flags & LSI_SERVER))
 825                 RETURN(-EINVAL);
 826
 827         strncpy(mti->mti_fsname, ldd->ldd_fsname,
 828                 sizeof(mti->mti_fsname));
 829         strncpy(mti->mti_svname, ldd->ldd_svname,
 830                 sizeof(mti->mti_svname));
 831
 832         mti->mti_nid_count = 0;
 833         while (LNetGetId(i++, &id) != -ENOENT) {
 834                 if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND)
 835                         continue;
 836                 mti->mti_nids[mti->mti_nid_count] = id.nid;
 837                 mti->mti_nid_count++;
 838                 if (mti->mti_nid_count >= MTI_NIDS_MAX) {
 839                         CWARN("Only using first %d nids for %s\n",
 840                               mti->mti_nid_count, mti->mti_svname);
 841                         break;
 842                 }
 843         }
 844
 845         mti->mti_failnid_count = ldd->ldd_failnid_count;
 846         memcpy(mti->mti_failnids, ldd->ldd_failnid, sizeof(mti->mti_failnids));
 847         memcpy(mti->mti_uuid, ldd->ldd_uuid, sizeof(mti->mti_uuid));
 848         mti->mti_config_ver = 0;
 849         mti->mti_flags = ldd->ldd_flags;
 850         mti->mti_stripe_index = ldd->ldd_svindex;
 851         RETURN(0);
 852 }
 853
 854 /* Register an old or new target with the MGS. If needed MGS will construct
 855    startup logs and assign index */
 856 int server_register_target(struct super_block *sb)
 857 {
 858         struct lustre_sb_info *lsi = s2lsi(sb);
 859         struct obd_device *mgc = lsi->lsi_mgc;
 860         struct lustre_disk_data *ldd = lsi->lsi_ldd;
 861         struct mgs_target_info *mti = NULL;
 862         int rc;
 863         ENTRY;
 864
 865         LASSERT(mgc);
 866
 867         if (!(lsi->lsi_flags & LSI_SERVER))
 868                 RETURN(-EINVAL);
 869
 870         OBD_ALLOC_PTR(mti);
 871         rc = server_sb2mti(sb, mti);
 872         if (rc)
 873                 GOTO(out, rc);
 874
 875         CDEBUG(D_MOUNT, "Registration %s, fs=%s, %s, index=%04x, flags=%#x\n",
 876                mti->mti_svname, mti->mti_fsname,
 877                libcfs_nid2str(mti->mti_nids[0]), mti->mti_stripe_index,
 878                mti->mti_flags);
 879
 880         /* Register the target */
 881         /* FIXME use mdc_process_config instead */
 882         rc = obd_set_info(mgc->u.cli.cl_mgc_mgsexp,
 883                           strlen("register_target"), "register_target",
 884                           sizeof(*mti), mti);
 885         if (rc) {
 886                 CERROR("registration with the MGS failed (%d)\n", rc);
 887                 GOTO(out, rc);
 888         }
 889
 890         /* Always update our flags */
 891         ldd->ldd_flags = mti->mti_flags & ~LDD_F_REWRITE_LDD;
 892
 893         /* If this flag is set, it means the MGS wants us to change our
 894            on-disk data. (So far this means just the index.) */
 895         if (mti->mti_flags & LDD_F_REWRITE_LDD) {
 896                 CDEBUG(D_MOUNT, "Must change on-disk index from %#x to %#x for "
 897                        " %s\n",
 898                        ldd->ldd_svindex, mti->mti_stripe_index,
 899                        mti->mti_svname);
 900                 ldd->ldd_svindex = mti->mti_stripe_index;
 901                 strncpy(ldd->ldd_svname, mti->mti_svname,
 902                         sizeof(ldd->ldd_svname));
 903                 /* or ldd_make_sv_name(ldd); */
 904                 ldd_write(&mgc->obd_lvfs_ctxt, ldd);
 905
 906                 /* FIXME write last_rcvd?, disk label? */
 907         }
 908
 909 out:
 910         if (mti)
 911                 OBD_FREE_PTR(mti);
 912         RETURN(rc);
 913 }
 914
 915 /* Start targets */
 916 static int server_start_targets(struct super_block *sb, struct vfsmount *mnt)
 917 {
 918         struct obd_device *obd;
 919         struct lustre_sb_info *lsi = s2lsi(sb);
 920         struct config_llog_instance cfg;
 921         int rc;
 922         ENTRY;
 923
 924         CDEBUG(D_MOUNT, "starting target %s\n", lsi->lsi_ldd->ldd_svname);
 925
 926         /* If we're an MDT, make sure the global MDS is running */
 927         if (lsi->lsi_ldd->ldd_flags & LDD_F_SV_TYPE_MDT) {
 928                 /* make sure (what will be called) the MDS is started */
 929                 obd = class_name2obd("MDS");
 930                 if (!obd) {
 931                         //FIXME pre-rename, should eventually be LUSTRE_MDS_NAME
 932                         rc = lustre_start_simple("MDS", LUSTRE_MDT_NAME,
 933                                                  "MDS_uuid", 0, 0);
 934                         if (rc) {
 935                                 CERROR("failed to start MDS: %d\n", rc);
 936                                 GOTO(out_servers, rc);
 937                         }
 938                 }
 939         }
 940
 941         /* If we're an OST, make sure the global OSS is running */
 942         if (lsi->lsi_ldd->ldd_flags & LDD_F_SV_TYPE_OST) {
 943                 /* make sure OSS is started */
 944                 obd = class_name2obd("OSS");
 945                 if (!obd) {
 946                         rc = lustre_start_simple("OSS", LUSTRE_OSS_NAME,
 947                                                  "OSS_uuid", 0, 0);
 948                         if (rc) {
 949                                 CERROR("failed to start OSS: %d\n", rc);
 950                                 GOTO(out_servers, rc);
 951                         }
 952                 }
 953         }
 954
 955         /* Set the mgc fs to our server disk.  This allows the MGC
 956            to read and write configs locally. */
 957         server_mgc_set_fs(lsi->lsi_mgc, sb);
 958
 959         /* Register with MGS */
 960         rc = server_register_target(sb);
 961         if (rc && (lsi->lsi_ldd->ldd_flags &
 962                    (LDD_F_NEED_INDEX | LDD_F_UPDATE | LDD_F_UPGRADE14))){
 963                 CERROR("Required registration failed for %s: %d\n",
 964                        lsi->lsi_ldd->ldd_svname, rc);
 965                 if (rc == -EIO) {
 966                         LCONSOLE_ERROR("Communication error with the MGS.  Is "
 967                                        "the MGS running?\n");
 968                 }
 969                 GOTO(out, rc);
 970         }
 971
 972         /* Let the target look up the mount using the target's name
 973            (we can't pass the sb or mnt through class_process_config.) */
 974         rc = server_register_mount(lsi->lsi_ldd->ldd_svname, sb, mnt);
 975         if (rc)
 976                 GOTO(out, rc);
 977
 978         /* Start targets using the llog named for the target */
 979         memset(&cfg, 0, sizeof(cfg));
 980         rc = lustre_process_log(sb, lsi->lsi_ldd->ldd_svname, &cfg);
 981         if (rc) {
 982                 CERROR("failed to start server %s: %d\n",
 983                        lsi->lsi_ldd->ldd_svname, rc);
 984                 GOTO(out, rc);
 985         }
 986
 987         if (!class_name2obd(lsi->lsi_ldd->ldd_svname)) {
 988                 CERROR("no server named %s was started\n",
 989                        lsi->lsi_ldd->ldd_svname);
 990                 rc = -ENXIO;
 991         }
 992
 993 out:
 994         /* Release the mgc fs for others to use */
 995         server_mgc_clear_fs(lsi->lsi_mgc);
 996
 997 out_servers:
 998         RETURN(rc);
 999 }
1000
1001 /***************** lustre superblock **************/
1002
1003 struct lustre_sb_info *lustre_init_lsi(struct super_block *sb)
1004 {
1005         struct lustre_sb_info *lsi = NULL;
1006         ENTRY;
1007
1008         OBD_ALLOC(lsi, sizeof(*lsi));
1009         if (!lsi)
1010                 RETURN(NULL);
1011         OBD_ALLOC(lsi->lsi_lmd, sizeof(*lsi->lsi_lmd));
1012         if (!lsi->lsi_lmd) {
1013                 OBD_FREE(lsi, sizeof(*lsi));
1014                 RETURN(NULL);
1015         }
1016
1017         lsi->lsi_lmd->lmd_exclude_count = 0;
1018         s2lsi_nocast(sb) = lsi;
1019         /* we take 1 extra ref for our setup */
1020         atomic_set(&lsi->lsi_mounts, 1);
1021
1022         /* Default umount style */
1023         lsi->lsi_flags = LSI_UMOUNT_FAILOVER;
1024         RETURN(lsi);
1025 }
1026
1027 static int lustre_free_lsi(struct super_block *sb)
1028 {
1029         struct lustre_sb_info *lsi = s2lsi(sb);
1030         ENTRY;
1031
1032         if (!lsi)
1033                 RETURN(0);
1034
1035         CDEBUG(D_MOUNT, "Freeing lsi\n");
1036
1037         /* someone didn't call server_put_mount. */
1038         LASSERT(atomic_read(&lsi->lsi_mounts) == 0);
1039
1040         if (lsi->lsi_ldd != NULL)
1041                 OBD_FREE(lsi->lsi_ldd, sizeof(*lsi->lsi_ldd));
1042
1043         if (lsi->lsi_lmd != NULL) {
1044                 if (lsi->lsi_lmd->lmd_dev != NULL)
1045                         OBD_FREE(lsi->lsi_lmd->lmd_dev,
1046                                  strlen(lsi->lsi_lmd->lmd_dev) + 1);
1047                 if (lsi->lsi_lmd->lmd_opts != NULL)
1048                         OBD_FREE(lsi->lsi_lmd->lmd_opts,
1049                                  strlen(lsi->lsi_lmd->lmd_opts) + 1);
1050                 if (lsi->lsi_lmd->lmd_exclude_count)
1051                         OBD_FREE(lsi->lsi_lmd->lmd_exclude,
1052                                  sizeof(lsi->lsi_lmd->lmd_exclude[0]) *
1053                                  lsi->lsi_lmd->lmd_exclude_count);
1054                 OBD_FREE(lsi->lsi_lmd, sizeof(*lsi->lsi_lmd));
1055         }
1056
1057         LASSERT(lsi->lsi_llsbi == NULL);
1058
1059         server_deregister_mount_all(lsi->lsi_srv_mnt);
1060
1061         OBD_FREE(lsi, sizeof(*lsi));
1062         s2lsi_nocast(sb) = NULL;
1063
1064         RETURN(0);
1065 }
1066
1067 static int lustre_put_lsi(struct super_block *sb)
1068 {
1069         struct lustre_sb_info *lsi = s2lsi(sb);
1070         ENTRY;
1071
1072         LASSERT(lsi);
1073
1074         CDEBUG(D_MOUNT, "put %p %d\n", sb, atomic_read(&lsi->lsi_mounts));
1075
1076         if (atomic_dec_and_test(&lsi->lsi_mounts)) {
1077                 lustre_free_lsi(sb);
1078                 RETURN(1);
1079         }
1080         RETURN(0);
1081 }
1082
1083 /*************** server mount ******************/
1084
1085 /* Kernel mount using mount options in MOUNT_DATA_FILE */
1086 static struct vfsmount *server_kernel_mount(struct super_block *sb)
1087 {
1088         struct lvfs_run_ctxt mount_ctxt;
1089         struct lustre_sb_info *lsi = s2lsi(sb);
1090         struct lustre_disk_data *ldd;
1091         struct lustre_mount_data *lmd = lsi->lsi_lmd;
1092         struct vfsmount *mnt;
1093         char *options = NULL;
1094         unsigned long page, s_flags;
1095         int rc;
1096         ENTRY;
1097
1098         OBD_ALLOC(ldd, sizeof(*ldd));
1099         if (!ldd)
1100                 RETURN(ERR_PTR(-ENOMEM));
1101
1102         /* In the past, we have always used flags = 0.
1103            Note ext3/ldiskfs can't be mounted ro. */
1104         s_flags = sb->s_flags;
1105
1106         /* Pre-mount ext3 to read the MOUNT_DATA_FILE */
1107         CDEBUG(D_MOUNT, "Pre-mount ext3 %s\n", lmd->lmd_dev);
1108         mnt = do_kern_mount("ext3", s_flags, lmd->lmd_dev, 0);
1109         if (IS_ERR(mnt)) {
1110                 rc = PTR_ERR(mnt);
1111                 CERROR("premount ext3 failed (%d), trying ldiskfs\n", rc);
1112                 /* If ext3 fails (bec. of mballoc, extents), try ldiskfs */
1113                 mnt = do_kern_mount("ldiskfs", s_flags, lmd->lmd_dev, 0);
1114                 if (IS_ERR(mnt)) {
1115                         rc = PTR_ERR(mnt);
1116                         CERROR("premount ldiskfs failed: rc = %d\n", rc);
1117                         GOTO(out_free, rc);
1118                 }
1119         }
1120
1121         OBD_SET_CTXT_MAGIC(&mount_ctxt);
1122         mount_ctxt.pwdmnt = mnt;
1123         mount_ctxt.pwd = mnt->mnt_root;
1124         mount_ctxt.fs = get_ds();
1125
1126         rc = ldd_parse(&mount_ctxt, ldd);
1127         unlock_mntput(mnt);
1128
1129         if (rc) {
1130                 CERROR("premount parse options failed: rc = %d\n", rc);
1131                 GOTO(out_free, rc);
1132         }
1133
1134         /* Done with our pre-mount, now do the real mount. */
1135
1136         /* Glom up mount options */
1137         page = __get_free_page(GFP_KERNEL);
1138         if (!page)
1139                 GOTO(out_free, rc = -ENOMEM);
1140
1141         options = (char *)page;
1142         memset(options, 0, PAGE_SIZE);
1143         strncpy(options, ldd->ldd_mount_opts, PAGE_SIZE - 2);
1144
1145         /* Add in any mount-line options */
1146         if (lmd->lmd_opts && (*(lmd->lmd_opts) != 0)) {
1147                 int len = PAGE_SIZE - strlen(options) - 2;
1148                 if (*options != 0)
1149                         strcat(options, ",");
1150                 strncat(options, lmd->lmd_opts, len);
1151         }
1152
1153         /* Special permanent mount flags */
1154         if (IS_OST(ldd))
1155             s_flags |= MS_NOATIME | MS_NODIRATIME;
1156
1157         CDEBUG(D_MOUNT, "kern_mount: %s %s %s\n",
1158                MT_STR(ldd), lmd->lmd_dev, options);
1159         mnt = do_kern_mount(MT_STR(ldd), s_flags, lmd->lmd_dev,
1160                             (void *)options);
1161         free_page(page);
1162         if (IS_ERR(mnt)) {
1163                 rc = PTR_ERR(mnt);
1164                 CERROR("do_kern_mount failed: rc = %d\n", rc);
1165                 GOTO(out_free, rc);
1166         }
1167
1168         lsi->lsi_ldd = ldd;   /* freed at lsi cleanup */
1169         CDEBUG(D_SUPER, "%s: mnt = %p\n", lmd->lmd_dev, mnt);
1170         RETURN(mnt);
1171
1172 out_free:
1173         OBD_FREE(ldd, sizeof(*ldd));
1174         lsi->lsi_ldd = NULL;
1175         RETURN(ERR_PTR(rc));
1176 }
1177
1178 static void server_wait_finished(struct vfsmount *mnt)
1179 {
1180         wait_queue_head_t   waitq;
1181         struct l_wait_info  lwi;
1182         int                 retries = 10;
1183
1184         init_waitqueue_head(&waitq);
1185
1186         while ((atomic_read(&mnt->mnt_count) > 0) && retries--) {
1187                 CWARN("Mount still busy with %d refs\n",
1188                        atomic_read(&mnt->mnt_count));
1189
1190                 /* Wait for a bit */
1191                 lwi = LWI_TIMEOUT(2 * HZ, NULL, NULL);
1192                 l_wait_event(waitq, 0, &lwi);
1193         }
1194         if (atomic_read(&mnt->mnt_count)) {
1195                 CERROR("Mount is still busy, giving up.\n");
1196         }
1197 }
1198
1199 static void server_put_super(struct super_block *sb)
1200 {
1201         struct lustre_sb_info *lsi = s2lsi(sb);
1202         struct obd_device     *obd;
1203         struct vfsmount       *mnt = lsi->lsi_srv_mnt;
1204         int lddflags = lsi->lsi_ldd->ldd_flags;
1205         int lsiflags = lsi->lsi_flags;
1206         int rc;
1207         ENTRY;
1208
1209         LASSERT(lsiflags & LSI_SERVER);
1210
1211         CDEBUG(D_MOUNT, "server put_super %s\n", lsi->lsi_ldd->ldd_svname);
1212
1213         /* Stop the target */
1214         if (IS_MDT(lsi->lsi_ldd) || IS_OST(lsi->lsi_ldd)) {
1215
1216                 /* tell the mgc to drop the config log */
1217                 lustre_end_log(sb, lsi->lsi_ldd->ldd_svname, NULL);
1218
1219                 obd = class_name2obd(lsi->lsi_ldd->ldd_svname);
1220                 if (obd) {
1221                         CDEBUG(D_MOUNT, "stopping %s\n", obd->obd_name);
1222                         if (lsi->lsi_flags & LSI_UMOUNT_FORCE)
1223                                 obd->obd_force = 1;
1224                         if (lsi->lsi_flags & LSI_UMOUNT_FAILOVER)
1225                                 obd->obd_fail = 1;
1226                         /* We can't seem to give an error return code
1227                            to .put_super, so we better make sure we clean up!
1228                            FIXME is there a way to get around this? */
1229                         obd->obd_force = 1;
1230                         class_manual_cleanup(obd);
1231                 } else {
1232                         CERROR("no obd %s\n", lsi->lsi_ldd->ldd_svname);
1233                         server_deregister_mount(lsi->lsi_ldd->ldd_svname);
1234                 }
1235         }
1236
1237         /* If they wanted the mgs to stop separately from the mdt, they
1238            should have put it on a different device. */
1239         if (IS_MGS(lsi->lsi_ldd)) {
1240                 /* stop the mgc before the mgs so the connection gets cleaned
1241                    up */
1242                 lustre_stop_mgc(sb);
1243                 server_stop_mgs(sb);
1244         }
1245
1246         /* clean the mgc and sb */
1247         rc = lustre_common_put_super(sb);
1248         // FIXME how do I return a failure?
1249
1250         /* drop the One True Mount */
1251         unlock_mntput(mnt);
1252
1253         /* Wait for the targets to really clean up - can't exit (and let the
1254            sb get destroyed) while the mount is still in use */
1255         server_wait_finished(mnt);
1256
1257         /* Stop the servers (MDS, OSS) if no longer needed.  We must wait
1258            until the target is really gone so that our type refcount check
1259            is right. */
1260         server_stop_servers(lddflags, lsiflags);
1261
1262         CDEBUG(D_MOUNT|D_WARNING, "umount done\n");
1263         EXIT;
1264 }
1265
1266 static void server_umount_begin(struct super_block *sb)
1267 {
1268         struct lustre_sb_info *lsi = s2lsi(sb);
1269         ENTRY;
1270
1271         CDEBUG(D_MOUNT, "umount -f\n");
1272         /* umount = failover
1273            umount -f = force
1274            no third way to do non-force, non-failover */
1275         lsi->lsi_flags &= ~LSI_UMOUNT_FAILOVER;
1276         lsi->lsi_flags |= LSI_UMOUNT_FORCE;
1277         EXIT;
1278 }
1279
1280 static int server_statfs (struct super_block *sb, struct kstatfs *buf)
1281 {
1282         struct vfsmount *mnt = s2lsi(sb)->lsi_srv_mnt;
1283         ENTRY;
1284
1285         if (mnt && mnt->mnt_sb && mnt->mnt_sb->s_op->statfs) {
1286                 int rc = mnt->mnt_sb->s_op->statfs(mnt->mnt_sb, buf);
1287                 if (!rc) {
1288                         buf->f_type = sb->s_magic;
1289                         RETURN(0);
1290                 }
1291         }
1292
1293         /* just return 0 */
1294         buf->f_type = sb->s_magic;
1295         buf->f_bsize = sb->s_blocksize;
1296         buf->f_blocks = 1;
1297         buf->f_bfree = 0;
1298         buf->f_bavail = 0;
1299         buf->f_files = 1;
1300         buf->f_ffree = 0;
1301         buf->f_namelen = NAME_MAX;
1302         RETURN(0);
1303 }
1304
1305 static struct super_operations server_ops =
1306 {
1307         .put_super      = server_put_super,
1308         .umount_begin   = server_umount_begin, /* umount -f */
1309         .statfs         = server_statfs,
1310 };
1311
1312 #define log2(n) ffz(~(n))
1313 #define LUSTRE_SUPER_MAGIC 0x0BD00BD1
1314
1315 static int server_fill_super_common(struct super_block *sb)
1316 {
1317         struct inode *root = 0;
1318         ENTRY;
1319
1320         CDEBUG(D_MOUNT, "Server sb, dev=%d\n", (int)sb->s_dev);
1321
1322         sb->s_blocksize = 4096;
1323         sb->s_blocksize_bits = log2(sb->s_blocksize);
1324         sb->s_magic = LUSTRE_SUPER_MAGIC;
1325         sb->s_maxbytes = 0; //PAGE_CACHE_MAXBYTES;
1326         sb->s_flags |= MS_RDONLY;
1327         sb->s_op = &server_ops;
1328
1329         root = new_inode(sb);
1330         if (!root) {
1331                 CERROR("Can't make root inode\n");
1332                 RETURN(-EIO);
1333         }
1334
1335         /* returns -EIO for every operation */
1336         /* make_bad_inode(root); -- badness - can't umount */
1337         /* apparently we need to be a directory for the mount to finish */
1338         root->i_mode = S_IFDIR;
1339
1340         sb->s_root = d_alloc_root(root);
1341         if (!sb->s_root) {
1342                 CERROR("Can't make root dentry\n");
1343                 iput(root);
1344                 RETURN(-EIO);
1345         }
1346
1347         RETURN(0);
1348 }
1349
1350 static int server_fill_super(struct super_block *sb)
1351 {
1352         struct lustre_sb_info *lsi = s2lsi(sb);
1353         struct vfsmount *mnt;
1354         int mgs_service = 0, i = 0, rc;
1355         ENTRY;
1356
1357         /* the One True Mount */
1358         mnt = server_kernel_mount(sb);
1359         if (IS_ERR(mnt)) {
1360                 rc = PTR_ERR(mnt);
1361                 CERROR("Unable to mount device %s: %d\n",
1362                       lsi->lsi_lmd->lmd_dev, rc);
1363                 GOTO(out, rc);
1364         }
1365         lsi->lsi_srv_mnt = mnt;
1366
1367         LASSERT(lsi->lsi_ldd);
1368         CDEBUG(D_MOUNT, "Found service %s for fs '%s' on device %s\n",
1369                lsi->lsi_ldd->ldd_svname, lsi->lsi_ldd->ldd_fsname,
1370                lsi->lsi_lmd->lmd_dev);
1371
1372         if (class_name2obd(lsi->lsi_ldd->ldd_svname)) {
1373                 LCONSOLE_ERROR("The target named %s is already running. "
1374                                "Double-mount may have compromised the disk "
1375                                "journal.\n", lsi->lsi_ldd->ldd_svname);
1376                 unlock_mntput(mnt);
1377                 lustre_put_lsi(sb);
1378                 GOTO(out, rc = -EALREADY);
1379         }
1380
1381         /* append on-disk MGS nids to mount-line MGS nids */
1382         for (i = 0; (i < lsi->lsi_ldd->ldd_mgsnid_count) &&
1383               (lsi->lsi_lmd->lmd_mgsnid_count < MTI_NIDS_MAX); i++) {
1384                 lsi->lsi_lmd->lmd_mgsnid[lsi->lsi_lmd->lmd_mgsnid_count++] =
1385                         lsi->lsi_ldd->ldd_mgsnid[i];
1386         }
1387
1388         /* start MGS before MGC */
1389         if (IS_MGS(lsi->lsi_ldd)) {
1390                 rc = server_start_mgs(sb);
1391                 if (rc) {
1392                         CERROR("ignoring Failed MGS start!!\n");
1393                         //GOTO(out_mnt, rc);
1394                 } else {
1395                         /* add local nids (including LO) to MGS nids */
1396                         lnet_process_id_t id;
1397                         int j = lsi->lsi_lmd->lmd_mgsnid_count;
1398                         i = 0;
1399                         while ((rc = LNetGetId(i++, &id)) != -ENOENT) {
1400                                 if (j >= MTI_NIDS_MAX)
1401                                         break;
1402                                 lsi->lsi_lmd->lmd_mgsnid[j++] = id.nid;
1403                         }
1404                         lsi->lsi_lmd->lmd_mgsnid_count = j;
1405
1406                         mgs_service++;
1407                 }
1408         }
1409
1410         rc = lustre_start_mgc(sb);
1411         if (rc)
1412                 GOTO(out_mnt, rc);
1413
1414         /* Set up all obd devices for service */
1415         if (!(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOSVC) &&
1416                 (IS_OST(lsi->lsi_ldd) || IS_MDT(lsi->lsi_ldd))) {
1417                 rc = server_start_targets(sb, mnt);
1418                 if (rc < 0) {
1419                         CERROR("Unable to start targets: %d\n", rc);
1420                         GOTO(out_mnt, rc);
1421                 }
1422         /* FIXME overmount client here,
1423            or can we just start a client log and client_fill_super on this sb?
1424            We need to make sure server_put_super gets called too - ll_put_super
1425            calls lustre_common_put_super; check there for LSI_SERVER flag,
1426            call s_p_s if so.
1427            Probably should start client from new thread so we can return.
1428            Client will not finish until all servers are connected.
1429            Note - MGMT-only server does NOT get a client, since there is no
1430            lustre fs associated - the MGMT is for all lustre fs's */
1431         }
1432
1433         rc = server_fill_super_common(sb);
1434         if (rc)
1435                 GOTO(out_mnt, rc);
1436
1437         RETURN(0);
1438
1439 out_mnt:
1440         server_put_super(sb);
1441 out:
1442         RETURN(rc);
1443 }
1444
1445 /* Get the index from the obd name.
1446    rc = server type, or
1447    rc < 0  on error */
1448 int server_name2index(char *svname, unsigned long *idx, char **endptr)
1449 {
1450         int rc;
1451         char *dash = strchr(svname, '-');
1452         if (!dash) {
1453                 CERROR("Can't understand server name %s\n", svname);
1454                 return(-EINVAL);
1455         }
1456
1457         if (strncmp(dash + 1, "MDT", 3) == 0)
1458                 rc = LDD_F_SV_TYPE_MDT;
1459         else if (strncmp(dash + 1, "OST", 3) == 0)
1460                 rc = LDD_F_SV_TYPE_OST;
1461         else
1462                 return(-EINVAL);
1463
1464         *idx = simple_strtoul(dash + 4, endptr, 16);
1465         return rc;
1466 }
1467
/*************** mount common between server and client ***************/
1469
1470 /* Common umount */
1471 int lustre_common_put_super(struct super_block *sb)
1472 {
1473         int rc;
1474         ENTRY;
1475
1476         CDEBUG(D_MOUNT, "dropping sb %p\n", sb);
1477
1478         rc = lustre_stop_mgc(sb);
1479         if (rc && (rc != -ENOENT)) {
1480                 if (rc != -EBUSY) {
1481                         CERROR("Can't stop MGC: %d\n", rc);
1482                         RETURN(rc);
1483                 }
1484                 /* BUSY just means that there's some other obd that
1485                    needs the mgc.  Let him clean it up. */
1486                 CDEBUG(D_MOUNT, "MGC still in use\n");
1487         }
1488         lustre_put_lsi(sb);
1489         RETURN(rc);
1490 }
1491
1492 static void lmd_print(struct lustre_mount_data *lmd)
1493 {
1494         int i;
1495
1496         PRINT_CMD(PRINT_MASK, "  mount data:\n");
1497         if (!lmd->lmd_mgsnid_count)
1498                 PRINT_CMD(PRINT_MASK, "no MGS nids\n");
1499         else for (i = 0; i < lmd->lmd_mgsnid_count; i++) {
1500                 PRINT_CMD(PRINT_MASK, "nid %d:  %s\n", i,
1501                        libcfs_nid2str(lmd->lmd_mgsnid[i]));
1502         }
1503         if (lmd_is_client(lmd))
1504                 PRINT_CMD(PRINT_MASK, "fsname:  %s\n", lmd->lmd_dev);
1505         else
1506                 PRINT_CMD(PRINT_MASK, "device:  %s\n", lmd->lmd_dev);
1507         PRINT_CMD(PRINT_MASK, "flags:   %x\n", lmd->lmd_flags);
1508         if (lmd->lmd_opts)
1509                 PRINT_CMD(PRINT_MASK, "options: %s\n", lmd->lmd_opts);
1510         for (i = 0; i < lmd->lmd_exclude_count; i++) {
1511                 PRINT_CMD(PRINT_MASK, "exclude %d:  OST%04x\n", i,
1512                           lmd->lmd_exclude[i]);
1513         }
1514 }
1515
1516 /* Is this server on the exclusion list */
1517 int lustre_check_exclusion(struct super_block *sb, char *svname)
1518 {
1519         struct lustre_sb_info *lsi = s2lsi(sb);
1520         struct lustre_mount_data *lmd = lsi->lsi_lmd;
1521         unsigned long index;
1522         int i, rc;
1523         ENTRY;
1524
1525         rc = server_name2index(svname, &index, NULL);
1526         if (rc != LDD_F_SV_TYPE_OST)
1527                 RETURN(0);
1528
1529         CDEBUG(D_MOUNT, "Check exclusion %s (%ld) in %d of %s\n", svname,
1530                index, lmd->lmd_exclude_count, lmd->lmd_dev);
1531
1532         for(i = 0; i < lmd->lmd_exclude_count; i++) {
1533                 if (index == lmd->lmd_exclude[i]) {
1534                         CWARN("Excluding %s (on exclusion list)\n", svname);
1535                         RETURN(1);
1536                 }
1537         }
1538         RETURN(0);
1539 }
1540
1541 /* mount -v  -o exclude=lustre-OST0001:lustre-OST0002 -t lustre ... */
1542 static int lmd_make_exclusion(struct lustre_mount_data *lmd, char *ptr)
1543 {
1544         char *s1 = ptr, *s2;
1545         unsigned long index, *exclude_list;
1546         int rc = 0;
1547         ENTRY;
1548
1549         /* temp storage until we figure out how many we have */
1550         OBD_ALLOC(exclude_list, sizeof(index) * MAX_OBD_DEVICES);
1551         if (!exclude_list)
1552                 RETURN(-ENOMEM);
1553
1554         /* we enter this fn pointing at the '=' */
1555         while (*s1 && *s1 != ' ' && *s1 != ',') {
1556                 s1++;
1557                 rc = server_name2index(s1, &index, &s2);
1558                 if (rc < 0) {
1559                         CERROR("Can't parse %s\n", s1);
1560                         break;
1561                 }
1562                 if (rc == LDD_F_SV_TYPE_OST)
1563                         exclude_list[lmd->lmd_exclude_count++] = index;
1564                 else
1565                         CDEBUG(D_MOUNT, "ignoring exclude %.7s\n", s1);
1566                 s1 = s2;
1567                 /* now we are pointing at ':' (next exclude)
1568                    or ',' (end of excludes) */
1569
1570                 if (lmd->lmd_exclude_count >= MAX_OBD_DEVICES)
1571                         break;
1572         }
1573         if (rc >= 0) /* non-err */
1574                 rc = 0;
1575
1576         if (lmd->lmd_exclude_count) {
1577                 /* permanent, freed in lustre_free_lsi */
1578                 OBD_ALLOC(lmd->lmd_exclude, sizeof(index) *
1579                           lmd->lmd_exclude_count);
1580                 if (lmd->lmd_exclude) {
1581                         memcpy(lmd->lmd_exclude, exclude_list,
1582                                sizeof(index) * lmd->lmd_exclude_count);
1583                 } else {
1584                         rc = -ENOMEM;
1585                         lmd->lmd_exclude_count = 0;
1586                 }
1587         }
1588         OBD_FREE(exclude_list, sizeof(index) * MAX_OBD_DEVICES);
1589         RETURN(rc);
1590 }
1591
1592 /* mount -v -t lustre uml1:uml2:/lustre-client /mnt/lustre */
1593 static int lmd_parse(char *options, struct lustre_mount_data *lmd)
1594 {
1595         char *s1, *s2, *devname = NULL;
1596         struct lustre_mount_data *raw = (struct lustre_mount_data *)options;
1597         int rc = 0;
1598         ENTRY;
1599
1600         LASSERT(lmd);
1601         if (!options) {
1602                 LCONSOLE_ERROR("Missing mount data: check that "
1603                                "/sbin/mount.lustre is installed.\n");
1604                 RETURN(-EINVAL);
1605         }
1606
1607         /* Options should be a string - try to detect old lmd data */
1608         if ((raw->lmd_magic & 0xffffff00) == (LMD_MAGIC & 0xffffff00)) {
1609                 LCONSOLE_ERROR("You're using an old version of "
1610                                "/sbin/mount.lustre.  Please install version "
1611                                "%s\n", LUSTRE_VERSION_STRING);
1612                 RETURN(-EINVAL);
1613         }
1614         lmd->lmd_magic = LMD_MAGIC;
1615
1616         /* default flags */
1617         lmd->lmd_flags |= LMD_FLG_RECOVER;
1618
1619         s1 = options;
1620         while (*s1) {
1621                 /* Skip whitespace and extra commas */
1622                 while (*s1 == ' ' || *s1 == ',')
1623                         s1++;
1624
1625                 /* Client options are parsed in ll_options: eg. flock,
1626                    user_xattr, acl */
1627
1628                 if (strncmp(s1, "recov", 5) == 0)
1629                         /* FIXME do something with the RECOVER flag - see lconf */
1630                         lmd->lmd_flags |= LMD_FLG_RECOVER;
1631                 else if (strncmp(s1, "norecov", 7) == 0)
1632                         lmd->lmd_flags &= ~LMD_FLG_RECOVER;
1633                 else if (strncmp(s1, "nosvc", 5) == 0)
1634                         lmd->lmd_flags |= LMD_FLG_NOSVC;
1635
1636                 /* ost exclusion list */
1637                 else if (strncmp(s1, "exclude=", 8) == 0) {
1638                         rc = lmd_make_exclusion(lmd, s1 + 7);
1639                         if (rc)
1640                                 goto invalid;
1641                 }
1642
1643                 /* Linux 2.4 doesn't pass the device, so we stuck it at the
1644                    end of the options. */
1645                 else if (strncmp(s1, "device=", 7) == 0) {
1646                         devname = s1 + 7;
1647                         /* terminate options right before device.  device
1648                            must be the last one. */
1649                         *s1 = 0;
1650                 }
1651
1652                 /* Find next opt */
1653                 s2 = strchr(s1, ',');
1654                 if (s2 == NULL)
1655                         break;
1656                 s1 = s2 + 1;
1657         }
1658
1659         if (!devname) {
1660                 LCONSOLE_ERROR("Can't find the device name "
1661                                "(need mount option 'device=...')\n");
1662                 goto invalid;
1663         }
1664
1665         if (strchr(devname, ',')) {
1666                 LCONSOLE_ERROR("Device name must be the final option\n");
1667                 goto invalid;
1668         }
1669
1670         s1 = devname;
1671         /* Get MGS nids if client mount:  uml1@tcp:uml2@tcp:/fsname-client */
1672         while ((s2 = strchr(s1, ':'))) {
1673                 lnet_nid_t nid;
1674                 *s2 = 0;
1675                 lmd->lmd_flags = LMD_FLG_CLIENT;
1676                 nid = libcfs_str2nid(s1);
1677                 if (nid == LNET_NID_ANY) {
1678                         LCONSOLE_ERROR("Can't parse NID '%s'\n", s1);
1679                         goto invalid;
1680                 }
1681                 if (lmd->lmd_mgsnid_count >= MTI_NIDS_MAX) {
1682                         LCONSOLE_ERROR("Too many NIDs: '%s'\n", s1);
1683                         goto invalid;
1684                 }
1685                 lmd->lmd_mgsnid[lmd->lmd_mgsnid_count++] = nid;
1686                 s1 = s2 + 1;
1687         }
1688
1689         if (lmd_is_client(lmd)) {
1690                 /* Remove leading /s from fsname */
1691                 while (*++s1 == '/')
1692                         ;
1693         }
1694
1695         if (*s1 == 0) {
1696                 LCONSOLE_ERROR("No filesytem specified\n");
1697                 goto invalid;
1698         }
1699
1700         /* freed in lustre_free_lsi */
1701         OBD_ALLOC(lmd->lmd_dev, strlen(s1) + 1);
1702         if (!lmd->lmd_dev)
1703                 RETURN(-ENOMEM);
1704         strcpy(lmd->lmd_dev, s1);
1705
1706         /* save mount options */
1707         s1 = options + strlen(options) - 1;
1708         while (s1 >= options && (*s1 == ',' || *s1 == ' '))
1709                 *s1-- = 0;
1710         if (*options != 0) {
1711                 /* freed in lustre_free_lsi */
1712                 OBD_ALLOC(lmd->lmd_opts, strlen(options) + 1);
1713                 if (!lmd->lmd_opts)
1714                         RETURN(-ENOMEM);
1715                 strcpy(lmd->lmd_opts, options);
1716         }
1717
1718         lmd->lmd_magic = LMD_MAGIC;
1719
1720         lmd_print(lmd);
1721         RETURN(rc);
1722
1723 invalid:
1724         CERROR("Bad mount options %s\n", options);
1725         RETURN(-EINVAL);
1726 }
1727
1728
1729 /* Common mount */
1730 int lustre_fill_super(struct super_block *sb, void *data, int silent)
1731 {
1732         struct lustre_mount_data *lmd;
1733         struct lustre_sb_info *lsi;
1734         int rc;
1735         ENTRY;
1736
1737         CDEBUG(D_MOUNT|D_VFSTRACE, "VFS Op: sb %p\n", sb);
1738
1739         lsi = lustre_init_lsi(sb);
1740         if (!lsi)
1741                 RETURN(-ENOMEM);
1742         lmd = lsi->lsi_lmd;
1743
1744         /* Figure out the lmd from the mount options */
1745         if (lmd_parse((char *)data, lmd)) {
1746                 lustre_put_lsi(sb);
1747                 RETURN(-EINVAL);
1748         }
1749
1750         if (lmd_is_client(lmd)) {
1751                 CDEBUG(D_MOUNT, "Mounting client for fs %s\n", lmd->lmd_dev);
1752                 if (!client_fill_super) {
1753                         LCONSOLE_ERROR("Nothing registered for client mount!"
1754                                " Is llite module loaded?\n");
1755                         rc = -ENOSYS;
1756                 } else {
1757                         rc = lustre_start_mgc(sb);
1758                         if (rc)
1759                                 goto out;
1760                         /* Connect and start */
1761                         /* (should always be ll_fill_super) */
1762                         rc = (*client_fill_super)(sb);
1763                         if (rc)
1764                                 lustre_common_put_super(sb);
1765                 }
1766         } else {
1767                 CDEBUG(D_MOUNT, "Mounting server from %s\n", lmd->lmd_dev);
1768                 lsi->lsi_flags |= LSI_SERVER;
1769                 rc = server_fill_super(sb);
1770                 /* s_f_s calls lustre_start_mgc after the mount because we need
1771                    the MGS nids which are stored on disk.  Plus, we may
1772                    need to start the MGS first. */
1773                 /* s_f_s will call server_put_super on failure */
1774         }
1775
1776 out:
1777         if (rc){
1778                 CERROR("Unable to mount %s\n",
1779                        s2lsi(sb) ? lmd->lmd_dev : "");
1780         } else {
1781                 CDEBUG(D_MOUNT, "Successfully mounted %s\n", lmd->lmd_dev);
1782         }
1783         RETURN(rc);
1784 }
1785
1786
1787 /* We can't call ll_fill_super by name because it lives in a module that
1788    must be loaded after this one. */
1789 void lustre_register_client_fill_super(int (*cfs)(struct super_block *sb))
1790 {
1791         client_fill_super = cfs;
1792 }
1793
1794 /***************** FS registration ******************/
1795
1796 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
1797 /* 2.5 and later */
1798 struct super_block * lustre_get_sb(struct file_system_type *fs_type,
1799                                int flags, const char *devname, void * data)
1800 {
1801         /* calls back in fill super */
1802         /* we could append devname= onto options (*data) here,
1803            but 2.4 doesn't get devname.  So we do it in mount_lustre.c */
1804         return get_sb_nodev(fs_type, flags, data, lustre_fill_super);
1805 }
1806
1807 struct file_system_type lustre_fs_type = {
1808         .owner        = THIS_MODULE,
1809         .name         = "lustre",
1810         .get_sb       = lustre_get_sb,
1811         .kill_sb      = kill_anon_super,
1812         .fs_flags     = FS_BINARY_MOUNTDATA,
1813 };
1814
1815 #else
1816 /* 2.4 */
1817 static struct super_block *lustre_read_super(struct super_block *sb,
1818                                              void *data, int silent)
1819 {
1820         int rc;
1821         ENTRY;
1822
1823         rc = lustre_fill_super(sb, data, silent);
1824         if (rc)
1825                 RETURN(NULL);
1826         RETURN(sb);
1827 }
1828
1829 static struct file_system_type lustre_fs_type = {
1830         .owner          = THIS_MODULE,
1831         .name           = "lustre",
1832         .fs_flags       = FS_NFSEXP_FSID,
1833         .read_super     = lustre_read_super,
1834 };
1835 #endif
1836
1837 int lustre_register_fs(void)
1838 {
1839         return register_filesystem(&lustre_fs_type);
1840 }
1841
1842 int lustre_unregister_fs(void)
1843 {
1844         return unregister_filesystem(&lustre_fs_type);
1845 }
1846
1847 EXPORT_SYMBOL(lustre_register_client_fill_super);
1848 EXPORT_SYMBOL(lustre_common_put_super);
1849 EXPORT_SYMBOL(lustre_process_log);
1850 EXPORT_SYMBOL(lustre_end_log);
1851 EXPORT_SYMBOL(server_get_mount);
1852 EXPORT_SYMBOL(server_put_mount);
1853 EXPORT_SYMBOL(server_register_target);
1854 EXPORT_SYMBOL(server_name2index);
1855 EXPORT_SYMBOL(server_mti_print);
1856
1857
1858