lustre/obdclass/obd_mount.c

   1 /*
   2  * GPL HEADER START
   3  *
   4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5  *
   6  * This program is free software; you can redistribute it and/or modify
   7  * it under the terms of the GNU General Public License version 2 only,
   8  * as published by the Free Software Foundation.
   9  *
  10  * This program is distributed in the hope that it will be useful, but
  11  * WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  * General Public License version 2 for more details (a copy is included
  14  * in the LICENSE file that accompanied this code).
  15  *
  16  * You should have received a copy of the GNU General Public License
  17  * version 2 along with this program; If not, see
  18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
  19  *
  20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  21  * CA 95054 USA or visit www.sun.com if you need additional information or
  22  * have any questions.
  23  *
  24  * GPL HEADER END
  25  */
  26 /*
  27  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  28  * Use is subject to license terms.
  29  *
  30  * Copyright (c) 2011, 2012, Whamcloud, Inc.
  31  */
  32 /*
  33  * This file is part of Lustre, http://www.lustre.org/
  34  * Lustre is a trademark of Sun Microsystems, Inc.
  35  *
  36  * lustre/obdclass/obd_mount.c
  37  *
  38  * Client/server mount routines
  39  *
  40  * Author: Nathan Rutman <nathan@clusterfs.com>
  41  */
  42
  43
  #define DEBUG_SUBSYSTEM S_CLASS
  /*
   * Debug masks used by this file.  The expansions are parenthesised so the
   * '|' cannot be regrouped by operators that surround the macro at its
   * point of use.  D_WARNING can be OR-ed into D_MOUNT (left disabled).
   */
  #define D_MOUNT (D_SUPER | D_CONFIG) /* | D_WARNING */
  /* Print routine and mask used by the PRINT_CMD() dump helpers. */
  #define PRINT_CMD CDEBUG
  #define PRINT_MASK (D_SUPER | D_CONFIG)
  48
  49 #include <obd.h>
  50 #include <lvfs.h>
  51 #include <lustre_fsfilt.h>
  52 #include <obd_class.h>
  53 #include <lustre/lustre_user.h>
  54 #include <linux/version.h>
  55 #include <lustre_log.h>
  56 #include <lustre_disk.h>
  57 #include <lustre_param.h>
  58
  /*
   * Callback slots for filling and killing a super block.  Both start out
   * NULL (objects with static storage are zero-initialised); the code that
   * assigns them is not in this part of the file.
   */
  static int (*client_fill_super)(struct super_block *sb,
                                  struct vfsmount *mnt);
  static void (*kill_super_cb)(struct super_block *sb);

  /*********** mount lookup *********/

  /* Taken around lookups and updates of server_mount_info_list. */
  CFS_DEFINE_MUTEX(lustre_mount_info_lock);
  /* All registered server mounts, chained through lmi_list_chain. */
  static CFS_LIST_HEAD(server_mount_info_list);
  67
  68 static struct lustre_mount_info *server_find_mount(const char *name)
  69 {
  70         cfs_list_t *tmp;
  71         struct lustre_mount_info *lmi;
  72         ENTRY;
  73
  74         cfs_list_for_each(tmp, &server_mount_info_list) {
  75                 lmi = cfs_list_entry(tmp, struct lustre_mount_info,
  76                                      lmi_list_chain);
  77                 if (strcmp(name, lmi->lmi_name) == 0)
  78                         RETURN(lmi);
  79         }
  80         RETURN(NULL);
  81 }
  82
  83 /* we must register an obd for a mount before we call the setup routine.
  84    *_setup will call lustre_get_mount to get the mnt struct
  85    by obd_name, since we can't pass the pointer to setup. */
  86 static int server_register_mount(const char *name, struct super_block *sb,
  87                           struct vfsmount *mnt)
  88 {
  89         struct lustre_mount_info *lmi;
  90         char *name_cp;
  91         ENTRY;
  92
  93         LASSERT(mnt);
  94         LASSERT(sb);
  95
  96         OBD_ALLOC(lmi, sizeof(*lmi));
  97         if (!lmi)
  98                 RETURN(-ENOMEM);
  99         OBD_ALLOC(name_cp, strlen(name) + 1);
 100         if (!name_cp) {
 101                 OBD_FREE(lmi, sizeof(*lmi));
 102                 RETURN(-ENOMEM);
 103         }
 104         strcpy(name_cp, name);
 105
 106         cfs_mutex_lock(&lustre_mount_info_lock);
 107
 108         if (server_find_mount(name)) {
 109                 cfs_mutex_unlock(&lustre_mount_info_lock);
 110                 OBD_FREE(lmi, sizeof(*lmi));
 111                 OBD_FREE(name_cp, strlen(name) + 1);
 112                 CERROR("Already registered %s\n", name);
 113                 RETURN(-EEXIST);
 114         }
 115         lmi->lmi_name = name_cp;
 116         lmi->lmi_sb = sb;
 117         lmi->lmi_mnt = mnt;
 118         cfs_list_add(&lmi->lmi_list_chain, &server_mount_info_list);
 119
 120         cfs_mutex_unlock(&lustre_mount_info_lock);
 121
 122         CDEBUG(D_MOUNT, "reg_mnt %p from %s, vfscount=%d\n",
 123                lmi->lmi_mnt, name, mnt_get_count(lmi->lmi_mnt));
 124
 125         RETURN(0);
 126 }
 127
 128 /* when an obd no longer needs a mount */
 129 static int server_deregister_mount(const char *name)
 130 {
 131         struct lustre_mount_info *lmi;
 132         ENTRY;
 133
 134         cfs_mutex_lock(&lustre_mount_info_lock);
 135         lmi = server_find_mount(name);
 136         if (!lmi) {
 137                 cfs_mutex_unlock(&lustre_mount_info_lock);
 138                 CERROR("%s not registered\n", name);
 139                 RETURN(-ENOENT);
 140         }
 141
 142         CDEBUG(D_MOUNT, "dereg_mnt %p from %s, vfscount=%d\n",
 143                lmi->lmi_mnt, name, mnt_get_count(lmi->lmi_mnt));
 144
 145         OBD_FREE(lmi->lmi_name, strlen(lmi->lmi_name) + 1);
 146         cfs_list_del(&lmi->lmi_list_chain);
 147         OBD_FREE(lmi, sizeof(*lmi));
 148         cfs_mutex_unlock(&lustre_mount_info_lock);
 149
 150         RETURN(0);
 151 }
 152
 153 /* obd's look up a registered mount using their obdname. This is just
 154    for initial obd setup to find the mount struct.  It should not be
 155    called every time you want to mntget. */
 156 struct lustre_mount_info *server_get_mount(const char *name)
 157 {
 158         struct lustre_mount_info *lmi;
 159         struct lustre_sb_info *lsi;
 160         ENTRY;
 161
 162         cfs_mutex_lock(&lustre_mount_info_lock);
 163         lmi = server_find_mount(name);
 164         cfs_mutex_unlock(&lustre_mount_info_lock);
 165         if (!lmi) {
 166                 CERROR("Can't find mount for %s\n", name);
 167                 RETURN(NULL);
 168         }
 169         lsi = s2lsi(lmi->lmi_sb);
 170         mntget(lmi->lmi_mnt);
 171         cfs_atomic_inc(&lsi->lsi_mounts);
 172
 173         CDEBUG(D_MOUNT, "get_mnt %p from %s, refs=%d, vfscount=%d\n",
 174                lmi->lmi_mnt, name, cfs_atomic_read(&lsi->lsi_mounts),
 175                mnt_get_count(lmi->lmi_mnt));
 176
 177         RETURN(lmi);
 178 }
 179
 180 /*
 181  * Used by mdt to get mount_info from obdname.
 182  * There are no blocking when using the mount_info.
 183  * Do not use server_get_mount for this purpose.
 184  */
 185 struct lustre_mount_info *server_get_mount_2(const char *name)
 186 {
 187         struct lustre_mount_info *lmi;
 188         ENTRY;
 189
 190         cfs_mutex_lock(&lustre_mount_info_lock);
 191         lmi = server_find_mount(name);
 192         cfs_mutex_unlock(&lustre_mount_info_lock);
 193         if (!lmi)
 194                 CERROR("Can't find mount for %s\n", name);
 195
 196         RETURN(lmi);
 197 }
 198
 /**
  * Drop one reference on \a mnt.
  *
  * Where HAVE_KERNEL_LOCKED is defined and kernel_locked() reports the lock
  * as held, the lock is released around mntput() and re-taken afterwards.
  */
 static void unlock_mntput(struct vfsmount *mnt)
 {
 #ifdef HAVE_KERNEL_LOCKED
         /* for kernel < 2.6.37 */
         if (!kernel_locked()) {
                 mntput(mnt);
                 return;
         }

         unlock_kernel();
         mntput(mnt);
         lock_kernel();
 #else
         mntput(mnt);
 #endif
 }
 214
 215 static int lustre_put_lsi(struct super_block *sb);
 216
 217 /* to be called from obd_cleanup methods */
 218 int server_put_mount(const char *name, struct vfsmount *mnt)
 219 {
 220         struct lustre_mount_info *lmi;
 221         struct lustre_sb_info *lsi;
 222         int count = mnt_get_count(mnt) - 1;
 223         ENTRY;
 224
 225         /* This might be the last one, can't deref after this */
 226         unlock_mntput(mnt);
 227
 228         cfs_mutex_lock(&lustre_mount_info_lock);
 229         lmi = server_find_mount(name);
 230         cfs_mutex_unlock(&lustre_mount_info_lock);
 231         if (!lmi) {
 232                 CERROR("Can't find mount for %s\n", name);
 233                 RETURN(-ENOENT);
 234         }
 235         lsi = s2lsi(lmi->lmi_sb);
 236         LASSERT(lmi->lmi_mnt == mnt);
 237
 238         CDEBUG(D_MOUNT, "put_mnt %p from %s, refs=%d, vfscount=%d\n",
 239                lmi->lmi_mnt, name, cfs_atomic_read(&lsi->lsi_mounts), count);
 240
 241         if (lustre_put_lsi(lmi->lmi_sb)) {
 242                 CDEBUG(D_MOUNT, "Last put of mnt %p from %s, vfscount=%d\n",
 243                        lmi->lmi_mnt, name, count);
 244                 /* last mount is the One True Mount */
 245                 if (count > 1)
 246                         CERROR("%s: mount busy, vfscount=%d!\n", name, count);
 247         }
 248
 249         /* this obd should never need the mount again */
 250         server_deregister_mount(name);
 251
 252         RETURN(0);
 253 }
 254
 255 /* Corresponding to server_get_mount_2 */
 256 int server_put_mount_2(const char *name, struct vfsmount *mnt)
 257 {
 258         ENTRY;
 259         RETURN(0);
 260 }
 261
 262 /******* mount helper utilities *********/
 263
 #if 0
 /* Dump the fields of \a ldd through PRINT_CMD; currently compiled out. */
 static void ldd_print(struct lustre_disk_data *ldd)
 {
         /* header line first, then one line per field */
         PRINT_CMD(PRINT_MASK, "  disk data:\n");

         PRINT_CMD(PRINT_MASK, "server:  %s\n", ldd->ldd_svname);
         PRINT_CMD(PRINT_MASK, "uuid:    %s\n", (char *)ldd->ldd_uuid);
         PRINT_CMD(PRINT_MASK, "fs:      %s\n", ldd->ldd_fsname);
         PRINT_CMD(PRINT_MASK, "index:   %04x\n", ldd->ldd_svindex);
         PRINT_CMD(PRINT_MASK, "config:  %d\n", ldd->ldd_config_ver);
         PRINT_CMD(PRINT_MASK, "flags:   %#x\n", ldd->ldd_flags);
         PRINT_CMD(PRINT_MASK, "diskfs:  %s\n", MT_STR(ldd));

         PRINT_CMD(PRINT_MASK, "options: %s\n", ldd->ldd_mount_opts);
         PRINT_CMD(PRINT_MASK, "params:  %s\n", ldd->ldd_params);
         PRINT_CMD(PRINT_MASK, "comment: %s\n", ldd->ldd_userdata);
 }
 #endif
 280
 281 static int ldd_parse(struct lvfs_run_ctxt *mount_ctxt,
 282                      struct lustre_disk_data *ldd)
 283 {
 284         struct lvfs_run_ctxt saved;
 285         struct file *file;
 286         loff_t off = 0;
 287         unsigned long len;
 288         int rc;
 289         ENTRY;
 290
 291         push_ctxt(&saved, mount_ctxt, NULL);
 292
 293         file = filp_open(MOUNT_DATA_FILE, O_RDONLY, 0644);
 294         if (IS_ERR(file)) {
 295                 rc = PTR_ERR(file);
 296                 CERROR("cannot open %s: rc = %d\n", MOUNT_DATA_FILE, rc);
 297                 GOTO(out, rc);
 298         }
 299
 300         len = i_size_read(file->f_dentry->d_inode);
 301         CDEBUG(D_MOUNT, "Have %s, size %lu\n", MOUNT_DATA_FILE, len);
 302         if (len != sizeof(*ldd)) {
 303                 CERROR("disk data size does not match: see %lu expect %u\n",
 304                        len, (int)sizeof(*ldd));
 305                 GOTO(out_close, rc = -EINVAL);
 306         }
 307
 308         rc = lustre_fread(file, ldd, len, &off);
 309         if (rc != len) {
 310                 CERROR("error reading %s: read %d of %lu\n",
 311                        MOUNT_DATA_FILE, rc, len);
 312                 GOTO(out_close, rc = -EINVAL);
 313         }
 314         rc = 0;
 315
 316         if (ldd->ldd_magic != LDD_MAGIC) {
 317                 /* FIXME add swabbing support */
 318                 CERROR("Bad magic in %s: %x!=%x\n", MOUNT_DATA_FILE,
 319                        ldd->ldd_magic, LDD_MAGIC);
 320                 GOTO(out_close, rc = -EINVAL);
 321         }
 322
 323         if (ldd->ldd_feature_incompat & ~LDD_INCOMPAT_SUPP) {
 324                 CERROR("%s: unsupported incompat filesystem feature(s) %x\n",
 325                        ldd->ldd_svname,
 326                        ldd->ldd_feature_incompat & ~LDD_INCOMPAT_SUPP);
 327                 GOTO(out_close, rc = -EINVAL);
 328         }
 329         if (ldd->ldd_feature_rocompat & ~LDD_ROCOMPAT_SUPP) {
 330                 CERROR("%s: unsupported read-only filesystem feature(s) %x\n",
 331                        ldd->ldd_svname,
 332                        ldd->ldd_feature_rocompat & ~LDD_ROCOMPAT_SUPP);
 333                 /* Do something like remount filesystem read-only */
 334                 GOTO(out_close, rc = -EINVAL);
 335         }
 336
 337 out_close:
 338         filp_close(file, 0);
 339 out:
 340         pop_ctxt(&saved, mount_ctxt, NULL);
 341         RETURN(rc);
 342 }
 343
 344 static int ldd_write(struct lvfs_run_ctxt *mount_ctxt,
 345                      struct lustre_disk_data *ldd)
 346 {
 347         struct lvfs_run_ctxt saved;
 348         struct file *file;
 349         loff_t off = 0;
 350         unsigned long len = sizeof(struct lustre_disk_data);
 351         int rc = 0;
 352         ENTRY;
 353
 354         LASSERT(ldd->ldd_magic == LDD_MAGIC);
 355
 356         ldd->ldd_config_ver++;
 357
 358         push_ctxt(&saved, mount_ctxt, NULL);
 359
 360         file = filp_open(MOUNT_DATA_FILE, O_RDWR|O_SYNC, 0644);
 361         if (IS_ERR(file)) {
 362                 rc = PTR_ERR(file);
 363                 CERROR("cannot open %s: rc = %d\n", MOUNT_DATA_FILE, rc);
 364                 GOTO(out, rc);
 365         }
 366
 367         rc = lustre_fwrite(file, ldd, len, &off);
 368         if (rc != len) {
 369                 CERROR("error writing %s: read %d of %lu\n",
 370                        MOUNT_DATA_FILE, rc, len);
 371                 GOTO(out_close, rc = -EINVAL);
 372         }
 373
 374         rc = 0;
 375
 376 out_close:
 377         filp_close(file, 0);
 378 out:
 379         pop_ctxt(&saved, mount_ctxt, NULL);
 380         RETURN(rc);
 381 }
 382
 383
 384 /**************** config llog ********************/
 385
 386 /** Get a config log from the MGS and process it.
 387  * This func is called for both clients and servers.
 388  * Continue to process new statements appended to the logs
 389  * (whenever the config lock is revoked) until lustre_end_log
 390  * is called.
 391  * @param sb The superblock is used by the MGC to write to the local copy of
 392  *   the config log
 393  * @param logname The name of the llog to replicate from the MGS
 394  * @param cfg Since the same mgc may be used to follow multiple config logs
 395  *   (e.g. ost1, ost2, client), the config_llog_instance keeps the state for
 396  *   this log, and is added to the mgc's list of logs to follow.
 397  */
 398 int lustre_process_log(struct super_block *sb, char *logname,
 399                      struct config_llog_instance *cfg)
 400 {
 401         struct lustre_cfg *lcfg;
 402         struct lustre_cfg_bufs *bufs;
 403         struct lustre_sb_info *lsi = s2lsi(sb);
 404         struct obd_device *mgc = lsi->lsi_mgc;
 405         int rc;
 406         ENTRY;
 407
 408         LASSERT(mgc);
 409         LASSERT(cfg);
 410
 411         OBD_ALLOC_PTR(bufs);
 412         if (bufs == NULL)
 413                 RETURN(-ENOMEM);
 414
 415         /* mgc_process_config */
 416         lustre_cfg_bufs_reset(bufs, mgc->obd_name);
 417         lustre_cfg_bufs_set_string(bufs, 1, logname);
 418         lustre_cfg_bufs_set(bufs, 2, cfg, sizeof(*cfg));
 419         lustre_cfg_bufs_set(bufs, 3, &sb, sizeof(sb));
 420         lcfg = lustre_cfg_new(LCFG_LOG_START, bufs);
 421         rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
 422         lustre_cfg_free(lcfg);
 423
 424         OBD_FREE_PTR(bufs);
 425
 426         if (rc == -EINVAL)
 427                 LCONSOLE_ERROR_MSG(0x15b, "%s: The configuration from log '%s'"
 428                                    "failed from the MGS (%d).  Make sure this "
 429                                    "client and the MGS are running compatible "
 430                                    "versions of Lustre.\n",
 431                                    mgc->obd_name, logname, rc);
 432
 433         if (rc)
 434                 LCONSOLE_ERROR_MSG(0x15c, "%s: The configuration from log '%s' "
 435                                    "failed (%d). This may be the result of "
 436                                    "communication errors between this node and "
 437                                    "the MGS, a bad configuration, or other "
 438                                    "errors. See the syslog for more "
 439                                    "information.\n", mgc->obd_name, logname,
 440                                    rc);
 441
 442         /* class_obd_list(); */
 443         RETURN(rc);
 444 }
 445
 446 /* Stop watching this config log for updates */
 447 int lustre_end_log(struct super_block *sb, char *logname,
 448                        struct config_llog_instance *cfg)
 449 {
 450         struct lustre_cfg *lcfg;
 451         struct lustre_cfg_bufs bufs;
 452         struct lustre_sb_info *lsi = s2lsi(sb);
 453         struct obd_device *mgc = lsi->lsi_mgc;
 454         int rc;
 455         ENTRY;
 456
 457         if (!mgc)
 458                 RETURN(-ENOENT);
 459
 460         /* mgc_process_config */
 461         lustre_cfg_bufs_reset(&bufs, mgc->obd_name);
 462         lustre_cfg_bufs_set_string(&bufs, 1, logname);
 463         if (cfg)
 464                 lustre_cfg_bufs_set(&bufs, 2, cfg, sizeof(*cfg));
 465         lcfg = lustre_cfg_new(LCFG_LOG_END, &bufs);
 466         rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
 467         lustre_cfg_free(lcfg);
 468         RETURN(rc);
 469 }
 470
 471 /**************** obd start *******************/
 472
 473 /** lustre_cfg_bufs are a holdover from 1.4; we can still set these up from
 474  * lctl (and do for echo cli/srv.
 475  */
 476 int do_lcfg(char *cfgname, lnet_nid_t nid, int cmd,
 477             char *s1, char *s2, char *s3, char *s4)
 478 {
 479         struct lustre_cfg_bufs bufs;
 480         struct lustre_cfg    * lcfg = NULL;
 481         int rc;
 482
 483         CDEBUG(D_TRACE, "lcfg %s %#x %s %s %s %s\n", cfgname,
 484                cmd, s1, s2, s3, s4);
 485
 486         lustre_cfg_bufs_reset(&bufs, cfgname);
 487         if (s1)
 488                 lustre_cfg_bufs_set_string(&bufs, 1, s1);
 489         if (s2)
 490                 lustre_cfg_bufs_set_string(&bufs, 2, s2);
 491         if (s3)
 492                 lustre_cfg_bufs_set_string(&bufs, 3, s3);
 493         if (s4)
 494                 lustre_cfg_bufs_set_string(&bufs, 4, s4);
 495
 496         lcfg = lustre_cfg_new(cmd, &bufs);
 497         lcfg->lcfg_nid = nid;
 498         rc = class_process_config(lcfg);
 499         lustre_cfg_free(lcfg);
 500         return(rc);
 501 }
 502
 503 /** Call class_attach and class_setup.  These methods in turn call
 504  * obd type-specific methods.
 505  */
 506 static int lustre_start_simple(char *obdname, char *type, char *uuid,
 507                                char *s1, char *s2)
 508 {
 509         int rc;
 510         CDEBUG(D_MOUNT, "Starting obd %s (typ=%s)\n", obdname, type);
 511
 512         rc = do_lcfg(obdname, 0, LCFG_ATTACH, type, uuid, 0, 0);
 513         if (rc) {
 514                 CERROR("%s attach error %d\n", obdname, rc);
 515                 return(rc);
 516         }
 517         rc = do_lcfg(obdname, 0, LCFG_SETUP, s1, s2, 0, 0);
 518         if (rc) {
 519                 CERROR("%s setup error %d\n", obdname, rc);
 520                 do_lcfg(obdname, 0, LCFG_DETACH, 0, 0, 0, 0);
 521         }
 522         return rc;
 523 }
 524
 525 /* Set up a MGS to serve startup logs */
 526 static int server_start_mgs(struct super_block *sb)
 527 {
 528         struct lustre_sb_info    *lsi = s2lsi(sb);
 529         struct vfsmount          *mnt = lsi->lsi_srv_mnt;
 530         struct lustre_mount_info *lmi;
 531         int    rc = 0;
 532         ENTRY;
 533         LASSERT(mnt);
 534
 535         /* It is impossible to have more than 1 MGS per node, since
 536            MGC wouldn't know which to connect to */
 537         lmi = server_find_mount(LUSTRE_MGS_OBDNAME);
 538         if (lmi) {
 539                 lsi = s2lsi(lmi->lmi_sb);
 540                 LCONSOLE_ERROR_MSG(0x15d, "The MGS service was already started"
 541                                    " from server %s\n",
 542                                    lsi->lsi_ldd->ldd_svname);
 543                 RETURN(-EALREADY);
 544         }
 545
 546         CDEBUG(D_CONFIG, "Start MGS service %s\n", LUSTRE_MGS_OBDNAME);
 547
 548         rc = server_register_mount(LUSTRE_MGS_OBDNAME, sb, mnt);
 549
 550         if (!rc) {
 551                 rc = lustre_start_simple(LUSTRE_MGS_OBDNAME, LUSTRE_MGS_NAME,
 552                                          LUSTRE_MGS_OBDNAME, 0, 0);
 553                 /* Do NOT call server_deregister_mount() here. This leads to
 554                  * inability cleanup cleanly and free lsi and other stuff when
 555                  * mgs calls server_put_mount() in error handling case. -umka */
 556         }
 557
 558         if (rc)
 559                 LCONSOLE_ERROR_MSG(0x15e, "Failed to start MGS '%s' (%d). "
 560                                    "Is the 'mgs' module loaded?\n",
 561                                    LUSTRE_MGS_OBDNAME, rc);
 562         RETURN(rc);
 563 }
 564
 565 static int server_stop_mgs(struct super_block *sb)
 566 {
 567         struct obd_device *obd;
 568         int rc;
 569         ENTRY;
 570
 571         CDEBUG(D_MOUNT, "Stop MGS service %s\n", LUSTRE_MGS_OBDNAME);
 572
 573         /* There better be only one MGS */
 574         obd = class_name2obd(LUSTRE_MGS_OBDNAME);
 575         if (!obd) {
 576                 CDEBUG(D_CONFIG, "mgs %s not running\n", LUSTRE_MGS_OBDNAME);
 577                 RETURN(-EALREADY);
 578         }
 579
 580         /* The MGS should always stop when we say so */
 581         obd->obd_force = 1;
 582         rc = class_manual_cleanup(obd);
 583         RETURN(rc);
 584 }
 585
 /* Held by lustre_start_mgc() while it looks up, reuses or starts an MGC. */
 586 CFS_DEFINE_MUTEX(mgc_start_lock);
 587
 588 /** Set up a mgc obd to process startup logs
 589  *
 590  * \param sb [in] super block of the mgc obd
 591  *
 592  * \retval 0 success, otherwise error code
 593  */
 594 static int lustre_start_mgc(struct super_block *sb)
 595 {
 596         struct obd_connect_data *data = NULL;
 597         struct lustre_sb_info *lsi = s2lsi(sb);
 598         struct obd_device *obd;
 599         struct obd_export *exp;
 600         struct obd_uuid *uuid;
 601         class_uuid_t uuidc;
 602         lnet_nid_t nid;
 603         char *mgcname = NULL, *niduuid = NULL, *mgssec = NULL;
 604         char *ptr;
 605         int recov_bk;
 606         int rc = 0, i = 0, j, len;
 607         ENTRY;
 608
 609         LASSERT(lsi->lsi_lmd);
 610
 611         /* Find the first non-lo MGS nid for our MGC name */
 612         if (lsi->lsi_flags & LSI_SERVER) {
 613                 ptr = lsi->lsi_ldd->ldd_params;
 614                 /* Use mgsnode= nids */
 615                 if ((class_find_param(ptr, PARAM_MGSNODE, &ptr) == 0) &&
 616                     (class_parse_nid(ptr, &nid, &ptr) == 0)) {
 617                         i++;
 618                 } else if (IS_MGS(lsi->lsi_ldd)) {
 619                         lnet_process_id_t id;
 620                         while ((rc = LNetGetId(i++, &id)) != -ENOENT) {
 621                                 if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND)
 622                                         continue;
 623                                 nid = id.nid;
 624                                 i++;
 625                                 break;
 626                         }
 627                 }
 628         } else { /* client */
 629                 /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
 630                 ptr = lsi->lsi_lmd->lmd_dev;
 631                 if (class_parse_nid(ptr, &nid, &ptr) == 0)
 632                         i++;
 633         }
 634         if (i == 0) {
 635                 CERROR("No valid MGS nids found.\n");
 636                 RETURN(-EINVAL);
 637         }
 638
 639         cfs_mutex_lock(&mgc_start_lock);
 640
 641         len = strlen(LUSTRE_MGC_OBDNAME) + strlen(libcfs_nid2str(nid)) + 1;
 642         OBD_ALLOC(mgcname, len);
 643         OBD_ALLOC(niduuid, len + 2);
 644         if (!mgcname || !niduuid)
 645                 GOTO(out_free, rc = -ENOMEM);
 646         sprintf(mgcname, "%s%s", LUSTRE_MGC_OBDNAME, libcfs_nid2str(nid));
 647
 648         mgssec = lsi->lsi_lmd->lmd_mgssec ? lsi->lsi_lmd->lmd_mgssec : "";
 649
 650         OBD_ALLOC_PTR(data);
 651         if (data == NULL)
 652                 GOTO(out_free, rc = -ENOMEM);
 653
 654         obd = class_name2obd(mgcname);
 655         if (obd && !obd->obd_stopping) {
 656                 rc = obd_set_info_async(NULL, obd->obd_self_export,
 657                                         strlen(KEY_MGSSEC), KEY_MGSSEC,
 658                                         strlen(mgssec), mgssec, NULL);
 659                 if (rc)
 660                         GOTO(out_free, rc);
 661
 662                 /* Re-using an existing MGC */
 663                 cfs_atomic_inc(&obd->u.cli.cl_mgc_refcount);
 664
 665                 /* IR compatibility check, only for clients */
 666                 if (lmd_is_client(lsi->lsi_lmd)) {
 667                         int has_ir;
 668                         int vallen = sizeof(*data);
 669                         __u32 *flags = &lsi->lsi_lmd->lmd_flags;
 670
 671                         rc = obd_get_info(NULL, obd->obd_self_export,
 672                                           strlen(KEY_CONN_DATA), KEY_CONN_DATA,
 673                                           &vallen, data, NULL);
 674                         LASSERT(rc == 0);
 675                         has_ir = OCD_HAS_FLAG(data, IMP_RECOV);
 676                         if (has_ir ^ !(*flags & LMD_FLG_NOIR)) {
 677                                 /* LMD_FLG_NOIR is for test purpose only */
 678                                 LCONSOLE_WARN(
 679                                     "Trying to mount a client with IR setting "
 680                                     "not compatible with current mgc. "
 681                                     "Force to use current mgc setting that is "
 682                                     "IR %s.\n",
 683                                     has_ir ? "enabled" : "disabled");
 684                                 if (has_ir)
 685                                         *flags &= ~LMD_FLG_NOIR;
 686                                 else
 687                                         *flags |= LMD_FLG_NOIR;
 688                         }
 689                 }
 690
 691                 recov_bk = 0;
 692                 /* If we are restarting the MGS, don't try to keep the MGC's
 693                    old connection, or registration will fail. */
 694                 if ((lsi->lsi_flags & LSI_SERVER) && IS_MGS(lsi->lsi_ldd)) {
 695                         CDEBUG(D_MOUNT, "New MGS with live MGC\n");
 696                         recov_bk = 1;
 697                 }
 698
 699                 /* Try all connections, but only once (again).
 700                    We don't want to block another target from starting
 701                    (using its local copy of the log), but we do want to connect
 702                    if at all possible. */
 703                 recov_bk++;
 704                 CDEBUG(D_MOUNT, "%s: Set MGC reconnect %d\n", mgcname,recov_bk);
 705                 rc = obd_set_info_async(NULL, obd->obd_self_export,
 706                                         sizeof(KEY_INIT_RECOV_BACKUP),
 707                                         KEY_INIT_RECOV_BACKUP,
 708                                         sizeof(recov_bk), &recov_bk, NULL);
 709                 GOTO(out, rc = 0);
 710         }
 711
 712         CDEBUG(D_MOUNT, "Start MGC '%s'\n", mgcname);
 713
 714         /* Add the primary nids for the MGS */
 715         i = 0;
 716         sprintf(niduuid, "%s_%x", mgcname, i);
 717         if (lsi->lsi_flags & LSI_SERVER) {
 718                 ptr = lsi->lsi_ldd->ldd_params;
 719                 if (IS_MGS(lsi->lsi_ldd)) {
 720                         /* Use local nids (including LO) */
 721                         lnet_process_id_t id;
 722                         while ((rc = LNetGetId(i++, &id)) != -ENOENT) {
 723                                 rc = do_lcfg(mgcname, id.nid,
 724                                              LCFG_ADD_UUID, niduuid, 0,0,0);
 725                         }
 726                 } else {
 727                         /* Use mgsnode= nids */
 728                         if (class_find_param(ptr, PARAM_MGSNODE, &ptr) != 0) {
 729                                 CERROR("No MGS nids given.\n");
 730                                 GOTO(out_free, rc = -EINVAL);
 731                         }
 732                         while (class_parse_nid(ptr, &nid, &ptr) == 0) {
 733                                 rc = do_lcfg(mgcname, nid,
 734                                              LCFG_ADD_UUID, niduuid, 0,0,0);
 735                                 i++;
 736                         }
 737                 }
 738         } else { /* client */
 739                 /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
 740                 ptr = lsi->lsi_lmd->lmd_dev;
 741                 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
 742                         rc = do_lcfg(mgcname, nid,
 743                                      LCFG_ADD_UUID, niduuid, 0,0,0);
 744                         i++;
 745                         /* Stop at the first failover nid */
 746                         if (*ptr == ':')
 747                                 break;
 748                 }
 749         }
 750         if (i == 0) {
 751                 CERROR("No valid MGS nids found.\n");
 752                 GOTO(out_free, rc = -EINVAL);
 753         }
 754         lsi->lsi_lmd->lmd_mgs_failnodes = 1;
 755
 756         /* Random uuid for MGC allows easier reconnects */
 757         OBD_ALLOC_PTR(uuid);
 758         ll_generate_random_uuid(uuidc);
 759         class_uuid_unparse(uuidc, uuid);
 760
 761         /* Start the MGC */
 762         rc = lustre_start_simple(mgcname, LUSTRE_MGC_NAME,
 763                                  (char *)uuid->uuid, LUSTRE_MGS_OBDNAME,
 764                                  niduuid);
 765         OBD_FREE_PTR(uuid);
 766         if (rc)
 767                 GOTO(out_free, rc);
 768
 769         /* Add any failover MGS nids */
 770         i = 1;
 771         while ((*ptr == ':' ||
 772                 class_find_param(ptr, PARAM_MGSNODE, &ptr) == 0)) {
 773                 /* New failover node */
 774                 sprintf(niduuid, "%s_%x", mgcname, i);
 775                 j = 0;
 776                 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
 777                         j++;
 778                         rc = do_lcfg(mgcname, nid,
 779                                      LCFG_ADD_UUID, niduuid, 0,0,0);
 780                         if (*ptr == ':')
 781                                 break;
 782                 }
 783                 if (j > 0) {
 784                         rc = do_lcfg(mgcname, 0, LCFG_ADD_CONN,
 785                                      niduuid, 0, 0, 0);
 786                         i++;
 787                 } else {
 788                         /* at ":/fsname" */
 789                         break;
 790                 }
 791         }
 792         lsi->lsi_lmd->lmd_mgs_failnodes = i;
 793
 794         obd = class_name2obd(mgcname);
 795         if (!obd) {
 796                 CERROR("Can't find mgcobd %s\n", mgcname);
 797                 GOTO(out_free, rc = -ENOTCONN);
 798         }
 799
 800         rc = obd_set_info_async(NULL, obd->obd_self_export,
 801                                 strlen(KEY_MGSSEC), KEY_MGSSEC,
 802                                 strlen(mgssec), mgssec, NULL);
 803         if (rc)
 804                 GOTO(out_free, rc);
 805
 806         /* Keep a refcount of servers/clients who started with "mount",
 807            so we know when we can get rid of the mgc. */
 808         cfs_atomic_set(&obd->u.cli.cl_mgc_refcount, 1);
 809
 810         /* Try all connections, but only once. */
 811         recov_bk = 1;
 812         rc = obd_set_info_async(NULL, obd->obd_self_export,
 813                                 sizeof(KEY_INIT_RECOV_BACKUP),
 814                                 KEY_INIT_RECOV_BACKUP,
 815                                 sizeof(recov_bk), &recov_bk, NULL);
 816         if (rc)
 817                 /* nonfatal */
 818                 CWARN("can't set %s %d\n", KEY_INIT_RECOV_BACKUP, rc);
 819         /* We connect to the MGS at setup, and don't disconnect until cleanup */
 820         data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_FID |
 821                                   OBD_CONNECT_AT | OBD_CONNECT_FULL20   |
 822                                   OBD_CONNECT_IMP_RECOV;
 823         if (lmd_is_client(lsi->lsi_lmd) &&
 824             lsi->lsi_lmd->lmd_flags & LMD_FLG_NOIR)
 825                 data->ocd_connect_flags &= ~OBD_CONNECT_IMP_RECOV;
 826         data->ocd_version = LUSTRE_VERSION_CODE;
 827         rc = obd_connect(NULL, &exp, obd, &(obd->obd_uuid), data, NULL);
 828         if (rc) {
 829                 CERROR("connect failed %d\n", rc);
 830                 GOTO(out, rc);
 831         }
 832
 833         obd->u.cli.cl_mgc_mgsexp = exp;
 834
 835 out:
 836         /* Keep the mgc info in the sb. Note that many lsi's can point
 837            to the same mgc.*/
 838         lsi->lsi_mgc = obd;
 839 out_free:
 840         cfs_mutex_unlock(&mgc_start_lock);
 841
 842         if (data)
 843                 OBD_FREE_PTR(data);
 844         if (mgcname)
 845                 OBD_FREE(mgcname, len);
 846         if (niduuid)
 847                 OBD_FREE(niduuid, len + 2);
 848         RETURN(rc);
 849 }
 850
 851 static int lustre_stop_mgc(struct super_block *sb)
 852 {
 853         struct lustre_sb_info *lsi = s2lsi(sb);
 854         struct obd_device *obd;
 855         char *niduuid = 0, *ptr = 0;
 856         int i, rc = 0, len = 0;
 857         ENTRY;
 858
 859         if (!lsi)
 860                 RETURN(-ENOENT);
 861         obd = lsi->lsi_mgc;
 862         if (!obd)
 863                 RETURN(-ENOENT);
 864         lsi->lsi_mgc = NULL;
 865
 866         cfs_mutex_lock(&mgc_start_lock);
 867         LASSERT(cfs_atomic_read(&obd->u.cli.cl_mgc_refcount) > 0);
 868         if (!cfs_atomic_dec_and_test(&obd->u.cli.cl_mgc_refcount)) {
 869                 /* This is not fatal, every client that stops
 870                    will call in here. */
 871                 CDEBUG(D_MOUNT, "mgc still has %d references.\n",
 872                        cfs_atomic_read(&obd->u.cli.cl_mgc_refcount));
 873                 GOTO(out, rc = -EBUSY);
 874         }
 875
 876         /* The MGC has no recoverable data in any case.
 877          * force shotdown set in umount_begin */
 878         obd->obd_no_recov = 1;
 879
 880         if (obd->u.cli.cl_mgc_mgsexp) {
 881                 /* An error is not fatal, if we are unable to send the
 882                    disconnect mgs ping evictor cleans up the export */
 883                 rc = obd_disconnect(obd->u.cli.cl_mgc_mgsexp);
 884                 if (rc)
 885                         CDEBUG(D_MOUNT, "disconnect failed %d\n", rc);
 886         }
 887
 888         /* Save the obdname for cleaning the nid uuids, which are
 889            obdname_XX */
 890         len = strlen(obd->obd_name) + 6;
 891         OBD_ALLOC(niduuid, len);
 892         if (niduuid) {
 893                 strcpy(niduuid, obd->obd_name);
 894                 ptr = niduuid + strlen(niduuid);
 895         }
 896
 897         rc = class_manual_cleanup(obd);
 898         if (rc)
 899                 GOTO(out, rc);
 900
 901         /* Clean the nid uuids */
 902         if (!niduuid)
 903                 GOTO(out, rc = -ENOMEM);
 904
 905         for (i = 0; i < lsi->lsi_lmd->lmd_mgs_failnodes; i++) {
 906                 sprintf(ptr, "_%x", i);
 907                 rc = do_lcfg(LUSTRE_MGC_OBDNAME, 0, LCFG_DEL_UUID,
 908                              niduuid, 0, 0, 0);
 909                 if (rc)
 910                         CERROR("del MDC UUID %s failed: rc = %d\n",
 911                                niduuid, rc);
 912         }
 913 out:
 914         if (niduuid)
 915                 OBD_FREE(niduuid, len);
 916
 917         /* class_import_put will get rid of the additional connections */
 918         cfs_mutex_unlock(&mgc_start_lock);
 919         RETURN(rc);
 920 }
 921
 922 /* Since there's only one mgc per node, we have to change it's fs to get
 923    access to the right disk. */
 924 static int server_mgc_set_fs(struct obd_device *mgc, struct super_block *sb)
 925 {
 926         struct lustre_sb_info *lsi = s2lsi(sb);
 927         int rc;
 928         ENTRY;
 929
 930         CDEBUG(D_MOUNT, "Set mgc disk for %s\n", lsi->lsi_lmd->lmd_dev);
 931
 932         /* cl_mgc_sem in mgc insures we sleep if the mgc_fs is busy */
 933         rc = obd_set_info_async(NULL, mgc->obd_self_export,
 934                                 sizeof(KEY_SET_FS), KEY_SET_FS,
 935                                 sizeof(*sb), sb, NULL);
 936         if (rc) {
 937                 CERROR("can't set_fs %d\n", rc);
 938         }
 939
 940         RETURN(rc);
 941 }
 942
 943 static int server_mgc_clear_fs(struct obd_device *mgc)
 944 {
 945         int rc;
 946         ENTRY;
 947
 948         CDEBUG(D_MOUNT, "Unassign mgc disk\n");
 949
 950         rc = obd_set_info_async(NULL, mgc->obd_self_export,
 951                                 sizeof(KEY_CLEAR_FS), KEY_CLEAR_FS,
 952                                 0, NULL, NULL);
 953         RETURN(rc);
 954 }
 955
/* Held while the shared MDS/OSS service devices are looked up and started
   (server_start_targets) or cleaned up (server_stop_servers). */
CFS_DEFINE_MUTEX(server_start_lock);
 957
 958 /* Stop MDS/OSS if nobody is using them */
 959 static int server_stop_servers(int lddflags, int lsiflags)
 960 {
 961         struct obd_device *obd = NULL;
 962         struct obd_type *type = NULL;
 963         int rc = 0;
 964         ENTRY;
 965
 966         cfs_mutex_lock(&server_start_lock);
 967
 968         /* Either an MDT or an OST or neither  */
 969         /* if this was an MDT, and there are no more MDT's, clean up the MDS */
 970         if ((lddflags & LDD_F_SV_TYPE_MDT) &&
 971             (obd = class_name2obd(LUSTRE_MDS_OBDNAME))) {
 972                 /*FIXME pre-rename, should eventually be LUSTRE_MDT_NAME*/
 973                 type = class_search_type(LUSTRE_MDS_NAME);
 974         }
 975         /* if this was an OST, and there are no more OST's, clean up the OSS */
 976         if ((lddflags & LDD_F_SV_TYPE_OST) &&
 977             (obd = class_name2obd(LUSTRE_OSS_OBDNAME))) {
 978                 type = class_search_type(LUSTRE_OST_NAME);
 979         }
 980
 981         if (obd && (!type || !type->typ_refcnt)) {
 982                 int err;
 983                 obd->obd_force = 1;
 984                 /* obd_fail doesn't mean much on a server obd */
 985                 err = class_manual_cleanup(obd);
 986                 if (!rc)
 987                         rc = err;
 988         }
 989
 990         cfs_mutex_unlock(&server_start_lock);
 991
 992         RETURN(rc);
 993 }
 994
 995 int server_mti_print(char *title, struct mgs_target_info *mti)
 996 {
 997         PRINT_CMD(PRINT_MASK, "mti %s\n", title);
 998         PRINT_CMD(PRINT_MASK, "server: %s\n", mti->mti_svname);
 999         PRINT_CMD(PRINT_MASK, "fs:     %s\n", mti->mti_fsname);
1000         PRINT_CMD(PRINT_MASK, "uuid:   %s\n", mti->mti_uuid);
1001         PRINT_CMD(PRINT_MASK, "ver: %d  flags: %#x\n",
1002                   mti->mti_config_ver, mti->mti_flags);
1003         return(0);
1004 }
1005
1006 static int server_sb2mti(struct super_block *sb, struct mgs_target_info *mti)
1007 {
1008         struct lustre_sb_info    *lsi = s2lsi(sb);
1009         struct lustre_disk_data  *ldd = lsi->lsi_ldd;
1010         lnet_process_id_t         id;
1011         int                       i = 0;
1012         ENTRY;
1013
1014         if (!(lsi->lsi_flags & LSI_SERVER))
1015                 RETURN(-EINVAL);
1016
1017         strncpy(mti->mti_fsname, ldd->ldd_fsname,
1018                 sizeof(mti->mti_fsname));
1019         strncpy(mti->mti_svname, ldd->ldd_svname,
1020                 sizeof(mti->mti_svname));
1021
1022         mti->mti_nid_count = 0;
1023         while (LNetGetId(i++, &id) != -ENOENT) {
1024                 if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND)
1025                         continue;
1026
1027                 /* server use --servicenode param, only allow specified
1028                  * nids be registered */
1029                 if ((ldd->ldd_flags & LDD_F_NO_PRIMNODE) != 0 &&
1030                     class_match_nid(ldd->ldd_params,
1031                                     PARAM_FAILNODE, id.nid) < 1)
1032                         continue;
1033
1034                 /* match specified network */
1035                 if (!class_match_net(ldd->ldd_params,
1036                                      PARAM_NETWORK, LNET_NIDNET(id.nid)))
1037                         continue;
1038
1039                 mti->mti_nids[mti->mti_nid_count] = id.nid;
1040                 mti->mti_nid_count++;
1041                 if (mti->mti_nid_count >= MTI_NIDS_MAX) {
1042                         CWARN("Only using first %d nids for %s\n",
1043                               mti->mti_nid_count, mti->mti_svname);
1044                         break;
1045                 }
1046         }
1047
1048         mti->mti_lustre_ver = LUSTRE_VERSION_CODE;
1049         mti->mti_config_ver = 0;
1050         if (lsi->lsi_lmd->lmd_flags & LMD_FLG_WRITECONF)
1051                 ldd->ldd_flags |= LDD_F_WRITECONF;
1052         mti->mti_flags = ldd->ldd_flags;
1053         mti->mti_stripe_index = ldd->ldd_svindex;
1054         memcpy(mti->mti_uuid, ldd->ldd_uuid, sizeof(mti->mti_uuid));
1055         if (strlen(ldd->ldd_params) > sizeof(mti->mti_params)) {
1056                 CERROR("params too big for mti\n");
1057                 RETURN(-ENOMEM);
1058         }
1059         memcpy(mti->mti_params, ldd->ldd_params, sizeof(mti->mti_params));
1060         RETURN(0);
1061 }
1062
1063 /* Register an old or new target with the MGS. If needed MGS will construct
1064    startup logs and assign index */
1065 int server_register_target(struct super_block *sb)
1066 {
1067         struct lustre_sb_info *lsi = s2lsi(sb);
1068         struct obd_device *mgc = lsi->lsi_mgc;
1069         struct lustre_disk_data *ldd = lsi->lsi_ldd;
1070         struct mgs_target_info *mti = NULL;
1071         bool writeconf;
1072         int rc;
1073         ENTRY;
1074
1075         LASSERT(mgc);
1076
1077         if (!(lsi->lsi_flags & LSI_SERVER))
1078                 RETURN(-EINVAL);
1079
1080         OBD_ALLOC_PTR(mti);
1081         if (!mti)
1082                 RETURN(-ENOMEM);
1083         rc = server_sb2mti(sb, mti);
1084         if (rc)
1085                 GOTO(out, rc);
1086
1087         CDEBUG(D_MOUNT, "Registration %s, fs=%s, %s, index=%04x, flags=%#x\n",
1088                mti->mti_svname, mti->mti_fsname,
1089                libcfs_nid2str(mti->mti_nids[0]), mti->mti_stripe_index,
1090                mti->mti_flags);
1091
1092         /* if write_conf is true, the registration must succeed */
1093         writeconf = !!(ldd->ldd_flags & (LDD_F_NEED_INDEX | LDD_F_UPDATE));
1094         mti->mti_flags |= LDD_F_OPC_REG;
1095
1096         /* Register the target */
1097         /* FIXME use mgc_process_config instead */
1098         rc = obd_set_info_async(NULL, mgc->u.cli.cl_mgc_mgsexp,
1099                                 sizeof(KEY_REGISTER_TARGET), KEY_REGISTER_TARGET,
1100                                 sizeof(*mti), mti, NULL);
1101         if (rc) {
1102                 if (mti->mti_flags & LDD_F_ERROR) {
1103                         LCONSOLE_ERROR_MSG(0x160,
1104                                 "The MGS is refusing to allow this "
1105                                 "server (%s) to start. Please see messages"
1106                                 " on the MGS node.\n", ldd->ldd_svname);
1107                 } else if (writeconf) {
1108                         LCONSOLE_ERROR_MSG(0x15f,
1109                                 "Communication to the MGS return error %d. "
1110                                 "Is the MGS running?\n", rc);
1111                 } else {
1112                         CERROR("Cannot talk to the MGS: %d, not fatal\n", rc);
1113                         /* reset the error code for non-fatal error. */
1114                         rc = 0;
1115                 }
1116                 GOTO(out, rc);
1117         }
1118
1119         /* Always update our flags */
1120         ldd->ldd_flags = mti->mti_flags & LDD_F_ONDISK_MASK;
1121
1122         /* If this flag is set, it means the MGS wants us to change our
1123            on-disk data. (So far this means just the index.) */
1124         if (mti->mti_flags & LDD_F_REWRITE_LDD) {
1125                 char *label;
1126                 int err;
1127                 CDEBUG(D_MOUNT, "Changing on-disk index from %#x to %#x "
1128                        "for %s\n", ldd->ldd_svindex, mti->mti_stripe_index,
1129                        mti->mti_svname);
1130                 ldd->ldd_svindex = mti->mti_stripe_index;
1131                 strncpy(ldd->ldd_svname, mti->mti_svname,
1132                         sizeof(ldd->ldd_svname));
1133                 /* or ldd_make_sv_name(ldd); */
1134                 ldd_write(&mgc->obd_lvfs_ctxt, ldd);
1135                 err = fsfilt_set_label(mgc, lsi->lsi_srv_mnt->mnt_sb,
1136                                        mti->mti_svname);
1137                 if (err)
1138                         CERROR("Label set error %d\n", err);
1139                 label = fsfilt_get_label(mgc, lsi->lsi_srv_mnt->mnt_sb);
1140                 if (label)
1141                         CDEBUG(D_MOUNT, "Disk label changed to %s\n", label);
1142
1143                 /* Flush the new ldd to disk */
1144                 fsfilt_sync(mgc, lsi->lsi_srv_mnt->mnt_sb);
1145         }
1146
1147 out:
1148         if (mti)
1149                 OBD_FREE_PTR(mti);
1150         RETURN(rc);
1151 }
1152
1153 /**
1154  * Notify the MGS that this target is ready.
1155  * Used by IR - if the MGS receives this message, it will notify clients.
1156  */
1157 static int server_notify_target(struct super_block *sb, struct obd_device *obd)
1158 {
1159         struct lustre_sb_info *lsi = s2lsi(sb);
1160         struct obd_device *mgc = lsi->lsi_mgc;
1161         struct mgs_target_info *mti = NULL;
1162         int rc;
1163         ENTRY;
1164
1165         LASSERT(mgc);
1166
1167         if (!(lsi->lsi_flags & LSI_SERVER))
1168                 RETURN(-EINVAL);
1169
1170         OBD_ALLOC_PTR(mti);
1171         if (!mti)
1172                 RETURN(-ENOMEM);
1173         rc = server_sb2mti(sb, mti);
1174         if (rc)
1175                 GOTO(out, rc);
1176
1177         mti->mti_instance = obd->u.obt.obt_instance;
1178         mti->mti_flags |= LDD_F_OPC_READY;
1179
1180         /* FIXME use mgc_process_config instead */
1181         rc = obd_set_info_async(NULL, mgc->u.cli.cl_mgc_mgsexp,
1182                                 sizeof(KEY_REGISTER_TARGET),
1183                                 KEY_REGISTER_TARGET,
1184                                 sizeof(*mti), mti, NULL);
1185
1186         /* Imperative recovery: if the mgs informs us to use IR? */
1187         if (!rc && !(mti->mti_flags & LDD_F_ERROR) &&
1188             (mti->mti_flags & LDD_F_IR_CAPABLE))
1189                 lsi->lsi_flags |= LSI_IR_CAPABLE;
1190
1191 out:
1192         if (mti)
1193                 OBD_FREE_PTR(mti);
1194         RETURN(rc);
1195
1196 }
1197
1198 /** Start server targets: MDTs and OSTs
1199  */
1200 static int server_start_targets(struct super_block *sb, struct vfsmount *mnt)
1201 {
1202         struct obd_device *obd;
1203         struct lustre_sb_info *lsi = s2lsi(sb);
1204         struct config_llog_instance cfg;
1205         int rc;
1206         ENTRY;
1207
1208         CDEBUG(D_MOUNT, "starting target %s\n", lsi->lsi_ldd->ldd_svname);
1209
1210 #if 0
1211         /* If we're an MDT, make sure the global MDS is running */
1212         if (lsi->lsi_ldd->ldd_flags & LDD_F_SV_TYPE_MDT) {
1213                 /* make sure the MDS is started */
1214                 cfs_mutex_lock(&server_start_lock);
1215                 obd = class_name2obd(LUSTRE_MDS_OBDNAME);
1216                 if (!obd) {
1217                         rc = lustre_start_simple(LUSTRE_MDS_OBDNAME,
1218                     /* FIXME pre-rename, should eventually be LUSTRE_MDS_NAME */
1219                                                  LUSTRE_MDT_NAME,
1220                                                  LUSTRE_MDS_OBDNAME"_uuid",
1221                                                  0, 0);
1222                         if (rc) {
1223                                 cfs_mutex_unlock(&server_start_lock);
1224                                 CERROR("failed to start MDS: %d\n", rc);
1225                                 RETURN(rc);
1226                         }
1227                 }
1228                 cfs_mutex_unlock(&server_start_lock);
1229         }
1230 #endif
1231
1232         /* If we're an OST, make sure the global OSS is running */
1233         if (IS_OST(lsi->lsi_ldd)) {
1234                 /* make sure OSS is started */
1235                 cfs_mutex_lock(&server_start_lock);
1236                 obd = class_name2obd(LUSTRE_OSS_OBDNAME);
1237                 if (!obd) {
1238                         rc = lustre_start_simple(LUSTRE_OSS_OBDNAME,
1239                                                  LUSTRE_OSS_NAME,
1240                                                  LUSTRE_OSS_OBDNAME"_uuid",
1241                                                  0, 0);
1242                         if (rc) {
1243                                 cfs_mutex_unlock(&server_start_lock);
1244                                 CERROR("failed to start OSS: %d\n", rc);
1245                                 RETURN(rc);
1246                         }
1247                 }
1248                 cfs_mutex_unlock(&server_start_lock);
1249         }
1250
1251         /* Set the mgc fs to our server disk.  This allows the MGC to
1252          * read and write configs locally, in case it can't talk to the MGS. */
1253         rc = server_mgc_set_fs(lsi->lsi_mgc, sb);
1254         if (rc)
1255                 RETURN(rc);
1256
1257         /* Register with MGS */
1258         rc = server_register_target(sb);
1259         if (rc)
1260                 GOTO(out_mgc, rc);
1261
1262         /* Let the target look up the mount using the target's name
1263            (we can't pass the sb or mnt through class_process_config.) */
1264         rc = server_register_mount(lsi->lsi_ldd->ldd_svname, sb, mnt);
1265         if (rc)
1266                 GOTO(out_mgc, rc);
1267
1268         /* Start targets using the llog named for the target */
1269         memset(&cfg, 0, sizeof(cfg));
1270         rc = lustre_process_log(sb, lsi->lsi_ldd->ldd_svname, &cfg);
1271         if (rc) {
1272                 CERROR("failed to start server %s: %d\n",
1273                        lsi->lsi_ldd->ldd_svname, rc);
1274                 /* Do NOT call server_deregister_mount() here. This makes it
1275                  * impossible to find mount later in cleanup time and leaves
1276                  * @lsi and othder stuff leaked. -umka */
1277                 GOTO(out_mgc, rc);
1278         }
1279
1280 out_mgc:
1281         /* Release the mgc fs for others to use */
1282         server_mgc_clear_fs(lsi->lsi_mgc);
1283
1284         if (!rc) {
1285                 obd = class_name2obd(lsi->lsi_ldd->ldd_svname);
1286                 if (!obd) {
1287                         CERROR("no server named %s was started\n",
1288                                lsi->lsi_ldd->ldd_svname);
1289                         RETURN(-ENXIO);
1290                 }
1291
1292                 if ((lsi->lsi_lmd->lmd_flags & LMD_FLG_ABORT_RECOV) &&
1293                     (OBP(obd, iocontrol))) {
1294                         obd_iocontrol(OBD_IOC_ABORT_RECOVERY,
1295                                       obd->obd_self_export, 0, NULL, NULL);
1296                 }
1297
1298                 server_notify_target(sb, obd);
1299
1300                 /* calculate recovery timeout, do it after lustre_process_log */
1301                 server_calc_timeout(lsi, obd);
1302
1303                 /* log has been fully processed */
1304                 obd_notify(obd, NULL, OBD_NOTIFY_CONFIG, (void *)CONFIG_LOG);
1305         }
1306
1307         RETURN(rc);
1308 }
1309
1310 /***************** lustre superblock **************/
1311
1312 struct lustre_sb_info *lustre_init_lsi(struct super_block *sb)
1313 {
1314         struct lustre_sb_info *lsi;
1315         ENTRY;
1316
1317         OBD_ALLOC_PTR(lsi);
1318         if (!lsi)
1319                 RETURN(NULL);
1320         OBD_ALLOC_PTR(lsi->lsi_lmd);
1321         if (!lsi->lsi_lmd) {
1322                 OBD_FREE_PTR(lsi);
1323                 RETURN(NULL);
1324         }
1325
1326         lsi->lsi_lmd->lmd_exclude_count = 0;
1327         lsi->lsi_lmd->lmd_recovery_time_soft = 0;
1328         lsi->lsi_lmd->lmd_recovery_time_hard = 0;
1329         s2lsi_nocast(sb) = lsi;
1330         /* we take 1 extra ref for our setup */
1331         cfs_atomic_set(&lsi->lsi_mounts, 1);
1332
1333         /* Default umount style */
1334         lsi->lsi_flags = LSI_UMOUNT_FAILOVER;
1335
1336         RETURN(lsi);
1337 }
1338
1339 static int lustre_free_lsi(struct super_block *sb)
1340 {
1341         struct lustre_sb_info *lsi = s2lsi(sb);
1342         ENTRY;
1343
1344         LASSERT(lsi != NULL);
1345         CDEBUG(D_MOUNT, "Freeing lsi %p\n", lsi);
1346
1347         /* someone didn't call server_put_mount. */
1348         LASSERT(cfs_atomic_read(&lsi->lsi_mounts) == 0);
1349
1350         if (lsi->lsi_ldd != NULL)
1351                 OBD_FREE(lsi->lsi_ldd, sizeof(*lsi->lsi_ldd));
1352
1353         if (lsi->lsi_lmd != NULL) {
1354                 if (lsi->lsi_lmd->lmd_dev != NULL)
1355                         OBD_FREE(lsi->lsi_lmd->lmd_dev,
1356                                  strlen(lsi->lsi_lmd->lmd_dev) + 1);
1357                 if (lsi->lsi_lmd->lmd_profile != NULL)
1358                         OBD_FREE(lsi->lsi_lmd->lmd_profile,
1359                                  strlen(lsi->lsi_lmd->lmd_profile) + 1);
1360                 if (lsi->lsi_lmd->lmd_mgssec != NULL)
1361                         OBD_FREE(lsi->lsi_lmd->lmd_mgssec,
1362                                  strlen(lsi->lsi_lmd->lmd_mgssec) + 1);
1363                 if (lsi->lsi_lmd->lmd_opts != NULL)
1364                         OBD_FREE(lsi->lsi_lmd->lmd_opts,
1365                                  strlen(lsi->lsi_lmd->lmd_opts) + 1);
1366                 if (lsi->lsi_lmd->lmd_exclude_count)
1367                         OBD_FREE(lsi->lsi_lmd->lmd_exclude,
1368                                  sizeof(lsi->lsi_lmd->lmd_exclude[0]) *
1369                                  lsi->lsi_lmd->lmd_exclude_count);
1370                 OBD_FREE(lsi->lsi_lmd, sizeof(*lsi->lsi_lmd));
1371         }
1372
1373         LASSERT(lsi->lsi_llsbi == NULL);
1374         OBD_FREE(lsi, sizeof(*lsi));
1375         s2lsi_nocast(sb) = NULL;
1376
1377         RETURN(0);
1378 }
1379
1380 /* The lsi has one reference for every server that is using the disk -
1381    e.g. MDT, MGS, and potentially MGC */
1382 static int lustre_put_lsi(struct super_block *sb)
1383 {
1384         struct lustre_sb_info *lsi = s2lsi(sb);
1385         ENTRY;
1386
1387         LASSERT(lsi != NULL);
1388
1389         CDEBUG(D_MOUNT, "put %p %d\n", sb, cfs_atomic_read(&lsi->lsi_mounts));
1390         if (cfs_atomic_dec_and_test(&lsi->lsi_mounts)) {
1391                 lustre_free_lsi(sb);
1392                 RETURN(1);
1393         }
1394         RETURN(0);
1395 }
1396
1397 /*************** server mount ******************/
1398
1399 /** Kernel mount using mount options in MOUNT_DATA_FILE.
1400  * Since this file lives on the disk, we pre-mount using a common
1401  * type, read the file, then re-mount using the type specified in the
1402  * file.
1403  */
1404 static struct vfsmount *server_kernel_mount(struct super_block *sb)
1405 {
1406         struct lvfs_run_ctxt mount_ctxt;
1407         struct lustre_sb_info *lsi = s2lsi(sb);
1408         struct lustre_disk_data *ldd;
1409         struct lustre_mount_data *lmd = lsi->lsi_lmd;
1410         struct vfsmount *mnt;
1411         struct file_system_type *type;
1412         char *options = NULL;
1413         unsigned long page, s_flags;
1414         struct page *__page;
1415         int len;
1416         int rc;
1417         ENTRY;
1418
1419         OBD_ALLOC(ldd, sizeof(*ldd));
1420         if (!ldd)
1421                 RETURN(ERR_PTR(-ENOMEM));
1422
1423         /* In the past, we have always used flags = 0.
1424            Note ext3/ldiskfs can't be mounted ro. */
1425         s_flags = sb->s_flags;
1426
1427         /* allocate memory for options */
1428         OBD_PAGE_ALLOC(__page, CFS_ALLOC_STD);
1429         if (!__page)
1430                 GOTO(out_free, rc = -ENOMEM);
1431         page = (unsigned long)cfs_page_address(__page);
1432         options = (char *)page;
1433         memset(options, 0, CFS_PAGE_SIZE);
1434
1435         /* mount-line options must be added for pre-mount because it may
1436          * contain mount options such as journal_dev which are required
1437          * to mount successfuly the underlying filesystem */
1438         if (lmd->lmd_opts && (*(lmd->lmd_opts) != 0))
1439                 strncat(options, lmd->lmd_opts, CFS_PAGE_SIZE - 1);
1440
1441         /* Pre-mount ldiskfs to read the MOUNT_DATA_FILE */
1442         CDEBUG(D_MOUNT, "Pre-mount ldiskfs %s\n", lmd->lmd_dev);
1443         type = get_fs_type("ldiskfs");
1444         if (!type) {
1445                 CERROR("premount failed: cannot find ldiskfs module\n");
1446                 GOTO(out_free, rc = -ENODEV);
1447         }
1448         mnt = vfs_kern_mount(type, s_flags, lmd->lmd_dev, (void *)options);
1449         cfs_module_put(type->owner);
1450         if (IS_ERR(mnt)) {
1451                 rc = PTR_ERR(mnt);
1452                 CERROR("premount %s:%#lx ldiskfs failed: %d "
1453                         "Is the ldiskfs module available?\n",
1454                         lmd->lmd_dev, s_flags, rc );
1455                 GOTO(out_free, rc);
1456         }
1457
1458         OBD_SET_CTXT_MAGIC(&mount_ctxt);
1459         mount_ctxt.pwdmnt = mnt;
1460         mount_ctxt.pwd = mnt->mnt_root;
1461         mount_ctxt.fs = get_ds();
1462
1463         rc = ldd_parse(&mount_ctxt, ldd);
1464         unlock_mntput(mnt);
1465
1466         if (rc) {
1467                 CERROR("premount parse options failed: rc = %d\n", rc);
1468                 GOTO(out_free, rc);
1469         }
1470
1471         /* Done with our pre-mount, now do the real mount. */
1472
1473         /* Glom up mount options */
1474         memset(options, 0, CFS_PAGE_SIZE);
1475         strncpy(options, ldd->ldd_mount_opts, CFS_PAGE_SIZE - 2);
1476
1477         len = CFS_PAGE_SIZE - strlen(options) - 2;
1478         if (*options != 0)
1479                 strcat(options, ",");
1480         strncat(options, "no_mbcache", len);
1481
1482         /* Add in any mount-line options */
1483         if (lmd->lmd_opts && (*(lmd->lmd_opts) != 0)) {
1484                 len = CFS_PAGE_SIZE - strlen(options) - 2;
1485                 strcat(options, ",");
1486                 strncat(options, lmd->lmd_opts, len);
1487         }
1488
1489         /* Special permanent mount flags */
1490         if (IS_OST(ldd))
1491             s_flags |= MS_NOATIME | MS_NODIRATIME;
1492
1493         CDEBUG(D_MOUNT, "kern_mount: %s %s %s\n",
1494                MT_STR(ldd), lmd->lmd_dev, options);
1495         type = get_fs_type(MT_STR(ldd));
1496         if (!type) {
1497                 CERROR("get_fs_type failed\n");
1498                 GOTO(out_free, rc = -ENODEV);
1499         }
1500         mnt = vfs_kern_mount(type, s_flags, lmd->lmd_dev, (void *)options);
1501         cfs_module_put(type->owner);
1502         if (IS_ERR(mnt)) {
1503                 rc = PTR_ERR(mnt);
1504                 CERROR("vfs_kern_mount failed: rc = %d\n", rc);
1505                 GOTO(out_free, rc);
1506         }
1507
1508         if (lmd->lmd_flags & LMD_FLG_ABORT_RECOV)
1509                 simple_truncate(mnt->mnt_sb->s_root, mnt, LAST_RCVD,
1510                                 LR_CLIENT_START);
1511
1512         OBD_PAGE_FREE(__page);
1513         lsi->lsi_ldd = ldd;   /* freed at lsi cleanup */
1514         CDEBUG(D_SUPER, "%s: mnt = %p\n", lmd->lmd_dev, mnt);
1515         RETURN(mnt);
1516
1517 out_free:
1518         if (__page)
1519                 OBD_PAGE_FREE(__page);
1520         OBD_FREE(ldd, sizeof(*ldd));
1521         lsi->lsi_ldd = NULL;
1522         RETURN(ERR_PTR(rc));
1523 }
1524
1525 /** Wait here forever until the mount refcount is 0 before completing umount,
1526  * else we risk dereferencing a null pointer.
1527  * LNET may take e.g. 165s before killing zombies.
1528  */
1529 static void server_wait_finished(struct vfsmount *mnt)
1530 {
1531        cfs_waitq_t             waitq;
1532        int                     rc, waited = 0;
1533        cfs_sigset_t            blocked;
1534
1535        cfs_waitq_init(&waitq);
1536
1537        while (mnt_get_count(mnt) > 1) {
1538                if (waited && (waited % 30 == 0))
1539                        LCONSOLE_WARN("Mount still busy with %d refs after "
1540                                       "%d secs.\n",
1541                                       mnt_get_count(mnt),
1542                                       waited);
1543                /* Cannot use l_event_wait() for an interruptible sleep. */
1544                waited += 3;
1545                blocked = cfs_block_sigsinv(sigmask(SIGKILL));
1546                cfs_waitq_wait_event_interruptible_timeout(
1547                        waitq,
1548                        (mnt_get_count(mnt) == 1),
1549                        cfs_time_seconds(3),
1550                        rc);
1551                cfs_restore_sigs(blocked);
1552                if (rc < 0) {
1553                        LCONSOLE_EMERG("Danger: interrupted umount %s with "
1554                                       "%d refs!\n", mnt->mnt_devname,
1555                                       mnt_get_count(mnt));
1556                        break;
1557                }
1558
1559        }
1560 }
1561
1562 /** Start the shutdown of servers at umount.
1563  */
1564 static void server_put_super(struct super_block *sb)
1565 {
1566         struct lustre_sb_info *lsi = s2lsi(sb);
1567         struct obd_device     *obd;
1568         struct vfsmount       *mnt = lsi->lsi_srv_mnt;
1569         char *tmpname, *extraname = NULL;
1570         int tmpname_sz;
1571         int lddflags = lsi->lsi_ldd->ldd_flags;
1572         int lsiflags = lsi->lsi_flags;
1573         ENTRY;
1574
1575         LASSERT(lsiflags & LSI_SERVER);
1576
1577         tmpname_sz = strlen(lsi->lsi_ldd->ldd_svname) + 1;
1578         OBD_ALLOC(tmpname, tmpname_sz);
1579         memcpy(tmpname, lsi->lsi_ldd->ldd_svname, tmpname_sz);
1580         CDEBUG(D_MOUNT, "server put_super %s\n", tmpname);
1581         if (IS_MDT(lsi->lsi_ldd) && (lsi->lsi_lmd->lmd_flags & LMD_FLG_NOSVC))
1582                 snprintf(tmpname, tmpname_sz, "MGS");
1583
1584         /* Stop the target */
1585         if (!(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOSVC) &&
1586             (IS_MDT(lsi->lsi_ldd) || IS_OST(lsi->lsi_ldd))) {
1587                 struct lustre_profile *lprof = NULL;
1588
1589                 /* tell the mgc to drop the config log */
1590                 lustre_end_log(sb, lsi->lsi_ldd->ldd_svname, NULL);
1591
1592                 /* COMPAT_146 - profile may get deleted in mgc_cleanup.
1593                    If there are any setup/cleanup errors, save the lov
1594                    name for safety cleanup later. */
1595                 lprof = class_get_profile(lsi->lsi_ldd->ldd_svname);
1596                 if (lprof && lprof->lp_dt) {
1597                         OBD_ALLOC(extraname, strlen(lprof->lp_dt) + 1);
1598                         strcpy(extraname, lprof->lp_dt);
1599                 }
1600
1601                 obd = class_name2obd(lsi->lsi_ldd->ldd_svname);
1602                 if (obd) {
1603                         CDEBUG(D_MOUNT, "stopping %s\n", obd->obd_name);
1604                         if (lsi->lsi_flags & LSI_UMOUNT_FAILOVER)
1605                                 obd->obd_fail = 1;
1606                         /* We can't seem to give an error return code
1607                          * to .put_super, so we better make sure we clean up! */
1608                         obd->obd_force = 1;
1609                         class_manual_cleanup(obd);
1610                 } else {
1611                         CERROR("no obd %s\n", lsi->lsi_ldd->ldd_svname);
1612                         server_deregister_mount(lsi->lsi_ldd->ldd_svname);
1613                 }
1614         }
1615
1616         /* If they wanted the mgs to stop separately from the mdt, they
1617            should have put it on a different device. */
1618         if (IS_MGS(lsi->lsi_ldd)) {
1619                 /* if MDS start with --nomgs, don't stop MGS then */
1620                 if (!(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOMGS))
1621                         server_stop_mgs(sb);
1622         }
1623
1624         /* Clean the mgc and sb */
1625         lustre_common_put_super(sb);
1626
1627         /* Wait for the targets to really clean up - can't exit (and let the
1628            sb get destroyed) while the mount is still in use */
1629         server_wait_finished(mnt);
1630
1631         /* drop the One True Mount */
1632         unlock_mntput(mnt);
1633
1634         /* Stop the servers (MDS, OSS) if no longer needed.  We must wait
1635            until the target is really gone so that our type refcount check
1636            is right. */
1637         server_stop_servers(lddflags, lsiflags);
1638
1639         /* In case of startup or cleanup err, stop related obds */
1640         if (extraname) {
1641                 obd = class_name2obd(extraname);
1642                 if (obd) {
1643                         CWARN("Cleaning orphaned obd %s\n", extraname);
1644                         obd->obd_force = 1;
1645                         class_manual_cleanup(obd);
1646                 }
1647                 OBD_FREE(extraname, strlen(extraname) + 1);
1648         }
1649
1650         LCONSOLE_WARN("server umount %s complete\n", tmpname);
1651         OBD_FREE(tmpname, tmpname_sz);
1652         EXIT;
1653 }
1654
1655 /** Called only for 'umount -f'
1656  */
1657 #ifdef HAVE_UMOUNTBEGIN_VFSMOUNT
1658 static void server_umount_begin(struct vfsmount *vfsmnt, int flags)
1659 {
1660         struct super_block *sb = vfsmnt->mnt_sb;
1661 #else
1662 static void server_umount_begin(struct super_block *sb)
1663 {
1664 #endif
1665         struct lustre_sb_info *lsi = s2lsi(sb);
1666         ENTRY;
1667
1668 #ifdef HAVE_UMOUNTBEGIN_VFSMOUNT
1669         if (!(flags & MNT_FORCE)) {
1670                 EXIT;
1671                 return;
1672         }
1673 #endif
1674
1675         CDEBUG(D_MOUNT, "umount -f\n");
1676         /* umount = failover
1677            umount -f = force
1678            no third way to do non-force, non-failover */
1679         lsi->lsi_flags &= ~LSI_UMOUNT_FAILOVER;
1680         lsi->lsi_flags |= LSI_UMOUNT_FORCE;
1681         EXIT;
1682 }
1683
1684 #ifndef HAVE_STATFS_DENTRY_PARAM
1685 static int server_statfs (struct super_block *sb, cfs_kstatfs_t *buf)
1686 {
1687 #else
1688 static int server_statfs (struct dentry *dentry, cfs_kstatfs_t *buf)
1689 {
1690         struct super_block *sb = dentry->d_sb;
1691 #endif
1692         struct vfsmount *mnt = s2lsi(sb)->lsi_srv_mnt;
1693         ENTRY;
1694
1695         if (mnt && mnt->mnt_sb && mnt->mnt_sb->s_op->statfs) {
1696 #ifdef HAVE_STATFS_DENTRY_PARAM
1697                 int rc = mnt->mnt_sb->s_op->statfs(mnt->mnt_root, buf);
1698 #else
1699                 int rc = mnt->mnt_sb->s_op->statfs(mnt->mnt_sb, buf);
1700 #endif
1701                 if (!rc) {
1702                         buf->f_type = sb->s_magic;
1703                         RETURN(0);
1704                 }
1705         }
1706
1707         /* just return 0 */
1708         buf->f_type = sb->s_magic;
1709         buf->f_bsize = sb->s_blocksize;
1710         buf->f_blocks = 1;
1711         buf->f_bfree = 0;
1712         buf->f_bavail = 0;
1713         buf->f_files = 1;
1714         buf->f_ffree = 0;
1715         buf->f_namelen = NAME_MAX;
1716         RETURN(0);
1717 }
1718
1719 /** The operations we support directly on the superblock:
1720  * mount, umount, and df.
1721  */
1722 static struct super_operations server_ops =
1723 {
1724         .put_super      = server_put_super,
1725         .umount_begin   = server_umount_begin, /* umount -f */
1726         .statfs         = server_statfs,
1727 };
1728
1729 #define log2(n) cfs_ffz(~(n))
1730 #define LUSTRE_SUPER_MAGIC 0x0BD00BD1
1731
1732 static int server_fill_super_common(struct super_block *sb)
1733 {
1734         struct inode *root = 0;
1735         ENTRY;
1736
1737         CDEBUG(D_MOUNT, "Server sb, dev=%d\n", (int)sb->s_dev);
1738
1739         sb->s_blocksize = 4096;
1740         sb->s_blocksize_bits = log2(sb->s_blocksize);
1741         sb->s_magic = LUSTRE_SUPER_MAGIC;
1742         sb->s_maxbytes = 0; /* we don't allow file IO on server mountpoints */
1743         sb->s_flags |= MS_RDONLY;
1744         sb->s_op = &server_ops;
1745
1746         root = new_inode(sb);
1747         if (!root) {
1748                 CERROR("Can't make root inode\n");
1749                 RETURN(-EIO);
1750         }
1751
1752         /* returns -EIO for every operation */
1753         /* make_bad_inode(root); -- badness - can't umount */
1754         /* apparently we need to be a directory for the mount to finish */
1755         root->i_mode = S_IFDIR;
1756
1757         sb->s_root = d_alloc_root(root);
1758         if (!sb->s_root) {
1759                 CERROR("Can't make root dentry\n");
1760                 iput(root);
1761                 RETURN(-EIO);
1762         }
1763
1764         RETURN(0);
1765 }
1766
1767 /** Fill in the superblock info for a Lustre server.
1768  * Mount the device with the correct options.
1769  * Read the on-disk config file.
1770  * Start the services.
1771  */
1772 static int server_fill_super(struct super_block *sb)
1773 {
1774         struct lustre_sb_info *lsi = s2lsi(sb);
1775         struct vfsmount *mnt;
1776         int rc;
1777         ENTRY;
1778
1779         /* the One True Mount */
1780         mnt = server_kernel_mount(sb);
1781         if (IS_ERR(mnt)) {
1782                 rc = PTR_ERR(mnt);
1783                 CERROR("Unable to mount device %s: %d\n",
1784                        lsi->lsi_lmd->lmd_dev, rc);
1785                 lustre_put_lsi(sb);
1786                 RETURN(rc);
1787         }
1788         lsi->lsi_srv_mnt = mnt;
1789
1790         LASSERT(lsi->lsi_ldd);
1791         CDEBUG(D_MOUNT, "Found service %s for fs '%s' on device %s\n",
1792                lsi->lsi_ldd->ldd_svname, lsi->lsi_ldd->ldd_fsname,
1793                lsi->lsi_lmd->lmd_dev);
1794
1795         if (class_name2obd(lsi->lsi_ldd->ldd_svname)) {
1796                 LCONSOLE_ERROR_MSG(0x161, "The target named %s is already "
1797                                    "running. Double-mount may have compromised"
1798                                    " the disk journal.\n",
1799                                    lsi->lsi_ldd->ldd_svname);
1800                 lustre_put_lsi(sb);
1801                 unlock_mntput(mnt);
1802                 RETURN(-EALREADY);
1803         }
1804
1805         /* Start MGS before MGC */
1806         if (IS_MGS(lsi->lsi_ldd) && !(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOMGS)){
1807                 rc = server_start_mgs(sb);
1808                 if (rc)
1809                         GOTO(out_mnt, rc);
1810         }
1811
1812         /* Start MGC before servers */
1813         rc = lustre_start_mgc(sb);
1814         if (rc)
1815                 GOTO(out_mnt, rc);
1816
1817         /* Set up all obd devices for service */
1818         if (!(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOSVC) &&
1819                 (IS_OST(lsi->lsi_ldd) || IS_MDT(lsi->lsi_ldd))) {
1820                 rc = server_start_targets(sb, mnt);
1821                 if (rc < 0) {
1822                         CERROR("Unable to start targets: %d\n", rc);
1823                         GOTO(out_mnt, rc);
1824                 }
1825         /* FIXME overmount client here,
1826            or can we just start a client log and client_fill_super on this sb?
1827            We need to make sure server_put_super gets called too - ll_put_super
1828            calls lustre_common_put_super; check there for LSI_SERVER flag,
1829            call s_p_s if so.
1830            Probably should start client from new thread so we can return.
1831            Client will not finish until all servers are connected.
1832            Note - MGS-only server does NOT get a client, since there is no
1833            lustre fs associated - the MGS is for all lustre fs's */
1834         }
1835
1836         rc = server_fill_super_common(sb);
1837         if (rc)
1838                 GOTO(out_mnt, rc);
1839
1840         RETURN(0);
1841 out_mnt:
1842         /* We jump here in case of failure while starting targets or MGS.
1843          * In this case we can't just put @mnt and have to do real cleanup
1844          * with stoping targets, etc. */
1845         server_put_super(sb);
1846         return rc;
1847 }
1848
1849 /* Get the index from the obd name.
1850    rc = server type, or
1851    rc < 0  on error
1852    if endptr isn't NULL it is set to end of name */
1853 int server_name2index(char *svname, __u32 *idx, char **endptr)
1854 {
1855         unsigned long index;
1856         int rc;
1857         char *dash = strrchr(svname, '-');
1858         if (!dash)
1859                 return(-EINVAL);
1860
1861         /* intepret <fsname>-MDTXXXXX-mdc as mdt, the better way is to pass
1862          * in the fsname, then determine the server index */
1863         if (!strcmp(LUSTRE_MDC_NAME, dash + 1)) {
1864                 dash--;
1865                 for (; dash > svname && *dash != '-'; dash--);
1866                 if (dash == svname)
1867                         return(-EINVAL);
1868         }
1869
1870         if (strncmp(dash + 1, "MDT", 3) == 0)
1871                 rc = LDD_F_SV_TYPE_MDT;
1872         else if (strncmp(dash + 1, "OST", 3) == 0)
1873                 rc = LDD_F_SV_TYPE_OST;
1874         else
1875                 return(-EINVAL);
1876         if (strcmp(dash + 4, "all") == 0)
1877                 return rc | LDD_F_SV_ALL;
1878
1879         index = simple_strtoul(dash + 4, endptr, 16);
1880         *idx = index;
1881         return rc;
1882 }
1883
1884 /*
1885  * Calculate timeout value for a target.
1886  */
1887 void server_calc_timeout(struct lustre_sb_info *lsi, struct obd_device *obd)
1888 {
1889         struct lustre_mount_data *lmd;
1890         int soft = 0;
1891         int hard = 0;
1892         int factor = 0;
1893         bool has_ir = !!(lsi->lsi_flags & LSI_IR_CAPABLE);
1894         int min = OBD_RECOVERY_TIME_MIN;
1895
1896         LASSERT(lsi->lsi_flags & LSI_SERVER);
1897
1898         lmd = lsi->lsi_lmd;
1899         if (lmd) {
1900                 soft   = lmd->lmd_recovery_time_soft;
1901                 hard   = lmd->lmd_recovery_time_hard;
1902                 has_ir = has_ir && !(lmd->lmd_flags & LMD_FLG_NOIR);
1903                 obd->obd_no_ir = !has_ir;
1904         }
1905
1906         if (soft == 0)
1907                 soft = OBD_RECOVERY_TIME_SOFT;
1908         if (hard == 0)
1909                 hard = OBD_RECOVERY_TIME_HARD;
1910
1911         /* target may have ir_factor configured. */
1912         factor = OBD_IR_FACTOR_DEFAULT;
1913         if (obd->obd_recovery_ir_factor)
1914                 factor = obd->obd_recovery_ir_factor;
1915
1916         if (has_ir) {
1917                 int new_soft = soft;
1918                 int new_hard = hard;
1919
1920                 /* adjust timeout value by imperative recovery */
1921
1922                 new_soft = (soft * factor) / OBD_IR_FACTOR_MAX;
1923                 new_hard = (hard * factor) / OBD_IR_FACTOR_MAX;
1924
1925                 /* make sure the timeout is not too short */
1926                 new_soft = max(min, new_soft);
1927                 new_hard = max(new_soft, new_hard);
1928
1929                 LCONSOLE_INFO("%s: Imperative Recovery enabled, recovery "
1930                               "window shrunk from %d-%d down to %d-%d\n",
1931                               obd->obd_name, soft, hard, new_soft, new_hard);
1932
1933                 soft = new_soft;
1934                 hard = new_hard;
1935         }
1936
1937         /* we're done */
1938         obd->obd_recovery_timeout   = max(obd->obd_recovery_timeout, soft);
1939         obd->obd_recovery_time_hard = hard;
1940         obd->obd_recovery_ir_factor = factor;
1941 }
1942 EXPORT_SYMBOL(server_calc_timeout);
1943
/*************** mount common between server and client ***************/
1945
1946 /* Common umount */
1947 int lustre_common_put_super(struct super_block *sb)
1948 {
1949         int rc;
1950         ENTRY;
1951
1952         CDEBUG(D_MOUNT, "dropping sb %p\n", sb);
1953
1954         /* Drop a ref to the MGC */
1955         rc = lustre_stop_mgc(sb);
1956         if (rc && (rc != -ENOENT)) {
1957                 if (rc != -EBUSY) {
1958                         CERROR("Can't stop MGC: %d\n", rc);
1959                         RETURN(rc);
1960                 }
1961                 /* BUSY just means that there's some other obd that
1962                    needs the mgc.  Let him clean it up. */
1963                 CDEBUG(D_MOUNT, "MGC still in use\n");
1964         }
1965         /* Drop a ref to the mounted disk */
1966         lustre_put_lsi(sb);
1967         lu_types_stop();
1968         RETURN(rc);
1969 }
1970
1971 static void lmd_print(struct lustre_mount_data *lmd)
1972 {
1973         int i;
1974
1975         PRINT_CMD(PRINT_MASK, "  mount data:\n");
1976         if (lmd_is_client(lmd))
1977                 PRINT_CMD(PRINT_MASK, "profile: %s\n", lmd->lmd_profile);
1978         PRINT_CMD(PRINT_MASK, "device:  %s\n", lmd->lmd_dev);
1979         PRINT_CMD(PRINT_MASK, "flags:   %x\n", lmd->lmd_flags);
1980
1981         if (lmd->lmd_opts)
1982                 PRINT_CMD(PRINT_MASK, "options: %s\n", lmd->lmd_opts);
1983
1984         if (lmd->lmd_recovery_time_soft)
1985                 PRINT_CMD(PRINT_MASK, "recovery time soft: %d\n",
1986                           lmd->lmd_recovery_time_soft);
1987
1988         if (lmd->lmd_recovery_time_hard)
1989                 PRINT_CMD(PRINT_MASK, "recovery time hard: %d\n",
1990                           lmd->lmd_recovery_time_hard);
1991
1992         for (i = 0; i < lmd->lmd_exclude_count; i++) {
1993                 PRINT_CMD(PRINT_MASK, "exclude %d:  OST%04x\n", i,
1994                           lmd->lmd_exclude[i]);
1995         }
1996 }
1997
1998 /* Is this server on the exclusion list */
1999 int lustre_check_exclusion(struct super_block *sb, char *svname)
2000 {
2001         struct lustre_sb_info *lsi = s2lsi(sb);
2002         struct lustre_mount_data *lmd = lsi->lsi_lmd;
2003         __u32 index;
2004         int i, rc;
2005         ENTRY;
2006
2007         rc = server_name2index(svname, &index, NULL);
2008         if (rc != LDD_F_SV_TYPE_OST)
2009                 /* Only exclude OSTs */
2010                 RETURN(0);
2011
2012         CDEBUG(D_MOUNT, "Check exclusion %s (%d) in %d of %s\n", svname,
2013                index, lmd->lmd_exclude_count, lmd->lmd_dev);
2014
2015         for(i = 0; i < lmd->lmd_exclude_count; i++) {
2016                 if (index == lmd->lmd_exclude[i]) {
2017                         CWARN("Excluding %s (on exclusion list)\n", svname);
2018                         RETURN(1);
2019                 }
2020         }
2021         RETURN(0);
2022 }
2023
2024 /* mount -v  -o exclude=lustre-OST0001:lustre-OST0002 -t lustre ... */
2025 static int lmd_make_exclusion(struct lustre_mount_data *lmd, char *ptr)
2026 {
2027         char *s1 = ptr, *s2;
2028         __u32 index, *exclude_list;
2029         int rc = 0, devmax;
2030         ENTRY;
2031
2032         /* The shortest an ost name can be is 8 chars: -OST0000.
2033            We don't actually know the fsname at this time, so in fact
2034            a user could specify any fsname. */
2035         devmax = strlen(ptr) / 8 + 1;
2036
2037         /* temp storage until we figure out how many we have */
2038         OBD_ALLOC(exclude_list, sizeof(index) * devmax);
2039         if (!exclude_list)
2040                 RETURN(-ENOMEM);
2041
2042         /* we enter this fn pointing at the '=' */
2043         while (*s1 && *s1 != ' ' && *s1 != ',') {
2044                 s1++;
2045                 rc = server_name2index(s1, &index, &s2);
2046                 if (rc < 0) {
2047                         CERROR("Can't parse server name '%s'\n", s1);
2048                         break;
2049                 }
2050                 if (rc == LDD_F_SV_TYPE_OST)
2051                         exclude_list[lmd->lmd_exclude_count++] = index;
2052                 else
2053                         CDEBUG(D_MOUNT, "ignoring exclude %.7s\n", s1);
2054                 s1 = s2;
2055                 /* now we are pointing at ':' (next exclude)
2056                    or ',' (end of excludes) */
2057                 if (lmd->lmd_exclude_count >= devmax)
2058                         break;
2059         }
2060         if (rc >= 0) /* non-err */
2061                 rc = 0;
2062
2063         if (lmd->lmd_exclude_count) {
2064                 /* permanent, freed in lustre_free_lsi */
2065                 OBD_ALLOC(lmd->lmd_exclude, sizeof(index) *
2066                           lmd->lmd_exclude_count);
2067                 if (lmd->lmd_exclude) {
2068                         memcpy(lmd->lmd_exclude, exclude_list,
2069                                sizeof(index) * lmd->lmd_exclude_count);
2070                 } else {
2071                         rc = -ENOMEM;
2072                         lmd->lmd_exclude_count = 0;
2073                 }
2074         }
2075         OBD_FREE(exclude_list, sizeof(index) * devmax);
2076         RETURN(rc);
2077 }
2078
2079 static int lmd_parse_mgssec(struct lustre_mount_data *lmd, char *ptr)
2080 {
2081         char   *tail;
2082         int     length;
2083
2084         if (lmd->lmd_mgssec != NULL) {
2085                 OBD_FREE(lmd->lmd_mgssec, strlen(lmd->lmd_mgssec) + 1);
2086                 lmd->lmd_mgssec = NULL;
2087         }
2088
2089         tail = strchr(ptr, ',');
2090         if (tail == NULL)
2091                 length = strlen(ptr);
2092         else
2093                 length = tail - ptr;
2094
2095         OBD_ALLOC(lmd->lmd_mgssec, length + 1);
2096         if (lmd->lmd_mgssec == NULL)
2097                 return -ENOMEM;
2098
2099         memcpy(lmd->lmd_mgssec, ptr, length);
2100         lmd->lmd_mgssec[length] = '\0';
2101         return 0;
2102 }
2103
2104 /** Parse mount line options
2105  * e.g. mount -v -t lustre -o abort_recov uml1:uml2:/lustre-client /mnt/lustre
2106  * dev is passed as device=uml1:/lustre by mount.lustre
2107  */
2108 static int lmd_parse(char *options, struct lustre_mount_data *lmd)
2109 {
2110         char *s1, *s2, *devname = NULL;
2111         struct lustre_mount_data *raw = (struct lustre_mount_data *)options;
2112         int rc = 0;
2113         ENTRY;
2114
2115         LASSERT(lmd);
2116         if (!options) {
2117                 LCONSOLE_ERROR_MSG(0x162, "Missing mount data: check that "
2118                                    "/sbin/mount.lustre is installed.\n");
2119                 RETURN(-EINVAL);
2120         }
2121
2122         /* Options should be a string - try to detect old lmd data */
2123         if ((raw->lmd_magic & 0xffffff00) == (LMD_MAGIC & 0xffffff00)) {
2124                 LCONSOLE_ERROR_MSG(0x163, "You're using an old version of "
2125                                    "/sbin/mount.lustre.  Please install "
2126                                    "version %s\n", LUSTRE_VERSION_STRING);
2127                 RETURN(-EINVAL);
2128         }
2129         lmd->lmd_magic = LMD_MAGIC;
2130
2131         /* Set default flags here */
2132
2133         s1 = options;
2134         while (*s1) {
2135                 int clear = 0;
2136                 int time_min = OBD_RECOVERY_TIME_MIN;
2137
2138                 /* Skip whitespace and extra commas */
2139                 while (*s1 == ' ' || *s1 == ',')
2140                         s1++;
2141
2142                 /* Client options are parsed in ll_options: eg. flock,
2143                    user_xattr, acl */
2144
2145                 /* Parse non-ldiskfs options here. Rather than modifying
2146                    ldiskfs, we just zero these out here */
2147                 if (strncmp(s1, "abort_recov", 11) == 0) {
2148                         lmd->lmd_flags |= LMD_FLG_ABORT_RECOV;
2149                         clear++;
2150                 } else if (strncmp(s1, "recovery_time_soft=", 19) == 0) {
2151                         lmd->lmd_recovery_time_soft = max_t(int,
2152                                 simple_strtoul(s1 + 19, NULL, 10), time_min);
2153                         clear++;
2154                 } else if (strncmp(s1, "recovery_time_hard=", 19) == 0) {
2155                         lmd->lmd_recovery_time_hard = max_t(int,
2156                                 simple_strtoul(s1 + 19, NULL, 10), time_min);
2157                         clear++;
2158                 } else if (strncmp(s1, "noir", 4) == 0) {
2159                         lmd->lmd_flags |= LMD_FLG_NOIR; /* test purpose only. */
2160                         clear++;
2161                 } else if (strncmp(s1, "nosvc", 5) == 0) {
2162                         lmd->lmd_flags |= LMD_FLG_NOSVC;
2163                         clear++;
2164                 } else if (strncmp(s1, "nomgs", 5) == 0) {
2165                         lmd->lmd_flags |= LMD_FLG_NOMGS;
2166                         clear++;
2167                 } else if (strncmp(s1, "noscrub", 7) == 0) {
2168                         lmd->lmd_flags |= LMD_FLG_NOSCRUB;
2169                         clear++;
2170                 } else if (strncmp(s1, "writeconf", 9) == 0) {
2171                         lmd->lmd_flags |= LMD_FLG_WRITECONF;
2172                         clear++;
2173                 } else if (strncmp(s1, "mgssec=", 7) == 0) {
2174                         rc = lmd_parse_mgssec(lmd, s1 + 7);
2175                         if (rc)
2176                                 goto invalid;
2177                         clear++;
2178                 /* ost exclusion list */
2179                 } else if (strncmp(s1, "exclude=", 8) == 0) {
2180                         rc = lmd_make_exclusion(lmd, s1 + 7);
2181                         if (rc)
2182                                 goto invalid;
2183                         clear++;
2184                 }
2185                 /* Linux 2.4 doesn't pass the device, so we stuck it at the
2186                    end of the options. */
2187                 else if (strncmp(s1, "device=", 7) == 0) {
2188                         devname = s1 + 7;
2189                         /* terminate options right before device.  device
2190                            must be the last one. */
2191                         *s1 = '\0';
2192                         break;
2193                 }
2194
2195                 /* Find next opt */
2196                 s2 = strchr(s1, ',');
2197                 if (s2 == NULL) {
2198                         if (clear)
2199                                 *s1 = '\0';
2200                         break;
2201                 }
2202                 s2++;
2203                 if (clear)
2204                         memmove(s1, s2, strlen(s2) + 1);
2205                 else
2206                         s1 = s2;
2207         }
2208
2209         if (!devname) {
2210                 LCONSOLE_ERROR_MSG(0x164, "Can't find the device name "
2211                                    "(need mount option 'device=...')\n");
2212                 goto invalid;
2213         }
2214
2215         s1 = strstr(devname, ":/");
2216         if (s1) {
2217                 ++s1;
2218                 lmd->lmd_flags |= LMD_FLG_CLIENT;
2219                 /* Remove leading /s from fsname */
2220                 while (*++s1 == '/') ;
2221                 /* Freed in lustre_free_lsi */
2222                 OBD_ALLOC(lmd->lmd_profile, strlen(s1) + 8);
2223                 if (!lmd->lmd_profile)
2224                         RETURN(-ENOMEM);
2225                 sprintf(lmd->lmd_profile, "%s-client", s1);
2226         }
2227
2228         /* Freed in lustre_free_lsi */
2229         OBD_ALLOC(lmd->lmd_dev, strlen(devname) + 1);
2230         if (!lmd->lmd_dev)
2231                 RETURN(-ENOMEM);
2232         strcpy(lmd->lmd_dev, devname);
2233
2234         /* Save mount options */
2235         s1 = options + strlen(options) - 1;
2236         while (s1 >= options && (*s1 == ',' || *s1 == ' '))
2237                 *s1-- = 0;
2238         if (*options != 0) {
2239                 /* Freed in lustre_free_lsi */
2240                 OBD_ALLOC(lmd->lmd_opts, strlen(options) + 1);
2241                 if (!lmd->lmd_opts)
2242                         RETURN(-ENOMEM);
2243                 strcpy(lmd->lmd_opts, options);
2244         }
2245
2246         lmd_print(lmd);
2247         lmd->lmd_magic = LMD_MAGIC;
2248
2249         RETURN(rc);
2250
2251 invalid:
2252         CERROR("Bad mount options %s\n", options);
2253         RETURN(-EINVAL);
2254 }
2255
/* Argument bundle handed from lustre_get_sb() to lustre_fill_super()
 * through the opaque fill_super data pointer. */
struct lustre_mount_data2 {
        /* raw mount data; parsed as the option string by lmd_parse() */
        void            *lmd2_data;
        /* mount being set up; passed on to the client fill_super callback */
        struct vfsmount *lmd2_mnt;
};
2260
2261 /** This is the entry point for the mount call into Lustre.
2262  * This is called when a server or client is mounted,
2263  * and this is where we start setting things up.
2264  * @param data Mount options (e.g. -o flock,abort_recov)
2265  */
2266 int lustre_fill_super(struct super_block *sb, void *data, int silent)
2267 {
2268         struct lustre_mount_data *lmd;
2269         struct lustre_mount_data2 *lmd2 = data;
2270         struct lustre_sb_info *lsi;
2271         int rc;
2272         ENTRY;
2273
2274         CDEBUG(D_MOUNT|D_VFSTRACE, "VFS Op: sb %p\n", sb);
2275
2276         lsi = lustre_init_lsi(sb);
2277         if (!lsi)
2278                 RETURN(-ENOMEM);
2279         lmd = lsi->lsi_lmd;
2280
2281         /*
2282          * Disable lockdep during mount, because mount locking patterns are
2283          * `special'.
2284          */
2285         cfs_lockdep_off();
2286
2287         /*
2288          * LU-639: the obd cleanup of last mount may not finish yet, wait here.
2289          */
2290         obd_zombie_barrier();
2291
2292         /* Figure out the lmd from the mount options */
2293         if (lmd_parse((char *)(lmd2->lmd2_data), lmd)) {
2294                 lustre_put_lsi(sb);
2295                 GOTO(out, rc = -EINVAL);
2296         }
2297
2298         if (lmd_is_client(lmd)) {
2299                 CDEBUG(D_MOUNT, "Mounting client %s\n", lmd->lmd_profile);
2300                 if (!client_fill_super) {
2301                         LCONSOLE_ERROR_MSG(0x165, "Nothing registered for "
2302                                            "client mount! Is the 'lustre' "
2303                                            "module loaded?\n");
2304                         lustre_put_lsi(sb);
2305                         rc = -ENODEV;
2306                 } else {
2307                         rc = lustre_start_mgc(sb);
2308                         if (rc) {
2309                                 lustre_put_lsi(sb);
2310                                 GOTO(out, rc);
2311                         }
2312                         /* Connect and start */
2313                         /* (should always be ll_fill_super) */
2314                         rc = (*client_fill_super)(sb, lmd2->lmd2_mnt);
2315                         /* c_f_s will call lustre_common_put_super on failure */
2316                 }
2317         } else {
2318                 CDEBUG(D_MOUNT, "Mounting server from %s\n", lmd->lmd_dev);
2319                 lsi->lsi_flags |= LSI_SERVER;
2320                 rc = server_fill_super(sb);
2321                 /* s_f_s calls lustre_start_mgc after the mount because we need
2322                    the MGS nids which are stored on disk.  Plus, we may
2323                    need to start the MGS first. */
2324                 /* s_f_s will call server_put_super on failure */
2325         }
2326
2327         /* If error happens in fill_super() call, @lsi will be killed there.
2328          * This is why we do not put it here. */
2329         GOTO(out, rc);
2330 out:
2331         if (rc) {
2332                 CERROR("Unable to mount %s (%d)\n",
2333                        s2lsi(sb) ? lmd->lmd_dev : "", rc);
2334         } else {
2335                 CDEBUG(D_SUPER, "Mount %s complete\n",
2336                        lmd->lmd_dev);
2337         }
2338         cfs_lockdep_on();
2339         return rc;
2340 }
2341
2342
2343 /* We can't call ll_fill_super by name because it lives in a module that
2344    must be loaded after this one. */
2345 void lustre_register_client_fill_super(int (*cfs)(struct super_block *sb,
2346                                                   struct vfsmount *mnt))
2347 {
2348         client_fill_super = cfs;
2349 }
2350
2351 void lustre_register_kill_super_cb(void (*cfs)(struct super_block *sb))
2352 {
2353         kill_super_cb = cfs;
2354 }
2355
2356 /***************** FS registration ******************/
2357
2358 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18))
2359 struct super_block * lustre_get_sb(struct file_system_type *fs_type, int flags,
2360                                    const char *devname, void * data)
2361 {
2362         return get_sb_nodev(fs_type, flags, data, lustre_fill_super);
2363 }
2364 #else
2365 int lustre_get_sb(struct file_system_type *fs_type, int flags,
2366                   const char *devname, void * data, struct vfsmount *mnt)
2367 {
2368         struct lustre_mount_data2 lmd2 = {data, mnt};
2369
2370         return get_sb_nodev(fs_type, flags, &lmd2, lustre_fill_super, mnt);
2371 }
2372 #endif
2373
2374 void lustre_kill_super(struct super_block *sb)
2375 {
2376         struct lustre_sb_info *lsi = s2lsi(sb);
2377
2378         if (kill_super_cb && lsi && !(lsi->lsi_flags & LSI_SERVER))
2379                 (*kill_super_cb)(sb);
2380
2381         kill_anon_super(sb);
2382 }
2383
2384 /** Register the "lustre" fs type
2385  */
2386 struct file_system_type lustre_fs_type = {
2387         .owner        = THIS_MODULE,
2388         .name         = "lustre",
2389         .get_sb       = lustre_get_sb,
2390         .kill_sb      = lustre_kill_super,
2391         .fs_flags     = FS_BINARY_MOUNTDATA | FS_REQUIRES_DEV |
2392 #ifdef FS_HAS_FIEMAP
2393                         FS_HAS_FIEMAP |
2394 #endif
2395                         LL_RENAME_DOES_D_MOVE,
2396 };
2397
2398 int lustre_register_fs(void)
2399 {
2400         return register_filesystem(&lustre_fs_type);
2401 }
2402
2403 int lustre_unregister_fs(void)
2404 {
2405         return unregister_filesystem(&lustre_fs_type);
2406 }
2407
2408 EXPORT_SYMBOL(lustre_register_client_fill_super);
2409 EXPORT_SYMBOL(lustre_register_kill_super_cb);
2410 EXPORT_SYMBOL(lustre_common_put_super);
2411 EXPORT_SYMBOL(lustre_process_log);
2412 EXPORT_SYMBOL(lustre_end_log);
2413 EXPORT_SYMBOL(server_get_mount);
2414 EXPORT_SYMBOL(server_get_mount_2);
2415 EXPORT_SYMBOL(server_put_mount);
2416 EXPORT_SYMBOL(server_put_mount_2);
2417 EXPORT_SYMBOL(server_register_target);
2418 EXPORT_SYMBOL(server_name2index);
2419 EXPORT_SYMBOL(server_mti_print);
2420 EXPORT_SYMBOL(do_lcfg);