lustre/obdclass/obd_mount.c

   1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
   2  * vim:expandtab:shiftwidth=8:tabstop=8:
   3  *
   4  * GPL HEADER START
   5  *
   6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   7  *
   8  * This program is free software; you can redistribute it and/or modify
   9  * it under the terms of the GNU General Public License version 2 only,
  10  * as published by the Free Software Foundation.
  11  *
  12  * This program is distributed in the hope that it will be useful, but
  13  * WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * General Public License version 2 for more details (a copy is included
  16  * in the LICENSE file that accompanied this code).
  17  *
  18  * You should have received a copy of the GNU General Public License
  19  * version 2 along with this program; If not, see
  20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
  21  *
  22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  23  * CA 95054 USA or visit www.sun.com if you need additional information or
  24  * have any questions.
  25  *
  26  * GPL HEADER END
  27  */
  28 /*
  29  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  30  * Use is subject to license terms.
  31  *
  32  * Copyright (c) 2011, 2012, Whamcloud, Inc.
  33  */
  34 /*
  35  * This file is part of Lustre, http://www.lustre.org/
  36  * Lustre is a trademark of Sun Microsystems, Inc.
  37  *
  38  * lustre/obdclass/obd_mount.c
  39  *
  40  * Client/server mount routines
  41  *
  42  * Author: Nathan Rutman <nathan@clusterfs.com>
  43  */
  44
  45
  46 #define DEBUG_SUBSYSTEM S_CLASS
  47 #define D_MOUNT D_SUPER|D_CONFIG /*|D_WARNING */
  48 #define PRINT_CMD CDEBUG
  49 #define PRINT_MASK D_SUPER|D_CONFIG
  50
  51 #include <obd.h>
  52 #include <lvfs.h>
  53 #include <lustre_fsfilt.h>
  54 #include <obd_class.h>
  55 #include <lustre/lustre_user.h>
  56 #include <linux/version.h>
  57 #include <lustre_log.h>
  58 #include <lustre_disk.h>
  59 #include <lustre_param.h>
  60
/* File-local callback pointers, both initially NULL.
 * NOTE(review): presumably installed by the client module when it registers
 * itself -- the code that assigns them is not visible in this chunk. */
static int (*client_fill_super)(struct super_block *sb,
                                struct vfsmount *mnt) = NULL;
static void (*kill_super_cb)(struct super_block *sb) = NULL;

/*********** mount lookup *********/

/* lustre_mount_info_lock is taken around list lookups, insertions and
 * removals on server_mount_info_list by the register/deregister/get/put
 * helpers that follow. */
CFS_DECLARE_MUTEX(lustre_mount_info_lock);
static CFS_LIST_HEAD(server_mount_info_list);
  69
  70 static struct lustre_mount_info *server_find_mount(const char *name)
  71 {
  72         cfs_list_t *tmp;
  73         struct lustre_mount_info *lmi;
  74         ENTRY;
  75
  76         cfs_list_for_each(tmp, &server_mount_info_list) {
  77                 lmi = cfs_list_entry(tmp, struct lustre_mount_info,
  78                                      lmi_list_chain);
  79                 if (strcmp(name, lmi->lmi_name) == 0)
  80                         RETURN(lmi);
  81         }
  82         RETURN(NULL);
  83 }
  84
  85 /* we must register an obd for a mount before we call the setup routine.
  86    *_setup will call lustre_get_mount to get the mnt struct
  87    by obd_name, since we can't pass the pointer to setup. */
  88 static int server_register_mount(const char *name, struct super_block *sb,
  89                           struct vfsmount *mnt)
  90 {
  91         struct lustre_mount_info *lmi;
  92         char *name_cp;
  93         ENTRY;
  94
  95         LASSERT(mnt);
  96         LASSERT(sb);
  97
  98         OBD_ALLOC(lmi, sizeof(*lmi));
  99         if (!lmi)
 100                 RETURN(-ENOMEM);
 101         OBD_ALLOC(name_cp, strlen(name) + 1);
 102         if (!name_cp) {
 103                 OBD_FREE(lmi, sizeof(*lmi));
 104                 RETURN(-ENOMEM);
 105         }
 106         strcpy(name_cp, name);
 107
 108         cfs_down(&lustre_mount_info_lock);
 109
 110         if (server_find_mount(name)) {
 111                 cfs_up(&lustre_mount_info_lock);
 112                 OBD_FREE(lmi, sizeof(*lmi));
 113                 OBD_FREE(name_cp, strlen(name) + 1);
 114                 CERROR("Already registered %s\n", name);
 115                 RETURN(-EEXIST);
 116         }
 117         lmi->lmi_name = name_cp;
 118         lmi->lmi_sb = sb;
 119         lmi->lmi_mnt = mnt;
 120         cfs_list_add(&lmi->lmi_list_chain, &server_mount_info_list);
 121
 122         cfs_up(&lustre_mount_info_lock);
 123
 124         CDEBUG(D_MOUNT, "reg_mnt %p from %s, vfscount=%d\n",
 125                lmi->lmi_mnt, name, mnt_get_count(lmi->lmi_mnt));
 126
 127         RETURN(0);
 128 }
 129
 130 /* when an obd no longer needs a mount */
 131 static int server_deregister_mount(const char *name)
 132 {
 133         struct lustre_mount_info *lmi;
 134         ENTRY;
 135
 136         cfs_down(&lustre_mount_info_lock);
 137         lmi = server_find_mount(name);
 138         if (!lmi) {
 139                 cfs_up(&lustre_mount_info_lock);
 140                 CERROR("%s not registered\n", name);
 141                 RETURN(-ENOENT);
 142         }
 143
 144         CDEBUG(D_MOUNT, "dereg_mnt %p from %s, vfscount=%d\n",
 145                lmi->lmi_mnt, name, mnt_get_count(lmi->lmi_mnt));
 146
 147         OBD_FREE(lmi->lmi_name, strlen(lmi->lmi_name) + 1);
 148         cfs_list_del(&lmi->lmi_list_chain);
 149         OBD_FREE(lmi, sizeof(*lmi));
 150         cfs_up(&lustre_mount_info_lock);
 151
 152         RETURN(0);
 153 }
 154
 155 /* obd's look up a registered mount using their obdname. This is just
 156    for initial obd setup to find the mount struct.  It should not be
 157    called every time you want to mntget. */
 158 struct lustre_mount_info *server_get_mount(const char *name)
 159 {
 160         struct lustre_mount_info *lmi;
 161         struct lustre_sb_info *lsi;
 162         ENTRY;
 163
 164         cfs_down(&lustre_mount_info_lock);
 165         lmi = server_find_mount(name);
 166         cfs_up(&lustre_mount_info_lock);
 167         if (!lmi) {
 168                 CERROR("Can't find mount for %s\n", name);
 169                 RETURN(NULL);
 170         }
 171         lsi = s2lsi(lmi->lmi_sb);
 172         mntget(lmi->lmi_mnt);
 173         cfs_atomic_inc(&lsi->lsi_mounts);
 174
 175         CDEBUG(D_MOUNT, "get_mnt %p from %s, refs=%d, vfscount=%d\n",
 176                lmi->lmi_mnt, name, cfs_atomic_read(&lsi->lsi_mounts),
 177                mnt_get_count(lmi->lmi_mnt));
 178
 179         RETURN(lmi);
 180 }
 181
 182 /*
 183  * Used by mdt to get mount_info from obdname.
 184  * There are no blocking when using the mount_info.
 185  * Do not use server_get_mount for this purpose.
 186  */
 187 struct lustre_mount_info *server_get_mount_2(const char *name)
 188 {
 189         struct lustre_mount_info *lmi;
 190         ENTRY;
 191
 192         cfs_down(&lustre_mount_info_lock);
 193         lmi = server_find_mount(name);
 194         cfs_up(&lustre_mount_info_lock);
 195         if (!lmi)
 196                 CERROR("Can't find mount for %s\n", name);
 197
 198         RETURN(lmi);
 199 }
 200
 201 static void unlock_mntput(struct vfsmount *mnt)
 202 {
 203         if (kernel_locked()) {
 204                 cfs_unlock_kernel();
 205                 mntput(mnt);
 206                 cfs_lock_kernel();
 207         } else {
 208                 mntput(mnt);
 209         }
 210 }
 211
 212 static int lustre_put_lsi(struct super_block *sb);
 213
 214 /* to be called from obd_cleanup methods */
 215 int server_put_mount(const char *name, struct vfsmount *mnt)
 216 {
 217         struct lustre_mount_info *lmi;
 218         struct lustre_sb_info *lsi;
 219         int count = mnt_get_count(mnt) - 1;
 220         ENTRY;
 221
 222         /* This might be the last one, can't deref after this */
 223         unlock_mntput(mnt);
 224
 225         cfs_down(&lustre_mount_info_lock);
 226         lmi = server_find_mount(name);
 227         cfs_up(&lustre_mount_info_lock);
 228         if (!lmi) {
 229                 CERROR("Can't find mount for %s\n", name);
 230                 RETURN(-ENOENT);
 231         }
 232         lsi = s2lsi(lmi->lmi_sb);
 233         LASSERT(lmi->lmi_mnt == mnt);
 234
 235         CDEBUG(D_MOUNT, "put_mnt %p from %s, refs=%d, vfscount=%d\n",
 236                lmi->lmi_mnt, name, cfs_atomic_read(&lsi->lsi_mounts), count);
 237
 238         if (lustre_put_lsi(lmi->lmi_sb)) {
 239                 CDEBUG(D_MOUNT, "Last put of mnt %p from %s, vfscount=%d\n",
 240                        lmi->lmi_mnt, name, count);
 241                 /* last mount is the One True Mount */
 242                 if (count > 1)
 243                         CERROR("%s: mount busy, vfscount=%d!\n", name, count);
 244         }
 245
 246         /* this obd should never need the mount again */
 247         server_deregister_mount(name);
 248
 249         RETURN(0);
 250 }
 251
/* Counterpart of server_get_mount_2().  That lookup takes no reference, so
 * there is nothing to release here: both arguments are ignored and 0 is
 * always returned. */
int server_put_mount_2(const char *name, struct vfsmount *mnt)
{
        ENTRY;
        RETURN(0);
}
 258
 259 /******* mount helper utilities *********/
 260
 261 #if 0
/* Debug helper, currently compiled out by the surrounding #if 0: prints the
 * fields of \a ldd, one per line, through PRINT_CMD at PRINT_MASK. */
static void ldd_print(struct lustre_disk_data *ldd)
{
        PRINT_CMD(PRINT_MASK, "  disk data:\n");
        PRINT_CMD(PRINT_MASK, "server:  %s\n", ldd->ldd_svname);
        PRINT_CMD(PRINT_MASK, "uuid:    %s\n", (char *)ldd->ldd_uuid);
        PRINT_CMD(PRINT_MASK, "fs:      %s\n", ldd->ldd_fsname);
        PRINT_CMD(PRINT_MASK, "index:   %04x\n", ldd->ldd_svindex);
        PRINT_CMD(PRINT_MASK, "config:  %d\n", ldd->ldd_config_ver);
        PRINT_CMD(PRINT_MASK, "flags:   %#x\n", ldd->ldd_flags);
        PRINT_CMD(PRINT_MASK, "diskfs:  %s\n", MT_STR(ldd));
        PRINT_CMD(PRINT_MASK, "options: %s\n", ldd->ldd_mount_opts);
        PRINT_CMD(PRINT_MASK, "params:  %s\n", ldd->ldd_params);
        PRINT_CMD(PRINT_MASK, "comment: %s\n", ldd->ldd_userdata);
}
 276 #endif
 277
 278 static int ldd_parse(struct lvfs_run_ctxt *mount_ctxt,
 279                      struct lustre_disk_data *ldd)
 280 {
 281         struct lvfs_run_ctxt saved;
 282         struct file *file;
 283         loff_t off = 0;
 284         unsigned long len;
 285         int rc;
 286         ENTRY;
 287
 288         push_ctxt(&saved, mount_ctxt, NULL);
 289
 290         file = filp_open(MOUNT_DATA_FILE, O_RDONLY, 0644);
 291         if (IS_ERR(file)) {
 292                 rc = PTR_ERR(file);
 293                 CERROR("cannot open %s: rc = %d\n", MOUNT_DATA_FILE, rc);
 294                 GOTO(out, rc);
 295         }
 296
 297         len = i_size_read(file->f_dentry->d_inode);
 298         CDEBUG(D_MOUNT, "Have %s, size %lu\n", MOUNT_DATA_FILE, len);
 299         if (len != sizeof(*ldd)) {
 300                 CERROR("disk data size does not match: see %lu expect %u\n",
 301                        len, (int)sizeof(*ldd));
 302                 GOTO(out_close, rc = -EINVAL);
 303         }
 304
 305         rc = lustre_fread(file, ldd, len, &off);
 306         if (rc != len) {
 307                 CERROR("error reading %s: read %d of %lu\n",
 308                        MOUNT_DATA_FILE, rc, len);
 309                 GOTO(out_close, rc = -EINVAL);
 310         }
 311         rc = 0;
 312
 313         if (ldd->ldd_magic != LDD_MAGIC) {
 314                 /* FIXME add swabbing support */
 315                 CERROR("Bad magic in %s: %x!=%x\n", MOUNT_DATA_FILE,
 316                        ldd->ldd_magic, LDD_MAGIC);
 317                 GOTO(out_close, rc = -EINVAL);
 318         }
 319
 320         if (ldd->ldd_feature_incompat & ~LDD_INCOMPAT_SUPP) {
 321                 CERROR("%s: unsupported incompat filesystem feature(s) %x\n",
 322                        ldd->ldd_svname,
 323                        ldd->ldd_feature_incompat & ~LDD_INCOMPAT_SUPP);
 324                 GOTO(out_close, rc = -EINVAL);
 325         }
 326         if (ldd->ldd_feature_rocompat & ~LDD_ROCOMPAT_SUPP) {
 327                 CERROR("%s: unsupported read-only filesystem feature(s) %x\n",
 328                        ldd->ldd_svname,
 329                        ldd->ldd_feature_rocompat & ~LDD_ROCOMPAT_SUPP);
 330                 /* Do something like remount filesystem read-only */
 331                 GOTO(out_close, rc = -EINVAL);
 332         }
 333
 334 out_close:
 335         filp_close(file, 0);
 336 out:
 337         pop_ctxt(&saved, mount_ctxt, NULL);
 338         RETURN(rc);
 339 }
 340
 341 static int ldd_write(struct lvfs_run_ctxt *mount_ctxt,
 342                      struct lustre_disk_data *ldd)
 343 {
 344         struct lvfs_run_ctxt saved;
 345         struct file *file;
 346         loff_t off = 0;
 347         unsigned long len = sizeof(struct lustre_disk_data);
 348         int rc = 0;
 349         ENTRY;
 350
 351         LASSERT(ldd->ldd_magic == LDD_MAGIC);
 352
 353         ldd->ldd_config_ver++;
 354
 355         push_ctxt(&saved, mount_ctxt, NULL);
 356
 357         file = filp_open(MOUNT_DATA_FILE, O_RDWR|O_SYNC, 0644);
 358         if (IS_ERR(file)) {
 359                 rc = PTR_ERR(file);
 360                 CERROR("cannot open %s: rc = %d\n", MOUNT_DATA_FILE, rc);
 361                 GOTO(out, rc);
 362         }
 363
 364         rc = lustre_fwrite(file, ldd, len, &off);
 365         if (rc != len) {
 366                 CERROR("error writing %s: read %d of %lu\n",
 367                        MOUNT_DATA_FILE, rc, len);
 368                 GOTO(out_close, rc = -EINVAL);
 369         }
 370
 371         rc = 0;
 372
 373 out_close:
 374         filp_close(file, 0);
 375 out:
 376         pop_ctxt(&saved, mount_ctxt, NULL);
 377         RETURN(rc);
 378 }
 379
 380
 381 /**************** config llog ********************/
 382
 383 /** Get a config log from the MGS and process it.
 384  * This func is called for both clients and servers.
 385  * Continue to process new statements appended to the logs
 386  * (whenever the config lock is revoked) until lustre_end_log
 387  * is called.
 388  * @param sb The superblock is used by the MGC to write to the local copy of
 389  *   the config log
 390  * @param logname The name of the llog to replicate from the MGS
 391  * @param cfg Since the same mgc may be used to follow multiple config logs
 392  *   (e.g. ost1, ost2, client), the config_llog_instance keeps the state for
 393  *   this log, and is added to the mgc's list of logs to follow.
 394  */
 395 int lustre_process_log(struct super_block *sb, char *logname,
 396                      struct config_llog_instance *cfg)
 397 {
 398         struct lustre_cfg *lcfg;
 399         struct lustre_cfg_bufs *bufs;
 400         struct lustre_sb_info *lsi = s2lsi(sb);
 401         struct obd_device *mgc = lsi->lsi_mgc;
 402         int rc;
 403         ENTRY;
 404
 405         LASSERT(mgc);
 406         LASSERT(cfg);
 407
 408         OBD_ALLOC_PTR(bufs);
 409         if (bufs == NULL)
 410                 RETURN(-ENOMEM);
 411
 412         /* mgc_process_config */
 413         lustre_cfg_bufs_reset(bufs, mgc->obd_name);
 414         lustre_cfg_bufs_set_string(bufs, 1, logname);
 415         lustre_cfg_bufs_set(bufs, 2, cfg, sizeof(*cfg));
 416         lustre_cfg_bufs_set(bufs, 3, &sb, sizeof(sb));
 417         lcfg = lustre_cfg_new(LCFG_LOG_START, bufs);
 418         rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
 419         lustre_cfg_free(lcfg);
 420
 421         OBD_FREE_PTR(bufs);
 422
 423         if (rc == -EINVAL)
 424                 LCONSOLE_ERROR_MSG(0x15b, "%s: The configuration from log '%s'"
 425                                    "failed from the MGS (%d).  Make sure this "
 426                                    "client and the MGS are running compatible "
 427                                    "versions of Lustre.\n",
 428                                    mgc->obd_name, logname, rc);
 429
 430         if (rc)
 431                 LCONSOLE_ERROR_MSG(0x15c, "%s: The configuration from log '%s' "
 432                                    "failed (%d). This may be the result of "
 433                                    "communication errors between this node and "
 434                                    "the MGS, a bad configuration, or other "
 435                                    "errors. See the syslog for more "
 436                                    "information.\n", mgc->obd_name, logname,
 437                                    rc);
 438
 439         /* class_obd_list(); */
 440         RETURN(rc);
 441 }
 442
 443 /* Stop watching this config log for updates */
 444 int lustre_end_log(struct super_block *sb, char *logname,
 445                        struct config_llog_instance *cfg)
 446 {
 447         struct lustre_cfg *lcfg;
 448         struct lustre_cfg_bufs bufs;
 449         struct lustre_sb_info *lsi = s2lsi(sb);
 450         struct obd_device *mgc = lsi->lsi_mgc;
 451         int rc;
 452         ENTRY;
 453
 454         if (!mgc)
 455                 RETURN(-ENOENT);
 456
 457         /* mgc_process_config */
 458         lustre_cfg_bufs_reset(&bufs, mgc->obd_name);
 459         lustre_cfg_bufs_set_string(&bufs, 1, logname);
 460         if (cfg)
 461                 lustre_cfg_bufs_set(&bufs, 2, cfg, sizeof(*cfg));
 462         lcfg = lustre_cfg_new(LCFG_LOG_END, &bufs);
 463         rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
 464         lustre_cfg_free(lcfg);
 465         RETURN(rc);
 466 }
 467
 468 /**************** obd start *******************/
 469
 470 /** lustre_cfg_bufs are a holdover from 1.4; we can still set these up from
 471  * lctl (and do for echo cli/srv.
 472  */
 473 int do_lcfg(char *cfgname, lnet_nid_t nid, int cmd,
 474             char *s1, char *s2, char *s3, char *s4)
 475 {
 476         struct lustre_cfg_bufs bufs;
 477         struct lustre_cfg    * lcfg = NULL;
 478         int rc;
 479
 480         CDEBUG(D_TRACE, "lcfg %s %#x %s %s %s %s\n", cfgname,
 481                cmd, s1, s2, s3, s4);
 482
 483         lustre_cfg_bufs_reset(&bufs, cfgname);
 484         if (s1)
 485                 lustre_cfg_bufs_set_string(&bufs, 1, s1);
 486         if (s2)
 487                 lustre_cfg_bufs_set_string(&bufs, 2, s2);
 488         if (s3)
 489                 lustre_cfg_bufs_set_string(&bufs, 3, s3);
 490         if (s4)
 491                 lustre_cfg_bufs_set_string(&bufs, 4, s4);
 492
 493         lcfg = lustre_cfg_new(cmd, &bufs);
 494         lcfg->lcfg_nid = nid;
 495         rc = class_process_config(lcfg);
 496         lustre_cfg_free(lcfg);
 497         return(rc);
 498 }
 499
 500 /** Call class_attach and class_setup.  These methods in turn call
 501  * obd type-specific methods.
 502  */
 503 static int lustre_start_simple(char *obdname, char *type, char *uuid,
 504                                char *s1, char *s2)
 505 {
 506         int rc;
 507         CDEBUG(D_MOUNT, "Starting obd %s (typ=%s)\n", obdname, type);
 508
 509         rc = do_lcfg(obdname, 0, LCFG_ATTACH, type, uuid, 0, 0);
 510         if (rc) {
 511                 CERROR("%s attach error %d\n", obdname, rc);
 512                 return(rc);
 513         }
 514         rc = do_lcfg(obdname, 0, LCFG_SETUP, s1, s2, 0, 0);
 515         if (rc) {
 516                 CERROR("%s setup error %d\n", obdname, rc);
 517                 do_lcfg(obdname, 0, LCFG_DETACH, 0, 0, 0, 0);
 518         }
 519         return rc;
 520 }
 521
 522 /* Set up a MGS to serve startup logs */
 523 static int server_start_mgs(struct super_block *sb)
 524 {
 525         struct lustre_sb_info    *lsi = s2lsi(sb);
 526         struct vfsmount          *mnt = lsi->lsi_srv_mnt;
 527         struct lustre_mount_info *lmi;
 528         int    rc = 0;
 529         ENTRY;
 530         LASSERT(mnt);
 531
 532         /* It is impossible to have more than 1 MGS per node, since
 533            MGC wouldn't know which to connect to */
 534         lmi = server_find_mount(LUSTRE_MGS_OBDNAME);
 535         if (lmi) {
 536                 lsi = s2lsi(lmi->lmi_sb);
 537                 LCONSOLE_ERROR_MSG(0x15d, "The MGS service was already started"
 538                                    " from server %s\n",
 539                                    lsi->lsi_ldd->ldd_svname);
 540                 RETURN(-EALREADY);
 541         }
 542
 543         CDEBUG(D_CONFIG, "Start MGS service %s\n", LUSTRE_MGS_OBDNAME);
 544
 545         rc = server_register_mount(LUSTRE_MGS_OBDNAME, sb, mnt);
 546
 547         if (!rc) {
 548                 rc = lustre_start_simple(LUSTRE_MGS_OBDNAME, LUSTRE_MGS_NAME,
 549                                          LUSTRE_MGS_OBDNAME, 0, 0);
 550                 /* Do NOT call server_deregister_mount() here. This leads to
 551                  * inability cleanup cleanly and free lsi and other stuff when
 552                  * mgs calls server_put_mount() in error handling case. -umka */
 553         }
 554
 555         if (rc)
 556                 LCONSOLE_ERROR_MSG(0x15e, "Failed to start MGS '%s' (%d). "
 557                                    "Is the 'mgs' module loaded?\n",
 558                                    LUSTRE_MGS_OBDNAME, rc);
 559         RETURN(rc);
 560 }
 561
 562 static int server_stop_mgs(struct super_block *sb)
 563 {
 564         struct obd_device *obd;
 565         int rc;
 566         ENTRY;
 567
 568         CDEBUG(D_MOUNT, "Stop MGS service %s\n", LUSTRE_MGS_OBDNAME);
 569
 570         /* There better be only one MGS */
 571         obd = class_name2obd(LUSTRE_MGS_OBDNAME);
 572         if (!obd) {
 573                 CDEBUG(D_CONFIG, "mgs %s not running\n", LUSTRE_MGS_OBDNAME);
 574                 RETURN(-EALREADY);
 575         }
 576
 577         /* The MGS should always stop when we say so */
 578         obd->obd_force = 1;
 579         rc = class_manual_cleanup(obd);
 580         RETURN(rc);
 581 }
 582
 583 CFS_DECLARE_MUTEX(mgc_start_lock);
 584
 585 /** Set up a mgc obd to process startup logs
 586  *
 587  * \param sb [in] super block of the mgc obd
 588  *
 589  * \retval 0 success, otherwise error code
 590  */
 591 static int lustre_start_mgc(struct super_block *sb)
 592 {
 593         struct obd_connect_data *data = NULL;
 594         struct lustre_sb_info *lsi = s2lsi(sb);
 595         struct obd_device *obd;
 596         struct obd_export *exp;
 597         struct obd_uuid *uuid;
 598         class_uuid_t uuidc;
 599         lnet_nid_t nid;
 600         char *mgcname = NULL, *niduuid = NULL, *mgssec = NULL;
 601         char *ptr;
 602         int recov_bk;
 603         int rc = 0, i = 0, j, len;
 604         ENTRY;
 605
 606         LASSERT(lsi->lsi_lmd);
 607
 608         /* Find the first non-lo MGS nid for our MGC name */
 609         if (lsi->lsi_flags & LSI_SERVER) {
 610                 ptr = lsi->lsi_ldd->ldd_params;
 611                 /* Use mgsnode= nids */
 612                 if ((class_find_param(ptr, PARAM_MGSNODE, &ptr) == 0) &&
 613                     (class_parse_nid(ptr, &nid, &ptr) == 0)) {
 614                         i++;
 615                 } else if (IS_MGS(lsi->lsi_ldd)) {
 616                         lnet_process_id_t id;
 617                         while ((rc = LNetGetId(i++, &id)) != -ENOENT) {
 618                                 if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND)
 619                                         continue;
 620                                 nid = id.nid;
 621                                 i++;
 622                                 break;
 623                         }
 624                 }
 625         } else { /* client */
 626                 /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
 627                 ptr = lsi->lsi_lmd->lmd_dev;
 628                 if (class_parse_nid(ptr, &nid, &ptr) == 0)
 629                         i++;
 630         }
 631         if (i == 0) {
 632                 CERROR("No valid MGS nids found.\n");
 633                 RETURN(-EINVAL);
 634         }
 635
 636         cfs_mutex_down(&mgc_start_lock);
 637
 638         len = strlen(LUSTRE_MGC_OBDNAME) + strlen(libcfs_nid2str(nid)) + 1;
 639         OBD_ALLOC(mgcname, len);
 640         OBD_ALLOC(niduuid, len + 2);
 641         if (!mgcname || !niduuid)
 642                 GOTO(out_free, rc = -ENOMEM);
 643         sprintf(mgcname, "%s%s", LUSTRE_MGC_OBDNAME, libcfs_nid2str(nid));
 644
 645         mgssec = lsi->lsi_lmd->lmd_mgssec ? lsi->lsi_lmd->lmd_mgssec : "";
 646
 647         OBD_ALLOC_PTR(data);
 648         if (data == NULL)
 649                 GOTO(out_free, rc = -ENOMEM);
 650
 651         obd = class_name2obd(mgcname);
 652         if (obd && !obd->obd_stopping) {
 653                 rc = obd_set_info_async(obd->obd_self_export,
 654                                         strlen(KEY_MGSSEC), KEY_MGSSEC,
 655                                         strlen(mgssec), mgssec, NULL);
 656                 if (rc)
 657                         GOTO(out_free, rc);
 658
 659                 /* Re-using an existing MGC */
 660                 cfs_atomic_inc(&obd->u.cli.cl_mgc_refcount);
 661
 662                 /* IR compatibility check, only for clients */
 663                 if (lmd_is_client(lsi->lsi_lmd)) {
 664                         int has_ir;
 665                         int vallen = sizeof(*data);
 666                         __u32 *flags = &lsi->lsi_lmd->lmd_flags;
 667
 668                         rc = obd_get_info(obd->obd_self_export,
 669                                           strlen(KEY_CONN_DATA), KEY_CONN_DATA,
 670                                           &vallen, data, NULL);
 671                         LASSERT(rc == 0);
 672                         has_ir = OCD_HAS_FLAG(data, IMP_RECOV);
 673                         if (has_ir ^ !(*flags & LMD_FLG_NOIR)) {
 674                                 /* LMD_FLG_NOIR is for test purpose only */
 675                                 LCONSOLE_WARN(
 676                                     "Trying to mount a client with IR setting "
 677                                     "not compatible with current mgc. "
 678                                     "Force to use current mgc setting that is "
 679                                     "IR %s.\n",
 680                                     has_ir ? "enabled" : "disabled");
 681                                 if (has_ir)
 682                                         *flags &= ~LMD_FLG_NOIR;
 683                                 else
 684                                         *flags |= LMD_FLG_NOIR;
 685                         }
 686                 }
 687
 688                 recov_bk = 0;
 689                 /* If we are restarting the MGS, don't try to keep the MGC's
 690                    old connection, or registration will fail. */
 691                 if ((lsi->lsi_flags & LSI_SERVER) && IS_MGS(lsi->lsi_ldd)) {
 692                         CDEBUG(D_MOUNT, "New MGS with live MGC\n");
 693                         recov_bk = 1;
 694                 }
 695
 696                 /* Try all connections, but only once (again).
 697                    We don't want to block another target from starting
 698                    (using its local copy of the log), but we do want to connect
 699                    if at all possible. */
 700                 recov_bk++;
 701                 CDEBUG(D_MOUNT, "%s: Set MGC reconnect %d\n", mgcname,recov_bk);
 702                 rc = obd_set_info_async(obd->obd_self_export,
 703                                         sizeof(KEY_INIT_RECOV_BACKUP),
 704                                         KEY_INIT_RECOV_BACKUP,
 705                                         sizeof(recov_bk), &recov_bk, NULL);
 706                 GOTO(out, rc = 0);
 707         }
 708
 709         CDEBUG(D_MOUNT, "Start MGC '%s'\n", mgcname);
 710
 711         /* Add the primary nids for the MGS */
 712         i = 0;
 713         sprintf(niduuid, "%s_%x", mgcname, i);
 714         if (lsi->lsi_flags & LSI_SERVER) {
 715                 ptr = lsi->lsi_ldd->ldd_params;
 716                 if (IS_MGS(lsi->lsi_ldd)) {
 717                         /* Use local nids (including LO) */
 718                         lnet_process_id_t id;
 719                         while ((rc = LNetGetId(i++, &id)) != -ENOENT) {
 720                                 rc = do_lcfg(mgcname, id.nid,
 721                                              LCFG_ADD_UUID, niduuid, 0,0,0);
 722                         }
 723                 } else {
 724                         /* Use mgsnode= nids */
 725                         if (class_find_param(ptr, PARAM_MGSNODE, &ptr) != 0) {
 726                                 CERROR("No MGS nids given.\n");
 727                                 GOTO(out_free, rc = -EINVAL);
 728                         }
 729                         while (class_parse_nid(ptr, &nid, &ptr) == 0) {
 730                                 rc = do_lcfg(mgcname, nid,
 731                                              LCFG_ADD_UUID, niduuid, 0,0,0);
 732                                 i++;
 733                         }
 734                 }
 735         } else { /* client */
 736                 /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
 737                 ptr = lsi->lsi_lmd->lmd_dev;
 738                 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
 739                         rc = do_lcfg(mgcname, nid,
 740                                      LCFG_ADD_UUID, niduuid, 0,0,0);
 741                         i++;
 742                         /* Stop at the first failover nid */
 743                         if (*ptr == ':')
 744                                 break;
 745                 }
 746         }
 747         if (i == 0) {
 748                 CERROR("No valid MGS nids found.\n");
 749                 GOTO(out_free, rc = -EINVAL);
 750         }
 751         lsi->lsi_lmd->lmd_mgs_failnodes = 1;
 752
 753         /* Random uuid for MGC allows easier reconnects */
 754         OBD_ALLOC_PTR(uuid);
 755         ll_generate_random_uuid(uuidc);
 756         class_uuid_unparse(uuidc, uuid);
 757
 758         /* Start the MGC */
 759         rc = lustre_start_simple(mgcname, LUSTRE_MGC_NAME,
 760                                  (char *)uuid->uuid, LUSTRE_MGS_OBDNAME,
 761                                  niduuid);
 762         OBD_FREE_PTR(uuid);
 763         if (rc)
 764                 GOTO(out_free, rc);
 765
 766         /* Add any failover MGS nids */
 767         i = 1;
 768         while ((*ptr == ':' ||
 769                 class_find_param(ptr, PARAM_MGSNODE, &ptr) == 0)) {
 770                 /* New failover node */
 771                 sprintf(niduuid, "%s_%x", mgcname, i);
 772                 j = 0;
 773                 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
 774                         j++;
 775                         rc = do_lcfg(mgcname, nid,
 776                                      LCFG_ADD_UUID, niduuid, 0,0,0);
 777                         if (*ptr == ':')
 778                                 break;
 779                 }
 780                 if (j > 0) {
 781                         rc = do_lcfg(mgcname, 0, LCFG_ADD_CONN,
 782                                      niduuid, 0, 0, 0);
 783                         i++;
 784                 } else {
 785                         /* at ":/fsname" */
 786                         break;
 787                 }
 788         }
 789         lsi->lsi_lmd->lmd_mgs_failnodes = i;
 790
 791         obd = class_name2obd(mgcname);
 792         if (!obd) {
 793                 CERROR("Can't find mgcobd %s\n", mgcname);
 794                 GOTO(out_free, rc = -ENOTCONN);
 795         }
 796
 797         rc = obd_set_info_async(obd->obd_self_export,
 798                                 strlen(KEY_MGSSEC), KEY_MGSSEC,
 799                                 strlen(mgssec), mgssec, NULL);
 800         if (rc)
 801                 GOTO(out_free, rc);
 802
 803         /* Keep a refcount of servers/clients who started with "mount",
 804            so we know when we can get rid of the mgc. */
 805         cfs_atomic_set(&obd->u.cli.cl_mgc_refcount, 1);
 806
 807         /* Try all connections, but only once. */
 808         recov_bk = 1;
 809         rc = obd_set_info_async(obd->obd_self_export,
 810                                 sizeof(KEY_INIT_RECOV_BACKUP),
 811                                 KEY_INIT_RECOV_BACKUP,
 812                                 sizeof(recov_bk), &recov_bk, NULL);
 813         if (rc)
 814                 /* nonfatal */
 815                 CWARN("can't set %s %d\n", KEY_INIT_RECOV_BACKUP, rc);
 816         /* We connect to the MGS at setup, and don't disconnect until cleanup */
 817         data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_FID |
 818                                   OBD_CONNECT_AT | OBD_CONNECT_FULL20   |
 819                                   OBD_CONNECT_IMP_RECOV;
 820         if (lmd_is_client(lsi->lsi_lmd) &&
 821             lsi->lsi_lmd->lmd_flags & LMD_FLG_NOIR)
 822                 data->ocd_connect_flags &= ~OBD_CONNECT_IMP_RECOV;
 823         data->ocd_version = LUSTRE_VERSION_CODE;
 824         rc = obd_connect(NULL, &exp, obd, &(obd->obd_uuid), data, NULL);
 825         if (rc) {
 826                 CERROR("connect failed %d\n", rc);
 827                 GOTO(out, rc);
 828         }
 829
 830         obd->u.cli.cl_mgc_mgsexp = exp;
 831
 832 out:
 833         /* Keep the mgc info in the sb. Note that many lsi's can point
 834            to the same mgc.*/
 835         lsi->lsi_mgc = obd;
 836 out_free:
 837         cfs_mutex_up(&mgc_start_lock);
 838
 839         if (data)
 840                 OBD_FREE_PTR(data);
 841         if (mgcname)
 842                 OBD_FREE(mgcname, len);
 843         if (niduuid)
 844                 OBD_FREE(niduuid, len + 2);
 845         RETURN(rc);
 846 }
 847
 848 static int lustre_stop_mgc(struct super_block *sb)
 849 {
 850         struct lustre_sb_info *lsi = s2lsi(sb);
 851         struct obd_device *obd;
 852         char *niduuid = 0, *ptr = 0;
 853         int i, rc = 0, len = 0;
 854         ENTRY;
 855
 856         if (!lsi)
 857                 RETURN(-ENOENT);
 858         obd = lsi->lsi_mgc;
 859         if (!obd)
 860                 RETURN(-ENOENT);
 861         lsi->lsi_mgc = NULL;
 862
 863         cfs_mutex_down(&mgc_start_lock);
 864         LASSERT(cfs_atomic_read(&obd->u.cli.cl_mgc_refcount) > 0);
 865         if (!cfs_atomic_dec_and_test(&obd->u.cli.cl_mgc_refcount)) {
 866                 /* This is not fatal, every client that stops
 867                    will call in here. */
 868                 CDEBUG(D_MOUNT, "mgc still has %d references.\n",
 869                        cfs_atomic_read(&obd->u.cli.cl_mgc_refcount));
 870                 GOTO(out, rc = -EBUSY);
 871         }
 872
 873         /* The MGC has no recoverable data in any case.
 874          * force shotdown set in umount_begin */
 875         obd->obd_no_recov = 1;
 876
 877         if (obd->u.cli.cl_mgc_mgsexp) {
 878                 /* An error is not fatal, if we are unable to send the
 879                    disconnect mgs ping evictor cleans up the export */
 880                 rc = obd_disconnect(obd->u.cli.cl_mgc_mgsexp);
 881                 if (rc)
 882                         CDEBUG(D_MOUNT, "disconnect failed %d\n", rc);
 883         }
 884
 885         /* Save the obdname for cleaning the nid uuids, which are
 886            obdname_XX */
 887         len = strlen(obd->obd_name) + 6;
 888         OBD_ALLOC(niduuid, len);
 889         if (niduuid) {
 890                 strcpy(niduuid, obd->obd_name);
 891                 ptr = niduuid + strlen(niduuid);
 892         }
 893
 894         rc = class_manual_cleanup(obd);
 895         if (rc)
 896                 GOTO(out, rc);
 897
 898         /* Clean the nid uuids */
 899         if (!niduuid)
 900                 GOTO(out, rc = -ENOMEM);
 901
 902         for (i = 0; i < lsi->lsi_lmd->lmd_mgs_failnodes; i++) {
 903                 sprintf(ptr, "_%x", i);
 904                 rc = do_lcfg(LUSTRE_MGC_OBDNAME, 0, LCFG_DEL_UUID,
 905                              niduuid, 0, 0, 0);
 906                 if (rc)
 907                         CERROR("del MDC UUID %s failed: rc = %d\n",
 908                                niduuid, rc);
 909         }
 910 out:
 911         if (niduuid)
 912                 OBD_FREE(niduuid, len);
 913
 914         /* class_import_put will get rid of the additional connections */
 915         cfs_mutex_up(&mgc_start_lock);
 916         RETURN(rc);
 917 }
 918
 919 /* Since there's only one mgc per node, we have to change it's fs to get
 920    access to the right disk. */
 921 static int server_mgc_set_fs(struct obd_device *mgc, struct super_block *sb)
 922 {
 923         struct lustre_sb_info *lsi = s2lsi(sb);
 924         int rc;
 925         ENTRY;
 926
 927         CDEBUG(D_MOUNT, "Set mgc disk for %s\n", lsi->lsi_lmd->lmd_dev);
 928
 929         /* cl_mgc_sem in mgc insures we sleep if the mgc_fs is busy */
 930         rc = obd_set_info_async(mgc->obd_self_export,
 931                                 sizeof(KEY_SET_FS), KEY_SET_FS,
 932                                 sizeof(*sb), sb, NULL);
 933         if (rc) {
 934                 CERROR("can't set_fs %d\n", rc);
 935         }
 936
 937         RETURN(rc);
 938 }
 939
 940 static int server_mgc_clear_fs(struct obd_device *mgc)
 941 {
 942         int rc;
 943         ENTRY;
 944
 945         CDEBUG(D_MOUNT, "Unassign mgc disk\n");
 946
 947         rc = obd_set_info_async(mgc->obd_self_export,
 948                                 sizeof(KEY_CLEAR_FS), KEY_CLEAR_FS,
 949                                 0, NULL, NULL);
 950         RETURN(rc);
 951 }
 952
 953 CFS_DECLARE_MUTEX(server_start_lock);
 954
 955 /* Stop MDS/OSS if nobody is using them */
 956 static int server_stop_servers(int lddflags, int lsiflags)
 957 {
 958         struct obd_device *obd = NULL;
 959         struct obd_type *type = NULL;
 960         int rc = 0;
 961         ENTRY;
 962
 963         cfs_mutex_down(&server_start_lock);
 964
 965         /* Either an MDT or an OST or neither  */
 966         /* if this was an MDT, and there are no more MDT's, clean up the MDS */
 967         if ((lddflags & LDD_F_SV_TYPE_MDT) &&
 968             (obd = class_name2obd(LUSTRE_MDS_OBDNAME))) {
 969                 /*FIXME pre-rename, should eventually be LUSTRE_MDT_NAME*/
 970                 type = class_search_type(LUSTRE_MDS_NAME);
 971         }
 972         /* if this was an OST, and there are no more OST's, clean up the OSS */
 973         if ((lddflags & LDD_F_SV_TYPE_OST) &&
 974             (obd = class_name2obd(LUSTRE_OSS_OBDNAME))) {
 975                 type = class_search_type(LUSTRE_OST_NAME);
 976         }
 977
 978         if (obd && (!type || !type->typ_refcnt)) {
 979                 int err;
 980                 obd->obd_force = 1;
 981                 /* obd_fail doesn't mean much on a server obd */
 982                 err = class_manual_cleanup(obd);
 983                 if (!rc)
 984                         rc = err;
 985         }
 986
 987         cfs_mutex_up(&server_start_lock);
 988
 989         RETURN(rc);
 990 }
 991
 992 int server_mti_print(char *title, struct mgs_target_info *mti)
 993 {
 994         PRINT_CMD(PRINT_MASK, "mti %s\n", title);
 995         PRINT_CMD(PRINT_MASK, "server: %s\n", mti->mti_svname);
 996         PRINT_CMD(PRINT_MASK, "fs:     %s\n", mti->mti_fsname);
 997         PRINT_CMD(PRINT_MASK, "uuid:   %s\n", mti->mti_uuid);
 998         PRINT_CMD(PRINT_MASK, "ver: %d  flags: %#x\n",
 999                   mti->mti_config_ver, mti->mti_flags);
1000         return(0);
1001 }
1002
1003 static int server_sb2mti(struct super_block *sb, struct mgs_target_info *mti)
1004 {
1005         struct lustre_sb_info    *lsi = s2lsi(sb);
1006         struct lustre_disk_data  *ldd = lsi->lsi_ldd;
1007         lnet_process_id_t         id;
1008         int                       i = 0;
1009         ENTRY;
1010
1011         if (!(lsi->lsi_flags & LSI_SERVER))
1012                 RETURN(-EINVAL);
1013
1014         strncpy(mti->mti_fsname, ldd->ldd_fsname,
1015                 sizeof(mti->mti_fsname));
1016         strncpy(mti->mti_svname, ldd->ldd_svname,
1017                 sizeof(mti->mti_svname));
1018
1019         mti->mti_nid_count = 0;
1020         while (LNetGetId(i++, &id) != -ENOENT) {
1021                 if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND)
1022                         continue;
1023
1024                 /* server use --servicenode param, only allow specified
1025                  * nids be registered */
1026                 if ((ldd->ldd_flags & LDD_F_NO_PRIMNODE) != 0 &&
1027                     class_match_nid(ldd->ldd_params,
1028                                     PARAM_FAILNODE, id.nid) < 1)
1029                         continue;
1030
1031                 /* match specified network */
1032                 if (!class_match_net(ldd->ldd_params,
1033                                      PARAM_NETWORK, LNET_NIDNET(id.nid)))
1034                         continue;
1035
1036                 mti->mti_nids[mti->mti_nid_count] = id.nid;
1037                 mti->mti_nid_count++;
1038                 if (mti->mti_nid_count >= MTI_NIDS_MAX) {
1039                         CWARN("Only using first %d nids for %s\n",
1040                               mti->mti_nid_count, mti->mti_svname);
1041                         break;
1042                 }
1043         }
1044
1045         mti->mti_lustre_ver = LUSTRE_VERSION_CODE;
1046         mti->mti_config_ver = 0;
1047         if (lsi->lsi_lmd->lmd_flags & LMD_FLG_WRITECONF)
1048                 ldd->ldd_flags |= LDD_F_WRITECONF;
1049         mti->mti_flags = ldd->ldd_flags;
1050         mti->mti_stripe_index = ldd->ldd_svindex;
1051         memcpy(mti->mti_uuid, ldd->ldd_uuid, sizeof(mti->mti_uuid));
1052         if (strlen(ldd->ldd_params) > sizeof(mti->mti_params)) {
1053                 CERROR("params too big for mti\n");
1054                 RETURN(-ENOMEM);
1055         }
1056         memcpy(mti->mti_params, ldd->ldd_params, sizeof(mti->mti_params));
1057         RETURN(0);
1058 }
1059
1060 /* Register an old or new target with the MGS. If needed MGS will construct
1061    startup logs and assign index */
1062 int server_register_target(struct super_block *sb)
1063 {
1064         struct lustre_sb_info *lsi = s2lsi(sb);
1065         struct obd_device *mgc = lsi->lsi_mgc;
1066         struct lustre_disk_data *ldd = lsi->lsi_ldd;
1067         struct mgs_target_info *mti = NULL;
1068         bool writeconf;
1069         int rc;
1070         ENTRY;
1071
1072         LASSERT(mgc);
1073
1074         if (!(lsi->lsi_flags & LSI_SERVER))
1075                 RETURN(-EINVAL);
1076
1077         OBD_ALLOC_PTR(mti);
1078         if (!mti)
1079                 RETURN(-ENOMEM);
1080         rc = server_sb2mti(sb, mti);
1081         if (rc)
1082                 GOTO(out, rc);
1083
1084         CDEBUG(D_MOUNT, "Registration %s, fs=%s, %s, index=%04x, flags=%#x\n",
1085                mti->mti_svname, mti->mti_fsname,
1086                libcfs_nid2str(mti->mti_nids[0]), mti->mti_stripe_index,
1087                mti->mti_flags);
1088
1089         /* if write_conf is true, the registration must succeed */
1090         writeconf = !!(ldd->ldd_flags & (LDD_F_NEED_INDEX | LDD_F_UPDATE));
1091         mti->mti_flags |= LDD_F_OPC_REG;
1092
1093         /* Register the target */
1094         /* FIXME use mgc_process_config instead */
1095         rc = obd_set_info_async(mgc->u.cli.cl_mgc_mgsexp,
1096                                 sizeof(KEY_REGISTER_TARGET), KEY_REGISTER_TARGET,
1097                                 sizeof(*mti), mti, NULL);
1098         if (rc) {
1099                 if (mti->mti_flags & LDD_F_ERROR) {
1100                         LCONSOLE_ERROR_MSG(0x160,
1101                                 "The MGS is refusing to allow this "
1102                                 "server (%s) to start. Please see messages"
1103                                 " on the MGS node.\n", ldd->ldd_svname);
1104                 } else if (writeconf) {
1105                         LCONSOLE_ERROR_MSG(0x15f,
1106                                 "Communication to the MGS return error %d. "
1107                                 "Is the MGS running?\n", rc);
1108                 } else {
1109                         CERROR("Cannot talk to the MGS: %d, not fatal\n", rc);
1110                         /* reset the error code for non-fatal error. */
1111                         rc = 0;
1112                 }
1113                 GOTO(out, rc);
1114         }
1115
1116         /* Always update our flags */
1117         ldd->ldd_flags = mti->mti_flags & LDD_F_ONDISK_MASK;
1118
1119         /* If this flag is set, it means the MGS wants us to change our
1120            on-disk data. (So far this means just the index.) */
1121         if (mti->mti_flags & LDD_F_REWRITE_LDD) {
1122                 char *label;
1123                 int err;
1124                 CDEBUG(D_MOUNT, "Changing on-disk index from %#x to %#x "
1125                        "for %s\n", ldd->ldd_svindex, mti->mti_stripe_index,
1126                        mti->mti_svname);
1127                 ldd->ldd_svindex = mti->mti_stripe_index;
1128                 strncpy(ldd->ldd_svname, mti->mti_svname,
1129                         sizeof(ldd->ldd_svname));
1130                 /* or ldd_make_sv_name(ldd); */
1131                 ldd_write(&mgc->obd_lvfs_ctxt, ldd);
1132                 err = fsfilt_set_label(mgc, lsi->lsi_srv_mnt->mnt_sb,
1133                                        mti->mti_svname);
1134                 if (err)
1135                         CERROR("Label set error %d\n", err);
1136                 label = fsfilt_get_label(mgc, lsi->lsi_srv_mnt->mnt_sb);
1137                 if (label)
1138                         CDEBUG(D_MOUNT, "Disk label changed to %s\n", label);
1139
1140                 /* Flush the new ldd to disk */
1141                 fsfilt_sync(mgc, lsi->lsi_srv_mnt->mnt_sb);
1142         }
1143
1144 out:
1145         if (mti)
1146                 OBD_FREE_PTR(mti);
1147         RETURN(rc);
1148 }
1149
1150 /**
1151  * Notify the MGS that this target is ready.
1152  * Used by IR - if the MGS receives this message, it will notify clients.
1153  */
1154 static int server_notify_target(struct super_block *sb, struct obd_device *obd)
1155 {
1156         struct lustre_sb_info *lsi = s2lsi(sb);
1157         struct obd_device *mgc = lsi->lsi_mgc;
1158         struct mgs_target_info *mti = NULL;
1159         int rc;
1160         ENTRY;
1161
1162         LASSERT(mgc);
1163
1164         if (!(lsi->lsi_flags & LSI_SERVER))
1165                 RETURN(-EINVAL);
1166
1167         OBD_ALLOC_PTR(mti);
1168         if (!mti)
1169                 RETURN(-ENOMEM);
1170         rc = server_sb2mti(sb, mti);
1171         if (rc)
1172                 GOTO(out, rc);
1173
1174         mti->mti_instance = obd->u.obt.obt_instance;
1175         mti->mti_flags |= LDD_F_OPC_READY;
1176
1177         /* FIXME use mgc_process_config instead */
1178         rc = obd_set_info_async(mgc->u.cli.cl_mgc_mgsexp,
1179                                 sizeof(KEY_REGISTER_TARGET),
1180                                 KEY_REGISTER_TARGET,
1181                                 sizeof(*mti), mti, NULL);
1182
1183         /* Imperative recovery: if the mgs informs us to use IR? */
1184         if (!rc && !(mti->mti_flags & LDD_F_ERROR) &&
1185             (mti->mti_flags & LDD_F_IR_CAPABLE))
1186                 lsi->lsi_flags |= LSI_IR_CAPABLE;
1187
1188 out:
1189         if (mti)
1190                 OBD_FREE_PTR(mti);
1191         RETURN(rc);
1192
1193 }
1194
1195 /** Start server targets: MDTs and OSTs
1196  */
1197 static int server_start_targets(struct super_block *sb, struct vfsmount *mnt)
1198 {
1199         struct obd_device *obd;
1200         struct lustre_sb_info *lsi = s2lsi(sb);
1201         struct config_llog_instance cfg;
1202         int rc;
1203         ENTRY;
1204
1205         CDEBUG(D_MOUNT, "starting target %s\n", lsi->lsi_ldd->ldd_svname);
1206
1207 #if 0
1208         /* If we're an MDT, make sure the global MDS is running */
1209         if (lsi->lsi_ldd->ldd_flags & LDD_F_SV_TYPE_MDT) {
1210                 /* make sure the MDS is started */
1211                 cfs_mutex_down(&server_start_lock);
1212                 obd = class_name2obd(LUSTRE_MDS_OBDNAME);
1213                 if (!obd) {
1214                         rc = lustre_start_simple(LUSTRE_MDS_OBDNAME,
1215                     /* FIXME pre-rename, should eventually be LUSTRE_MDS_NAME */
1216                                                  LUSTRE_MDT_NAME,
1217                                                  LUSTRE_MDS_OBDNAME"_uuid",
1218                                                  0, 0);
1219                         if (rc) {
1220                                 cfs_mutex_up(&server_start_lock);
1221                                 CERROR("failed to start MDS: %d\n", rc);
1222                                 RETURN(rc);
1223                         }
1224                 }
1225                 cfs_mutex_up(&server_start_lock);
1226         }
1227 #endif
1228
1229         /* If we're an OST, make sure the global OSS is running */
1230         if (IS_OST(lsi->lsi_ldd)) {
1231                 /* make sure OSS is started */
1232                 cfs_mutex_down(&server_start_lock);
1233                 obd = class_name2obd(LUSTRE_OSS_OBDNAME);
1234                 if (!obd) {
1235                         rc = lustre_start_simple(LUSTRE_OSS_OBDNAME,
1236                                                  LUSTRE_OSS_NAME,
1237                                                  LUSTRE_OSS_OBDNAME"_uuid",
1238                                                  0, 0);
1239                         if (rc) {
1240                                 cfs_mutex_up(&server_start_lock);
1241                                 CERROR("failed to start OSS: %d\n", rc);
1242                                 RETURN(rc);
1243                         }
1244                 }
1245                 cfs_mutex_up(&server_start_lock);
1246         }
1247
1248         /* Set the mgc fs to our server disk.  This allows the MGC to
1249          * read and write configs locally, in case it can't talk to the MGS. */
1250         rc = server_mgc_set_fs(lsi->lsi_mgc, sb);
1251         if (rc)
1252                 RETURN(rc);
1253
1254         /* Register with MGS */
1255         rc = server_register_target(sb);
1256         if (rc)
1257                 GOTO(out_mgc, rc);
1258
1259         /* Let the target look up the mount using the target's name
1260            (we can't pass the sb or mnt through class_process_config.) */
1261         rc = server_register_mount(lsi->lsi_ldd->ldd_svname, sb, mnt);
1262         if (rc)
1263                 GOTO(out_mgc, rc);
1264
1265         /* Start targets using the llog named for the target */
1266         memset(&cfg, 0, sizeof(cfg));
1267         rc = lustre_process_log(sb, lsi->lsi_ldd->ldd_svname, &cfg);
1268         if (rc) {
1269                 CERROR("failed to start server %s: %d\n",
1270                        lsi->lsi_ldd->ldd_svname, rc);
1271                 /* Do NOT call server_deregister_mount() here. This makes it
1272                  * impossible to find mount later in cleanup time and leaves
1273                  * @lsi and othder stuff leaked. -umka */
1274                 GOTO(out_mgc, rc);
1275         }
1276
1277 out_mgc:
1278         /* Release the mgc fs for others to use */
1279         server_mgc_clear_fs(lsi->lsi_mgc);
1280
1281         if (!rc) {
1282                 obd = class_name2obd(lsi->lsi_ldd->ldd_svname);
1283                 if (!obd) {
1284                         CERROR("no server named %s was started\n",
1285                                lsi->lsi_ldd->ldd_svname);
1286                         RETURN(-ENXIO);
1287                 }
1288
1289                 if ((lsi->lsi_lmd->lmd_flags & LMD_FLG_ABORT_RECOV) &&
1290                     (OBP(obd, iocontrol))) {
1291                         obd_iocontrol(OBD_IOC_ABORT_RECOVERY,
1292                                       obd->obd_self_export, 0, NULL, NULL);
1293                 }
1294
1295                 server_notify_target(sb, obd);
1296
1297                 /* calculate recovery timeout, do it after lustre_process_log */
1298                 server_calc_timeout(lsi, obd);
1299
1300                 /* log has been fully processed */
1301                 obd_notify(obd, NULL, OBD_NOTIFY_CONFIG, (void *)CONFIG_LOG);
1302         }
1303
1304         RETURN(rc);
1305 }
1306
1307 /***************** lustre superblock **************/
1308
1309 struct lustre_sb_info *lustre_init_lsi(struct super_block *sb)
1310 {
1311         struct lustre_sb_info *lsi;
1312         ENTRY;
1313
1314         OBD_ALLOC_PTR(lsi);
1315         if (!lsi)
1316                 RETURN(NULL);
1317         OBD_ALLOC_PTR(lsi->lsi_lmd);
1318         if (!lsi->lsi_lmd) {
1319                 OBD_FREE_PTR(lsi);
1320                 RETURN(NULL);
1321         }
1322
1323         lsi->lsi_lmd->lmd_exclude_count = 0;
1324         lsi->lsi_lmd->lmd_recovery_time_soft = 0;
1325         lsi->lsi_lmd->lmd_recovery_time_hard = 0;
1326         s2lsi_nocast(sb) = lsi;
1327         /* we take 1 extra ref for our setup */
1328         cfs_atomic_set(&lsi->lsi_mounts, 1);
1329
1330         /* Default umount style */
1331         lsi->lsi_flags = LSI_UMOUNT_FAILOVER;
1332
1333         RETURN(lsi);
1334 }
1335
1336 static int lustre_free_lsi(struct super_block *sb)
1337 {
1338         struct lustre_sb_info *lsi = s2lsi(sb);
1339         ENTRY;
1340
1341         LASSERT(lsi != NULL);
1342         CDEBUG(D_MOUNT, "Freeing lsi %p\n", lsi);
1343
1344         /* someone didn't call server_put_mount. */
1345         LASSERT(cfs_atomic_read(&lsi->lsi_mounts) == 0);
1346
1347         if (lsi->lsi_ldd != NULL)
1348                 OBD_FREE(lsi->lsi_ldd, sizeof(*lsi->lsi_ldd));
1349
1350         if (lsi->lsi_lmd != NULL) {
1351                 if (lsi->lsi_lmd->lmd_dev != NULL)
1352                         OBD_FREE(lsi->lsi_lmd->lmd_dev,
1353                                  strlen(lsi->lsi_lmd->lmd_dev) + 1);
1354                 if (lsi->lsi_lmd->lmd_profile != NULL)
1355                         OBD_FREE(lsi->lsi_lmd->lmd_profile,
1356                                  strlen(lsi->lsi_lmd->lmd_profile) + 1);
1357                 if (lsi->lsi_lmd->lmd_mgssec != NULL)
1358                         OBD_FREE(lsi->lsi_lmd->lmd_mgssec,
1359                                  strlen(lsi->lsi_lmd->lmd_mgssec) + 1);
1360                 if (lsi->lsi_lmd->lmd_opts != NULL)
1361                         OBD_FREE(lsi->lsi_lmd->lmd_opts,
1362                                  strlen(lsi->lsi_lmd->lmd_opts) + 1);
1363                 if (lsi->lsi_lmd->lmd_exclude_count)
1364                         OBD_FREE(lsi->lsi_lmd->lmd_exclude,
1365                                  sizeof(lsi->lsi_lmd->lmd_exclude[0]) *
1366                                  lsi->lsi_lmd->lmd_exclude_count);
1367                 OBD_FREE(lsi->lsi_lmd, sizeof(*lsi->lsi_lmd));
1368         }
1369
1370         LASSERT(lsi->lsi_llsbi == NULL);
1371         OBD_FREE(lsi, sizeof(*lsi));
1372         s2lsi_nocast(sb) = NULL;
1373
1374         RETURN(0);
1375 }
1376
1377 /* The lsi has one reference for every server that is using the disk -
1378    e.g. MDT, MGS, and potentially MGC */
1379 static int lustre_put_lsi(struct super_block *sb)
1380 {
1381         struct lustre_sb_info *lsi = s2lsi(sb);
1382         ENTRY;
1383
1384         LASSERT(lsi != NULL);
1385
1386         CDEBUG(D_MOUNT, "put %p %d\n", sb, cfs_atomic_read(&lsi->lsi_mounts));
1387         if (cfs_atomic_dec_and_test(&lsi->lsi_mounts)) {
1388                 lustre_free_lsi(sb);
1389                 RETURN(1);
1390         }
1391         RETURN(0);
1392 }
1393
1394 /*************** server mount ******************/
1395
1396 /** Kernel mount using mount options in MOUNT_DATA_FILE.
1397  * Since this file lives on the disk, we pre-mount using a common
1398  * type, read the file, then re-mount using the type specified in the
1399  * file.
1400  */
1401 static struct vfsmount *server_kernel_mount(struct super_block *sb)
1402 {
1403         struct lvfs_run_ctxt mount_ctxt;
1404         struct lustre_sb_info *lsi = s2lsi(sb);
1405         struct lustre_disk_data *ldd;
1406         struct lustre_mount_data *lmd = lsi->lsi_lmd;
1407         struct vfsmount *mnt;
1408         char *options = NULL;
1409         unsigned long page, s_flags;
1410         struct page *__page;
1411         int len;
1412         int rc;
1413         ENTRY;
1414
1415         OBD_ALLOC(ldd, sizeof(*ldd));
1416         if (!ldd)
1417                 RETURN(ERR_PTR(-ENOMEM));
1418
1419         /* In the past, we have always used flags = 0.
1420            Note ext3/ldiskfs can't be mounted ro. */
1421         s_flags = sb->s_flags;
1422
1423         /* allocate memory for options */
1424         OBD_PAGE_ALLOC(__page, CFS_ALLOC_STD);
1425         if (!__page)
1426                 GOTO(out_free, rc = -ENOMEM);
1427         page = (unsigned long)cfs_page_address(__page);
1428         options = (char *)page;
1429         memset(options, 0, CFS_PAGE_SIZE);
1430
1431         /* mount-line options must be added for pre-mount because it may
1432          * contain mount options such as journal_dev which are required
1433          * to mount successfuly the underlying filesystem */
1434         if (lmd->lmd_opts && (*(lmd->lmd_opts) != 0))
1435                 strncat(options, lmd->lmd_opts, CFS_PAGE_SIZE - 1);
1436
1437         /* Pre-mount ldiskfs to read the MOUNT_DATA_FILE */
1438         CDEBUG(D_MOUNT, "Pre-mount ldiskfs %s\n", lmd->lmd_dev);
1439         mnt = ll_kern_mount("ldiskfs", s_flags, lmd->lmd_dev, (void *)options);
1440         if (IS_ERR(mnt)) {
1441                 rc = PTR_ERR(mnt);
1442                 CERROR("premount %s:%#lx ldiskfs failed: %d "
1443                         "Is the ldiskfs module available?\n",
1444                         lmd->lmd_dev, s_flags, rc );
1445                 GOTO(out_free, rc);
1446         }
1447
1448         OBD_SET_CTXT_MAGIC(&mount_ctxt);
1449         mount_ctxt.pwdmnt = mnt;
1450         mount_ctxt.pwd = mnt->mnt_root;
1451         mount_ctxt.fs = get_ds();
1452
1453         rc = ldd_parse(&mount_ctxt, ldd);
1454         unlock_mntput(mnt);
1455
1456         if (rc) {
1457                 CERROR("premount parse options failed: rc = %d\n", rc);
1458                 GOTO(out_free, rc);
1459         }
1460
1461         /* Done with our pre-mount, now do the real mount. */
1462
1463         /* Glom up mount options */
1464         memset(options, 0, CFS_PAGE_SIZE);
1465         strncpy(options, ldd->ldd_mount_opts, CFS_PAGE_SIZE - 2);
1466
1467         len = CFS_PAGE_SIZE - strlen(options) - 2;
1468         if (*options != 0)
1469                 strcat(options, ",");
1470         strncat(options, "no_mbcache", len);
1471
1472         /* Add in any mount-line options */
1473         if (lmd->lmd_opts && (*(lmd->lmd_opts) != 0)) {
1474                 len = CFS_PAGE_SIZE - strlen(options) - 2;
1475                 strcat(options, ",");
1476                 strncat(options, lmd->lmd_opts, len);
1477         }
1478
1479         /* Special permanent mount flags */
1480         if (IS_OST(ldd))
1481             s_flags |= MS_NOATIME | MS_NODIRATIME;
1482
1483         CDEBUG(D_MOUNT, "kern_mount: %s %s %s\n",
1484                MT_STR(ldd), lmd->lmd_dev, options);
1485         mnt = ll_kern_mount(MT_STR(ldd), s_flags, lmd->lmd_dev,
1486                             (void *)options);
1487         if (IS_ERR(mnt)) {
1488                 rc = PTR_ERR(mnt);
1489                 CERROR("ll_kern_mount failed: rc = %d\n", rc);
1490                 GOTO(out_free, rc);
1491         }
1492
1493         if (lmd->lmd_flags & LMD_FLG_ABORT_RECOV)
1494                 simple_truncate(mnt->mnt_sb->s_root, mnt, LAST_RCVD,
1495                                 LR_CLIENT_START);
1496
1497         OBD_PAGE_FREE(__page);
1498         lsi->lsi_ldd = ldd;   /* freed at lsi cleanup */
1499         CDEBUG(D_SUPER, "%s: mnt = %p\n", lmd->lmd_dev, mnt);
1500         RETURN(mnt);
1501
1502 out_free:
1503         if (__page)
1504                 OBD_PAGE_FREE(__page);
1505         OBD_FREE(ldd, sizeof(*ldd));
1506         lsi->lsi_ldd = NULL;
1507         RETURN(ERR_PTR(rc));
1508 }
1509
1510 /** Wait here forever until the mount refcount is 0 before completing umount,
1511  * else we risk dereferencing a null pointer.
1512  * LNET may take e.g. 165s before killing zombies.
1513  */
1514 static void server_wait_finished(struct vfsmount *mnt)
1515 {
1516        cfs_waitq_t             waitq;
1517        int                     rc, waited = 0;
1518        cfs_sigset_t            blocked;
1519
1520        cfs_waitq_init(&waitq);
1521
1522        while (mnt_get_count(mnt) > 1) {
1523                if (waited && (waited % 30 == 0))
1524                        LCONSOLE_WARN("Mount still busy with %d refs after "
1525                                       "%d secs.\n",
1526                                       mnt_get_count(mnt),
1527                                       waited);
1528                /* Cannot use l_event_wait() for an interruptible sleep. */
1529                waited += 3;
1530                blocked = cfs_block_sigsinv(sigmask(SIGKILL));
1531                cfs_waitq_wait_event_interruptible_timeout(
1532                        waitq,
1533                        (mnt_get_count(mnt) == 1),
1534                        cfs_time_seconds(3),
1535                        rc);
1536                cfs_block_sigs(blocked);
1537                if (rc < 0) {
1538                        LCONSOLE_EMERG("Danger: interrupted umount %s with "
1539                                       "%d refs!\n", mnt->mnt_devname,
1540                                       mnt_get_count(mnt));
1541                        break;
1542                }
1543
1544        }
1545 }
1546
1547 /** Start the shutdown of servers at umount.
1548  */
1549 static void server_put_super(struct super_block *sb)
1550 {
1551         struct lustre_sb_info *lsi = s2lsi(sb);
1552         struct obd_device     *obd;
1553         struct vfsmount       *mnt = lsi->lsi_srv_mnt;
1554         char *tmpname, *extraname = NULL;
1555         int tmpname_sz;
1556         int lddflags = lsi->lsi_ldd->ldd_flags;
1557         int lsiflags = lsi->lsi_flags;
1558         ENTRY;
1559
1560         LASSERT(lsiflags & LSI_SERVER);
1561
1562         tmpname_sz = strlen(lsi->lsi_ldd->ldd_svname) + 1;
1563         OBD_ALLOC(tmpname, tmpname_sz);
1564         memcpy(tmpname, lsi->lsi_ldd->ldd_svname, tmpname_sz);
1565         CDEBUG(D_MOUNT, "server put_super %s\n", tmpname);
1566         if (IS_MDT(lsi->lsi_ldd) && (lsi->lsi_lmd->lmd_flags & LMD_FLG_NOSVC))
1567                 snprintf(tmpname, tmpname_sz, "MGS");
1568
1569         /* Stop the target */
1570         if (!(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOSVC) &&
1571             (IS_MDT(lsi->lsi_ldd) || IS_OST(lsi->lsi_ldd))) {
1572                 struct lustre_profile *lprof = NULL;
1573
1574                 /* tell the mgc to drop the config log */
1575                 lustre_end_log(sb, lsi->lsi_ldd->ldd_svname, NULL);
1576
1577                 /* COMPAT_146 - profile may get deleted in mgc_cleanup.
1578                    If there are any setup/cleanup errors, save the lov
1579                    name for safety cleanup later. */
1580                 lprof = class_get_profile(lsi->lsi_ldd->ldd_svname);
1581                 if (lprof && lprof->lp_dt) {
1582                         OBD_ALLOC(extraname, strlen(lprof->lp_dt) + 1);
1583                         strcpy(extraname, lprof->lp_dt);
1584                 }
1585
1586                 obd = class_name2obd(lsi->lsi_ldd->ldd_svname);
1587                 if (obd) {
1588                         CDEBUG(D_MOUNT, "stopping %s\n", obd->obd_name);
1589                         if (lsi->lsi_flags & LSI_UMOUNT_FAILOVER)
1590                                 obd->obd_fail = 1;
1591                         /* We can't seem to give an error return code
1592                          * to .put_super, so we better make sure we clean up! */
1593                         obd->obd_force = 1;
1594                         class_manual_cleanup(obd);
1595                 } else {
1596                         CERROR("no obd %s\n", lsi->lsi_ldd->ldd_svname);
1597                         server_deregister_mount(lsi->lsi_ldd->ldd_svname);
1598                 }
1599         }
1600
1601         /* If they wanted the mgs to stop separately from the mdt, they
1602            should have put it on a different device. */
1603         if (IS_MGS(lsi->lsi_ldd)) {
1604                 /* if MDS start with --nomgs, don't stop MGS then */
1605                 if (!(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOMGS)) {
1606                         char *logname;
1607
1608                         OBD_ALLOC(logname, MGS_PARAM_MAXLEN);
1609                         if (!logname) {
1610                                 LCONSOLE_WARN("Stopping mgs failed %d, please "
1611                                               "try again.", -ENOMEM);
1612                                 return;
1613                         }
1614                         strcpy(logname, lsi->lsi_ldd->ldd_fsname);
1615                         strcat(logname, "-params");
1616                         /* tell the mgc to drop parameter config log */
1617                         lustre_end_log(sb, logname, NULL);
1618                         OBD_FREE(logname, MGS_PARAM_MAXLEN);
1619
1620                         server_stop_mgs(sb);
1621                 }
1622         }
1623
1624         /* Clean the mgc and sb */
1625         lustre_common_put_super(sb);
1626
1627         /* Wait for the targets to really clean up - can't exit (and let the
1628            sb get destroyed) while the mount is still in use */
1629         server_wait_finished(mnt);
1630
1631         /* drop the One True Mount */
1632         unlock_mntput(mnt);
1633
1634         /* Stop the servers (MDS, OSS) if no longer needed.  We must wait
1635            until the target is really gone so that our type refcount check
1636            is right. */
1637         server_stop_servers(lddflags, lsiflags);
1638
1639         /* In case of startup or cleanup err, stop related obds */
1640         if (extraname) {
1641                 obd = class_name2obd(extraname);
1642                 if (obd) {
1643                         CWARN("Cleaning orphaned obd %s\n", extraname);
1644                         obd->obd_force = 1;
1645                         class_manual_cleanup(obd);
1646                 }
1647                 OBD_FREE(extraname, strlen(extraname) + 1);
1648         }
1649
1650         LCONSOLE_WARN("server umount %s complete\n", tmpname);
1651         OBD_FREE(tmpname, tmpname_sz);
1652         EXIT;
1653 }
1654
1655 /** Called only for 'umount -f'
1656  */
1657 #ifdef HAVE_UMOUNTBEGIN_VFSMOUNT
1658 static void server_umount_begin(struct vfsmount *vfsmnt, int flags)
1659 {
1660         struct super_block *sb = vfsmnt->mnt_sb;
1661 #else
1662 static void server_umount_begin(struct super_block *sb)
1663 {
1664 #endif
1665         struct lustre_sb_info *lsi = s2lsi(sb);
1666         ENTRY;
1667
1668 #ifdef HAVE_UMOUNTBEGIN_VFSMOUNT
1669         if (!(flags & MNT_FORCE)) {
1670                 EXIT;
1671                 return;
1672         }
1673 #endif
1674
1675         CDEBUG(D_MOUNT, "umount -f\n");
1676         /* umount = failover
1677            umount -f = force
1678            no third way to do non-force, non-failover */
1679         lsi->lsi_flags &= ~LSI_UMOUNT_FAILOVER;
1680         lsi->lsi_flags |= LSI_UMOUNT_FORCE;
1681         EXIT;
1682 }
1683
1684 #ifndef HAVE_STATFS_DENTRY_PARAM
1685 static int server_statfs (struct super_block *sb, cfs_kstatfs_t *buf)
1686 {
1687 #else
1688 static int server_statfs (struct dentry *dentry, cfs_kstatfs_t *buf)
1689 {
1690         struct super_block *sb = dentry->d_sb;
1691 #endif
1692         struct vfsmount *mnt = s2lsi(sb)->lsi_srv_mnt;
1693         ENTRY;
1694
1695         if (mnt && mnt->mnt_sb && mnt->mnt_sb->s_op->statfs) {
1696 #ifdef HAVE_STATFS_DENTRY_PARAM
1697                 int rc = mnt->mnt_sb->s_op->statfs(mnt->mnt_root, buf);
1698 #else
1699                 int rc = mnt->mnt_sb->s_op->statfs(mnt->mnt_sb, buf);
1700 #endif
1701                 if (!rc) {
1702                         buf->f_type = sb->s_magic;
1703                         RETURN(0);
1704                 }
1705         }
1706
1707         /* just return 0 */
1708         buf->f_type = sb->s_magic;
1709         buf->f_bsize = sb->s_blocksize;
1710         buf->f_blocks = 1;
1711         buf->f_bfree = 0;
1712         buf->f_bavail = 0;
1713         buf->f_files = 1;
1714         buf->f_ffree = 0;
1715         buf->f_namelen = NAME_MAX;
1716         RETURN(0);
1717 }
1718
1719 /** The operations we support directly on the superblock:
1720  * mount, umount, and df.
1721  */
1722 static struct super_operations server_ops =
1723 {
1724         .put_super      = server_put_super,
1725         .umount_begin   = server_umount_begin, /* umount -f */
1726         .statfs         = server_statfs,
1727 };
1728
1729 #define log2(n) cfs_ffz(~(n))
1730 #define LUSTRE_SUPER_MAGIC 0x0BD00BD1
1731
1732 static int server_fill_super_common(struct super_block *sb)
1733 {
1734         struct inode *root = 0;
1735         ENTRY;
1736
1737         CDEBUG(D_MOUNT, "Server sb, dev=%d\n", (int)sb->s_dev);
1738
1739         sb->s_blocksize = 4096;
1740         sb->s_blocksize_bits = log2(sb->s_blocksize);
1741         sb->s_magic = LUSTRE_SUPER_MAGIC;
1742         sb->s_maxbytes = 0; //PAGE_CACHE_MAXBYTES;
1743         sb->s_flags |= MS_RDONLY;
1744         sb->s_op = &server_ops;
1745
1746         root = new_inode(sb);
1747         if (!root) {
1748                 CERROR("Can't make root inode\n");
1749                 RETURN(-EIO);
1750         }
1751
1752         /* returns -EIO for every operation */
1753         /* make_bad_inode(root); -- badness - can't umount */
1754         /* apparently we need to be a directory for the mount to finish */
1755         root->i_mode = S_IFDIR;
1756
1757         sb->s_root = d_alloc_root(root);
1758         if (!sb->s_root) {
1759                 CERROR("Can't make root dentry\n");
1760                 iput(root);
1761                 RETURN(-EIO);
1762         }
1763
1764         RETURN(0);
1765 }
1766
1767 /** Fill in the superblock info for a Lustre server.
1768  * Mount the device with the correct options.
1769  * Read the on-disk config file.
1770  * Start the services.
1771  */
1772 static int server_fill_super(struct super_block *sb)
1773 {
1774         struct lustre_sb_info *lsi = s2lsi(sb);
1775         struct vfsmount *mnt;
1776         int rc;
1777         ENTRY;
1778
1779         /* the One True Mount */
1780         mnt = server_kernel_mount(sb);
1781         if (IS_ERR(mnt)) {
1782                 rc = PTR_ERR(mnt);
1783                 CERROR("Unable to mount device %s: %d\n",
1784                        lsi->lsi_lmd->lmd_dev, rc);
1785                 lustre_put_lsi(sb);
1786                 RETURN(rc);
1787         }
1788         lsi->lsi_srv_mnt = mnt;
1789
1790         LASSERT(lsi->lsi_ldd);
1791         CDEBUG(D_MOUNT, "Found service %s for fs '%s' on device %s\n",
1792                lsi->lsi_ldd->ldd_svname, lsi->lsi_ldd->ldd_fsname,
1793                lsi->lsi_lmd->lmd_dev);
1794
1795         if (class_name2obd(lsi->lsi_ldd->ldd_svname)) {
1796                 LCONSOLE_ERROR_MSG(0x161, "The target named %s is already "
1797                                    "running. Double-mount may have compromised"
1798                                    " the disk journal.\n",
1799                                    lsi->lsi_ldd->ldd_svname);
1800                 lustre_put_lsi(sb);
1801                 unlock_mntput(mnt);
1802                 RETURN(-EALREADY);
1803         }
1804
1805         /* Start MGS before MGC */
1806         if (IS_MGS(lsi->lsi_ldd) && !(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOMGS)){
1807                 rc = server_start_mgs(sb);
1808                 if (rc)
1809                         GOTO(out_mnt, rc);
1810         }
1811
1812         /* Start MGC before servers */
1813         rc = lustre_start_mgc(sb);
1814         if (rc)
1815                 GOTO(out_mnt, rc);
1816
1817         /* Set up all obd devices for service */
1818         if (!(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOSVC) &&
1819                 (IS_OST(lsi->lsi_ldd) || IS_MDT(lsi->lsi_ldd))) {
1820                 rc = server_start_targets(sb, mnt);
1821                 if (rc < 0) {
1822                         CERROR("Unable to start targets: %d\n", rc);
1823                         GOTO(out_mnt, rc);
1824                 }
1825         /* FIXME overmount client here,
1826            or can we just start a client log and client_fill_super on this sb?
1827            We need to make sure server_put_super gets called too - ll_put_super
1828            calls lustre_common_put_super; check there for LSI_SERVER flag,
1829            call s_p_s if so.
1830            Probably should start client from new thread so we can return.
1831            Client will not finish until all servers are connected.
1832            Note - MGS-only server does NOT get a client, since there is no
1833            lustre fs associated - the MGS is for all lustre fs's */
1834         } else if (IS_MGS(lsi->lsi_ldd) &&
1835                    !(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOMGS)){
1836                 struct config_llog_instance cfg;
1837                 char *logname;
1838
1839                 OBD_ALLOC(logname, MGS_PARAM_MAXLEN);
1840                 if (logname == NULL)
1841                         GOTO(out_mnt, rc = -ENOMEM);
1842                 strcpy(logname, lsi->lsi_ldd->ldd_fsname);
1843                 strcat(logname, "-params");
1844
1845                 memset(&cfg, 0, sizeof(cfg));
1846                 rc = lustre_process_log(sb, logname, &cfg);
1847                 OBD_FREE(logname, MGS_PARAM_MAXLEN);
1848                 if (rc) {
1849                         CERROR("failed to process parameters %s: %d\n",
1850                                logname, rc);
1851                         GOTO(out_mnt, rc);
1852                 }
1853         }
1854
1855         rc = server_fill_super_common(sb);
1856         if (rc)
1857                 GOTO(out_mnt, rc);
1858
1859         RETURN(0);
1860 out_mnt:
1861         /* We jump here in case of failure while starting targets or MGS.
1862          * In this case we can't just put @mnt and have to do real cleanup
1863          * with stoping targets, etc. */
1864         server_put_super(sb);
1865         return rc;
1866 }
1867
1868 /* Get the index from the obd name.
1869    rc = server type, or
1870    rc < 0  on error
1871    if endptr isn't NULL it is set to end of name */
1872 int server_name2index(char *svname, __u32 *idx, char **endptr)
1873 {
1874         unsigned long index;
1875         int rc;
1876         char *dash = strrchr(svname, '-');
1877         if (!dash)
1878                 return(-EINVAL);
1879
1880         /* intepret <fsname>-MDTXXXXX-mdc as mdt, the better way is to pass
1881          * in the fsname, then determine the server index */
1882         if (!strcmp(LUSTRE_MDC_NAME, dash + 1)) {
1883                 dash--;
1884                 for (; dash > svname && *dash != '-'; dash--);
1885                 if (dash == svname)
1886                         return(-EINVAL);
1887         }
1888
1889         if (strncmp(dash + 1, "MDT", 3) == 0)
1890                 rc = LDD_F_SV_TYPE_MDT;
1891         else if (strncmp(dash + 1, "OST", 3) == 0)
1892                 rc = LDD_F_SV_TYPE_OST;
1893         else
1894                 return(-EINVAL);
1895         if (strcmp(dash + 4, "all") == 0)
1896                 return rc | LDD_F_SV_ALL;
1897
1898         index = simple_strtoul(dash + 4, endptr, 16);
1899         *idx = index;
1900         return rc;
1901 }
1902
1903 /*
1904  * Calculate timeout value for a target.
1905  */
1906 void server_calc_timeout(struct lustre_sb_info *lsi, struct obd_device *obd)
1907 {
1908         struct lustre_mount_data *lmd;
1909         int soft = 0;
1910         int hard = 0;
1911         int factor = 0;
1912         bool has_ir = !!(lsi->lsi_flags & LSI_IR_CAPABLE);
1913         int min = OBD_RECOVERY_TIME_MIN;
1914
1915         LASSERT(lsi->lsi_flags & LSI_SERVER);
1916
1917         lmd = lsi->lsi_lmd;
1918         if (lmd) {
1919                 soft   = lmd->lmd_recovery_time_soft;
1920                 hard   = lmd->lmd_recovery_time_hard;
1921                 has_ir = has_ir && !(lmd->lmd_flags & LMD_FLG_NOIR);
1922                 obd->obd_no_ir = !has_ir;
1923         }
1924
1925         if (soft == 0)
1926                 soft = OBD_RECOVERY_TIME_SOFT;
1927         if (hard == 0)
1928                 hard = OBD_RECOVERY_TIME_HARD;
1929
1930         /* target may have ir_factor configured. */
1931         factor = OBD_IR_FACTOR_DEFAULT;
1932         if (obd->obd_recovery_ir_factor)
1933                 factor = obd->obd_recovery_ir_factor;
1934
1935         if (has_ir) {
1936                 int new_soft = soft;
1937                 int new_hard = hard;
1938
1939                 /* adjust timeout value by imperative recovery */
1940
1941                 new_soft = (soft * factor) / OBD_IR_FACTOR_MAX;
1942                 new_hard = (hard * factor) / OBD_IR_FACTOR_MAX;
1943
1944                 /* make sure the timeout is not too short */
1945                 new_soft = max(min, new_soft);
1946                 new_hard = max(new_soft, new_hard);
1947
1948                 LCONSOLE_INFO("%s: Imperative Recovery enabled, recovery "
1949                               "window shrunk from %d-%d down to %d-%d\n",
1950                               obd->obd_name, soft, hard, new_soft, new_hard);
1951
1952                 soft = new_soft;
1953                 hard = new_hard;
1954         }
1955
1956         /* we're done */
1957         obd->obd_recovery_timeout   = max(obd->obd_recovery_timeout, soft);
1958         obd->obd_recovery_time_hard = hard;
1959         obd->obd_recovery_ir_factor = factor;
1960 }
1961 EXPORT_SYMBOL(server_calc_timeout);
1962
/*************** mount common between server and client ***************/
1964
1965 /* Common umount */
1966 int lustre_common_put_super(struct super_block *sb)
1967 {
1968         int rc;
1969         ENTRY;
1970
1971         CDEBUG(D_MOUNT, "dropping sb %p\n", sb);
1972
1973         /* Drop a ref to the MGC */
1974         rc = lustre_stop_mgc(sb);
1975         if (rc && (rc != -ENOENT)) {
1976                 if (rc != -EBUSY) {
1977                         CERROR("Can't stop MGC: %d\n", rc);
1978                         RETURN(rc);
1979                 }
1980                 /* BUSY just means that there's some other obd that
1981                    needs the mgc.  Let him clean it up. */
1982                 CDEBUG(D_MOUNT, "MGC still in use\n");
1983         }
1984         /* Drop a ref to the mounted disk */
1985         lustre_put_lsi(sb);
1986         lu_types_stop();
1987         RETURN(rc);
1988 }
1989
1990 static void lmd_print(struct lustre_mount_data *lmd)
1991 {
1992         int i;
1993
1994         PRINT_CMD(PRINT_MASK, "  mount data:\n");
1995         if (lmd_is_client(lmd))
1996                 PRINT_CMD(PRINT_MASK, "profile: %s\n", lmd->lmd_profile);
1997         PRINT_CMD(PRINT_MASK, "device:  %s\n", lmd->lmd_dev);
1998         PRINT_CMD(PRINT_MASK, "flags:   %x\n", lmd->lmd_flags);
1999
2000         if (lmd->lmd_opts)
2001                 PRINT_CMD(PRINT_MASK, "options: %s\n", lmd->lmd_opts);
2002
2003         if (lmd->lmd_recovery_time_soft)
2004                 PRINT_CMD(PRINT_MASK, "recovery time soft: %d\n",
2005                           lmd->lmd_recovery_time_soft);
2006
2007         if (lmd->lmd_recovery_time_hard)
2008                 PRINT_CMD(PRINT_MASK, "recovery time hard: %d\n",
2009                           lmd->lmd_recovery_time_hard);
2010
2011         for (i = 0; i < lmd->lmd_exclude_count; i++) {
2012                 PRINT_CMD(PRINT_MASK, "exclude %d:  OST%04x\n", i,
2013                           lmd->lmd_exclude[i]);
2014         }
2015 }
2016
2017 /* Is this server on the exclusion list */
2018 int lustre_check_exclusion(struct super_block *sb, char *svname)
2019 {
2020         struct lustre_sb_info *lsi = s2lsi(sb);
2021         struct lustre_mount_data *lmd = lsi->lsi_lmd;
2022         __u32 index;
2023         int i, rc;
2024         ENTRY;
2025
2026         rc = server_name2index(svname, &index, NULL);
2027         if (rc != LDD_F_SV_TYPE_OST)
2028                 /* Only exclude OSTs */
2029                 RETURN(0);
2030
2031         CDEBUG(D_MOUNT, "Check exclusion %s (%d) in %d of %s\n", svname,
2032                index, lmd->lmd_exclude_count, lmd->lmd_dev);
2033
2034         for(i = 0; i < lmd->lmd_exclude_count; i++) {
2035                 if (index == lmd->lmd_exclude[i]) {
2036                         CWARN("Excluding %s (on exclusion list)\n", svname);
2037                         RETURN(1);
2038                 }
2039         }
2040         RETURN(0);
2041 }
2042
2043 /* mount -v  -o exclude=lustre-OST0001:lustre-OST0002 -t lustre ... */
2044 static int lmd_make_exclusion(struct lustre_mount_data *lmd, char *ptr)
2045 {
2046         char *s1 = ptr, *s2;
2047         __u32 index, *exclude_list;
2048         int rc = 0, devmax;
2049         ENTRY;
2050
2051         /* The shortest an ost name can be is 8 chars: -OST0000.
2052            We don't actually know the fsname at this time, so in fact
2053            a user could specify any fsname. */
2054         devmax = strlen(ptr) / 8 + 1;
2055
2056         /* temp storage until we figure out how many we have */
2057         OBD_ALLOC(exclude_list, sizeof(index) * devmax);
2058         if (!exclude_list)
2059                 RETURN(-ENOMEM);
2060
2061         /* we enter this fn pointing at the '=' */
2062         while (*s1 && *s1 != ' ' && *s1 != ',') {
2063                 s1++;
2064                 rc = server_name2index(s1, &index, &s2);
2065                 if (rc < 0) {
2066                         CERROR("Can't parse server name '%s'\n", s1);
2067                         break;
2068                 }
2069                 if (rc == LDD_F_SV_TYPE_OST)
2070                         exclude_list[lmd->lmd_exclude_count++] = index;
2071                 else
2072                         CDEBUG(D_MOUNT, "ignoring exclude %.7s\n", s1);
2073                 s1 = s2;
2074                 /* now we are pointing at ':' (next exclude)
2075                    or ',' (end of excludes) */
2076                 if (lmd->lmd_exclude_count >= devmax)
2077                         break;
2078         }
2079         if (rc >= 0) /* non-err */
2080                 rc = 0;
2081
2082         if (lmd->lmd_exclude_count) {
2083                 /* permanent, freed in lustre_free_lsi */
2084                 OBD_ALLOC(lmd->lmd_exclude, sizeof(index) *
2085                           lmd->lmd_exclude_count);
2086                 if (lmd->lmd_exclude) {
2087                         memcpy(lmd->lmd_exclude, exclude_list,
2088                                sizeof(index) * lmd->lmd_exclude_count);
2089                 } else {
2090                         rc = -ENOMEM;
2091                         lmd->lmd_exclude_count = 0;
2092                 }
2093         }
2094         OBD_FREE(exclude_list, sizeof(index) * devmax);
2095         RETURN(rc);
2096 }
2097
2098 static int lmd_parse_mgssec(struct lustre_mount_data *lmd, char *ptr)
2099 {
2100         char   *tail;
2101         int     length;
2102
2103         if (lmd->lmd_mgssec != NULL) {
2104                 OBD_FREE(lmd->lmd_mgssec, strlen(lmd->lmd_mgssec) + 1);
2105                 lmd->lmd_mgssec = NULL;
2106         }
2107
2108         tail = strchr(ptr, ',');
2109         if (tail == NULL)
2110                 length = strlen(ptr);
2111         else
2112                 length = tail - ptr;
2113
2114         OBD_ALLOC(lmd->lmd_mgssec, length + 1);
2115         if (lmd->lmd_mgssec == NULL)
2116                 return -ENOMEM;
2117
2118         memcpy(lmd->lmd_mgssec, ptr, length);
2119         lmd->lmd_mgssec[length] = '\0';
2120         return 0;
2121 }
2122
2123 /** Parse mount line options
2124  * e.g. mount -v -t lustre -o abort_recov uml1:uml2:/lustre-client /mnt/lustre
2125  * dev is passed as device=uml1:/lustre by mount.lustre
2126  */
2127 static int lmd_parse(char *options, struct lustre_mount_data *lmd)
2128 {
2129         char *s1, *s2, *devname = NULL;
2130         struct lustre_mount_data *raw = (struct lustre_mount_data *)options;
2131         int rc = 0;
2132         ENTRY;
2133
2134         LASSERT(lmd);
2135         if (!options) {
2136                 LCONSOLE_ERROR_MSG(0x162, "Missing mount data: check that "
2137                                    "/sbin/mount.lustre is installed.\n");
2138                 RETURN(-EINVAL);
2139         }
2140
2141         /* Options should be a string - try to detect old lmd data */
2142         if ((raw->lmd_magic & 0xffffff00) == (LMD_MAGIC & 0xffffff00)) {
2143                 LCONSOLE_ERROR_MSG(0x163, "You're using an old version of "
2144                                    "/sbin/mount.lustre.  Please install "
2145                                    "version %s\n", LUSTRE_VERSION_STRING);
2146                 RETURN(-EINVAL);
2147         }
2148         lmd->lmd_magic = LMD_MAGIC;
2149
2150         /* Set default flags here */
2151
2152         s1 = options;
2153         while (*s1) {
2154                 int clear = 0;
2155                 int time_min = OBD_RECOVERY_TIME_MIN;
2156
2157                 /* Skip whitespace and extra commas */
2158                 while (*s1 == ' ' || *s1 == ',')
2159                         s1++;
2160
2161                 /* Client options are parsed in ll_options: eg. flock,
2162                    user_xattr, acl */
2163
2164                 /* Parse non-ldiskfs options here. Rather than modifying
2165                    ldiskfs, we just zero these out here */
2166                 if (strncmp(s1, "abort_recov", 11) == 0) {
2167                         lmd->lmd_flags |= LMD_FLG_ABORT_RECOV;
2168                         clear++;
2169                 } else if (strncmp(s1, "recovery_time_soft=", 19) == 0) {
2170                         lmd->lmd_recovery_time_soft = max_t(int,
2171                                 simple_strtoul(s1 + 19, NULL, 10), time_min);
2172                         clear++;
2173                 } else if (strncmp(s1, "recovery_time_hard=", 19) == 0) {
2174                         lmd->lmd_recovery_time_hard = max_t(int,
2175                                 simple_strtoul(s1 + 19, NULL, 10), time_min);
2176                         clear++;
2177                 } else if (strncmp(s1, "noir", 4) == 0) {
2178                         lmd->lmd_flags |= LMD_FLG_NOIR; /* test purpose only. */
2179                         clear++;
2180                 } else if (strncmp(s1, "nosvc", 5) == 0) {
2181                         lmd->lmd_flags |= LMD_FLG_NOSVC;
2182                         clear++;
2183                 } else if (strncmp(s1, "nomgs", 5) == 0) {
2184                         lmd->lmd_flags |= LMD_FLG_NOMGS;
2185                         clear++;
2186                 } else if (strncmp(s1, "writeconf", 9) == 0) {
2187                         lmd->lmd_flags |= LMD_FLG_WRITECONF;
2188                         clear++;
2189                 } else if (strncmp(s1, "mgssec=", 7) == 0) {
2190                         rc = lmd_parse_mgssec(lmd, s1 + 7);
2191                         if (rc)
2192                                 goto invalid;
2193                         clear++;
2194                 /* ost exclusion list */
2195                 } else if (strncmp(s1, "exclude=", 8) == 0) {
2196                         rc = lmd_make_exclusion(lmd, s1 + 7);
2197                         if (rc)
2198                                 goto invalid;
2199                         clear++;
2200                 }
2201                 /* Linux 2.4 doesn't pass the device, so we stuck it at the
2202                    end of the options. */
2203                 else if (strncmp(s1, "device=", 7) == 0) {
2204                         devname = s1 + 7;
2205                         /* terminate options right before device.  device
2206                            must be the last one. */
2207                         *s1 = '\0';
2208                         break;
2209                 }
2210
2211                 /* Find next opt */
2212                 s2 = strchr(s1, ',');
2213                 if (s2 == NULL) {
2214                         if (clear)
2215                                 *s1 = '\0';
2216                         break;
2217                 }
2218                 s2++;
2219                 if (clear)
2220                         memmove(s1, s2, strlen(s2) + 1);
2221                 else
2222                         s1 = s2;
2223         }
2224
2225         if (!devname) {
2226                 LCONSOLE_ERROR_MSG(0x164, "Can't find the device name "
2227                                    "(need mount option 'device=...')\n");
2228                 goto invalid;
2229         }
2230
2231         s1 = strstr(devname, ":/");
2232         if (s1) {
2233                 ++s1;
2234                 lmd->lmd_flags |= LMD_FLG_CLIENT;
2235                 /* Remove leading /s from fsname */
2236                 while (*++s1 == '/') ;
2237                 /* Freed in lustre_free_lsi */
2238                 OBD_ALLOC(lmd->lmd_profile, strlen(s1) + 8);
2239                 if (!lmd->lmd_profile)
2240                         RETURN(-ENOMEM);
2241                 sprintf(lmd->lmd_profile, "%s-client", s1);
2242         }
2243
2244         /* Freed in lustre_free_lsi */
2245         OBD_ALLOC(lmd->lmd_dev, strlen(devname) + 1);
2246         if (!lmd->lmd_dev)
2247                 RETURN(-ENOMEM);
2248         strcpy(lmd->lmd_dev, devname);
2249
2250         /* Save mount options */
2251         s1 = options + strlen(options) - 1;
2252         while (s1 >= options && (*s1 == ',' || *s1 == ' '))
2253                 *s1-- = 0;
2254         if (*options != 0) {
2255                 /* Freed in lustre_free_lsi */
2256                 OBD_ALLOC(lmd->lmd_opts, strlen(options) + 1);
2257                 if (!lmd->lmd_opts)
2258                         RETURN(-ENOMEM);
2259                 strcpy(lmd->lmd_opts, options);
2260         }
2261
2262         lmd_print(lmd);
2263         lmd->lmd_magic = LMD_MAGIC;
2264
2265         RETURN(rc);
2266
2267 invalid:
2268         CERROR("Bad mount options %s\n", options);
2269         RETURN(-EINVAL);
2270 }
2271
/* Argument bundle that lustre_get_sb() hands to lustre_fill_super() through
 * the opaque data pointer.  Field order matters: lustre_get_sb() fills it
 * with a positional initializer. */
struct lustre_mount_data2 {
        void *lmd2_data;              /* mount data; parsed as the option
                                       * string by lmd_parse() */
        struct vfsmount *lmd2_mnt;    /* vfsmount forwarded to
                                       * client_fill_super */
};
2276
2277 /** This is the entry point for the mount call into Lustre.
2278  * This is called when a server or client is mounted,
2279  * and this is where we start setting things up.
2280  * @param data Mount options (e.g. -o flock,abort_recov)
2281  */
2282 int lustre_fill_super(struct super_block *sb, void *data, int silent)
2283 {
2284         struct lustre_mount_data *lmd;
2285         struct lustre_mount_data2 *lmd2 = data;
2286         struct lustre_sb_info *lsi;
2287         int rc;
2288         ENTRY;
2289
2290         CDEBUG(D_MOUNT|D_VFSTRACE, "VFS Op: sb %p\n", sb);
2291
2292         lsi = lustre_init_lsi(sb);
2293         if (!lsi)
2294                 RETURN(-ENOMEM);
2295         lmd = lsi->lsi_lmd;
2296
2297         /*
2298          * Disable lockdep during mount, because mount locking patterns are
2299          * `special'.
2300          */
2301         cfs_lockdep_off();
2302
2303         /*
2304          * LU-639: the obd cleanup of last mount may not finish yet, wait here.
2305          */
2306         obd_zombie_barrier();
2307
2308         /* Figure out the lmd from the mount options */
2309         if (lmd_parse((char *)(lmd2->lmd2_data), lmd)) {
2310                 lustre_put_lsi(sb);
2311                 GOTO(out, rc = -EINVAL);
2312         }
2313
2314         if (lmd_is_client(lmd)) {
2315                 CDEBUG(D_MOUNT, "Mounting client %s\n", lmd->lmd_profile);
2316                 if (!client_fill_super) {
2317                         LCONSOLE_ERROR_MSG(0x165, "Nothing registered for "
2318                                            "client mount! Is the 'lustre' "
2319                                            "module loaded?\n");
2320                         lustre_put_lsi(sb);
2321                         rc = -ENODEV;
2322                 } else {
2323                         rc = lustre_start_mgc(sb);
2324                         if (rc) {
2325                                 lustre_put_lsi(sb);
2326                                 GOTO(out, rc);
2327                         }
2328                         /* Connect and start */
2329                         /* (should always be ll_fill_super) */
2330                         rc = (*client_fill_super)(sb, lmd2->lmd2_mnt);
2331                         /* c_f_s will call lustre_common_put_super on failure */
2332                 }
2333         } else {
2334                 CDEBUG(D_MOUNT, "Mounting server from %s\n", lmd->lmd_dev);
2335                 lsi->lsi_flags |= LSI_SERVER;
2336                 rc = server_fill_super(sb);
2337                 /* s_f_s calls lustre_start_mgc after the mount because we need
2338                    the MGS nids which are stored on disk.  Plus, we may
2339                    need to start the MGS first. */
2340                 /* s_f_s will call server_put_super on failure */
2341         }
2342
2343         /* If error happens in fill_super() call, @lsi will be killed there.
2344          * This is why we do not put it here. */
2345         GOTO(out, rc);
2346 out:
2347         if (rc) {
2348                 CERROR("Unable to mount %s (%d)\n",
2349                        s2lsi(sb) ? lmd->lmd_dev : "", rc);
2350         } else {
2351                 CDEBUG(D_SUPER, "Mount %s complete\n",
2352                        lmd->lmd_dev);
2353         }
2354         cfs_lockdep_on();
2355         return rc;
2356 }
2357
2358
/* We can't call ll_fill_super by name because it lives in a module that
   must be loaded after this one.  Instead that module registers its
   fill_super routine here; lustre_fill_super() calls it through the
   client_fill_super pointer for client mounts. */
void lustre_register_client_fill_super(int (*cfs)(struct super_block *sb,
                                                  struct vfsmount *mnt))
{
        client_fill_super = cfs;
}
2366
/* Register the callback that lustre_kill_super() invokes for superblocks
   that are not server mounts. */
void lustre_register_kill_super_cb(void (*cfs)(struct super_block *sb))
{
        kill_super_cb = cfs;
}
2371
2372 /***************** FS registration ******************/
2373
2374 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18))
2375 struct super_block * lustre_get_sb(struct file_system_type *fs_type, int flags,
2376                                    const char *devname, void * data)
2377 {
2378         return get_sb_nodev(fs_type, flags, data, lustre_fill_super);
2379 }
2380 #else
2381 int lustre_get_sb(struct file_system_type *fs_type, int flags,
2382                   const char *devname, void * data, struct vfsmount *mnt)
2383 {
2384         struct lustre_mount_data2 lmd2 = {data, mnt};
2385
2386         return get_sb_nodev(fs_type, flags, &lmd2, lustre_fill_super, mnt);
2387 }
2388 #endif
2389
2390 void lustre_kill_super(struct super_block *sb)
2391 {
2392         struct lustre_sb_info *lsi = s2lsi(sb);
2393
2394         if (kill_super_cb && lsi && !(lsi->lsi_flags & LSI_SERVER))
2395                 (*kill_super_cb)(sb);
2396
2397         kill_anon_super(sb);
2398 }
2399
2400 /** Register the "lustre" fs type
2401  */
2402 struct file_system_type lustre_fs_type = {
2403         .owner        = THIS_MODULE,
2404         .name         = "lustre",
2405         .get_sb       = lustre_get_sb,
2406         .kill_sb      = lustre_kill_super,
2407         .fs_flags     = FS_BINARY_MOUNTDATA | FS_REQUIRES_DEV |
2408 #ifdef FS_HAS_FIEMAP
2409                         FS_HAS_FIEMAP |
2410 #endif
2411                         LL_RENAME_DOES_D_MOVE,
2412 };
2413
2414 int lustre_register_fs(void)
2415 {
2416         return register_filesystem(&lustre_fs_type);
2417 }
2418
2419 int lustre_unregister_fs(void)
2420 {
2421         return unregister_filesystem(&lustre_fs_type);
2422 }
2423
/* Mount-related entry points exported for use by other modules */
EXPORT_SYMBOL(lustre_register_client_fill_super);
EXPORT_SYMBOL(lustre_register_kill_super_cb);
EXPORT_SYMBOL(lustre_common_put_super);
EXPORT_SYMBOL(lustre_process_log);
EXPORT_SYMBOL(lustre_end_log);
EXPORT_SYMBOL(server_get_mount);
EXPORT_SYMBOL(server_get_mount_2);
EXPORT_SYMBOL(server_put_mount);
EXPORT_SYMBOL(server_put_mount_2);
EXPORT_SYMBOL(server_register_target);
EXPORT_SYMBOL(server_name2index);
EXPORT_SYMBOL(server_mti_print);
EXPORT_SYMBOL(do_lcfg);