4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2012, Whamcloud, Inc.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lustre/obdclass/obd_mount.c
38 * Client/server mount routines
40 * Author: Nathan Rutman <nathan@clusterfs.com>
44 #define DEBUG_SUBSYSTEM S_CLASS
45 #define D_MOUNT D_SUPER|D_CONFIG /*|D_WARNING */
46 #define PRINT_CMD CDEBUG
47 #define PRINT_MASK D_SUPER|D_CONFIG
51 #include <lustre_fsfilt.h>
52 #include <obd_class.h>
53 #include <lustre/lustre_user.h>
54 #include <linux/version.h>
55 #include <lustre_log.h>
56 #include <lustre_disk.h>
57 #include <lustre_param.h>
58 #ifdef HAVE_KERNEL_LOCKED
59 #include <linux/smp_lock.h>
/* File-local hooks, both initialised to NULL here.  Presumably filled in
 * later by the client module -- NOTE(review): the assignment is not visible
 * in this part of the file. */
62 static int (*client_fill_super)(struct super_block *sb,
63 struct vfsmount *mnt) = NULL;
64 static void (*kill_super_cb)(struct super_block *sb) = NULL;
66 /*********** mount lookup *********/
/* server_mount_info_list holds the registered server mounts searched by
 * server_find_mount(); lustre_mount_info_lock is the mutex the register,
 * deregister, get and put routines below take around that search. */
68 CFS_DEFINE_MUTEX(lustre_mount_info_lock);
69 static CFS_LIST_HEAD(server_mount_info_list);
/* Walk server_mount_info_list looking for the entry whose lmi_name is an
 * exact strcmp() match for @name.  The function takes no lock itself; e.g.
 * server_register_mount() holds lustre_mount_info_lock around the call.
 * NOTE(review): the return statements are not visible in this listing;
 * presumably the matching entry is returned, or NULL when none matches. */
71 static struct lustre_mount_info *server_find_mount(const char *name)
74 struct lustre_mount_info *lmi;
77 cfs_list_for_each(tmp, &server_mount_info_list) {
78 lmi = cfs_list_entry(tmp, struct lustre_mount_info,
80 if (strcmp(name, lmi->lmi_name) == 0)
86 /* we must register an obd for a mount before we call the setup routine.
87 *_setup will call lustre_get_mount to get the mnt struct
88 by obd_name, since we can't pass the pointer to setup. */
/* Add a lustre_mount_info entry for @name to server_mount_info_list.
 *
 * A new entry and a private copy of @name are allocated first; the duplicate
 * check and the list insertion then happen under lustre_mount_info_lock.
 * If @name is already registered both allocations are released and an error
 * is logged.
 * NOTE(review): the return statements and the assignments of the sb/mnt
 * fields are not visible in this listing. */
89 static int server_register_mount(const char *name, struct super_block *sb,
92 struct lustre_mount_info *lmi;
98 OBD_ALLOC(lmi, sizeof(*lmi));
/* The entry keeps its own copy of the name (strlen + 1 bytes). */
101 OBD_ALLOC(name_cp, strlen(name) + 1);
103 OBD_FREE(lmi, sizeof(*lmi));
106 strcpy(name_cp, name);
108 cfs_mutex_lock(&lustre_mount_info_lock);
110 if (server_find_mount(name)) {
111 cfs_mutex_unlock(&lustre_mount_info_lock);
112 OBD_FREE(lmi, sizeof(*lmi));
/* Same size expression as the allocation above. */
113 OBD_FREE(name_cp, strlen(name) + 1);
114 CERROR("Already registered %s\n", name);
117 lmi->lmi_name = name_cp;
120 cfs_list_add(&lmi->lmi_list_chain, &server_mount_info_list);
122 cfs_mutex_unlock(&lustre_mount_info_lock);
/* vfscount is logged as -1 when the entry has no vfsmount. */
124 CDEBUG(D_MOUNT, "reg_mnt %p from %s, vfscount=%d\n",
126 lmi->lmi_mnt ? mnt_get_count(lmi->lmi_mnt) : -1);
131 /* when an obd no longer needs a mount */
/* Remove the entry registered under @name from server_mount_info_list and
 * free both the entry and its name copy.  Lookup, unlink and free all happen
 * while lustre_mount_info_lock is held; an unknown name is logged as an
 * error.  NOTE(review): return statements are not visible in this listing. */
132 static int server_deregister_mount(const char *name)
134 struct lustre_mount_info *lmi;
137 cfs_mutex_lock(&lustre_mount_info_lock);
138 lmi = server_find_mount(name);
140 cfs_mutex_unlock(&lustre_mount_info_lock);
141 CERROR("%s not registered\n", name);
145 CDEBUG(D_MOUNT, "dereg_mnt %p from %s, vfscount=%d\n",
147 lmi->lmi_mnt ? mnt_get_count(lmi->lmi_mnt) : -1);
/* The name is freed with the same strlen + 1 size it was allocated with. */
149 OBD_FREE(lmi->lmi_name, strlen(lmi->lmi_name) + 1);
150 cfs_list_del(&lmi->lmi_list_chain);
151 OBD_FREE(lmi, sizeof(*lmi));
152 cfs_mutex_unlock(&lustre_mount_info_lock);
157 /* obd's look up a registered mount using their obdname. This is just
158 for initial obd setup to find the mount struct. It should not be
159 called every time you want to mntget. */
/* Look up the mount registered under @name and take references on it:
 * mntget() on the vfsmount and an increment of the superblock info's
 * lsi_mounts counter.  A missing entry is logged as an error.
 * NOTE(review): lustre_mount_info_lock is released right after the lookup,
 * before lmi is dereferenced below; nothing visible here keeps the entry
 * alive in between -- confirm callers cannot race with deregistration.
 * NOTE(review): return statements are not visible in this listing. */
160 struct lustre_mount_info *server_get_mount(const char *name)
162 struct lustre_mount_info *lmi;
163 struct lustre_sb_info *lsi;
166 cfs_mutex_lock(&lustre_mount_info_lock);
167 lmi = server_find_mount(name);
168 cfs_mutex_unlock(&lustre_mount_info_lock);
170 CERROR("Can't find mount for %s\n", name);
173 lsi = s2lsi(lmi->lmi_sb);
176 mntget(lmi->lmi_mnt);
177 cfs_atomic_inc(&lsi->lsi_mounts);
/* The logged vfscount is the current count minus one. */
179 CDEBUG(D_MOUNT, "get_mnt %p from %s, refs=%d, vfscount=%d\n",
180 lmi->lmi_mnt, name, cfs_atomic_read(&lsi->lsi_mounts),
181 lmi->lmi_mnt ? mnt_get_count(lmi->lmi_mnt) - 1 : -1);
185 EXPORT_SYMBOL(server_get_mount);
188 * Used by mdt to get mount_info from obdname.
189 * There is no blocking when using the mount_info.
190 * Do not use server_get_mount for this purpose.
/* Look up the mount registered under @name while holding
 * lustre_mount_info_lock, logging an error when it is missing.  Unlike
 * server_get_mount(), no mntget() or lsi_mounts increment is visible here.
 * NOTE(review): return statements are not visible in this listing. */
192 struct lustre_mount_info *server_get_mount_2(const char *name)
194 struct lustre_mount_info *lmi;
197 cfs_mutex_lock(&lustre_mount_info_lock);
198 lmi = server_find_mount(name);
199 cfs_mutex_unlock(&lustre_mount_info_lock);
201 CERROR("Can't find mount for %s\n", name);
205 EXPORT_SYMBOL(server_get_mount_2);
/* Helper taking a vfsmount; when HAVE_KERNEL_LOCKED is defined it first
 * tests kernel_locked().  NOTE(review): the rest of the body is not visible
 * in this listing; presumably it wraps mntput() and drops the big kernel
 * lock around it on older kernels -- confirm against the full source. */
207 static void unlock_mntput(struct vfsmount *mnt)
209 #ifdef HAVE_KERNEL_LOCKED
210 /* for kernel < 2.6.37 */
211 if (kernel_locked()) {
/* Forward declaration: lustre_put_lsi() is defined further down but is
 * needed by server_put_mount() below. */
223 static int lustre_put_lsi(struct super_block *sb);
225 /* to be called from obd_cleanup methods */
/* Release the references an obd holds on the mount registered as @name.
 *
 * The vfsmount count is sampled up front because @mnt may not be
 * dereferenced once the reference is dropped.  The entry is looked up under
 * lustre_mount_info_lock and must refer to the same @mnt (asserted).
 * lustre_put_lsi() drops one lsi_mounts reference; a non-zero result is
 * treated as the last put.  The name is then deregistered.
 * NOTE(review): branches, the actual mntput call and the return statements
 * are not visible in this listing. */
226 int server_put_mount(const char *name, struct vfsmount *mnt)
228 struct lustre_mount_info *lmi;
229 struct lustre_sb_info *lsi;
233 /* This might be the last one, can't deref after this */
235 count = mnt_get_count(mnt) - 1;
239 cfs_mutex_lock(&lustre_mount_info_lock);
240 lmi = server_find_mount(name);
241 cfs_mutex_unlock(&lustre_mount_info_lock);
243 CERROR("Can't find mount for %s\n", name);
246 lsi = s2lsi(lmi->lmi_sb);
247 LASSERT(lmi->lmi_mnt == mnt);
249 CDEBUG(D_MOUNT, "put_mnt %p from %s, refs=%d, vfscount=%d\n",
250 lmi->lmi_mnt, name, cfs_atomic_read(&lsi->lsi_mounts), count);
252 if (lustre_put_lsi(lmi->lmi_sb)) {
253 CDEBUG(D_MOUNT, "Last put of mnt %p from %s, vfscount=%d\n",
254 lmi->lmi_mnt, name, count);
255 /* last mount is the One True Mount */
257 CERROR("%s: mount busy, vfscount=%d!\n", name, count);
260 /* this obd should never need the mount again */
261 server_deregister_mount(name);
265 EXPORT_SYMBOL(server_put_mount);
267 /* Corresponding to server_get_mount_2 */
/* Exported counterpart of server_get_mount_2().  NOTE(review): the function
 * body is not visible in this listing, so its behaviour cannot be described
 * from here. */
268 int server_put_mount_2(const char *name, struct vfsmount *mnt)
273 EXPORT_SYMBOL(server_put_mount_2);
275 /**************** config llog ********************/
277 /** Get a config log from the MGS and process it.
278 * This func is called for both clients and servers.
279 * Continue to process new statements appended to the logs
280 * (whenever the config lock is revoked) until lustre_end_log
282 * @param sb The superblock is used by the MGC to write to the local copy of
284 * @param logname The name of the llog to replicate from the MGS
285 * @param cfg Since the same mgc may be used to follow multiple config logs
286 * (e.g. ost1, ost2, client), the config_llog_instance keeps the state for
287 * this log, and is added to the mgc's list of logs to follow.
/* Ask the MGC attached to @sb to start processing config log @logname.
 *
 * An LCFG_LOG_START command is built with the MGC name, the log name in
 * buffer 1, a copy of @cfg in buffer 2 and the value of the @sb pointer in
 * buffer 3, then handed to obd_process_config() on the MGC and freed.
 * Two different console errors are printed depending on the failure.
 * NOTE(review): bufs is a pointer here; its allocation/free, the error
 * branches and the return are not visible in this listing. */
289 int lustre_process_log(struct super_block *sb, char *logname,
290 struct config_llog_instance *cfg)
292 struct lustre_cfg *lcfg;
293 struct lustre_cfg_bufs *bufs;
294 struct lustre_sb_info *lsi = s2lsi(sb);
295 struct obd_device *mgc = lsi->lsi_mgc;
306 /* mgc_process_config */
307 lustre_cfg_bufs_reset(bufs, mgc->obd_name);
308 lustre_cfg_bufs_set_string(bufs, 1, logname);
309 lustre_cfg_bufs_set(bufs, 2, cfg, sizeof(*cfg));
/* Buffer 3 carries the pointer value itself (address of sb, pointer size). */
310 lustre_cfg_bufs_set(bufs, 3, &sb, sizeof(sb));
311 lcfg = lustre_cfg_new(LCFG_LOG_START, bufs);
312 rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
313 lustre_cfg_free(lcfg);
/* NOTE(review): the first string literal below has no trailing space, so
 * the printed message runs the quoted log name straight into the word
 * failed; the second message further down has the space. */
318 LCONSOLE_ERROR_MSG(0x15b, "%s: The configuration from log '%s'"
319 "failed from the MGS (%d). Make sure this "
320 "client and the MGS are running compatible "
321 "versions of Lustre.\n",
322 mgc->obd_name, logname, rc);
325 LCONSOLE_ERROR_MSG(0x15c, "%s: The configuration from log '%s' "
326 "failed (%d). This may be the result of "
327 "communication errors between this node and "
328 "the MGS, a bad configuration, or other "
329 "errors. See the syslog for more "
330 "information.\n", mgc->obd_name, logname,
333 /* class_obd_list(); */
336 EXPORT_SYMBOL(lustre_process_log);
338 /* Stop watching this config log for updates */
/* Build an LCFG_LOG_END command for @logname (MGC name in buffer 0, log name
 * in buffer 1, a copy of @cfg in buffer 2) and pass it to the MGC attached
 * to @sb via obd_process_config().  Unlike lustre_process_log(), the buffer
 * descriptor lives on the stack.
 * NOTE(review): any guards (e.g. on mgc or cfg being NULL) and the return
 * statement are not visible in this listing. */
339 int lustre_end_log(struct super_block *sb, char *logname,
340 struct config_llog_instance *cfg)
342 struct lustre_cfg *lcfg;
343 struct lustre_cfg_bufs bufs;
344 struct lustre_sb_info *lsi = s2lsi(sb);
345 struct obd_device *mgc = lsi->lsi_mgc;
352 /* mgc_process_config */
353 lustre_cfg_bufs_reset(&bufs, mgc->obd_name);
354 lustre_cfg_bufs_set_string(&bufs, 1, logname);
356 lustre_cfg_bufs_set(&bufs, 2, cfg, sizeof(*cfg));
357 lcfg = lustre_cfg_new(LCFG_LOG_END, &bufs);
358 rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
359 lustre_cfg_free(lcfg);
362 EXPORT_SYMBOL(lustre_end_log);
364 /**************** obd start *******************/
366 /** lustre_cfg_bufs are a holdover from 1.4; we can still set these up from
367 * lctl (and do for echo cli/srv).
/* Build a lustre_cfg command and run it through class_process_config().
 *
 * @cfgname goes into buffer 0, @s1..@s4 into string buffers 1..4, @cmd is
 * the command code and @nid is stored in lcfg_nid.  The command is freed
 * after processing.
 * NOTE(review): the lines between the set_string calls are not visible;
 * presumably each string is only set when non-NULL, since callers in this
 * file pass 0 for unused strings.
 * NOTE(review): the return statement is not visible in this listing. */
369 int do_lcfg(char *cfgname, lnet_nid_t nid, int cmd,
370 char *s1, char *s2, char *s3, char *s4)
372 struct lustre_cfg_bufs bufs;
373 struct lustre_cfg * lcfg = NULL;
376 CDEBUG(D_TRACE, "lcfg %s %#x %s %s %s %s\n", cfgname,
377 cmd, s1, s2, s3, s4);
379 lustre_cfg_bufs_reset(&bufs, cfgname);
381 lustre_cfg_bufs_set_string(&bufs, 1, s1);
383 lustre_cfg_bufs_set_string(&bufs, 2, s2);
385 lustre_cfg_bufs_set_string(&bufs, 3, s3);
387 lustre_cfg_bufs_set_string(&bufs, 4, s4);
/* NOTE(review): the result of lustre_cfg_new() is dereferenced on the very
 * next line with no check in between -- confirm it cannot fail here. */
389 lcfg = lustre_cfg_new(cmd, &bufs);
390 lcfg->lcfg_nid = nid;
391 rc = class_process_config(lcfg);
392 lustre_cfg_free(lcfg);
395 EXPORT_SYMBOL(do_lcfg);
397 /** Call class_attach and class_setup. These methods in turn call
398 * obd type-specific methods.
/* Start an obd in two steps using do_lcfg(): LCFG_ATTACH with the type and
 * uuid, then LCFG_SETUP with two setup strings (s1, s2).  If setup fails
 * the obd is detached again with LCFG_DETACH so no half-started device is
 * left behind.  Failures of either step are logged.
 * NOTE(review): the second line of the signature, the error tests and the
 * return statement are not visible in this listing. */
400 static int lustre_start_simple(char *obdname, char *type, char *uuid,
404 CDEBUG(D_MOUNT, "Starting obd %s (typ=%s)\n", obdname, type);
406 rc = do_lcfg(obdname, 0, LCFG_ATTACH, type, uuid, 0, 0);
408 CERROR("%s attach error %d\n", obdname, rc);
411 rc = do_lcfg(obdname, 0, LCFG_SETUP, s1, s2, 0, 0);
413 CERROR("%s setup error %d\n", obdname, rc);
414 do_lcfg(obdname, 0, LCFG_DETACH, 0, 0, 0, 0);
419 /* Set up a MGS to serve startup logs */
/* Start the MGS obd on the disk behind @sb.
 *
 * The mount is first looked up under LUSTRE_MGS_OBDNAME; finding one means
 * an MGS is already running on this node and a console error is printed.
 * Otherwise the mount is registered under that name and the obd is started
 * with lustre_start_simple().  A start failure is reported on the console.
 * NOTE(review): the branch conditions and return statements are not visible
 * in this listing. */
420 static int server_start_mgs(struct super_block *sb)
422 struct lustre_sb_info *lsi = s2lsi(sb);
423 struct vfsmount *mnt = lsi->lsi_srv_mnt;
424 struct lustre_mount_info *lmi;
429 /* It is impossible to have more than 1 MGS per node, since
430 MGC wouldn't know which to connect to */
431 lmi = server_find_mount(LUSTRE_MGS_OBDNAME);
433 lsi = s2lsi(lmi->lmi_sb);
434 LCONSOLE_ERROR_MSG(0x15d, "The MGS service was already started"
439 CDEBUG(D_CONFIG, "Start MGS service %s\n", LUSTRE_MGS_OBDNAME);
441 rc = server_register_mount(LUSTRE_MGS_OBDNAME, sb, mnt);
444 rc = lustre_start_simple(LUSTRE_MGS_OBDNAME, LUSTRE_MGS_NAME,
445 LUSTRE_MGS_OBDNAME, 0, 0);
446 /* Do NOT call server_deregister_mount() here. This leads to
447 * an inability to clean up cleanly and free lsi and other stuff when
448 * mgs calls server_put_mount() in error handling case. -umka */
452 LCONSOLE_ERROR_MSG(0x15e, "Failed to start MGS '%s' (%d). "
453 "Is the 'mgs' module loaded?\n",
454 LUSTRE_MGS_OBDNAME, rc);
/* Stop the MGS obd: look it up by LUSTRE_MGS_OBDNAME and run
 * class_manual_cleanup() on it.  A missing obd is only logged at debug
 * level ("not running").
 * NOTE(review): @sb is not used by any visible line; the branch conditions,
 * any flags set before cleanup and the return are not visible here. */
458 static int server_stop_mgs(struct super_block *sb)
460 struct obd_device *obd;
464 CDEBUG(D_MOUNT, "Stop MGS service %s\n", LUSTRE_MGS_OBDNAME);
466 /* There better be only one MGS */
467 obd = class_name2obd(LUSTRE_MGS_OBDNAME);
469 CDEBUG(D_CONFIG, "mgs %s not running\n", LUSTRE_MGS_OBDNAME);
473 /* The MGS should always stop when we say so */
475 rc = class_manual_cleanup(obd);
/* Mutex taken by both lustre_start_mgc() and lustre_stop_mgc() around their
 * MGC lookup, refcount and setup/cleanup work. */
479 CFS_DEFINE_MUTEX(mgc_start_lock);
481 /** Set up a mgc obd to process startup logs
483 * \param sb [in] super block of the mgc obd
485 * \retval 0 success, otherwise error code
/* Find or start the MGC obd used by the mount behind @sb.
 *
 * Visible flow:
 *  1. Pick the nid that names the MGC: the mgsnode= option on a server, a
 *     local nid from LNetGetId() on an MGS (LOLND nets are tested for), or
 *     the first nid in the device string on a client.
 *  2. Under mgc_start_lock, build the MGC name from LUSTRE_MGC_OBDNAME plus
 *     that nid.
 *  3. If an obd of that name exists and is not stopping, re-use it: push
 *     the mgssec setting, bump cl_mgc_refcount, reconcile the client's IR
 *     (imperative recovery) flag with the MGC's connect data and set the
 *     reconnect policy.
 *  4. Otherwise register the primary nid uuids, start a new MGC with a
 *     random uuid, add failover nid uuids/connections, set mgssec and the
 *     reconnect policy, set cl_mgc_refcount to 1 and connect to the MGS,
 *     storing the export in cl_mgc_mgsexp.
 *  5. Unlock and free the two name buffers.
 * Per the existing doc comment the result is 0 on success, otherwise an
 * error code.
 * NOTE(review): many lines (branch conditions, labels, allocations of data
 * and uuid, the return) are not visible in this listing, so the outline
 * above follows only the visible statements and existing comments. */
487 static int lustre_start_mgc(struct super_block *sb)
489 struct obd_connect_data *data = NULL;
490 struct lustre_sb_info *lsi = s2lsi(sb);
491 struct obd_device *obd;
492 struct obd_export *exp;
493 struct obd_uuid *uuid;
496 char *mgcname = NULL, *niduuid = NULL, *mgssec = NULL;
499 int rc = 0, i = 0, j, len;
502 LASSERT(lsi->lsi_lmd);
504 /* Find the first non-lo MGS nid for our MGC name */
505 if (IS_SERVER(lsi)) {
506 /* mount -o mgsnode=nid */
507 ptr = lsi->lsi_lmd->lmd_mgs;
508 if (lsi->lsi_lmd->lmd_mgs &&
509 (class_parse_nid(lsi->lsi_lmd->lmd_mgs, &nid, &ptr) == 0)) {
511 } else if (IS_MGS(lsi)) {
512 lnet_process_id_t id;
513 while ((rc = LNetGetId(i++, &id)) != -ENOENT) {
514 if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND)
521 } else { /* client */
522 /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
523 ptr = lsi->lsi_lmd->lmd_dev;
524 if (class_parse_nid(ptr, &nid, &ptr) == 0)
528 CERROR("No valid MGS nids found.\n");
532 cfs_mutex_lock(&mgc_start_lock);
/* NOTE(review): len covers the MGC name plus its terminator, so niduuid
 * (len + 2 bytes) has room for the name, an underscore, one hex digit and
 * the terminator.  The sprintf calls using the %s_%x format below need more
 * once i exceeds 0xf; lustre_stop_mgc() sizes the same string with + 6.
 * Confirm the bound on i. */
534 len = strlen(LUSTRE_MGC_OBDNAME) + strlen(libcfs_nid2str(nid)) + 1;
535 OBD_ALLOC(mgcname, len);
536 OBD_ALLOC(niduuid, len + 2);
537 if (!mgcname || !niduuid)
538 GOTO(out_free, rc = -ENOMEM);
539 sprintf(mgcname, "%s%s", LUSTRE_MGC_OBDNAME, libcfs_nid2str(nid));
/* An absent mgssec option is passed on as the empty string. */
541 mgssec = lsi->lsi_lmd->lmd_mgssec ? lsi->lsi_lmd->lmd_mgssec : "";
545 GOTO(out_free, rc = -ENOMEM);
547 obd = class_name2obd(mgcname);
548 if (obd && !obd->obd_stopping) {
549 rc = obd_set_info_async(NULL, obd->obd_self_export,
550 strlen(KEY_MGSSEC), KEY_MGSSEC,
551 strlen(mgssec), mgssec, NULL);
555 /* Re-using an existing MGC */
556 cfs_atomic_inc(&obd->u.cli.cl_mgc_refcount);
558 /* IR compatibility check, only for clients */
559 if (lmd_is_client(lsi->lsi_lmd)) {
561 int vallen = sizeof(*data);
562 __u32 *flags = &lsi->lsi_lmd->lmd_flags;
564 rc = obd_get_info(NULL, obd->obd_self_export,
565 strlen(KEY_CONN_DATA), KEY_CONN_DATA,
566 &vallen, data, NULL);
568 has_ir = OCD_HAS_FLAG(data, IMP_RECOV);
/* True when the MGC's IR state and the mount's NOIR flag disagree; the
 * mount flag is then forced to match the existing MGC. */
569 if (has_ir ^ !(*flags & LMD_FLG_NOIR)) {
570 /* LMD_FLG_NOIR is for test purpose only */
572 "Trying to mount a client with IR setting "
573 "not compatible with current mgc. "
574 "Force to use current mgc setting that is "
576 has_ir ? "enabled" : "disabled");
578 *flags &= ~LMD_FLG_NOIR;
580 *flags |= LMD_FLG_NOIR;
585 /* If we are restarting the MGS, don't try to keep the MGC's
586 old connection, or registration will fail. */
588 CDEBUG(D_MOUNT, "New MGS with live MGC\n");
592 /* Try all connections, but only once (again).
593 We don't want to block another target from starting
594 (using its local copy of the log), but we do want to connect
595 if at all possible. */
597 CDEBUG(D_MOUNT, "%s: Set MGC reconnect %d\n", mgcname,recov_bk);
598 rc = obd_set_info_async(NULL, obd->obd_self_export,
599 sizeof(KEY_INIT_RECOV_BACKUP),
600 KEY_INIT_RECOV_BACKUP,
601 sizeof(recov_bk), &recov_bk, NULL);
605 CDEBUG(D_MOUNT, "Start MGC '%s'\n", mgcname);
607 /* Add the primary nids for the MGS */
609 sprintf(niduuid, "%s_%x", mgcname, i);
610 if (IS_SERVER(lsi)) {
611 ptr = lsi->lsi_lmd->lmd_mgs;
613 /* Use local nids (including LO) */
614 lnet_process_id_t id;
615 while ((rc = LNetGetId(i++, &id)) != -ENOENT) {
616 rc = do_lcfg(mgcname, id.nid,
617 LCFG_ADD_UUID, niduuid, 0,0,0);
620 /* Use mgsnode= nids */
621 /* mount -o mgsnode=nid */
622 if (lsi->lsi_lmd->lmd_mgs) {
623 ptr = lsi->lsi_lmd->lmd_mgs;
624 } else if (class_find_param(ptr, PARAM_MGSNODE,
626 CERROR("No MGS nids given.\n");
627 GOTO(out_free, rc = -EINVAL);
629 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
630 rc = do_lcfg(mgcname, nid,
631 LCFG_ADD_UUID, niduuid, 0,0,0);
635 } else { /* client */
636 /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
637 ptr = lsi->lsi_lmd->lmd_dev;
638 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
639 rc = do_lcfg(mgcname, nid,
640 LCFG_ADD_UUID, niduuid, 0,0,0);
642 /* Stop at the first failover nid */
648 CERROR("No valid MGS nids found.\n");
649 GOTO(out_free, rc = -EINVAL);
651 lsi->lsi_lmd->lmd_mgs_failnodes = 1;
653 /* Random uuid for MGC allows easier reconnects */
655 ll_generate_random_uuid(uuidc);
656 class_uuid_unparse(uuidc, uuid);
659 rc = lustre_start_simple(mgcname, LUSTRE_MGC_NAME,
660 (char *)uuid->uuid, LUSTRE_MGS_OBDNAME,
666 /* Add any failover MGS nids */
668 while (ptr && ((*ptr == ':' ||
669 class_find_param(ptr, PARAM_MGSNODE, &ptr) == 0))) {
670 /* New failover node */
671 sprintf(niduuid, "%s_%x", mgcname, i);
673 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
675 rc = do_lcfg(mgcname, nid,
676 LCFG_ADD_UUID, niduuid, 0,0,0);
681 rc = do_lcfg(mgcname, 0, LCFG_ADD_CONN,
/* Remembered so lustre_stop_mgc() knows how many nid uuids to delete. */
689 lsi->lsi_lmd->lmd_mgs_failnodes = i;
691 obd = class_name2obd(mgcname);
693 CERROR("Can't find mgcobd %s\n", mgcname);
694 GOTO(out_free, rc = -ENOTCONN);
697 rc = obd_set_info_async(NULL, obd->obd_self_export,
698 strlen(KEY_MGSSEC), KEY_MGSSEC,
699 strlen(mgssec), mgssec, NULL);
703 /* Keep a refcount of servers/clients who started with "mount",
704 so we know when we can get rid of the mgc. */
705 cfs_atomic_set(&obd->u.cli.cl_mgc_refcount, 1);
707 /* Try all connections, but only once. */
709 rc = obd_set_info_async(NULL, obd->obd_self_export,
710 sizeof(KEY_INIT_RECOV_BACKUP),
711 KEY_INIT_RECOV_BACKUP,
712 sizeof(recov_bk), &recov_bk, NULL);
715 CWARN("can't set %s %d\n", KEY_INIT_RECOV_BACKUP, rc);
717 /* We connect to the MGS at setup, and don't disconnect until cleanup */
718 data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_AT |
719 OBD_CONNECT_FULL20 | OBD_CONNECT_IMP_RECOV |
720 OBD_CONNECT_MNE_SWAB;
/* A client mounted with the NOIR flag does not advertise IR support. */
721 if (lmd_is_client(lsi->lsi_lmd) &&
722 lsi->lsi_lmd->lmd_flags & LMD_FLG_NOIR)
723 data->ocd_connect_flags &= ~OBD_CONNECT_IMP_RECOV;
724 data->ocd_version = LUSTRE_VERSION_CODE;
725 rc = obd_connect(NULL, &exp, obd, &(obd->obd_uuid), data, NULL);
727 CERROR("connect failed %d\n", rc);
731 obd->u.cli.cl_mgc_mgsexp = exp;
734 /* Keep the mgc info in the sb. Note that many lsi's can point
738 cfs_mutex_unlock(&mgc_start_lock);
743 OBD_FREE(mgcname, len);
745 OBD_FREE(niduuid, len + 2);
/* Drop this mount's reference on the MGC and tear the MGC down when it was
 * the last one.
 *
 * Under mgc_start_lock: cl_mgc_refcount is decremented; if other users
 * remain the function bails out with -EBUSY (not fatal, see below).
 * Otherwise the obd is marked obd_no_recov, the MGS export is disconnected
 * (failure only logged), the obd name is saved into niduuid, the obd is
 * cleaned up with class_manual_cleanup(), and one LCFG_DEL_UUID command is
 * issued per recorded failnode index.
 * NOTE(review): how obd is obtained, the branch conditions and the return
 * are not visible in this listing. */
749 static int lustre_stop_mgc(struct super_block *sb)
751 struct lustre_sb_info *lsi = s2lsi(sb);
752 struct obd_device *obd;
753 char *niduuid = 0, *ptr = 0;
754 int i, rc = 0, len = 0;
764 cfs_mutex_lock(&mgc_start_lock);
765 LASSERT(cfs_atomic_read(&obd->u.cli.cl_mgc_refcount) > 0);
766 if (!cfs_atomic_dec_and_test(&obd->u.cli.cl_mgc_refcount)) {
767 /* This is not fatal, every client that stops
768 will call in here. */
769 CDEBUG(D_MOUNT, "mgc still has %d references.\n",
770 cfs_atomic_read(&obd->u.cli.cl_mgc_refcount));
771 GOTO(out, rc = -EBUSY);
774 /* The MGC has no recoverable data in any case.
775 * force shutdown set in umount_begin */
776 obd->obd_no_recov = 1;
778 if (obd->u.cli.cl_mgc_mgsexp) {
779 /* An error is not fatal, if we are unable to send the
780 disconnect mgs ping evictor cleans up the export */
781 rc = obd_disconnect(obd->u.cli.cl_mgc_mgsexp);
783 CDEBUG(D_MOUNT, "disconnect failed %d\n", rc);
786 /* Save the obdname for cleaning the nid uuids, which are
788 len = strlen(obd->obd_name) + 6;
789 OBD_ALLOC(niduuid, len);
791 strcpy(niduuid, obd->obd_name);
792 ptr = niduuid + strlen(niduuid);
795 rc = class_manual_cleanup(obd);
799 /* Clean the nid uuids */
801 GOTO(out, rc = -ENOMEM);
/* ptr points at the end of the saved obd name, so each pass rewrites only
 * the suffix (underscore plus hex index) in place. */
803 for (i = 0; i < lsi->lsi_lmd->lmd_mgs_failnodes; i++) {
804 sprintf(ptr, "_%x", i);
805 rc = do_lcfg(LUSTRE_MGC_OBDNAME, 0, LCFG_DEL_UUID,
808 CERROR("del MDC UUID %s failed: rc = %d\n",
813 OBD_FREE(niduuid, len);
815 /* class_import_put will get rid of the additional connections */
816 cfs_mutex_unlock(&mgc_start_lock);
820 /* Since there's only one mgc per node, we have to change its fs to get
821 access to the right disk. */
/* Point the MGC at the disk behind @sb by sending KEY_SET_FS through
 * obd_set_info_async() on the MGC's self export; the value passed is the
 * super_block itself (sizeof(*sb) bytes starting at sb).  A failure is
 * logged.  NOTE(review): the return statement is not visible here. */
822 static int server_mgc_set_fs(struct obd_device *mgc, struct super_block *sb)
824 struct lustre_sb_info *lsi = s2lsi(sb);
828 CDEBUG(D_MOUNT, "Set mgc disk for %s\n", lsi->lsi_lmd->lmd_dev);
830 /* cl_mgc_sem in mgc ensures we sleep if the mgc_fs is busy */
831 rc = obd_set_info_async(NULL, mgc->obd_self_export,
832 sizeof(KEY_SET_FS), KEY_SET_FS,
833 sizeof(*sb), sb, NULL);
835 CERROR("can't set_fs %d\n", rc);
/* Counterpart of server_mgc_set_fs(): sends KEY_CLEAR_FS to the MGC's self
 * export via obd_set_info_async().  NOTE(review): the value arguments of
 * the call and the return statement are not visible in this listing. */
841 static int server_mgc_clear_fs(struct obd_device *mgc)
846 CDEBUG(D_MOUNT, "Unassign mgc disk\n");
848 rc = obd_set_info_async(NULL, mgc->obd_self_export,
849 sizeof(KEY_CLEAR_FS), KEY_CLEAR_FS,
/* Mutex taken around the MDS/OSS start checks in server_start_targets() and
 * around the whole of server_stop_servers(). */
854 CFS_DEFINE_MUTEX(server_start_lock);
856 /* Stop MDS/OSS if nobody is using them */
/* @lsiflags selects which global service to consider: for an MDT the MDS
 * obd and the LUSTRE_MDS_NAME type are looked up, for an OST the OSS obd
 * and the LUSTRE_OST_NAME type.  The obd is cleaned up with
 * class_manual_cleanup() only when it exists and its type is missing or has
 * a zero typ_refcnt.  Everything runs under server_start_lock.
 * NOTE(review): the return statement is not visible in this listing. */
857 static int server_stop_servers(int lsiflags)
859 struct obd_device *obd = NULL;
860 struct obd_type *type = NULL;
864 cfs_mutex_lock(&server_start_lock);
866 /* Either an MDT or an OST or neither */
867 /* if this was an MDT, and there are no more MDT's, clean up the MDS */
868 if ((lsiflags & LDD_F_SV_TYPE_MDT) &&
869 (obd = class_name2obd(LUSTRE_MDS_OBDNAME))) {
870 /*FIXME pre-rename, should eventually be LUSTRE_MDT_NAME*/
871 type = class_search_type(LUSTRE_MDS_NAME);
873 /* if this was an OST, and there are no more OST's, clean up the OSS */
874 if ((lsiflags & LDD_F_SV_TYPE_OST) &&
875 (obd = class_name2obd(LUSTRE_OSS_OBDNAME))) {
876 type = class_search_type(LUSTRE_OST_NAME);
879 if (obd && (!type || !type->typ_refcnt)) {
882 /* obd_fail doesn't mean much on a server obd */
883 err = class_manual_cleanup(obd);
888 cfs_mutex_unlock(&server_start_lock);
/* Debug helper: log @title followed by the server name, fs name, uuid,
 * config version and flags of @mti.  PRINT_CMD/PRINT_MASK are defined at
 * the top of the file as CDEBUG with D_SUPER|D_CONFIG.
 * NOTE(review): the return statement is not visible in this listing. */
893 int server_mti_print(char *title, struct mgs_target_info *mti)
895 PRINT_CMD(PRINT_MASK, "mti %s\n", title);
896 PRINT_CMD(PRINT_MASK, "server: %s\n", mti->mti_svname);
897 PRINT_CMD(PRINT_MASK, "fs: %s\n", mti->mti_fsname);
898 PRINT_CMD(PRINT_MASK, "uuid: %s\n", mti->mti_uuid);
899 PRINT_CMD(PRINT_MASK, "ver: %d flags: %#x\n",
900 mti->mti_config_ver, mti->mti_flags);
904 /** Get the fsname ("lustre") from the server name ("lustre-OST003F").
905 * @param [in] svname server name including type and index
906 * @param [out] fsname Buffer to copy filesystem name prefix into.
907 * Must have at least 'strlen(fsname) + 1' chars.
908 * @param [out] endptr if endptr isn't NULL it is set to end of fsname
/* Split a server name at its type marker: the first "-OST" is searched for,
 * then "-MDT".  The part of @svname before the marker is copied into
 * @fsname and NUL-terminated explicitly, since strncpy() with an exact
 * prefix length writes no terminator.
 * NOTE(review): callers in this file pass NULL for @fsname, so the copy is
 * presumably guarded by a test that is not visible here; the conditions
 * between the two strstr() calls, the @endptr handling and the return are
 * not visible either. */
911 static int server_name2fsname(char *svname, char *fsname, char **endptr)
915 p = strstr(svname, "-OST");
917 p = strstr(svname, "-MDT");
922 strncpy(fsname, svname, p - svname);
923 fsname[p - svname] = '\0';
933 * Get service name (svname) from string
935 * if endptr isn't NULL it is set to end of fsname *
/* Extract the service part of @label into @svname.  server_name2fsname()
 * is called with a NULL fsname purely to locate the dash; what follows the
 * dash is then copied with a limit of MTI_NAME_MAXLEN.
 * NOTE(review): strncpy() leaves the destination unterminated when the
 * source is MTI_NAME_MAXLEN bytes or longer, and no terminator write is
 * visible in this listing -- confirm.  The error checks, @endptr handling
 * and return are not visible either. */
937 int server_name2svname(char *label, char *svname, char **endptr)
942 /* We use server_name2fsname() just for parsing */
943 rc = server_name2fsname(label, NULL, &dash);
950 strncpy(svname, dash + 1, MTI_NAME_MAXLEN);
954 EXPORT_SYMBOL(server_name2svname);
957 /* Get the index from the obd name.
960 if endptr isn't NULL it is set to end of name */
/* Parse the type and index out of a server name.
 *
 * server_name2fsname() locates the dash; the text after it selects the
 * type flag (LDD_F_SV_TYPE_MDT for "MDT", LDD_F_SV_TYPE_OST for "OST").
 * A remainder of "all" yields the type flag combined with LDD_F_SV_ALL;
 * otherwise the remainder is parsed as a hexadecimal index.
 * NOTE(review): the pointer advances between these steps, the store into
 * @idx, the error paths and the final return are not visible here. */
961 int server_name2index(char *svname, __u32 *idx, char **endptr)
967 /* We use server_name2fsname() just for parsing */
968 rc = server_name2fsname(svname, NULL, &dash);
977 if (strncmp(dash, "MDT", 3) == 0)
978 rc = LDD_F_SV_TYPE_MDT;
979 else if (strncmp(dash, "OST", 3) == 0)
980 rc = LDD_F_SV_TYPE_OST;
986 if (strcmp(dash, "all") == 0)
987 return rc | LDD_F_SV_ALL;
/* Index digits are hexadecimal (base 16). */
989 index = simple_strtoul(dash, endptr, 16);
994 EXPORT_SYMBOL(server_name2index);
996 /* Generate data for registration */
/* Fill @mti from the superblock info @lsi.
 *
 * Copies the server name, collects local nids from LNetGetId() (filtering
 * on LOLND, on the failnode list when LMD_FLG_NO_PRIMNODE is set, and on
 * the requested network) up to MTI_NIDS_MAX, records the Lustre version,
 * derives the fs name and stripe index from the server name, keeps only
 * the LDD flag bits plus LDD_F_UPDATE, and copies the mount parameters.
 * Only meaningful for servers (IS_SERVER is tested first).
 * NOTE(review): both strncpy() calls use the full destination size, so a
 * source of that length or longer would be left unterminated; whether the
 * destination is pre-zeroed or length-checked is not visible here.  The
 * skip/break statements in the loop and the returns are not visible. */
997 static int server_lsi2mti(struct lustre_sb_info *lsi,
998 struct mgs_target_info *mti)
1000 lnet_process_id_t id;
1004 if (!IS_SERVER(lsi))
1007 strncpy(mti->mti_svname, lsi->lsi_svname, sizeof(mti->mti_svname));
1009 mti->mti_nid_count = 0;
1010 while (LNetGetId(i++, &id) != -ENOENT) {
1011 if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND)
1014 /* server use --servicenode param, only allow specified
1015 * nids be registered */
1016 if ((lsi->lsi_lmd->lmd_flags & LMD_FLG_NO_PRIMNODE) != 0 &&
1017 class_match_nid(lsi->lsi_lmd->lmd_params,
1018 PARAM_FAILNODE, id.nid) < 1)
1021 /* match specified network */
1022 if (!class_match_net(lsi->lsi_lmd->lmd_params,
1023 PARAM_NETWORK, LNET_NIDNET(id.nid)))
1026 mti->mti_nids[mti->mti_nid_count] = id.nid;
1027 mti->mti_nid_count++;
1028 if (mti->mti_nid_count >= MTI_NIDS_MAX) {
1029 CWARN("Only using first %d nids for %s\n",
1030 mti->mti_nid_count, mti->mti_svname);
1035 mti->mti_lustre_ver = LUSTRE_VERSION_CODE;
1036 mti->mti_config_ver = 0;
1038 rc = server_name2fsname(lsi->lsi_svname, mti->mti_fsname, NULL);
1042 rc = server_name2index(lsi->lsi_svname, &mti->mti_stripe_index, NULL);
1045 /* Orion requires index to be set */
1046 LASSERT(!(rc & LDD_F_NEED_INDEX));
1047 /* keep only LDD flags */
1048 mti->mti_flags = lsi->lsi_flags & LDD_F_MASK;
1049 mti->mti_flags |= LDD_F_UPDATE;
1050 strncpy(mti->mti_params, lsi->lsi_lmd->lmd_params,
1051 sizeof(mti->mti_params));
1055 /* Register an old or new target with the MGS. If needed MGS will construct
1056 startup logs and assign index */
/* Build an mgs_target_info from @lsi with server_lsi2mti(), mark it with
 * LDD_F_OPC_REG and send it to the MGS through the MGC's MGS export using
 * the KEY_REGISTER_TARGET key.
 *
 * Error reporting depends on the outcome: an LDD_F_ERROR flag coming back
 * in mti means the MGS refused the server; otherwise, when writeconf is set
 * (lsi_flags has LDD_F_NEED_INDEX or LDD_F_UPDATE), the failure is reported
 * as a communication error; otherwise it is logged as not fatal and the
 * error code is reset.
 * NOTE(review): allocation/free of mti, the branch conditions around the
 * messages and the return are not visible in this listing. */
1057 static int server_register_target(struct lustre_sb_info *lsi)
1059 struct obd_device *mgc = lsi->lsi_mgc;
1060 struct mgs_target_info *mti = NULL;
1067 if (!IS_SERVER(lsi))
1074 rc = server_lsi2mti(lsi, mti);
1078 CDEBUG(D_MOUNT, "Registration %s, fs=%s, %s, index=%04x, flags=%#x\n",
1079 mti->mti_svname, mti->mti_fsname,
1080 libcfs_nid2str(mti->mti_nids[0]), mti->mti_stripe_index,
1083 /* if write_conf is true, the registration must succeed */
1084 writeconf = !!(lsi->lsi_flags & (LDD_F_NEED_INDEX | LDD_F_UPDATE));
1085 mti->mti_flags |= LDD_F_OPC_REG;
1087 /* Register the target */
1088 /* FIXME use mgc_process_config instead */
1089 rc = obd_set_info_async(NULL, mgc->u.cli.cl_mgc_mgsexp,
1090 sizeof(KEY_REGISTER_TARGET), KEY_REGISTER_TARGET,
1091 sizeof(*mti), mti, NULL);
1093 if (mti->mti_flags & LDD_F_ERROR) {
1094 LCONSOLE_ERROR_MSG(0x160,
1095 "The MGS is refusing to allow this "
1096 "server (%s) to start. Please see messages"
1097 " on the MGS node.\n", lsi->lsi_svname);
1098 } else if (writeconf) {
1099 LCONSOLE_ERROR_MSG(0x15f,
1100 "Communication to the MGS return error %d. "
1101 "Is the MGS running?\n", rc);
1103 CERROR("Cannot talk to the MGS: %d, not fatal\n", rc);
1104 /* reset the error code for non-fatal error. */
1117 * Notify the MGS that this target is ready.
1118 * Used by IR - if the MGS receives this message, it will notify clients.
/* Send a "target ready" message to the MGS for the server behind @sb.
 *
 * An mgs_target_info is built with server_lsi2mti(), stamped with the
 * target's obt_instance and LDD_F_OPC_READY, and sent over the MGC's MGS
 * export with the KEY_REGISTER_TARGET key.  If the call succeeds and the
 * reply carries LDD_F_IR_CAPABLE without LDD_F_ERROR, LDD_F_IR_CAPABLE is
 * recorded in lsi_flags.
 * NOTE(review): allocation/free of mti, the early-exit statements and the
 * return are not visible in this listing. */
1120 static int server_notify_target(struct super_block *sb, struct obd_device *obd)
1122 struct lustre_sb_info *lsi = s2lsi(sb);
1123 struct obd_device *mgc = lsi->lsi_mgc;
1124 struct mgs_target_info *mti = NULL;
1130 if (!(IS_SERVER(lsi)))
1136 rc = server_lsi2mti(lsi, mti);
1140 mti->mti_instance = obd->u.obt.obt_instance;
1141 mti->mti_flags |= LDD_F_OPC_READY;
1143 /* FIXME use mgc_process_config instead */
1144 rc = obd_set_info_async(NULL, mgc->u.cli.cl_mgc_mgsexp,
1145 sizeof(KEY_REGISTER_TARGET),
1146 KEY_REGISTER_TARGET,
1147 sizeof(*mti), mti, NULL);
1149 /* Imperative recovery: if the mgs informs us to use IR? */
1150 if (!rc && !(mti->mti_flags & LDD_F_ERROR) &&
1151 (mti->mti_flags & LDD_F_IR_CAPABLE))
1152 lsi->lsi_flags |= LDD_F_IR_CAPABLE;
1161 /** Start server targets: MDTs and OSTs
/* Bring up the target (MDT or OST) that lives on the disk behind @sb.
 *
 * Visible sequence:
 *  - make sure the node-wide MDS (for an MDT) or OSS obd exists, starting
 *    it with lustre_start_simple() under server_start_lock;
 *  - hand the server disk to the MGC (server_mgc_set_fs) when there is a
 *    server mount;
 *  - register the target with the MGS;
 *  - register the mount under the target's name so the target can find it;
 *  - process the config log named after the target;
 *  - release the MGC fs again;
 *  - look the new obd up by name, optionally abort recovery, notify the
 *    MGS that the target is ready, compute the recovery timeout and send
 *    OBD_NOTIFY_CONFIG.
 * NOTE(review): branch conditions, goto labels and the return are not
 * visible in this listing, so error-path ordering cannot be confirmed. */
1163 static int server_start_targets(struct super_block *sb, struct vfsmount *mnt)
1165 struct obd_device *obd;
1166 struct lustre_sb_info *lsi = s2lsi(sb);
1167 struct config_llog_instance cfg;
1171 CDEBUG(D_MOUNT, "starting target %s\n", lsi->lsi_svname);
1174 /* If we're an MDT, make sure the global MDS is running */
1175 if (lsi->lsi_ldd->ldd_flags & LDD_F_SV_TYPE_MDT) {
1176 /* make sure the MDS is started */
1177 cfs_mutex_lock(&server_start_lock);
1178 obd = class_name2obd(LUSTRE_MDS_OBDNAME);
1180 rc = lustre_start_simple(LUSTRE_MDS_OBDNAME,
1181 /* FIXME pre-rename, should eventually be LUSTRE_MDS_NAME */
1183 LUSTRE_MDS_OBDNAME"_uuid",
1186 cfs_mutex_unlock(&server_start_lock);
1187 CERROR("failed to start MDS: %d\n", rc);
1191 cfs_mutex_unlock(&server_start_lock);
1195 /* If we're an OST, make sure the global OSS is running */
1197 /* make sure OSS is started */
1198 cfs_mutex_lock(&server_start_lock);
1199 obd = class_name2obd(LUSTRE_OSS_OBDNAME);
1201 rc = lustre_start_simple(LUSTRE_OSS_OBDNAME,
1203 LUSTRE_OSS_OBDNAME"_uuid",
1206 cfs_mutex_unlock(&server_start_lock);
1207 CERROR("failed to start OSS: %d\n", rc);
1211 cfs_mutex_unlock(&server_start_lock);
1214 /* Set the mgc fs to our server disk. This allows the MGC to
1215 * read and write configs locally, in case it can't talk to the MGS. */
1216 if (lsi->lsi_srv_mnt) {
1217 rc = server_mgc_set_fs(lsi->lsi_mgc, sb);
1222 /* Register with MGS */
1223 rc = server_register_target(lsi);
1227 /* Let the target look up the mount using the target's name
1228 (we can't pass the sb or mnt through class_process_config.) */
1229 rc = server_register_mount(lsi->lsi_svname, sb, mnt);
1233 /* Start targets using the llog named for the target */
1234 memset(&cfg, 0, sizeof(cfg));
1235 rc = lustre_process_log(sb, lsi->lsi_svname, &cfg);
1237 CERROR("failed to start server %s: %d\n",
1238 lsi->lsi_svname, rc);
1239 /* Do NOT call server_deregister_mount() here. This makes it
1240 * impossible to find mount later in cleanup time and leaves
1241 * @lsi and other stuff leaked. -umka */
1246 /* Release the mgc fs for others to use */
1247 if (lsi->lsi_srv_mnt)
1248 server_mgc_clear_fs(lsi->lsi_mgc);
1251 obd = class_name2obd(lsi->lsi_svname);
1253 CERROR("no server named %s was started\n",
/* Abort recovery only when requested on the mount line and the obd type
 * implements iocontrol. */
1258 if ((lsi->lsi_lmd->lmd_flags & LMD_FLG_ABORT_RECOV) &&
1259 (OBP(obd, iocontrol))) {
1260 obd_iocontrol(OBD_IOC_ABORT_RECOVERY,
1261 obd->obd_self_export, 0, NULL, NULL);
/* The result of the ready notification is not checked here. */
1264 server_notify_target(sb, obd);
1266 /* calculate recovery timeout, do it after lustre_process_log */
1267 server_calc_timeout(lsi, obd);
1269 /* log has been fully processed */
1270 obd_notify(obd, NULL, OBD_NOTIFY_CONFIG, (void *)CONFIG_LOG);
1276 /***************** lustre superblock **************/
/* Create the lustre_sb_info for @sb together with its mount-data structure
 * (lsi_lmd).  Visible initialisation: the exclude count and both recovery
 * time limits are zeroed, the lsi is stored in the superblock via
 * s2lsi_nocast(), lsi_mounts starts at 1 (the extra setup reference) and
 * the flags default to LSI_UMOUNT_FAILOVER.
 * NOTE(review): the allocation of lsi itself, the failure path for lsi_lmd
 * and the return are not visible in this listing. */
1278 struct lustre_sb_info *lustre_init_lsi(struct super_block *sb)
1280 struct lustre_sb_info *lsi;
1286 OBD_ALLOC_PTR(lsi->lsi_lmd);
1287 if (!lsi->lsi_lmd) {
1292 lsi->lsi_lmd->lmd_exclude_count = 0;
1293 lsi->lsi_lmd->lmd_recovery_time_soft = 0;
1294 lsi->lsi_lmd->lmd_recovery_time_hard = 0;
1295 s2lsi_nocast(sb) = lsi;
1296 /* we take 1 extra ref for our setup */
1297 cfs_atomic_set(&lsi->lsi_mounts, 1);
1299 /* Default umount style */
1300 lsi->lsi_flags = LSI_UMOUNT_FAILOVER;
/* Free the lustre_sb_info attached to @sb and everything hanging off its
 * mount data.  Asserts that no mount references remain (lsi_mounts == 0)
 * and that lsi_llsbi is already NULL.  Each string in lsi_lmd is freed with
 * strlen + 1, the exclude array by element size times count, and the sb
 * pointer to the lsi is cleared at the end.
 * NOTE(review): lmd_params is freed with a hard-coded size of 4096; the
 * matching allocation is not visible in this part of the file -- confirm
 * the sizes agree.  The return statement is not visible either. */
1305 static int lustre_free_lsi(struct super_block *sb)
1307 struct lustre_sb_info *lsi = s2lsi(sb);
1310 LASSERT(lsi != NULL);
1311 CDEBUG(D_MOUNT, "Freeing lsi %p\n", lsi);
1313 /* someone didn't call server_put_mount. */
1314 LASSERT(cfs_atomic_read(&lsi->lsi_mounts) == 0);
1316 if (lsi->lsi_lmd != NULL) {
1317 if (lsi->lsi_lmd->lmd_dev != NULL)
1318 OBD_FREE(lsi->lsi_lmd->lmd_dev,
1319 strlen(lsi->lsi_lmd->lmd_dev) + 1);
1320 if (lsi->lsi_lmd->lmd_profile != NULL)
1321 OBD_FREE(lsi->lsi_lmd->lmd_profile,
1322 strlen(lsi->lsi_lmd->lmd_profile) + 1);
1323 if (lsi->lsi_lmd->lmd_mgssec != NULL)
1324 OBD_FREE(lsi->lsi_lmd->lmd_mgssec,
1325 strlen(lsi->lsi_lmd->lmd_mgssec) + 1);
1326 if (lsi->lsi_lmd->lmd_opts != NULL)
1327 OBD_FREE(lsi->lsi_lmd->lmd_opts,
1328 strlen(lsi->lsi_lmd->lmd_opts) + 1);
1329 if (lsi->lsi_lmd->lmd_exclude_count)
1330 OBD_FREE(lsi->lsi_lmd->lmd_exclude,
1331 sizeof(lsi->lsi_lmd->lmd_exclude[0]) *
1332 lsi->lsi_lmd->lmd_exclude_count);
1333 if (lsi->lsi_lmd->lmd_mgs != NULL)
1334 OBD_FREE(lsi->lsi_lmd->lmd_mgs,
1335 strlen(lsi->lsi_lmd->lmd_mgs) + 1);
1336 if (lsi->lsi_lmd->lmd_osd_type != NULL)
1337 OBD_FREE(lsi->lsi_lmd->lmd_osd_type,
1338 strlen(lsi->lsi_lmd->lmd_osd_type) + 1);
1339 if (lsi->lsi_lmd->lmd_params != NULL)
1340 OBD_FREE(lsi->lsi_lmd->lmd_params, 4096);
1342 OBD_FREE(lsi->lsi_lmd, sizeof(*lsi->lsi_lmd));
1345 LASSERT(lsi->lsi_llsbi == NULL);
1346 OBD_FREE(lsi, sizeof(*lsi));
1347 s2lsi_nocast(sb) = NULL;
1352 /* The lsi has one reference for every server that is using the disk -
1353 e.g. MDT, MGS, and potentially MGC */
/* Drop one lsi_mounts reference on the superblock info of @sb; when the
 * counter reaches zero the lsi is freed with lustre_free_lsi().
 * server_put_mount() treats a non-zero result as "that was the last
 * reference".  NOTE(review): the return statements themselves are not
 * visible in this listing. */
1354 static int lustre_put_lsi(struct super_block *sb)
1356 struct lustre_sb_info *lsi = s2lsi(sb);
1359 LASSERT(lsi != NULL);
1361 CDEBUG(D_MOUNT, "put %p %d\n", sb, cfs_atomic_read(&lsi->lsi_mounts));
1362 if (cfs_atomic_dec_and_test(&lsi->lsi_mounts)) {
1363 lustre_free_lsi(sb);
/* Derive the server identity stored in @lsi from its mount data.
 *
 * Visible steps: the server name comes from lmd_profile (required, and it
 * must fit in lsi_svname, else -ENAMETOOLONG); the osd type comes from
 * lmd_osd_type when given (same length check) and otherwise defaults to
 * LUSTRE_OSD_NAME; lsi_fstype is "ldiskfs" for the "osd-ldiskfs" osd type;
 * the server type flags come from server_name2index(), with a bare MGS
 * allowed when LMD_FLG_MGS is set; finally selected mount-line flags are
 * translated into their LDD_F_* equivalents in lsi_flags.
 * NOTE(review): branch conditions, several flag constants and the return
 * are not visible in this listing. */
1369 static int lsi_prepare(struct lustre_sb_info *lsi)
1376 LASSERT(lsi->lsi_lmd);
1378 /* The server name is given as a mount line option */
1379 if (lsi->lsi_lmd->lmd_profile == NULL) {
1380 LCONSOLE_ERROR("Can't determine server name\n");
/* Length is checked first, so the strcpy() below cannot overflow. */
1384 if (strlen(lsi->lsi_lmd->lmd_profile) >= sizeof(lsi->lsi_svname))
1385 RETURN(-ENAMETOOLONG);
1387 strcpy(lsi->lsi_svname, lsi->lsi_lmd->lmd_profile);
1389 /* Determine osd type */
1390 if (lsi->lsi_lmd->lmd_osd_type != NULL) {
1391 if (strlen(lsi->lsi_lmd->lmd_osd_type) >=
1392 sizeof(lsi->lsi_osd_type))
1393 RETURN(-ENAMETOOLONG);
1395 strcpy(lsi->lsi_osd_type, lsi->lsi_lmd->lmd_osd_type);
1397 strcpy(lsi->lsi_osd_type, LUSTRE_OSD_NAME);
/* NOTE(review): the comparison and copy below read lmd_osd_type, which the
 * test above allows to be NULL (the default is stored in lsi_osd_type
 * instead); no guard is visible before the strcmp -- confirm whether
 * lsi_osd_type was intended.  The copy into lsi_fstype also has no visible
 * length check. */
1400 /* XXX: a temp. solution for components using fsfilt
1401 * to be removed in one of the subsequent patches */
1402 if (!strcmp(lsi->lsi_lmd->lmd_osd_type, "osd-ldiskfs")) {
1403 strcpy(lsi->lsi_fstype, "ldiskfs");
1405 strcpy(lsi->lsi_fstype, lsi->lsi_lmd->lmd_osd_type);
1408 /* Determine server type */
1409 rc = server_name2index(lsi->lsi_svname, &index, NULL);
1411 if (lsi->lsi_lmd->lmd_flags & LMD_FLG_MGS) {
1412 /* Assume we're a bare MGS */
1414 lsi->lsi_lmd->lmd_flags |= LMD_FLG_NOSVC;
1416 LCONSOLE_ERROR("Can't determine server type of '%s'\n",
1421 lsi->lsi_flags |= rc;
1423 /* Add mount line flags that used to be in ldd:
1424 * writeconf, mgs, iam, anything else?
1426 lsi->lsi_flags |= (lsi->lsi_lmd->lmd_flags & LMD_FLG_WRITECONF) ?
1427 LDD_F_WRITECONF : 0;
1428 lsi->lsi_flags |= (lsi->lsi_lmd->lmd_flags & LMD_FLG_VIRGIN) ?
1430 lsi->lsi_flags |= (lsi->lsi_lmd->lmd_flags & LMD_FLG_MGS) ?
1431 LDD_F_SV_TYPE_MGS : 0;
1432 lsi->lsi_flags |= (lsi->lsi_lmd->lmd_flags & LMD_FLG_IAM) ?
1434 lsi->lsi_flags |= (lsi->lsi_lmd->lmd_flags & LMD_FLG_NO_PRIMNODE) ?
1435 LDD_F_NO_PRIMNODE : 0;
1440 /*************** server mount ******************/
1442 /** Kernel mount using mount options in MOUNT_DATA_FILE.
1443 * Since this file lives on the disk, we pre-mount using a common
1444 * type, read the file, then re-mount using the type specified in the
/*
 * Mount the backing device of a server superblock.
 *
 * Runs lsi_prepare(), assembles the option string in a freshly
 * allocated page (existing options, then "no_mbcache", then the
 * mount-line options), forces MS_NOATIME | MS_NODIRATIME and calls
 * vfs_kern_mount() on the filesystem type named by lsi_fstype.  When
 * LMD_FLG_ABORT_RECOV is set, LAST_RCVD is truncated on the new mount.
 * Failure paths return ERR_PTR(rc).
 */
1447 static struct vfsmount *server_kernel_mount(struct super_block *sb)
1449 struct lustre_sb_info *lsi = s2lsi(sb);
1450 struct lustre_mount_data *lmd = lsi->lsi_lmd;
1451 struct vfsmount *mnt;
1452 struct file_system_type *type;
1453 char *options = NULL;
1454 unsigned long page, s_flags;
1455 struct page *__page;
1460 rc = lsi_prepare(lsi);
1462 RETURN(ERR_PTR(rc));
/* NOTE(review): lmd_osd_type is passed to strcmp() without a NULL
 * check, while lsi_prepare() handles a NULL lmd_osd_type - confirm it
 * cannot be NULL here. */
1464 if (strcmp(lmd->lmd_osd_type, "osd-ldiskfs") == 0) {
1465 /* with ldiskfs we're still mounting in the kernel space */
1466 OBD_FREE(lmd->lmd_osd_type,
1467 strlen(lmd->lmd_osd_type) + 1);
1468 lmd->lmd_osd_type = NULL;
1470 /* non-ldiskfs backends (zfs) do mounting internally */
1474 /* In the past, we have always used flags = 0.
1475 Note ext3/ldiskfs can't be mounted ro. */
1476 s_flags = sb->s_flags;
1478 /* allocate memory for options */
1479 OBD_PAGE_ALLOC(__page, CFS_ALLOC_STD);
1481 GOTO(out_free, rc = -ENOMEM);
1482 page = (unsigned long)cfs_page_address(__page);
1483 options = (char *)page;
1484 memset(options, 0, CFS_PAGE_SIZE);
/* NOTE(review): the page is zeroed a second time just below, which is
 * redundant.  lmd_opts is also handed to strncpy() unconditionally,
 * although the "mount-line options" test further down treats it as
 * possibly NULL (lmd and lsi->lsi_lmd are the same object) - confirm. */
1486 /* Glom up mount options */
1487 memset(options, 0, CFS_PAGE_SIZE);
1488 strncpy(options, lsi->lsi_lmd->lmd_opts, CFS_PAGE_SIZE - 2);
/* len leaves room for the "," separator and the terminating NUL */
1490 len = CFS_PAGE_SIZE - strlen(options) - 2;
1492 strcat(options, ",");
1493 strncat(options, "no_mbcache", len);
1495 /* Add in any mount-line options */
1496 if (lmd->lmd_opts && (*(lmd->lmd_opts) != 0)) {
1497 len = CFS_PAGE_SIZE - strlen(options) - 2;
1498 strcat(options, ",");
1499 strncat(options, lmd->lmd_opts, len);
1502 /* Special permanent mount flags */
1504 s_flags |= MS_NOATIME | MS_NODIRATIME;
1506 CDEBUG(D_MOUNT, "kern_mount: %s %s %s\n",
1507 lsi->lsi_osd_type, lmd->lmd_dev, options);
1508 type = get_fs_type(lsi->lsi_fstype);
1510 CERROR("get_fs_type failed\n");
1511 GOTO(out_free, rc = -ENODEV);
1513 mnt = vfs_kern_mount(type, s_flags, lmd->lmd_dev, (void *)options);
/* drop the module reference on the fs type once the mount call is done */
1514 cfs_module_put(type->owner);
1517 CERROR("vfs_kern_mount failed: rc = %d\n", rc);
1521 if (lmd->lmd_flags & LMD_FLG_ABORT_RECOV)
1522 simple_truncate(mnt->mnt_sb->s_root, mnt, LAST_RCVD,
1525 OBD_PAGE_FREE(__page);
1526 CDEBUG(D_SUPER, "%s: mnt = %p\n", lmd->lmd_dev, mnt);
1531 OBD_PAGE_FREE(__page);
1532 RETURN(ERR_PTR(rc));
1535 /** Wait here forever until the mount refcount is 0 before completing umount,
1536 * else we risk dereferencing a null pointer.
1537 * LNET may take e.g. 165s before killing zombies.
/*
 * Block until the reference count of @mnt has dropped to 1.
 *
 * First sleeps on a private wait queue with a condition of 0 and a
 * 3 second timeout, then loops while mnt_get_count(mnt) > 1.  Each loop
 * iteration waits up to 3 seconds between cfs_block_sigsinv(
 * sigmask(SIGKILL)) and cfs_restore_sigs(); a console warning is
 * printed whenever `waited` is a non-zero multiple of 30, and an
 * emergency message reports an interrupted umount.
 */
1539 static void server_wait_finished(struct vfsmount *mnt)
1543 cfs_sigset_t blocked;
1546 cfs_waitq_init(&waitq);
/* condition 0 never becomes true, so this wait can only end on the
 * timeout or on an interruption */
1547 cfs_waitq_wait_event_interruptible_timeout(waitq, 0,
1548 cfs_time_seconds(3), rc);
1553 cfs_waitq_init(&waitq);
1555 while (mnt_get_count(mnt) > 1) {
1556 if (waited && (waited % 30 == 0))
1557 LCONSOLE_WARN("Mount still busy with %d refs after "
1561 /* Cannot use l_event_wait() for an interruptible sleep. */
1563 blocked = cfs_block_sigsinv(sigmask(SIGKILL));
1564 cfs_waitq_wait_event_interruptible_timeout(
1566 (mnt_get_count(mnt) == 1),
1567 cfs_time_seconds(3),
1569 cfs_restore_sigs(blocked);
1571 LCONSOLE_EMERG("Danger: interrupted umount %s with "
1572 "%d refs!\n", mnt_get_devname(mnt),
1573 mnt_get_count(mnt));
1580 /** Start the shutdown of servers at umount.
/*
 * .put_super handler for server mounts.
 *
 * Order of operations visible here: copy the server name for the final
 * console message, stop the target obd (after ending its config log),
 * deregister the mount, stop the MGS unless LMD_FLG_NOMGS is set, drop
 * the MGC and sb via lustre_common_put_super(), wait for the backing
 * mount to become idle, stop the server services, and finally clean up
 * an obd named after the profile's lp_dt if one was remembered.
 *
 * lsi_flags and lsi_srv_mnt are copied into locals at the top; they are
 * still used after lustre_common_put_super(sb) has been called.
 */
1582 static void server_put_super(struct super_block *sb)
1584 struct lustre_sb_info *lsi = s2lsi(sb);
1585 struct obd_device *obd;
1586 struct vfsmount *mnt = lsi->lsi_srv_mnt;
1587 char *tmpname, *extraname = NULL;
1589 int lsiflags = lsi->lsi_flags;
1592 LASSERT(IS_SERVER(lsi));
/* NOTE(review): tmpname is written to right after OBD_ALLOC with no
 * visible NULL check - confirm the allocation cannot fail here or add
 * a check. */
1594 tmpname_sz = strlen(lsi->lsi_svname) + 1;
1595 OBD_ALLOC(tmpname, tmpname_sz);
1596 memcpy(tmpname, lsi->lsi_svname, tmpname_sz);
1597 CDEBUG(D_MOUNT, "server put_super %s\n", tmpname);
/* an MDT mounted with nosvc is reported as "MGS" in the final message */
1598 if (IS_MDT(lsi) && (lsi->lsi_lmd->lmd_flags & LMD_FLG_NOSVC))
1599 snprintf(tmpname, tmpname_sz, "MGS");
1601 /* Stop the target */
1602 if (!(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOSVC) &&
1603 (IS_MDT(lsi) || IS_OST(lsi))) {
1604 struct lustre_profile *lprof = NULL;
1606 /* tell the mgc to drop the config log */
1607 lustre_end_log(sb, lsi->lsi_svname, NULL);
1609 /* COMPAT_146 - profile may get deleted in mgc_cleanup.
1610 If there are any setup/cleanup errors, save the lov
1611 name for safety cleanup later. */
1612 lprof = class_get_profile(lsi->lsi_svname);
/* NOTE(review): extraname is filled by strcpy() right after OBD_ALLOC
 * with no visible NULL check - confirm. */
1613 if (lprof && lprof->lp_dt) {
1614 OBD_ALLOC(extraname, strlen(lprof->lp_dt) + 1);
1615 strcpy(extraname, lprof->lp_dt);
1618 obd = class_name2obd(lsi->lsi_svname);
1620 CDEBUG(D_MOUNT, "stopping %s\n", obd->obd_name);
1621 if (lsi->lsi_flags & LSI_UMOUNT_FAILOVER)
1623 /* We can't seem to give an error return code
1624 * to .put_super, so we better make sure we clean up! */
1626 class_manual_cleanup(obd);
1628 CERROR("no obd %s\n", lsi->lsi_svname);
1629 server_deregister_mount(lsi->lsi_svname);
1633 /* If they wanted the mgs to stop separately from the mdt, they
1634 should have put it on a different device. */
1636 /* if MDS start with --nomgs, don't stop MGS then */
1637 if (!(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOMGS))
1638 server_stop_mgs(sb);
1641 /* Clean the mgc and sb */
1642 lustre_common_put_super(sb);
1644 /* Wait for the targets to really clean up - can't exit (and let the
1645 sb get destroyed) while the mount is still in use */
1646 server_wait_finished(mnt);
1648 /* drop the One True Mount */
1652 /* Stop the servers (MDS, OSS) if no longer needed. We must wait
1653 until the target is really gone so that our type refcount check
1655 server_stop_servers(lsiflags);
1657 /* In case of startup or cleanup err, stop related obds */
1659 obd = class_name2obd(extraname);
1661 CWARN("Cleaning orphaned obd %s\n", extraname);
1663 class_manual_cleanup(obd);
1665 OBD_FREE(extraname, strlen(extraname) + 1);
1668 LCONSOLE_WARN("server umount %s complete\n", tmpname);
1669 OBD_FREE(tmpname, tmpname_sz);
1673 /** Called only for 'umount -f'
/*
 * .umount_begin handler.  Two signatures exist, selected by
 * HAVE_UMOUNTBEGIN_VFSMOUNT: one receives the vfsmount plus flags and
 * derives the superblock from it, the other receives the superblock
 * directly.  In the vfsmount variant the flags are tested for MNT_FORCE
 * first.  The handler clears LSI_UMOUNT_FAILOVER in lsi_flags, which
 * server_put_super() reads when it stops the target.
 */
1675 #ifdef HAVE_UMOUNTBEGIN_VFSMOUNT
1676 static void server_umount_begin(struct vfsmount *vfsmnt, int flags)
1678 struct super_block *sb = vfsmnt->mnt_sb;
1680 static void server_umount_begin(struct super_block *sb)
1683 struct lustre_sb_info *lsi = s2lsi(sb);
1686 #ifdef HAVE_UMOUNTBEGIN_VFSMOUNT
1687 if (!(flags & MNT_FORCE)) {
1693 CDEBUG(D_MOUNT, "umount -f\n");
1694 /* umount = failover
1696 no third way to do non-force, non-failover */
1697 lsi->lsi_flags &= ~LSI_UMOUNT_FAILOVER;
/*
 * .statfs handler for server mounts.  When the backing mount
 * (lsi_srv_mnt) has a statfs operation, the call is forwarded to it on
 * mnt_root and f_type is then overwritten with this sb's magic.
 * Otherwise the visible fallback fills f_type, f_bsize and f_namelen
 * from the Lustre superblock and NAME_MAX.
 */
1701 static int server_statfs (struct dentry *dentry, cfs_kstatfs_t *buf)
1703 struct super_block *sb = dentry->d_sb;
1704 struct vfsmount *mnt = s2lsi(sb)->lsi_srv_mnt;
1707 if (mnt && mnt->mnt_sb && mnt->mnt_sb->s_op->statfs) {
1708 int rc = mnt->mnt_sb->s_op->statfs(mnt->mnt_root, buf);
1710 buf->f_type = sb->s_magic;
1716 buf->f_type = sb->s_magic;
1717 buf->f_bsize = sb->s_blocksize;
1723 buf->f_namelen = NAME_MAX;
1727 /** The operations we support directly on the superblock:
1728 * mount, umount, and df.
/* Superblock operations installed on server mounts by
 * server_fill_super_common(). */
1730 static struct super_operations server_ops =
1732 .put_super = server_put_super,
1733 .umount_begin = server_umount_begin, /* umount -f */
1734 .statfs = server_statfs,
/* log2(n) is built from cfs_ffz() applied to ~n; presumably this yields
 * the index of the lowest set bit of n, i.e. an exact base-2 logarithm
 * only for powers of two - TODO confirm cfs_ffz semantics.  It is used
 * below on the fixed block size 4096. */
1737 #define log2(n) cfs_ffz(~(n))
/* Value stored in sb->s_magic for server mountpoints */
1738 #define LUSTRE_SUPER_MAGIC 0x0BD00BD1
/*
 * Set up the generic parts of a server superblock: a 4096 byte block
 * size, the Lustre magic, s_maxbytes of 0, the MS_RDONLY flag and
 * server_ops, then create a root inode of type directory and the root
 * dentry for it.
 */
1740 static int server_fill_super_common(struct super_block *sb)
1742 struct inode *root = 0;
1745 CDEBUG(D_MOUNT, "Server sb, dev=%d\n", (int)sb->s_dev);
1747 sb->s_blocksize = 4096;
1748 sb->s_blocksize_bits = log2(sb->s_blocksize);
1749 sb->s_magic = LUSTRE_SUPER_MAGIC;
1750 sb->s_maxbytes = 0; /* we don't allow file IO on server mountpoints */
1751 sb->s_flags |= MS_RDONLY;
1752 sb->s_op = &server_ops;
1754 root = new_inode(sb);
1756 CERROR("Can't make root inode\n");
1760 /* returns -EIO for every operation */
1761 /* make_bad_inode(root); -- badness - can't umount */
1762 /* apparently we need to be a directory for the mount to finish */
1763 root->i_mode = S_IFDIR;
1765 sb->s_root = d_alloc_root(root);
1767 CERROR("Can't make root dentry\n");
1775 /** Fill in the superblock info for a Lustre server.
1776 * Mount the device with the correct options.
1777 * Read the on-disk config file.
1778 * Start the services.
/*
 * Bring up a Lustre server on @sb.  Sequence: mount the backing device
 * (server_kernel_mount) and remember it in lsi_srv_mnt, refuse to
 * continue if an obd with the server name already exists, start the MGS
 * when this is an MGS and LMD_FLG_NOMGS is not set, start the MGC,
 * start the targets for OST/MDT mounts without LMD_FLG_NOSVC, and
 * finish with server_fill_super_common().  The failure path at the end
 * tears everything down through server_put_super().
 */
1780 static int server_fill_super(struct super_block *sb)
1782 struct lustre_sb_info *lsi = s2lsi(sb);
1783 struct vfsmount *mnt;
1787 /* the One True Mount */
1788 mnt = server_kernel_mount(sb);
1791 CERROR("Unable to mount device %s: %d\n",
1792 lsi->lsi_lmd->lmd_dev, rc);
1796 lsi->lsi_srv_mnt = mnt;
1798 CDEBUG(D_MOUNT, "Found service %s on device %s\n",
1799 lsi->lsi_svname, lsi->lsi_lmd->lmd_dev);
1801 if (class_name2obd(lsi->lsi_svname)) {
1802 LCONSOLE_ERROR_MSG(0x161, "The target named %s is already "
1803 "running. Double-mount may have compromised"
1804 " the disk journal.\n",
1811 /* Start MGS before MGC */
1812 if (IS_MGS(lsi) && !(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOMGS)){
1813 rc = server_start_mgs(sb);
1818 /* Start MGC before servers */
1819 rc = lustre_start_mgc(sb);
1823 /* Set up all obd devices for service */
1824 if (!(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOSVC) &&
1825 (IS_OST(lsi) || IS_MDT(lsi))) {
1826 rc = server_start_targets(sb, mnt);
1828 CERROR("Unable to start targets: %d\n", rc);
1831 /* FIXME overmount client here,
1832 or can we just start a client log and client_fill_super on this sb?
1833 We need to make sure server_put_super gets called too - ll_put_super
1834 calls lustre_common_put_super; check there for LSI_SERVER flag,
1836 Probably should start client from new thread so we can return.
1837 Client will not finish until all servers are connected.
1838 Note - MGS-only server does NOT get a client, since there is no
1839 lustre fs associated - the MGS is for all lustre fs's */
1842 rc = server_fill_super_common(sb);
1848 /* We jump here in case of failure while starting targets or MGS.
1849 * In this case we can't just put @mnt and have to do real cleanup
1850 * with stopping targets, etc. */
1851 server_put_super(sb);
1856 * Calculate timeout value for a target.
/*
 * Compute the recovery window of @obd.
 *
 * The soft and hard values are read from the mount data
 * (lmd_recovery_time_soft / lmd_recovery_time_hard) with
 * OBD_RECOVERY_TIME_SOFT / OBD_RECOVERY_TIME_HARD as fallbacks.
 * Imperative recovery is considered available when lsi_flags has
 * LDD_F_IR_CAPABLE and the mount data does not carry LMD_FLG_NOIR;
 * obd_no_ir records the opposite.  With IR, both values are scaled by
 * factor / OBD_IR_FACTOR_MAX, where factor is obd_recovery_ir_factor or
 * OBD_IR_FACTOR_DEFAULT, and clamped so that soft is at least
 * OBD_RECOVERY_TIME_MIN and hard is at least soft.
 *
 * Results are stored in obd_recovery_timeout (never lowered below its
 * current value), obd_recovery_time_hard and obd_recovery_ir_factor.
 */
1858 void server_calc_timeout(struct lustre_sb_info *lsi, struct obd_device *obd)
1860 struct lustre_mount_data *lmd;
1864 bool has_ir = !!(lsi->lsi_flags & LDD_F_IR_CAPABLE);
1865 int min = OBD_RECOVERY_TIME_MIN;
1867 LASSERT(IS_SERVER(lsi));
1871 soft = lmd->lmd_recovery_time_soft;
1872 hard = lmd->lmd_recovery_time_hard;
1873 has_ir = has_ir && !(lmd->lmd_flags & LMD_FLG_NOIR);
1874 obd->obd_no_ir = !has_ir;
1878 soft = OBD_RECOVERY_TIME_SOFT;
1880 hard = OBD_RECOVERY_TIME_HARD;
1882 /* target may have ir_factor configured. */
1883 factor = OBD_IR_FACTOR_DEFAULT;
1884 if (obd->obd_recovery_ir_factor)
1885 factor = obd->obd_recovery_ir_factor;
1888 int new_soft = soft;
1889 int new_hard = hard;
1891 /* adjust timeout value by imperative recovery */
1893 new_soft = (soft * factor) / OBD_IR_FACTOR_MAX;
1894 new_hard = (hard * factor) / OBD_IR_FACTOR_MAX;
1896 /* make sure the timeout is not too short */
1897 new_soft = max(min, new_soft);
1898 new_hard = max(new_soft, new_hard);
1900 LCONSOLE_INFO("%s: Imperative Recovery enabled, recovery "
1901 "window shrunk from %d-%d down to %d-%d\n",
1902 obd->obd_name, soft, hard, new_soft, new_hard);
1909 obd->obd_recovery_timeout = max(obd->obd_recovery_timeout, soft);
1910 obd->obd_recovery_time_hard = hard;
1911 obd->obd_recovery_ir_factor = factor;
1913 EXPORT_SYMBOL(server_calc_timeout);
1915 /*************** mount common between server and client ***************/
/*
 * Common umount tail: drops a reference on the MGC through
 * lustre_stop_mgc().  A result of -ENOENT is not treated as an error;
 * other non-zero results lead either to the "Can't stop MGC" error or
 * to the "MGC still in use" debug message.  Exported for use by other
 * modules.
 */
1918 int lustre_common_put_super(struct super_block *sb)
1923 CDEBUG(D_MOUNT, "dropping sb %p\n", sb);
1925 /* Drop a ref to the MGC */
1926 rc = lustre_stop_mgc(sb);
1927 if (rc && (rc != -ENOENT)) {
1929 CERROR("Can't stop MGC: %d\n", rc);
1932 /* BUSY just means that there's some other obd that
1933 needs the mgc. Let him clean it up. */
1934 CDEBUG(D_MOUNT, "MGC still in use\n");
1936 /* Drop a ref to the mounted disk */
1941 EXPORT_SYMBOL(lustre_common_put_super);
/*
 * Dump the parsed mount data through PRINT_CMD/PRINT_MASK: profile
 * (client mounts only), device, flags, options, the recovery times when
 * they are non-zero, and every entry of the exclude list.
 */
1943 static void lmd_print(struct lustre_mount_data *lmd)
1947 PRINT_CMD(PRINT_MASK, " mount data:\n");
1948 if (lmd_is_client(lmd))
1949 PRINT_CMD(PRINT_MASK, "profile: %s\n", lmd->lmd_profile);
1950 PRINT_CMD(PRINT_MASK, "device: %s\n", lmd->lmd_dev);
1951 PRINT_CMD(PRINT_MASK, "flags: %x\n", lmd->lmd_flags);
1954 PRINT_CMD(PRINT_MASK, "options: %s\n", lmd->lmd_opts);
1956 if (lmd->lmd_recovery_time_soft)
1957 PRINT_CMD(PRINT_MASK, "recovery time soft: %d\n",
1958 lmd->lmd_recovery_time_soft);
1960 if (lmd->lmd_recovery_time_hard)
1961 PRINT_CMD(PRINT_MASK, "recovery time hard: %d\n",
1962 lmd->lmd_recovery_time_hard);
1964 for (i = 0; i < lmd->lmd_exclude_count; i++) {
1965 PRINT_CMD(PRINT_MASK, "exclude %d: OST%04x\n", i,
1966 lmd->lmd_exclude[i]);
1970 /* Is this server on the exclusion list */
/*
 * Look up @svname in the exclude list of the mount data attached to
 * @sb.  The name is converted with server_name2index(); anything that
 * is not reported as LDD_F_SV_TYPE_OST is not considered for exclusion.
 * For OSTs the index is compared against every lmd_exclude entry and a
 * warning is logged on a match.
 */
1971 int lustre_check_exclusion(struct super_block *sb, char *svname)
1973 struct lustre_sb_info *lsi = s2lsi(sb);
1974 struct lustre_mount_data *lmd = lsi->lsi_lmd;
1979 rc = server_name2index(svname, &index, NULL);
1980 if (rc != LDD_F_SV_TYPE_OST)
1981 /* Only exclude OSTs */
1984 CDEBUG(D_MOUNT, "Check exclusion %s (%d) in %d of %s\n", svname,
1985 index, lmd->lmd_exclude_count, lmd->lmd_dev);
1987 for(i = 0; i < lmd->lmd_exclude_count; i++) {
1988 if (index == lmd->lmd_exclude[i]) {
1989 CWARN("Excluding %s (on exclusion list)\n", svname);
1996 /* mount -v -o exclude=lustre-OST0001:lustre-OST0002 -t lustre ... */
/*
 * Parse the value of an "exclude=" mount option into lmd->lmd_exclude.
 *
 * @ptr points at the '=' of the option.  Names are parsed one after the
 * other with server_name2index() until a space, a comma or the end of
 * the string is reached; only entries reported as LDD_F_SV_TYPE_OST are
 * recorded, others are ignored with a debug message.  Indices are
 * collected in a temporary array sized from the option length and then
 * copied into a permanent array of exactly lmd_exclude_count entries.
 * If that second allocation fails the count is reset to 0.
 */
1997 static int lmd_make_exclusion(struct lustre_mount_data *lmd, char *ptr)
1999 char *s1 = ptr, *s2;
2000 __u32 index, *exclude_list;
2004 /* The shortest an ost name can be is 8 chars: -OST0000.
2005 We don't actually know the fsname at this time, so in fact
2006 a user could specify any fsname. */
2007 devmax = strlen(ptr) / 8 + 1;
2009 /* temp storage until we figure out how many we have */
2010 OBD_ALLOC(exclude_list, sizeof(index) * devmax);
2014 /* we enter this fn pointing at the '=' */
2015 while (*s1 && *s1 != ' ' && *s1 != ',') {
2017 rc = server_name2index(s1, &index, &s2);
2019 CERROR("Can't parse server name '%s'\n", s1);
2022 if (rc == LDD_F_SV_TYPE_OST)
2023 exclude_list[lmd->lmd_exclude_count++] = index;
2025 CDEBUG(D_MOUNT, "ignoring exclude %.7s\n", s1);
2027 /* now we are pointing at ':' (next exclude)
2028 or ',' (end of excludes) */
/* the temporary array holds devmax entries, so stop once it is full */
2029 if (lmd->lmd_exclude_count >= devmax)
2032 if (rc >= 0) /* non-err */
2035 if (lmd->lmd_exclude_count) {
2036 /* permanent, freed in lustre_free_lsi */
2037 OBD_ALLOC(lmd->lmd_exclude, sizeof(index) *
2038 lmd->lmd_exclude_count);
2039 if (lmd->lmd_exclude) {
2040 memcpy(lmd->lmd_exclude, exclude_list,
2041 sizeof(index) * lmd->lmd_exclude_count);
2044 lmd->lmd_exclude_count = 0;
2047 OBD_FREE(exclude_list, sizeof(index) * devmax);
/*
 * Store the value of a "mgssec=" option in lmd->lmd_mgssec.
 *
 * Any previous value is freed first.  The value runs from @ptr up to
 * the next ',' or, when there is none, to the end of the string; it is
 * copied into a new NUL-terminated buffer of length + 1 bytes.
 *
 * NOTE(review): the body mirrors lmd_parse_string() applied to
 * &lmd->lmd_mgssec; the two could probably share one implementation.
 */
2051 static int lmd_parse_mgssec(struct lustre_mount_data *lmd, char *ptr)
2056 if (lmd->lmd_mgssec != NULL) {
2057 OBD_FREE(lmd->lmd_mgssec, strlen(lmd->lmd_mgssec) + 1);
2058 lmd->lmd_mgssec = NULL;
2061 tail = strchr(ptr, ',');
2063 length = strlen(ptr);
2065 length = tail - ptr;
2067 OBD_ALLOC(lmd->lmd_mgssec, length + 1);
2068 if (lmd->lmd_mgssec == NULL)
2071 memcpy(lmd->lmd_mgssec, ptr, length);
2072 lmd->lmd_mgssec[length] = '\0';
/*
 * Generic helper for "name=value" mount options: replace the string
 * held in *@handle with a copy of the value starting at @ptr.
 *
 * Both arguments are checked for NULL.  An existing string in *@handle
 * is freed (size strlen + 1).  The value ends at the next ',' or at the
 * end of @ptr and is stored NUL-terminated in a new buffer of
 * length + 1 bytes.
 */
2076 static int lmd_parse_string(char **handle, char *ptr)
2081 if ((handle == NULL) || (ptr == NULL))
2084 if (*handle != NULL) {
2085 OBD_FREE(*handle, strlen(*handle) + 1);
2089 tail = strchr(ptr, ',');
2091 length = strlen(ptr);
2093 length = tail - ptr;
2095 OBD_ALLOC(*handle, length + 1);
2096 if (*handle == NULL)
2099 memcpy(*handle, ptr, length);
2100 (*handle)[length] = '\0';
2105 /* Collect multiple values for mgsnid specifiers */
/*
 * Append the NID list found at *@ptr to lmd->lmd_mgs.
 *
 * class_parse_nid() is called repeatedly to find the end of the list;
 * the text between *@ptr and that end is the new list.  If lmd_mgs
 * already holds a value, a new buffer is built from the old string, a
 * ':' in place of the old terminating NUL, and the new list; the old
 * buffer is then freed.
 *
 * assumes oldlen starts at 0 when lmd_mgs is NULL (its declaration is
 * not visible here) - TODO confirm
 */
2106 static int lmd_parse_mgs(struct lustre_mount_data *lmd, char **ptr)
2114 /* Find end of nidlist */
2115 while (class_parse_nid(tail, &nid, &tail) == 0) {}
2116 length = tail - *ptr;
2118 LCONSOLE_ERROR_MSG(0x159, "Can't parse NID '%s'\n", *ptr);
2122 if (lmd->lmd_mgs != NULL)
2123 oldlen = strlen(lmd->lmd_mgs) + 1;
2125 OBD_ALLOC(mgsnid, oldlen + length + 1);
2129 if (lmd->lmd_mgs != NULL) {
2130 /* Multiple mgsnid= are taken to mean failover locations */
2131 memcpy(mgsnid, lmd->lmd_mgs, oldlen);
/* oldlen counts the NUL, so this overwrites it with the separator */
2132 mgsnid[oldlen - 1] = ':';
2133 OBD_FREE(lmd->lmd_mgs, oldlen);
2135 memcpy(mgsnid + oldlen, *ptr, length);
2136 mgsnid[oldlen + length] = '\0';
2137 lmd->lmd_mgs = mgsnid;
2143 /** Parse mount line options
2144 * e.g. mount -v -t lustre -o abort_recov uml1:uml2:/lustre-client /mnt/lustre
2145 * dev is passed as device=uml1:/lustre by mount.lustre
/*
 * Parse the mount option string @options into @lmd.
 *
 * Options are matched by prefix with strncmp(), so the order of the
 * tests matters: "mgssec=" and PARAM_MGSNODE are tested before the
 * bare "mgs" prefix.  Options recognised here set LMD_FLG_* bits or
 * fill lmd fields.  "device=" supplies the device name; a name
 * containing ":/" marks a client mount, whose profile becomes
 * "<fsname>-client".  What is left of @options after parsing is saved
 * in lmd_opts.
 */
2147 static int lmd_parse(char *options, struct lustre_mount_data *lmd)
2149 char *s1, *s2, *devname = NULL;
2150 struct lustre_mount_data *raw = (struct lustre_mount_data *)options;
2156 LCONSOLE_ERROR_MSG(0x162, "Missing mount data: check that "
2157 "/sbin/mount.lustre is installed.\n");
2161 /* Options should be a string - try to detect old lmd data */
2162 if ((raw->lmd_magic & 0xffffff00) == (LMD_MAGIC & 0xffffff00)) {
2163 LCONSOLE_ERROR_MSG(0x163, "You're using an old version of "
2164 "/sbin/mount.lustre. Please install "
2165 "version %s\n", LUSTRE_VERSION_STRING);
2168 lmd->lmd_magic = LMD_MAGIC;
/* fixed-size accumulator for "param=" values; freed with the same size
 * in lustre_free_lsi() */
2170 OBD_ALLOC(lmd->lmd_params, 4096);
2171 if (lmd->lmd_params == NULL)
2173 lmd->lmd_params[0] = '\0';
2175 /* Set default flags here */
2180 int time_min = OBD_RECOVERY_TIME_MIN;
2182 /* Skip whitespace and extra commas */
2183 while (*s1 == ' ' || *s1 == ',')
2186 /* Client options are parsed in ll_options: eg. flock,
2189 /* Parse non-ldiskfs options here. Rather than modifying
2190 ldiskfs, we just zero these out here */
2191 if (strncmp(s1, "abort_recov", 11) == 0) {
2192 lmd->lmd_flags |= LMD_FLG_ABORT_RECOV;
2194 } else if (strncmp(s1, "recovery_time_soft=", 19) == 0) {
/* values below OBD_RECOVERY_TIME_MIN are raised to that minimum */
2195 lmd->lmd_recovery_time_soft = max_t(int,
2196 simple_strtoul(s1 + 19, NULL, 10), time_min);
2198 } else if (strncmp(s1, "recovery_time_hard=", 19) == 0) {
2199 lmd->lmd_recovery_time_hard = max_t(int,
2200 simple_strtoul(s1 + 19, NULL, 10), time_min);
2202 } else if (strncmp(s1, "noir", 4) == 0) {
2203 lmd->lmd_flags |= LMD_FLG_NOIR; /* test purpose only. */
2205 } else if (strncmp(s1, "nosvc", 5) == 0) {
2206 lmd->lmd_flags |= LMD_FLG_NOSVC;
2208 } else if (strncmp(s1, "nomgs", 5) == 0) {
2209 lmd->lmd_flags |= LMD_FLG_NOMGS;
2211 } else if (strncmp(s1, "noscrub", 7) == 0) {
2212 lmd->lmd_flags |= LMD_FLG_NOSCRUB;
2214 } else if (strncmp(s1, PARAM_MGSNODE,
2215 sizeof(PARAM_MGSNODE) - 1) == 0) {
2216 s2 = s1 + sizeof(PARAM_MGSNODE) - 1;
2217 /* Assume the next mount opt is the first
2218 invalid nid we get to. */
2219 rc = lmd_parse_mgs(lmd, &s2);
2223 } else if (strncmp(s1, "writeconf", 9) == 0) {
2224 lmd->lmd_flags |= LMD_FLG_WRITECONF;
2226 } else if (strncmp(s1, "virgin", 6) == 0) {
2227 lmd->lmd_flags |= LMD_FLG_VIRGIN;
2229 } else if (strncmp(s1, "noprimnode", 10) == 0) {
2230 lmd->lmd_flags |= LMD_FLG_NO_PRIMNODE;
2232 } else if (strncmp(s1, "mgssec=", 7) == 0) {
2233 rc = lmd_parse_mgssec(lmd, s1 + 7);
2237 /* ost exclusion list */
2238 } else if (strncmp(s1, "exclude=", 8) == 0) {
/* s1 + 7 is the '=' itself: lmd_make_exclusion() expects to enter
 * pointing at it */
2239 rc = lmd_make_exclusion(lmd, s1 + 7);
2243 } else if (strncmp(s1, "mgs", 3) == 0) {
2245 lmd->lmd_flags |= LMD_FLG_MGS;
2247 } else if (strncmp(s1, "svname=", 7) == 0) {
2248 rc = lmd_parse_string(&lmd->lmd_profile, s1 + 7);
2252 } else if (strncmp(s1, "param=", 6) == 0) {
/* NOTE(review): the value is appended to lmd_params with strncat()
 * bounded only by the value's own length; no check against the 4096
 * byte capacity of the buffer is visible - confirm it cannot overflow
 * with many or long param= options. */
2254 char *tail = strchr(s1 + 6, ',');
2256 length = strlen(s1);
2260 strncat(lmd->lmd_params, s1 + 6, length);
2261 strcat(lmd->lmd_params, " ");
2263 } else if (strncmp(s1, "osd=", 4) == 0) {
2264 rc = lmd_parse_string(&lmd->lmd_osd_type, s1 + 4);
2269 /* Linux 2.4 doesn't pass the device, so we stuck it at the
2270 end of the options. */
2271 else if (strncmp(s1, "device=", 7) == 0) {
2273 /* terminate options right before device. device
2274 must be the last one. */
2280 s2 = strchr(s1, ',');
/* shift the rest of the string down over the current option; the
 * regions overlap, hence memmove */
2288 memmove(s1, s2, strlen(s2) + 1);
2294 LCONSOLE_ERROR_MSG(0x164, "Can't find the device name "
2295 "(need mount option 'device=...')\n");
2299 s1 = strstr(devname, ":/");
2302 lmd->lmd_flags |= LMD_FLG_CLIENT;
2303 /* Remove leading /s from fsname */
2304 while (*++s1 == '/') ;
2305 /* Freed in lustre_free_lsi */
/* strlen("-client") is 7, plus 1 for the terminating NUL */
2306 OBD_ALLOC(lmd->lmd_profile, strlen(s1) + 8);
2307 if (!lmd->lmd_profile)
2309 sprintf(lmd->lmd_profile, "%s-client", s1);
2312 /* Freed in lustre_free_lsi */
2313 OBD_ALLOC(lmd->lmd_dev, strlen(devname) + 1);
2316 strcpy(lmd->lmd_dev, devname);
2318 /* Save mount options */
2319 s1 = options + strlen(options) - 1;
2320 while (s1 >= options && (*s1 == ',' || *s1 == ' '))
2322 if (*options != 0) {
2323 /* Freed in lustre_free_lsi */
2324 OBD_ALLOC(lmd->lmd_opts, strlen(options) + 1);
2327 strcpy(lmd->lmd_opts, options);
2331 lmd->lmd_magic = LMD_MAGIC;
2336 CERROR("Bad mount options %s\n", options);
/* Argument bundle handed to lustre_fill_super() through the generic
 * mount helpers: lustre_fill_super() reads lmd2_data as the option
 * string and passes lmd2_mnt on to the client fill_super callback. */
2340 struct lustre_mount_data2 {
2342 struct vfsmount *lmd2_mnt;
2345 /** This is the entry point for the mount call into Lustre.
2346 * This is called when a server or client is mounted,
2347 * and this is where we start setting things up.
2348 * @param data Mount options (e.g. -o flock,abort_recov)
/*
 * fill_super callback shared by client and server mounts.
 *
 * @data is a struct lustre_mount_data2.  An lsi is created for @sb with
 * lustre_init_lsi(), obd_zombie_barrier() is called, and the option
 * string is parsed with lmd_parse(); a parse failure yields -EINVAL.
 * Client mounts (lmd_is_client) start the MGC and then call the
 * registered client_fill_super callback, which must have been
 * registered beforehand; all other mounts go through
 * server_fill_super().
 */
2350 int lustre_fill_super(struct super_block *sb, void *data, int silent)
2352 struct lustre_mount_data *lmd;
2353 struct lustre_mount_data2 *lmd2 = data;
2354 struct lustre_sb_info *lsi;
2358 CDEBUG(D_MOUNT|D_VFSTRACE, "VFS Op: sb %p\n", sb);
2360 lsi = lustre_init_lsi(sb);
2366 * Disable lockdep during mount, because mount locking patterns are
2372 * LU-639: the obd cleanup of last mount may not finish yet, wait here.
2374 obd_zombie_barrier();
2376 /* Figure out the lmd from the mount options */
2377 if (lmd_parse((char *)(lmd2->lmd2_data), lmd)) {
2379 GOTO(out, rc = -EINVAL);
2382 if (lmd_is_client(lmd)) {
2383 CDEBUG(D_MOUNT, "Mounting client %s\n", lmd->lmd_profile);
2384 if (!client_fill_super) {
2385 LCONSOLE_ERROR_MSG(0x165, "Nothing registered for "
2386 "client mount! Is the 'lustre' "
2387 "module loaded?\n");
2391 rc = lustre_start_mgc(sb);
2396 /* Connect and start */
2397 /* (should always be ll_fill_super) */
2398 rc = (*client_fill_super)(sb, lmd2->lmd2_mnt);
2399 /* c_f_s will call lustre_common_put_super on failure */
2402 CDEBUG(D_MOUNT, "Mounting server from %s\n", lmd->lmd_dev);
2403 rc = server_fill_super(sb);
2404 /* s_f_s calls lustre_start_mgc after the mount because we need
2405 the MGS nids which are stored on disk. Plus, we may
2406 need to start the MGS first. */
2407 /* s_f_s will call server_put_super on failure */
2410 /* If error happens in fill_super() call, @lsi will be killed there.
2411 * This is why we do not put it here. */
/* s2lsi(sb) is re-read for the message below because the lsi, and the
 * lmd with it, may already have been freed on the error path */
2415 CERROR("Unable to mount %s (%d)\n",
2416 s2lsi(sb) ? lmd->lmd_dev : "", rc);
2418 CDEBUG(D_SUPER, "Mount %s complete\n",
2426 /* We can't call ll_fill_super by name because it lives in a module that
2427 must be loaded after this one. */
/* Record @cfs as the client fill_super callback; lustre_fill_super()
 * invokes it for client mounts.  Exported for use by other modules. */
2428 void lustre_register_client_fill_super(int (*cfs)(struct super_block *sb,
2429 struct vfsmount *mnt))
2431 client_fill_super = cfs;
2433 EXPORT_SYMBOL(lustre_register_client_fill_super);
/* Record @cfs as the kill_super callback; lustre_kill_super() invokes
 * it for non-server superblocks.  Exported for use by other modules. */
2435 void lustre_register_kill_super_cb(void (*cfs)(struct super_block *sb))
2437 kill_super_cb = cfs;
2439 EXPORT_SYMBOL(lustre_register_kill_super_cb);
2441 /***************** FS registration ******************/
/*
 * Mount entry point of the "lustre" filesystem type, in two flavours
 * selected by HAVE_FSTYPE_MOUNT.  Both wrap @data in a
 * lustre_mount_data2 and hand it to the no-device helper
 * (mount_nodev / get_sb_nodev) with lustre_fill_super as the callback.
 * Only the get_sb flavour has a vfsmount to pass along; the mount
 * flavour stores NULL.
 */
2442 #ifdef HAVE_FSTYPE_MOUNT
2443 struct dentry *lustre_mount(struct file_system_type *fs_type, int flags,
2444 const char *devname, void *data)
2446 struct lustre_mount_data2 lmd2 = { data, NULL };
2448 return mount_nodev(fs_type, flags, &lmd2, lustre_fill_super);
2451 int lustre_get_sb(struct file_system_type *fs_type, int flags,
2452 const char *devname, void * data, struct vfsmount *mnt)
2454 struct lustre_mount_data2 lmd2 = { data, mnt };
2456 return get_sb_nodev(fs_type, flags, &lmd2, lustre_fill_super, mnt);
/* .kill_sb handler: runs the registered kill_super callback when one is
 * set and the sb has an lsi that is not a server, then always calls
 * kill_anon_super(). */
2460 void lustre_kill_super(struct super_block *sb)
2462 struct lustre_sb_info *lsi = s2lsi(sb);
2464 if (kill_super_cb && lsi && !IS_SERVER(lsi))
2465 (*kill_super_cb)(sb);
2467 kill_anon_super(sb);
2470 /** Register the "lustre" fs type
/* Filesystem type descriptor passed to register_filesystem() and
 * unregister_filesystem().  The mount entry point is .mount or .get_sb
 * depending on HAVE_FSTYPE_MOUNT. */
2472 struct file_system_type lustre_fs_type = {
2473 .owner = THIS_MODULE,
2475 #ifdef HAVE_FSTYPE_MOUNT
2476 .mount = lustre_mount,
2478 .get_sb = lustre_get_sb,
2480 .kill_sb = lustre_kill_super,
2481 .fs_flags = FS_BINARY_MOUNTDATA | FS_REQUIRES_DEV |
2482 #ifdef FS_HAS_FIEMAP
2485 LL_RENAME_DOES_D_MOVE,
/* Register lustre_fs_type with the VFS; returns the result of
 * register_filesystem(). */
2488 int lustre_register_fs(void)
2490 return register_filesystem(&lustre_fs_type);
/* Remove lustre_fs_type from the VFS; returns the result of
 * unregister_filesystem(). */
2493 int lustre_unregister_fs(void)
2495 return unregister_filesystem(&lustre_fs_type);
2498 EXPORT_SYMBOL(server_mti_print);