4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2012, Whamcloud, Inc.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lustre/obdclass/obd_mount.c
38 * Client/server mount routines
40 * Author: Nathan Rutman <nathan@clusterfs.com>
/* Debug subsystem selector for this file (presumably consumed by the libcfs
 * debug macros such as CDEBUG/CERROR -- confirm). */
44 #define DEBUG_SUBSYSTEM S_CLASS
/* Mask used by the mount-related CDEBUG calls in this file.
 * NOTE(review): the expansion is not parenthesized; that is only safe while
 * the macro is used as a bare macro argument, as it is in this file. */
45 #define D_MOUNT D_SUPER|D_CONFIG /*|D_WARNING */
/* ldd_print() and server_mti_print() emit through PRINT_CMD(PRINT_MASK, ...),
 * i.e. CDEBUG with the mask below. */
46 #define PRINT_CMD CDEBUG
47 #define PRINT_MASK D_SUPER|D_CONFIG
51 #include <lustre_fsfilt.h>
52 #include <obd_class.h>
53 #include <lustre/lustre_user.h>
54 #include <linux/version.h>
55 #include <lustre_log.h>
56 #include <lustre_disk.h>
57 #include <lustre_param.h>
/* Callback hooks for the client mount path: a fill_super-style callback taking
 * the super_block and vfsmount, and a kill_super callback. Both start out
 * NULL. Nothing in the visible part of this file assigns or invokes them --
 * presumably they are registered elsewhere; verify. */
59 static int (*client_fill_super)(struct super_block *sb,
60 struct vfsmount *mnt) = NULL;
61 static void (*kill_super_cb)(struct super_block *sb) = NULL;
63 /*********** mount lookup *********/
/* Registry of server mounts: server_mount_info_list holds
 * struct lustre_mount_info entries linked through lmi_list_chain, and
 * lustre_mount_info_lock is the mutex the register/deregister/get/put helpers
 * below take around lookups and list updates. */
65 CFS_DEFINE_MUTEX(lustre_mount_info_lock);
66 static CFS_LIST_HEAD(server_mount_info_list);
/* Find the registered mount whose lmi_name equals name.
 * Walks server_mount_info_list and compares names with strcmp(). Callers in
 * this file treat a non-NULL result as "found".
 * Does no locking of its own: the register/deregister/get/put helpers below
 * wrap the lookup in lustre_mount_info_lock.
 * NOTE(review): server_start_mgs() calls this with no lock acquisition
 * visible around the call -- confirm that is safe. */
68 static struct lustre_mount_info *server_find_mount(const char *name)
71 struct lustre_mount_info *lmi;
74 cfs_list_for_each(tmp, &server_mount_info_list) {
75 lmi = cfs_list_entry(tmp, struct lustre_mount_info,
77 if (strcmp(name, lmi->lmi_name) == 0)
83 /* we must register an obd for a mount before we call the setup routine.
84 *_setup will call server_get_mount to get the mnt struct
85 by obd_name, since we can't pass the pointer to setup. */
/* Register a mount under an obd name so it can later be looked up by name.
 *
 * Allocates a lustre_mount_info and a private copy of name (strlen + 1
 * bytes); if the name copy cannot be allocated the lmi allocation is released
 * again. Then, with lustre_mount_info_lock held, either
 *  - finds that name is already registered: unlocks, frees both allocations
 *    and logs "Already registered", or
 *  - stores the name copy in lmi_name and links the entry onto
 *    server_mount_info_list.
 * The final CDEBUG reports the registered vfsmount and its use count. */
86 static int server_register_mount(const char *name, struct super_block *sb,
89 struct lustre_mount_info *lmi;
96 OBD_ALLOC(lmi, sizeof(*lmi));
99 OBD_ALLOC(name_cp, strlen(name) + 1);
101 OBD_FREE(lmi, sizeof(*lmi));
104 strcpy(name_cp, name);
106 cfs_mutex_lock(&lustre_mount_info_lock);
108 if (server_find_mount(name)) {
109 cfs_mutex_unlock(&lustre_mount_info_lock);
110 OBD_FREE(lmi, sizeof(*lmi));
111 OBD_FREE(name_cp, strlen(name) + 1);
112 CERROR("Already registered %s\n", name);
115 lmi->lmi_name = name_cp;
118 cfs_list_add(&lmi->lmi_list_chain, &server_mount_info_list);
120 cfs_mutex_unlock(&lustre_mount_info_lock);
122 CDEBUG(D_MOUNT, "reg_mnt %p from %s, vfscount=%d\n",
123 lmi->lmi_mnt, name, mnt_get_count(lmi->lmi_mnt));
128 /* when an obd no longer needs a mount */
/* Remove the registry entry for name.
 * Under lustre_mount_info_lock: looks the entry up; if it is missing, unlocks
 * and logs "<name> not registered". Otherwise logs the vfsmount and its use
 * count, frees the stored name copy, unlinks the entry from the list, frees
 * the entry and unlocks. The size passed to OBD_FREE for the name is
 * recomputed with strlen(), matching the strlen + 1 used at registration. */
129 static int server_deregister_mount(const char *name)
131 struct lustre_mount_info *lmi;
134 cfs_mutex_lock(&lustre_mount_info_lock);
135 lmi = server_find_mount(name);
137 cfs_mutex_unlock(&lustre_mount_info_lock);
138 CERROR("%s not registered\n", name);
142 CDEBUG(D_MOUNT, "dereg_mnt %p from %s, vfscount=%d\n",
143 lmi->lmi_mnt, name, mnt_get_count(lmi->lmi_mnt));
145 OBD_FREE(lmi->lmi_name, strlen(lmi->lmi_name) + 1);
146 cfs_list_del(&lmi->lmi_list_chain);
147 OBD_FREE(lmi, sizeof(*lmi));
148 cfs_mutex_unlock(&lustre_mount_info_lock);
153 /* obd's look up a registered mount using their obdname. This is just
154 for initial obd setup to find the mount struct. It should not be
155 called every time you want to mntget. */
/* Look up the mount registered under name and take references on it.
 * After the lookup (done under lustre_mount_info_lock) this takes a vfsmount
 * reference with mntget() and increments the superblock's lsi_mounts counter;
 * a failed lookup is logged with "Can't find mount for <name>".
 * NOTE(review): the lock is dropped before lmi is dereferenced, so a
 * concurrent server_deregister_mount() could free the entry in between --
 * confirm that callers serialize these paths. */
156 struct lustre_mount_info *server_get_mount(const char *name)
158 struct lustre_mount_info *lmi;
159 struct lustre_sb_info *lsi;
162 cfs_mutex_lock(&lustre_mount_info_lock);
163 lmi = server_find_mount(name);
164 cfs_mutex_unlock(&lustre_mount_info_lock);
166 CERROR("Can't find mount for %s\n", name);
169 lsi = s2lsi(lmi->lmi_sb);
170 mntget(lmi->lmi_mnt);
171 cfs_atomic_inc(&lsi->lsi_mounts);
173 CDEBUG(D_MOUNT, "get_mnt %p from %s, refs=%d, vfscount=%d\n",
174 lmi->lmi_mnt, name, cfs_atomic_read(&lsi->lsi_mounts),
175 mnt_get_count(lmi->lmi_mnt));
181 * Used by mdt to get mount_info from obdname.
182 * There is no blocking when using the mount_info.
183 * Do not use server_get_mount for this purpose.
/* Plain lookup of the mount registered under name.
 * Performs the locked server_find_mount() lookup and logs
 * "Can't find mount for <name>" on failure. Unlike server_get_mount(), no
 * mntget() or lsi_mounts increment is visible here. */
185 struct lustre_mount_info *server_get_mount_2(const char *name)
187 struct lustre_mount_info *lmi;
190 cfs_mutex_lock(&lustre_mount_info_lock);
191 lmi = server_find_mount(name);
192 cfs_mutex_unlock(&lustre_mount_info_lock);
194 CERROR("Can't find mount for %s\n", name);
/* Helper for releasing a vfsmount reference.
 * When HAVE_KERNEL_LOCKED is defined (kernels before 2.6.37 per the comment
 * below) the code first tests kernel_locked(); presumably the big kernel lock
 * is released around the put -- the rest of the body is not visible here, so
 * confirm before relying on this. */
199 static void unlock_mntput(struct vfsmount *mnt)
201 #ifdef HAVE_KERNEL_LOCKED
202 /* for kernel < 2.6.37 */
203 if (kernel_locked()) {
/* Forward declaration: server_put_mount() calls lustre_put_lsi(), which is
 * defined further down in this file. */
215 static int lustre_put_lsi(struct super_block *sb);
217 /* to be called from obd_cleanup methods */
/* Release the mount registered under name.
 *
 * name: obd name the mount was registered under.
 * mnt:  the vfsmount being released; asserted to equal the registered
 *       lmi_mnt.
 *
 * count is sampled up front as mnt_get_count(mnt) - 1 and is only used in
 * the log messages. The entry is looked up under lustre_mount_info_lock
 * (failure is logged), then lustre_put_lsi() drops one lsi_mounts reference;
 * a non-zero return from it is treated as "last put". Finally the name is
 * deregistered, since this obd should not need the mount again.
 * NOTE(review): as in server_get_mount(), lmi is used after the lock has
 * been dropped. */
218 int server_put_mount(const char *name, struct vfsmount *mnt)
220 struct lustre_mount_info *lmi;
221 struct lustre_sb_info *lsi;
222 int count = mnt_get_count(mnt) - 1;
225 /* This might be the last one, can't deref after this */
228 cfs_mutex_lock(&lustre_mount_info_lock);
229 lmi = server_find_mount(name);
230 cfs_mutex_unlock(&lustre_mount_info_lock);
232 CERROR("Can't find mount for %s\n", name);
235 lsi = s2lsi(lmi->lmi_sb);
236 LASSERT(lmi->lmi_mnt == mnt);
238 CDEBUG(D_MOUNT, "put_mnt %p from %s, refs=%d, vfscount=%d\n",
239 lmi->lmi_mnt, name, cfs_atomic_read(&lsi->lsi_mounts), count);
241 if (lustre_put_lsi(lmi->lmi_sb)) {
242 CDEBUG(D_MOUNT, "Last put of mnt %p from %s, vfscount=%d\n",
243 lmi->lmi_mnt, name, count);
244 /* last mount is the One True Mount */
246 CERROR("%s: mount busy, vfscount=%d!\n", name, count);
249 /* this obd should never need the mount again */
250 server_deregister_mount(name);
255 /* Corresponding to server_get_mount_2 */
/* Release counterpart of server_get_mount_2().
 * name: obd name used for the lookup; mnt: the vfsmount being released.
 * NOTE(review): the body is not visible in this excerpt; check it before
 * assuming which references it drops. */
256 int server_put_mount_2(const char *name, struct vfsmount *mnt)
262 /******* mount helper utilities *********/
/* Dump the fields of a lustre_disk_data to the debug log via
 * PRINT_CMD(PRINT_MASK, ...): server name, uuid, fs name, index, config
 * version, flags, disk fs type (MT_STR), mount options, params and the
 * user comment. Read-only with respect to ldd. */
265 static void ldd_print(struct lustre_disk_data *ldd)
267 PRINT_CMD(PRINT_MASK, " disk data:\n");
268 PRINT_CMD(PRINT_MASK, "server: %s\n", ldd->ldd_svname);
269 PRINT_CMD(PRINT_MASK, "uuid: %s\n", (char *)ldd->ldd_uuid);
270 PRINT_CMD(PRINT_MASK, "fs: %s\n", ldd->ldd_fsname);
271 PRINT_CMD(PRINT_MASK, "index: %04x\n", ldd->ldd_svindex);
272 PRINT_CMD(PRINT_MASK, "config: %d\n", ldd->ldd_config_ver);
273 PRINT_CMD(PRINT_MASK, "flags: %#x\n", ldd->ldd_flags);
274 PRINT_CMD(PRINT_MASK, "diskfs: %s\n", MT_STR(ldd));
275 PRINT_CMD(PRINT_MASK, "options: %s\n", ldd->ldd_mount_opts);
276 PRINT_CMD(PRINT_MASK, "params: %s\n", ldd->ldd_params);
277 PRINT_CMD(PRINT_MASK, "comment: %s\n", ldd->ldd_userdata);
/* Read and validate the on-disk lustre_disk_data from MOUNT_DATA_FILE.
 *
 * mount_ctxt: run context pushed for the duration of the file access and
 *             popped again before returning.
 * ldd:        destination buffer; filled directly by lustre_fread().
 *
 * Validation visible here, each failure ending in rc = -EINVAL:
 *  - the file size must equal sizeof(*ldd) exactly;
 *  - the read must succeed (a failure is logged as "read <rc> of <len>");
 *  - ldd_magic must be LDD_MAGIC (no byte-swapping support, see FIXME);
 *  - no incompat feature bits outside LDD_INCOMPAT_SUPP;
 *  - no rocompat feature bits outside LDD_ROCOMPAT_SUPP (currently rejected
 *    outright rather than remounting read-only).
 * An open failure is logged with the error code.
 * NOTE(review): the size-mismatch message prints (int)sizeof(*ldd) with %u. */
281 static int ldd_parse(struct lvfs_run_ctxt *mount_ctxt,
282 struct lustre_disk_data *ldd)
284 struct lvfs_run_ctxt saved;
291 push_ctxt(&saved, mount_ctxt, NULL);
293 file = filp_open(MOUNT_DATA_FILE, O_RDONLY, 0644);
296 CERROR("cannot open %s: rc = %d\n", MOUNT_DATA_FILE, rc);
300 len = i_size_read(file->f_dentry->d_inode);
301 CDEBUG(D_MOUNT, "Have %s, size %lu\n", MOUNT_DATA_FILE, len);
302 if (len != sizeof(*ldd)) {
303 CERROR("disk data size does not match: see %lu expect %u\n",
304 len, (int)sizeof(*ldd));
305 GOTO(out_close, rc = -EINVAL);
308 rc = lustre_fread(file, ldd, len, &off);
310 CERROR("error reading %s: read %d of %lu\n",
311 MOUNT_DATA_FILE, rc, len);
312 GOTO(out_close, rc = -EINVAL);
316 if (ldd->ldd_magic != LDD_MAGIC) {
317 /* FIXME add swabbing support */
318 CERROR("Bad magic in %s: %x!=%x\n", MOUNT_DATA_FILE,
319 ldd->ldd_magic, LDD_MAGIC);
320 GOTO(out_close, rc = -EINVAL);
323 if (ldd->ldd_feature_incompat & ~LDD_INCOMPAT_SUPP) {
324 CERROR("%s: unsupported incompat filesystem feature(s) %x\n",
326 ldd->ldd_feature_incompat & ~LDD_INCOMPAT_SUPP);
327 GOTO(out_close, rc = -EINVAL);
329 if (ldd->ldd_feature_rocompat & ~LDD_ROCOMPAT_SUPP) {
330 CERROR("%s: unsupported read-only filesystem feature(s) %x\n",
332 ldd->ldd_feature_rocompat & ~LDD_ROCOMPAT_SUPP);
333 /* Do something like remount filesystem read-only */
334 GOTO(out_close, rc = -EINVAL);
340 pop_ctxt(&saved, mount_ctxt, NULL);
/* Write the in-memory lustre_disk_data back to MOUNT_DATA_FILE.
 *
 * mount_ctxt: run context pushed around the file access.
 * ldd:        data to write; must already carry LDD_MAGIC (asserted).
 *
 * ldd_config_ver is incremented before the file is opened, so the in-memory
 * version advances even if the open or write subsequently fails. The file is
 * opened O_RDWR|O_SYNC and sizeof(struct lustre_disk_data) bytes are written
 * with lustre_fwrite(); a write failure yields rc = -EINVAL.
 * NOTE(review): the write-failure message says "read %d of %lu" although
 * this is the write path. */
344 static int ldd_write(struct lvfs_run_ctxt *mount_ctxt,
345 struct lustre_disk_data *ldd)
347 struct lvfs_run_ctxt saved;
350 unsigned long len = sizeof(struct lustre_disk_data);
354 LASSERT(ldd->ldd_magic == LDD_MAGIC);
356 ldd->ldd_config_ver++;
358 push_ctxt(&saved, mount_ctxt, NULL);
360 file = filp_open(MOUNT_DATA_FILE, O_RDWR|O_SYNC, 0644);
363 CERROR("cannot open %s: rc = %d\n", MOUNT_DATA_FILE, rc);
367 rc = lustre_fwrite(file, ldd, len, &off);
369 CERROR("error writing %s: read %d of %lu\n",
370 MOUNT_DATA_FILE, rc, len);
371 GOTO(out_close, rc = -EINVAL);
379 pop_ctxt(&saved, mount_ctxt, NULL);
384 /**************** config llog ********************/
386 /** Get a config log from the MGS and process it.
387 * This func is called for both clients and servers.
388 * Continue to process new statements appended to the logs
389 * (whenever the config lock is revoked) until lustre_end_log
391 * @param sb The superblock is used by the MGC to write to the local copy of
393 * @param logname The name of the llog to replicate from the MGS
394 * @param cfg Since the same mgc may be used to follow multiple config logs
395 * (e.g. ost1, ost2, client), the config_llog_instance keeps the state for
396 * this log, and is added to the mgc's list of logs to follow.
/* Ask the MGC attached to sb (lsi_mgc) to start processing config log
 * logname.
 *
 * Builds an LCFG_LOG_START command whose buffers are, in order: the MGC obd
 * name, logname, a copy of *cfg, and the sb pointer value; hands it to
 * obd_process_config() and frees it. Two console error messages (0x15b and
 * 0x15c) report failures; the conditions choosing between them are not
 * visible in this excerpt.
 * NOTE(review): bufs is a pointer here (lustre_end_log() uses a stack
 * struct); its allocation and release are not visible -- confirm.
 * NOTE(review): the result of lustre_cfg_new() is used without a visible
 * NULL check.
 * NOTE(review): in message 0x15b the adjacent literals "'%s'" and
 * "failed ..." join without a space, so the output reads "'<log>'failed". */
398 int lustre_process_log(struct super_block *sb, char *logname,
399 struct config_llog_instance *cfg)
401 struct lustre_cfg *lcfg;
402 struct lustre_cfg_bufs *bufs;
403 struct lustre_sb_info *lsi = s2lsi(sb);
404 struct obd_device *mgc = lsi->lsi_mgc;
415 /* mgc_process_config */
416 lustre_cfg_bufs_reset(bufs, mgc->obd_name);
417 lustre_cfg_bufs_set_string(bufs, 1, logname);
418 lustre_cfg_bufs_set(bufs, 2, cfg, sizeof(*cfg));
419 lustre_cfg_bufs_set(bufs, 3, &sb, sizeof(sb));
420 lcfg = lustre_cfg_new(LCFG_LOG_START, bufs);
421 rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
422 lustre_cfg_free(lcfg);
427 LCONSOLE_ERROR_MSG(0x15b, "%s: The configuration from log '%s'"
428 "failed from the MGS (%d). Make sure this "
429 "client and the MGS are running compatible "
430 "versions of Lustre.\n",
431 mgc->obd_name, logname, rc);
434 LCONSOLE_ERROR_MSG(0x15c, "%s: The configuration from log '%s' "
435 "failed (%d). This may be the result of "
436 "communication errors between this node and "
437 "the MGS, a bad configuration, or other "
438 "errors. See the syslog for more "
439 "information.\n", mgc->obd_name, logname,
442 /* class_obd_list(); */
446 /* Stop watching this config log for updates */
/* Tell the MGC attached to sb (lsi_mgc) to stop following config log
 * logname.
 * Builds an LCFG_LOG_END command carrying the MGC obd name, logname and a
 * copy of *cfg, passes it to obd_process_config() and frees it.
 * NOTE(review): the result of lustre_cfg_new() is used without a visible
 * NULL check; whether cfg may be NULL is also not visible here. */
447 int lustre_end_log(struct super_block *sb, char *logname,
448 struct config_llog_instance *cfg)
450 struct lustre_cfg *lcfg;
451 struct lustre_cfg_bufs bufs;
452 struct lustre_sb_info *lsi = s2lsi(sb);
453 struct obd_device *mgc = lsi->lsi_mgc;
460 /* mgc_process_config */
461 lustre_cfg_bufs_reset(&bufs, mgc->obd_name);
462 lustre_cfg_bufs_set_string(&bufs, 1, logname);
464 lustre_cfg_bufs_set(&bufs, 2, cfg, sizeof(*cfg));
465 lcfg = lustre_cfg_new(LCFG_LOG_END, &bufs);
466 rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
467 lustre_cfg_free(lcfg);
471 /**************** obd start *******************/
473 /** lustre_cfg_bufs are a holdover from 1.4; we can still set these up from
474 * lctl (and do for echo cli/srv).
/* Build a lustre_cfg command and run it through class_process_config().
 *
 * cfgname: placed in buffer 0 (the target obd name in the callers here).
 * nid:     stored in lcfg_nid.
 * cmd:     the LCFG_* command code.
 * s1..s4:  optional strings for buffers 1..4; callers in this file pass 0
 *          for slots they do not use.
 *
 * The command is freed again after processing.
 * NOTE(review): the CDEBUG prints s1..s4 with %s even when a caller passed 0.
 * NOTE(review): lcfg_nid is written right after lustre_cfg_new() with no
 * visible NULL check on the allocation. */
476 int do_lcfg(char *cfgname, lnet_nid_t nid, int cmd,
477 char *s1, char *s2, char *s3, char *s4)
479 struct lustre_cfg_bufs bufs;
480 struct lustre_cfg * lcfg = NULL;
483 CDEBUG(D_TRACE, "lcfg %s %#x %s %s %s %s\n", cfgname,
484 cmd, s1, s2, s3, s4);
486 lustre_cfg_bufs_reset(&bufs, cfgname);
488 lustre_cfg_bufs_set_string(&bufs, 1, s1);
490 lustre_cfg_bufs_set_string(&bufs, 2, s2);
492 lustre_cfg_bufs_set_string(&bufs, 3, s3);
494 lustre_cfg_bufs_set_string(&bufs, 4, s4);
496 lcfg = lustre_cfg_new(cmd, &bufs);
497 lcfg->lcfg_nid = nid;
498 rc = class_process_config(lcfg);
499 lustre_cfg_free(lcfg);
503 /** Call class_attach and class_setup. These methods in turn call
504 * obd type-specific methods.
/* Attach and set up an obd in two do_lcfg() steps.
 * LCFG_ATTACH is issued with (type, uuid), then LCFG_SETUP with (s1, s2).
 * Either failure is logged; a failed setup is followed by an LCFG_DETACH so
 * the attach is undone. */
506 static int lustre_start_simple(char *obdname, char *type, char *uuid,
510 CDEBUG(D_MOUNT, "Starting obd %s (typ=%s)\n", obdname, type);
512 rc = do_lcfg(obdname, 0, LCFG_ATTACH, type, uuid, 0, 0);
514 CERROR("%s attach error %d\n", obdname, rc);
517 rc = do_lcfg(obdname, 0, LCFG_SETUP, s1, s2, 0, 0);
519 CERROR("%s setup error %d\n", obdname, rc);
520 do_lcfg(obdname, 0, LCFG_DETACH, 0, 0, 0, 0);
525 /* Set up a MGS to serve startup logs */
/* Start the MGS obd on the server mount of sb (lsi_srv_mnt).
 * First checks the mount registry for LUSTRE_MGS_OBDNAME and reports
 * console error 0x15d if an MGS was already started. Otherwise registers the
 * mount under LUSTRE_MGS_OBDNAME and starts the obd with
 * lustre_start_simple(); a failure is reported as console error 0x15e.
 * NOTE(review): no lustre_mount_info_lock acquisition is visible around the
 * server_find_mount() call below. */
526 static int server_start_mgs(struct super_block *sb)
528 struct lustre_sb_info *lsi = s2lsi(sb);
529 struct vfsmount *mnt = lsi->lsi_srv_mnt;
530 struct lustre_mount_info *lmi;
535 /* It is impossible to have more than 1 MGS per node, since
536 MGC wouldn't know which to connect to */
537 lmi = server_find_mount(LUSTRE_MGS_OBDNAME);
539 lsi = s2lsi(lmi->lmi_sb);
540 LCONSOLE_ERROR_MSG(0x15d, "The MGS service was already started"
542 lsi->lsi_ldd->ldd_svname);
546 CDEBUG(D_CONFIG, "Start MGS service %s\n", LUSTRE_MGS_OBDNAME);
548 rc = server_register_mount(LUSTRE_MGS_OBDNAME, sb, mnt);
551 rc = lustre_start_simple(LUSTRE_MGS_OBDNAME, LUSTRE_MGS_NAME,
552 LUSTRE_MGS_OBDNAME, 0, 0);
553 /* Do NOT call server_deregister_mount() here. This leads to
554 * inability to clean up cleanly and free lsi and other stuff when
555 * mgs calls server_put_mount() in error handling case. -umka */
559 LCONSOLE_ERROR_MSG(0x15e, "Failed to start MGS '%s' (%d). "
560 "Is the 'mgs' module loaded?\n",
561 LUSTRE_MGS_OBDNAME, rc);
/* Stop the MGS obd.
 * Looks the obd up by LUSTRE_MGS_OBDNAME; when it is not found this is only
 * logged at debug level ("not running"). Otherwise the obd is torn down with
 * class_manual_cleanup(). sb is not referenced in the visible lines. */
565 static int server_stop_mgs(struct super_block *sb)
567 struct obd_device *obd;
571 CDEBUG(D_MOUNT, "Stop MGS service %s\n", LUSTRE_MGS_OBDNAME);
573 /* There better be only one MGS */
574 obd = class_name2obd(LUSTRE_MGS_OBDNAME);
576 CDEBUG(D_CONFIG, "mgs %s not running\n", LUSTRE_MGS_OBDNAME);
580 /* The MGS should always stop when we say so */
582 rc = class_manual_cleanup(obd);
/* Mutex taken by lustre_start_mgc() and lustre_stop_mgc() around MGC
 * creation, re-use (cl_mgc_refcount changes) and teardown. */
586 CFS_DEFINE_MUTEX(mgc_start_lock);
588 /** Set up a mgc obd to process startup logs
590 * \param sb [in] super block of the mgc obd
592 * \retval 0 success, otherwise error code
/* Start, or re-use, the MGC obd that talks to the MGS on behalf of sb.
 *
 * Steps visible in the code, in order:
 *  1. Choose the NID the MGC is named after. Servers (LSI_SERVER) take it
 *     from the mgsnode= parameter in ldd_params or, on an MGS, from the
 *     local LNet ids (testing for LOLND); clients take it from the mount
 *     device string lmd_dev. "No valid MGS nids found." is logged when
 *     none is found.
 *  2. Take mgc_start_lock and build mgcname as LUSTRE_MGC_OBDNAME followed
 *     by the NID string; niduuid is a scratch buffer for "<mgcname>_<hex>".
 *  3. If an obd of that name exists and is not stopping: push the mgssec
 *     setting, bump cl_mgc_refcount and, for clients, reconcile
 *     LMD_FLG_NOIR with the IMP_RECOV flag in the existing connection data
 *     (the existing MGC setting wins). A restarting MGS with a live MGC is
 *     special-cased; otherwise KEY_INIT_RECOV_BACKUP is set on the MGC.
 *  4. Otherwise register the primary NIDs with LCFG_ADD_UUID, start the obd
 *     through lustre_start_simple() under a random uuid, add failover NIDs
 *     (LCFG_ADD_UUID / LCFG_ADD_CONN) and record their count in
 *     lmd_mgs_failnodes, set mgssec, initialise cl_mgc_refcount to 1, set
 *     KEY_INIT_RECOV_BACKUP and obd_connect() to the MGS with
 *     VERSION|FID|AT|FULL20|IMP_RECOV connect flags (IMP_RECOV cleared for
 *     clients mounted with LMD_FLG_NOIR). The export is kept in
 *     cl_mgc_mgsexp.
 *  5. Drop mgc_start_lock and free mgcname and niduuid.
 *
 * NOTE(review): niduuid is allocated with len + 2 bytes, which leaves room
 * for "_" plus a single hex digit after mgcname; the sprintf() in the
 * failover loop would overrun it once i reaches 0x10. lustre_stop_mgc()
 * reserves strlen + 6 for the same naming scheme -- confirm i stays small.
 * NOTE(review): key lengths are passed as strlen(KEY_MGSSEC) in some calls
 * and sizeof(KEY_INIT_RECOV_BACKUP) in others. */
594 static int lustre_start_mgc(struct super_block *sb)
596 struct obd_connect_data *data = NULL;
597 struct lustre_sb_info *lsi = s2lsi(sb);
598 struct obd_device *obd;
599 struct obd_export *exp;
600 struct obd_uuid *uuid;
603 char *mgcname = NULL, *niduuid = NULL, *mgssec = NULL;
606 int rc = 0, i = 0, j, len;
609 LASSERT(lsi->lsi_lmd);
611 /* Find the first non-lo MGS nid for our MGC name */
612 if (lsi->lsi_flags & LSI_SERVER) {
613 ptr = lsi->lsi_ldd->ldd_params;
614 /* Use mgsnode= nids */
615 if ((class_find_param(ptr, PARAM_MGSNODE, &ptr) == 0) &&
616 (class_parse_nid(ptr, &nid, &ptr) == 0)) {
618 } else if (IS_MGS(lsi->lsi_ldd)) {
619 lnet_process_id_t id;
620 while ((rc = LNetGetId(i++, &id)) != -ENOENT) {
621 if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND)
628 } else { /* client */
629 /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
630 ptr = lsi->lsi_lmd->lmd_dev;
631 if (class_parse_nid(ptr, &nid, &ptr) == 0)
635 CERROR("No valid MGS nids found.\n");
639 cfs_mutex_lock(&mgc_start_lock);
641 len = strlen(LUSTRE_MGC_OBDNAME) + strlen(libcfs_nid2str(nid)) + 1;
642 OBD_ALLOC(mgcname, len);
643 OBD_ALLOC(niduuid, len + 2);
644 if (!mgcname || !niduuid)
645 GOTO(out_free, rc = -ENOMEM);
646 sprintf(mgcname, "%s%s", LUSTRE_MGC_OBDNAME, libcfs_nid2str(nid));
648 mgssec = lsi->lsi_lmd->lmd_mgssec ? lsi->lsi_lmd->lmd_mgssec : "";
652 GOTO(out_free, rc = -ENOMEM);
654 obd = class_name2obd(mgcname);
655 if (obd && !obd->obd_stopping) {
656 rc = obd_set_info_async(NULL, obd->obd_self_export,
657 strlen(KEY_MGSSEC), KEY_MGSSEC,
658 strlen(mgssec), mgssec, NULL);
662 /* Re-using an existing MGC */
663 cfs_atomic_inc(&obd->u.cli.cl_mgc_refcount);
665 /* IR compatibility check, only for clients */
666 if (lmd_is_client(lsi->lsi_lmd)) {
668 int vallen = sizeof(*data);
669 __u32 *flags = &lsi->lsi_lmd->lmd_flags;
671 rc = obd_get_info(NULL, obd->obd_self_export,
672 strlen(KEY_CONN_DATA), KEY_CONN_DATA,
673 &vallen, data, NULL);
675 has_ir = OCD_HAS_FLAG(data, IMP_RECOV);
676 if (has_ir ^ !(*flags & LMD_FLG_NOIR)) {
677 /* LMD_FLG_NOIR is for test purpose only */
679 "Trying to mount a client with IR setting "
680 "not compatible with current mgc. "
681 "Force to use current mgc setting that is "
683 has_ir ? "enabled" : "disabled");
685 *flags &= ~LMD_FLG_NOIR;
687 *flags |= LMD_FLG_NOIR;
692 /* If we are restarting the MGS, don't try to keep the MGC's
693 old connection, or registration will fail. */
694 if ((lsi->lsi_flags & LSI_SERVER) && IS_MGS(lsi->lsi_ldd)) {
695 CDEBUG(D_MOUNT, "New MGS with live MGC\n");
699 /* Try all connections, but only once (again).
700 We don't want to block another target from starting
701 (using its local copy of the log), but we do want to connect
702 if at all possible. */
704 CDEBUG(D_MOUNT, "%s: Set MGC reconnect %d\n", mgcname,recov_bk);
705 rc = obd_set_info_async(NULL, obd->obd_self_export,
706 sizeof(KEY_INIT_RECOV_BACKUP),
707 KEY_INIT_RECOV_BACKUP,
708 sizeof(recov_bk), &recov_bk, NULL);
712 CDEBUG(D_MOUNT, "Start MGC '%s'\n", mgcname);
714 /* Add the primary nids for the MGS */
716 sprintf(niduuid, "%s_%x", mgcname, i);
717 if (lsi->lsi_flags & LSI_SERVER) {
718 ptr = lsi->lsi_ldd->ldd_params;
719 if (IS_MGS(lsi->lsi_ldd)) {
720 /* Use local nids (including LO) */
721 lnet_process_id_t id;
722 while ((rc = LNetGetId(i++, &id)) != -ENOENT) {
723 rc = do_lcfg(mgcname, id.nid,
724 LCFG_ADD_UUID, niduuid, 0,0,0);
727 /* Use mgsnode= nids */
728 if (class_find_param(ptr, PARAM_MGSNODE, &ptr) != 0) {
729 CERROR("No MGS nids given.\n");
730 GOTO(out_free, rc = -EINVAL);
732 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
733 rc = do_lcfg(mgcname, nid,
734 LCFG_ADD_UUID, niduuid, 0,0,0);
738 } else { /* client */
739 /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
740 ptr = lsi->lsi_lmd->lmd_dev;
741 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
742 rc = do_lcfg(mgcname, nid,
743 LCFG_ADD_UUID, niduuid, 0,0,0);
745 /* Stop at the first failover nid */
751 CERROR("No valid MGS nids found.\n");
752 GOTO(out_free, rc = -EINVAL);
754 lsi->lsi_lmd->lmd_mgs_failnodes = 1;
756 /* Random uuid for MGC allows easier reconnects */
758 ll_generate_random_uuid(uuidc);
759 class_uuid_unparse(uuidc, uuid);
762 rc = lustre_start_simple(mgcname, LUSTRE_MGC_NAME,
763 (char *)uuid->uuid, LUSTRE_MGS_OBDNAME,
769 /* Add any failover MGS nids */
771 while ((*ptr == ':' ||
772 class_find_param(ptr, PARAM_MGSNODE, &ptr) == 0)) {
773 /* New failover node */
774 sprintf(niduuid, "%s_%x", mgcname, i);
776 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
778 rc = do_lcfg(mgcname, nid,
779 LCFG_ADD_UUID, niduuid, 0,0,0);
784 rc = do_lcfg(mgcname, 0, LCFG_ADD_CONN,
792 lsi->lsi_lmd->lmd_mgs_failnodes = i;
794 obd = class_name2obd(mgcname);
796 CERROR("Can't find mgcobd %s\n", mgcname);
797 GOTO(out_free, rc = -ENOTCONN);
800 rc = obd_set_info_async(NULL, obd->obd_self_export,
801 strlen(KEY_MGSSEC), KEY_MGSSEC,
802 strlen(mgssec), mgssec, NULL);
806 /* Keep a refcount of servers/clients who started with "mount",
807 so we know when we can get rid of the mgc. */
808 cfs_atomic_set(&obd->u.cli.cl_mgc_refcount, 1);
810 /* Try all connections, but only once. */
812 rc = obd_set_info_async(NULL, obd->obd_self_export,
813 sizeof(KEY_INIT_RECOV_BACKUP),
814 KEY_INIT_RECOV_BACKUP,
815 sizeof(recov_bk), &recov_bk, NULL);
818 CWARN("can't set %s %d\n", KEY_INIT_RECOV_BACKUP, rc);
819 /* We connect to the MGS at setup, and don't disconnect until cleanup */
820 data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_FID |
821 OBD_CONNECT_AT | OBD_CONNECT_FULL20 |
822 OBD_CONNECT_IMP_RECOV;
823 if (lmd_is_client(lsi->lsi_lmd) &&
824 lsi->lsi_lmd->lmd_flags & LMD_FLG_NOIR)
825 data->ocd_connect_flags &= ~OBD_CONNECT_IMP_RECOV;
826 data->ocd_version = LUSTRE_VERSION_CODE;
827 rc = obd_connect(NULL, &exp, obd, &(obd->obd_uuid), data, NULL);
829 CERROR("connect failed %d\n", rc);
833 obd->u.cli.cl_mgc_mgsexp = exp;
836 /* Keep the mgc info in the sb. Note that many lsi's can point
840 cfs_mutex_unlock(&mgc_start_lock);
845 OBD_FREE(mgcname, len);
847 OBD_FREE(niduuid, len + 2);
/* Drop this superblock's use of its MGC and tear the MGC down on last use.
 *
 * Under mgc_start_lock:
 *  - asserts cl_mgc_refcount > 0 and decrements it; while other users
 *    remain, exits through the out label with rc = -EBUSY (not fatal);
 *  - marks the obd obd_no_recov and disconnects cl_mgc_mgsexp if set
 *    (a disconnect failure is only logged);
 *  - saves the obd name into niduuid (strlen + 6 bytes, i.e. room for "_"
 *    plus up to four hex digits) before class_manual_cleanup(obd);
 *  - issues LCFG_DEL_UUID for "<obdname>_<i>" for each i below
 *    lmd_mgs_failnodes, logging failures, then frees niduuid.
 * A failed niduuid allocation surfaces as rc = -ENOMEM after the cleanup.
 * NOTE(review): the delete-failure message says "MDC UUID" although these
 * are MGC uuids, and the do_lcfg() target is LUSTRE_MGC_OBDNAME rather than
 * the saved obd name -- confirm both are intended. */
851 static int lustre_stop_mgc(struct super_block *sb)
853 struct lustre_sb_info *lsi = s2lsi(sb);
854 struct obd_device *obd;
855 char *niduuid = 0, *ptr = 0;
856 int i, rc = 0, len = 0;
866 cfs_mutex_lock(&mgc_start_lock);
867 LASSERT(cfs_atomic_read(&obd->u.cli.cl_mgc_refcount) > 0);
868 if (!cfs_atomic_dec_and_test(&obd->u.cli.cl_mgc_refcount)) {
869 /* This is not fatal, every client that stops
870 will call in here. */
871 CDEBUG(D_MOUNT, "mgc still has %d references.\n",
872 cfs_atomic_read(&obd->u.cli.cl_mgc_refcount));
873 GOTO(out, rc = -EBUSY);
876 /* The MGC has no recoverable data in any case.
877 * force shutdown set in umount_begin */
878 obd->obd_no_recov = 1;
880 if (obd->u.cli.cl_mgc_mgsexp) {
881 /* An error is not fatal, if we are unable to send the
882 disconnect mgs ping evictor cleans up the export */
883 rc = obd_disconnect(obd->u.cli.cl_mgc_mgsexp);
885 CDEBUG(D_MOUNT, "disconnect failed %d\n", rc);
888 /* Save the obdname for cleaning the nid uuids, which are
890 len = strlen(obd->obd_name) + 6;
891 OBD_ALLOC(niduuid, len);
893 strcpy(niduuid, obd->obd_name);
894 ptr = niduuid + strlen(niduuid);
897 rc = class_manual_cleanup(obd);
901 /* Clean the nid uuids */
903 GOTO(out, rc = -ENOMEM);
905 for (i = 0; i < lsi->lsi_lmd->lmd_mgs_failnodes; i++) {
906 sprintf(ptr, "_%x", i);
907 rc = do_lcfg(LUSTRE_MGC_OBDNAME, 0, LCFG_DEL_UUID,
910 CERROR("del MDC UUID %s failed: rc = %d\n",
915 OBD_FREE(niduuid, len);
917 /* class_import_put will get rid of the additional connections */
918 cfs_mutex_unlock(&mgc_start_lock);
922 /* Since there's only one mgc per node, we have to change its fs to get
923 access to the right disk. */
/* Point the MGC at this superblock's disk.
 * Sends KEY_SET_FS to the MGC's self export with sb as the value and
 * sizeof(*sb) as the value length; a failure is logged as "can't set_fs".
 * Paired with server_mgc_clear_fs(). */
924 static int server_mgc_set_fs(struct obd_device *mgc, struct super_block *sb)
926 struct lustre_sb_info *lsi = s2lsi(sb);
930 CDEBUG(D_MOUNT, "Set mgc disk for %s\n", lsi->lsi_lmd->lmd_dev);
932 /* cl_mgc_sem in mgc ensures we sleep if the mgc_fs is busy */
933 rc = obd_set_info_async(NULL, mgc->obd_self_export,
934 sizeof(KEY_SET_FS), KEY_SET_FS,
935 sizeof(*sb), sb, NULL);
937 CERROR("can't set_fs %d\n", rc);
/* Release the MGC's hold on the server disk by sending KEY_CLEAR_FS to the
 * MGC's self export; the counterpart of server_mgc_set_fs(). */
943 static int server_mgc_clear_fs(struct obd_device *mgc)
948 CDEBUG(D_MOUNT, "Unassign mgc disk\n");
950 rc = obd_set_info_async(NULL, mgc->obd_self_export,
951 sizeof(KEY_CLEAR_FS), KEY_CLEAR_FS,
/* Mutex held while the shared MDS/OSS obds are looked up and started in
 * server_start_targets() and while they are stopped in
 * server_stop_servers(). */
956 CFS_DEFINE_MUTEX(server_start_lock);
958 /* Stop MDS/OSS if nobody is using them */
/* Stop the shared MDS or OSS obd once no target of that kind remains.
 *
 * lddflags: LDD_F_SV_TYPE_* bits of the target that just went away.
 * lsiflags: not referenced in the visible lines.
 *
 * Under server_start_lock: for an MDT, looks up the LUSTRE_MDS_OBDNAME obd
 * and the LUSTRE_MDS_NAME type; for an OST, the LUSTRE_OSS_OBDNAME obd and
 * the LUSTRE_OST_NAME type. If an obd was found and its type is missing or
 * has typ_refcnt == 0, the obd is torn down with class_manual_cleanup().
 * NOTE(review): if both type bits were set, the OST lookup would overwrite
 * the MDT result; the comment below states only one applies. */
959 static int server_stop_servers(int lddflags, int lsiflags)
961 struct obd_device *obd = NULL;
962 struct obd_type *type = NULL;
966 cfs_mutex_lock(&server_start_lock);
968 /* Either an MDT or an OST or neither */
969 /* if this was an MDT, and there are no more MDT's, clean up the MDS */
970 if ((lddflags & LDD_F_SV_TYPE_MDT) &&
971 (obd = class_name2obd(LUSTRE_MDS_OBDNAME))) {
972 /*FIXME pre-rename, should eventually be LUSTRE_MDT_NAME*/
973 type = class_search_type(LUSTRE_MDS_NAME);
975 /* if this was an OST, and there are no more OST's, clean up the OSS */
976 if ((lddflags & LDD_F_SV_TYPE_OST) &&
977 (obd = class_name2obd(LUSTRE_OSS_OBDNAME))) {
978 type = class_search_type(LUSTRE_OST_NAME);
981 if (obd && (!type || !type->typ_refcnt)) {
984 /* obd_fail doesn't mean much on a server obd */
985 err = class_manual_cleanup(obd);
990 cfs_mutex_unlock(&server_start_lock);
/* Dump an mgs_target_info to the debug log under the given title: server
 * name, fs name, uuid, config version and flags. Read-only with respect to
 * mti. */
995 int server_mti_print(char *title, struct mgs_target_info *mti)
997 PRINT_CMD(PRINT_MASK, "mti %s\n", title);
998 PRINT_CMD(PRINT_MASK, "server: %s\n", mti->mti_svname);
999 PRINT_CMD(PRINT_MASK, "fs: %s\n", mti->mti_fsname);
1000 PRINT_CMD(PRINT_MASK, "uuid: %s\n", mti->mti_uuid);
1001 PRINT_CMD(PRINT_MASK, "ver: %d flags: %#x\n",
1002 mti->mti_config_ver, mti->mti_flags);
/* Fill an mgs_target_info from the superblock's on-disk data and local NIDs.
 *
 * Only meaningful for server superblocks (LSI_SERVER is tested first).
 * Copies fs and server names, then walks the local LNet ids and records up
 * to MTI_NIDS_MAX NIDs in mti_nids. Each id is tested for LOLND, for a
 * PARAM_FAILNODE match when LDD_F_NO_PRIMNODE is set, and for a
 * PARAM_NETWORK match (presumably non-matching ids are skipped -- the skip
 * statements are not visible here). Also sets the Lustre version, zeroes
 * mti_config_ver, and copies flags, stripe index, uuid and params.
 *
 * Side effect: when the mount was given LMD_FLG_WRITECONF, LDD_F_WRITECONF
 * is OR-ed into ldd->ldd_flags itself before the flags are copied.
 *
 * NOTE(review): strncpy() with the full destination size does not
 * NUL-terminate when the source fills the buffer.
 * NOTE(review): the params check uses strlen() > sizeof(), so a string of
 * exactly sizeof(mti_params) characters passes and is copied without its
 * terminator; the memcpy() also always reads sizeof(mti_params) bytes from
 * ldd_params -- confirm the two buffers are the same size. */
1006 static int server_sb2mti(struct super_block *sb, struct mgs_target_info *mti)
1008 struct lustre_sb_info *lsi = s2lsi(sb);
1009 struct lustre_disk_data *ldd = lsi->lsi_ldd;
1010 lnet_process_id_t id;
1014 if (!(lsi->lsi_flags & LSI_SERVER))
1017 strncpy(mti->mti_fsname, ldd->ldd_fsname,
1018 sizeof(mti->mti_fsname));
1019 strncpy(mti->mti_svname, ldd->ldd_svname,
1020 sizeof(mti->mti_svname));
1022 mti->mti_nid_count = 0;
1023 while (LNetGetId(i++, &id) != -ENOENT) {
1024 if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND)
1027 /* server use --servicenode param, only allow specified
1028 * nids be registered */
1029 if ((ldd->ldd_flags & LDD_F_NO_PRIMNODE) != 0 &&
1030 class_match_nid(ldd->ldd_params,
1031 PARAM_FAILNODE, id.nid) < 1)
1034 /* match specified network */
1035 if (!class_match_net(ldd->ldd_params,
1036 PARAM_NETWORK, LNET_NIDNET(id.nid)))
1039 mti->mti_nids[mti->mti_nid_count] = id.nid;
1040 mti->mti_nid_count++;
1041 if (mti->mti_nid_count >= MTI_NIDS_MAX) {
1042 CWARN("Only using first %d nids for %s\n",
1043 mti->mti_nid_count, mti->mti_svname);
1048 mti->mti_lustre_ver = LUSTRE_VERSION_CODE;
1049 mti->mti_config_ver = 0;
1050 if (lsi->lsi_lmd->lmd_flags & LMD_FLG_WRITECONF)
1051 ldd->ldd_flags |= LDD_F_WRITECONF;
1052 mti->mti_flags = ldd->ldd_flags;
1053 mti->mti_stripe_index = ldd->ldd_svindex;
1054 memcpy(mti->mti_uuid, ldd->ldd_uuid, sizeof(mti->mti_uuid));
1055 if (strlen(ldd->ldd_params) > sizeof(mti->mti_params)) {
1056 CERROR("params too big for mti\n");
1059 memcpy(mti->mti_params, ldd->ldd_params, sizeof(mti->mti_params));
1063 /* Register an old or new target with the MGS. If needed MGS will construct
1064 startup logs and assign index */
/* Register this server target with the MGS through the MGC export.
 *
 * Only acts on server superblocks (LSI_SERVER is tested first). Builds an
 * mgs_target_info with server_sb2mti(), tags it LDD_F_OPC_REG and sends it
 * with KEY_REGISTER_TARGET on cl_mgc_mgsexp.
 *
 * Error reporting visible here:
 *  - LDD_F_ERROR set in the reply flags: console error 0x160 (the MGS
 *    refuses this server);
 *  - otherwise, if the target needed an index or an update (writeconf):
 *    console error 0x15f;
 *  - otherwise the failure is logged as not fatal and the error code is
 *    reset.
 *
 * On-disk flags are always refreshed from the reply (masked with
 * LDD_F_ONDISK_MASK). If the MGS set LDD_F_REWRITE_LDD, the index and server
 * name from the reply are stored in ldd, written with ldd_write(), the
 * filesystem label is updated and the filesystem is synced.
 * NOTE(review): the return value of ldd_write() is not checked, and the
 * strncpy() into ldd_svname does not guarantee NUL-termination. */
1065 int server_register_target(struct super_block *sb)
1067 struct lustre_sb_info *lsi = s2lsi(sb);
1068 struct obd_device *mgc = lsi->lsi_mgc;
1069 struct lustre_disk_data *ldd = lsi->lsi_ldd;
1070 struct mgs_target_info *mti = NULL;
1077 if (!(lsi->lsi_flags & LSI_SERVER))
1083 rc = server_sb2mti(sb, mti);
1087 CDEBUG(D_MOUNT, "Registration %s, fs=%s, %s, index=%04x, flags=%#x\n",
1088 mti->mti_svname, mti->mti_fsname,
1089 libcfs_nid2str(mti->mti_nids[0]), mti->mti_stripe_index,
1092 /* if write_conf is true, the registration must succeed */
1093 writeconf = !!(ldd->ldd_flags & (LDD_F_NEED_INDEX | LDD_F_UPDATE));
1094 mti->mti_flags |= LDD_F_OPC_REG;
1096 /* Register the target */
1097 /* FIXME use mgc_process_config instead */
1098 rc = obd_set_info_async(NULL, mgc->u.cli.cl_mgc_mgsexp,
1099 sizeof(KEY_REGISTER_TARGET), KEY_REGISTER_TARGET,
1100 sizeof(*mti), mti, NULL);
1102 if (mti->mti_flags & LDD_F_ERROR) {
1103 LCONSOLE_ERROR_MSG(0x160,
1104 "The MGS is refusing to allow this "
1105 "server (%s) to start. Please see messages"
1106 " on the MGS node.\n", ldd->ldd_svname);
1107 } else if (writeconf) {
1108 LCONSOLE_ERROR_MSG(0x15f,
1109 "Communication to the MGS return error %d. "
1110 "Is the MGS running?\n", rc);
1112 CERROR("Cannot talk to the MGS: %d, not fatal\n", rc);
1113 /* reset the error code for non-fatal error. */
1119 /* Always update our flags */
1120 ldd->ldd_flags = mti->mti_flags & LDD_F_ONDISK_MASK;
1122 /* If this flag is set, it means the MGS wants us to change our
1123 on-disk data. (So far this means just the index.) */
1124 if (mti->mti_flags & LDD_F_REWRITE_LDD) {
1127 CDEBUG(D_MOUNT, "Changing on-disk index from %#x to %#x "
1128 "for %s\n", ldd->ldd_svindex, mti->mti_stripe_index,
1130 ldd->ldd_svindex = mti->mti_stripe_index;
1131 strncpy(ldd->ldd_svname, mti->mti_svname,
1132 sizeof(ldd->ldd_svname));
1133 /* or ldd_make_sv_name(ldd); */
1134 ldd_write(&mgc->obd_lvfs_ctxt, ldd);
1135 err = fsfilt_set_label(mgc, lsi->lsi_srv_mnt->mnt_sb,
1138 CERROR("Label set error %d\n", err);
1139 label = fsfilt_get_label(mgc, lsi->lsi_srv_mnt->mnt_sb);
1141 CDEBUG(D_MOUNT, "Disk label changed to %s\n", label);
1143 /* Flush the new ldd to disk */
1144 fsfilt_sync(mgc, lsi->lsi_srv_mnt->mnt_sb);
1154 * Notify the MGS that this target is ready.
1155 * Used by IR - if the MGS receives this message, it will notify clients.
/* Send a "target ready" message for obd to the MGS.
 * Only acts on server superblocks (LSI_SERVER is tested first). Builds an
 * mgs_target_info with server_sb2mti(), stores the obd's obt_instance in
 * mti_instance, tags it LDD_F_OPC_READY and sends it with
 * KEY_REGISTER_TARGET on cl_mgc_mgsexp. If the call succeeds and the reply
 * flags carry LDD_F_IR_CAPABLE without LDD_F_ERROR, LSI_IR_CAPABLE is set
 * in lsi_flags. */
1157 static int server_notify_target(struct super_block *sb, struct obd_device *obd)
1159 struct lustre_sb_info *lsi = s2lsi(sb);
1160 struct obd_device *mgc = lsi->lsi_mgc;
1161 struct mgs_target_info *mti = NULL;
1167 if (!(lsi->lsi_flags & LSI_SERVER))
1173 rc = server_sb2mti(sb, mti);
1177 mti->mti_instance = obd->u.obt.obt_instance;
1178 mti->mti_flags |= LDD_F_OPC_READY;
1180 /* FIXME use mgc_process_config instead */
1181 rc = obd_set_info_async(NULL, mgc->u.cli.cl_mgc_mgsexp,
1182 sizeof(KEY_REGISTER_TARGET),
1183 KEY_REGISTER_TARGET,
1184 sizeof(*mti), mti, NULL);
1186 /* Imperative recovery: if the mgs informs us to use IR? */
1187 if (!rc && !(mti->mti_flags & LDD_F_ERROR) &&
1188 (mti->mti_flags & LDD_F_IR_CAPABLE))
1189 lsi->lsi_flags |= LSI_IR_CAPABLE;
1198 /** Start server targets: MDTs and OSTs
/* Bring up the server target stored on sb.
 *
 * Sequence visible here:
 *  1. For an MDT, look up the shared LUSTRE_MDS_OBDNAME obd under
 *     server_start_lock and start it via lustre_start_simple(); a failure is
 *     logged as "failed to start MDS". The same is done for an OST with
 *     LUSTRE_OSS_OBDNAME.
 *  2. server_mgc_set_fs(): give the MGC access to this disk.
 *  3. server_register_target(): register with the MGS.
 *  4. server_register_mount(): register mnt under the target's svname so
 *     the target's setup can find it.
 *  5. lustre_process_log() with a zeroed config_llog_instance: process the
 *     llog named after the target, which starts the target obd.
 *  6. server_mgc_clear_fs(): release the MGC's hold on the disk.
 *  7. Look the target obd up by svname; if LMD_FLG_ABORT_RECOV was given
 *     and the obd implements iocontrol, issue OBD_IOC_ABORT_RECOVERY. Then
 *     notify the MGS (server_notify_target), compute the recovery timeout
 *     and send OBD_NOTIFY_CONFIG to the obd.
 * The mount registration is deliberately left in place when log processing
 * fails, so that cleanup can still find it. */
1200 static int server_start_targets(struct super_block *sb, struct vfsmount *mnt)
1202 struct obd_device *obd;
1203 struct lustre_sb_info *lsi = s2lsi(sb);
1204 struct config_llog_instance cfg;
1208 CDEBUG(D_MOUNT, "starting target %s\n", lsi->lsi_ldd->ldd_svname);
1211 /* If we're an MDT, make sure the global MDS is running */
1212 if (lsi->lsi_ldd->ldd_flags & LDD_F_SV_TYPE_MDT) {
1213 /* make sure the MDS is started */
1214 cfs_mutex_lock(&server_start_lock);
1215 obd = class_name2obd(LUSTRE_MDS_OBDNAME);
1217 rc = lustre_start_simple(LUSTRE_MDS_OBDNAME,
1218 /* FIXME pre-rename, should eventually be LUSTRE_MDS_NAME */
1220 LUSTRE_MDS_OBDNAME"_uuid",
1223 cfs_mutex_unlock(&server_start_lock);
1224 CERROR("failed to start MDS: %d\n", rc);
1228 cfs_mutex_unlock(&server_start_lock);
1232 /* If we're an OST, make sure the global OSS is running */
1233 if (IS_OST(lsi->lsi_ldd)) {
1234 /* make sure OSS is started */
1235 cfs_mutex_lock(&server_start_lock);
1236 obd = class_name2obd(LUSTRE_OSS_OBDNAME);
1238 rc = lustre_start_simple(LUSTRE_OSS_OBDNAME,
1240 LUSTRE_OSS_OBDNAME"_uuid",
1243 cfs_mutex_unlock(&server_start_lock);
1244 CERROR("failed to start OSS: %d\n", rc);
1248 cfs_mutex_unlock(&server_start_lock);
1251 /* Set the mgc fs to our server disk. This allows the MGC to
1252 * read and write configs locally, in case it can't talk to the MGS. */
1253 rc = server_mgc_set_fs(lsi->lsi_mgc, sb);
1257 /* Register with MGS */
1258 rc = server_register_target(sb);
1262 /* Let the target look up the mount using the target's name
1263 (we can't pass the sb or mnt through class_process_config.) */
1264 rc = server_register_mount(lsi->lsi_ldd->ldd_svname, sb, mnt);
1268 /* Start targets using the llog named for the target */
1269 memset(&cfg, 0, sizeof(cfg));
1270 rc = lustre_process_log(sb, lsi->lsi_ldd->ldd_svname, &cfg);
1272 CERROR("failed to start server %s: %d\n",
1273 lsi->lsi_ldd->ldd_svname, rc);
1274 /* Do NOT call server_deregister_mount() here. This makes it
1275 * impossible to find mount later in cleanup time and leaves
1276 * @lsi and other stuff leaked. -umka */
1281 /* Release the mgc fs for others to use */
1282 server_mgc_clear_fs(lsi->lsi_mgc);
1285 obd = class_name2obd(lsi->lsi_ldd->ldd_svname);
1287 CERROR("no server named %s was started\n",
1288 lsi->lsi_ldd->ldd_svname);
1292 if ((lsi->lsi_lmd->lmd_flags & LMD_FLG_ABORT_RECOV) &&
1293 (OBP(obd, iocontrol))) {
1294 obd_iocontrol(OBD_IOC_ABORT_RECOVERY,
1295 obd->obd_self_export, 0, NULL, NULL);
1298 server_notify_target(sb, obd);
1300 /* calculate recovery timeout, do it after lustre_process_log */
1301 server_calc_timeout(lsi, obd);
1303 /* log has been fully processed */
1304 obd_notify(obd, NULL, OBD_NOTIFY_CONFIG, (void *)CONFIG_LOG);
1310 /***************** lustre superblock **************/
/* Create the lustre_sb_info for sb and attach it to the superblock.
 * Allocates the embedded mount data (lsi_lmd), zeroes its exclude count and
 * soft/hard recovery times, stores the lsi in sb via s2lsi_nocast(), starts
 * lsi_mounts at 1 (the extra setup reference) and selects
 * LSI_UMOUNT_FAILOVER as the default umount style. The allocation of lsi
 * itself and the handling of a failed lsi_lmd allocation are not visible in
 * this excerpt. */
1312 struct lustre_sb_info *lustre_init_lsi(struct super_block *sb)
1314 struct lustre_sb_info *lsi;
1320 OBD_ALLOC_PTR(lsi->lsi_lmd);
1321 if (!lsi->lsi_lmd) {
1326 lsi->lsi_lmd->lmd_exclude_count = 0;
1327 lsi->lsi_lmd->lmd_recovery_time_soft = 0;
1328 lsi->lsi_lmd->lmd_recovery_time_hard = 0;
1329 s2lsi_nocast(sb) = lsi;
1330 /* we take 1 extra ref for our setup */
1331 cfs_atomic_set(&lsi->lsi_mounts, 1);
1333 /* Default umount style */
1334 lsi->lsi_flags = LSI_UMOUNT_FAILOVER;
/* Free the lsi attached to @sb and everything it owns: the disk data,
 * the mount data with each of its strings and its exclude array, and
 * finally the lsi itself, after which the superblock pointer is cleared.
 * Asserts that the mount refcount has already dropped to zero and that
 * lsi_llsbi is NULL. */
1339 static int lustre_free_lsi(struct super_block *sb)
1341 struct lustre_sb_info *lsi = s2lsi(sb);
1344 LASSERT(lsi != NULL);
1345 CDEBUG(D_MOUNT, "Freeing lsi %p\n", lsi);
1347 /* someone didn't call server_put_mount. */
1348 LASSERT(cfs_atomic_read(&lsi->lsi_mounts) == 0);
1350 if (lsi->lsi_ldd != NULL)
1351 OBD_FREE(lsi->lsi_ldd, sizeof(*lsi->lsi_ldd));
1353 if (lsi->lsi_lmd != NULL) {
/* each string's size is recomputed as strlen() + 1 at free time */
1354 if (lsi->lsi_lmd->lmd_dev != NULL)
1355 OBD_FREE(lsi->lsi_lmd->lmd_dev,
1356 strlen(lsi->lsi_lmd->lmd_dev) + 1);
1357 if (lsi->lsi_lmd->lmd_profile != NULL)
1358 OBD_FREE(lsi->lsi_lmd->lmd_profile,
1359 strlen(lsi->lsi_lmd->lmd_profile) + 1);
1360 if (lsi->lsi_lmd->lmd_mgssec != NULL)
1361 OBD_FREE(lsi->lsi_lmd->lmd_mgssec,
1362 strlen(lsi->lsi_lmd->lmd_mgssec) + 1);
1363 if (lsi->lsi_lmd->lmd_opts != NULL)
1364 OBD_FREE(lsi->lsi_lmd->lmd_opts,
1365 strlen(lsi->lsi_lmd->lmd_opts) + 1);
/* the exclude array is sized by lmd_exclude_count entries */
1366 if (lsi->lsi_lmd->lmd_exclude_count)
1367 OBD_FREE(lsi->lsi_lmd->lmd_exclude,
1368 sizeof(lsi->lsi_lmd->lmd_exclude[0]) *
1369 lsi->lsi_lmd->lmd_exclude_count);
1370 OBD_FREE(lsi->lsi_lmd, sizeof(*lsi->lsi_lmd));
1373 LASSERT(lsi->lsi_llsbi == NULL);
1374 OBD_FREE(lsi, sizeof(*lsi));
1375 s2lsi_nocast(sb) = NULL;
1380 /* The lsi has one reference for every server that is using the disk -
1381 e.g. MDT, MGS, and potentially MGC.
 This drops one such reference from the lsi attached to @sb; when the
 count reaches zero the lsi is released through lustre_free_lsi(). */
1382 static int lustre_put_lsi(struct super_block *sb)
1384 struct lustre_sb_info *lsi = s2lsi(sb);
1387 LASSERT(lsi != NULL);
1389 CDEBUG(D_MOUNT, "put %p %d\n", sb, cfs_atomic_read(&lsi->lsi_mounts));
1390 if (cfs_atomic_dec_and_test(&lsi->lsi_mounts)) {
1391 lustre_free_lsi(sb);
1397 /*************** server mount ******************/
1399 /** Kernel mount using mount options in MOUNT_DATA_FILE.
1400 * Since this file lives on the disk, we pre-mount using a common
1401 * type, read the file, then re-mount using the type specified in the
 * parsed disk data (MT_STR(ldd)).
 * Failures are returned as ERR_PTR(-errno); on success the parsed
 * lustre_disk_data is handed over to lsi->lsi_ldd.
1404 static struct vfsmount *server_kernel_mount(struct super_block *sb)
1406 struct lvfs_run_ctxt mount_ctxt;
1407 struct lustre_sb_info *lsi = s2lsi(sb);
1408 struct lustre_disk_data *ldd;
1409 struct lustre_mount_data *lmd = lsi->lsi_lmd;
1410 struct vfsmount *mnt;
1411 struct file_system_type *type;
1412 char *options = NULL;
1413 unsigned long page, s_flags;
1414 struct page *__page;
1419 OBD_ALLOC(ldd, sizeof(*ldd));
1421 RETURN(ERR_PTR(-ENOMEM));
1423 /* In the past, we have always used flags = 0.
1424 Note ext3/ldiskfs can't be mounted ro. */
1425 s_flags = sb->s_flags;
1427 /* allocate memory for options */
1428 OBD_PAGE_ALLOC(__page, CFS_ALLOC_STD);
1430 GOTO(out_free, rc = -ENOMEM);
1431 page = (unsigned long)cfs_page_address(__page);
1432 options = (char *)page;
1433 memset(options, 0, CFS_PAGE_SIZE);
1435 /* mount-line options must be added for pre-mount because it may
1436 * contain mount options such as journal_dev which are required
1437 * to mount successfully the underlying filesystem */
1438 if (lmd->lmd_opts && (*(lmd->lmd_opts) != 0))
1439 strncat(options, lmd->lmd_opts, CFS_PAGE_SIZE - 1);
1441 /* Pre-mount ldiskfs to read the MOUNT_DATA_FILE */
1442 CDEBUG(D_MOUNT, "Pre-mount ldiskfs %s\n", lmd->lmd_dev);
1443 type = get_fs_type("ldiskfs");
1445 CERROR("premount failed: cannot find ldiskfs module\n");
1446 GOTO(out_free, rc = -ENODEV);
1448 mnt = vfs_kern_mount(type, s_flags, lmd->lmd_dev, (void *)options);
1449 cfs_module_put(type->owner);
1452 CERROR("premount %s:%#lx ldiskfs failed: %d "
1453 "Is the ldiskfs module available?\n",
1454 lmd->lmd_dev, s_flags, rc );
/* run context rooted at the pre-mount, used by ldd_parse() below */
1458 OBD_SET_CTXT_MAGIC(&mount_ctxt);
1459 mount_ctxt.pwdmnt = mnt;
1460 mount_ctxt.pwd = mnt->mnt_root;
1461 mount_ctxt.fs = get_ds();
1463 rc = ldd_parse(&mount_ctxt, ldd);
1467 CERROR("premount parse options failed: rc = %d\n", rc);
1471 /* Done with our pre-mount, now do the real mount. */
1473 /* Glom up mount options */
1474 memset(options, 0, CFS_PAGE_SIZE);
1475 strncpy(options, ldd->ldd_mount_opts, CFS_PAGE_SIZE - 2);
/* append no_mbcache, bounded by the room left in the options page */
1477 len = CFS_PAGE_SIZE - strlen(options) - 2;
1479 strcat(options, ",");
1480 strncat(options, "no_mbcache", len);
1482 /* Add in any mount-line options */
1483 if (lmd->lmd_opts && (*(lmd->lmd_opts) != 0)) {
1484 len = CFS_PAGE_SIZE - strlen(options) - 2;
1485 strcat(options, ",");
1486 strncat(options, lmd->lmd_opts, len);
1489 /* Special permanent mount flags */
1491 s_flags |= MS_NOATIME | MS_NODIRATIME;
1493 CDEBUG(D_MOUNT, "kern_mount: %s %s %s\n",
1494 MT_STR(ldd), lmd->lmd_dev, options);
1495 type = get_fs_type(MT_STR(ldd));
1497 CERROR("get_fs_type failed\n");
1498 GOTO(out_free, rc = -ENODEV);
1500 mnt = vfs_kern_mount(type, s_flags, lmd->lmd_dev, (void *)options);
1501 cfs_module_put(type->owner);
1504 CERROR("vfs_kern_mount failed: rc = %d\n", rc);
/* abort_recov was requested: truncate LAST_RCVD on the new mount */
1508 if (lmd->lmd_flags & LMD_FLG_ABORT_RECOV)
1509 simple_truncate(mnt->mnt_sb->s_root, mnt, LAST_RCVD,
/* success: the options page is released and ldd now belongs to lsi */
1512 OBD_PAGE_FREE(__page);
1513 lsi->lsi_ldd = ldd; /* freed at lsi cleanup */
1514 CDEBUG(D_SUPER, "%s: mnt = %p\n", lmd->lmd_dev, mnt);
/* failure: release the page and ldd, report rc through ERR_PTR() */
1519 OBD_PAGE_FREE(__page);
1520 OBD_FREE(ldd, sizeof(*ldd));
1521 lsi->lsi_ldd = NULL;
1522 RETURN(ERR_PTR(rc));
1525 /** Wait here forever until the mount refcount is 0 before completing umount,
1526 * else we risk dereferencing a null pointer.
1527 * LNET may take e.g. 165s before killing zombies.
 * The loop below runs while mnt_get_count(mnt) is above 1, sleeping in
 * interruptible waits bounded by cfs_time_seconds(3) with every signal
 * except SIGKILL blocked, and prints a console warning whenever the
 * waited counter is a non-zero multiple of 30.
1529 static void server_wait_finished(struct vfsmount *mnt)
1533 cfs_sigset_t blocked;
1535 cfs_waitq_init(&waitq);
1537 while (mnt_get_count(mnt) > 1) {
1538 if (waited && (waited % 30 == 0))
1539 LCONSOLE_WARN("Mount still busy with %d refs after "
1543 /* Cannot use l_event_wait() for an interruptible sleep. */
1545 blocked = cfs_block_sigsinv(sigmask(SIGKILL));
1546 cfs_waitq_wait_event_interruptible_timeout(
1548 (mnt_get_count(mnt) == 1),
1549 cfs_time_seconds(3),
1551 cfs_restore_sigs(blocked);
/* presumably reached only when the wait was interrupted; the guarding
 * condition is not visible in this excerpt */
1553 LCONSOLE_EMERG("Danger: interrupted umount %s with "
1554 "%d refs!\n", mnt->mnt_devname,
1555 mnt_get_count(mnt));
1562 /** Start the shutdown of servers at umount.
 * Visible sequence: stop the target obd (unless nosvc), stop the MGS if
 * this disk hosts one and nomgs was not given, drop the MGC and sb via
 * lustre_common_put_super(), wait for the mount to go idle, stop the
 * shared servers, then clean up any orphaned obd named in the profile.
 * NOTE(review): tmpname and extraname are written right after
 * OBD_ALLOC() with no visible NULL check - confirm.
1564 static void server_put_super(struct super_block *sb)
1566 struct lustre_sb_info *lsi = s2lsi(sb);
1567 struct obd_device *obd;
1568 struct vfsmount *mnt = lsi->lsi_srv_mnt;
1569 char *tmpname, *extraname = NULL;
1571 int lddflags = lsi->lsi_ldd->ldd_flags;
1572 int lsiflags = lsi->lsi_flags;
1575 LASSERT(lsiflags & LSI_SERVER);
1577 tmpname_sz = strlen(lsi->lsi_ldd->ldd_svname) + 1;
1578 OBD_ALLOC(tmpname, tmpname_sz);
1579 memcpy(tmpname, lsi->lsi_ldd->ldd_svname, tmpname_sz);
1580 CDEBUG(D_MOUNT, "server put_super %s\n", tmpname);
1581 if (IS_MDT(lsi->lsi_ldd) && (lsi->lsi_lmd->lmd_flags & LMD_FLG_NOSVC))
1582 snprintf(tmpname, tmpname_sz, "MGS");
1584 /* Stop the target */
1585 if (!(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOSVC) &&
1586 (IS_MDT(lsi->lsi_ldd) || IS_OST(lsi->lsi_ldd))) {
1587 struct lustre_profile *lprof = NULL;
1589 /* tell the mgc to drop the config log */
1590 lustre_end_log(sb, lsi->lsi_ldd->ldd_svname, NULL);
1592 /* COMPAT_146 - profile may get deleted in mgc_cleanup.
1593 If there are any setup/cleanup errors, save the lov
1594 name for safety cleanup later. */
1595 lprof = class_get_profile(lsi->lsi_ldd->ldd_svname);
1596 if (lprof && lprof->lp_dt) {
/* private copy of the profile's lp_dt name, used for the orphan
 * cleanup further down */
1597 OBD_ALLOC(extraname, strlen(lprof->lp_dt) + 1);
1598 strcpy(extraname, lprof->lp_dt);
1601 obd = class_name2obd(lsi->lsi_ldd->ldd_svname);
1603 CDEBUG(D_MOUNT, "stopping %s\n", obd->obd_name);
1604 if (lsi->lsi_flags & LSI_UMOUNT_FAILOVER)
1606 /* We can't seem to give an error return code
1607 * to .put_super, so we better make sure we clean up! */
1609 class_manual_cleanup(obd);
1611 CERROR("no obd %s\n", lsi->lsi_ldd->ldd_svname);
1612 server_deregister_mount(lsi->lsi_ldd->ldd_svname);
1616 /* If they wanted the mgs to stop separately from the mdt, they
1617 should have put it on a different device. */
1618 if (IS_MGS(lsi->lsi_ldd)) {
1619 /* if MDS start with --nomgs, don't stop MGS then */
1620 if (!(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOMGS))
1621 server_stop_mgs(sb);
1624 /* Clean the mgc and sb */
1625 lustre_common_put_super(sb);
1627 /* Wait for the targets to really clean up - can't exit (and let the
1628 sb get destroyed) while the mount is still in use */
1629 server_wait_finished(mnt);
1631 /* drop the One True Mount */
1634 /* Stop the servers (MDS, OSS) if no longer needed. We must wait
1635 until the target is really gone so that our type refcount check
1637 server_stop_servers(lddflags, lsiflags);
1639 /* In case of startup or cleanup err, stop related obds */
1641 obd = class_name2obd(extraname);
1643 CWARN("Cleaning orphaned obd %s\n", extraname);
1645 class_manual_cleanup(obd);
1647 OBD_FREE(extraname, strlen(extraname) + 1);
1650 LCONSOLE_WARN("server umount %s complete\n", tmpname);
1651 OBD_FREE(tmpname, tmpname_sz);
1655 /** Called only for 'umount -f'
 * Marks the superblock info for a forced umount: the failover flag is
 * cleared and the force flag is set. With HAVE_UMOUNTBEGIN_VFSMOUNT the
 * hook receives the vfsmount plus flags and tests MNT_FORCE first;
 * otherwise it receives the super_block directly.
1657 #ifdef HAVE_UMOUNTBEGIN_VFSMOUNT
1658 static void server_umount_begin(struct vfsmount *vfsmnt, int flags)
1660 struct super_block *sb = vfsmnt->mnt_sb;
1662 static void server_umount_begin(struct super_block *sb)
1665 struct lustre_sb_info *lsi = s2lsi(sb);
1668 #ifdef HAVE_UMOUNTBEGIN_VFSMOUNT
1669 if (!(flags & MNT_FORCE)) {
1675 CDEBUG(D_MOUNT, "umount -f\n");
1676 /* umount = failover
 umount -f = force
1678 no third way to do non-force, non-failover */
1679 lsi->lsi_flags &= ~LSI_UMOUNT_FAILOVER;
1680 lsi->lsi_flags |= LSI_UMOUNT_FORCE;
/* statfs for a server mount point.
 * When lsi_srv_mnt has a superblock whose s_op provides statfs, that
 * statfs is called (on mnt_root or mnt_sb, depending on
 * HAVE_STATFS_DENTRY_PARAM) and f_type is overridden with our s_magic.
 * The later assignments fill f_type, f_bsize and f_namelen from our own
 * superblock and NAME_MAX.
 * NOTE(review): the return statements are not visible in this excerpt. */
1684 #ifndef HAVE_STATFS_DENTRY_PARAM
1685 static int server_statfs (struct super_block *sb, cfs_kstatfs_t *buf)
1688 static int server_statfs (struct dentry *dentry, cfs_kstatfs_t *buf)
1690 struct super_block *sb = dentry->d_sb;
1692 struct vfsmount *mnt = s2lsi(sb)->lsi_srv_mnt;
1695 if (mnt && mnt->mnt_sb && mnt->mnt_sb->s_op->statfs) {
1696 #ifdef HAVE_STATFS_DENTRY_PARAM
1697 int rc = mnt->mnt_sb->s_op->statfs(mnt->mnt_root, buf);
1699 int rc = mnt->mnt_sb->s_op->statfs(mnt->mnt_sb, buf);
/* report the Lustre magic rather than the backing filesystem's */
1702 buf->f_type = sb->s_magic;
1708 buf->f_type = sb->s_magic;
1709 buf->f_bsize = sb->s_blocksize;
1715 buf->f_namelen = NAME_MAX;
1719 /** The operations we support directly on the superblock:
1720 * mount, umount, and df.
 * put_super handles umount, umount_begin the forced ('umount -f') case
 * and statfs answers df.
1722 static struct super_operations server_ops =
1724 .put_super = server_put_super,
1725 .umount_begin = server_umount_begin, /* umount -f */
1726 .statfs = server_statfs,
/* log2(n) is expressed through cfs_ffz(~(n)); presumably this is only
 * an exact logarithm for powers of two such as the block size below -
 * TODO confirm against cfs_ffz(). */
1729 #define log2(n) cfs_ffz(~(n))
1730 #define LUSTRE_SUPER_MAGIC 0x0BD00BD1

/* Fill in the Lustre-side superblock of a server mount point: 4096-byte
 * blocks, LUSTRE_SUPER_MAGIC, s_maxbytes of 0, read-only, server_ops as
 * the operations table, then a new directory inode as root and a root
 * dentry for it. */
1732 static int server_fill_super_common(struct super_block *sb)
1734 struct inode *root = 0;
1737 CDEBUG(D_MOUNT, "Server sb, dev=%d\n", (int)sb->s_dev);
1739 sb->s_blocksize = 4096;
1740 sb->s_blocksize_bits = log2(sb->s_blocksize);
1741 sb->s_magic = LUSTRE_SUPER_MAGIC;
1742 sb->s_maxbytes = 0; /* we don't allow file IO on server mountpoints */
1743 sb->s_flags |= MS_RDONLY;
1744 sb->s_op = &server_ops;
1746 root = new_inode(sb);
1748 CERROR("Can't make root inode\n");
1752 /* returns -EIO for every operation */
1753 /* make_bad_inode(root); -- badness - can't umount */
1754 /* apparently we need to be a directory for the mount to finish */
1755 root->i_mode = S_IFDIR;
1757 sb->s_root = d_alloc_root(root);
1759 CERROR("Can't make root dentry\n");
1767 /** Fill in the superblock info for a Lustre server.
1768 * Mount the device with the correct options.
1769 * Read the on-disk config file.
1770 * Start the services.
 * Order visible below: kernel mount, refusal if a target with the same
 * svname is already running, MGS (when this disk is an MGS and nomgs
 * is not set), MGC, targets (OST/MDT unless nosvc), and finally the
 * common superblock setup. The failure path for the later stages goes
 * through server_put_super().
1772 static int server_fill_super(struct super_block *sb)
1774 struct lustre_sb_info *lsi = s2lsi(sb);
1775 struct vfsmount *mnt;
1779 /* the One True Mount */
1780 mnt = server_kernel_mount(sb);
1783 CERROR("Unable to mount device %s: %d\n",
1784 lsi->lsi_lmd->lmd_dev, rc);
1788 lsi->lsi_srv_mnt = mnt;
1790 LASSERT(lsi->lsi_ldd);
1791 CDEBUG(D_MOUNT, "Found service %s for fs '%s' on device %s\n",
1792 lsi->lsi_ldd->ldd_svname, lsi->lsi_ldd->ldd_fsname,
1793 lsi->lsi_lmd->lmd_dev);
/* an obd with this svname already exists: refuse the double mount */
1795 if (class_name2obd(lsi->lsi_ldd->ldd_svname)) {
1796 LCONSOLE_ERROR_MSG(0x161, "The target named %s is already "
1797 "running. Double-mount may have compromised"
1798 " the disk journal.\n",
1799 lsi->lsi_ldd->ldd_svname);
1805 /* Start MGS before MGC */
1806 if (IS_MGS(lsi->lsi_ldd) && !(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOMGS)){
1807 rc = server_start_mgs(sb);
1812 /* Start MGC before servers */
1813 rc = lustre_start_mgc(sb);
1817 /* Set up all obd devices for service */
1818 if (!(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOSVC) &&
1819 (IS_OST(lsi->lsi_ldd) || IS_MDT(lsi->lsi_ldd))) {
1820 rc = server_start_targets(sb, mnt);
1822 CERROR("Unable to start targets: %d\n", rc);
1825 /* FIXME overmount client here,
1826 or can we just start a client log and client_fill_super on this sb?
1827 We need to make sure server_put_super gets called too - ll_put_super
1828 calls lustre_common_put_super; check there for LSI_SERVER flag,
1830 Probably should start client from new thread so we can return.
1831 Client will not finish until all servers are connected.
1832 Note - MGS-only server does NOT get a client, since there is no
1833 lustre fs associated - the MGS is for all lustre fs's */
1836 rc = server_fill_super_common(sb);
1842 /* We jump here in case of failure while starting targets or MGS.
1843 * In this case we can't just put @mnt and have to do real cleanup
1844 * with stopping targets, etc. */
1845 server_put_super(sb);
1849 /* Get the index from the obd name.
1850 rc = server type, or
 (the error case is not visible in this excerpt)
 The type is read from the three characters after the last '-':
 "MDT" gives LDD_F_SV_TYPE_MDT, "OST" gives LDD_F_SV_TYPE_OST. A
 suffix of "all" after the type returns rc | LDD_F_SV_ALL; otherwise
 the text after the type is parsed as a hexadecimal index.
1852 if endptr isn't NULL it is set to end of name */
1853 int server_name2index(char *svname, __u32 *idx, char **endptr)
1855 unsigned long index;
1857 char *dash = strrchr(svname, '-');
1861 /* interpret <fsname>-MDTXXXXX-mdc as mdt, the better way is to pass
1862 * in the fsname, then determine the server index */
1863 if (!strcmp(LUSTRE_MDC_NAME, dash + 1)) {
/* step back to the previous '-' so dash points at the "-MDT..." part */
1865 for (; dash > svname && *dash != '-'; dash--);
1870 if (strncmp(dash + 1, "MDT", 3) == 0)
1871 rc = LDD_F_SV_TYPE_MDT;
1872 else if (strncmp(dash + 1, "OST", 3) == 0)
1873 rc = LDD_F_SV_TYPE_OST;
1876 if (strcmp(dash + 4, "all") == 0)
1877 return rc | LDD_F_SV_ALL;
/* dash + 4 skips the '-' and the three-letter type; base 16 */
1879 index = simple_strtoul(dash + 4, endptr, 16);
/** Calculate timeout value for a target. */
1887 void server_calc_timeout(struct lustre_sb_info *lsi, struct obd_device *obd)
1889 struct lustre_mount_data *lmd;
1893 bool has_ir = !!(lsi->lsi_flags & LSI_IR_CAPABLE);
1894 int min = OBD_RECOVERY_TIME_MIN;
1896 LASSERT(lsi->lsi_flags & LSI_SERVER);
/* start from the mount-line values; the noir mount flag turns
 * imperative recovery off for this obd */
1900 soft = lmd->lmd_recovery_time_soft;
1901 hard = lmd->lmd_recovery_time_hard;
1902 has_ir = has_ir && !(lmd->lmd_flags & LMD_FLG_NOIR);
1903 obd->obd_no_ir = !has_ir;
/* compiled-in defaults (the guarding conditions are not visible in
 * this excerpt) */
1907 soft = OBD_RECOVERY_TIME_SOFT;
1909 hard = OBD_RECOVERY_TIME_HARD;
1911 /* target may have ir_factor configured. */
1912 factor = OBD_IR_FACTOR_DEFAULT;
1913 if (obd->obd_recovery_ir_factor)
1914 factor = obd->obd_recovery_ir_factor;
1917 int new_soft = soft;
1918 int new_hard = hard;
1920 /* adjust timeout value by imperative recovery */
1922 new_soft = (soft * factor) / OBD_IR_FACTOR_MAX;
1923 new_hard = (hard * factor) / OBD_IR_FACTOR_MAX;
1925 /* make sure the timeout is not too short */
1926 new_soft = max(min, new_soft);
1927 new_hard = max(new_soft, new_hard);
1929 LCONSOLE_INFO("%s: Imperative Recovery enabled, recovery "
1930 "window shrunk from %d-%d down to %d-%d\n",
1931 obd->obd_name, soft, hard, new_soft, new_hard);
/* max() keeps any larger recovery timeout the obd already has */
1938 obd->obd_recovery_timeout = max(obd->obd_recovery_timeout, soft);
1939 obd->obd_recovery_time_hard = hard;
1940 obd->obd_recovery_ir_factor = factor;
1942 EXPORT_SYMBOL(server_calc_timeout);
1944 /*************** mount common between server and client ***************/
/* Umount work shared by server and client superblocks: stop the MGC
 * for @sb and then drop the reference on the mounted disk. A
 * lustre_stop_mgc() result of -ENOENT is not treated as an error.
 * NOTE(review): the code following the last comment and the return
 * statement are not visible in this excerpt. */
1947 int lustre_common_put_super(struct super_block *sb)
1952 CDEBUG(D_MOUNT, "dropping sb %p\n", sb);
1954 /* Drop a ref to the MGC */
1955 rc = lustre_stop_mgc(sb);
1956 if (rc && (rc != -ENOENT)) {
1958 CERROR("Can't stop MGC: %d\n", rc);
1961 /* BUSY just means that there's some other obd that
1962 needs the mgc. Let him clean it up. */
1963 CDEBUG(D_MOUNT, "MGC still in use\n");
1965 /* Drop a ref to the mounted disk */
/* Dump the parsed mount data through PRINT_CMD at PRINT_MASK: profile
 * (client mounts only), device, flags, options, any non-zero recovery
 * times and every excluded OST index. */
1971 static void lmd_print(struct lustre_mount_data *lmd)
1975 PRINT_CMD(PRINT_MASK, " mount data:\n");
1976 if (lmd_is_client(lmd))
1977 PRINT_CMD(PRINT_MASK, "profile: %s\n", lmd->lmd_profile);
1978 PRINT_CMD(PRINT_MASK, "device: %s\n", lmd->lmd_dev);
1979 PRINT_CMD(PRINT_MASK, "flags: %x\n", lmd->lmd_flags);
1982 PRINT_CMD(PRINT_MASK, "options: %s\n", lmd->lmd_opts);
1984 if (lmd->lmd_recovery_time_soft)
1985 PRINT_CMD(PRINT_MASK, "recovery time soft: %d\n",
1986 lmd->lmd_recovery_time_soft);
1988 if (lmd->lmd_recovery_time_hard)
1989 PRINT_CMD(PRINT_MASK, "recovery time hard: %d\n",
1990 lmd->lmd_recovery_time_hard);
/* exclude entries are printed as four-digit hexadecimal OST indices */
1992 for (i = 0; i < lmd->lmd_exclude_count; i++) {
1993 PRINT_CMD(PRINT_MASK, "exclude %d: OST%04x\n", i,
1994 lmd->lmd_exclude[i]);
1998 /* Is this server on the exclusion list?
 Only names that server_name2index() classifies as OSTs are compared
 against lmd_exclude[]; a match is logged with CWARN.
 NOTE(review): the return statements are not visible in this excerpt. */
1999 int lustre_check_exclusion(struct super_block *sb, char *svname)
2001 struct lustre_sb_info *lsi = s2lsi(sb);
2002 struct lustre_mount_data *lmd = lsi->lsi_lmd;
2007 rc = server_name2index(svname, &index, NULL);
2008 if (rc != LDD_F_SV_TYPE_OST)
2009 /* Only exclude OSTs */
2012 CDEBUG(D_MOUNT, "Check exclusion %s (%d) in %d of %s\n", svname,
2013 index, lmd->lmd_exclude_count, lmd->lmd_dev);
/* linear scan of the exclude array for this index */
2015 for(i = 0; i < lmd->lmd_exclude_count; i++) {
2016 if (index == lmd->lmd_exclude[i]) {
2017 CWARN("Excluding %s (on exclusion list)\n", svname);
2024 /* mount -v -o exclude=lustre-OST0001:lustre-OST0002 -t lustre ...
 Parses the exclude list starting at @ptr into a temporary array of
 indices, keeping only entries that server_name2index() reports as
 OSTs, then copies them into a right-sized lmd->lmd_exclude. If that
 final allocation fails the exclude count is reset to 0. */
2025 static int lmd_make_exclusion(struct lustre_mount_data *lmd, char *ptr)
2027 char *s1 = ptr, *s2;
2028 __u32 index, *exclude_list;
2032 /* The shortest an ost name can be is 8 chars: -OST0000.
2033 We don't actually know the fsname at this time, so in fact
2034 a user could specify any fsname. */
2035 devmax = strlen(ptr) / 8 + 1;
2037 /* temp storage until we figure out how many we have */
2038 OBD_ALLOC(exclude_list, sizeof(index) * devmax);
2042 /* we enter this fn pointing at the '=' */
2043 while (*s1 && *s1 != ' ' && *s1 != ',') {
2045 rc = server_name2index(s1, &index, &s2);
2047 CERROR("Can't parse server name '%s'\n", s1);
2050 if (rc == LDD_F_SV_TYPE_OST)
2051 exclude_list[lmd->lmd_exclude_count++] = index;
2053 CDEBUG(D_MOUNT, "ignoring exclude %.7s\n", s1);
2055 /* now we are pointing at ':' (next exclude)
2056 or ',' (end of excludes) */
/* stop once the temporary array, sized for devmax entries, is full */
2057 if (lmd->lmd_exclude_count >= devmax)
2060 if (rc >= 0) /* non-err */
2063 if (lmd->lmd_exclude_count) {
2064 /* permanent, freed in lustre_free_lsi */
2065 OBD_ALLOC(lmd->lmd_exclude, sizeof(index) *
2066 lmd->lmd_exclude_count);
2067 if (lmd->lmd_exclude) {
2068 memcpy(lmd->lmd_exclude, exclude_list,
2069 sizeof(index) * lmd->lmd_exclude_count);
2072 lmd->lmd_exclude_count = 0;
2075 OBD_FREE(exclude_list, sizeof(index) * devmax);
/* Store the value of an mgssec option: any previous lmd->lmd_mgssec is
 * freed, then the text at @ptr up to the next ',' (or to the end of the
 * string when there is none) is copied into a newly allocated,
 * NUL-terminated lmd->lmd_mgssec. */
2079 static int lmd_parse_mgssec(struct lustre_mount_data *lmd, char *ptr)
2084 if (lmd->lmd_mgssec != NULL) {
2085 OBD_FREE(lmd->lmd_mgssec, strlen(lmd->lmd_mgssec) + 1);
2086 lmd->lmd_mgssec = NULL;
2089 tail = strchr(ptr, ',');
2091 length = strlen(ptr);
2093 length = tail - ptr;
2095 OBD_ALLOC(lmd->lmd_mgssec, length + 1);
2096 if (lmd->lmd_mgssec == NULL)
2099 memcpy(lmd->lmd_mgssec, ptr, length);
2100 lmd->lmd_mgssec[length] = '\0';
2104 /** Parse mount line options
2105 * e.g. mount -v -t lustre -o abort_recov uml1:uml2:/lustre-client /mnt/lustre
2106 * dev is passed as device=uml1:/lustre by mount.lustre
 * Lustre-specific options set flags or values in @lmd. The device name
 * is copied to lmd_dev; a ":/" in it marks a client mount and yields
 * lmd_profile "<fsname>-client". Whatever option text is left in
 * @options afterwards is saved in lmd_opts.
2108 static int lmd_parse(char *options, struct lustre_mount_data *lmd)
2110 char *s1, *s2, *devname = NULL;
2111 struct lustre_mount_data *raw = (struct lustre_mount_data *)options;
2117 LCONSOLE_ERROR_MSG(0x162, "Missing mount data: check that "
2118 "/sbin/mount.lustre is installed.\n");
2122 /* Options should be a string - try to detect old lmd data */
2123 if ((raw->lmd_magic & 0xffffff00) == (LMD_MAGIC & 0xffffff00)) {
2124 LCONSOLE_ERROR_MSG(0x163, "You're using an old version of "
2125 "/sbin/mount.lustre. Please install "
2126 "version %s\n", LUSTRE_VERSION_STRING);
2129 lmd->lmd_magic = LMD_MAGIC;
2131 /* Set default flags here */
2136 int time_min = OBD_RECOVERY_TIME_MIN;
2138 /* Skip whitespace and extra commas */
2139 while (*s1 == ' ' || *s1 == ',')
2142 /* Client options are parsed in ll_options: eg. flock,
2145 /* Parse non-ldiskfs options here. Rather than modifying
2146 ldiskfs, we just zero these out here */
/* NOTE(review): options are recognised by strncmp() prefix, so any
 * option that merely starts with one of these names also matches. */
2147 if (strncmp(s1, "abort_recov", 11) == 0) {
2148 lmd->lmd_flags |= LMD_FLG_ABORT_RECOV;
2150 } else if (strncmp(s1, "recovery_time_soft=", 19) == 0) {
2151 lmd->lmd_recovery_time_soft = max_t(int,
2152 simple_strtoul(s1 + 19, NULL, 10), time_min);
2154 } else if (strncmp(s1, "recovery_time_hard=", 19) == 0) {
2155 lmd->lmd_recovery_time_hard = max_t(int,
2156 simple_strtoul(s1 + 19, NULL, 10), time_min);
2158 } else if (strncmp(s1, "noir", 4) == 0) {
2159 lmd->lmd_flags |= LMD_FLG_NOIR; /* test purpose only. */
2161 } else if (strncmp(s1, "nosvc", 5) == 0) {
2162 lmd->lmd_flags |= LMD_FLG_NOSVC;
2164 } else if (strncmp(s1, "nomgs", 5) == 0) {
2165 lmd->lmd_flags |= LMD_FLG_NOMGS;
2167 } else if (strncmp(s1, "noscrub", 7) == 0) {
2168 lmd->lmd_flags |= LMD_FLG_NOSCRUB;
2170 } else if (strncmp(s1, "writeconf", 9) == 0) {
2171 lmd->lmd_flags |= LMD_FLG_WRITECONF;
2173 } else if (strncmp(s1, "mgssec=", 7) == 0) {
2174 rc = lmd_parse_mgssec(lmd, s1 + 7);
2178 /* ost exclusion list */
2179 } else if (strncmp(s1, "exclude=", 8) == 0) {
/* s1 + 7 is the '=' itself, which is where lmd_make_exclusion()
 * expects to start */
2180 rc = lmd_make_exclusion(lmd, s1 + 7);
2185 /* Linux 2.4 doesn't pass the device, so we stuck it at the
2186 end of the options. */
2187 else if (strncmp(s1, "device=", 7) == 0) {
2189 /* terminate options right before device. device
2190 must be the last one. */
2196 s2 = strchr(s1, ',');
2204 memmove(s1, s2, strlen(s2) + 1);
2210 LCONSOLE_ERROR_MSG(0x164, "Can't find the device name "
2211 "(need mount option 'device=...')\n");
2215 s1 = strstr(devname, ":/");
2218 lmd->lmd_flags |= LMD_FLG_CLIENT;
2219 /* Remove leading /s from fsname */
2220 while (*++s1 == '/') ;
2221 /* Freed in lustre_free_lsi */
/* + 8 covers the "-client" suffix (7 characters) plus the NUL */
2222 OBD_ALLOC(lmd->lmd_profile, strlen(s1) + 8);
2223 if (!lmd->lmd_profile)
2225 sprintf(lmd->lmd_profile, "%s-client", s1);
2228 /* Freed in lustre_free_lsi */
2229 OBD_ALLOC(lmd->lmd_dev, strlen(devname) + 1);
2232 strcpy(lmd->lmd_dev, devname);
2234 /* Save mount options */
2235 s1 = options + strlen(options) - 1;
2236 while (s1 >= options && (*s1 == ',' || *s1 == ' '))
2238 if (*options != 0) {
2239 /* Freed in lustre_free_lsi */
2240 OBD_ALLOC(lmd->lmd_opts, strlen(options) + 1);
2243 strcpy(lmd->lmd_opts, options);
2247 lmd->lmd_magic = LMD_MAGIC;
2252 CERROR("Bad mount options %s\n", options);
/* Bundle handed to lustre_fill_super() as its data argument: the raw
 * mount option data (read there as lmd2_data) together with the
 * vfsmount of the mount in progress. */
2256 struct lustre_mount_data2 {
2258 struct vfsmount *lmd2_mnt;
2261 /** This is the entry point for the mount call into Lustre.
2262 * This is called when a server or client is mounted,
2263 * and this is where we start setting things up.
2264 * @param data Mount options (e.g. -o flock,abort_recov)
 * Client mounts (lmd_is_client) start the MGC and then call the
 * registered client_fill_super hook; every other mount is flagged
 * LSI_SERVER and goes through server_fill_super().
2266 int lustre_fill_super(struct super_block *sb, void *data, int silent)
2268 struct lustre_mount_data *lmd;
2269 struct lustre_mount_data2 *lmd2 = data;
2270 struct lustre_sb_info *lsi;
2274 CDEBUG(D_MOUNT|D_VFSTRACE, "VFS Op: sb %p\n", sb);
2276 lsi = lustre_init_lsi(sb);
2282 * Disable lockdep during mount, because mount locking patterns are
2288 * LU-639: the obd cleanup of last mount may not finish yet, wait here.
2290 obd_zombie_barrier();
2292 /* Figure out the lmd from the mount options */
2293 if (lmd_parse((char *)(lmd2->lmd2_data), lmd)) {
2295 GOTO(out, rc = -EINVAL);
2298 if (lmd_is_client(lmd)) {
2299 CDEBUG(D_MOUNT, "Mounting client %s\n", lmd->lmd_profile);
2300 if (!client_fill_super) {
2301 LCONSOLE_ERROR_MSG(0x165, "Nothing registered for "
2302 "client mount! Is the 'lustre' "
2303 "module loaded?\n");
2307 rc = lustre_start_mgc(sb);
2312 /* Connect and start */
2313 /* (should always be ll_fill_super) */
2314 rc = (*client_fill_super)(sb, lmd2->lmd2_mnt);
2315 /* c_f_s will call lustre_common_put_super on failure */
2318 CDEBUG(D_MOUNT, "Mounting server from %s\n", lmd->lmd_dev);
2319 lsi->lsi_flags |= LSI_SERVER;
2320 rc = server_fill_super(sb);
2321 /* s_f_s calls lustre_start_mgc after the mount because we need
2322 the MGS nids which are stored on disk. Plus, we may
2323 need to start the MGS first. */
2324 /* s_f_s will call server_put_super on failure */
2327 /* If error happens in fill_super() call, @lsi will be killed there.
2328 * This is why we do not put it here. */
/* NOTE(review): lmd may point into an lsi that a failed fill_super has
 * already freed; the s2lsi(sb) test below is what avoids touching it
 * in that case - confirm. */
2332 CERROR("Unable to mount %s (%d)\n",
2333 s2lsi(sb) ? lmd->lmd_dev : "", rc);
2335 CDEBUG(D_SUPER, "Mount %s complete\n",
2343 /* We can't call ll_fill_super by name because it lives in a module that
2344 must be loaded after this one.
 Instead that module registers its fill_super routine here, and
 lustre_fill_super() calls it through the client_fill_super pointer. */
2345 void lustre_register_client_fill_super(int (*cfs)(struct super_block *sb,
2346 struct vfsmount *mnt))
2348 client_fill_super = cfs;
/* Register the callback that lustre_kill_super() invokes for
 * non-server superblocks before the anonymous superblock is killed. */
2351 void lustre_register_kill_super_cb(void (*cfs)(struct super_block *sb))
2353 kill_super_cb = cfs;
2356 /***************** FS registration ******************/
/* get_sb entry point, in two variants selected by kernel version.
 * Before 2.6.18 it returns the super_block and passes the mount data
 * straight to get_sb_nodev(). The other variant also receives the
 * vfsmount and wraps data and mnt in a lustre_mount_data2 before
 * calling get_sb_nodev(). Both use lustre_fill_super as the fill
 * callback. */
2358 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18))
2359 struct super_block * lustre_get_sb(struct file_system_type *fs_type, int flags,
2360 const char *devname, void * data)
2362 return get_sb_nodev(fs_type, flags, data, lustre_fill_super);
2365 int lustre_get_sb(struct file_system_type *fs_type, int flags,
2366 const char *devname, void * data, struct vfsmount *mnt)
2368 struct lustre_mount_data2 lmd2 = {data, mnt};
2370 return get_sb_nodev(fs_type, flags, &lmd2, lustre_fill_super, mnt);
/* kill_sb hook: when a callback is registered and the superblock still
 * has an lsi that is not flagged LSI_SERVER, run that callback first;
 * kill_anon_super() is then called for @sb. */
2374 void lustre_kill_super(struct super_block *sb)
2376 struct lustre_sb_info *lsi = s2lsi(sb);
2378 if (kill_super_cb && lsi && !(lsi->lsi_flags & LSI_SERVER))
2379 (*kill_super_cb)(sb);
2381 kill_anon_super(sb);
2384 /** Register the "lustre" fs type
 * The descriptor wires lustre_get_sb and lustre_kill_super in as the
 * get_sb and kill_sb hooks; its fs_flags include FS_BINARY_MOUNTDATA,
 * FS_REQUIRES_DEV and LL_RENAME_DOES_D_MOVE, plus a flag guarded by
 * FS_HAS_FIEMAP that is not visible in this excerpt.
2386 struct file_system_type lustre_fs_type = {
2387 .owner = THIS_MODULE,
2389 .get_sb = lustre_get_sb,
2390 .kill_sb = lustre_kill_super,
2391 .fs_flags = FS_BINARY_MOUNTDATA | FS_REQUIRES_DEV |
2392 #ifdef FS_HAS_FIEMAP
2395 LL_RENAME_DOES_D_MOVE,
/* Register lustre_fs_type with the VFS; returns whatever
 * register_filesystem() returns. */
2398 int lustre_register_fs(void)
2400 return register_filesystem(&lustre_fs_type);
/* Remove lustre_fs_type from the VFS; returns whatever
 * unregister_filesystem() returns. */
2403 int lustre_unregister_fs(void)
2405 return unregister_filesystem(&lustre_fs_type);
/* Mount-related symbols exported for use by other kernel modules. */
2408 EXPORT_SYMBOL(lustre_register_client_fill_super);
2409 EXPORT_SYMBOL(lustre_register_kill_super_cb);
2410 EXPORT_SYMBOL(lustre_common_put_super);
2411 EXPORT_SYMBOL(lustre_process_log);
2412 EXPORT_SYMBOL(lustre_end_log);
2413 EXPORT_SYMBOL(server_get_mount);
2414 EXPORT_SYMBOL(server_get_mount_2);
2415 EXPORT_SYMBOL(server_put_mount);
2416 EXPORT_SYMBOL(server_put_mount_2);
2417 EXPORT_SYMBOL(server_register_target);
2418 EXPORT_SYMBOL(server_name2index);
2419 EXPORT_SYMBOL(server_mti_print);
2420 EXPORT_SYMBOL(do_lcfg);