4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2012, Whamcloud, Inc.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lustre/obdclass/obd_mount.c
38 * Client/server mount routines
40 * Author: Nathan Rutman <nathan@clusterfs.com>
/* Debug configuration for this file. D_MOUNT is the mask passed to the
 * CDEBUG() calls below; PRINT_CMD/PRINT_MASK are used by ldd_print() and
 * server_mti_print().
 * NOTE(review): D_MOUNT and PRINT_MASK expand to unparenthesized '|'
 * expressions; that is harmless as a lone macro argument, but confirm they
 * are never used inside a larger expression. */
44 #define DEBUG_SUBSYSTEM S_CLASS
45 #define D_MOUNT D_SUPER|D_CONFIG /*|D_WARNING */
46 #define PRINT_CMD CDEBUG
47 #define PRINT_MASK D_SUPER|D_CONFIG
51 #include <lustre_fsfilt.h>
52 #include <obd_class.h>
53 #include <lustre/lustre_user.h>
54 #include <linux/version.h>
55 #include <lustre_log.h>
56 #include <lustre_disk.h>
57 #include <lustre_param.h>
/* File-local hooks, both initialised to NULL. Nothing in the code visible
 * here assigns them; presumably they are registered elsewhere in this file
 * (TODO confirm). */
59 static int (*client_fill_super)(struct super_block *sb,
60 struct vfsmount *mnt) = NULL;
61 static void (*kill_super_cb)(struct super_block *sb) = NULL;
63 /*********** mount lookup *********/
/* lustre_mount_info_lock is taken by the register/deregister/get/put helpers
 * below around their server_find_mount() lookups and around updates of
 * server_mount_info_list. */
65 CFS_DEFINE_MUTEX(lustre_mount_info_lock);
66 static CFS_LIST_HEAD(server_mount_info_list);
68 static struct lustre_mount_info *server_find_mount(const char *name)
71 struct lustre_mount_info *lmi;
74 cfs_list_for_each(tmp, &server_mount_info_list) {
75 lmi = cfs_list_entry(tmp, struct lustre_mount_info,
77 if (strcmp(name, lmi->lmi_name) == 0)
83 /* we must register an obd for a mount before we call the setup routine.
84 *_setup will call server_get_mount to get the mnt struct
85 by obd_name, since we can't pass the pointer to setup. */
/* Add a record for \a name to server_mount_info_list.
 * The record and a private copy of \a name are allocated before the lock is
 * taken.  If \a name is already on the list, the lock is dropped, both
 * allocations are freed again and "Already registered" is logged; otherwise
 * the record is linked into the list under the lock. */
86 static int server_register_mount(const char *name, struct super_block *sb,
89 struct lustre_mount_info *lmi;
96 OBD_ALLOC(lmi, sizeof(*lmi));
99 OBD_ALLOC(name_cp, strlen(name) + 1);
101 OBD_FREE(lmi, sizeof(*lmi));
104 strcpy(name_cp, name);
106 cfs_mutex_lock(&lustre_mount_info_lock);
108 if (server_find_mount(name)) {
109 cfs_mutex_unlock(&lustre_mount_info_lock);
110 OBD_FREE(lmi, sizeof(*lmi));
/* name_cp holds a copy of name, so strlen(name) + 1 is its allocated size */
111 OBD_FREE(name_cp, strlen(name) + 1);
112 CERROR("Already registered %s\n", name);
115 lmi->lmi_name = name_cp;
118 cfs_list_add(&lmi->lmi_list_chain, &server_mount_info_list);
120 cfs_mutex_unlock(&lustre_mount_info_lock);
122 CDEBUG(D_MOUNT, "reg_mnt %p from %s, vfscount=%d\n",
123 lmi->lmi_mnt, name, mnt_get_count(lmi->lmi_mnt));
128 /* when an obd no longer needs a mount */
/* Remove the record registered under \a name.
 * The lookup, the freeing of the name copy, the list unlink and the freeing
 * of the record all happen under lustre_mount_info_lock.  The
 * "not registered" error path drops the lock before logging. */
129 static int server_deregister_mount(const char *name)
131 struct lustre_mount_info *lmi;
134 cfs_mutex_lock(&lustre_mount_info_lock);
135 lmi = server_find_mount(name);
137 cfs_mutex_unlock(&lustre_mount_info_lock);
138 CERROR("%s not registered\n", name);
142 CDEBUG(D_MOUNT, "dereg_mnt %p from %s, vfscount=%d\n",
143 lmi->lmi_mnt, name, mnt_get_count(lmi->lmi_mnt));
/* the name is freed first, while lmi_name is still reachable through lmi */
145 OBD_FREE(lmi->lmi_name, strlen(lmi->lmi_name) + 1);
146 cfs_list_del(&lmi->lmi_list_chain);
147 OBD_FREE(lmi, sizeof(*lmi));
148 cfs_mutex_unlock(&lustre_mount_info_lock);
153 /* obd's look up a registered mount using their obdname. This is just
154 for initial obd setup to find the mount struct. It should not be
155 called every time you want to mntget. */
/* Look up the mount registered under \a name and take references on it:
 * mntget() on lmi_mnt and an increment of the superblock's lsi_mounts
 * counter.  Logs "Can't find mount" on the lookup-failure path.
 * NOTE(review): lustre_mount_info_lock is released right after the lookup,
 * before lmi is dereferenced; this looks like it relies on the entry not
 * being deregistered concurrently - confirm. */
156 struct lustre_mount_info *server_get_mount(const char *name)
158 struct lustre_mount_info *lmi;
159 struct lustre_sb_info *lsi;
162 cfs_mutex_lock(&lustre_mount_info_lock);
163 lmi = server_find_mount(name);
164 cfs_mutex_unlock(&lustre_mount_info_lock);
166 CERROR("Can't find mount for %s\n", name);
169 lsi = s2lsi(lmi->lmi_sb);
170 mntget(lmi->lmi_mnt);
171 cfs_atomic_inc(&lsi->lsi_mounts);
173 CDEBUG(D_MOUNT, "get_mnt %p from %s, refs=%d, vfscount=%d\n",
174 lmi->lmi_mnt, name, cfs_atomic_read(&lsi->lsi_mounts),
175 mnt_get_count(lmi->lmi_mnt));
181 * Used by mdt to get mount_info from obdname.
182 * There is no blocking when using the mount_info.
183 * Do not use server_get_mount for this purpose.
/* Lookup-only variant of server_get_mount(): finds the record for \a name
 * under lustre_mount_info_lock.  Unlike server_get_mount(), the visible code
 * shows no mntget() and no lsi_mounts increment. */
185 struct lustre_mount_info *server_get_mount_2(const char *name)
187 struct lustre_mount_info *lmi;
190 cfs_mutex_lock(&lustre_mount_info_lock);
191 lmi = server_find_mount(name);
192 cfs_mutex_unlock(&lustre_mount_info_lock);
194 CERROR("Can't find mount for %s\n", name);
/* Helper operating on a vfsmount.  Only the HAVE_KERNEL_LOCKED-conditional
 * kernel_locked() test is visible in this extract.
 * NOTE(review): presumably this releases the big kernel lock around the
 * mntput() on old kernels - confirm against the full body. */
199 static void unlock_mntput(struct vfsmount *mnt)
201 #ifdef HAVE_KERNEL_LOCKED
202 /* for kernel < 2.6.37 */
203 if (kernel_locked()) {
/* Forward declaration: server_put_mount() below calls lustre_put_lsi()
 * before its definition later in the file. */
215 static int lustre_put_lsi(struct super_block *sb);
217 /* to be called from obd_cleanup methods */
/* Release a reference on the mount registered under \a name.
 * The vfsmount count is sampled up front (minus one) because, as the comment
 * below says, the mount must not be dereferenced afterwards; that sampled
 * value is what the messages print.  The record is looked up by name, it is
 * asserted to refer to \a mnt, the lsi reference is dropped through
 * lustre_put_lsi(), and the name is deregistered. */
218 int server_put_mount(const char *name, struct vfsmount *mnt)
220 struct lustre_mount_info *lmi;
221 struct lustre_sb_info *lsi;
222 int count = mnt_get_count(mnt) - 1;
225 /* This might be the last one, can't deref after this */
228 cfs_mutex_lock(&lustre_mount_info_lock);
229 lmi = server_find_mount(name);
230 cfs_mutex_unlock(&lustre_mount_info_lock);
232 CERROR("Can't find mount for %s\n", name);
235 lsi = s2lsi(lmi->lmi_sb);
236 LASSERT(lmi->lmi_mnt == mnt);
238 CDEBUG(D_MOUNT, "put_mnt %p from %s, refs=%d, vfscount=%d\n",
239 lmi->lmi_mnt, name, cfs_atomic_read(&lsi->lsi_mounts), count);
/* a non-zero lustre_put_lsi() result is treated as the last put */
241 if (lustre_put_lsi(lmi->lmi_sb)) {
242 CDEBUG(D_MOUNT, "Last put of mnt %p from %s, vfscount=%d\n",
243 lmi->lmi_mnt, name, count);
244 /* last mount is the One True Mount */
246 CERROR("%s: mount busy, vfscount=%d!\n", name, count);
249 /* this obd should never need the mount again */
250 server_deregister_mount(name);
255 /* Corresponding to server_get_mount_2 */
/* Counterpart of server_get_mount_2().  The body is not visible in this
 * extract, so nothing is claimed here about what it does. */
256 int server_put_mount_2(const char *name, struct vfsmount *mnt)
262 /******* mount helper utilities *********/
/* Dump the fields of \a ldd through PRINT_CMD at PRINT_MASK level, one field
 * per message: names, uuid, index, config version, flags, disk fs type (via
 * MT_STR), mount options, params and user comment. */
265 static void ldd_print(struct lustre_disk_data *ldd)
267 PRINT_CMD(PRINT_MASK, " disk data:\n");
268 PRINT_CMD(PRINT_MASK, "server: %s\n", ldd->ldd_svname);
269 PRINT_CMD(PRINT_MASK, "uuid: %s\n", (char *)ldd->ldd_uuid);
270 PRINT_CMD(PRINT_MASK, "fs: %s\n", ldd->ldd_fsname);
271 PRINT_CMD(PRINT_MASK, "index: %04x\n", ldd->ldd_svindex);
272 PRINT_CMD(PRINT_MASK, "config: %d\n", ldd->ldd_config_ver);
273 PRINT_CMD(PRINT_MASK, "flags: %#x\n", ldd->ldd_flags);
274 PRINT_CMD(PRINT_MASK, "diskfs: %s\n", MT_STR(ldd));
275 PRINT_CMD(PRINT_MASK, "options: %s\n", ldd->ldd_mount_opts);
276 PRINT_CMD(PRINT_MASK, "params: %s\n", ldd->ldd_params);
277 PRINT_CMD(PRINT_MASK, "comment: %s\n", ldd->ldd_userdata);
/* Read MOUNT_DATA_FILE into \a ldd while running in \a mount_ctxt, then
 * validate it.  The file size must equal sizeof(*ldd), ldd_magic must be
 * LDD_MAGIC, and neither ldd_feature_incompat nor ldd_feature_rocompat may
 * carry bits outside the supported masks.  Every failed check visible here
 * jumps to out_close with rc = -EINVAL.  The run context pushed at the start
 * is popped again before leaving. */
281 static int ldd_parse(struct lvfs_run_ctxt *mount_ctxt,
282 struct lustre_disk_data *ldd)
284 struct lvfs_run_ctxt saved;
291 push_ctxt(&saved, mount_ctxt, NULL);
293 file = filp_open(MOUNT_DATA_FILE, O_RDONLY, 0644);
296 CERROR("cannot open %s: rc = %d\n", MOUNT_DATA_FILE, rc);
/* the on-disk file is expected to be exactly one struct lustre_disk_data */
300 len = i_size_read(file->f_dentry->d_inode);
301 CDEBUG(D_MOUNT, "Have %s, size %lu\n", MOUNT_DATA_FILE, len);
302 if (len != sizeof(*ldd)) {
303 CERROR("disk data size does not match: see %lu expect %u\n",
304 len, (int)sizeof(*ldd));
305 GOTO(out_close, rc = -EINVAL);
308 rc = lustre_fread(file, ldd, len, &off);
310 CERROR("error reading %s: read %d of %lu\n",
311 MOUNT_DATA_FILE, rc, len);
312 GOTO(out_close, rc = -EINVAL);
316 if (ldd->ldd_magic != LDD_MAGIC) {
317 /* FIXME add swabbing support */
318 CERROR("Bad magic in %s: %x!=%x\n", MOUNT_DATA_FILE,
319 ldd->ldd_magic, LDD_MAGIC);
320 GOTO(out_close, rc = -EINVAL);
323 if (ldd->ldd_feature_incompat & ~LDD_INCOMPAT_SUPP) {
324 CERROR("%s: unsupported incompat filesystem feature(s) %x\n",
326 ldd->ldd_feature_incompat & ~LDD_INCOMPAT_SUPP);
327 GOTO(out_close, rc = -EINVAL);
329 if (ldd->ldd_feature_rocompat & ~LDD_ROCOMPAT_SUPP) {
330 CERROR("%s: unsupported read-only filesystem feature(s) %x\n",
332 ldd->ldd_feature_rocompat & ~LDD_ROCOMPAT_SUPP);
333 /* Do something like remount filesystem read-only */
334 GOTO(out_close, rc = -EINVAL);
340 pop_ctxt(&saved, mount_ctxt, NULL);
/* Write \a ldd back to MOUNT_DATA_FILE (opened O_RDWR|O_SYNC) while running
 * in \a mount_ctxt.  The magic is asserted and ldd_config_ver is incremented
 * before the file is opened, so the counter is bumped even when the open or
 * the write fails.
 * NOTE(review): the write-failure message says "read %d of %lu"; it looks
 * like wording copied from ldd_parse() - confirm before changing it. */
344 static int ldd_write(struct lvfs_run_ctxt *mount_ctxt,
345 struct lustre_disk_data *ldd)
347 struct lvfs_run_ctxt saved;
350 unsigned long len = sizeof(struct lustre_disk_data);
354 LASSERT(ldd->ldd_magic == LDD_MAGIC);
356 ldd->ldd_config_ver++;
358 push_ctxt(&saved, mount_ctxt, NULL);
360 file = filp_open(MOUNT_DATA_FILE, O_RDWR|O_SYNC, 0644);
363 CERROR("cannot open %s: rc = %d\n", MOUNT_DATA_FILE, rc);
367 rc = lustre_fwrite(file, ldd, len, &off);
369 CERROR("error writing %s: read %d of %lu\n",
370 MOUNT_DATA_FILE, rc, len);
371 GOTO(out_close, rc = -EINVAL);
379 pop_ctxt(&saved, mount_ctxt, NULL);
384 /**************** config llog ********************/
386 /** Get a config log from the MGS and process it.
387 * This func is called for both clients and servers.
388 * Continue to process new statements appended to the logs
389 * (whenever the config lock is revoked) until lustre_end_log
391 * @param sb The superblock is used by the MGC to write to the local copy of
393 * @param logname The name of the llog to replicate from the MGS
394 * @param cfg Since the same mgc may be used to follow multiple config logs
395 * (e.g. ost1, ost2, client), the config_llog_instance keeps the state for
396 * this log, and is added to the mgc's list of logs to follow.
398 int lustre_process_log(struct super_block *sb, char *logname,
399 struct config_llog_instance *cfg)
401 struct lustre_cfg *lcfg;
402 struct lustre_cfg_bufs *bufs;
403 struct lustre_sb_info *lsi = s2lsi(sb);
404 struct obd_device *mgc = lsi->lsi_mgc;
415 /* mgc_process_config */
416 lustre_cfg_bufs_reset(bufs, mgc->obd_name);
417 lustre_cfg_bufs_set_string(bufs, 1, logname);
418 lustre_cfg_bufs_set(bufs, 2, cfg, sizeof(*cfg));
419 lustre_cfg_bufs_set(bufs, 3, &sb, sizeof(sb));
420 lcfg = lustre_cfg_new(LCFG_LOG_START, bufs);
421 rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
422 lustre_cfg_free(lcfg);
427 LCONSOLE_ERROR_MSG(0x15b, "%s: The configuration from log '%s'"
428 "failed from the MGS (%d). Make sure this "
429 "client and the MGS are running compatible "
430 "versions of Lustre.\n",
431 mgc->obd_name, logname, rc);
434 LCONSOLE_ERROR_MSG(0x15c, "%s: The configuration from log '%s' "
435 "failed (%d). This may be the result of "
436 "communication errors between this node and "
437 "the MGS, a bad configuration, or other "
438 "errors. See the syslog for more "
439 "information.\n", mgc->obd_name, logname,
442 /* class_obd_list(); */
446 /* Stop watching this config log for updates */
/* Send LCFG_LOG_END for \a logname to the superblock's MGC.
 * Buffer 1 carries the log name and buffer 2 the config_llog_instance; the
 * lustre_cfg is freed after obd_process_config() returns.  Unlike
 * lustre_process_log(), the lustre_cfg_bufs here is a stack variable. */
447 int lustre_end_log(struct super_block *sb, char *logname,
448 struct config_llog_instance *cfg)
450 struct lustre_cfg *lcfg;
451 struct lustre_cfg_bufs bufs;
452 struct lustre_sb_info *lsi = s2lsi(sb);
453 struct obd_device *mgc = lsi->lsi_mgc;
460 /* mgc_process_config */
461 lustre_cfg_bufs_reset(&bufs, mgc->obd_name);
462 lustre_cfg_bufs_set_string(&bufs, 1, logname);
464 lustre_cfg_bufs_set(&bufs, 2, cfg, sizeof(*cfg));
465 lcfg = lustre_cfg_new(LCFG_LOG_END, &bufs);
466 rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
467 lustre_cfg_free(lcfg);
471 /**************** obd start *******************/
473 /** lustre_cfg_bufs are a holdover from 1.4; we can still set these up from
474 * lctl (and do for echo cli/srv.
476 int do_lcfg(char *cfgname, lnet_nid_t nid, int cmd,
477 char *s1, char *s2, char *s3, char *s4)
479 struct lustre_cfg_bufs bufs;
480 struct lustre_cfg * lcfg = NULL;
483 CDEBUG(D_TRACE, "lcfg %s %#x %s %s %s %s\n", cfgname,
484 cmd, s1, s2, s3, s4);
486 lustre_cfg_bufs_reset(&bufs, cfgname);
488 lustre_cfg_bufs_set_string(&bufs, 1, s1);
490 lustre_cfg_bufs_set_string(&bufs, 2, s2);
492 lustre_cfg_bufs_set_string(&bufs, 3, s3);
494 lustre_cfg_bufs_set_string(&bufs, 4, s4);
496 lcfg = lustre_cfg_new(cmd, &bufs);
497 lcfg->lcfg_nid = nid;
498 rc = class_process_config(lcfg);
499 lustre_cfg_free(lcfg);
503 /** Call class_attach and class_setup. These methods in turn call
504 * obd type-specific methods.
506 static int lustre_start_simple(char *obdname, char *type, char *uuid,
510 CDEBUG(D_MOUNT, "Starting obd %s (typ=%s)\n", obdname, type);
512 rc = do_lcfg(obdname, 0, LCFG_ATTACH, type, uuid, 0, 0);
514 CERROR("%s attach error %d\n", obdname, rc);
517 rc = do_lcfg(obdname, 0, LCFG_SETUP, s1, s2, 0, 0);
519 CERROR("%s setup error %d\n", obdname, rc);
520 do_lcfg(obdname, 0, LCFG_DETACH, 0, 0, 0, 0);
525 /* Set up a MGS to serve startup logs */
/* Start the MGS obd on this superblock's server mount.
 * An existing registration under LUSTRE_MGS_OBDNAME is reported with console
 * error 0x15d.  Otherwise the mount is registered under that name and the
 * obd is started with lustre_start_simple(); console error 0x15e reports a
 * start failure.
 * NOTE(review): the server_find_mount() call here is not bracketed by
 * lustre_mount_info_lock in the visible lines - confirm whether that is
 * intentional. */
526 static int server_start_mgs(struct super_block *sb)
528 struct lustre_sb_info *lsi = s2lsi(sb);
529 struct vfsmount *mnt = lsi->lsi_srv_mnt;
530 struct lustre_mount_info *lmi;
535 /* It is impossible to have more than 1 MGS per node, since
536 MGC wouldn't know which to connect to */
537 lmi = server_find_mount(LUSTRE_MGS_OBDNAME);
539 lsi = s2lsi(lmi->lmi_sb);
540 LCONSOLE_ERROR_MSG(0x15d, "The MGS service was already started"
542 lsi->lsi_ldd->ldd_svname);
546 CDEBUG(D_CONFIG, "Start MGS service %s\n", LUSTRE_MGS_OBDNAME);
548 rc = server_register_mount(LUSTRE_MGS_OBDNAME, sb, mnt);
551 rc = lustre_start_simple(LUSTRE_MGS_OBDNAME, LUSTRE_MGS_NAME,
552 LUSTRE_MGS_OBDNAME, 0, 0);
553 /* Do NOT call server_deregister_mount() here. This leads to
554 * inability to clean up cleanly and free lsi and other stuff when
555 * mgs calls server_put_mount() in error handling case. -umka */
559 LCONSOLE_ERROR_MSG(0x15e, "Failed to start MGS '%s' (%d). "
560 "Is the 'mgs' module loaded?\n",
561 LUSTRE_MGS_OBDNAME, rc);
/* Stop the MGS obd: look it up by LUSTRE_MGS_OBDNAME and run
 * class_manual_cleanup() on it.  A "not running" debug message covers the
 * case where the name lookup does not yield an obd. */
565 static int server_stop_mgs(struct super_block *sb)
567 struct obd_device *obd;
571 CDEBUG(D_MOUNT, "Stop MGS service %s\n", LUSTRE_MGS_OBDNAME);
573 /* There better be only one MGS */
574 obd = class_name2obd(LUSTRE_MGS_OBDNAME);
576 CDEBUG(D_CONFIG, "mgs %s not running\n", LUSTRE_MGS_OBDNAME);
580 /* The MGS should always stop when we say so */
582 rc = class_manual_cleanup(obd);
/* Taken by lustre_start_mgc() and lustre_stop_mgc() around MGC setup and
 * teardown, including the cl_mgc_refcount updates. */
586 CFS_DEFINE_MUTEX(mgc_start_lock);
/* Review summary of lustre_start_mgc(), based on the statements visible here:
 *  - the MGC obd name is LUSTRE_MGC_OBDNAME followed by the string form of
 *    the first MGS nid found (mgsnode= param, a local non-LO nid on an MGS,
 *    or the client's mount device string);
 *  - an existing obd of that name that is not stopping is re-used: mgssec is
 *    pushed to it, cl_mgc_refcount is incremented and, for clients, the
 *    LMD_FLG_NOIR mount flag is forced to match the existing connection;
 *  - for a new MGC, nid uuids are added with LCFG_ADD_UUID, the obd is
 *    started with lustre_start_simple(), failover nids are added,
 *    cl_mgc_refcount is set to 1 and obd_connect() is called;
 *  - mgc_start_lock is taken before the name is built and released before
 *    mgcname and niduuid are freed. */
588 /** Set up a mgc obd to process startup logs
590 * \param sb [in] super block of the mgc obd
592 * \retval 0 success, otherwise error code
594 static int lustre_start_mgc(struct super_block *sb)
596 struct obd_connect_data *data = NULL;
597 struct lustre_sb_info *lsi = s2lsi(sb);
598 struct obd_device *obd;
599 struct obd_export *exp;
600 struct obd_uuid *uuid;
603 char *mgcname = NULL, *niduuid = NULL, *mgssec = NULL;
606 int rc = 0, i = 0, j, len;
609 LASSERT(lsi->lsi_lmd);
611 /* Find the first non-lo MGS nid for our MGC name */
612 if (lsi->lsi_flags & LSI_SERVER) {
613 ptr = lsi->lsi_ldd->ldd_params;
614 /* Use mgsnode= nids */
615 if ((class_find_param(ptr, PARAM_MGSNODE, &ptr) == 0) &&
616 (class_parse_nid(ptr, &nid, &ptr) == 0)) {
618 } else if (IS_MGS(lsi->lsi_ldd)) {
619 lnet_process_id_t id;
620 while ((rc = LNetGetId(i++, &id)) != -ENOENT) {
621 if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND)
628 } else { /* client */
629 /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
630 ptr = lsi->lsi_lmd->lmd_dev;
631 if (class_parse_nid(ptr, &nid, &ptr) == 0)
635 CERROR("No valid MGS nids found.\n");
639 cfs_mutex_lock(&mgc_start_lock);
/* niduuid is allocated two bytes longer than mgcname; both are freed with
 * the same sizes at the end of the function */
641 len = strlen(LUSTRE_MGC_OBDNAME) + strlen(libcfs_nid2str(nid)) + 1;
642 OBD_ALLOC(mgcname, len);
643 OBD_ALLOC(niduuid, len + 2);
644 if (!mgcname || !niduuid)
645 GOTO(out_free, rc = -ENOMEM);
646 sprintf(mgcname, "%s%s", LUSTRE_MGC_OBDNAME, libcfs_nid2str(nid));
/* an unset lmd_mgssec is passed on as the empty string */
648 mgssec = lsi->lsi_lmd->lmd_mgssec ? lsi->lsi_lmd->lmd_mgssec : "";
652 GOTO(out_free, rc = -ENOMEM);
654 obd = class_name2obd(mgcname);
655 if (obd && !obd->obd_stopping) {
656 rc = obd_set_info_async(NULL, obd->obd_self_export,
657 strlen(KEY_MGSSEC), KEY_MGSSEC,
658 strlen(mgssec), mgssec, NULL);
662 /* Re-using an existing MGC */
663 cfs_atomic_inc(&obd->u.cli.cl_mgc_refcount);
665 /* IR compatibility check, only for clients */
666 if (lmd_is_client(lsi->lsi_lmd)) {
668 int vallen = sizeof(*data);
669 __u32 *flags = &lsi->lsi_lmd->lmd_flags;
671 rc = obd_get_info(NULL, obd->obd_self_export,
672 strlen(KEY_CONN_DATA), KEY_CONN_DATA,
673 &vallen, data, NULL);
675 has_ir = OCD_HAS_FLAG(data, IMP_RECOV);
676 if (has_ir ^ !(*flags & LMD_FLG_NOIR)) {
677 /* LMD_FLG_NOIR is for test purpose only */
679 "Trying to mount a client with IR setting "
680 "not compatible with current mgc. "
681 "Force to use current mgc setting that is "
683 has_ir ? "enabled" : "disabled");
685 *flags &= ~LMD_FLG_NOIR;
687 *flags |= LMD_FLG_NOIR;
692 /* If we are restarting the MGS, don't try to keep the MGC's
693 old connection, or registration will fail. */
694 if ((lsi->lsi_flags & LSI_SERVER) && IS_MGS(lsi->lsi_ldd)) {
695 CDEBUG(D_MOUNT, "New MGS with live MGC\n");
699 /* Try all connections, but only once (again).
700 We don't want to block another target from starting
701 (using its local copy of the log), but we do want to connect
702 if at all possible. */
704 CDEBUG(D_MOUNT, "%s: Set MGC reconnect %d\n", mgcname,recov_bk);
705 rc = obd_set_info_async(NULL, obd->obd_self_export,
706 sizeof(KEY_INIT_RECOV_BACKUP),
707 KEY_INIT_RECOV_BACKUP,
708 sizeof(recov_bk), &recov_bk, NULL);
712 CDEBUG(D_MOUNT, "Start MGC '%s'\n", mgcname);
714 /* Add the primary nids for the MGS */
716 sprintf(niduuid, "%s_%x", mgcname, i);
717 if (lsi->lsi_flags & LSI_SERVER) {
718 ptr = lsi->lsi_ldd->ldd_params;
719 if (IS_MGS(lsi->lsi_ldd)) {
720 /* Use local nids (including LO) */
721 lnet_process_id_t id;
722 while ((rc = LNetGetId(i++, &id)) != -ENOENT) {
723 rc = do_lcfg(mgcname, id.nid,
724 LCFG_ADD_UUID, niduuid, 0,0,0);
727 /* Use mgsnode= nids */
728 if (class_find_param(ptr, PARAM_MGSNODE, &ptr) != 0) {
729 CERROR("No MGS nids given.\n");
730 GOTO(out_free, rc = -EINVAL);
732 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
733 rc = do_lcfg(mgcname, nid,
734 LCFG_ADD_UUID, niduuid, 0,0,0);
738 } else { /* client */
739 /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
740 ptr = lsi->lsi_lmd->lmd_dev;
741 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
742 rc = do_lcfg(mgcname, nid,
743 LCFG_ADD_UUID, niduuid, 0,0,0);
745 /* Stop at the first failover nid */
751 CERROR("No valid MGS nids found.\n");
752 GOTO(out_free, rc = -EINVAL);
754 lsi->lsi_lmd->lmd_mgs_failnodes = 1;
756 /* Random uuid for MGC allows easier reconnects */
758 ll_generate_random_uuid(uuidc);
759 class_uuid_unparse(uuidc, uuid);
762 rc = lustre_start_simple(mgcname, LUSTRE_MGC_NAME,
763 (char *)uuid->uuid, LUSTRE_MGS_OBDNAME,
769 /* Add any failover MGS nids */
771 while ((*ptr == ':' ||
772 class_find_param(ptr, PARAM_MGSNODE, &ptr) == 0)) {
773 /* New failover node */
774 sprintf(niduuid, "%s_%x", mgcname, i);
776 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
778 rc = do_lcfg(mgcname, nid,
779 LCFG_ADD_UUID, niduuid, 0,0,0);
784 rc = do_lcfg(mgcname, 0, LCFG_ADD_CONN,
/* lmd_mgs_failnodes is the upper bound lustre_stop_mgc() uses when it
 * deletes the nid uuids again */
792 lsi->lsi_lmd->lmd_mgs_failnodes = i;
794 obd = class_name2obd(mgcname);
796 CERROR("Can't find mgcobd %s\n", mgcname);
797 GOTO(out_free, rc = -ENOTCONN);
800 rc = obd_set_info_async(NULL, obd->obd_self_export,
801 strlen(KEY_MGSSEC), KEY_MGSSEC,
802 strlen(mgssec), mgssec, NULL);
806 /* Keep a refcount of servers/clients who started with "mount",
807 so we know when we can get rid of the mgc. */
808 cfs_atomic_set(&obd->u.cli.cl_mgc_refcount, 1);
810 /* Try all connections, but only once. */
812 rc = obd_set_info_async(NULL, obd->obd_self_export,
813 sizeof(KEY_INIT_RECOV_BACKUP),
814 KEY_INIT_RECOV_BACKUP,
815 sizeof(recov_bk), &recov_bk, NULL);
818 CWARN("can't set %s %d\n", KEY_INIT_RECOV_BACKUP, rc);
819 /* We connect to the MGS at setup, and don't disconnect until cleanup */
820 data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_FID |
821 OBD_CONNECT_AT | OBD_CONNECT_FULL20 |
822 OBD_CONNECT_IMP_RECOV;
/* clients mounted with LMD_FLG_NOIR do not request imperative recovery */
823 if (lmd_is_client(lsi->lsi_lmd) &&
824 lsi->lsi_lmd->lmd_flags & LMD_FLG_NOIR)
825 data->ocd_connect_flags &= ~OBD_CONNECT_IMP_RECOV;
826 data->ocd_version = LUSTRE_VERSION_CODE;
827 rc = obd_connect(NULL, &exp, obd, &(obd->obd_uuid), data, NULL);
829 CERROR("connect failed %d\n", rc);
833 obd->u.cli.cl_mgc_mgsexp = exp;
836 /* Keep the mgc info in the sb. Note that many lsi's can point
840 cfs_mutex_unlock(&mgc_start_lock);
845 OBD_FREE(mgcname, len);
847 OBD_FREE(niduuid, len + 2);
// Drop one reference on the MGC, under mgc_start_lock.  While other
// references remain the function leaves with rc = -EBUSY.  On the last one
// it marks the obd obd_no_recov, disconnects the MGS export if one is set,
// cleans up the obd and deletes the nid uuids "<obdname>_<i>" for
// i < lmd_mgs_failnodes.
// NOTE(review): the uuid-delete failure message says "MDC"; presumably
// "MGC" was meant - confirm before changing the string.
851 static int lustre_stop_mgc(struct super_block *sb)
853 struct lustre_sb_info *lsi = s2lsi(sb);
854 struct obd_device *obd;
855 char *niduuid = 0, *ptr = 0;
856 int i, rc = 0, len = 0;
866 cfs_mutex_lock(&mgc_start_lock);
867 LASSERT(cfs_atomic_read(&obd->u.cli.cl_mgc_refcount) > 0);
868 if (!cfs_atomic_dec_and_test(&obd->u.cli.cl_mgc_refcount)) {
869 /* This is not fatal, every client that stops
870 will call in here. */
871 CDEBUG(D_MOUNT, "mgc still has %d references.\n",
872 cfs_atomic_read(&obd->u.cli.cl_mgc_refcount));
873 GOTO(out, rc = -EBUSY);
876 /* The MGC has no recoverable data in any case.
877 * force shutdown set in umount_begin */
878 obd->obd_no_recov = 1;
880 if (obd->u.cli.cl_mgc_mgsexp) {
881 /* An error is not fatal, if we are unable to send the
882 disconnect mgs ping evictor cleans up the export */
883 rc = obd_disconnect(obd->u.cli.cl_mgc_mgsexp);
885 CDEBUG(D_MOUNT, "disconnect failed %d\n", rc);
888 /* Save the obdname for cleaning the nid uuids, which are
890 len = strlen(obd->obd_name) + 6;
891 OBD_ALLOC(niduuid, len);
893 strcpy(niduuid, obd->obd_name);
894 ptr = niduuid + strlen(niduuid);
897 rc = class_manual_cleanup(obd);
901 /* Clean the nid uuids */
903 GOTO(out, rc = -ENOMEM);
/* ptr points at the end of the saved obd name, so each pass rewrites
 * only the "_<i>" suffix */
905 for (i = 0; i < lsi->lsi_lmd->lmd_mgs_failnodes; i++) {
906 sprintf(ptr, "_%x", i);
907 rc = do_lcfg(LUSTRE_MGC_OBDNAME, 0, LCFG_DEL_UUID,
910 CERROR("del MDC UUID %s failed: rc = %d\n",
915 OBD_FREE(niduuid, len);
917 /* class_import_put will get rid of the additional connections */
918 cfs_mutex_unlock(&mgc_start_lock);
922 /* Since there's only one mgc per node, we have to change its fs to get
923 access to the right disk. */
/* Point \a mgc at the disk behind \a sb by sending KEY_SET_FS through
 * obd_set_info_async() on the MGC's self export.  The value passed is the
 * super_block itself, with sizeof(*sb) as its length.  A "can't set_fs"
 * error is logged for the failure case. */
924 static int server_mgc_set_fs(struct obd_device *mgc, struct super_block *sb)
926 struct lustre_sb_info *lsi = s2lsi(sb);
930 CDEBUG(D_MOUNT, "Set mgc disk for %s\n", lsi->lsi_lmd->lmd_dev);
932 /* cl_mgc_sem in mgc ensures we sleep if the mgc_fs is busy */
933 rc = obd_set_info_async(NULL, mgc->obd_self_export,
934 sizeof(KEY_SET_FS), KEY_SET_FS,
935 sizeof(*sb), sb, NULL);
937 CERROR("can't set_fs %d\n", rc);
/* Counterpart of server_mgc_set_fs(): sends KEY_CLEAR_FS to the MGC's self
 * export.  The value arguments of the call are not visible in this extract. */
943 static int server_mgc_clear_fs(struct obd_device *mgc)
948 CDEBUG(D_MOUNT, "Unassign mgc disk\n");
950 rc = obd_set_info_async(NULL, mgc->obd_self_export,
951 sizeof(KEY_CLEAR_FS), KEY_CLEAR_FS,
/* Taken by server_stop_servers() and by server_start_targets() around the
 * lookup and start/stop of the MDS and OSS obds. */
956 CFS_DEFINE_MUTEX(server_start_lock);
958 /* Stop MDS/OSS if nobody is using them */
/* Under server_start_lock, find the service obd matching the target type in
 * \a lddflags (LUSTRE_MDS_OBDNAME for an MDT, LUSTRE_OSS_OBDNAME for an OST)
 * together with its obd_type.  The obd is cleaned up with
 * class_manual_cleanup() only when the type is missing or its typ_refcnt is
 * zero. */
959 static int server_stop_servers(int lddflags, int lsiflags)
961 struct obd_device *obd = NULL;
962 struct obd_type *type = NULL;
966 cfs_mutex_lock(&server_start_lock);
968 /* Either an MDT or an OST or neither */
969 /* if this was an MDT, and there are no more MDT's, clean up the MDS */
970 if ((lddflags & LDD_F_SV_TYPE_MDT) &&
971 (obd = class_name2obd(LUSTRE_MDS_OBDNAME))) {
972 /*FIXME pre-rename, should eventually be LUSTRE_MDT_NAME*/
973 type = class_search_type(LUSTRE_MDS_NAME);
975 /* if this was an OST, and there are no more OST's, clean up the OSS */
976 if ((lddflags & LDD_F_SV_TYPE_OST) &&
977 (obd = class_name2obd(LUSTRE_OSS_OBDNAME))) {
978 type = class_search_type(LUSTRE_OST_NAME);
981 if (obd && (!type || !type->typ_refcnt)) {
984 /* obd_fail doesn't mean much on a server obd */
985 err = class_manual_cleanup(obd);
990 cfs_mutex_unlock(&server_start_lock);
/* Dump \a mti through PRINT_CMD at PRINT_MASK level, prefixed by \a title:
 * server name, fs name, uuid, config version and flags. */
995 int server_mti_print(char *title, struct mgs_target_info *mti)
997 PRINT_CMD(PRINT_MASK, "mti %s\n", title);
998 PRINT_CMD(PRINT_MASK, "server: %s\n", mti->mti_svname);
999 PRINT_CMD(PRINT_MASK, "fs: %s\n", mti->mti_fsname);
1000 PRINT_CMD(PRINT_MASK, "uuid: %s\n", mti->mti_uuid);
1001 PRINT_CMD(PRINT_MASK, "ver: %d flags: %#x\n",
1002 mti->mti_config_ver, mti->mti_flags);
1006 static int server_sb2mti(struct super_block *sb, struct mgs_target_info *mti)
1008 struct lustre_sb_info *lsi = s2lsi(sb);
1009 struct lustre_disk_data *ldd = lsi->lsi_ldd;
1010 lnet_process_id_t id;
1014 if (!(lsi->lsi_flags & LSI_SERVER))
1017 strncpy(mti->mti_fsname, ldd->ldd_fsname,
1018 sizeof(mti->mti_fsname));
1019 strncpy(mti->mti_svname, ldd->ldd_svname,
1020 sizeof(mti->mti_svname));
1022 mti->mti_nid_count = 0;
1023 while (LNetGetId(i++, &id) != -ENOENT) {
1024 if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND)
1027 /* server use --servicenode param, only allow specified
1028 * nids be registered */
1029 if ((ldd->ldd_flags & LDD_F_NO_PRIMNODE) != 0 &&
1030 class_match_nid(ldd->ldd_params,
1031 PARAM_FAILNODE, id.nid) < 1)
1034 /* match specified network */
1035 if (!class_match_net(ldd->ldd_params,
1036 PARAM_NETWORK, LNET_NIDNET(id.nid)))
1039 mti->mti_nids[mti->mti_nid_count] = id.nid;
1040 mti->mti_nid_count++;
1041 if (mti->mti_nid_count >= MTI_NIDS_MAX) {
1042 CWARN("Only using first %d nids for %s\n",
1043 mti->mti_nid_count, mti->mti_svname);
1048 mti->mti_lustre_ver = LUSTRE_VERSION_CODE;
1049 mti->mti_config_ver = 0;
1050 if (lsi->lsi_lmd->lmd_flags & LMD_FLG_WRITECONF)
1051 ldd->ldd_flags |= LDD_F_WRITECONF;
1052 mti->mti_flags = ldd->ldd_flags;
1053 mti->mti_stripe_index = ldd->ldd_svindex;
1054 memcpy(mti->mti_uuid, ldd->ldd_uuid, sizeof(mti->mti_uuid));
1055 if (strlen(ldd->ldd_params) > sizeof(mti->mti_params)) {
1056 CERROR("params too big for mti\n");
1059 memcpy(mti->mti_params, ldd->ldd_params, sizeof(mti->mti_params));
1063 /* Register an old or new target with the MGS. If needed MGS will construct
1064 startup logs and assign index */
/* Build an mgs_target_info from \a sb with server_sb2mti(), mark it
 * LDD_F_OPC_REG and send it to the MGS under KEY_REGISTER_TARGET.
 * writeconf is set when ldd_flags has LDD_F_NEED_INDEX or LDD_F_UPDATE; per
 * the comment below, registration must then succeed, while the other
 * failure branch is logged as "not fatal".
 * Afterwards ldd_flags is refreshed from the reply, masked with
 * LDD_F_ONDISK_MASK.  If the reply carries LDD_F_REWRITE_LDD, the index and
 * server name are copied back into the ldd, written out with ldd_write(),
 * the fs label is updated and the fs is synced. */
1065 int server_register_target(struct super_block *sb)
1067 struct lustre_sb_info *lsi = s2lsi(sb);
1068 struct obd_device *mgc = lsi->lsi_mgc;
1069 struct lustre_disk_data *ldd = lsi->lsi_ldd;
1070 struct mgs_target_info *mti = NULL;
1077 if (!(lsi->lsi_flags & LSI_SERVER))
1083 rc = server_sb2mti(sb, mti);
1087 CDEBUG(D_MOUNT, "Registration %s, fs=%s, %s, index=%04x, flags=%#x\n",
1088 mti->mti_svname, mti->mti_fsname,
1089 libcfs_nid2str(mti->mti_nids[0]), mti->mti_stripe_index,
1092 /* if write_conf is true, the registration must succeed */
1093 writeconf = !!(ldd->ldd_flags & (LDD_F_NEED_INDEX | LDD_F_UPDATE));
1094 mti->mti_flags |= LDD_F_OPC_REG;
1096 /* Register the target */
1097 /* FIXME use mgc_process_config instead */
1098 rc = obd_set_info_async(NULL, mgc->u.cli.cl_mgc_mgsexp,
1099 sizeof(KEY_REGISTER_TARGET), KEY_REGISTER_TARGET,
1100 sizeof(*mti), mti, NULL);
1102 if (mti->mti_flags & LDD_F_ERROR) {
1103 LCONSOLE_ERROR_MSG(0x160,
1104 "The MGS is refusing to allow this "
1105 "server (%s) to start. Please see messages"
1106 " on the MGS node.\n", ldd->ldd_svname);
1107 } else if (writeconf) {
1108 LCONSOLE_ERROR_MSG(0x15f,
1109 "Communication to the MGS return error %d. "
1110 "Is the MGS running?\n", rc);
1112 CERROR("Cannot talk to the MGS: %d, not fatal\n", rc);
1113 /* reset the error code for non-fatal error. */
1119 /* Always update our flags */
1120 ldd->ldd_flags = mti->mti_flags & LDD_F_ONDISK_MASK;
1122 /* If this flag is set, it means the MGS wants us to change our
1123 on-disk data. (So far this means just the index.) */
1124 if (mti->mti_flags & LDD_F_REWRITE_LDD) {
1127 CDEBUG(D_MOUNT, "Changing on-disk index from %#x to %#x "
1128 "for %s\n", ldd->ldd_svindex, mti->mti_stripe_index,
1130 ldd->ldd_svindex = mti->mti_stripe_index;
/* NOTE(review): strncpy with the full buffer size does not guarantee a
 * terminated ldd_svname - confirm mti_svname is always shorter */
1131 strncpy(ldd->ldd_svname, mti->mti_svname,
1132 sizeof(ldd->ldd_svname));
1133 /* or ldd_make_sv_name(ldd); */
/* NOTE(review): the ldd_write() result is not checked here */
1134 ldd_write(&mgc->obd_lvfs_ctxt, ldd);
1135 err = fsfilt_set_label(mgc, lsi->lsi_srv_mnt->mnt_sb,
1138 CERROR("Label set error %d\n", err);
1139 label = fsfilt_get_label(mgc, lsi->lsi_srv_mnt->mnt_sb);
1141 CDEBUG(D_MOUNT, "Disk label changed to %s\n", label);
1143 /* Flush the new ldd to disk */
1144 fsfilt_sync(mgc, lsi->lsi_srv_mnt->mnt_sb);
1154 * Notify the MGS that this target is ready.
1155 * Used by IR - if the MGS receives this message, it will notify clients.
/* Send a KEY_REGISTER_TARGET message flagged LDD_F_OPC_READY for \a obd.
 * The mgs_target_info is filled by server_sb2mti() and carries the obd's
 * obt_instance.  If the call succeeds and the reply has LDD_F_IR_CAPABLE set
 * without LDD_F_ERROR, LSI_IR_CAPABLE is recorded in lsi_flags. */
1157 static int server_notify_target(struct super_block *sb, struct obd_device *obd)
1159 struct lustre_sb_info *lsi = s2lsi(sb);
1160 struct obd_device *mgc = lsi->lsi_mgc;
1161 struct mgs_target_info *mti = NULL;
1167 if (!(lsi->lsi_flags & LSI_SERVER))
1173 rc = server_sb2mti(sb, mti);
1177 mti->mti_instance = obd->u.obt.obt_instance;
1178 mti->mti_flags |= LDD_F_OPC_READY;
1180 /* FIXME use mgc_process_config instead */
1181 rc = obd_set_info_async(NULL, mgc->u.cli.cl_mgc_mgsexp,
1182 sizeof(KEY_REGISTER_TARGET),
1183 KEY_REGISTER_TARGET,
1184 sizeof(*mti), mti, NULL);
1186 /* Imperative recovery: if the mgs informs us to use IR? */
1187 if (!rc && !(mti->mti_flags & LDD_F_ERROR) &&
1188 (mti->mti_flags & LDD_F_IR_CAPABLE))
1189 lsi->lsi_flags |= LSI_IR_CAPABLE;
/* Review summary of server_start_targets(), based on the statements visible
 * here, in order:
 *  - for an MDT or OST, look up the global MDS or OSS obd under
 *    server_start_lock; lustre_start_simple() is what starts it;
 *  - point the MGC at this disk (server_mgc_set_fs), register the target
 *    with the MGS, and register the mount under ldd_svname;
 *  - process the config llog named after the target, then release the MGC
 *    fs again (server_mgc_clear_fs);
 *  - look the started obd up by ldd_svname, abort recovery when
 *    LMD_FLG_ABORT_RECOV is set and the obd has an iocontrol method, notify
 *    the MGS, compute the recovery timeout, and send OBD_NOTIFY_CONFIG. */
1198 /** Start server targets: MDTs and OSTs
1200 static int server_start_targets(struct super_block *sb, struct vfsmount *mnt)
1202 struct obd_device *obd;
1203 struct lustre_sb_info *lsi = s2lsi(sb);
1204 struct config_llog_instance cfg;
1208 CDEBUG(D_MOUNT, "starting target %s\n", lsi->lsi_ldd->ldd_svname);
1211 /* If we're an MDT, make sure the global MDS is running */
1212 if (lsi->lsi_ldd->ldd_flags & LDD_F_SV_TYPE_MDT) {
1213 /* make sure the MDS is started */
1214 cfs_mutex_lock(&server_start_lock);
1215 obd = class_name2obd(LUSTRE_MDS_OBDNAME);
1217 rc = lustre_start_simple(LUSTRE_MDS_OBDNAME,
1218 /* FIXME pre-rename, should eventually be LUSTRE_MDS_NAME */
1220 LUSTRE_MDS_OBDNAME"_uuid",
1223 cfs_mutex_unlock(&server_start_lock);
1224 CERROR("failed to start MDS: %d\n", rc);
1228 cfs_mutex_unlock(&server_start_lock);
1232 /* If we're an OST, make sure the global OSS is running */
1233 if (IS_OST(lsi->lsi_ldd)) {
1234 /* make sure OSS is started */
1235 cfs_mutex_lock(&server_start_lock);
1236 obd = class_name2obd(LUSTRE_OSS_OBDNAME);
1238 rc = lustre_start_simple(LUSTRE_OSS_OBDNAME,
1240 LUSTRE_OSS_OBDNAME"_uuid",
1243 cfs_mutex_unlock(&server_start_lock);
1244 CERROR("failed to start OSS: %d\n", rc);
1248 cfs_mutex_unlock(&server_start_lock);
1251 /* Set the mgc fs to our server disk. This allows the MGC to
1252 * read and write configs locally, in case it can't talk to the MGS. */
1253 rc = server_mgc_set_fs(lsi->lsi_mgc, sb);
1257 /* Register with MGS */
1258 rc = server_register_target(sb);
1262 /* Let the target look up the mount using the target's name
1263 (we can't pass the sb or mnt through class_process_config.) */
1264 rc = server_register_mount(lsi->lsi_ldd->ldd_svname, sb, mnt);
1268 /* Start targets using the llog named for the target */
1269 memset(&cfg, 0, sizeof(cfg));
1270 rc = lustre_process_log(sb, lsi->lsi_ldd->ldd_svname, &cfg);
1272 CERROR("failed to start server %s: %d\n",
1273 lsi->lsi_ldd->ldd_svname, rc);
1274 /* Do NOT call server_deregister_mount() here. This makes it
1275 * impossible to find mount later in cleanup time and leaves
1276 * @lsi and other stuff leaked. -umka */
1281 /* Release the mgc fs for others to use */
1282 server_mgc_clear_fs(lsi->lsi_mgc);
1285 obd = class_name2obd(lsi->lsi_ldd->ldd_svname);
1287 CERROR("no server named %s was started\n",
1288 lsi->lsi_ldd->ldd_svname);
1292 if ((lsi->lsi_lmd->lmd_flags & LMD_FLG_ABORT_RECOV) &&
1293 (OBP(obd, iocontrol))) {
1294 obd_iocontrol(OBD_IOC_ABORT_RECOVERY,
1295 obd->obd_self_export, 0, NULL, NULL);
/* the server_notify_target() result is not used here */
1298 server_notify_target(sb, obd);
1300 /* calculate recovery timeout, do it after lustre_process_log */
1301 server_calc_timeout(lsi, obd);
1303 /* log has been fully processed */
1304 obd_notify(obd, NULL, OBD_NOTIFY_CONFIG, (void *)CONFIG_LOG);
1310 /***************** lustre superblock **************/
/* Set up the lustre_sb_info for \a sb and attach it through s2lsi_nocast().
 * The lsi's lustre_mount_data is allocated and its exclude count and
 * soft/hard recovery times are zeroed.  lsi_mounts starts at 1 (the extra
 * setup reference noted below) and lsi_flags defaults to
 * LSI_UMOUNT_FAILOVER. */
1312 struct lustre_sb_info *lustre_init_lsi(struct super_block *sb)
1314 struct lustre_sb_info *lsi;
1320 OBD_ALLOC_PTR(lsi->lsi_lmd);
1321 if (!lsi->lsi_lmd) {
1326 lsi->lsi_lmd->lmd_exclude_count = 0;
1327 lsi->lsi_lmd->lmd_recovery_time_soft = 0;
1328 lsi->lsi_lmd->lmd_recovery_time_hard = 0;
1329 s2lsi_nocast(sb) = lsi;
1330 /* we take 1 extra ref for our setup */
1331 cfs_atomic_set(&lsi->lsi_mounts, 1);
1333 /* Default umount style */
1334 lsi->lsi_flags = LSI_UMOUNT_FAILOVER;
/*
 * Release everything hanging off the lsi of @sb and detach it.
 *
 * Asserts that the lsi exists and that lsi_mounts has already dropped to
 * zero, then frees, in order: the disk data (lsi_ldd), each string owned by
 * the mount data (lmd_dev, lmd_profile, lmd_mgssec, lmd_opts), the exclusion
 * array, the mount data itself and finally the lsi. The superblock pointer
 * is cleared last.
 *
 * Returns int; the return statement is not visible here.
 */
1339 static int lustre_free_lsi(struct super_block *sb)
1341 struct lustre_sb_info *lsi = s2lsi(sb);
1344 LASSERT(lsi != NULL);
1345 CDEBUG(D_MOUNT, "Freeing lsi %p\n", lsi);
1347 /* someone didn't call server_put_mount. */
1348 LASSERT(cfs_atomic_read(&lsi->lsi_mounts) == 0);
1350 if (lsi->lsi_ldd != NULL)
1351 OBD_FREE(lsi->lsi_ldd, sizeof(*lsi->lsi_ldd));
/*
 * String sizes are recomputed as strlen() + 1. That equals the sizes used
 * when lmd_parse() and lmd_parse_mgssec() allocated them only as long as
 * the strings have not been shortened or lengthened in place since.
 */
1353 if (lsi->lsi_lmd != NULL) {
1354 if (lsi->lsi_lmd->lmd_dev != NULL)
1355 OBD_FREE(lsi->lsi_lmd->lmd_dev,
1356 strlen(lsi->lsi_lmd->lmd_dev) + 1);
1357 if (lsi->lsi_lmd->lmd_profile != NULL)
1358 OBD_FREE(lsi->lsi_lmd->lmd_profile,
1359 strlen(lsi->lsi_lmd->lmd_profile) + 1);
1360 if (lsi->lsi_lmd->lmd_mgssec != NULL)
1361 OBD_FREE(lsi->lsi_lmd->lmd_mgssec,
1362 strlen(lsi->lsi_lmd->lmd_mgssec) + 1);
1363 if (lsi->lsi_lmd->lmd_opts != NULL)
1364 OBD_FREE(lsi->lsi_lmd->lmd_opts,
1365 strlen(lsi->lsi_lmd->lmd_opts) + 1);
/* The exclusion array holds lmd_exclude_count entries (see lmd_make_exclusion). */
1366 if (lsi->lsi_lmd->lmd_exclude_count)
1367 OBD_FREE(lsi->lsi_lmd->lmd_exclude,
1368 sizeof(lsi->lsi_lmd->lmd_exclude[0]) *
1369 lsi->lsi_lmd->lmd_exclude_count);
1370 OBD_FREE(lsi->lsi_lmd, sizeof(*lsi->lsi_lmd));
/* lsi_llsbi must already be gone; presumably the client-side sb info - confirm. */
1373 LASSERT(lsi->lsi_llsbi == NULL);
1374 OBD_FREE(lsi, sizeof(*lsi));
1375 s2lsi_nocast(sb) = NULL;
1380 /* The lsi has one reference for every server that is using the disk -
1381 e.g. MDT, MGS, and potentially MGC */
/*
 * Drop one reference on the lsi attached to @sb. When lsi_mounts reaches
 * zero the lsi is torn down with lustre_free_lsi().
 *
 * Returns int; the return statements are not visible here.
 */
1382 static int lustre_put_lsi(struct super_block *sb)
1384 struct lustre_sb_info *lsi = s2lsi(sb);
1387 LASSERT(lsi != NULL);
/* The count logged here is the value before the decrement below. */
1389 CDEBUG(D_MOUNT, "put %p %d\n", sb, cfs_atomic_read(&lsi->lsi_mounts));
1390 if (cfs_atomic_dec_and_test(&lsi->lsi_mounts)) {
1391 lustre_free_lsi(sb);
1397 /*************** server mount ******************/
1399 /** Kernel mount using mount options in MOUNT_DATA_FILE.
1400 * Since this file lives on the disk, we pre-mount using a common
1401 * type, read the file, then re-mount using the type specified in the
 * file.
 */
/*
 * Mount the backing device of a server target in two passes.
 *
 * Pass 1: mount lmd_dev as "ldiskfs" with the mount-line options and run
 *         ldd_parse() in a run context rooted at that mount to fill @ldd.
 * Pass 2: rebuild the option string from ldd_mount_opts, "no_mbcache" and
 *         the mount-line options, then mount again with the filesystem
 *         type named by MT_STR(ldd) and MS_NOATIME | MS_NODIRATIME added.
 *
 * On success the disk data is stored in lsi->lsi_ldd. On failure the
 * option page and @ldd are freed, lsi_ldd is cleared and ERR_PTR(rc) is
 * returned.
 *
 * NOTE(review): several guards, labels and the release of the pre-mount
 * are not visible here; conditions are described only where shown.
 */
1404 static struct vfsmount *server_kernel_mount(struct super_block *sb)
1406 struct lvfs_run_ctxt mount_ctxt;
1407 struct lustre_sb_info *lsi = s2lsi(sb);
1408 struct lustre_disk_data *ldd;
1409 struct lustre_mount_data *lmd = lsi->lsi_lmd;
1410 struct vfsmount *mnt;
1411 struct file_system_type *type;
1412 char *options = NULL;
1413 unsigned long page, s_flags;
1414 struct page *__page;
1419 OBD_ALLOC(ldd, sizeof(*ldd));
1421 RETURN(ERR_PTR(-ENOMEM));
1423 /* In the past, we have always used flags = 0.
1424 Note ext3/ldiskfs can't be mounted ro. */
1425 s_flags = sb->s_flags;
1427 /* allocate memory for options */
1428 OBD_PAGE_ALLOC(__page, CFS_ALLOC_STD);
1430 GOTO(out_free, rc = -ENOMEM);
1431 page = (unsigned long)cfs_page_address(__page);
1432 options = (char *)page;
1433 memset(options, 0, CFS_PAGE_SIZE);
1435 /* mount-line options must be added for pre-mount because it may
1436 * contain mount options such as journal_dev which are required
1437 * to mount successfully the underlying filesystem */
1438 if (lmd->lmd_opts && (*(lmd->lmd_opts) != 0))
1439 strncat(options, lmd->lmd_opts, CFS_PAGE_SIZE - 1);
1441 /* Pre-mount ldiskfs to read the MOUNT_DATA_FILE */
1442 CDEBUG(D_MOUNT, "Pre-mount ldiskfs %s\n", lmd->lmd_dev);
1443 type = get_fs_type("ldiskfs");
1445 CERROR("premount failed: cannot find ldiskfs module\n");
1446 GOTO(out_free, rc = -ENODEV);
/* The module reference taken by get_fs_type() is dropped right after the mount call. */
1448 mnt = vfs_kern_mount(type, s_flags, lmd->lmd_dev, (void *)options);
1449 cfs_module_put(type->owner);
1452 CERROR("premount %s:%#lx ldiskfs failed: %d "
1453 "Is the ldiskfs module available?\n",
1454 lmd->lmd_dev, s_flags, rc );
/* Run context for ldd_parse(): working directory is the root of the pre-mount. */
1458 OBD_SET_CTXT_MAGIC(&mount_ctxt);
1459 mount_ctxt.pwdmnt = mnt;
1460 mount_ctxt.pwd = mnt->mnt_root;
1461 mount_ctxt.fs = get_ds();
1463 rc = ldd_parse(&mount_ctxt, ldd);
1467 CERROR("premount parse options failed: rc = %d\n", rc);
1471 /* Done with our pre-mount, now do the real mount. */
1473 /* Glom up mount options */
/*
 * The page is zeroed first, so the bounded strncpy() below always leaves a
 * terminated string. len keeps room for one separator plus the terminator.
 */
1474 memset(options, 0, CFS_PAGE_SIZE);
1475 strncpy(options, ldd->ldd_mount_opts, CFS_PAGE_SIZE - 2);
1477 len = CFS_PAGE_SIZE - strlen(options) - 2;
1479 strcat(options, ",");
1480 strncat(options, "no_mbcache", len);
1482 /* Add in any mount-line options */
1483 if (lmd->lmd_opts && (*(lmd->lmd_opts) != 0)) {
1484 len = CFS_PAGE_SIZE - strlen(options) - 2;
1485 strcat(options, ",");
1486 strncat(options, lmd->lmd_opts, len);
1489 /* Special permanent mount flags */
1491 s_flags |= MS_NOATIME | MS_NODIRATIME;
1493 CDEBUG(D_MOUNT, "kern_mount: %s %s %s\n",
1494 MT_STR(ldd), lmd->lmd_dev, options);
1495 type = get_fs_type(MT_STR(ldd));
1497 CERROR("get_fs_type failed\n");
1498 GOTO(out_free, rc = -ENODEV);
1500 mnt = vfs_kern_mount(type, s_flags, lmd->lmd_dev, (void *)options);
1501 cfs_module_put(type->owner);
1504 CERROR("vfs_kern_mount failed: rc = %d\n", rc);
/* With abort_recov on the mount line, LAST_RCVD on the new mount is truncated. */
1508 if (lmd->lmd_flags & LMD_FLG_ABORT_RECOV)
1509 simple_truncate(mnt->mnt_sb->s_root, mnt, LAST_RCVD,
/* Success: the option page is no longer needed, the disk data is kept. */
1512 OBD_PAGE_FREE(__page);
1513 lsi->lsi_ldd = ldd; /* freed at lsi cleanup */
1514 CDEBUG(D_SUPER, "%s: mnt = %p\n", lmd->lmd_dev, mnt);
/* Failure: release both buffers and report rc as an error pointer. */
1519 OBD_PAGE_FREE(__page);
1520 OBD_FREE(ldd, sizeof(*ldd));
1521 lsi->lsi_ldd = NULL;
1522 RETURN(ERR_PTR(rc));
1525 /** Wait here forever until the mount refcount is 0 before completing umount,
1526 * else we risk dereferencing a null pointer.
1527 * LNET may take e.g. 165s before killing zombies.
 */
/*
 * Sleep until mnt_get_count(@mnt) is down to 1.
 *
 * Loops while the count is above 1, waiting on a private wait queue in
 * 3-second slices. A console warning is printed whenever the elapsed
 * counter is a non-zero multiple of 30. The signal mask is swapped for the
 * one built from SIGKILL around each sleep and restored afterwards. An
 * emergency console message reports an interrupted umount.
 *
 * NOTE(review): the declarations of waitq, rc and waited, the increment of
 * waited and the condition guarding the emergency message are not visible
 * here.
 */
1529 static void server_wait_finished(struct vfsmount *mnt)
1533 cfs_sigset_t blocked;
1535 cfs_waitq_init(&waitq);
1537 while (mnt_get_count(mnt) > 1) {
1538 if (waited && (waited % 30 == 0))
1539 LCONSOLE_WARN("Mount still busy with %d refs after "
1543 /* Cannot use l_event_wait() for an interruptible sleep. */
/* presumably leaves only SIGKILL deliverable during the sleep - confirm */
1545 blocked = cfs_block_sigsinv(sigmask(SIGKILL));
1546 cfs_waitq_wait_event_interruptible_timeout(
1548 (mnt_get_count(mnt) == 1),
1549 cfs_time_seconds(3),
1551 cfs_restore_sigs(blocked);
1553 LCONSOLE_EMERG("Danger: interrupted umount %s with "
1554 "%d refs!\n", mnt->mnt_devname,
1555 mnt_get_count(mnt));
1562 /** Start the shutdown of servers at umount.
 */
/*
 * .put_super handler for a server mount: stop everything started for @sb.
 *
 * Order of the visible steps:
 *  1. copy the service name (replaced by "MGS" for an MDT mounted nosvc)
 *     for the final console message;
 *  2. unless nosvc, for an MDT or OST: end the target config log, remember
 *     the lp_dt name of its profile, clean up the target obd and
 *     deregister the mount;
 *  3. for an MGS not mounted nomgs: end the "<fsname>-params" log and stop
 *     the MGS;
 *  4. lustre_common_put_super(), then wait for the mount to become idle;
 *  5. stop shared servers, then clean up the obd named by the saved lp_dt
 *     name if it still exists.
 *
 * Also called by server_fill_super() as its failure cleanup.
 */
1564 static void server_put_super(struct super_block *sb)
1566 struct lustre_sb_info *lsi = s2lsi(sb);
1567 struct obd_device *obd;
1568 struct vfsmount *mnt = lsi->lsi_srv_mnt;
1569 char *tmpname, *extraname = NULL;
/*
 * Flags are copied up front because they are still needed by
 * server_stop_servers() after lustre_common_put_super() has run;
 * presumably the lsi may be gone by then - confirm.
 */
1571 int lddflags = lsi->lsi_ldd->ldd_flags;
1572 int lsiflags = lsi->lsi_flags;
1575 LASSERT(lsiflags & LSI_SERVER);
/*
 * NOTE(review): no NULL check on tmpname is visible between the allocation
 * and the memcpy() below - confirm the allocation cannot fail here.
 */
1577 tmpname_sz = strlen(lsi->lsi_ldd->ldd_svname) + 1;
1578 OBD_ALLOC(tmpname, tmpname_sz);
1579 memcpy(tmpname, lsi->lsi_ldd->ldd_svname, tmpname_sz);
1580 CDEBUG(D_MOUNT, "server put_super %s\n", tmpname);
1581 if (IS_MDT(lsi->lsi_ldd) && (lsi->lsi_lmd->lmd_flags & LMD_FLG_NOSVC))
1582 snprintf(tmpname, tmpname_sz, "MGS");
1584 /* Stop the target */
1585 if (!(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOSVC) &&
1586 (IS_MDT(lsi->lsi_ldd) || IS_OST(lsi->lsi_ldd))) {
1587 struct lustre_profile *lprof = NULL;
1589 /* tell the mgc to drop the config log */
1590 lustre_end_log(sb, lsi->lsi_ldd->ldd_svname, NULL);
1592 /* COMPAT_146 - profile may get deleted in mgc_cleanup.
1593 If there are any setup/cleanup errors, save the lov
1594 name for safety cleanup later. */
1595 lprof = class_get_profile(lsi->lsi_ldd->ldd_svname);
1596 if (lprof && lprof->lp_dt) {
/* NOTE(review): extraname is copied into without a visible NULL check. */
1597 OBD_ALLOC(extraname, strlen(lprof->lp_dt) + 1);
1598 strcpy(extraname, lprof->lp_dt);
1601 obd = class_name2obd(lsi->lsi_ldd->ldd_svname);
1603 CDEBUG(D_MOUNT, "stopping %s\n", obd->obd_name);
1604 if (lsi->lsi_flags & LSI_UMOUNT_FAILOVER)
1606 /* We can't seem to give an error return code
1607 * to .put_super, so we better make sure we clean up! */
1609 class_manual_cleanup(obd);
1611 CERROR("no obd %s\n", lsi->lsi_ldd->ldd_svname);
1612 server_deregister_mount(lsi->lsi_ldd->ldd_svname);
1616 /* If they wanted the mgs to stop separately from the mdt, they
1617 should have put it on a different device. */
1618 if (IS_MGS(lsi->lsi_ldd)) {
1619 /* if MDS start with --nomgs, don't stop MGS then */
1620 if (!(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOMGS)) {
/*
 * NOTE(review): the console message below has no trailing newline, unlike
 * the other console messages in this function.
 */
1623 OBD_ALLOC(logname, MGS_PARAM_MAXLEN);
1625 LCONSOLE_WARN("Stopping mgs failed %d, please "
1626 "try again.", -ENOMEM);
/* The log name is "<fsname>-params", built in a MGS_PARAM_MAXLEN buffer. */
1629 strcpy(logname, lsi->lsi_ldd->ldd_fsname);
1630 strcat(logname, "-params");
1631 /* tell the mgc to drop parameter config log */
1632 lustre_end_log(sb, logname, NULL);
1633 OBD_FREE(logname, MGS_PARAM_MAXLEN);
1635 server_stop_mgs(sb);
1639 /* Clean the mgc and sb */
1640 lustre_common_put_super(sb);
1642 /* Wait for the targets to really clean up - can't exit (and let the
1643 sb get destroyed) while the mount is still in use */
1644 server_wait_finished(mnt);
1646 /* drop the One True Mount */
1649 /* Stop the servers (MDS, OSS) if no longer needed. We must wait
1650 until the target is really gone so that our type refcount check
1652 server_stop_servers(lddflags, lsiflags);
1654 /* In case of startup or cleanup err, stop related obds */
1656 obd = class_name2obd(extraname);
1658 CWARN("Cleaning orphaned obd %s\n", extraname);
1660 class_manual_cleanup(obd);
1662 OBD_FREE(extraname, strlen(extraname) + 1);
1665 LCONSOLE_WARN("server umount %s complete\n", tmpname);
1666 OBD_FREE(tmpname, tmpname_sz);
1670 /** Called only for 'umount -f'
 */
/*
 * .umount_begin handler: switch the umount style of the server from
 * failover to force by updating lsi_flags.
 *
 * Two signatures exist. With HAVE_UMOUNTBEGIN_VFSMOUNT the superblock is
 * taken from the vfsmount and the MNT_FORCE bit of @flags is tested;
 * otherwise the superblock is passed in directly.
 */
1672 #ifdef HAVE_UMOUNTBEGIN_VFSMOUNT
1673 static void server_umount_begin(struct vfsmount *vfsmnt, int flags)
1675 struct super_block *sb = vfsmnt->mnt_sb;
1677 static void server_umount_begin(struct super_block *sb)
1680 struct lustre_sb_info *lsi = s2lsi(sb);
1683 #ifdef HAVE_UMOUNTBEGIN_VFSMOUNT
1684 if (!(flags & MNT_FORCE)) {
1690 CDEBUG(D_MOUNT, "umount -f\n");
1691 /* umount = failover
1693 no third way to do non-force, non-failover */
/* The failover bit set by lustre_init_lsi() is cleared before force is set. */
1694 lsi->lsi_flags &= ~LSI_UMOUNT_FAILOVER;
1695 lsi->lsi_flags |= LSI_UMOUNT_FORCE;
/*
 * .statfs handler for a server mountpoint.
 *
 * If the backing mount (lsi_srv_mnt) exists and its superblock has a statfs
 * operation, that operation is called and f_type is then overwritten with
 * the magic of this superblock. Otherwise @buf is filled locally; the
 * visible fields are f_type, f_bsize and f_namelen.
 *
 * The signature and the argument handed to the backing statfs both depend
 * on HAVE_STATFS_DENTRY_PARAM (superblock versus dentry).
 */
1699 #ifndef HAVE_STATFS_DENTRY_PARAM
1700 static int server_statfs (struct super_block *sb, cfs_kstatfs_t *buf)
1703 static int server_statfs (struct dentry *dentry, cfs_kstatfs_t *buf)
1705 struct super_block *sb = dentry->d_sb;
1707 struct vfsmount *mnt = s2lsi(sb)->lsi_srv_mnt;
1710 if (mnt && mnt->mnt_sb && mnt->mnt_sb->s_op->statfs) {
1711 #ifdef HAVE_STATFS_DENTRY_PARAM
1712 int rc = mnt->mnt_sb->s_op->statfs(mnt->mnt_root, buf);
1714 int rc = mnt->mnt_sb->s_op->statfs(mnt->mnt_sb, buf);
/* Report the Lustre magic rather than that of the backing filesystem. */
1717 buf->f_type = sb->s_magic;
/* Fallback values when no backing statfs is available. */
1723 buf->f_type = sb->s_magic;
1724 buf->f_bsize = sb->s_blocksize;
1730 buf->f_namelen = NAME_MAX;
1734 /** The operations we support directly on the superblock:
1735 * mount, umount, and df.
 */
/*
 * Superblock operations of a server mount. Installed on the superblock by
 * server_fill_super_common().
 */
1737 static struct super_operations server_ops =
1739 .put_super = server_put_super,
1740 .umount_begin = server_umount_begin, /* umount -f */
1741 .statfs = server_statfs,
/*
 * log2(n): position of the first zero bit of ~n, i.e. of the lowest set bit
 * of n (assuming cfs_ffz() finds the first zero bit - confirm). Equals the
 * base-2 logarithm only when n is a power of two; used for s_blocksize.
 */
1744 #define log2(n) cfs_ffz(~(n))
/* Magic stored in sb->s_magic and reported as f_type by server_statfs(). */
1745 #define LUSTRE_SUPER_MAGIC 0x0BD00BD1
/*
 * Fill in the generic VFS fields of a server superblock.
 *
 * Sets a 4096-byte block size, the Lustre magic, a zero s_maxbytes, the
 * read-only flag and server_ops, then creates a root inode marked as a
 * directory and the root dentry for it.
 *
 * Returns int; the error codes used when the inode or dentry cannot be
 * created are not visible here.
 */
1747 static int server_fill_super_common(struct super_block *sb)
1749 struct inode *root = 0;
1752 CDEBUG(D_MOUNT, "Server sb, dev=%d\n", (int)sb->s_dev);
1754 sb->s_blocksize = 4096;
1755 sb->s_blocksize_bits = log2(sb->s_blocksize);
1756 sb->s_magic = LUSTRE_SUPER_MAGIC;
1757 sb->s_maxbytes = 0; /* we don't allow file IO on server mountpoints */
1758 sb->s_flags |= MS_RDONLY;
1759 sb->s_op = &server_ops;
1761 root = new_inode(sb);
1763 CERROR("Can't make root inode\n");
1767 /* returns -EIO for every operation */
1768 /* make_bad_inode(root); -- badness - can't umount */
1769 /* apparently we need to be a directory for the mount to finish */
1770 root->i_mode = S_IFDIR;
1772 sb->s_root = d_alloc_root(root);
1774 CERROR("Can't make root dentry\n");
1782 /** Fill in the superblock info for a Lustre server.
1783 * Mount the device with the correct options.
1784 * Read the on-disk config file.
1785 * Start the services.
 */
/*
 * Bring up a server on @sb.
 *
 * Visible sequence: mount the backing device (server_kernel_mount), refuse
 * to continue if an obd with the same service name already exists, start
 * the MGS when this disk carries one and nomgs was not given, start the
 * MGC, then either start the targets (MDT/OST without nosvc) or, for an
 * MGS-only mount, process the "<fsname>-params" log. The VFS superblock is
 * filled in last by server_fill_super_common().
 *
 * Failures after the device is mounted are cleaned up through
 * server_put_super().
 *
 * Returns int; most error checks and the return statements are not visible
 * here.
 */
1787 static int server_fill_super(struct super_block *sb)
1789 struct lustre_sb_info *lsi = s2lsi(sb);
1790 struct vfsmount *mnt;
1794 /* the One True Mount */
1795 mnt = server_kernel_mount(sb);
1798 CERROR("Unable to mount device %s: %d\n",
1799 lsi->lsi_lmd->lmd_dev, rc);
1803 lsi->lsi_srv_mnt = mnt;
/* server_kernel_mount() stores the parsed disk data on success. */
1805 LASSERT(lsi->lsi_ldd);
1806 CDEBUG(D_MOUNT, "Found service %s for fs '%s' on device %s\n",
1807 lsi->lsi_ldd->ldd_svname, lsi->lsi_ldd->ldd_fsname,
1808 lsi->lsi_lmd->lmd_dev);
/* An obd with this service name already exists: treated as a double mount. */
1810 if (class_name2obd(lsi->lsi_ldd->ldd_svname)) {
1811 LCONSOLE_ERROR_MSG(0x161, "The target named %s is already "
1812 "running. Double-mount may have compromised"
1813 " the disk journal.\n",
1814 lsi->lsi_ldd->ldd_svname);
1820 /* Start MGS before MGC */
1821 if (IS_MGS(lsi->lsi_ldd) && !(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOMGS)){
1822 rc = server_start_mgs(sb);
1827 /* Start MGC before servers */
1828 rc = lustre_start_mgc(sb);
1832 /* Set up all obd devices for service */
1833 if (!(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOSVC) &&
1834 (IS_OST(lsi->lsi_ldd) || IS_MDT(lsi->lsi_ldd))) {
1835 rc = server_start_targets(sb, mnt);
1837 CERROR("Unable to start targets: %d\n", rc);
1840 /* FIXME overmount client here,
1841 or can we just start a client log and client_fill_super on this sb?
1842 We need to make sure server_put_super gets called too - ll_put_super
1843 calls lustre_common_put_super; check there for LSI_SERVER flag,
1845 Probably should start client from new thread so we can return.
1846 Client will not finish until all servers are connected.
1847 Note - MGS-only server does NOT get a client, since there is no
1848 lustre fs associated - the MGS is for all lustre fs's */
1849 } else if (IS_MGS(lsi->lsi_ldd) &&
1850 !(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOMGS)){
1851 struct config_llog_instance cfg;
1854 OBD_ALLOC(logname, MGS_PARAM_MAXLEN);
1855 if (logname == NULL)
1856 GOTO(out_mnt, rc = -ENOMEM);
1857 strcpy(logname, lsi->lsi_ldd->ldd_fsname);
1858 strcat(logname, "-params");
1860 memset(&cfg, 0, sizeof(cfg));
1861 rc = lustre_process_log(sb, logname, &cfg);
/*
 * NOTE(review): the arguments of the error message below are not visible
 * here; if its %s is logname, the buffer is read after this free - confirm.
 */
1862 OBD_FREE(logname, MGS_PARAM_MAXLEN);
1864 CERROR("failed to process parameters %s: %d\n",
1870 rc = server_fill_super_common(sb);
1876 /* We jump here in case of failure while starting targets or MGS.
1877 * In this case we can't just put @mnt and have to do real cleanup
1878 * with stopping targets, etc. */
1879 server_put_super(sb);
1883 /* Get the index from the obd name.
1884 rc = server type, or
1886 if endptr isn't NULL it is set to end of name */
/*
 * Classify a target name and extract its index.
 *
 * The type is read from the three characters after the last '-' in @svname
 * ("MDT" or "OST"). A name ending in "-" LUSTRE_MDC_NAME is handled by
 * stepping back to the previous '-'. If the text after the type is "all",
 * the type is returned with LDD_F_SV_ALL set; otherwise it is parsed as a
 * hexadecimal number.
 *
 * @svname  target name, e.g. <fsname>-OST0001
 * @idx     output index; its assignment is not visible here
 * @endptr  passed to simple_strtoul(); may be NULL
 *
 * NOTE(review): strrchr() returns NULL when @svname has no '-'; the guard
 * for that case is not visible here - confirm it exists.
 */
1887 int server_name2index(char *svname, __u32 *idx, char **endptr)
1889 unsigned long index;
1891 char *dash = strrchr(svname, '-');
1895 /* interpret <fsname>-MDTXXXXX-mdc as mdt, the better way is to pass
1896 * in the fsname, then determine the server index */
1897 if (!strcmp(LUSTRE_MDC_NAME, dash + 1)) {
1899 for (; dash > svname && *dash != '-'; dash--);
1904 if (strncmp(dash + 1, "MDT", 3) == 0)
1905 rc = LDD_F_SV_TYPE_MDT;
1906 else if (strncmp(dash + 1, "OST", 3) == 0)
1907 rc = LDD_F_SV_TYPE_OST;
/* dash + 4 skips the '-' and the three-letter type tag. */
1910 if (strcmp(dash + 4, "all") == 0)
1911 return rc | LDD_F_SV_ALL;
1913 index = simple_strtoul(dash + 4, endptr, 16);
/**
1919 * Calculate timeout value for a target.
 */
/*
 * Derive the recovery window of @obd from mount options and defaults.
 *
 * soft and hard are read from the mount data (recovery_time_soft and
 * recovery_time_hard) and can fall back to OBD_RECOVERY_TIME_SOFT and
 * OBD_RECOVERY_TIME_HARD. Imperative recovery counts as available when the
 * lsi has LSI_IR_CAPABLE and the mount does not carry LMD_FLG_NOIR; the
 * inverse is stored in obd_no_ir. With imperative recovery both values are
 * scaled by factor / OBD_IR_FACTOR_MAX, clamped so that
 * new_soft >= OBD_RECOVERY_TIME_MIN and new_hard >= new_soft.
 *
 * Results are stored in obd_recovery_timeout (never lowered below its
 * current value), obd_recovery_time_hard and obd_recovery_ir_factor.
 *
 * NOTE(review): the assignment of lmd, the guards around the defaults and
 * the write-back of new_soft and new_hard are not visible here.
 */
1921 void server_calc_timeout(struct lustre_sb_info *lsi, struct obd_device *obd)
1923 struct lustre_mount_data *lmd;
1927 bool has_ir = !!(lsi->lsi_flags & LSI_IR_CAPABLE);
1928 int min = OBD_RECOVERY_TIME_MIN;
1930 LASSERT(lsi->lsi_flags & LSI_SERVER);
1934 soft = lmd->lmd_recovery_time_soft;
1935 hard = lmd->lmd_recovery_time_hard;
1936 has_ir = has_ir && !(lmd->lmd_flags & LMD_FLG_NOIR);
1937 obd->obd_no_ir = !has_ir;
1941 soft = OBD_RECOVERY_TIME_SOFT;
1943 hard = OBD_RECOVERY_TIME_HARD;
1945 /* target may have ir_factor configured. */
1946 factor = OBD_IR_FACTOR_DEFAULT;
1947 if (obd->obd_recovery_ir_factor)
1948 factor = obd->obd_recovery_ir_factor;
1951 int new_soft = soft;
1952 int new_hard = hard;
1954 /* adjust timeout value by imperative recovery */
1956 new_soft = (soft * factor) / OBD_IR_FACTOR_MAX;
1957 new_hard = (hard * factor) / OBD_IR_FACTOR_MAX;
1959 /* make sure the timeout is not too short */
1960 new_soft = max(min, new_soft);
1961 new_hard = max(new_soft, new_hard);
1963 LCONSOLE_INFO("%s: Imperative Recovery enabled, recovery "
1964 "window shrunk from %d-%d down to %d-%d\n",
1965 obd->obd_name, soft, hard, new_soft, new_hard);
/* An already larger obd_recovery_timeout is kept as is. */
1972 obd->obd_recovery_timeout = max(obd->obd_recovery_timeout, soft);
1973 obd->obd_recovery_time_hard = hard;
1974 obd->obd_recovery_ir_factor = factor;
1976 EXPORT_SYMBOL(server_calc_timeout);
1978 /*************** mount common between server and client ***************/
/*
 * Teardown shared by client and server mounts: drop the reference this
 * superblock holds on the MGC, then the reference on the mounted disk.
 *
 * A lustre_stop_mgc() result of 0 or -ENOENT is not treated as an error.
 * Other results are either logged as errors or, for the busy case described
 * below, only traced.
 *
 * NOTE(review): the test that tells the busy case apart and the call that
 * drops the disk reference are not visible here.
 */
1981 int lustre_common_put_super(struct super_block *sb)
1986 CDEBUG(D_MOUNT, "dropping sb %p\n", sb);
1988 /* Drop a ref to the MGC */
1989 rc = lustre_stop_mgc(sb);
1990 if (rc && (rc != -ENOENT)) {
1992 CERROR("Can't stop MGC: %d\n", rc);
1995 /* BUSY just means that there's some other obd that
1996 needs the mgc. Let him clean it up. */
1997 CDEBUG(D_MOUNT, "MGC still in use\n");
1999 /* Drop a ref to the mounted disk */
/*
 * Dump the parsed mount data through PRINT_CMD with PRINT_MASK.
 *
 * The profile is printed only for client mounts; the recovery times only
 * when non-zero. Each excluded index is printed as OSTxxxx in hexadecimal.
 */
2005 static void lmd_print(struct lustre_mount_data *lmd)
2009 PRINT_CMD(PRINT_MASK, " mount data:\n");
2010 if (lmd_is_client(lmd))
2011 PRINT_CMD(PRINT_MASK, "profile: %s\n", lmd->lmd_profile);
2012 PRINT_CMD(PRINT_MASK, "device: %s\n", lmd->lmd_dev);
2013 PRINT_CMD(PRINT_MASK, "flags: %x\n", lmd->lmd_flags);
2016 PRINT_CMD(PRINT_MASK, "options: %s\n", lmd->lmd_opts);
2018 if (lmd->lmd_recovery_time_soft)
2019 PRINT_CMD(PRINT_MASK, "recovery time soft: %d\n",
2020 lmd->lmd_recovery_time_soft);
2022 if (lmd->lmd_recovery_time_hard)
2023 PRINT_CMD(PRINT_MASK, "recovery time hard: %d\n",
2024 lmd->lmd_recovery_time_hard);
2026 for (i = 0; i < lmd->lmd_exclude_count; i++) {
2027 PRINT_CMD(PRINT_MASK, "exclude %d: OST%04x\n", i,
2028 lmd->lmd_exclude[i]);
2032 /* Is this server on the exclusion list */
/*
 * Tell whether target @svname was excluded on the mount line of @sb.
 *
 * The name is decoded with server_name2index(); anything that is not an
 * OST is never excluded. For an OST the index is compared against every
 * entry of lmd_exclude and a warning is logged on a match.
 *
 * Returns int; the return values are not visible here.
 */
2033 int lustre_check_exclusion(struct super_block *sb, char *svname)
2035 struct lustre_sb_info *lsi = s2lsi(sb);
2036 struct lustre_mount_data *lmd = lsi->lsi_lmd;
2041 rc = server_name2index(svname, &index, NULL);
2042 if (rc != LDD_F_SV_TYPE_OST)
2043 /* Only exclude OSTs */
2046 CDEBUG(D_MOUNT, "Check exclusion %s (%d) in %d of %s\n", svname,
2047 index, lmd->lmd_exclude_count, lmd->lmd_dev);
2049 for(i = 0; i < lmd->lmd_exclude_count; i++) {
2050 if (index == lmd->lmd_exclude[i]) {
2051 CWARN("Excluding %s (on exclusion list)\n", svname);
2058 /* mount -v -o exclude=lustre-OST0001:lustre-OST0002 -t lustre ... */
/*
 * Build lmd->lmd_exclude from the value of an exclude= mount option.
 *
 * @ptr points at the '=' in front of the list. Names are decoded one by
 * one with server_name2index() into a temporary array sized from the
 * string length (at least 8 characters per name). Only OST indices are
 * kept; other target types are traced and ignored. The loop stops once the
 * temporary array is full. The collected indices are then copied into a
 * permanent array that lustre_free_lsi() releases, and the temporary array
 * is freed.
 *
 * If the permanent allocation fails, lmd_exclude_count is reset to 0.
 *
 * NOTE(review): the pointer advance inside the loop, the handling of a
 * failed temporary allocation and the return statements are not visible
 * here.
 */
2059 static int lmd_make_exclusion(struct lustre_mount_data *lmd, char *ptr)
2061 char *s1 = ptr, *s2;
2062 __u32 index, *exclude_list;
2066 /* The shortest an ost name can be is 8 chars: -OST0000.
2067 We don't actually know the fsname at this time, so in fact
2068 a user could specify any fsname. */
2069 devmax = strlen(ptr) / 8 + 1;
2071 /* temp storage until we figure out how many we have */
2072 OBD_ALLOC(exclude_list, sizeof(index) * devmax);
2076 /* we enter this fn pointing at the '=' */
2077 while (*s1 && *s1 != ' ' && *s1 != ',') {
2079 rc = server_name2index(s1, &index, &s2);
2081 CERROR("Can't parse server name '%s'\n", s1);
2084 if (rc == LDD_F_SV_TYPE_OST)
2085 exclude_list[lmd->lmd_exclude_count++] = index;
2087 CDEBUG(D_MOUNT, "ignoring exclude %.7s\n", s1);
2089 /* now we are pointing at ':' (next exclude)
2090 or ',' (end of excludes) */
/* Bounds the writes into exclude_list, which has devmax slots. */
2091 if (lmd->lmd_exclude_count >= devmax)
2094 if (rc >= 0) /* non-err */
2097 if (lmd->lmd_exclude_count) {
2098 /* permanent, freed in lustre_free_lsi */
2099 OBD_ALLOC(lmd->lmd_exclude, sizeof(index) *
2100 lmd->lmd_exclude_count);
2101 if (lmd->lmd_exclude) {
2102 memcpy(lmd->lmd_exclude, exclude_list,
2103 sizeof(index) * lmd->lmd_exclude_count);
2106 lmd->lmd_exclude_count = 0;
2109 OBD_FREE(exclude_list, sizeof(index) * devmax);
/*
 * Store the value of a mgssec= option in lmd->lmd_mgssec.
 *
 * Any previous value is freed first. The value runs from @ptr up to the
 * next ',' or, when there is none, to the end of the string. It is copied
 * into a new buffer of length + 1 bytes and terminated explicitly.
 *
 * Returns int; the return values, including the one for a failed
 * allocation, are not visible here.
 */
2113 static int lmd_parse_mgssec(struct lustre_mount_data *lmd, char *ptr)
2118 if (lmd->lmd_mgssec != NULL) {
2119 OBD_FREE(lmd->lmd_mgssec, strlen(lmd->lmd_mgssec) + 1);
2120 lmd->lmd_mgssec = NULL;
2123 tail = strchr(ptr, ',');
2125 length = strlen(ptr);
2127 length = tail - ptr;
2129 OBD_ALLOC(lmd->lmd_mgssec, length + 1);
2130 if (lmd->lmd_mgssec == NULL)
2133 memcpy(lmd->lmd_mgssec, ptr, length);
2134 lmd->lmd_mgssec[length] = '\0';
2138 /** Parse mount line options
2139 * e.g. mount -v -t lustre -o abort_recov uml1:uml2:/lustre-client /mnt/lustre
2140 * dev is passed as device=uml1:/lustre by mount.lustre
 */
/*
 * Parse the mount option string into @lmd.
 *
 * Rejects missing data and data that looks like an old binary
 * lustre_mount_data (magic compared on its upper 24 bits). The options
 * handled here are abort_recov, recovery_time_soft=, recovery_time_hard=,
 * noir, nosvc, nomgs, writeconf, mgssec=, exclude= and device=. An option
 * handled here is removed from @options in place with memmove(), so the
 * remainder can be saved as lmd_opts for the backing filesystem.
 *
 * device= is mandatory. A device containing ":/" marks a client mount and
 * yields the profile "<fsname>-client". lmd_dev, lmd_profile and lmd_opts
 * are allocated here and released by lustre_free_lsi().
 *
 * NOTE(review): options are matched with strncmp() on a prefix only, so a
 * longer word starting with an option name (for example "noirq") also
 * matches - confirm that this is acceptable.
 */
2142 static int lmd_parse(char *options, struct lustre_mount_data *lmd)
2144 char *s1, *s2, *devname = NULL;
2145 struct lustre_mount_data *raw = (struct lustre_mount_data *)options;
2151 LCONSOLE_ERROR_MSG(0x162, "Missing mount data: check that "
2152 "/sbin/mount.lustre is installed.\n");
2156 /* Options should be a string - try to detect old lmd data */
2157 if ((raw->lmd_magic & 0xffffff00) == (LMD_MAGIC & 0xffffff00)) {
2158 LCONSOLE_ERROR_MSG(0x163, "You're using an old version of "
2159 "/sbin/mount.lustre. Please install "
2160 "version %s\n", LUSTRE_VERSION_STRING);
2163 lmd->lmd_magic = LMD_MAGIC;
2165 /* Set default flags here */
2170 int time_min = OBD_RECOVERY_TIME_MIN;
2172 /* Skip whitespace and extra commas */
2173 while (*s1 == ' ' || *s1 == ',')
2176 /* Client options are parsed in ll_options: eg. flock,
2179 /* Parse non-ldiskfs options here. Rather than modifying
2180 ldiskfs, we just zero these out here */
2181 if (strncmp(s1, "abort_recov", 11) == 0) {
2182 lmd->lmd_flags |= LMD_FLG_ABORT_RECOV;
/* Recovery times are decimal and raised to at least OBD_RECOVERY_TIME_MIN. */
2184 } else if (strncmp(s1, "recovery_time_soft=", 19) == 0) {
2185 lmd->lmd_recovery_time_soft = max_t(int,
2186 simple_strtoul(s1 + 19, NULL, 10), time_min);
2188 } else if (strncmp(s1, "recovery_time_hard=", 19) == 0) {
2189 lmd->lmd_recovery_time_hard = max_t(int,
2190 simple_strtoul(s1 + 19, NULL, 10), time_min);
2192 } else if (strncmp(s1, "noir", 4) == 0) {
2193 lmd->lmd_flags |= LMD_FLG_NOIR; /* test purpose only. */
2195 } else if (strncmp(s1, "nosvc", 5) == 0) {
2196 lmd->lmd_flags |= LMD_FLG_NOSVC;
2198 } else if (strncmp(s1, "nomgs", 5) == 0) {
2199 lmd->lmd_flags |= LMD_FLG_NOMGS;
2201 } else if (strncmp(s1, "writeconf", 9) == 0) {
2202 lmd->lmd_flags |= LMD_FLG_WRITECONF;
2204 } else if (strncmp(s1, "mgssec=", 7) == 0) {
2205 rc = lmd_parse_mgssec(lmd, s1 + 7);
2209 /* ost exclusion list */
/* s1 + 7 is the '=' itself, which is where lmd_make_exclusion() expects to start. */
2210 } else if (strncmp(s1, "exclude=", 8) == 0) {
2211 rc = lmd_make_exclusion(lmd, s1 + 7);
2216 /* Linux 2.4 doesn't pass the device, so we stuck it at the
2217 end of the options. */
2218 else if (strncmp(s1, "device=", 7) == 0) {
2220 /* terminate options right before device. device
2221 must be the last one. */
2227 s2 = strchr(s1, ',');
/* Overwrites the option just handled with the rest of the string. */
2235 memmove(s1, s2, strlen(s2) + 1);
2241 LCONSOLE_ERROR_MSG(0x164, "Can't find the device name "
2242 "(need mount option 'device=...')\n");
2246 s1 = strstr(devname, ":/");
2249 lmd->lmd_flags |= LMD_FLG_CLIENT;
2250 /* Remove leading /s from fsname */
2251 while (*++s1 == '/') ;
2252 /* Freed in lustre_free_lsi */
/* + 8 covers the 7 characters of "-client" plus the terminator. */
2253 OBD_ALLOC(lmd->lmd_profile, strlen(s1) + 8);
2254 if (!lmd->lmd_profile)
2256 sprintf(lmd->lmd_profile, "%s-client", s1);
2259 /* Freed in lustre_free_lsi */
2260 OBD_ALLOC(lmd->lmd_dev, strlen(devname) + 1);
2263 strcpy(lmd->lmd_dev, devname);
2265 /* Save mount options */
/* Walks back over trailing commas and blanks; the trim itself is not visible here. */
2266 s1 = options + strlen(options) - 1;
2267 while (s1 >= options && (*s1 == ',' || *s1 == ' '))
2269 if (*options != 0) {
2270 /* Freed in lustre_free_lsi */
2271 OBD_ALLOC(lmd->lmd_opts, strlen(options) + 1);
2274 strcpy(lmd->lmd_opts, options);
2278 lmd->lmd_magic = LMD_MAGIC;
2283 CERROR("Bad mount options %s\n", options);
/*
 * Wrapper handed to lustre_fill_super() as its data argument. It is
 * initialised as {data, mnt} in lustre_get_sb(): lmd2_data is read by
 * lustre_fill_super() as the option string, lmd2_mnt is passed on to the
 * client fill_super callback.
 */
2287 struct lustre_mount_data2 {
2289 struct vfsmount *lmd2_mnt;
2292 /** This is the entry point for the mount call into Lustre.
2293 * This is called when a server or client is mounted,
2294 * and this is where we start setting things up.
2295 * @param data Mount options (e.g. -o flock,abort_recov)
 */
/*
 * fill_super callback used for both client and server mounts.
 *
 * @sb      superblock being set up
 * @data    struct lustre_mount_data2 wrapping the option string and vfsmount
 * @silent  not referenced in the visible lines
 *
 * Creates the lsi, waits for obd zombies of an earlier mount, parses the
 * options with lmd_parse() (any failure becomes -EINVAL) and then branches:
 * a client mount starts the MGC and calls the registered client_fill_super
 * callback; a server mount sets LSI_SERVER and calls server_fill_super().
 * Both callees clean up after themselves on failure, so the lsi is not put
 * here.
 *
 * NOTE(review): the failure message reads lmd->lmd_dev after the callee may
 * have freed the lsi; it is guarded only by s2lsi(sb) being non-NULL -
 * confirm that lmd cannot dangle at that point.
 */
2297 int lustre_fill_super(struct super_block *sb, void *data, int silent)
2299 struct lustre_mount_data *lmd;
2300 struct lustre_mount_data2 *lmd2 = data;
2301 struct lustre_sb_info *lsi;
2305 CDEBUG(D_MOUNT|D_VFSTRACE, "VFS Op: sb %p\n", sb);
2307 lsi = lustre_init_lsi(sb);
2313 * Disable lockdep during mount, because mount locking patterns are
2319 * LU-639: the obd cleanup of last mount may not finish yet, wait here.
2321 obd_zombie_barrier();
2323 /* Figure out the lmd from the mount options */
2324 if (lmd_parse((char *)(lmd2->lmd2_data), lmd)) {
2326 GOTO(out, rc = -EINVAL);
2329 if (lmd_is_client(lmd)) {
2330 CDEBUG(D_MOUNT, "Mounting client %s\n", lmd->lmd_profile);
/* The callback is registered by another module through lustre_register_client_fill_super(). */
2331 if (!client_fill_super) {
2332 LCONSOLE_ERROR_MSG(0x165, "Nothing registered for "
2333 "client mount! Is the 'lustre' "
2334 "module loaded?\n");
2338 rc = lustre_start_mgc(sb);
2343 /* Connect and start */
2344 /* (should always be ll_fill_super) */
2345 rc = (*client_fill_super)(sb, lmd2->lmd2_mnt);
2346 /* c_f_s will call lustre_common_put_super on failure */
2349 CDEBUG(D_MOUNT, "Mounting server from %s\n", lmd->lmd_dev);
2350 lsi->lsi_flags |= LSI_SERVER;
2351 rc = server_fill_super(sb);
2352 /* s_f_s calls lustre_start_mgc after the mount because we need
2353 the MGS nids which are stored on disk. Plus, we may
2354 need to start the MGS first. */
2355 /* s_f_s will call server_put_super on failure */
2358 /* If error happens in fill_super() call, @lsi will be killed there.
2359 * This is why we do not put it here. */
2363 CERROR("Unable to mount %s (%d)\n",
2364 s2lsi(sb) ? lmd->lmd_dev : "", rc);
2366 CDEBUG(D_SUPER, "Mount %s complete\n",
2374 /* We can't call ll_fill_super by name because it lives in a module that
2375 must be loaded after this one. */
/*
 * Record the client-side fill_super callback. lustre_fill_super() calls it
 * for client mounts and refuses the mount while it is still NULL.
 *
 * @cfs  callback taking the superblock and the vfsmount
 */
2376 void lustre_register_client_fill_super(int (*cfs)(struct super_block *sb,
2377 struct vfsmount *mnt))
2379 client_fill_super = cfs;
/*
 * Record the callback that lustre_kill_super() runs for non-server
 * superblocks before the generic teardown.
 */
2382 void lustre_register_kill_super_cb(void (*cfs)(struct super_block *sb))
2384 kill_super_cb = cfs;
2387 /***************** FS registration ******************/
/*
 * get_sb method of the "lustre" filesystem type. Both variants create a
 * device-less superblock with get_sb_nodev() and lustre_fill_super().
 *
 * On kernels from 2.6.18 on, the raw mount data and the vfsmount are
 * wrapped in a struct lustre_mount_data2 that lives on this stack frame for
 * the duration of the call.
 *
 * NOTE(review): the variant for older kernels passes the raw data pointer,
 * while lustre_fill_super() always reads its argument as a
 * struct lustre_mount_data2 - confirm whether that path is still supported.
 */
2389 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18))
2390 struct super_block * lustre_get_sb(struct file_system_type *fs_type, int flags,
2391 const char *devname, void * data)
2393 return get_sb_nodev(fs_type, flags, data, lustre_fill_super);
2396 int lustre_get_sb(struct file_system_type *fs_type, int flags,
2397 const char *devname, void * data, struct vfsmount *mnt)
2399 struct lustre_mount_data2 lmd2 = {data, mnt};
2401 return get_sb_nodev(fs_type, flags, &lmd2, lustre_fill_super, mnt);
/*
 * kill_sb method of the "lustre" filesystem type. Runs the registered
 * kill_super callback when the superblock still has an lsi and is not a
 * server mount, then always finishes with kill_anon_super().
 */
2405 void lustre_kill_super(struct super_block *sb)
2407 struct lustre_sb_info *lsi = s2lsi(sb);
2409 if (kill_super_cb && lsi && !(lsi->lsi_flags & LSI_SERVER))
2410 (*kill_super_cb)(sb);
2412 kill_anon_super(sb);
2415 /** Register the "lustre" fs type
 */
/*
 * Filesystem type registered by lustre_register_fs(). Mounting goes through
 * lustre_get_sb(), teardown through lustre_kill_super(). The flag set
 * depends on FS_HAS_FIEMAP; the flag chosen by that conditional is not
 * visible here.
 */
2417 struct file_system_type lustre_fs_type = {
2418 .owner = THIS_MODULE,
2420 .get_sb = lustre_get_sb,
2421 .kill_sb = lustre_kill_super,
2422 .fs_flags = FS_BINARY_MOUNTDATA | FS_REQUIRES_DEV |
2423 #ifdef FS_HAS_FIEMAP
2426 LL_RENAME_DOES_D_MOVE,
/* Register lustre_fs_type with the VFS; returns the result of register_filesystem(). */
2429 int lustre_register_fs(void)
2431 return register_filesystem(&lustre_fs_type);
/* Remove lustre_fs_type from the VFS; returns the result of unregister_filesystem(). */
2434 int lustre_unregister_fs(void)
2436 return unregister_filesystem(&lustre_fs_type);
/*
 * Symbols made available to other kernel modules: the two registration
 * hooks, the common teardown, config log start and end, the server mount
 * get/put helpers, target registration and name parsing, and two helpers
 * defined elsewhere in this file.
 */
2439 EXPORT_SYMBOL(lustre_register_client_fill_super);
2440 EXPORT_SYMBOL(lustre_register_kill_super_cb);
2441 EXPORT_SYMBOL(lustre_common_put_super);
2442 EXPORT_SYMBOL(lustre_process_log);
2443 EXPORT_SYMBOL(lustre_end_log);
2444 EXPORT_SYMBOL(server_get_mount);
2445 EXPORT_SYMBOL(server_get_mount_2);
2446 EXPORT_SYMBOL(server_put_mount);
2447 EXPORT_SYMBOL(server_put_mount_2);
2448 EXPORT_SYMBOL(server_register_target);
2449 EXPORT_SYMBOL(server_name2index);
2450 EXPORT_SYMBOL(server_mti_print);
2451 EXPORT_SYMBOL(do_lcfg);