4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2012, Whamcloud, Inc.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lustre/obdclass/obd_mount.c
38 * Client/server mount routines
40 * Author: Nathan Rutman <nathan@clusterfs.com>
#define DEBUG_SUBSYSTEM S_CLASS
/*
 * Debug mask used by the mount code's CDEBUG() calls.  The expansion is
 * parenthesized so the mask stays a single operand when it is combined
 * with other operators (e.g. ~D_MOUNT or D_MOUNT & x).
 */
#define D_MOUNT (D_SUPER|D_CONFIG) /*|D_WARNING */
/* ldd_print() and server_mti_print() log through PRINT_CMD at PRINT_MASK. */
#define PRINT_CMD CDEBUG
#define PRINT_MASK (D_SUPER|D_CONFIG)
51 #include <lustre_fsfilt.h>
52 #include <obd_class.h>
53 #include <lustre/lustre_user.h>
54 #include <linux/version.h>
55 #include <lustre_log.h>
56 #include <lustre_disk.h>
57 #include <lustre_param.h>
58 #ifdef HAVE_KERNEL_LOCKED
59 #include <linux/smp_lock.h>
/*
 * Function-pointer hooks, both initialised to NULL.  Nothing in this part
 * of the file assigns or calls them.
 */
62 static int (*client_fill_super)(struct super_block *sb,
63 struct vfsmount *mnt) = NULL;
64 static void (*kill_super_cb)(struct super_block *sb) = NULL;
66 /*********** mount lookup *********/
/* Mutex taken around lookups in, and changes to, server_mount_info_list. */
68 CFS_DEFINE_MUTEX(lustre_mount_info_lock);
/* List of struct lustre_mount_info entries, linked via lmi_list_chain. */
69 static CFS_LIST_HEAD(server_mount_info_list);
/*
 * Look up a registered server mount by name.
 *
 * Walks server_mount_info_list and compares each entry's lmi_name with
 * @name using strcmp().  Most callers in this file hold
 * lustre_mount_info_lock around the call.
 *
 * NOTE(review): the statements that return the match (or the not-found
 * result) are not visible in this view; callers test the result for
 * truth, so presumably NULL is returned when nothing matches - confirm.
 */
71 static struct lustre_mount_info *server_find_mount(const char *name)
74 struct lustre_mount_info *lmi;
77 cfs_list_for_each(tmp, &server_mount_info_list) {
78 lmi = cfs_list_entry(tmp, struct lustre_mount_info,
80 if (strcmp(name, lmi->lmi_name) == 0)
86 /* we must register an obd for a mount before we call the setup routine.
87 *_setup will call lustre_get_mount to get the mnt struct
88 by obd_name, since we can't pass the pointer to setup. */
/*
 * Register a mount under @name in server_mount_info_list.
 *
 * Allocates a lustre_mount_info and a private copy of @name, then, with
 * lustre_mount_info_lock held, links the entry into the list.  If @name is
 * already registered both allocations are freed and an error is logged.
 *
 * NOTE(review): the remaining parameters, the allocation checks and the
 * return statements are not visible in this view.
 */
89 static int server_register_mount(const char *name, struct super_block *sb,
92 struct lustre_mount_info *lmi;
98 OBD_ALLOC(lmi, sizeof(*lmi));
101 OBD_ALLOC(name_cp, strlen(name) + 1);
/* presumably the name_cp allocation-failure path: undo the lmi allocation */
103 OBD_FREE(lmi, sizeof(*lmi));
106 strcpy(name_cp, name);
108 cfs_mutex_lock(&lustre_mount_info_lock);
/* duplicate name: drop the lock and release everything allocated above */
110 if (server_find_mount(name)) {
111 cfs_mutex_unlock(&lustre_mount_info_lock);
112 OBD_FREE(lmi, sizeof(*lmi));
113 OBD_FREE(name_cp, strlen(name) + 1);
114 CERROR("Already registered %s\n", name);
/* the list entry takes ownership of the copied name */
117 lmi->lmi_name = name_cp;
120 cfs_list_add(&lmi->lmi_list_chain, &server_mount_info_list);
122 cfs_mutex_unlock(&lustre_mount_info_lock);
124 CDEBUG(D_MOUNT, "reg_mnt %p from %s, vfscount=%d\n",
126 lmi->lmi_mnt ? mnt_get_count(lmi->lmi_mnt) : -1);
131 /* when an obd no longer needs a mount */
/*
 * Remove the registration for @name.
 *
 * With lustre_mount_info_lock held: look the entry up, log an error if it
 * is not registered, otherwise free the copied name, unlink the entry from
 * the list and free it.  The lock is released on both paths.
 *
 * NOTE(review): the not-found test and the return values are not visible
 * in this view.
 */
132 static int server_deregister_mount(const char *name)
134 struct lustre_mount_info *lmi;
137 cfs_mutex_lock(&lustre_mount_info_lock);
138 lmi = server_find_mount(name);
140 cfs_mutex_unlock(&lustre_mount_info_lock);
141 CERROR("%s not registered\n", name);
145 CDEBUG(D_MOUNT, "dereg_mnt %p from %s, vfscount=%d\n",
147 lmi->lmi_mnt ? mnt_get_count(lmi->lmi_mnt) : -1);
/* the name was allocated with strlen + 1 bytes, so free the same size */
149 OBD_FREE(lmi->lmi_name, strlen(lmi->lmi_name) + 1);
150 cfs_list_del(&lmi->lmi_list_chain);
151 OBD_FREE(lmi, sizeof(*lmi));
152 cfs_mutex_unlock(&lustre_mount_info_lock);
157 /* obd's look up a registered mount using their obdname. This is just
158 for initial obd setup to find the mount struct. It should not be
159 called every time you want to mntget. */
/*
 * Find the mount registered under @name and take references on it.
 *
 * The lookup is done under lustre_mount_info_lock.  On a hit, mntget() is
 * called on lmi_mnt and the lsi_mounts counter of the superblock's
 * lustre_sb_info is incremented; the new counts are logged.
 *
 * NOTE(review): the entry is dereferenced after the lock has been dropped,
 * while server_deregister_mount() frees entries under that lock - confirm
 * that callers cannot race with deregistration.
 * NOTE(review): the not-found test and the return statements are not
 * visible in this view.
 */
160 struct lustre_mount_info *server_get_mount(const char *name)
162 struct lustre_mount_info *lmi;
163 struct lustre_sb_info *lsi;
166 cfs_mutex_lock(&lustre_mount_info_lock);
167 lmi = server_find_mount(name);
168 cfs_mutex_unlock(&lustre_mount_info_lock);
170 CERROR("Can't find mount for %s\n", name);
173 lsi = s2lsi(lmi->lmi_sb);
176 mntget(lmi->lmi_mnt);
177 cfs_atomic_inc(&lsi->lsi_mounts);
179 CDEBUG(D_MOUNT, "get_mnt %p from %s, refs=%d, vfscount=%d\n",
180 lmi->lmi_mnt, name, cfs_atomic_read(&lsi->lsi_mounts),
181 lmi->lmi_mnt ? mnt_get_count(lmi->lmi_mnt) - 1 : -1);
187 /** Used by mdt to get mount_info from obdname.
188 * There is no blocking when using the mount_info.
189 * Do not use server_get_mount for this purpose. */
/*
 * Look up the mount registered under @name without taking references.
 *
 * Unlike server_get_mount(), the visible code performs only the locked
 * lookup and logs an error when the name is unknown; no mntget() or
 * lsi_mounts increment appears here.
 *
 * NOTE(review): the not-found test and the return statement are not
 * visible in this view.
 */
191 struct lustre_mount_info *server_get_mount_2(const char *name)
193 struct lustre_mount_info *lmi;
196 cfs_mutex_lock(&lustre_mount_info_lock);
197 lmi = server_find_mount(name);
198 cfs_mutex_unlock(&lustre_mount_info_lock);
200 CERROR("Can't find mount for %s\n", name);
/*
 * Helper operating on a vfsmount.  When HAVE_KERNEL_LOCKED is defined it
 * first checks kernel_locked().
 *
 * NOTE(review): the rest of the body is not visible in this view; judging
 * by the name it presumably drops @mnt with mntput(), releasing the big
 * kernel lock around the call when it is held - confirm.
 */
205 static void unlock_mntput(struct vfsmount *mnt)
207 #ifdef HAVE_KERNEL_LOCKED
208 /* for kernel < 2.6.37 */
209 if (kernel_locked()) {
/*
 * Forward declaration: server_put_mount() calls lustre_put_lsi(); its
 * definition is not in this part of the file.
 */
221 static int lustre_put_lsi(struct super_block *sb);
223 /* to be called from obd_cleanup methods */
/*
 * Release the mount reference an obd holds on the mount named @name.
 *
 * The vfs count is sampled up front because @mnt may not be dereferenced
 * once it has been put.  The registered entry is then looked up under
 * lustre_mount_info_lock, checked to refer to @mnt, and lustre_put_lsi()
 * is called on its superblock; a non-zero result is logged as the last
 * put.  Finally the name is deregistered.
 *
 * NOTE(review): the put of @mnt itself, the busy-count test that guards
 * the "mount busy" message and the return values are not visible in this
 * view.
 */
224 int server_put_mount(const char *name, struct vfsmount *mnt)
226 struct lustre_mount_info *lmi;
227 struct lustre_sb_info *lsi;
231 /* This might be the last one, can't deref after this */
233 count = mnt_get_count(mnt) - 1;
237 cfs_mutex_lock(&lustre_mount_info_lock);
238 lmi = server_find_mount(name);
239 cfs_mutex_unlock(&lustre_mount_info_lock);
241 CERROR("Can't find mount for %s\n", name);
244 lsi = s2lsi(lmi->lmi_sb);
245 LASSERT(lmi->lmi_mnt == mnt);
247 CDEBUG(D_MOUNT, "put_mnt %p from %s, refs=%d, vfscount=%d\n",
248 lmi->lmi_mnt, name, cfs_atomic_read(&lsi->lsi_mounts), count);
250 if (lustre_put_lsi(lmi->lmi_sb)) {
251 CDEBUG(D_MOUNT, "Last put of mnt %p from %s, vfscount=%d\n",
252 lmi->lmi_mnt, name, count);
253 /* last mount is the One True Mount */
255 CERROR("%s: mount busy, vfscount=%d!\n", name, count);
258 /* this obd should never need the mount again */
259 server_deregister_mount(name);
264 /* Corresponding to server_get_mount_2 */
/*
 * Counterpart of server_get_mount_2(); takes the mount name and the
 * vfsmount.
 * NOTE(review): the body is not visible in this view.
 */
265 int server_put_mount_2(const char *name, struct vfsmount *mnt)
271 /******* mount helper utilities *********/
/*
 * Dump the fields of @ldd (server name, uuid, fs name, index, config
 * version, flags, disk fs type, mount options, params, user comment)
 * through PRINT_CMD at PRINT_MASK, one line per field.
 */
274 static void ldd_print(struct lustre_disk_data *ldd)
276 PRINT_CMD(PRINT_MASK, " disk data:\n");
277 PRINT_CMD(PRINT_MASK, "server: %s\n", ldd->ldd_svname);
278 PRINT_CMD(PRINT_MASK, "uuid: %s\n", (char *)ldd->ldd_uuid);
279 PRINT_CMD(PRINT_MASK, "fs: %s\n", ldd->ldd_fsname);
280 PRINT_CMD(PRINT_MASK, "index: %04x\n", ldd->ldd_svindex);
281 PRINT_CMD(PRINT_MASK, "config: %d\n", ldd->ldd_config_ver);
282 PRINT_CMD(PRINT_MASK, "flags: %#x\n", ldd->ldd_flags);
283 PRINT_CMD(PRINT_MASK, "diskfs: %s\n", MT_STR(ldd));
284 PRINT_CMD(PRINT_MASK, "options: %s\n", ldd->ldd_mount_opts);
285 PRINT_CMD(PRINT_MASK, "params: %s\n", ldd->ldd_params);
286 PRINT_CMD(PRINT_MASK, "comment: %s\n", ldd->ldd_userdata);
/*
 * Read the on-disk lustre_disk_data from MOUNT_DATA_FILE into @ldd.
 *
 * Runs between push_ctxt()/pop_ctxt() on @mount_ctxt.  The file is opened
 * read-only and rejected with -EINVAL (via the out_close label) when:
 *   - its size differs from sizeof(*ldd),
 *   - the read reports a problem,
 *   - ldd_magic is not LDD_MAGIC (no byte swabbing is attempted),
 *   - it carries incompat or rocompat feature bits outside
 *     LDD_INCOMPAT_SUPP / LDD_ROCOMPAT_SUPP.
 *
 * NOTE(review): the open/read result tests, the out_close label and the
 * return statement are not visible in this view.
 */
290 static int ldd_parse(struct lvfs_run_ctxt *mount_ctxt,
291 struct lustre_disk_data *ldd)
293 struct lvfs_run_ctxt saved;
300 push_ctxt(&saved, mount_ctxt, NULL);
302 file = filp_open(MOUNT_DATA_FILE, O_RDONLY, 0644);
305 CERROR("cannot open %s: rc = %d\n", MOUNT_DATA_FILE, rc);
309 len = i_size_read(file->f_dentry->d_inode);
310 CDEBUG(D_MOUNT, "Have %s, size %lu\n", MOUNT_DATA_FILE, len);
/* the file must be exactly one lustre_disk_data record */
311 if (len != sizeof(*ldd)) {
/* NOTE(review): "%u" is paired with an (int) argument below */
312 CERROR("disk data size does not match: see %lu expect %u\n",
313 len, (int)sizeof(*ldd));
314 GOTO(out_close, rc = -EINVAL);
317 rc = lustre_fread(file, ldd, len, &off);
319 CERROR("error reading %s: read %d of %lu\n",
320 MOUNT_DATA_FILE, rc, len);
321 GOTO(out_close, rc = -EINVAL);
325 if (ldd->ldd_magic != LDD_MAGIC) {
326 /* FIXME add swabbing support */
327 CERROR("Bad magic in %s: %x!=%x\n", MOUNT_DATA_FILE,
328 ldd->ldd_magic, LDD_MAGIC);
329 GOTO(out_close, rc = -EINVAL);
/* refuse any feature bit this code does not list as supported */
332 if (ldd->ldd_feature_incompat & ~LDD_INCOMPAT_SUPP) {
333 CERROR("%s: unsupported incompat filesystem feature(s) %x\n",
335 ldd->ldd_feature_incompat & ~LDD_INCOMPAT_SUPP);
336 GOTO(out_close, rc = -EINVAL);
338 if (ldd->ldd_feature_rocompat & ~LDD_ROCOMPAT_SUPP) {
339 CERROR("%s: unsupported read-only filesystem feature(s) %x\n",
341 ldd->ldd_feature_rocompat & ~LDD_ROCOMPAT_SUPP);
342 /* Do something like remount filesystem read-only */
343 GOTO(out_close, rc = -EINVAL);
349 pop_ctxt(&saved, mount_ctxt, NULL);
/*
 * Write @ldd back to MOUNT_DATA_FILE.
 *
 * A zero ldd_magic is special-cased first (the action taken is not visible
 * in this view); otherwise the magic must be LDD_MAGIC.  ldd_config_ver is
 * incremented before the write.  The file is opened O_RDWR|O_SYNC inside
 * @mount_ctxt and sizeof(struct lustre_disk_data) bytes are written; a
 * failed write leaves through out_close with -EINVAL.
 *
 * NOTE(review): the open/write result tests, the out_close label and the
 * return statement are not visible in this view.
 */
353 static int ldd_write(struct lvfs_run_ctxt *mount_ctxt,
354 struct lustre_disk_data *ldd)
356 struct lvfs_run_ctxt saved;
359 unsigned long len = sizeof(struct lustre_disk_data);
363 if (ldd->ldd_magic == 0)
366 LASSERT(ldd->ldd_magic == LDD_MAGIC);
/* every write bumps the on-disk config version */
368 ldd->ldd_config_ver++;
370 push_ctxt(&saved, mount_ctxt, NULL);
372 file = filp_open(MOUNT_DATA_FILE, O_RDWR|O_SYNC, 0644);
375 CERROR("cannot open %s: rc = %d\n", MOUNT_DATA_FILE, rc);
379 rc = lustre_fwrite(file, ldd, len, &off);
/* NOTE(review): the message says "read" although this is the write path */
381 CERROR("error writing %s: read %d of %lu\n",
382 MOUNT_DATA_FILE, rc, len);
383 GOTO(out_close, rc = -EINVAL);
391 pop_ctxt(&saved, mount_ctxt, NULL);
396 /**************** config llog ********************/
398 /** Get a config log from the MGS and process it.
399 * This func is called for both clients and servers.
400 * Continue to process new statements appended to the logs
401 * (whenever the config lock is revoked) until lustre_end_log is called.
403 * @param sb The superblock is used by the MGC to write to the local copy of the config log
405 * @param logname The name of the llog to replicate from the MGS
406 * @param cfg Since the same mgc may be used to follow multiple config logs
407 * (e.g. ost1, ost2, client), the config_llog_instance keeps the state for
408 * this log, and is added to the mgc's list of logs to follow. */
/*
 * Ask the MGC attached to @sb to start processing config log @logname.
 *
 * Builds an LCFG_LOG_START lustre_cfg whose buffers carry the MGC obd
 * name, @logname, a copy of *@cfg and the @sb pointer value, hands it to
 * obd_process_config() on lsi_mgc, and frees the cfg afterwards.  One of
 * two console errors is printed when processing fails.
 *
 * NOTE(review): the allocation of bufs, the conditions selecting between
 * the two error messages and the return statement are not visible in this
 * view.
 */
410 int lustre_process_log(struct super_block *sb, char *logname,
411 struct config_llog_instance *cfg)
413 struct lustre_cfg *lcfg;
414 struct lustre_cfg_bufs *bufs;
415 struct lustre_sb_info *lsi = s2lsi(sb);
416 struct obd_device *mgc = lsi->lsi_mgc;
427 /* mgc_process_config */
428 lustre_cfg_bufs_reset(bufs, mgc->obd_name);
429 lustre_cfg_bufs_set_string(bufs, 1, logname);
430 lustre_cfg_bufs_set(bufs, 2, cfg, sizeof(*cfg));
/* buffer 3 holds the superblock pointer itself, not the superblock */
431 lustre_cfg_bufs_set(bufs, 3, &sb, sizeof(sb));
432 lcfg = lustre_cfg_new(LCFG_LOG_START, bufs);
433 rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
434 lustre_cfg_free(lcfg);
/*
 * NOTE(review): the first two string pieces below concatenate without a
 * space, so the message reads "...log '<name>'failed from the MGS".
 */
439 LCONSOLE_ERROR_MSG(0x15b, "%s: The configuration from log '%s'"
440 "failed from the MGS (%d). Make sure this "
441 "client and the MGS are running compatible "
442 "versions of Lustre.\n",
443 mgc->obd_name, logname, rc);
446 LCONSOLE_ERROR_MSG(0x15c, "%s: The configuration from log '%s' "
447 "failed (%d). This may be the result of "
448 "communication errors between this node and "
449 "the MGS, a bad configuration, or other "
450 "errors. See the syslog for more "
451 "information.\n", mgc->obd_name, logname,
454 /* class_obd_list(); */
458 /* Stop watching this config log for updates */
/*
 * Tell the MGC attached to @sb to stop following config log @logname.
 *
 * Builds an LCFG_LOG_END lustre_cfg carrying the MGC obd name, @logname
 * and a copy of *@cfg, passes it to obd_process_config() on lsi_mgc and
 * frees it.
 *
 * NOTE(review): any guard in front of the buffer-2 setup and the return
 * statement are not visible in this view.
 */
459 int lustre_end_log(struct super_block *sb, char *logname,
460 struct config_llog_instance *cfg)
462 struct lustre_cfg *lcfg;
463 struct lustre_cfg_bufs bufs;
464 struct lustre_sb_info *lsi = s2lsi(sb);
465 struct obd_device *mgc = lsi->lsi_mgc;
472 /* mgc_process_config */
473 lustre_cfg_bufs_reset(&bufs, mgc->obd_name);
474 lustre_cfg_bufs_set_string(&bufs, 1, logname);
476 lustre_cfg_bufs_set(&bufs, 2, cfg, sizeof(*cfg));
477 lcfg = lustre_cfg_new(LCFG_LOG_END, &bufs);
478 rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
479 lustre_cfg_free(lcfg);
483 /**************** obd start *******************/
485 /** lustre_cfg_bufs are a holdover from 1.4; we can still set these up from
486 * lctl (and do for echo cli/srv). */
/*
 * Build a lustre_cfg for command @cmd and run it through
 * class_process_config().
 *
 * @cfgname goes in as the name passed to lustre_cfg_bufs_reset(); @s1..@s4
 * fill string buffers 1..4; @nid is stored in lcfg_nid.  The cfg is freed
 * after processing.
 *
 * NOTE(review): any per-string guards in front of the set_string calls and
 * the return statement are not visible in this view.
 * NOTE(review): lcfg is dereferenced right after lustre_cfg_new() with no
 * visible NULL check - confirm.
 */
488 int do_lcfg(char *cfgname, lnet_nid_t nid, int cmd,
489 char *s1, char *s2, char *s3, char *s4)
491 struct lustre_cfg_bufs bufs;
492 struct lustre_cfg * lcfg = NULL;
495 CDEBUG(D_TRACE, "lcfg %s %#x %s %s %s %s\n", cfgname,
496 cmd, s1, s2, s3, s4);
498 lustre_cfg_bufs_reset(&bufs, cfgname);
500 lustre_cfg_bufs_set_string(&bufs, 1, s1);
502 lustre_cfg_bufs_set_string(&bufs, 2, s2);
504 lustre_cfg_bufs_set_string(&bufs, 3, s3);
506 lustre_cfg_bufs_set_string(&bufs, 4, s4);
508 lcfg = lustre_cfg_new(cmd, &bufs);
509 lcfg->lcfg_nid = nid;
510 rc = class_process_config(lcfg);
511 lustre_cfg_free(lcfg);
515 /** Call class_attach and class_setup. These methods in turn call
516 * obd type-specific methods. */
/*
 * Attach and set up one obd through do_lcfg().
 *
 * Issues LCFG_ATTACH with (@type, @uuid), then LCFG_SETUP with (s1, s2).
 * When setup fails the obd is detached again with LCFG_DETACH.
 *
 * NOTE(review): the rest of the parameter list, the rc tests and the
 * return statements are not visible in this view.
 */
518 static int lustre_start_simple(char *obdname, char *type, char *uuid,
522 CDEBUG(D_MOUNT, "Starting obd %s (typ=%s)\n", obdname, type);
524 rc = do_lcfg(obdname, 0, LCFG_ATTACH, type, uuid, 0, 0);
526 CERROR("%s attach error %d\n", obdname, rc);
529 rc = do_lcfg(obdname, 0, LCFG_SETUP, s1, s2, 0, 0);
531 CERROR("%s setup error %d\n", obdname, rc);
/* undo the attach so a failed setup does not leave the obd behind */
532 do_lcfg(obdname, 0, LCFG_DETACH, 0, 0, 0, 0);
537 /* Set up a MGS to serve startup logs */
/*
 * Start the MGS obd on the server mount described by @sb.
 *
 * If a mount is already registered under LUSTRE_MGS_OBDNAME a console
 * error naming the existing service is printed.  Otherwise the mount
 * (lsi_srv_mnt) is registered under that name and the obd is started with
 * lustre_start_simple(); a failure to start is reported on the console.
 *
 * NOTE(review): server_find_mount() is called here without
 * lustre_mount_info_lock visibly held, unlike the other callers.
 * NOTE(review): the rc tests and return statements are not visible in
 * this view.
 */
538 static int server_start_mgs(struct super_block *sb)
540 struct lustre_sb_info *lsi = s2lsi(sb);
541 struct vfsmount *mnt = lsi->lsi_srv_mnt;
542 struct lustre_mount_info *lmi;
547 /* It is impossible to have more than 1 MGS per node, since
548 MGC wouldn't know which to connect to */
549 lmi = server_find_mount(LUSTRE_MGS_OBDNAME);
551 lsi = s2lsi(lmi->lmi_sb);
552 LCONSOLE_ERROR_MSG(0x15d, "The MGS service was already started"
554 lsi->lsi_ldd->ldd_svname);
558 CDEBUG(D_CONFIG, "Start MGS service %s\n", LUSTRE_MGS_OBDNAME);
560 rc = server_register_mount(LUSTRE_MGS_OBDNAME, sb, mnt);
563 rc = lustre_start_simple(LUSTRE_MGS_OBDNAME, LUSTRE_MGS_NAME,
564 LUSTRE_MGS_OBDNAME, 0, 0);
565 /* Do NOT call server_deregister_mount() here. This leads to
566 * an inability to clean up properly and free lsi and other stuff when
567 * mgs calls server_put_mount() in error handling case. -umka */
571 LCONSOLE_ERROR_MSG(0x15e, "Failed to start MGS '%s' (%d). "
572 "Is the 'mgs' module loaded?\n",
573 LUSTRE_MGS_OBDNAME, rc);
/*
 * Stop the MGS obd.
 *
 * Looks the obd up by LUSTRE_MGS_OBDNAME; logs that it is not running
 * when the lookup fails, otherwise cleans it up with
 * class_manual_cleanup().
 *
 * NOTE(review): the lookup test, any flag set before the cleanup and the
 * return statements are not visible in this view.
 */
577 static int server_stop_mgs(struct super_block *sb)
579 struct obd_device *obd;
583 CDEBUG(D_MOUNT, "Stop MGS service %s\n", LUSTRE_MGS_OBDNAME);
585 /* There better be only one MGS */
586 obd = class_name2obd(LUSTRE_MGS_OBDNAME);
588 CDEBUG(D_CONFIG, "mgs %s not running\n", LUSTRE_MGS_OBDNAME);
592 /* The MGS should always stop when we say so */
594 rc = class_manual_cleanup(obd);
/* Taken by lustre_start_mgc() and lustre_stop_mgc() around MGC start/stop. */
598 CFS_DEFINE_MUTEX(mgc_start_lock);
600 /** Set up a mgc obd to process startup logs
602 * \param sb [in] super block of the mgc obd
604 * \retval 0 success, otherwise error code */
/*
 * Start, or re-use, the MGC obd for the mount described by @sb.
 *
 * Visible steps:
 *  1. Pick the nid that names the MGC: for servers from the mgsnode mount
 *     option, the mgsnode= parameter or (on an MGS) a local non-loopback
 *     nid; for clients from the mount device string.
 *  2. Under mgc_start_lock, build the obd name LUSTRE_MGC_OBDNAME + nid.
 *  3. If an obd of that name exists and is not stopping, re-use it: push
 *     the mgssec setting, bump cl_mgc_refcount, reconcile the client's
 *     imperative-recovery flag with the existing connection and set the
 *     reconnect behaviour.
 *  4. Otherwise register the primary and failover MGS nids with
 *     LCFG_ADD_UUID, start the obd via lustre_start_simple(), set
 *     cl_mgc_refcount to 1 and connect with obd_connect().
 *  5. Free the temporary name buffers.
 *
 * NOTE(review): many lines (several declarations, rc tests, labels, the
 * return statement) are not visible in this view; the step list above
 * reflects only what can be read here.
 */
606 static int lustre_start_mgc(struct super_block *sb)
608 struct obd_connect_data *data = NULL;
609 struct lustre_sb_info *lsi = s2lsi(sb);
610 struct obd_device *obd;
611 struct obd_export *exp;
612 struct obd_uuid *uuid;
615 char *mgcname = NULL, *niduuid = NULL, *mgssec = NULL;
618 int rc = 0, i = 0, j, len;
621 LASSERT(lsi->lsi_lmd);
623 /* Find the first non-lo MGS nid for our MGC name */
624 if (lsi->lsi_flags & LSI_SERVER) {
625 ptr = lsi->lsi_ldd->ldd_params;
626 /* mount -o mgsnode=nid */
627 if (lsi->lsi_lmd->lmd_mgs &&
628 (class_parse_nid(lsi->lsi_lmd->lmd_mgs, &nid, &ptr) == 0)) {
630 /* Use mgsnode= nids */
631 } else if ((class_find_param(ptr, PARAM_MGSNODE, &ptr) == 0) &&
632 (class_parse_nid(ptr, &nid, &ptr) == 0)) {
634 } else if (IS_MGS(lsi->lsi_ldd)) {
635 lnet_process_id_t id;
636 while ((rc = LNetGetId(i++, &id)) != -ENOENT) {
637 if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND)
644 } else { /* client */
645 /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
646 ptr = lsi->lsi_lmd->lmd_dev;
647 if (class_parse_nid(ptr, &nid, &ptr) == 0)
651 CERROR("No valid MGS nids found.\n");
655 cfs_mutex_lock(&mgc_start_lock);
/*
 * mgcname gets exactly strlen(prefix) + strlen(nid) + 1 bytes; niduuid
 * gets two more.
 * NOTE(review): niduuid is later filled with "%s_%x" from mgcname, so
 * len + 2 leaves room for '_' plus a single hex digit - confirm that the
 * index used there always stays below 16.
 */
657 len = strlen(LUSTRE_MGC_OBDNAME) + strlen(libcfs_nid2str(nid)) + 1;
658 OBD_ALLOC(mgcname, len);
659 OBD_ALLOC(niduuid, len + 2);
660 if (!mgcname || !niduuid)
661 GOTO(out_free, rc = -ENOMEM);
662 sprintf(mgcname, "%s%s", LUSTRE_MGC_OBDNAME, libcfs_nid2str(nid));
664 mgssec = lsi->lsi_lmd->lmd_mgssec ? lsi->lsi_lmd->lmd_mgssec : "";
668 GOTO(out_free, rc = -ENOMEM);
670 obd = class_name2obd(mgcname);
671 if (obd && !obd->obd_stopping) {
672 rc = obd_set_info_async(NULL, obd->obd_self_export,
673 strlen(KEY_MGSSEC), KEY_MGSSEC,
674 strlen(mgssec), mgssec, NULL);
678 /* Re-using an existing MGC */
679 cfs_atomic_inc(&obd->u.cli.cl_mgc_refcount);
681 /* IR compatibility check, only for clients */
682 if (lmd_is_client(lsi->lsi_lmd)) {
684 int vallen = sizeof(*data);
685 __u32 *flags = &lsi->lsi_lmd->lmd_flags;
687 rc = obd_get_info(NULL, obd->obd_self_export,
688 strlen(KEY_CONN_DATA), KEY_CONN_DATA,
689 &vallen, data, NULL);
691 has_ir = OCD_HAS_FLAG(data, IMP_RECOV);
/* true when the mount's NOIR flag disagrees with the live connection */
692 if (has_ir ^ !(*flags & LMD_FLG_NOIR)) {
693 /* LMD_FLG_NOIR is for test purpose only */
695 "Trying to mount a client with IR setting "
696 "not compatible with current mgc. "
697 "Force to use current mgc setting that is "
699 has_ir ? "enabled" : "disabled");
701 *flags &= ~LMD_FLG_NOIR;
703 *flags |= LMD_FLG_NOIR;
708 /* If we are restarting the MGS, don't try to keep the MGC's
709 old connection, or registration will fail. */
710 if ((lsi->lsi_flags & LSI_SERVER) && IS_MGS(lsi->lsi_ldd)) {
711 CDEBUG(D_MOUNT, "New MGS with live MGC\n");
715 /* Try all connections, but only once (again).
716 We don't want to block another target from starting
717 (using its local copy of the log), but we do want to connect
718 if at all possible. */
720 CDEBUG(D_MOUNT, "%s: Set MGC reconnect %d\n", mgcname,recov_bk);
721 rc = obd_set_info_async(NULL, obd->obd_self_export,
722 sizeof(KEY_INIT_RECOV_BACKUP),
723 KEY_INIT_RECOV_BACKUP,
724 sizeof(recov_bk), &recov_bk, NULL);
728 CDEBUG(D_MOUNT, "Start MGC '%s'\n", mgcname);
730 /* Add the primary nids for the MGS */
732 sprintf(niduuid, "%s_%x", mgcname, i);
733 if (lsi->lsi_flags & LSI_SERVER) {
734 ptr = lsi->lsi_ldd->ldd_params;
735 if (IS_MGS(lsi->lsi_ldd)) {
736 /* Use local nids (including LO) */
737 lnet_process_id_t id;
738 while ((rc = LNetGetId(i++, &id)) != -ENOENT) {
739 rc = do_lcfg(mgcname, id.nid,
740 LCFG_ADD_UUID, niduuid, 0,0,0);
743 /* Use mgsnode= nids */
744 /* mount -o mgsnode=nid */
745 if (lsi->lsi_lmd->lmd_mgs) {
746 ptr = lsi->lsi_lmd->lmd_mgs;
747 } else if (class_find_param(ptr, PARAM_MGSNODE,
749 CERROR("No MGS nids given.\n");
750 GOTO(out_free, rc = -EINVAL);
752 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
753 rc = do_lcfg(mgcname, nid,
754 LCFG_ADD_UUID, niduuid, 0,0,0);
758 } else { /* client */
759 /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
760 ptr = lsi->lsi_lmd->lmd_dev;
761 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
762 rc = do_lcfg(mgcname, nid,
763 LCFG_ADD_UUID, niduuid, 0,0,0);
765 /* Stop at the first failover nid */
771 CERROR("No valid MGS nids found.\n");
772 GOTO(out_free, rc = -EINVAL);
774 lsi->lsi_lmd->lmd_mgs_failnodes = 1;
776 /* Random uuid for MGC allows easier reconnects */
778 ll_generate_random_uuid(uuidc);
779 class_uuid_unparse(uuidc, uuid);
782 rc = lustre_start_simple(mgcname, LUSTRE_MGC_NAME,
783 (char *)uuid->uuid, LUSTRE_MGS_OBDNAME,
789 /* Add any failover MGS nids */
791 while ((*ptr == ':' ||
792 class_find_param(ptr, PARAM_MGSNODE, &ptr) == 0)) {
793 /* New failover node */
794 sprintf(niduuid, "%s_%x", mgcname, i);
796 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
798 rc = do_lcfg(mgcname, nid,
799 LCFG_ADD_UUID, niduuid, 0,0,0);
804 rc = do_lcfg(mgcname, 0, LCFG_ADD_CONN,
/* remembered so lustre_stop_mgc() knows how many nid uuids to delete */
812 lsi->lsi_lmd->lmd_mgs_failnodes = i;
814 obd = class_name2obd(mgcname);
816 CERROR("Can't find mgcobd %s\n", mgcname);
817 GOTO(out_free, rc = -ENOTCONN);
820 rc = obd_set_info_async(NULL, obd->obd_self_export,
821 strlen(KEY_MGSSEC), KEY_MGSSEC,
822 strlen(mgssec), mgssec, NULL);
826 /* Keep a refcount of servers/clients who started with "mount",
827 so we know when we can get rid of the mgc. */
828 cfs_atomic_set(&obd->u.cli.cl_mgc_refcount, 1);
830 /* Try all connections, but only once. */
832 rc = obd_set_info_async(NULL, obd->obd_self_export,
833 sizeof(KEY_INIT_RECOV_BACKUP),
834 KEY_INIT_RECOV_BACKUP,
835 sizeof(recov_bk), &recov_bk, NULL);
838 CWARN("can't set %s %d\n", KEY_INIT_RECOV_BACKUP, rc);
839 /* We connect to the MGS at setup, and don't disconnect until cleanup */
840 data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_FID |
841 OBD_CONNECT_AT | OBD_CONNECT_FULL20 |
842 OBD_CONNECT_IMP_RECOV;
/* clients mounted with the NOIR flag do not ask for imperative recovery */
843 if (lmd_is_client(lsi->lsi_lmd) &&
844 lsi->lsi_lmd->lmd_flags & LMD_FLG_NOIR)
845 data->ocd_connect_flags &= ~OBD_CONNECT_IMP_RECOV;
846 data->ocd_version = LUSTRE_VERSION_CODE;
847 rc = obd_connect(NULL, &exp, obd, &(obd->obd_uuid), data, NULL);
849 CERROR("connect failed %d\n", rc);
853 obd->u.cli.cl_mgc_mgsexp = exp;
/*
 * NOTE(review): the comment opened on the next line has no terminator
 * within this function as shown here - confirm against the upstream file.
 */
856 /* Keep the mgc info in the sb. Note that many lsi's can point
860 cfs_mutex_unlock(&mgc_start_lock);
865 OBD_FREE(mgcname, len);
867 OBD_FREE(niduuid, len + 2);
/*
 * Drop this mount's reference on the MGC and stop it on the last put.
 *
 * Under mgc_start_lock: decrement cl_mgc_refcount; if other users remain,
 * leave with -EBUSY.  Otherwise mark the obd obd_no_recov, disconnect the
 * MGS export if there is one, clean the obd up with
 * class_manual_cleanup(), and delete the "<obdname>_<hex index>" nid
 * uuids, one per lmd_mgs_failnodes entry.
 *
 * NOTE(review): how obd is obtained, several rc tests, the out label and
 * the return statement are not visible in this view.
 */
871 static int lustre_stop_mgc(struct super_block *sb)
873 struct lustre_sb_info *lsi = s2lsi(sb);
874 struct obd_device *obd;
875 char *niduuid = 0, *ptr = 0;
876 int i, rc = 0, len = 0;
886 cfs_mutex_lock(&mgc_start_lock);
887 LASSERT(cfs_atomic_read(&obd->u.cli.cl_mgc_refcount) > 0);
888 if (!cfs_atomic_dec_and_test(&obd->u.cli.cl_mgc_refcount)) {
889 /* This is not fatal, every client that stops
890 will call in here. */
891 CDEBUG(D_MOUNT, "mgc still has %d references.\n",
892 cfs_atomic_read(&obd->u.cli.cl_mgc_refcount));
893 GOTO(out, rc = -EBUSY);
896 /* The MGC has no recoverable data in any case.
897 * force shutdown set in umount_begin */
898 obd->obd_no_recov = 1;
900 if (obd->u.cli.cl_mgc_mgsexp) {
901 /* An error is not fatal, if we are unable to send the
902 disconnect mgs ping evictor cleans up the export */
903 rc = obd_disconnect(obd->u.cli.cl_mgc_mgsexp);
905 CDEBUG(D_MOUNT, "disconnect failed %d\n", rc);
/*
 * The buffer allocated below is strlen(obd_name) + 6 bytes: the obd name
 * plus room for '_', up to four hex digits and the terminating NUL.
 * NOTE(review): the comment opened on the next line is not closed before
 * the "Clean the nid uuids" comment as shown here, which would comment
 * out the statements in between - confirm against the upstream file.
 */
908 /* Save the obdname for cleaning the nid uuids, which are
910 len = strlen(obd->obd_name) + 6;
911 OBD_ALLOC(niduuid, len);
913 strcpy(niduuid, obd->obd_name);
914 ptr = niduuid + strlen(niduuid);
917 rc = class_manual_cleanup(obd);
921 /* Clean the nid uuids */
923 GOTO(out, rc = -ENOMEM);
925 for (i = 0; i < lsi->lsi_lmd->lmd_mgs_failnodes; i++) {
/* overwrite the suffix in place: ptr points just past the obd name */
926 sprintf(ptr, "_%x", i);
927 rc = do_lcfg(LUSTRE_MGC_OBDNAME, 0, LCFG_DEL_UUID,
/* NOTE(review): message says "MDC" although this is the MGC - confirm */
930 CERROR("del MDC UUID %s failed: rc = %d\n",
935 OBD_FREE(niduuid, len);
937 /* class_import_put will get rid of the additional connections */
938 cfs_mutex_unlock(&mgc_start_lock);
942 /* Since there's only one mgc per node, we have to change its fs to get
943 access to the right disk. */
/*
 * Point @mgc at the disk behind @sb by sending KEY_SET_FS with the
 * superblock through obd_set_info_async() on the MGC's self export.
 * A failure is logged.
 *
 * NOTE(review): the rc test and the return statement are not visible in
 * this view.
 */
944 static int server_mgc_set_fs(struct obd_device *mgc, struct super_block *sb)
946 struct lustre_sb_info *lsi = s2lsi(sb);
950 CDEBUG(D_MOUNT, "Set mgc disk for %s\n", lsi->lsi_lmd->lmd_dev);
952 /* cl_mgc_sem in mgc ensures we sleep if the mgc_fs is busy */
953 rc = obd_set_info_async(NULL, mgc->obd_self_export,
954 sizeof(KEY_SET_FS), KEY_SET_FS,
955 sizeof(*sb), sb, NULL);
957 CERROR("can't set_fs %d\n", rc);
/*
 * Release the disk previously assigned to @mgc by sending KEY_CLEAR_FS
 * through obd_set_info_async() on the MGC's self export.
 *
 * NOTE(review): the remaining call arguments and the return statement are
 * not visible in this view.
 */
963 static int server_mgc_clear_fs(struct obd_device *mgc)
968 CDEBUG(D_MOUNT, "Unassign mgc disk\n");
970 rc = obd_set_info_async(NULL, mgc->obd_self_export,
971 sizeof(KEY_CLEAR_FS), KEY_CLEAR_FS,
/*
 * Held while the shared MDS/OSS obds are started (server_start_targets())
 * or stopped (server_stop_servers()).
 */
976 CFS_DEFINE_MUTEX(server_start_lock);
978 /* Stop MDS/OSS if nobody is using them */
/*
 * @lddflags selects the service: LDD_F_SV_TYPE_MDT looks up the MDS obd
 * and the LUSTRE_MDS_NAME type, LDD_F_SV_TYPE_OST the OSS obd and the
 * LUSTRE_OST_NAME type.  The obd is cleaned up with
 * class_manual_cleanup() only when the type is missing or its typ_refcnt
 * is zero.  Everything runs under server_start_lock.
 *
 * NOTE(review): how @lsiflags is used and what is returned are not
 * visible in this view.
 */
979 static int server_stop_servers(int lddflags, int lsiflags)
981 struct obd_device *obd = NULL;
982 struct obd_type *type = NULL;
986 cfs_mutex_lock(&server_start_lock);
988 /* Either an MDT or an OST or neither */
989 /* if this was an MDT, and there are no more MDT's, clean up the MDS */
990 if ((lddflags & LDD_F_SV_TYPE_MDT) &&
991 (obd = class_name2obd(LUSTRE_MDS_OBDNAME))) {
992 /*FIXME pre-rename, should eventually be LUSTRE_MDT_NAME*/
993 type = class_search_type(LUSTRE_MDS_NAME);
995 /* if this was an OST, and there are no more OST's, clean up the OSS */
996 if ((lddflags & LDD_F_SV_TYPE_OST) &&
997 (obd = class_name2obd(LUSTRE_OSS_OBDNAME))) {
998 type = class_search_type(LUSTRE_OST_NAME);
/* only tear down when no device of that type remains */
1001 if (obd && (!type || !type->typ_refcnt)) {
1004 /* obd_fail doesn't mean much on a server obd */
1005 err = class_manual_cleanup(obd);
1010 cfs_mutex_unlock(&server_start_lock);
/*
 * Dump selected fields of @mti (server name, fs name, uuid, config
 * version, flags) through PRINT_CMD at PRINT_MASK, prefixed with @title.
 *
 * NOTE(review): the return statement is not visible in this view.
 */
1015 int server_mti_print(char *title, struct mgs_target_info *mti)
1017 PRINT_CMD(PRINT_MASK, "mti %s\n", title);
1018 PRINT_CMD(PRINT_MASK, "server: %s\n", mti->mti_svname);
1019 PRINT_CMD(PRINT_MASK, "fs: %s\n", mti->mti_fsname);
1020 PRINT_CMD(PRINT_MASK, "uuid: %s\n", mti->mti_uuid);
1021 PRINT_CMD(PRINT_MASK, "ver: %d flags: %#x\n",
1022 mti->mti_config_ver, mti->mti_flags);
/*
 * Fill @mti from the disk data and mount data attached to @sb.
 *
 * Copies the fs and server names, then collects local nids from
 * LNetGetId(): loopback nids are tested for, nids not matching the
 * failnode list are filtered when LDD_F_NO_PRIMNODE is set, and nids on a
 * network not matching the network parameter are filtered.  Collection
 * warns once MTI_NIDS_MAX nids have been stored.  Version, flags, stripe
 * index, uuid and params are copied afterwards; LMD_FLG_WRITECONF from
 * the mount options is folded into ldd_flags first.
 *
 * NOTE(review): the early-exit/continue/break statements and the return
 * values are not visible in this view.
 * NOTE(review): both strncpy() calls pass the full destination size, so
 * the destination is left unterminated when the source fills it.
 * NOTE(review): the params length check uses '>', so a string of exactly
 * sizeof(mti_params) characters passes and is copied without its NUL.
 * Confirm whether '>=' was intended.
 */
1026 static int server_sb2mti(struct super_block *sb, struct mgs_target_info *mti)
1028 struct lustre_sb_info *lsi = s2lsi(sb);
1029 struct lustre_disk_data *ldd = lsi->lsi_ldd;
1030 lnet_process_id_t id;
1034 if (!(lsi->lsi_flags & LSI_SERVER))
1037 strncpy(mti->mti_fsname, ldd->ldd_fsname,
1038 sizeof(mti->mti_fsname));
1039 strncpy(mti->mti_svname, ldd->ldd_svname,
1040 sizeof(mti->mti_svname));
1042 mti->mti_nid_count = 0;
1043 while (LNetGetId(i++, &id) != -ENOENT) {
1044 if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND)
1047 /* server use --servicenode param, only allow specified
1048 * nids be registered */
1049 if ((ldd->ldd_flags & LDD_F_NO_PRIMNODE) != 0 &&
1050 class_match_nid(ldd->ldd_params,
1051 PARAM_FAILNODE, id.nid) < 1)
1054 /* match specified network */
1055 if (!class_match_net(ldd->ldd_params,
1056 PARAM_NETWORK, LNET_NIDNET(id.nid)))
1059 mti->mti_nids[mti->mti_nid_count] = id.nid;
1060 mti->mti_nid_count++;
1061 if (mti->mti_nid_count >= MTI_NIDS_MAX) {
1062 CWARN("Only using first %d nids for %s\n",
1063 mti->mti_nid_count, mti->mti_svname);
1068 mti->mti_lustre_ver = LUSTRE_VERSION_CODE;
1069 mti->mti_config_ver = 0;
/* a writeconf mount option is carried in the flags sent to the MGS */
1070 if (lsi->lsi_lmd->lmd_flags & LMD_FLG_WRITECONF)
1071 ldd->ldd_flags |= LDD_F_WRITECONF;
1072 mti->mti_flags = ldd->ldd_flags;
1073 mti->mti_stripe_index = ldd->ldd_svindex;
1074 memcpy(mti->mti_uuid, ldd->ldd_uuid, sizeof(mti->mti_uuid));
1075 if (strlen(ldd->ldd_params) > sizeof(mti->mti_params)) {
1076 CERROR("params too big for mti\n");
1079 memcpy(mti->mti_params, ldd->ldd_params, sizeof(mti->mti_params));
1083 /* Register an old or new target with the MGS. If needed MGS will construct
1084 startup logs and assign index */
/*
 * Register the target mounted on @sb with the MGS.
 *
 * Builds an mgs_target_info with server_sb2mti(), tags it LDD_F_OPC_REG
 * and sends it as KEY_REGISTER_TARGET over the MGC's MGS export.  On
 * failure one of three messages is printed depending on LDD_F_ERROR in
 * the reply flags and on whether the on-disk flags had LDD_F_NEED_INDEX
 * or LDD_F_UPDATE set.  Afterwards the on-disk flags are refreshed from
 * the reply and, when the reply carries LDD_F_REWRITE_LDD, the index and
 * server name are updated, written with ldd_write(), the label is set
 * and the disk is synced.
 *
 * NOTE(review): the mti allocation, the rc tests, the cleanup and the
 * return statements are not visible in this view.
 * NOTE(review): the result of ldd_write() is not checked here.
 */
1085 int server_register_target(struct super_block *sb)
1087 struct lustre_sb_info *lsi = s2lsi(sb);
1088 struct obd_device *mgc = lsi->lsi_mgc;
1089 struct lustre_disk_data *ldd = lsi->lsi_ldd;
1090 struct mgs_target_info *mti = NULL;
1097 if (!(lsi->lsi_flags & LSI_SERVER))
1103 rc = server_sb2mti(sb, mti);
1107 CDEBUG(D_MOUNT, "Registration %s, fs=%s, %s, index=%04x, flags=%#x\n",
1108 mti->mti_svname, mti->mti_fsname,
1109 libcfs_nid2str(mti->mti_nids[0]), mti->mti_stripe_index,
1112 /* if write_conf is true, the registration must succeed */
1113 writeconf = !!(ldd->ldd_flags & (LDD_F_NEED_INDEX | LDD_F_UPDATE));
1114 mti->mti_flags |= LDD_F_OPC_REG;
1116 /* Register the target */
1117 /* FIXME use mgc_process_config instead */
1118 rc = obd_set_info_async(NULL, mgc->u.cli.cl_mgc_mgsexp,
1119 sizeof(KEY_REGISTER_TARGET), KEY_REGISTER_TARGET,
1120 sizeof(*mti), mti, NULL);
1122 if (mti->mti_flags & LDD_F_ERROR) {
1123 LCONSOLE_ERROR_MSG(0x160,
1124 "The MGS is refusing to allow this "
1125 "server (%s) to start. Please see messages"
1126 " on the MGS node.\n", ldd->ldd_svname);
1127 } else if (writeconf) {
1128 LCONSOLE_ERROR_MSG(0x15f,
1129 "Communication to the MGS return error %d. "
1130 "Is the MGS running?\n", rc);
1132 CERROR("Cannot talk to the MGS: %d, not fatal\n", rc);
1133 /* reset the error code for non-fatal error. */
1139 /* Always update our flags */
1140 ldd->ldd_flags = mti->mti_flags & LDD_F_ONDISK_MASK;
1142 /* If this flag is set, it means the MGS wants us to change our
1143 on-disk data. (So far this means just the index.) */
1144 if (mti->mti_flags & LDD_F_REWRITE_LDD) {
1147 CDEBUG(D_MOUNT, "Changing on-disk index from %#x to %#x "
1148 "for %s\n", ldd->ldd_svindex, mti->mti_stripe_index,
1150 ldd->ldd_svindex = mti->mti_stripe_index;
1151 strncpy(ldd->ldd_svname, mti->mti_svname,
1152 sizeof(ldd->ldd_svname));
1153 /* or ldd_make_sv_name(ldd); */
1154 ldd_write(&mgc->obd_lvfs_ctxt, ldd);
1155 if (lsi->lsi_lmd->lmd_osd_type)
1157 err = fsfilt_set_label(mgc, lsi->lsi_srv_mnt->mnt_sb,
1160 CERROR("Label set error %d\n", err);
1161 label = fsfilt_get_label(mgc, lsi->lsi_srv_mnt->mnt_sb);
1163 CDEBUG(D_MOUNT, "Disk label changed to %s\n", label);
1165 /* Flush the new ldd to disk */
1166 fsfilt_sync(mgc, lsi->lsi_srv_mnt->mnt_sb);
1176 /** Notify the MGS that this target is ready.
1177 * Used by IR - if the MGS receives this message, it will notify clients. */
/*
 * Send a "target ready" registration for @obd to the MGS.
 *
 * Builds an mgs_target_info with server_sb2mti(), stores the obd's
 * obt_instance in it, tags it LDD_F_OPC_READY and sends it as
 * KEY_REGISTER_TARGET over the MGC's MGS export.  When the call succeeds
 * and the reply flags carry LDD_F_IR_CAPABLE without LDD_F_ERROR,
 * LSI_IR_CAPABLE is set in lsi_flags.
 *
 * NOTE(review): the mti allocation, the rc tests, the cleanup and the
 * return statements are not visible in this view.
 */
1179 static int server_notify_target(struct super_block *sb, struct obd_device *obd)
1181 struct lustre_sb_info *lsi = s2lsi(sb);
1182 struct obd_device *mgc = lsi->lsi_mgc;
1183 struct mgs_target_info *mti = NULL;
1189 if (!(lsi->lsi_flags & LSI_SERVER))
1195 rc = server_sb2mti(sb, mti);
1199 mti->mti_instance = obd->u.obt.obt_instance;
1200 mti->mti_flags |= LDD_F_OPC_READY;
1202 /* FIXME use mgc_process_config instead */
1203 rc = obd_set_info_async(NULL, mgc->u.cli.cl_mgc_mgsexp,
1204 sizeof(KEY_REGISTER_TARGET),
1205 KEY_REGISTER_TARGET,
1206 sizeof(*mti), mti, NULL);
1208 /* Imperative recovery: if the mgs informs us to use IR? */
1209 if (!rc && !(mti->mti_flags & LDD_F_ERROR) &&
1210 (mti->mti_flags & LDD_F_IR_CAPABLE))
1211 lsi->lsi_flags |= LSI_IR_CAPABLE;
1220 /** Start server targets: MDTs and OSTs */
/*
 * Bring up the target (MDT or OST) stored on the mount @sb / @mnt.
 *
 * Visible sequence:
 *  1. Under server_start_lock, make sure the shared MDS (for MDTs) or OSS
 *     (for OSTs) obd exists, starting it with lustre_start_simple().
 *  2. When no osd type is given, point the MGC at this disk.
 *  3. Register the target with the MGS.
 *  4. Register the mount under the target's server name.
 *  5. Process the config log named after the target.
 *  6. Release the MGC disk again.
 *  7. Look the started obd up by name; optionally abort recovery when
 *     LMD_FLG_ABORT_RECOV is set; notify the MGS; compute the recovery
 *     timeout; send OBD_NOTIFY_CONFIG.
 *
 * NOTE(review): the rc tests, labels and return statements are not
 * visible in this view, so the exact error paths cannot be described
 * from here.
 */
1222 static int server_start_targets(struct super_block *sb, struct vfsmount *mnt)
1224 struct obd_device *obd;
1225 struct lustre_sb_info *lsi = s2lsi(sb);
1226 struct config_llog_instance cfg;
1230 CDEBUG(D_MOUNT, "starting target %s\n", lsi->lsi_ldd->ldd_svname);
1233 /* If we're an MDT, make sure the global MDS is running */
1234 if (lsi->lsi_ldd->ldd_flags & LDD_F_SV_TYPE_MDT) {
1235 /* make sure the MDS is started */
1236 cfs_mutex_lock(&server_start_lock);
1237 obd = class_name2obd(LUSTRE_MDS_OBDNAME);
1239 rc = lustre_start_simple(LUSTRE_MDS_OBDNAME,
1240 /* FIXME pre-rename, should eventually be LUSTRE_MDS_NAME */
1242 LUSTRE_MDS_OBDNAME"_uuid",
1245 cfs_mutex_unlock(&server_start_lock);
1246 CERROR("failed to start MDS: %d\n", rc);
1250 cfs_mutex_unlock(&server_start_lock);
1254 /* If we're an OST, make sure the global OSS is running */
1255 if (IS_OST(lsi->lsi_ldd)) {
1256 /* make sure OSS is started */
1257 cfs_mutex_lock(&server_start_lock);
1258 obd = class_name2obd(LUSTRE_OSS_OBDNAME);
1260 rc = lustre_start_simple(LUSTRE_OSS_OBDNAME,
1262 LUSTRE_OSS_OBDNAME"_uuid",
1265 cfs_mutex_unlock(&server_start_lock);
1266 CERROR("failed to start OSS: %d\n", rc);
1270 cfs_mutex_unlock(&server_start_lock);
1273 /* Set the mgc fs to our server disk. This allows the MGC to
1274 * read and write configs locally, in case it can't talk to the MGS. */
1275 if (lsi->lsi_lmd->lmd_osd_type == NULL) {
1276 rc = server_mgc_set_fs(lsi->lsi_mgc, sb);
1281 /* Register with MGS */
1282 rc = server_register_target(sb);
1286 /* Let the target look up the mount using the target's name
1287 (we can't pass the sb or mnt through class_process_config.) */
1288 rc = server_register_mount(lsi->lsi_ldd->ldd_svname, sb, mnt);
1292 /* Start targets using the llog named for the target */
1293 memset(&cfg, 0, sizeof(cfg));
1294 rc = lustre_process_log(sb, lsi->lsi_ldd->ldd_svname, &cfg);
1296 CERROR("failed to start server %s: %d\n",
1297 lsi->lsi_ldd->ldd_svname, rc);
1298 /* Do NOT call server_deregister_mount() here. This makes it
1299 * impossible to find mount later in cleanup time and leaves
1300 * @lsi and other stuff leaked. -umka */
1305 /* Release the mgc fs for others to use */
1306 if (lsi->lsi_lmd->lmd_osd_type == NULL)
1307 server_mgc_clear_fs(lsi->lsi_mgc);
1310 obd = class_name2obd(lsi->lsi_ldd->ldd_svname);
1312 CERROR("no server named %s was started\n",
1313 lsi->lsi_ldd->ldd_svname);
/* abort recovery only if requested and the obd implements iocontrol */
1317 if ((lsi->lsi_lmd->lmd_flags & LMD_FLG_ABORT_RECOV) &&
1318 (OBP(obd, iocontrol))) {
1319 obd_iocontrol(OBD_IOC_ABORT_RECOVERY,
1320 obd->obd_self_export, 0, NULL, NULL);
1323 server_notify_target(sb, obd);
1325 /* calculate recovery timeout, do it after lustre_process_log */
1326 server_calc_timeout(lsi, obd);
1328 /* log has been fully processed */
1329 obd_notify(obd, NULL, OBD_NOTIFY_CONFIG, (void *)CONFIG_LOG);
1335 /***************** lustre superblock **************/
/* Create the Lustre per-superblock info for @sb.
 * Visible steps: allocate the mount data (lsi_lmd), clear its exclusion
 * count and recovery times, attach the lsi to @sb, set the mount refcount
 * to 1 and select fail-over as the default umount style.
 * NOTE(review): the allocation of @lsi itself, the failure branch body and
 * the return statements are not visible in this listing; presumably NULL is
 * returned on allocation failure and the new lsi otherwise - confirm. */
1337 struct lustre_sb_info *lustre_init_lsi(struct super_block *sb)
1339 struct lustre_sb_info *lsi;
1345 OBD_ALLOC_PTR(lsi->lsi_lmd);
1346 if (!lsi->lsi_lmd) {
1351 lsi->lsi_lmd->lmd_exclude_count = 0;
1352 lsi->lsi_lmd->lmd_recovery_time_soft = 0;
1353 lsi->lsi_lmd->lmd_recovery_time_hard = 0;
/* Publish the lsi through the superblock so s2lsi(sb) finds it. */
1354 s2lsi_nocast(sb) = lsi;
1355 /* we take 1 extra ref for our setup */
1356 cfs_atomic_set(&lsi->lsi_mounts, 1);
1358 /* Default umount style */
1359 lsi->lsi_flags = LSI_UMOUNT_FAILOVER;
/* Tear down the lustre_sb_info attached to @sb.
 * Asserts that no mount references remain (lsi_mounts == 0) and that
 * lsi_llsbi is already NULL.  Frees the disk data, every string hanging off
 * the mount data, the exclusion array, the mount data itself and finally the
 * lsi, then clears the superblock's lsi pointer.
 * NOTE(review): the return statement is not visible in this listing. */
1364 static int lustre_free_lsi(struct super_block *sb)
1366 struct lustre_sb_info *lsi = s2lsi(sb);
1369 LASSERT(lsi != NULL);
1370 CDEBUG(D_MOUNT, "Freeing lsi %p\n", lsi);
1372 /* someone didn't call server_put_mount. */
1373 LASSERT(cfs_atomic_read(&lsi->lsi_mounts) == 0);
1375 if (lsi->lsi_ldd != NULL)
1376 OBD_FREE(lsi->lsi_ldd, sizeof(*lsi->lsi_ldd));
1378 if (lsi->lsi_lmd != NULL) {
/* The size handed to OBD_FREE for each option string is recomputed as
 * strlen() + 1, so every string must still be NUL-terminated here. */
1379 if (lsi->lsi_lmd->lmd_dev != NULL)
1380 OBD_FREE(lsi->lsi_lmd->lmd_dev,
1381 strlen(lsi->lsi_lmd->lmd_dev) + 1);
1382 if (lsi->lsi_lmd->lmd_profile != NULL)
1383 OBD_FREE(lsi->lsi_lmd->lmd_profile,
1384 strlen(lsi->lsi_lmd->lmd_profile) + 1);
1385 if (lsi->lsi_lmd->lmd_mgssec != NULL)
1386 OBD_FREE(lsi->lsi_lmd->lmd_mgssec,
1387 strlen(lsi->lsi_lmd->lmd_mgssec) + 1);
1388 if (lsi->lsi_lmd->lmd_opts != NULL)
1389 OBD_FREE(lsi->lsi_lmd->lmd_opts,
1390 strlen(lsi->lsi_lmd->lmd_opts) + 1);
/* The exclusion array is keyed on its element count, not on the pointer. */
1391 if (lsi->lsi_lmd->lmd_exclude_count)
1392 OBD_FREE(lsi->lsi_lmd->lmd_exclude,
1393 sizeof(lsi->lsi_lmd->lmd_exclude[0]) *
1394 lsi->lsi_lmd->lmd_exclude_count);
1395 if (lsi->lsi_lmd->lmd_mgs != NULL)
1396 OBD_FREE(lsi->lsi_lmd->lmd_mgs,
1397 strlen(lsi->lsi_lmd->lmd_mgs) + 1);
1398 if (lsi->lsi_lmd->lmd_osd_type != NULL)
1399 OBD_FREE(lsi->lsi_lmd->lmd_osd_type,
1400 strlen(lsi->lsi_lmd->lmd_osd_type) + 1);
1402 OBD_FREE(lsi->lsi_lmd, sizeof(*lsi->lsi_lmd));
1405 LASSERT(lsi->lsi_llsbi == NULL);
1406 OBD_FREE(lsi, sizeof(*lsi));
1407 s2lsi_nocast(sb) = NULL;
1412 /* The lsi has one reference for every server that is using the disk -
1413 e.g. MDT, MGS, and potentially MGC */
/* Drop one mount reference on the lsi of @sb; when the count reaches zero
 * the lsi is destroyed through lustre_free_lsi().  The debug message prints
 * the count as it was before the decrement.
 * NOTE(review): the return statements are not visible in this listing;
 * presumably the result tells the caller whether the lsi was freed -
 * confirm against the callers. */
1414 static int lustre_put_lsi(struct super_block *sb)
1416 struct lustre_sb_info *lsi = s2lsi(sb);
1419 LASSERT(lsi != NULL);
1421 CDEBUG(D_MOUNT, "put %p %d\n", sb, cfs_atomic_read(&lsi->lsi_mounts));
1422 if (cfs_atomic_dec_and_test(&lsi->lsi_mounts)) {
1423 lustre_free_lsi(sb);
/* Build the in-memory disk data (ldd) for a server purely from mount
 * options, without reading anything from the device.
 * Visible steps: default the osd type to LUSTRE_OSD_NAME, take the server
 * name from lmd_profile (rejecting a missing or over-long name), hard-code
 * the fsname to "lustre", optionally override the osd type from
 * lmd_osd_type, derive the server type and index from the name with
 * server_name2index(), and store the index and type flags (always with
 * LDD_F_WRITECONF) in the ldd.
 * Returns -ENAMETOOLONG for over-long names; other return paths are not
 * visible in this listing.
 * NOTE(review): where the new ldd is attached to @lsi is not visible here. */
1429 static int lsi_prepare(struct lustre_sb_info *lsi)
1431 struct lustre_disk_data *ldd;
1437 LASSERT(lsi->lsi_lmd);
1439 OBD_ALLOC(ldd, sizeof(*ldd));
1442 strcpy(lsi->lsi_osd_type, LUSTRE_OSD_NAME);
1444 /* The server name is given as a mount line option */
1445 if (lsi->lsi_lmd->lmd_profile == NULL) {
1446 LCONSOLE_ERROR("Can't determine server name\n");
/* NOTE(review): @ldd was allocated above and no OBD_FREE is visible before
 * this early return (nor before the osd-type one below) - looks like a
 * leak on the error paths; confirm. */
1450 if (strlen(lsi->lsi_lmd->lmd_profile) >= sizeof(ldd->ldd_svname))
1451 RETURN(-ENAMETOOLONG);
/* Length was checked just above, so the unbounded copy fits ldd_svname. */
1453 strcpy(ldd->ldd_svname, lsi->lsi_lmd->lmd_profile);
1454 strcpy(ldd->ldd_fsname, "lustre");
1456 /* Determine osd type */
1457 if (lsi->lsi_lmd->lmd_osd_type != NULL) {
1458 if (strlen(lsi->lsi_lmd->lmd_osd_type) >=
1459 sizeof(lsi->lsi_osd_type))
1460 RETURN (-ENAMETOOLONG);
1462 strcpy(lsi->lsi_osd_type, lsi->lsi_lmd->lmd_osd_type);
1465 /* Determine server type */
1466 rc = server_name2index(ldd->ldd_svname, &index, NULL);
/* The "bare MGS" branch below is compiled out by the constant 0. */
1468 if (0 /*lsi->lsi_lmd->lmd_flags & LMD_FLG_MGS*/) {
1469 /* Assume we're a bare MGS */
1471 lsi->lsi_lmd->lmd_flags |= LMD_FLG_NOSVC;
1473 LCONSOLE_ERROR("Can't determine server type of '%s'\n",
1478 ldd->ldd_svindex = index;
1479 //lsi->lsi_flags |= rc;
1480 ldd->ldd_flags = rc | LDD_F_WRITECONF;
1484 /* Add mount line flags that used to be in ldd:
1485 * writeconf, mgs, iam, anything else?
1488 lsi->lsi_flags |= (lsi->lsi_lmd->lmd_flags & LMD_FLG_WRITECONF) ?
1489 LDD_F_WRITECONF : 0;
1490 lsi->lsi_flags |= (lsi->lsi_lmd->lmd_flags & LMD_FLG_MGS) ?
1491 LDD_F_SV_TYPE_MGS : 0;
1492 lsi->lsi_flags |= (lsi->lsi_lmd->lmd_flags & LMD_FLG_IAM) ?
1494 lsi->lsi_flags |= (lsi->lsi_lmd->lmd_flags & LMD_FLG_NO_PRIMNODE) ?
1495 LDD_F_NO_PRIMNODE : 0;
1501 /*************** server mount ******************/
1503 /** Kernel mount using mount options in MOUNT_DATA_FILE.
1504 * Since this file lives on the disk, we pre-mount using a common
1505 * type, read the file, then re-mount using the type specified in the
 *
 * Flow visible in this listing:
 *  - when an osd type was given on the mount line, only lsi_prepare() runs
 *    and its result is returned wrapped in ERR_PTR(), so a successful
 *    lsi_prepare() makes this function return ERR_PTR(0);
 *  - otherwise an ldd and one page for the option string are allocated, the
 *    device is pre-mounted as "ldiskfs" with the mount-line options,
 *    ldd_parse() reads the on-disk data, and the device is mounted again
 *    with the type given by MT_STR(ldd) and the options
 *    "<ldd_mount_opts>,no_mbcache[,<mount-line opts>]" plus
 *    MS_NOATIME | MS_NODIRATIME;
 *  - with LMD_FLG_ABORT_RECOV the LAST_RCVD file is truncated;
 *  - on success the ldd is handed to lsi->lsi_ldd and the mount returned;
 *    on failure the page and the ldd are freed and ERR_PTR(rc) returned.
 * NOTE(review): the checks after each get_fs_type(), vfs_kern_mount() and
 * ldd_parse() call and the release of the pre-mount are not visible here.
1508 static struct vfsmount *server_kernel_mount(struct super_block *sb)
1510 struct lvfs_run_ctxt mount_ctxt;
1511 struct lustre_sb_info *lsi = s2lsi(sb);
1512 struct lustre_disk_data *ldd;
1513 struct lustre_mount_data *lmd = lsi->lsi_lmd;
1514 struct vfsmount *mnt;
1515 struct file_system_type *type;
1516 char *options = NULL;
1517 unsigned long page, s_flags;
1518 struct page *__page;
1523 if (lsi->lsi_lmd->lmd_osd_type) {
1524 rc = lsi_prepare(lsi);
1525 RETURN(ERR_PTR(rc));
1528 OBD_ALLOC(ldd, sizeof(*ldd));
1530 RETURN(ERR_PTR(-ENOMEM));
1531 strcpy(lsi->lsi_osd_type, LUSTRE_OSD_NAME);
1533 /* In the past, we have always used flags = 0.
1534 Note ext3/ldiskfs can't be mounted ro. */
1535 s_flags = sb->s_flags;
1537 /* allocate memory for options */
1538 OBD_PAGE_ALLOC(__page, CFS_ALLOC_STD);
1540 GOTO(out_free, rc = -ENOMEM);
1541 page = (unsigned long)cfs_page_address(__page);
1542 options = (char *)page;
1543 memset(options, 0, CFS_PAGE_SIZE);
1545 /* mount-line options must be added for pre-mount because it may
1546 * contain mount options such as journal_dev which are required
1547 * to mount successfully the underlying filesystem */
1548 if (lmd->lmd_opts && (*(lmd->lmd_opts) != 0))
1549 strncat(options, lmd->lmd_opts, CFS_PAGE_SIZE - 1);
1551 /* Pre-mount ldiskfs to read the MOUNT_DATA_FILE */
1552 CDEBUG(D_MOUNT, "Pre-mount ldiskfs %s\n", lmd->lmd_dev);
1553 type = get_fs_type("ldiskfs");
1555 CERROR("premount failed: cannot find ldiskfs module\n");
1556 GOTO(out_free, rc = -ENODEV);
1558 mnt = vfs_kern_mount(type, s_flags, lmd->lmd_dev, (void *)options);
/* Drop the module reference taken by get_fs_type(). */
1559 cfs_module_put(type->owner);
1562 CERROR("premount %s:%#lx ldiskfs failed: %d "
1563 "Is the ldiskfs module available?\n",
1564 lmd->lmd_dev, s_flags, rc );
/* Run ldd_parse() in a context rooted at the pre-mounted filesystem. */
1568 OBD_SET_CTXT_MAGIC(&mount_ctxt);
1569 mount_ctxt.pwdmnt = mnt;
1570 mount_ctxt.pwd = mnt->mnt_root;
1571 mount_ctxt.fs = get_ds();
1573 rc = ldd_parse(&mount_ctxt, ldd);
1577 CERROR("premount parse options failed: rc = %d\n", rc);
1581 /* Done with our pre-mount, now do the real mount. */
1583 /* Glom up mount options */
1584 memset(options, 0, CFS_PAGE_SIZE);
1585 strncpy(options, ldd->ldd_mount_opts, CFS_PAGE_SIZE - 2);
/* len = room left in the page after reserving the ',' and the NUL. */
1587 len = CFS_PAGE_SIZE - strlen(options) - 2;
1589 strcat(options, ",");
1590 strncat(options, "no_mbcache", len);
1592 /* Add in any mount-line options */
1593 if (lmd->lmd_opts && (*(lmd->lmd_opts) != 0)) {
1594 len = CFS_PAGE_SIZE - strlen(options) - 2;
1595 strcat(options, ",");
1596 strncat(options, lmd->lmd_opts, len);
1599 /* Special permanent mount flags */
1601 s_flags |= MS_NOATIME | MS_NODIRATIME;
1603 CDEBUG(D_MOUNT, "kern_mount: %s %s %s\n",
1604 MT_STR(ldd), lmd->lmd_dev, options);
1605 type = get_fs_type(MT_STR(ldd));
1607 CERROR("get_fs_type failed\n");
1608 GOTO(out_free, rc = -ENODEV);
1610 mnt = vfs_kern_mount(type, s_flags, lmd->lmd_dev, (void *)options);
1611 cfs_module_put(type->owner);
1614 CERROR("vfs_kern_mount failed: rc = %d\n", rc);
1618 if (lmd->lmd_flags & LMD_FLG_ABORT_RECOV)
1619 simple_truncate(mnt->mnt_sb->s_root, mnt, LAST_RCVD,
1622 OBD_PAGE_FREE(__page);
1623 lsi->lsi_ldd = ldd; /* freed at lsi cleanup */
1624 CDEBUG(D_SUPER, "%s: mnt = %p\n", lmd->lmd_dev, mnt);
/* Error exit.  NOTE(review): this path also appears to be taken when the
 * page allocation itself failed - confirm OBD_PAGE_FREE tolerates NULL. */
1629 OBD_PAGE_FREE(__page);
1630 OBD_FREE(ldd, sizeof(*ldd));
1631 lsi->lsi_ldd = NULL;
1632 RETURN(ERR_PTR(rc));
1635 /** Wait here forever until the mount refcount is 0 before completing umount,
1636 * else we risk dereferencing a null pointer.
1637 * LNET may take e.g. 165s before killing zombies.
 *
 * Visible behaviour: one branch simply sleeps for 3 seconds on a private
 * wait queue with a condition that is never true (presumably the case of a
 * NULL @mnt - the guarding test is not visible in this listing).  Otherwise
 * the function loops while mnt_get_count(mnt) > 1, sleeping in 3 second
 * interruptible slices with the signal mask swapped via
 * cfs_block_sigsinv(sigmask(SIGKILL)), printing a console warning on every
 * 30th count of the "waited" counter and an emergency message when the
 * wait is interrupted.
1639 static void server_wait_finished(struct vfsmount *mnt)
1643 cfs_sigset_t blocked;
1646 cfs_waitq_init(&waitq);
1647 cfs_waitq_wait_event_interruptible_timeout(waitq, 0,
1648 cfs_time_seconds(3), rc);
1653 cfs_waitq_init(&waitq);
1655 while (mnt_get_count(mnt) > 1) {
1656 if (waited && (waited % 30 == 0))
1657 LCONSOLE_WARN("Mount still busy with %d refs after "
1661 /* Cannot use l_event_wait() for an interruptible sleep. */
/* Signal mask is changed only for the duration of the sleep and restored
 * right after it. */
1663 blocked = cfs_block_sigsinv(sigmask(SIGKILL));
1664 cfs_waitq_wait_event_interruptible_timeout(
1666 (mnt_get_count(mnt) == 1),
1667 cfs_time_seconds(3),
1669 cfs_restore_sigs(blocked);
1671 LCONSOLE_EMERG("Danger: interrupted umount %s with "
1672 "%d refs!\n", mnt->mnt_devname,
1673 mnt_get_count(mnt));
1680 /** Start the shutdown of servers at umount.
 *
 * Order of operations visible in this listing:
 *  1. copy the server name into a private buffer (tmpname) so it can still
 *     be printed at the end; for an MDT mounted with nosvc the copy is
 *     overwritten with "MGS";
 *  2. unless nosvc is set, for MDT or OST targets: end the config log,
 *     remember the profile's lp_dt name (extraname) for a later safety
 *     cleanup, then clean up the target obd, or deregister the mount when
 *     no such obd exists;
 *  3. stop the MGS if this disk carries one and nomgs was not given;
 *  4. lustre_common_put_super(), then wait for the mount to become idle;
 *  5. stop the shared servers using the flag values saved on entry;
 *  6. clean up any orphaned obd named by extraname and free both buffers.
 * The ldd and lsi flags are copied to locals on entry and those copies are
 * what server_stop_servers() receives.
 * NOTE(review): tmpname is passed to memcpy right after OBD_ALLOC with no
 * NULL check in between - confirm allocation failure cannot happen here.
1682 static void server_put_super(struct super_block *sb)
1684 struct lustre_sb_info *lsi = s2lsi(sb);
1685 struct obd_device *obd;
1686 struct vfsmount *mnt = lsi->lsi_srv_mnt;
1687 char *tmpname, *extraname = NULL;
1689 int lddflags = lsi->lsi_ldd->ldd_flags;
1690 int lsiflags = lsi->lsi_flags;
1693 LASSERT(lsiflags & LSI_SERVER);
1695 tmpname_sz = strlen(lsi->lsi_ldd->ldd_svname) + 1;
1696 OBD_ALLOC(tmpname, tmpname_sz);
1697 memcpy(tmpname, lsi->lsi_ldd->ldd_svname, tmpname_sz);
1698 CDEBUG(D_MOUNT, "server put_super %s\n", tmpname);
1699 if (IS_MDT(lsi->lsi_ldd) && (lsi->lsi_lmd->lmd_flags & LMD_FLG_NOSVC))
1700 snprintf(tmpname, tmpname_sz, "MGS");
1702 /* Stop the target */
1703 if (!(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOSVC) &&
1704 (IS_MDT(lsi->lsi_ldd) || IS_OST(lsi->lsi_ldd))) {
1705 struct lustre_profile *lprof = NULL;
1707 /* tell the mgc to drop the config log */
1708 lustre_end_log(sb, lsi->lsi_ldd->ldd_svname, NULL);
1710 /* COMPAT_146 - profile may get deleted in mgc_cleanup.
1711 If there are any setup/cleanup errors, save the lov
1712 name for safety cleanup later. */
1713 lprof = class_get_profile(lsi->lsi_ldd->ldd_svname);
1714 if (lprof && lprof->lp_dt) {
/* NOTE(review): extraname is passed to strcpy on the next line without a
 * NULL check after the allocation. */
1715 OBD_ALLOC(extraname, strlen(lprof->lp_dt) + 1);
1716 strcpy(extraname, lprof->lp_dt);
1719 obd = class_name2obd(lsi->lsi_ldd->ldd_svname);
1721 CDEBUG(D_MOUNT, "stopping %s\n", obd->obd_name);
1722 if (lsi->lsi_flags & LSI_UMOUNT_FAILOVER)
1724 /* We can't seem to give an error return code
1725 * to .put_super, so we better make sure we clean up! */
1727 class_manual_cleanup(obd);
1729 CERROR("no obd %s\n", lsi->lsi_ldd->ldd_svname);
1730 server_deregister_mount(lsi->lsi_ldd->ldd_svname);
1734 /* If they wanted the mgs to stop separately from the mdt, they
1735 should have put it on a different device. */
1736 if (IS_MGS(lsi->lsi_ldd)) {
1737 /* if MDS start with --nomgs, don't stop MGS then */
1738 if (!(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOMGS))
1739 server_stop_mgs(sb);
1742 /* Clean the mgc and sb */
1743 lustre_common_put_super(sb);
1745 /* Wait for the targets to really clean up - can't exit (and let the
1746 sb get destroyed) while the mount is still in use */
1747 server_wait_finished(mnt);
1749 /* drop the One True Mount */
1753 /* Stop the servers (MDS, OSS) if no longer needed. We must wait
1754 until the target is really gone so that our type refcount check
1756 server_stop_servers(lddflags, lsiflags);
1758 /* In case of startup or cleanup err, stop related obds */
1760 obd = class_name2obd(extraname);
1762 CWARN("Cleaning orphaned obd %s\n", extraname);
1764 class_manual_cleanup(obd);
1766 OBD_FREE(extraname, strlen(extraname) + 1);
1769 LCONSOLE_WARN("server umount %s complete\n", tmpname);
1770 OBD_FREE(tmpname, tmpname_sz);
1774 /** Called only for 'umount -f'
 *
 * Two prototypes are provided, selected by HAVE_UMOUNTBEGIN_VFSMOUNT: one
 * receives the vfsmount plus flags and derives the superblock from it, the
 * other receives the superblock directly.  In the vfsmount variant a call
 * without MNT_FORCE takes a separate branch whose body is not visible in
 * this listing.  The visible effect is to clear LSI_UMOUNT_FAILOVER and
 * set LSI_UMOUNT_FORCE in lsi_flags; server_put_super() later tests
 * LSI_UMOUNT_FAILOVER when it stops the target.
1776 #ifdef HAVE_UMOUNTBEGIN_VFSMOUNT
1777 static void server_umount_begin(struct vfsmount *vfsmnt, int flags)
1779 struct super_block *sb = vfsmnt->mnt_sb;
1781 static void server_umount_begin(struct super_block *sb)
1784 struct lustre_sb_info *lsi = s2lsi(sb);
1787 #ifdef HAVE_UMOUNTBEGIN_VFSMOUNT
1788 if (!(flags & MNT_FORCE)) {
1794 CDEBUG(D_MOUNT, "umount -f\n");
1795 /* umount = failover
1797 no third way to do non-force, non-failover */
1798 lsi->lsi_flags &= ~LSI_UMOUNT_FAILOVER;
1799 lsi->lsi_flags |= LSI_UMOUNT_FORCE;
/* statfs for a server mount point.
 * The prototype depends on HAVE_STATFS_DENTRY_PARAM (superblock or dentry
 * argument).  When the backing mount (lsi_srv_mnt) exists and its superblock
 * implements statfs, that implementation is called and f_type is then
 * overwritten with this superblock's magic.  Otherwise the buffer is filled
 * locally: f_type, f_bsize and f_namelen (NAME_MAX) are visible here; the
 * remaining assignments and the return statements are not visible in this
 * listing. */
1803 #ifndef HAVE_STATFS_DENTRY_PARAM
1804 static int server_statfs (struct super_block *sb, cfs_kstatfs_t *buf)
1807 static int server_statfs (struct dentry *dentry, cfs_kstatfs_t *buf)
1809 struct super_block *sb = dentry->d_sb;
1811 struct vfsmount *mnt = s2lsi(sb)->lsi_srv_mnt;
1814 if (mnt && mnt->mnt_sb && mnt->mnt_sb->s_op->statfs) {
1815 #ifdef HAVE_STATFS_DENTRY_PARAM
1816 int rc = mnt->mnt_sb->s_op->statfs(mnt->mnt_root, buf);
1818 int rc = mnt->mnt_sb->s_op->statfs(mnt->mnt_sb, buf);
/* Report the Lustre magic rather than the backing filesystem's. */
1821 buf->f_type = sb->s_magic;
/* Fallback when the backing filesystem cannot answer. */
1827 buf->f_type = sb->s_magic;
1828 buf->f_bsize = sb->s_blocksize;
1834 buf->f_namelen = NAME_MAX;
1838 /** The operations we support directly on the superblock:
1839 * mount, umount, and df.
 *
 * The table wires put_super to server_put_super, umount_begin to
 * server_umount_begin and statfs to server_statfs; it is installed as
 * sb->s_op by server_fill_super_common().
1841 static struct super_operations server_ops =
1843 .put_super = server_put_super,
1844 .umount_begin = server_umount_begin, /* umount -f */
1845 .statfs = server_statfs,
/* log2(n) is used below to derive s_blocksize_bits from s_blocksize.
 * NOTE(review): presumably cfs_ffz() returns the index of the first zero
 * bit, which makes this exact only for powers of two - confirm. */
1848 #define log2(n) cfs_ffz(~(n))
/* Magic number stored in sb->s_magic for server mount points. */
1849 #define LUSTRE_SUPER_MAGIC 0x0BD00BD1
/* Fill in the generic VFS fields of a server superblock.
 * Sets a 4096-byte block size, the Lustre magic, s_maxbytes = 0, marks the
 * superblock read-only, installs server_ops, and creates a root inode with
 * mode S_IFDIR plus the root dentry for it.
 * NOTE(review): the failure checks after new_inode()/d_alloc_root() and the
 * return statements are not visible in this listing. */
1851 static int server_fill_super_common(struct super_block *sb)
1853 struct inode *root = 0;
1856 CDEBUG(D_MOUNT, "Server sb, dev=%d\n", (int)sb->s_dev);
1858 sb->s_blocksize = 4096;
1859 sb->s_blocksize_bits = log2(sb->s_blocksize);
1860 sb->s_magic = LUSTRE_SUPER_MAGIC;
1861 sb->s_maxbytes = 0; /* we don't allow file IO on server mountpoints */
1862 sb->s_flags |= MS_RDONLY;
1863 sb->s_op = &server_ops;
1865 root = new_inode(sb);
1867 CERROR("Can't make root inode\n");
1871 /* returns -EIO for every operation */
1872 /* make_bad_inode(root); -- badness - can't umount */
1873 /* apparently we need to be a directory for the mount to finish */
1874 root->i_mode = S_IFDIR;
1876 sb->s_root = d_alloc_root(root);
1878 CERROR("Can't make root dentry\n");
1886 /** Fill in the superblock info for a Lustre server.
1887 * Mount the device with the correct options.
1888 * Read the on-disk config file.
1889 * Start the services.
 *
 * Sequence visible in this listing: server_kernel_mount() provides the
 * backing mount, which is stored in lsi_srv_mnt; a target with the same
 * service name already registered is reported as a double mount; the MGS
 * is started first (when this disk carries one and nomgs is not set), then
 * the MGC, then the targets (for OST or MDT disks unless nosvc is set), and
 * finally the VFS-level superblock is filled in.  Failures while starting
 * the MGS or the targets are unwound through server_put_super().
 * NOTE(review): the individual error checks and goto targets between the
 * steps are not visible here.
1891 static int server_fill_super(struct super_block *sb)
1893 struct lustre_sb_info *lsi = s2lsi(sb);
1894 struct vfsmount *mnt;
1898 /* the One True Mount */
1899 mnt = server_kernel_mount(sb);
1902 CERROR("Unable to mount device %s: %d\n",
1903 lsi->lsi_lmd->lmd_dev, rc);
1907 lsi->lsi_srv_mnt = mnt;
1909 LASSERT(lsi->lsi_ldd);
1910 CDEBUG(D_MOUNT, "Found service %s for fs '%s' on device %s\n",
1911 lsi->lsi_ldd->ldd_svname, lsi->lsi_ldd->ldd_fsname,
1912 lsi->lsi_lmd->lmd_dev);
/* Refuse to start a second instance of an already running target. */
1914 if (class_name2obd(lsi->lsi_ldd->ldd_svname)) {
1915 LCONSOLE_ERROR_MSG(0x161, "The target named %s is already "
1916 "running. Double-mount may have compromised"
1917 " the disk journal.\n",
1918 lsi->lsi_ldd->ldd_svname);
1924 /* Start MGS before MGC */
1925 if (IS_MGS(lsi->lsi_ldd) && !(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOMGS)){
1926 rc = server_start_mgs(sb);
1931 /* Start MGC before servers */
1932 rc = lustre_start_mgc(sb);
1936 /* Set up all obd devices for service */
1937 if (!(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOSVC) &&
1938 (IS_OST(lsi->lsi_ldd) || IS_MDT(lsi->lsi_ldd))) {
1939 rc = server_start_targets(sb, mnt);
1941 CERROR("Unable to start targets: %d\n", rc);
1944 /* FIXME overmount client here,
1945 or can we just start a client log and client_fill_super on this sb?
1946 We need to make sure server_put_super gets called too - ll_put_super
1947 calls lustre_common_put_super; check there for LSI_SERVER flag,
1949 Probably should start client from new thread so we can return.
1950 Client will not finish until all servers are connected.
1951 Note - MGS-only server does NOT get a client, since there is no
1952 lustre fs associated - the MGS is for all lustre fs's */
1955 rc = server_fill_super_common(sb);
1961 /* We jump here in case of failure while starting targets or MGS.
1962 * In this case we can't just put @mnt and have to do real cleanup
1963 * with stopping targets, etc. */
1964 server_put_super(sb);
1968 /* Get the index from the obd name.
1969 rc = server type, or
1971 if endptr isn't NULL it is set to end of name */
/* Details visible below: the name is split at its LAST '-'.  A trailing
 * "-" LUSTRE_MDC_NAME suffix is skipped by walking back to the previous
 * '-'.  The three characters after the dash select LDD_F_SV_TYPE_MDT or
 * LDD_F_SV_TYPE_OST; the text after those three characters is either the
 * literal "all" (returns type | LDD_F_SV_ALL without touching @idx) or a
 * hexadecimal index parsed with simple_strtoul().
 * NOTE(review): because strrchr() scans the whole string, a caller that
 * passes a pointer into a longer list (e.g. "fs-OST0001:fs-OST0002") gets
 * the last name in that string, not the first - confirm this is what
 * lmd_make_exclusion() expects. */
1972 int server_name2index(char *svname, __u32 *idx, char **endptr)
1974 unsigned long index;
1976 char *dash = strrchr(svname, '-');
1980 /* interpret <fsname>-MDTXXXXX-mdc as mdt, the better way is to pass
1981 * in the fsname, then determine the server index */
1982 if (!strcmp(LUSTRE_MDC_NAME, dash + 1)) {
1984 for (; dash > svname && *dash != '-'; dash--);
1989 if (strncmp(dash + 1, "MDT", 3) == 0)
1990 rc = LDD_F_SV_TYPE_MDT;
1991 else if (strncmp(dash + 1, "OST", 3) == 0)
1992 rc = LDD_F_SV_TYPE_OST;
1995 if (strcmp(dash + 4, "all") == 0)
1996 return rc | LDD_F_SV_ALL;
1998 index = simple_strtoul(dash + 4, endptr, 16);
/* Derive the recovery window of target @obd from the mount options in @lsi.
 * Soft and hard recovery times come from the mount data and fall back to
 * OBD_RECOVERY_TIME_SOFT / OBD_RECOVERY_TIME_HARD (the conditions guarding
 * the fallbacks are not visible in this listing).  Imperative recovery is
 * considered available when LSI_IR_CAPABLE is set and LMD_FLG_NOIR is not;
 * obd_no_ir records the opposite.  With a recovery factor (the target's own
 * obd_recovery_ir_factor or OBD_IR_FACTOR_DEFAULT) both times are scaled by
 * factor / OBD_IR_FACTOR_MAX, the soft time is clamped to at least
 * OBD_RECOVERY_TIME_MIN and the hard time to at least the soft time.
 * Results are stored in obd_recovery_timeout (never lowered),
 * obd_recovery_time_hard and obd_recovery_ir_factor. */
2004 * Calculate timeout value for a target.
2006 void server_calc_timeout(struct lustre_sb_info *lsi, struct obd_device *obd)
2008 struct lustre_mount_data *lmd;
2012 bool has_ir = !!(lsi->lsi_flags & LSI_IR_CAPABLE);
2013 int min = OBD_RECOVERY_TIME_MIN;
2015 LASSERT(lsi->lsi_flags & LSI_SERVER);
2019 soft = lmd->lmd_recovery_time_soft;
2020 hard = lmd->lmd_recovery_time_hard;
2021 has_ir = has_ir && !(lmd->lmd_flags & LMD_FLG_NOIR);
2022 obd->obd_no_ir = !has_ir;
2026 soft = OBD_RECOVERY_TIME_SOFT;
2028 hard = OBD_RECOVERY_TIME_HARD;
2030 /* target may have ir_factor configured. */
2031 factor = OBD_IR_FACTOR_DEFAULT;
2032 if (obd->obd_recovery_ir_factor)
2033 factor = obd->obd_recovery_ir_factor;
2036 int new_soft = soft;
2037 int new_hard = hard;
2039 /* adjust timeout value by imperative recovery */
2041 new_soft = (soft * factor) / OBD_IR_FACTOR_MAX;
2042 new_hard = (hard * factor) / OBD_IR_FACTOR_MAX;
2044 /* make sure the timeout is not too short */
2045 new_soft = max(min, new_soft);
2046 new_hard = max(new_soft, new_hard);
2048 LCONSOLE_INFO("%s: Imperative Recovery enabled, recovery "
2049 "window shrunk from %d-%d down to %d-%d\n",
2050 obd->obd_name, soft, hard, new_soft, new_hard);
/* Only ever extends an already configured recovery timeout. */
2057 obd->obd_recovery_timeout = max(obd->obd_recovery_timeout, soft);
2058 obd->obd_recovery_time_hard = hard;
2059 obd->obd_recovery_ir_factor = factor;
2061 EXPORT_SYMBOL(server_calc_timeout);
2063 /*************** mount common between server and client ***************/
/* Superblock teardown shared by client and server mounts.
 * Stops (drops a reference to) the MGC first.  A result of 0 or -ENOENT
 * from lustre_stop_mgc() is not treated as a failure; within the failure
 * branch a hard error is logged while the "busy" case is only logged at
 * debug level.  Afterwards the reference on the mounted disk is dropped
 * (the call itself is not visible in this listing).
 * NOTE(review): the exact test separating the CERROR case from the busy
 * case and the return value are not visible here. */
2066 int lustre_common_put_super(struct super_block *sb)
2071 CDEBUG(D_MOUNT, "dropping sb %p\n", sb);
2073 /* Drop a ref to the MGC */
2074 rc = lustre_stop_mgc(sb);
2075 if (rc && (rc != -ENOENT)) {
2077 CERROR("Can't stop MGC: %d\n", rc);
2080 /* BUSY just means that there's some other obd that
2081 needs the mgc. Let him clean it up. */
2082 CDEBUG(D_MOUNT, "MGC still in use\n");
2084 /* Drop a ref to the mounted disk */
/* Dump the parsed mount data to the debug log.
 * PRINT_CMD is CDEBUG and PRINT_MASK is D_SUPER|D_CONFIG (see the defines at
 * the top of the file).  The profile is printed for client mounts only;
 * recovery times are printed only when non-zero; each excluded OST index is
 * printed as OST%04x.
 * NOTE(review): the guard in front of the "options" line is not visible in
 * this listing. */
2090 static void lmd_print(struct lustre_mount_data *lmd)
2094 PRINT_CMD(PRINT_MASK, " mount data:\n");
2095 if (lmd_is_client(lmd))
2096 PRINT_CMD(PRINT_MASK, "profile: %s\n", lmd->lmd_profile);
2097 PRINT_CMD(PRINT_MASK, "device: %s\n", lmd->lmd_dev);
2098 PRINT_CMD(PRINT_MASK, "flags: %x\n", lmd->lmd_flags);
2101 PRINT_CMD(PRINT_MASK, "options: %s\n", lmd->lmd_opts);
2103 if (lmd->lmd_recovery_time_soft)
2104 PRINT_CMD(PRINT_MASK, "recovery time soft: %d\n",
2105 lmd->lmd_recovery_time_soft);
2107 if (lmd->lmd_recovery_time_hard)
2108 PRINT_CMD(PRINT_MASK, "recovery time hard: %d\n",
2109 lmd->lmd_recovery_time_hard);
2111 for (i = 0; i < lmd->lmd_exclude_count; i++) {
2112 PRINT_CMD(PRINT_MASK, "exclude %d: OST%04x\n", i,
2113 lmd->lmd_exclude[i]);
2117 /* Is this server on the exclusion list */
/* Looks @svname up in the exclusion list stored in the mount data of @sb.
 * The name is converted to (type, index) with server_name2index(); anything
 * that is not exactly LDD_F_SV_TYPE_OST takes the early exit.  Otherwise
 * the index is compared against every entry of lmd_exclude and a warning is
 * logged on a match.
 * NOTE(review): the return values are not visible in this listing;
 * presumably non-zero means "excluded" - confirm. */
2118 int lustre_check_exclusion(struct super_block *sb, char *svname)
2120 struct lustre_sb_info *lsi = s2lsi(sb);
2121 struct lustre_mount_data *lmd = lsi->lsi_lmd;
2126 rc = server_name2index(svname, &index, NULL);
2127 if (rc != LDD_F_SV_TYPE_OST)
2128 /* Only exclude OSTs */
2131 CDEBUG(D_MOUNT, "Check exclusion %s (%d) in %d of %s\n", svname,
2132 index, lmd->lmd_exclude_count, lmd->lmd_dev);
2134 for(i = 0; i < lmd->lmd_exclude_count; i++) {
2135 if (index == lmd->lmd_exclude[i]) {
2136 CWARN("Excluding %s (on exclusion list)\n", svname);
2143 /* mount -v -o exclude=lustre-OST0001:lustre-OST0002 -t lustre ... */
2144 static int lmd_make_exclusion(struct lustre_mount_data *lmd, char *ptr)
2146 char *s1 = ptr, *s2;
2147 __u32 index, *exclude_list;
2151 /* The shortest an ost name can be is 8 chars: -OST0000.
2152 We don't actually know the fsname at this time, so in fact
2153 a user could specify any fsname. */
2154 devmax = strlen(ptr) / 8 + 1;
2156 /* temp storage until we figure out how many we have */
2157 OBD_ALLOC(exclude_list, sizeof(index) * devmax);
2161 /* we enter this fn pointing at the '=' */
2162 while (*s1 && *s1 != ' ' && *s1 != ',') {
2164 rc = server_name2index(s1, &index, &s2);
2166 CERROR("Can't parse server name '%s'\n", s1);
2169 if (rc == LDD_F_SV_TYPE_OST)
2170 exclude_list[lmd->lmd_exclude_count++] = index;
2172 CDEBUG(D_MOUNT, "ignoring exclude %.7s\n", s1);
2174 /* now we are pointing at ':' (next exclude)
2175 or ',' (end of excludes) */
2176 if (lmd->lmd_exclude_count >= devmax)
2179 if (rc >= 0) /* non-err */
2182 if (lmd->lmd_exclude_count) {
2183 /* permanent, freed in lustre_free_lsi */
2184 OBD_ALLOC(lmd->lmd_exclude, sizeof(index) *
2185 lmd->lmd_exclude_count);
2186 if (lmd->lmd_exclude) {
2187 memcpy(lmd->lmd_exclude, exclude_list,
2188 sizeof(index) * lmd->lmd_exclude_count);
2191 lmd->lmd_exclude_count = 0;
2194 OBD_FREE(exclude_list, sizeof(index) * devmax);
2198 static int lmd_parse_mgssec(struct lustre_mount_data *lmd, char *ptr)
2203 if (lmd->lmd_mgssec != NULL) {
2204 OBD_FREE(lmd->lmd_mgssec, strlen(lmd->lmd_mgssec) + 1);
2205 lmd->lmd_mgssec = NULL;
2208 tail = strchr(ptr, ',');
2210 length = strlen(ptr);
2212 length = tail - ptr;
2214 OBD_ALLOC(lmd->lmd_mgssec, length + 1);
2215 if (lmd->lmd_mgssec == NULL)
2218 memcpy(lmd->lmd_mgssec, ptr, length);
2219 lmd->lmd_mgssec[length] = '\0';
/* Replace the string *@handle with a copy of the option value at @ptr.
 * The value runs up to (not including) the next ',' or, when there is none,
 * to the end of the string.  A previously stored string is freed first.
 * The copy is allocated with length + 1 bytes and NUL-terminated, which is
 * what lustre_free_lsi() relies on when it frees with strlen() + 1.
 * NOTE(review): the return statements are not visible in this listing;
 * presumably -EINVAL for NULL arguments, -ENOMEM on allocation failure and
 * 0 on success - confirm. */
2223 static int lmd_parse_string(char **handle, char *ptr)
2228 if ((handle == NULL) || (ptr == NULL))
2231 if (*handle != NULL) {
2232 OBD_FREE(*handle, strlen(*handle) + 1);
2236 tail = strchr(ptr, ',');
2238 length = strlen(ptr);
2240 length = tail - ptr;
2242 OBD_ALLOC(*handle, length + 1);
2243 if (*handle == NULL)
2246 memcpy(*handle, ptr, length);
2247 (*handle)[length] = '\0';
2252 /* Collect multiple values for mgsnid specifiers */
/* Consumes the run of NIDs starting at *@ptr (class_parse_nid() is called
 * until it stops returning 0) and appends that text to lmd->lmd_mgs.  When
 * a value is already stored, the old string is copied into a new buffer,
 * its terminating NUL is replaced by ':' and the new NID text follows, so
 * repeated options accumulate as "old:new".  The old buffer is freed.
 * A zero-length NID run is reported on the console as unparsable.
 * NOTE(review): the return statements and any update of *@ptr are not
 * visible in this listing. */
2253 static int lmd_parse_mgs(struct lustre_mount_data *lmd, char **ptr)
2261 /* Find end of nidlist */
2262 while (class_parse_nid(tail, &nid, &tail) == 0) {}
2263 length = tail - *ptr;
2265 LCONSOLE_ERROR_MSG(0x159, "Can't parse NID '%s'\n", *ptr);
/* oldlen counts the old string including its NUL, which later becomes the
 * ':' separator. */
2269 if (lmd->lmd_mgs != NULL)
2270 oldlen = strlen(lmd->lmd_mgs) + 1;
2272 OBD_ALLOC(mgsnid, oldlen + length + 1);
2276 if (lmd->lmd_mgs != NULL) {
2277 /* Multiple mgsnid= are taken to mean failover locations */
2278 memcpy(mgsnid, lmd->lmd_mgs, oldlen);
2279 mgsnid[oldlen - 1] = ':';
2280 OBD_FREE(lmd->lmd_mgs, oldlen);
2282 memcpy(mgsnid + oldlen, *ptr, length);
2283 mgsnid[oldlen + length] = '\0';
2284 lmd->lmd_mgs = mgsnid;
2290 /** Parse mount line options
2291 * e.g. mount -v -t lustre -o abort_recov uml1:uml2:/lustre-client /mnt/lustre
2292 * dev is passed as device=uml1:/lustre by mount.lustre
 *
 * Behaviour visible in this listing:
 *  - a buffer that looks like an old binary lustre_mount_data (magic match
 *    on the upper 24 bits) is rejected with a console message;
 *  - options are recognised by prefix with strncmp(), so any option that
 *    merely starts with a known keyword (e.g. "noir...") also matches;
 *  - flag options set bits in lmd_flags; recovery_time_soft= and
 *    recovery_time_hard= are parsed as decimal and raised to at least
 *    OBD_RECOVERY_TIME_MIN; the mgsnode, mgssec=, exclude=, svname= and
 *    osd= options are handed to their helper parsers;
 *  - "exclude=" passes s1 + 7, i.e. a pointer to the '=' itself, which is
 *    what lmd_make_exclusion() expects;
 *  - an osd type of "osd-ldiskfs" is dropped again so lmd_osd_type stays
 *    NULL for ldiskfs;
 *  - a device name containing ":/" marks a client mount and the profile
 *    becomes "<fsname>-client" (strlen + 8 bytes: 7 for "-client", 1 NUL);
 *  - the device name and the remaining option string are duplicated into
 *    lmd_dev and lmd_opts.
 * NOTE(review): how recognised options are removed from the string, the
 * loop construct itself and the return values are not visible here.
2294 static int lmd_parse(char *options, struct lustre_mount_data *lmd)
2296 char *s1, *s2, *devname = NULL;
2297 struct lustre_mount_data *raw = (struct lustre_mount_data *)options;
2303 LCONSOLE_ERROR_MSG(0x162, "Missing mount data: check that "
2304 "/sbin/mount.lustre is installed.\n");
2308 /* Options should be a string - try to detect old lmd data */
2309 if ((raw->lmd_magic & 0xffffff00) == (LMD_MAGIC & 0xffffff00)) {
2310 LCONSOLE_ERROR_MSG(0x163, "You're using an old version of "
2311 "/sbin/mount.lustre. Please install "
2312 "version %s\n", LUSTRE_VERSION_STRING);
2315 lmd->lmd_magic = LMD_MAGIC;
2317 /* Set default flags here */
2322 int time_min = OBD_RECOVERY_TIME_MIN;
2324 /* Skip whitespace and extra commas */
2325 while (*s1 == ' ' || *s1 == ',')
2328 /* Client options are parsed in ll_options: eg. flock,
2331 /* Parse non-ldiskfs options here. Rather than modifying
2332 ldiskfs, we just zero these out here */
2333 if (strncmp(s1, "abort_recov", 11) == 0) {
2334 lmd->lmd_flags |= LMD_FLG_ABORT_RECOV;
2336 } else if (strncmp(s1, "recovery_time_soft=", 19) == 0) {
2337 lmd->lmd_recovery_time_soft = max_t(int,
2338 simple_strtoul(s1 + 19, NULL, 10), time_min);
2340 } else if (strncmp(s1, "recovery_time_hard=", 19) == 0) {
2341 lmd->lmd_recovery_time_hard = max_t(int,
2342 simple_strtoul(s1 + 19, NULL, 10), time_min);
2344 } else if (strncmp(s1, "noir", 4) == 0) {
2345 lmd->lmd_flags |= LMD_FLG_NOIR; /* test purpose only. */
2347 } else if (strncmp(s1, "nosvc", 5) == 0) {
2348 lmd->lmd_flags |= LMD_FLG_NOSVC;
2350 } else if (strncmp(s1, "nomgs", 5) == 0) {
2351 lmd->lmd_flags |= LMD_FLG_NOMGS;
2353 } else if (strncmp(s1, "noscrub", 7) == 0) {
2354 lmd->lmd_flags |= LMD_FLG_NOSCRUB;
2356 } else if (strncmp(s1, PARAM_MGSNODE,
2357 sizeof(PARAM_MGSNODE) - 1) == 0) {
2358 s2 = s1 + sizeof(PARAM_MGSNODE) - 1;
2359 /* Assume the next mount opt is the first
2360 invalid nid we get to. */
2361 rc = lmd_parse_mgs(lmd, &s2);
2365 } else if (strncmp(s1, "writeconf", 9) == 0) {
2366 lmd->lmd_flags |= LMD_FLG_WRITECONF;
2368 } else if (strncmp(s1, "mgssec=", 7) == 0) {
2369 rc = lmd_parse_mgssec(lmd, s1 + 7);
2373 /* ost exclusion list */
2374 } else if (strncmp(s1, "exclude=", 8) == 0) {
2375 rc = lmd_make_exclusion(lmd, s1 + 7);
2379 } else if (strncmp(s1, "svname=", 7) == 0) {
2380 rc = lmd_parse_string(&lmd->lmd_profile, s1 + 7);
2384 } else if (strncmp(s1, "osd=", 4) == 0) {
2385 rc = lmd_parse_string(&lmd->lmd_osd_type, s1 + 4);
2388 /* with ldiskfs we're still doing ldd parsing
2389 * in the kernel space */
2390 if (!strcmp(lmd->lmd_osd_type, "osd-ldiskfs")) {
2391 OBD_FREE(lmd->lmd_osd_type,
2392 strlen(lmd->lmd_osd_type) + 1);
2393 lmd->lmd_osd_type = NULL;
2397 /* Linux 2.4 doesn't pass the device, so we stuck it at the
2398 end of the options. */
2399 else if (strncmp(s1, "device=", 7) == 0) {
2401 /* terminate options right before device. device
2402 must be the last one. */
2408 s2 = strchr(s1, ',');
/* Source and destination overlap, hence memmove rather than memcpy. */
2416 memmove(s1, s2, strlen(s2) + 1);
2422 LCONSOLE_ERROR_MSG(0x164, "Can't find the device name "
2423 "(need mount option 'device=...')\n");
2427 s1 = strstr(devname, ":/");
2430 lmd->lmd_flags |= LMD_FLG_CLIENT;
2431 /* Remove leading /s from fsname */
2432 while (*++s1 == '/') ;
2433 /* Freed in lustre_free_lsi */
2434 OBD_ALLOC(lmd->lmd_profile, strlen(s1) + 8);
2435 if (!lmd->lmd_profile)
2437 sprintf(lmd->lmd_profile, "%s-client", s1);
2440 /* Freed in lustre_free_lsi */
2441 OBD_ALLOC(lmd->lmd_dev, strlen(devname) + 1);
2444 strcpy(lmd->lmd_dev, devname);
2446 /* Save mount options */
2447 s1 = options + strlen(options) - 1;
2448 while (s1 >= options && (*s1 == ',' || *s1 == ' '))
2450 if (*options != 0) {
2451 /* Freed in lustre_free_lsi */
2452 OBD_ALLOC(lmd->lmd_opts, strlen(options) + 1);
2455 strcpy(lmd->lmd_opts, options);
2459 lmd->lmd_magic = LMD_MAGIC;
2464 CERROR("Bad mount options %s\n", options);
/* Bundle handed to lustre_fill_super() as its opaque @data argument:
 * lustre_get_sb() initialises it as {data, mnt}, and lustre_fill_super()
 * reads the raw option buffer through lmd2_data and the vfsmount through
 * lmd2_mnt.  (The lmd2_data member line is not visible in this listing.) */
2468 struct lustre_mount_data2 {
2470 struct vfsmount *lmd2_mnt;
2473 /** This is the entry point for the mount call into Lustre.
2474 * This is called when a server or client is mounted,
2475 * and this is where we start setting things up.
2476 * @param data Mount options (e.g. -o flock,abort_recov)
 *
 * Visible flow: create the lsi for @sb, wait for zombie obd cleanup from a
 * previous mount (obd_zombie_barrier), parse the option buffer found in
 * the lustre_mount_data2 passed as @data, then branch on the mount type.
 * Client mounts need a registered client_fill_super callback; the MGC is
 * started and the callback invoked with the vfsmount.  Server mounts set
 * LSI_SERVER and go through server_fill_super().  Option parsing failures
 * yield -EINVAL.  Both fill_super paths clean up after themselves on
 * failure, which is why the lsi is not put again here.
 * NOTE(review): the assignment of the local lmd pointer, the lockdep
 * handling and the final return are not visible in this listing.
2478 int lustre_fill_super(struct super_block *sb, void *data, int silent)
2480 struct lustre_mount_data *lmd;
2481 struct lustre_mount_data2 *lmd2 = data;
2482 struct lustre_sb_info *lsi;
2486 CDEBUG(D_MOUNT|D_VFSTRACE, "VFS Op: sb %p\n", sb);
2488 lsi = lustre_init_lsi(sb);
2494 * Disable lockdep during mount, because mount locking patterns are
2500 * LU-639: the obd cleanup of last mount may not finish yet, wait here.
2502 obd_zombie_barrier();
2504 /* Figure out the lmd from the mount options */
2505 if (lmd_parse((char *)(lmd2->lmd2_data), lmd)) {
2507 GOTO(out, rc = -EINVAL);
2510 if (lmd_is_client(lmd)) {
2511 CDEBUG(D_MOUNT, "Mounting client %s\n", lmd->lmd_profile);
2512 if (!client_fill_super) {
2513 LCONSOLE_ERROR_MSG(0x165, "Nothing registered for "
2514 "client mount! Is the 'lustre' "
2515 "module loaded?\n");
2519 rc = lustre_start_mgc(sb);
2524 /* Connect and start */
2525 /* (should always be ll_fill_super) */
2526 rc = (*client_fill_super)(sb, lmd2->lmd2_mnt);
2527 /* c_f_s will call lustre_common_put_super on failure */
2530 CDEBUG(D_MOUNT, "Mounting server from %s\n", lmd->lmd_dev);
2531 lsi->lsi_flags |= LSI_SERVER;
2532 rc = server_fill_super(sb);
2533 /* s_f_s calls lustre_start_mgc after the mount because we need
2534 the MGS nids which are stored on disk. Plus, we may
2535 need to start the MGS first. */
2536 /* s_f_s will call server_put_super on failure */
2539 /* If error happens in fill_super() call, @lsi will be killed there.
2540 * This is why we do not put it here. */
/* The device name is only read while the superblock still has an lsi. */
2544 CERROR("Unable to mount %s (%d)\n",
2545 s2lsi(sb) ? lmd->lmd_dev : "", rc);
2547 CDEBUG(D_SUPER, "Mount %s complete\n",
2555 /* We can't call ll_fill_super by name because it lives in a module that
2556 must be loaded after this one. */
/* Records @cfs in the file-scope client_fill_super pointer, which
 * lustre_fill_super() invokes for client mounts. */
2557 void lustre_register_client_fill_super(int (*cfs)(struct super_block *sb,
2558 struct vfsmount *mnt))
2560 client_fill_super = cfs;
/* Records @cfs in the file-scope kill_super_cb pointer, which
 * lustre_kill_super() invokes for non-server superblocks. */
2563 void lustre_register_kill_super_cb(void (*cfs)(struct super_block *sb))
2565 kill_super_cb = cfs;
2568 /***************** FS registration ******************/
/* get_sb hook of the "lustre" filesystem type.
 * Two variants are compiled depending on the kernel version: before 2.6.18
 * the raw option data is passed straight to get_sb_nodev(); on newer
 * kernels the data and the vfsmount are wrapped in a stack-allocated
 * lustre_mount_data2 first.  Either way lustre_fill_super() is the
 * fill_super callback.
 * NOTE(review): lustre_fill_super() casts its data argument to
 * struct lustre_mount_data2, which the pre-2.6.18 variant does not
 * provide - confirm that variant is still expected to work. */
2570 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18))
2571 struct super_block * lustre_get_sb(struct file_system_type *fs_type, int flags,
2572 const char *devname, void * data)
2574 return get_sb_nodev(fs_type, flags, data, lustre_fill_super);
2577 int lustre_get_sb(struct file_system_type *fs_type, int flags,
2578 const char *devname, void * data, struct vfsmount *mnt)
2580 struct lustre_mount_data2 lmd2 = {data, mnt};
2582 return get_sb_nodev(fs_type, flags, &lmd2, lustre_fill_super, mnt);
/* kill_sb hook: gives the registered client callback a chance to run first
 * (only when a callback is registered, the superblock still has an lsi and
 * LSI_SERVER is not set), then releases the anonymous superblock. */
2586 void lustre_kill_super(struct super_block *sb)
2588 struct lustre_sb_info *lsi = s2lsi(sb);
2590 if (kill_super_cb && lsi && !(lsi->lsi_flags & LSI_SERVER))
2591 (*kill_super_cb)(sb);
2593 kill_anon_super(sb);
2596 /** Register the "lustre" fs type
 *
 * The descriptor below is what lustre_register_fs() and
 * lustre_unregister_fs() pass to the VFS.  Mounting goes through
 * lustre_get_sb and teardown through lustre_kill_super.  The flags include
 * FS_BINARY_MOUNTDATA, FS_REQUIRES_DEV and LL_RENAME_DOES_D_MOVE; a further
 * flag is added under FS_HAS_FIEMAP (its line, like the .name member, is
 * not visible in this listing).
2598 struct file_system_type lustre_fs_type = {
2599 .owner = THIS_MODULE,
2601 .get_sb = lustre_get_sb,
2602 .kill_sb = lustre_kill_super,
2603 .fs_flags = FS_BINARY_MOUNTDATA | FS_REQUIRES_DEV |
2604 #ifdef FS_HAS_FIEMAP
2607 LL_RENAME_DOES_D_MOVE,
/* Register lustre_fs_type with the VFS; returns whatever
 * register_filesystem() returns. */
2610 int lustre_register_fs(void)
2612 return register_filesystem(&lustre_fs_type);
/* Remove lustre_fs_type from the VFS; returns whatever
 * unregister_filesystem() returns. */
2615 int lustre_unregister_fs(void)
2617 return unregister_filesystem(&lustre_fs_type);
/* Symbols made available to other kernel modules: the callback registration
 * hooks, the common put_super path, config-log start/stop, the server mount
 * lookup helpers, target registration and name parsing, and do_lcfg. */
2620 EXPORT_SYMBOL(lustre_register_client_fill_super);
2621 EXPORT_SYMBOL(lustre_register_kill_super_cb);
2622 EXPORT_SYMBOL(lustre_common_put_super);
2623 EXPORT_SYMBOL(lustre_process_log);
2624 EXPORT_SYMBOL(lustre_end_log);
2625 EXPORT_SYMBOL(server_get_mount);
2626 EXPORT_SYMBOL(server_get_mount_2);
2627 EXPORT_SYMBOL(server_put_mount);
2628 EXPORT_SYMBOL(server_put_mount_2);
2629 EXPORT_SYMBOL(server_register_target);
2630 EXPORT_SYMBOL(server_name2index);
2631 EXPORT_SYMBOL(server_mti_print);
2632 EXPORT_SYMBOL(do_lcfg);