1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see [sun.com URL with a
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright 2008 Sun Microsystems, Inc. All rights reserved
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lustre/obdclass/obd_mount.c
38 * Client/server mount routines
40 * Author: Nathan Rutman <nathan@clusterfs.com>
/* Debug configuration for this file.
 * DEBUG_SUBSYSTEM is set to S_CLASS (presumably consumed by the CDEBUG and
 * CERROR macros -- confirm in libcfs).  D_MOUNT is the mask passed to CDEBUG
 * for mount tracing.  PRINT_CMD and PRINT_MASK are the macro and mask used
 * by the print helpers ldd_print() and server_mti_print().
 * NOTE(review): D_MOUNT and PRINT_MASK expand to an unparenthesized
 * D_SUPER|D_CONFIG; that is only safe while they are passed as a complete
 * macro argument, not combined inside a larger expression. */
44 #define DEBUG_SUBSYSTEM S_CLASS
45 #define D_MOUNT D_SUPER|D_CONFIG /*|D_WARNING */
46 #define PRINT_CMD CDEBUG
47 #define PRINT_MASK D_SUPER|D_CONFIG
51 #include <lustre_fsfilt.h>
52 #include <obd_class.h>
53 #include <lustre/lustre_user.h>
54 #include <linux/version.h>
55 #include <lustre_log.h>
56 #include <lustre_disk.h>
57 #include <lustre_param.h>
/* File-local callback slots, both initialised to NULL.
 * client_fill_super takes a super_block and returns int; kill_super_cb
 * takes a super_block and returns nothing.  The code that assigns or
 * invokes them is not in this part of the file. */
59 static int (*client_fill_super)(struct super_block *sb) = NULL;
60 static void (*kill_super_cb)(struct super_block *sb) = NULL;
62 /*********** mount lookup *********/
/* Registry of server mounts, keyed by obd name.
 * server_mount_info_list chains struct lustre_mount_info entries through
 * their lmi_list_chain member.  lustre_mount_info_lock is taken with down()
 * and released with up() around the lookups, the insertion and the removal
 * done by the register, deregister, get and put helpers that follow. */
64 DECLARE_MUTEX(lustre_mount_info_lock);
65 static CFS_LIST_HEAD(server_mount_info_list);
/* Look up a registered mount by name.
 * Walks server_mount_info_list and compares name with each entry's
 * lmi_name using strcmp().  No lock is taken here; the register,
 * deregister, get and put helpers hold lustre_mount_info_lock around
 * their calls to this function.
 * NOTE(review): the statements after a successful comparison and after
 * the loop are missing from this copy of the file; presumably the matching
 * entry, or NULL, is returned -- confirm against the original source. */
67 static struct lustre_mount_info *server_find_mount(const char *name)
69 struct list_head *tmp;
70 struct lustre_mount_info *lmi;
73 list_for_each(tmp, &server_mount_info_list) {
74 lmi = list_entry(tmp, struct lustre_mount_info, lmi_list_chain);
75 if (strcmp(name, lmi->lmi_name) == 0)
81 /* we must register an obd for a mount before we call the setup routine.
82 *_setup will call lustre_get_mount to get the mnt struct
83 by obd_name, since we can't pass the pointer to setup. */
/* Record a mount under an obd name so it can be found later by that name.
 * Allocates a lustre_mount_info and a private copy of name (strlen + 1
 * bytes, filled with strcpy); the entry is freed again on the path that
 * follows the name allocation.  With lustre_mount_info_lock held, a name
 * already known to server_find_mount() is rejected: the lock is dropped,
 * both allocations are freed and an error is logged.  Otherwise the name
 * copy is stored in lmi_name and the entry is added to
 * server_mount_info_list before the lock is released.
 * name: lookup key; copied, so the caller keeps its own string.
 * sb:   super block of the mount; its use is on lines not visible here.
 * NOTE(review): the remainder of the parameter list, the allocation
 * checks, the stores of the other lmi fields and every return statement
 * are missing from this copy, so the return codes cannot be stated. */
84 static int server_register_mount(const char *name, struct super_block *sb,
87 struct lustre_mount_info *lmi;
94 OBD_ALLOC(lmi, sizeof(*lmi));
97 OBD_ALLOC(name_cp, strlen(name) + 1);
99 OBD_FREE(lmi, sizeof(*lmi));
102 strcpy(name_cp, name);
104 down(&lustre_mount_info_lock);
106 if (server_find_mount(name)) {
107 up(&lustre_mount_info_lock);
108 OBD_FREE(lmi, sizeof(*lmi));
109 OBD_FREE(name_cp, strlen(name) + 1);
110 CERROR("Already registered %s\n", name);
113 lmi->lmi_name = name_cp;
116 list_add(&lmi->lmi_list_chain, &server_mount_info_list);
118 up(&lustre_mount_info_lock);
120 CDEBUG(D_MOUNT, "reg_mnt %p from %s, vfscount=%d\n",
121 lmi->lmi_mnt, name, atomic_read(&lmi->lmi_mnt->mnt_count));
126 /* when an obd no longer needs a mount */
/* Remove the registry entry for name.
 * Takes lustre_mount_info_lock and looks the entry up.  On the not-found
 * path the lock is released and an error is logged.  Otherwise the entry's
 * name copy is freed, the entry is unlinked with list_del() and freed, and
 * only then is the lock released.
 * The entry's name length is measured with strlen() at free time, which
 * matches the strlen + 1 allocation made when the mount was registered.
 * NOTE(review): the not-found test and the return statements are missing
 * from this copy of the file. */
127 static int server_deregister_mount(const char *name)
129 struct lustre_mount_info *lmi;
132 down(&lustre_mount_info_lock);
133 lmi = server_find_mount(name);
135 up(&lustre_mount_info_lock);
136 CERROR("%s not registered\n", name);
140 CDEBUG(D_MOUNT, "dereg_mnt %p from %s, vfscount=%d\n",
141 lmi->lmi_mnt, name, atomic_read(&lmi->lmi_mnt->mnt_count));
143 OBD_FREE(lmi->lmi_name, strlen(lmi->lmi_name) + 1);
144 list_del(&lmi->lmi_list_chain);
145 OBD_FREE(lmi, sizeof(*lmi));
146 up(&lustre_mount_info_lock);
151 /* obd's look up a registered mount using their obdname. This is just
152 for initial obd setup to find the mount struct. It should not be
153 called every time you want to mntget. */
/* Find the mount registered under name and take references on it.
 * The lookup runs under lustre_mount_info_lock.  After the lock has been
 * released the function calls mntget() on the entry's lmi_mnt and
 * increments lsi_mounts of the super block info reached through lmi_sb.
 * A failed lookup is logged with CERROR.
 * NOTE(review): the entry is dereferenced after up() has released the
 * lock, so nothing visible here stops a concurrent
 * server_deregister_mount() from freeing it first -- confirm that callers
 * serialise setup and cleanup.
 * NOTE(review): the not-found test and the return statements are missing
 * from this copy of the file. */
154 struct lustre_mount_info *server_get_mount(const char *name)
156 struct lustre_mount_info *lmi;
157 struct lustre_sb_info *lsi;
160 down(&lustre_mount_info_lock);
161 lmi = server_find_mount(name);
162 up(&lustre_mount_info_lock);
164 CERROR("Can't find mount for %s\n", name);
167 lsi = s2lsi(lmi->lmi_sb);
168 mntget(lmi->lmi_mnt);
169 atomic_inc(&lsi->lsi_mounts);
171 CDEBUG(D_MOUNT, "get_mnt %p from %s, refs=%d, vfscount=%d\n",
172 lmi->lmi_mnt, name, atomic_read(&lsi->lsi_mounts),
173 atomic_read(&lmi->lmi_mnt->mnt_count));
179 * Used by mdt to get mount_info from obdname.
180 * There are no blocking when using the mount_info.
181 * Do not use server_get_mount for this purpose.
/* Look up the mount registered under name without taking references.
 * The lookup runs under lustre_mount_info_lock; unlike server_get_mount()
 * no mntget() or lsi_mounts increment is visible in this function.  A
 * failed lookup is logged with CERROR.
 * NOTE(review): the not-found test and the return statement are missing
 * from this copy of the file. */
183 struct lustre_mount_info *server_get_mount_2(const char *name)
185 struct lustre_mount_info *lmi;
188 down(&lustre_mount_info_lock);
189 lmi = server_find_mount(name);
190 up(&lustre_mount_info_lock);
192 CERROR("Can't find mount for %s\n", name);
/* Helper that releases a vfsmount; it first tests kernel_locked().
 * NOTE(review): both branches of the test are missing from this copy of
 * the file.  Judging by the name, the big kernel lock is presumably
 * dropped around the mntput() call when it is held -- confirm against the
 * original source. */
197 static void unlock_mntput(struct vfsmount *mnt)
199 if (kernel_locked()) {
/* Forward declaration: server_put_mount() calls lustre_put_lsi() before
 * its definition appears in the file. */
208 static int lustre_put_lsi(struct super_block *sb);
210 /* to be called from obd_cleanup methods */
/* Release the mount that an obd obtained by name.
 * name: obd name the mount was registered under.
 * mnt:  the vfsmount being released; asserted equal to the registered
 *       entry's lmi_mnt.
 * count is sampled once at entry as mnt_count minus one and is used only
 * in the log messages that follow.  The entry is looked up under
 * lustre_mount_info_lock, lustre_put_lsi() drops one reference on the
 * super block info, and a non-zero result from it is treated as the last
 * put.  The name is finally removed from the registry with
 * server_deregister_mount().
 * NOTE(review): the mntput call, the not-found test, the condition that
 * guards the mount-busy message and the return statements are missing
 * from this copy of the file. */
211 int server_put_mount(const char *name, struct vfsmount *mnt)
213 struct lustre_mount_info *lmi;
214 struct lustre_sb_info *lsi;
215 int count = atomic_read(&mnt->mnt_count) - 1;
218 /* This might be the last one, can't deref after this */
221 down(&lustre_mount_info_lock);
222 lmi = server_find_mount(name);
223 up(&lustre_mount_info_lock);
225 CERROR("Can't find mount for %s\n", name);
228 lsi = s2lsi(lmi->lmi_sb);
229 LASSERT(lmi->lmi_mnt == mnt);
231 CDEBUG(D_MOUNT, "put_mnt %p from %s, refs=%d, vfscount=%d\n",
232 lmi->lmi_mnt, name, atomic_read(&lsi->lsi_mounts), count);
234 if (lustre_put_lsi(lmi->lmi_sb)) {
235 CDEBUG(D_MOUNT, "Last put of mnt %p from %s, vfscount=%d\n",
236 lmi->lmi_mnt, name, count);
237 /* last mount is the One True Mount */
239 CERROR("%s: mount busy, vfscount=%d!\n", name, count);
242 /* this obd should never need the mount again */
243 server_deregister_mount(name);
248 /* Corresponding to server_get_mount_2 */
/* Release counterpart of server_get_mount_2(); takes the same name plus
 * the vfsmount.
 * NOTE(review): the whole body is missing from this copy of the file, so
 * its behaviour and return value cannot be described from here. */
249 int server_put_mount_2(const char *name, struct vfsmount *mnt)
255 /******* mount helper utilities *********/
/* Dump the fields of an on-disk data record to the debug log.
 * Each field is emitted through PRINT_CMD with PRINT_MASK: server name,
 * uuid, filesystem name, server index (hex), config version, flags (hex),
 * backing filesystem type via MT_STR(), mount options, params and the
 * user comment.
 * ldd: record to print; it is only read. */
258 static void ldd_print(struct lustre_disk_data *ldd)
260 PRINT_CMD(PRINT_MASK, " disk data:\n");
261 PRINT_CMD(PRINT_MASK, "server: %s\n", ldd->ldd_svname);
262 PRINT_CMD(PRINT_MASK, "uuid: %s\n", (char *)ldd->ldd_uuid);
263 PRINT_CMD(PRINT_MASK, "fs: %s\n", ldd->ldd_fsname);
264 PRINT_CMD(PRINT_MASK, "index: %04x\n", ldd->ldd_svindex);
265 PRINT_CMD(PRINT_MASK, "config: %d\n", ldd->ldd_config_ver);
266 PRINT_CMD(PRINT_MASK, "flags: %#x\n", ldd->ldd_flags);
267 PRINT_CMD(PRINT_MASK, "diskfs: %s\n", MT_STR(ldd));
268 PRINT_CMD(PRINT_MASK, "options: %s\n", ldd->ldd_mount_opts);
269 PRINT_CMD(PRINT_MASK, "params: %s\n", ldd->ldd_params);
270 PRINT_CMD(PRINT_MASK, "comment: %s\n", ldd->ldd_userdata);
/* Read MOUNT_DATA_FILE into *ldd and validate it.
 * mount_ctxt: run context pushed for the duration of the file access and
 *             popped again before leaving.
 * ldd:        destination buffer; the file must be exactly sizeof(*ldd).
 * Validation visible here, each failing with rc = -EINVAL via out_close:
 *   - file size differs from sizeof(*ldd);
 *   - the read fails (logged as a short read);
 *   - ldd_magic is not LDD_MAGIC (no byte swapping is attempted);
 *   - ldd_feature_incompat has bits outside LDD_INCOMPAT_SUPP;
 *   - ldd_feature_rocompat has bits outside LDD_ROCOMPAT_SUPP (rejected
 *     outright rather than degraded to a read-only mount).
 * NOTE(review): the open-failure test, the read-length test, the labels
 * and the return statement are missing from this copy of the file. */
274 static int ldd_parse(struct lvfs_run_ctxt *mount_ctxt,
275 struct lustre_disk_data *ldd)
277 struct lvfs_run_ctxt saved;
284 push_ctxt(&saved, mount_ctxt, NULL);
286 file = filp_open(MOUNT_DATA_FILE, O_RDONLY, 0644);
289 CERROR("cannot open %s: rc = %d\n", MOUNT_DATA_FILE, rc);
293 len = i_size_read(file->f_dentry->d_inode);
294 CDEBUG(D_MOUNT, "Have %s, size %lu\n", MOUNT_DATA_FILE, len);
295 if (len != sizeof(*ldd)) {
296 CERROR("disk data size does not match: see %lu expect "LPSZ"\n",
298 GOTO(out_close, rc = -EINVAL);
301 rc = lustre_fread(file, ldd, len, &off);
303 CERROR("error reading %s: read %d of %lu\n",
304 MOUNT_DATA_FILE, rc, len);
305 GOTO(out_close, rc = -EINVAL);
309 if (ldd->ldd_magic != LDD_MAGIC) {
310 /* FIXME add swabbing support */
311 CERROR("Bad magic in %s: %x!=%x\n", MOUNT_DATA_FILE,
312 ldd->ldd_magic, LDD_MAGIC);
313 GOTO(out_close, rc = -EINVAL);
316 if (ldd->ldd_feature_incompat & ~LDD_INCOMPAT_SUPP) {
317 CERROR("%s: unsupported incompat filesystem feature(s) %x\n",
319 ldd->ldd_feature_incompat & ~LDD_INCOMPAT_SUPP);
320 GOTO(out_close, rc = -EINVAL);
322 if (ldd->ldd_feature_rocompat & ~LDD_ROCOMPAT_SUPP) {
323 CERROR("%s: unsupported read-only filesystem feature(s) %x\n",
325 ldd->ldd_feature_rocompat & ~LDD_ROCOMPAT_SUPP);
326 /* Do something like remount filesystem read-only */
327 GOTO(out_close, rc = -EINVAL);
333 pop_ctxt(&saved, mount_ctxt, NULL);
/* Write *ldd back to MOUNT_DATA_FILE.
 * mount_ctxt: run context pushed around the file access.
 * ldd:        record to write; its magic is asserted to be LDD_MAGIC and
 *             sizeof(struct lustre_disk_data) bytes are written.
 * The file is opened O_RDWR without O_CREAT, so it must already exist.
 * NOTE(review): ldd_config_ver is incremented before the file is opened,
 * so the in-memory version advances even when the open or the write
 * fails.
 * NOTE(review): the write-failure message reads "read %d of %lu" although
 * this is the write path; the wording looks copied from ldd_parse().
 * NOTE(review): the failure tests, the labels and the return statement
 * are missing from this copy of the file. */
337 static int ldd_write(struct lvfs_run_ctxt *mount_ctxt,
338 struct lustre_disk_data *ldd)
340 struct lvfs_run_ctxt saved;
343 unsigned long len = sizeof(struct lustre_disk_data);
347 LASSERT(ldd->ldd_magic == LDD_MAGIC);
349 ldd->ldd_config_ver++;
351 push_ctxt(&saved, mount_ctxt, NULL);
353 file = filp_open(MOUNT_DATA_FILE, O_RDWR, 0644);
356 CERROR("cannot open %s: rc = %d\n", MOUNT_DATA_FILE, rc);
360 rc = lustre_fwrite(file, ldd, len, &off);
362 CERROR("error writing %s: read %d of %lu\n",
363 MOUNT_DATA_FILE, rc, len);
364 GOTO(out_close, rc = -EINVAL);
372 pop_ctxt(&saved, mount_ctxt, NULL);
377 /**************** config llog ********************/
379 /* Get a config log from the MGS and process it.
380 This func is called for both clients and servers.
381 Continue to process new statements appended to the logs
382 (whenever the config lock is revoked) until lustre_end_log
/* Ask the MGC attached to this super block to start processing a config
 * log.
 * sb:      super block whose lsi_mgc is the MGC to use.
 * logname: name of the config log, placed in buffer 1.
 * cfg:     instance data, copied into buffer 2 by size.
 * The request is an LCFG_LOG_START lustre_cfg with the MGC obd name in
 * buffer 0 and the address of sb in buffer 3; it is handed to
 * obd_process_config() and freed afterwards.  Two console errors (0x15b
 * and 0x15c) report failures.
 * NOTE(review): in the 0x15b message the first string fragment ends right
 * after the quoted log name and the next fragment starts with the word
 * failed, so the printed text has no space between them.
 * NOTE(review): no check of the lustre_cfg_new() result is visible before
 * it is used, and the conditions selecting the two console messages and
 * the return statement are missing from this copy of the file. */
384 int lustre_process_log(struct super_block *sb, char *logname,
385 struct config_llog_instance *cfg)
387 struct lustre_cfg *lcfg;
388 struct lustre_cfg_bufs bufs;
389 struct lustre_sb_info *lsi = s2lsi(sb);
390 struct obd_device *mgc = lsi->lsi_mgc;
397 /* mgc_process_config */
398 lustre_cfg_bufs_reset(&bufs, mgc->obd_name);
399 lustre_cfg_bufs_set_string(&bufs, 1, logname);
400 lustre_cfg_bufs_set(&bufs, 2, cfg, sizeof(*cfg));
401 lustre_cfg_bufs_set(&bufs, 3, &sb, sizeof(sb));
402 lcfg = lustre_cfg_new(LCFG_LOG_START, &bufs);
403 rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
404 lustre_cfg_free(lcfg);
407 LCONSOLE_ERROR_MSG(0x15b, "%s: The configuration from log '%s'"
408 "failed from the MGS (%d). Make sure this "
409 "client and the MGS are running compatible "
410 "versions of Lustre.\n",
411 mgc->obd_name, logname, rc);
414 LCONSOLE_ERROR_MSG(0x15c, "%s: The configuration from log '%s' "
415 "failed (%d). This may be the result of "
416 "communication errors between this node and "
417 "the MGS, a bad configuration, or other "
418 "errors. See the syslog for more "
419 "information.\n", mgc->obd_name, logname,
422 /* class_obd_list(); */
426 /* Stop watching this config log for updates */
/* Ask the MGC attached to this super block to stop following a config
 * log.
 * sb:      super block whose lsi_mgc is the MGC to use.
 * logname: name of the config log, placed in buffer 1.
 * cfg:     instance data, copied into buffer 2 by size.
 * The request is an LCFG_LOG_END lustre_cfg with the MGC obd name in
 * buffer 0; it is handed to obd_process_config() and freed afterwards.
 * NOTE(review): server_put_super() calls this with cfg set to NULL.  Any
 * guard around the buffer-2 store would sit on a line that is missing
 * from this copy of the file -- confirm that one exists.
 * NOTE(review): the return statement is also missing from this copy. */
427 int lustre_end_log(struct super_block *sb, char *logname,
428 struct config_llog_instance *cfg)
430 struct lustre_cfg *lcfg;
431 struct lustre_cfg_bufs bufs;
432 struct lustre_sb_info *lsi = s2lsi(sb);
433 struct obd_device *mgc = lsi->lsi_mgc;
440 /* mgc_process_config */
441 lustre_cfg_bufs_reset(&bufs, mgc->obd_name);
442 lustre_cfg_bufs_set_string(&bufs, 1, logname);
444 lustre_cfg_bufs_set(&bufs, 2, cfg, sizeof(*cfg));
445 lcfg = lustre_cfg_new(LCFG_LOG_END, &bufs);
446 rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
447 lustre_cfg_free(lcfg);
451 /**************** obd start *******************/
/* Build a lustre_cfg command and run it through class_process_config().
 * cfgname: goes into buffer 0 (the obd the command addresses).
 * nid:     stored in lcfg_nid of the new command.
 * cmd:     command code handed to lustre_cfg_new().
 * s1..s4:  string arguments stored in buffers 1 to 4.
 * The command is freed after processing; rc carries the result of
 * class_process_config().
 * NOTE(review): callers in this file pass 0 for unused strings.  Any
 * guards around the four string stores would sit on lines that are
 * missing from this copy -- confirm that they exist.
 * NOTE(review): no check of the lustre_cfg_new() result is visible before
 * lcfg_nid is written, and the return statement is missing. */
453 int do_lcfg(char *cfgname, lnet_nid_t nid, int cmd,
454 char *s1, char *s2, char *s3, char *s4)
456 struct lustre_cfg_bufs bufs;
457 struct lustre_cfg * lcfg = NULL;
460 CDEBUG(D_TRACE, "lcfg %s %#x %s %s %s %s\n", cfgname,
461 cmd, s1, s2, s3, s4);
463 lustre_cfg_bufs_reset(&bufs, cfgname);
465 lustre_cfg_bufs_set_string(&bufs, 1, s1);
467 lustre_cfg_bufs_set_string(&bufs, 2, s2);
469 lustre_cfg_bufs_set_string(&bufs, 3, s3);
471 lustre_cfg_bufs_set_string(&bufs, 4, s4);
473 lcfg = lustre_cfg_new(cmd, &bufs);
474 lcfg->lcfg_nid = nid;
475 rc = class_process_config(lcfg);
476 lustre_cfg_free(lcfg);
/* Attach and set up one obd with two do_lcfg() calls.
 * obdname: name of the obd to create.
 * type:    obd type name, first argument of the LCFG_ATTACH command.
 * uuid:    uuid string, second argument of the LCFG_ATTACH command.
 * s1 and s2 (declared on a line missing from this copy) are passed as
 * the arguments of the LCFG_SETUP command.
 * When setup fails, the error is logged and the obd is detached again
 * with LCFG_DETACH so that no half-built device is left behind.
 * NOTE(review): the rest of the parameter list, the error tests and the
 * return statements are missing from this copy of the file. */
480 static int lustre_start_simple(char *obdname, char *type, char *uuid,
484 CDEBUG(D_MOUNT, "Starting obd %s (typ=%s)\n", obdname, type);
486 rc = do_lcfg(obdname, 0, LCFG_ATTACH, type, uuid, 0, 0);
488 CERROR("%s attach error %d\n", obdname, rc);
491 rc = do_lcfg(obdname, 0, LCFG_SETUP, s1, s2, 0, 0);
493 CERROR("%s setup error %d\n", obdname, rc);
494 do_lcfg(obdname, 0, LCFG_DETACH, 0, 0, 0, 0);
499 /* Set up a MGS to serve startup logs */
/* Start the MGS obd on the disk behind this super block.
 * sb: super block whose lsi_srv_mnt is registered for the MGS.
 * If a mount is already registered under LUSTRE_MGS_OBDNAME, console
 * error 0x15d names the server that owns it.  Otherwise the mount is
 * registered under that name and the obd is started through
 * lustre_start_simple(); when the start fails the registration is undone
 * with server_deregister_mount().  Console error 0x15e reports a failed
 * start.
 * NOTE(review): server_find_mount() is called here with no visible
 * down() on lustre_mount_info_lock, unlike the other callers in this
 * file -- confirm whether the lock is needed on this path.
 * NOTE(review): the conditions around the two console messages and the
 * return statements are missing from this copy of the file. */
500 static int server_start_mgs(struct super_block *sb)
502 struct lustre_sb_info *lsi = s2lsi(sb);
503 struct vfsmount *mnt = lsi->lsi_srv_mnt;
504 struct lustre_mount_info *lmi;
509 /* It is impossible to have more than 1 MGS per node, since
510 MGC wouldn't know which to connect to */
511 lmi = server_find_mount(LUSTRE_MGS_OBDNAME);
513 lsi = s2lsi(lmi->lmi_sb);
514 LCONSOLE_ERROR_MSG(0x15d, "The MGS service was already started"
516 lsi->lsi_ldd->ldd_svname);
520 CDEBUG(D_CONFIG, "Start MGS service %s\n", LUSTRE_MGS_OBDNAME);
522 rc = server_register_mount(LUSTRE_MGS_OBDNAME, sb, mnt);
525 ((rc = lustre_start_simple(LUSTRE_MGS_OBDNAME, LUSTRE_MGS_NAME,
526 LUSTRE_MGS_OBDNAME, 0, 0))))
527 server_deregister_mount(LUSTRE_MGS_OBDNAME);
530 LCONSOLE_ERROR_MSG(0x15e, "Failed to start MGS '%s' (%d). "
531 "Is the 'mgs' module loaded?\n",
532 LUSTRE_MGS_OBDNAME, rc);
/* Stop the MGS obd.
 * The obd is found by its fixed name LUSTRE_MGS_OBDNAME with
 * class_name2obd(); a debug message covers the case where it is not
 * running.  Otherwise it is shut down with class_manual_cleanup() and rc
 * receives that result.
 * sb: not referenced on any line visible in this copy.
 * NOTE(review): the not-running test, whatever the existing comment about
 * the MGS always stopping refers to, and the return statements are
 * missing from this copy of the file. */
536 static int server_stop_mgs(struct super_block *sb)
538 struct obd_device *obd;
542 CDEBUG(D_MOUNT, "Stop MGS service %s\n", LUSTRE_MGS_OBDNAME);
544 /* There better be only one MGS */
545 obd = class_name2obd(LUSTRE_MGS_OBDNAME);
547 CDEBUG(D_CONFIG, "mgs %s not running\n", LUSTRE_MGS_OBDNAME);
551 /* The MGS should always stop when we say so */
553 rc = class_manual_cleanup(obd);
/* Held by lustre_start_mgc() and lustre_stop_mgc() while they look up,
 * create, reference-count or tear down an MGC obd. */
557 DECLARE_MUTEX(mgc_start_lock);
559 /* Set up a mgcobd to process startup logs */
/* Start, or re-use, the MGC obd that talks to the MGS for this mount.
 * sb: super block being mounted; lsi_lmd is asserted to be set.
 * Steps visible in this copy of the file:
 *  1. Find a first MGS nid to name the obd after.  Servers (LSI_SERVER)
 *     take it from the PARAM_MGSNODE entry in ldd_params or, on an MGS,
 *     from the local LNet ids while testing each for LOLND.  Clients
 *     parse it from the device string lmd_dev.
 *  2. Build mgcname as LUSTRE_MGC_OBDNAME followed by the nid string and
 *     take mgc_start_lock.
 *  3. When an obd of that name exists, increment its cl_mgc_refcount and
 *     push recov_bk to it under KEY_INIT_RECOV_BACKUP.
 *  4. Otherwise register the primary nids under a uuid made of mgcname,
 *     an underscore and a hex index; generate a random obd uuid; start
 *     the obd with lustre_start_simple(); add failover nids and
 *     connections; record the failnode count in lmd_mgs_failnodes; set
 *     cl_mgc_refcount to 1; connect and keep the export in
 *     cl_mgc_mgsexp.
 *  5. Release mgc_start_lock and free mgcname and niduuid.
 * NOTE(review): niduuid is allocated with len + 2 bytes, where len already
 * covers mgcname and its terminator.  That leaves room for the underscore
 * and exactly one hex digit, so an index of 16 or more would overrun the
 * buffer.  lustre_stop_mgc() reserves strlen + 6 for the same pattern.
 * NOTE(review): GOTO(out_free) is used once before mgc_start_lock is taken
 * (allocation failure) and several times after it.  The label is not
 * visible here -- verify that the early path cannot reach mutex_up().
 * NOTE(review): many guards, the allocation of data, the labels and the
 * return statement are missing from this copy of the file. */
560 static int lustre_start_mgc(struct super_block *sb)
562 struct lustre_handle mgc_conn = {0, };
563 struct obd_connect_data *data = NULL;
564 struct lustre_sb_info *lsi = s2lsi(sb);
565 struct obd_device *obd;
566 struct obd_export *exp;
567 struct obd_uuid *uuid;
570 char *mgcname, *niduuid;
573 int rc = 0, i = 0, j, len;
576 LASSERT(lsi->lsi_lmd);
578 /* Find the first non-lo MGS nid for our MGC name */
579 if (lsi->lsi_flags & LSI_SERVER) {
580 ptr = lsi->lsi_ldd->ldd_params;
581 /* Use mgsnode= nids */
582 if ((class_find_param(ptr, PARAM_MGSNODE, &ptr) == 0) &&
583 (class_parse_nid(ptr, &nid, &ptr) == 0)) {
585 } else if (IS_MGS(lsi->lsi_ldd)) {
586 lnet_process_id_t id;
587 while ((rc = LNetGetId(i++, &id)) != -ENOENT) {
588 if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND)
595 } else { /* client */
596 /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
597 ptr = lsi->lsi_lmd->lmd_dev;
598 if (class_parse_nid(ptr, &nid, &ptr) == 0)
602 CERROR("No valid MGS nids found.\n");
606 len = strlen(LUSTRE_MGC_OBDNAME) + strlen(libcfs_nid2str(nid)) + 1;
607 OBD_ALLOC(mgcname, len);
608 OBD_ALLOC(niduuid, len + 2);
609 if (!mgcname || !niduuid)
610 GOTO(out_free, rc = -ENOMEM);
611 sprintf(mgcname, "%s%s", LUSTRE_MGC_OBDNAME, libcfs_nid2str(nid));
613 mutex_down(&mgc_start_lock);
615 obd = class_name2obd(mgcname);
617 /* Re-using an existing MGC */
618 atomic_inc(&obd->u.cli.cl_mgc_refcount);
621 /* If we are restarting the MGS, don't try to keep the MGC's
622 old connection, or registration will fail. */
623 if ((lsi->lsi_flags & LSI_SERVER) && IS_MGS(lsi->lsi_ldd)) {
624 CDEBUG(D_MOUNT, "New MGS with live MGC\n");
628 /* Try all connections, but only once (again).
629 We don't want to block another target from starting
630 (using its local copy of the log), but we do want to connect
631 if at all possible. */
633 CDEBUG(D_MOUNT, "%s: Set MGC reconnect %d\n", mgcname,recov_bk);
634 rc = obd_set_info_async(obd->obd_self_export,
635 sizeof(KEY_INIT_RECOV_BACKUP),
636 KEY_INIT_RECOV_BACKUP,
637 sizeof(recov_bk), &recov_bk, NULL);
641 CDEBUG(D_MOUNT, "Start MGC '%s'\n", mgcname);
643 /* Add the primary nids for the MGS */
645 sprintf(niduuid, "%s_%x", mgcname, i);
646 if (lsi->lsi_flags & LSI_SERVER) {
647 ptr = lsi->lsi_ldd->ldd_params;
648 if (IS_MGS(lsi->lsi_ldd)) {
649 /* Use local nids (including LO) */
650 lnet_process_id_t id;
651 while ((rc = LNetGetId(i++, &id)) != -ENOENT) {
652 rc = do_lcfg(mgcname, id.nid,
653 LCFG_ADD_UUID, niduuid, 0,0,0);
656 /* Use mgsnode= nids */
657 if (class_find_param(ptr, PARAM_MGSNODE, &ptr) != 0) {
658 CERROR("No MGS nids given.\n");
659 GOTO(out_free, rc = -EINVAL);
661 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
662 rc = do_lcfg(mgcname, nid,
663 LCFG_ADD_UUID, niduuid, 0,0,0);
667 } else { /* client */
668 /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
669 ptr = lsi->lsi_lmd->lmd_dev;
670 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
671 rc = do_lcfg(mgcname, nid,
672 LCFG_ADD_UUID, niduuid, 0,0,0);
674 /* Stop at the first failover nid */
680 CERROR("No valid MGS nids found.\n");
681 GOTO(out_free, rc = -EINVAL);
683 lsi->lsi_lmd->lmd_mgs_failnodes = 1;
685 /* Random uuid for MGC allows easier reconnects */
687 ll_generate_random_uuid(uuidc);
688 class_uuid_unparse(uuidc, uuid);
691 rc = lustre_start_simple(mgcname, LUSTRE_MGC_NAME,
692 (char *)uuid->uuid, LUSTRE_MGS_OBDNAME,
698 /* Add any failover MGS nids */
700 while ((*ptr == ':' ||
701 class_find_param(ptr, PARAM_MGSNODE, &ptr) == 0)) {
702 /* New failover node */
703 sprintf(niduuid, "%s_%x", mgcname, i);
705 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
707 rc = do_lcfg(mgcname, nid,
708 LCFG_ADD_UUID, niduuid, 0,0,0);
713 rc = do_lcfg(mgcname, 0, LCFG_ADD_CONN,
721 lsi->lsi_lmd->lmd_mgs_failnodes = i;
723 obd = class_name2obd(mgcname);
725 CERROR("Can't find mgcobd %s\n", mgcname);
726 GOTO(out_free, rc = -ENOTCONN);
729 /* Keep a refcount of servers/clients who started with "mount",
730 so we know when we can get rid of the mgc. */
731 atomic_set(&obd->u.cli.cl_mgc_refcount, 1);
733 /* Try all connections, but only once. */
735 rc = obd_set_info_async(obd->obd_self_export,
736 sizeof(KEY_INIT_RECOV_BACKUP),
737 KEY_INIT_RECOV_BACKUP,
738 sizeof(recov_bk), &recov_bk, NULL);
741 CWARN("can't set %s %d\n", KEY_INIT_RECOV_BACKUP, rc);
742 /* We connect to the MGS at setup, and don't disconnect until cleanup */
745 GOTO(out, rc = -ENOMEM);
746 data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_FID |
748 data->ocd_version = LUSTRE_VERSION_CODE;
749 rc = obd_connect(NULL, &mgc_conn, obd, &(obd->obd_uuid), data, NULL);
752 CERROR("connect failed %d\n", rc);
756 exp = class_conn2export(&mgc_conn);
757 obd->u.cli.cl_mgc_mgsexp = exp;
760 /* Keep the mgc info in the sb. Note that many lsi's can point
764 mutex_up(&mgc_start_lock);
767 OBD_FREE(mgcname, len);
769 OBD_FREE(niduuid, len + 2);
/* Drop this mount's reference on its MGC and tear the MGC down when it
 * was the last one.
 * sb: super block being unmounted; lmd_mgs_failnodes bounds the uuid
 *     cleanup loop.
 * Under mgc_start_lock the function decrements cl_mgc_refcount.  While
 * other references remain it leaves through out with rc = -EBUSY, which
 * the existing comment marks as non-fatal.  On the last reference it sets
 * obd_no_recov, disconnects cl_mgc_mgsexp when that is set, copies the
 * obd name into niduuid (strlen + 6 bytes), runs class_manual_cleanup()
 * and then deletes each nid uuid, named as the obd name plus an
 * underscore and a hex index, with an LCFG_DEL_UUID command.
 * NOTE(review): the failure message for that delete says MDC although
 * this is the MGC path.
 * NOTE(review): how obd is obtained, the failure tests, the labels and
 * the return statement are missing from this copy of the file. */
773 static int lustre_stop_mgc(struct super_block *sb)
775 struct lustre_sb_info *lsi = s2lsi(sb);
776 struct obd_device *obd;
777 char *niduuid = 0, *ptr = 0;
778 int i, rc = 0, len = 0;
788 mutex_down(&mgc_start_lock);
789 if (!atomic_dec_and_test(&obd->u.cli.cl_mgc_refcount)) {
790 /* This is not fatal, every client that stops
791 will call in here. */
792 CDEBUG(D_MOUNT, "mgc still has %d references.\n",
793 atomic_read(&obd->u.cli.cl_mgc_refcount));
794 GOTO(out, rc = -EBUSY);
797 /* The MGC has no recoverable data in any case.
798 * force shutdown set in umount_begin */
799 obd->obd_no_recov = 1;
801 if (obd->u.cli.cl_mgc_mgsexp) {
802 /* An error is not fatal, if we are unable to send the
803 disconnect mgs ping evictor cleans up the export */
804 rc = obd_disconnect(obd->u.cli.cl_mgc_mgsexp);
806 CDEBUG(D_MOUNT, "disconnect failed %d\n", rc);
809 /* Save the obdname for cleaning the nid uuids, which are
811 len = strlen(obd->obd_name) + 6;
812 OBD_ALLOC(niduuid, len);
814 strcpy(niduuid, obd->obd_name);
815 ptr = niduuid + strlen(niduuid);
818 rc = class_manual_cleanup(obd);
822 /* Clean the nid uuids */
825 for (i = 0; i < lsi->lsi_lmd->lmd_mgs_failnodes; i++) {
826 sprintf(ptr, "_%x", i);
827 rc = do_lcfg(LUSTRE_MGC_OBDNAME, 0, LCFG_DEL_UUID,
830 CERROR("del MDC UUID %s failed: rc = %d\n",
833 OBD_FREE(niduuid, len);
834 /* class_import_put will get rid of the additional connections */
837 mutex_up(&mgc_start_lock);
841 /* Since there's only one mgc per node, we have to change its fs to get
842 access to the right disk. */
/* Point the MGC at the disk behind this super block.
 * mgc: MGC obd; the request goes to its obd_self_export.
 * sb:  super block handed over as the value of the KEY_SET_FS request.
 * A failure is logged with CERROR.
 * NOTE(review): the value length passed is sizeof(*sb), the size of the
 * whole super_block structure, while the value is the pointer sb --
 * confirm that the receiving handler expects this pairing.
 * NOTE(review): the failure test and the return statement are missing
 * from this copy of the file. */
843 static int server_mgc_set_fs(struct obd_device *mgc, struct super_block *sb)
845 struct lustre_sb_info *lsi = s2lsi(sb);
849 CDEBUG(D_MOUNT, "Set mgc disk for %s\n", lsi->lsi_lmd->lmd_dev);
851 /* cl_mgc_sem in mgc ensures we sleep if the mgc_fs is busy */
852 rc = obd_set_info_async(mgc->obd_self_export,
853 sizeof(KEY_SET_FS), KEY_SET_FS,
854 sizeof(*sb), sb, NULL);
856 CERROR("can't set_fs %d\n", rc);
/* Detach the MGC from the disk assigned by server_mgc_set_fs().
 * mgc: MGC obd; a KEY_CLEAR_FS request is sent to its obd_self_export.
 * NOTE(review): the trailing arguments of the request and the return
 * statement are missing from this copy of the file. */
862 static int server_mgc_clear_fs(struct obd_device *mgc)
867 CDEBUG(D_MOUNT, "Unassign mgc disk\n");
869 rc = obd_set_info_async(mgc->obd_self_export,
870 sizeof(KEY_CLEAR_FS), KEY_CLEAR_FS,
/* Held by server_stop_servers() and server_start_targets() while they
 * look up, start or clean up the node-wide MDS and OSS obds. */
875 DECLARE_MUTEX(server_start_lock);
877 /* Stop MDS/OSS if nobody is using them */
/* Clean up the node-wide MDS or OSS obd once no target uses it.
 * lddflags: disk-data flags of the target that just stopped; the
 *           LDD_F_SV_TYPE_MDT and LDD_F_SV_TYPE_OST bits choose which
 *           service obd and obd type to examine.
 * lsiflags: not referenced on any line visible in this copy.
 * Under server_start_lock the service obd is looked up by name.  It is
 * cleaned with class_manual_cleanup() only when it exists and its type is
 * either not found or has a zero typ_refcnt.
 * NOTE(review): when both type bits are set, the OST lookup overwrites
 * obd and type from the MDT lookup, so only the OSS would be examined.
 * NOTE(review): the lines between the refcount test and the cleanup call,
 * and the return statement, are missing from this copy of the file. */
878 static int server_stop_servers(int lddflags, int lsiflags)
880 struct obd_device *obd = NULL;
881 struct obd_type *type = NULL;
885 mutex_down(&server_start_lock);
887 /* Either an MDT or an OST or neither */
888 /* if this was an MDT, and there are no more MDT's, clean up the MDS */
889 if ((lddflags & LDD_F_SV_TYPE_MDT) &&
890 (obd = class_name2obd(LUSTRE_MDS_OBDNAME))) {
891 /*FIXME pre-rename, should eventually be LUSTRE_MDT_NAME*/
892 type = class_search_type(LUSTRE_MDS_NAME);
894 /* if this was an OST, and there are no more OST's, clean up the OSS */
895 if ((lddflags & LDD_F_SV_TYPE_OST) &&
896 (obd = class_name2obd(LUSTRE_OSS_OBDNAME))) {
897 type = class_search_type(LUSTRE_OST_NAME);
900 if (obd && (!type || !type->typ_refcnt)) {
903 /* obd_fail doesn't mean much on a server obd */
904 err = class_manual_cleanup(obd);
909 mutex_up(&server_start_lock);
/* Dump a target-info record to the debug log.
 * title: label printed on the first line.
 * mti:   record to print; it is only read.
 * The server name, filesystem name, uuid, config version and flags are
 * emitted through PRINT_CMD with PRINT_MASK.
 * NOTE(review): the return statement is missing from this copy of the
 * file. */
914 int server_mti_print(char *title, struct mgs_target_info *mti)
916 PRINT_CMD(PRINT_MASK, "mti %s\n", title);
917 PRINT_CMD(PRINT_MASK, "server: %s\n", mti->mti_svname);
918 PRINT_CMD(PRINT_MASK, "fs: %s\n", mti->mti_fsname);
919 PRINT_CMD(PRINT_MASK, "uuid: %s\n", mti->mti_uuid);
920 PRINT_CMD(PRINT_MASK, "ver: %d flags: %#x\n",
921 mti->mti_config_ver, mti->mti_flags);
/* Fill a target-info record from the disk data of a server super block.
 * sb:  super block; LSI_SERVER is tested in its lsi_flags.
 * mti: record to fill.
 * Copies the filesystem and server names, collects the local LNet nids
 * while testing each for LOLND, warns once MTI_NIDS_MAX nids have been
 * stored, and copies version, flags, index, uuid and params.  The config
 * version is always set to 0.
 * NOTE(review): both strncpy() calls pass the full destination size, so a
 * source of that length or longer leaves the field without a terminating
 * NUL.
 * NOTE(review): the params length test uses a strict greater-than, so a
 * string whose length equals sizeof(mti_params) passes and is then copied
 * by memcpy() without its terminator; greater-or-equal looks intended.
 * NOTE(review): the action on the LOLND test, the loop exit after the
 * warning and the return statements are missing from this copy. */
925 static int server_sb2mti(struct super_block *sb, struct mgs_target_info *mti)
927 struct lustre_sb_info *lsi = s2lsi(sb);
928 struct lustre_disk_data *ldd = lsi->lsi_ldd;
929 lnet_process_id_t id;
933 if (!(lsi->lsi_flags & LSI_SERVER))
936 strncpy(mti->mti_fsname, ldd->ldd_fsname,
937 sizeof(mti->mti_fsname));
938 strncpy(mti->mti_svname, ldd->ldd_svname,
939 sizeof(mti->mti_svname));
941 mti->mti_nid_count = 0;
942 while (LNetGetId(i++, &id) != -ENOENT) {
943 if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND)
945 mti->mti_nids[mti->mti_nid_count] = id.nid;
946 mti->mti_nid_count++;
947 if (mti->mti_nid_count >= MTI_NIDS_MAX) {
948 CWARN("Only using first %d nids for %s\n",
949 mti->mti_nid_count, mti->mti_svname);
954 mti->mti_lustre_ver = LUSTRE_VERSION_CODE;
955 mti->mti_config_ver = 0;
956 mti->mti_flags = ldd->ldd_flags;
957 mti->mti_stripe_index = ldd->ldd_svindex;
958 memcpy(mti->mti_uuid, ldd->ldd_uuid, sizeof(mti->mti_uuid));
959 if (strlen(ldd->ldd_params) > sizeof(mti->mti_params)) {
960 CERROR("params too big for mti\n");
963 memcpy(mti->mti_params, ldd->ldd_params, sizeof(mti->mti_params));
967 /* Register an old or new target with the MGS. If needed MGS will construct
968 startup logs and assign index */
/* Register this target with the MGS and apply what the MGS sends back.
 * sb: server super block; LSI_SERVER is tested in its lsi_flags.
 * A target-info record is filled by server_sb2mti() and sent through the
 * MGC export cl_mgc_mgsexp under KEY_REGISTER_TARGET.  Afterwards
 * ldd_flags is replaced by the returned flags with LDD_F_REWRITE_LDD
 * masked off.  When the MGS set LDD_F_REWRITE_LDD, the on-disk index and
 * server name are updated from the reply, written with ldd_write(), the
 * filesystem label is set and read back, and the disk is synced.
 * NOTE(review): the result of ldd_write() is ignored, so a failed rewrite
 * of the disk data goes unreported.
 * NOTE(review): strncpy() into ldd_svname passes the full destination
 * size and may leave the name without a terminating NUL.
 * NOTE(review): the allocation and release of mti, the error tests and
 * the return statements are missing from this copy of the file. */
969 int server_register_target(struct super_block *sb)
971 struct lustre_sb_info *lsi = s2lsi(sb);
972 struct obd_device *mgc = lsi->lsi_mgc;
973 struct lustre_disk_data *ldd = lsi->lsi_ldd;
974 struct mgs_target_info *mti = NULL;
980 if (!(lsi->lsi_flags & LSI_SERVER))
986 rc = server_sb2mti(sb, mti);
990 CDEBUG(D_MOUNT, "Registration %s, fs=%s, %s, index=%04x, flags=%#x\n",
991 mti->mti_svname, mti->mti_fsname,
992 libcfs_nid2str(mti->mti_nids[0]), mti->mti_stripe_index,
995 /* Register the target */
996 /* FIXME use mgc_process_config instead */
997 rc = obd_set_info_async(mgc->u.cli.cl_mgc_mgsexp,
998 sizeof(KEY_REGISTER_TARGET), KEY_REGISTER_TARGET,
999 sizeof(*mti), mti, NULL);
1003 /* Always update our flags */
1004 ldd->ldd_flags = mti->mti_flags & ~LDD_F_REWRITE_LDD;
1006 /* If this flag is set, it means the MGS wants us to change our
1007 on-disk data. (So far this means just the index.) */
1008 if (mti->mti_flags & LDD_F_REWRITE_LDD) {
1011 CDEBUG(D_MOUNT, "Changing on-disk index from %#x to %#x "
1012 "for %s\n", ldd->ldd_svindex, mti->mti_stripe_index,
1014 ldd->ldd_svindex = mti->mti_stripe_index;
1015 strncpy(ldd->ldd_svname, mti->mti_svname,
1016 sizeof(ldd->ldd_svname));
1017 /* or ldd_make_sv_name(ldd); */
1018 ldd_write(&mgc->obd_lvfs_ctxt, ldd);
1019 err = fsfilt_set_label(mgc, lsi->lsi_srv_mnt->mnt_sb,
1022 CERROR("Label set error %d\n", err);
1023 label = fsfilt_get_label(mgc, lsi->lsi_srv_mnt->mnt_sb);
1025 CDEBUG(D_MOUNT, "Disk label changed to %s\n", label);
1027 /* Flush the new ldd to disk */
1028 fsfilt_sync(mgc, lsi->lsi_srv_mnt->mnt_sb);
/* Start the target (MDT or OST) that lives on this super block.
 * sb:  server super block; its disk data selects the target type.
 * mnt: vfsmount registered under the target name.
 * Steps visible in this copy of the file:
 *  1. For an MDT, look up LUSTRE_MDS_OBDNAME under server_start_lock and
 *     start it with lustre_start_simple(); for an OST do the same with
 *     LUSTRE_OSS_OBDNAME.
 *  2. Point the MGC at this disk with server_mgc_set_fs().
 *  3. Register with the MGS.  A failure is reported as a required
 *     registration failure only when ldd_flags holds LDD_F_NEED_INDEX,
 *     LDD_F_UPDATE or LDD_F_UPGRADE14; -EINVAL yields console error
 *     0x160; otherwise the failure is only traced.
 *  4. Register the mount under the target name so the target can find it
 *     during setup.
 *  5. Process the config log named after the target with a zeroed cfg.
 *  6. Release the MGC disk with server_mgc_clear_fs().
 *  7. Look the new obd up by name, request OBD_IOC_ABORT_RECOVERY when
 *     LMD_FLG_ABORT_RECOV is set and the obd has an iocontrol method,
 *     and notify it with OBD_NOTIFY_CONFIG.
 * NOTE(review): the obd-exists tests, the type arguments of the two
 * lustre_start_simple() calls, the error exits, the labels and the return
 * statement are missing from this copy of the file. */
1038 static int server_start_targets(struct super_block *sb, struct vfsmount *mnt)
1040 struct obd_device *obd;
1041 struct lustre_sb_info *lsi = s2lsi(sb);
1042 struct config_llog_instance cfg;
1046 CDEBUG(D_MOUNT, "starting target %s\n", lsi->lsi_ldd->ldd_svname);
1049 /* If we're an MDT, make sure the global MDS is running */
1050 if (lsi->lsi_ldd->ldd_flags & LDD_F_SV_TYPE_MDT) {
1051 /* make sure the MDS is started */
1052 mutex_down(&server_start_lock);
1053 obd = class_name2obd(LUSTRE_MDS_OBDNAME);
1055 rc = lustre_start_simple(LUSTRE_MDS_OBDNAME,
1056 /* FIXME pre-rename, should eventually be LUSTRE_MDS_NAME */
1058 LUSTRE_MDS_OBDNAME"_uuid",
1061 mutex_up(&server_start_lock);
1062 CERROR("failed to start MDS: %d\n", rc);
1066 mutex_up(&server_start_lock);
1070 /* If we're an OST, make sure the global OSS is running */
1071 if (lsi->lsi_ldd->ldd_flags & LDD_F_SV_TYPE_OST) {
1072 /* make sure OSS is started */
1073 mutex_down(&server_start_lock);
1074 obd = class_name2obd(LUSTRE_OSS_OBDNAME);
1076 rc = lustre_start_simple(LUSTRE_OSS_OBDNAME,
1078 LUSTRE_OSS_OBDNAME"_uuid",
1081 mutex_up(&server_start_lock);
1082 CERROR("failed to start OSS: %d\n", rc);
1086 mutex_up(&server_start_lock);
1089 /* Set the mgc fs to our server disk. This allows the MGC
1090 to read and write configs locally. */
1091 rc = server_mgc_set_fs(lsi->lsi_mgc, sb);
1095 /* Register with MGS */
1096 rc = server_register_target(sb);
1097 if (rc && (lsi->lsi_ldd->ldd_flags &
1098 (LDD_F_NEED_INDEX | LDD_F_UPDATE | LDD_F_UPGRADE14))){
1099 CERROR("Required registration failed for %s: %d\n",
1100 lsi->lsi_ldd->ldd_svname, rc);
1102 LCONSOLE_ERROR_MSG(0x15f, "Communication error with "
1103 "the MGS. Is the MGS running?\n");
1107 if (rc == -EINVAL) {
1108 LCONSOLE_ERROR_MSG(0x160, "The MGS is refusing to allow this "
1109 "server (%s) to start. Please see messages"
1110 " on the MGS node.\n",
1111 lsi->lsi_ldd->ldd_svname);
1114 /* non-fatal error of registration with MGS */
1116 CDEBUG(D_MOUNT, "Cannot register with MGS: %d\n", rc);
1118 /* Let the target look up the mount using the target's name
1119 (we can't pass the sb or mnt through class_process_config.) */
1120 rc = server_register_mount(lsi->lsi_ldd->ldd_svname, sb, mnt);
1124 /* Start targets using the llog named for the target */
1125 memset(&cfg, 0, sizeof(cfg));
1126 rc = lustre_process_log(sb, lsi->lsi_ldd->ldd_svname, &cfg);
1128 CERROR("failed to start server %s: %d\n",
1129 lsi->lsi_ldd->ldd_svname, rc);
1134 /* Release the mgc fs for others to use */
1135 server_mgc_clear_fs(lsi->lsi_mgc);
1138 obd = class_name2obd(lsi->lsi_ldd->ldd_svname);
1140 CERROR("no server named %s was started\n",
1141 lsi->lsi_ldd->ldd_svname);
1145 if ((lsi->lsi_lmd->lmd_flags & LMD_FLG_ABORT_RECOV) &&
1146 (OBP(obd, iocontrol))) {
1147 obd_iocontrol(OBD_IOC_ABORT_RECOVERY,
1148 obd->obd_self_export, 0, NULL, NULL);
1151 /* log has been fully processed */
1152 obd_notify(obd, NULL, OBD_NOTIFY_CONFIG, (void *)CONFIG_LOG);
1158 /***************** lustre superblock **************/
/* Allocate the Lustre per-super-block info and attach it to sb.
 * sb: super block that receives the new info through s2lsi_nocast().
 * Allocates the lustre_sb_info and its lsi_lmd mount data; when the
 * mount data cannot be allocated the info structure is freed again.  On
 * success lmd_exclude_count is 0, lsi_mounts starts at 1 (the extra
 * reference held during setup) and lsi_flags is LSI_UMOUNT_FAILOVER.
 * NOTE(review): the allocation test for lsi and the return statements
 * are missing from this copy of the file. */
1160 struct lustre_sb_info *lustre_init_lsi(struct super_block *sb)
1162 struct lustre_sb_info *lsi = NULL;
1165 OBD_ALLOC(lsi, sizeof(*lsi));
1168 OBD_ALLOC(lsi->lsi_lmd, sizeof(*lsi->lsi_lmd));
1169 if (!lsi->lsi_lmd) {
1170 OBD_FREE(lsi, sizeof(*lsi));
1174 lsi->lsi_lmd->lmd_exclude_count = 0;
1175 s2lsi_nocast(sb) = lsi;
1176 /* we take 1 extra ref for our setup */
1177 atomic_set(&lsi->lsi_mounts, 1);
1179 /* Default umount style */
1180 lsi->lsi_flags = LSI_UMOUNT_FAILOVER;
/* Free the Lustre per-super-block info and everything it owns.
 * sb: super block whose info is released; its info pointer is cleared at
 *     the end.
 * Asserts that lsi_mounts has reached 0 and that lsi_llsbi is NULL.
 * Frees the disk data when present, then the mount data: the dev,
 * profile and opts strings (each sized by strlen + 1), the exclude array
 * (lmd_exclude_count elements) and the mount data structure itself, and
 * finally the info structure.
 * NOTE(review): any early exit for a missing info structure and the
 * return statement are on lines missing from this copy of the file. */
1185 static int lustre_free_lsi(struct super_block *sb)
1187 struct lustre_sb_info *lsi = s2lsi(sb);
1193 CDEBUG(D_MOUNT, "Freeing lsi\n");
1195 /* someone didn't call server_put_mount. */
1196 LASSERT(atomic_read(&lsi->lsi_mounts) == 0);
1198 if (lsi->lsi_ldd != NULL)
1199 OBD_FREE(lsi->lsi_ldd, sizeof(*lsi->lsi_ldd));
1201 if (lsi->lsi_lmd != NULL) {
1202 if (lsi->lsi_lmd->lmd_dev != NULL)
1203 OBD_FREE(lsi->lsi_lmd->lmd_dev,
1204 strlen(lsi->lsi_lmd->lmd_dev) + 1);
1205 if (lsi->lsi_lmd->lmd_profile != NULL)
1206 OBD_FREE(lsi->lsi_lmd->lmd_profile,
1207 strlen(lsi->lsi_lmd->lmd_profile) + 1);
1208 if (lsi->lsi_lmd->lmd_opts != NULL)
1209 OBD_FREE(lsi->lsi_lmd->lmd_opts,
1210 strlen(lsi->lsi_lmd->lmd_opts) + 1);
1211 if (lsi->lsi_lmd->lmd_exclude_count)
1212 OBD_FREE(lsi->lsi_lmd->lmd_exclude,
1213 sizeof(lsi->lsi_lmd->lmd_exclude[0]) *
1214 lsi->lsi_lmd->lmd_exclude_count);
1215 OBD_FREE(lsi->lsi_lmd, sizeof(*lsi->lsi_lmd));
1218 LASSERT(lsi->lsi_llsbi == NULL);
1219 OBD_FREE(lsi, sizeof(*lsi));
1220 s2lsi_nocast(sb) = NULL;
1225 /* The lsi has one reference for every server that is using the disk -
1226 e.g. MDT, MGS, and potentially MGC */
/* Drop one reference on the per-super-block info.
 * sb: super block whose lsi_mounts counter is decremented.
 * When the counter reaches zero the info is released with
 * lustre_free_lsi().  server_put_mount() treats a non-zero result from
 * this function as the last put.
 * NOTE(review): the return statements are missing from this copy of the
 * file, so the exact values cannot be stated from here. */
1227 static int lustre_put_lsi(struct super_block *sb)
1229 struct lustre_sb_info *lsi = s2lsi(sb);
1234 CDEBUG(D_MOUNT, "put %p %d\n", sb, atomic_read(&lsi->lsi_mounts));
1236 if (atomic_dec_and_test(&lsi->lsi_mounts)) {
1237 lustre_free_lsi(sb);
1243 /*************** server mount ******************/
1245 /* Kernel mount using mount options in MOUNT_DATA_FILE */
/* Mount the backing filesystem of a server target.
 * sb: Lustre super block; lmd_dev names the device and lmd_opts holds any
 *     mount-line options.
 * Two mounts are visible:
 *  1. A pre-mount of the device as ldiskfs, using sb->s_flags, so that
 *     ldd_parse() can read the disk data into a freshly allocated ldd.
 *  2. The real mount with the filesystem type from MT_STR(ldd).  Its
 *     option string is built in one zeroed page from ldd_mount_opts
 *     followed, after a comma, by lmd_opts.  The length handed to
 *     strncat() is computed before the comma is appended so that the
 *     result still fits the page.  MS_NOATIME and MS_NODIRATIME are
 *     added to the flags.
 * On success the ldd is stored in lsi_ldd and freed later with the lsi.
 * On the failure path visible at the end, the ldd is freed, lsi_ldd is
 * cleared and ERR_PTR(rc) is returned; an allocation failure for ldd
 * returns ERR_PTR(-ENOMEM).
 * NOTE(review): the release of the pre-mount, the error tests, the
 * labels and the success return are missing from this copy of the
 * file. */
1246 static struct vfsmount *server_kernel_mount(struct super_block *sb)
1248 struct lvfs_run_ctxt mount_ctxt;
1249 struct lustre_sb_info *lsi = s2lsi(sb);
1250 struct lustre_disk_data *ldd;
1251 struct lustre_mount_data *lmd = lsi->lsi_lmd;
1252 struct vfsmount *mnt;
1253 char *options = NULL;
1254 unsigned long page, s_flags;
1255 struct page *__page;
1259 OBD_ALLOC(ldd, sizeof(*ldd));
1261 RETURN(ERR_PTR(-ENOMEM));
1263 /* In the past, we have always used flags = 0.
1264 Note ext3/ldiskfs can't be mounted ro. */
1265 s_flags = sb->s_flags;
1267 /* Pre-mount ldiskfs to read the MOUNT_DATA_FILE */
1268 CDEBUG(D_MOUNT, "Pre-mount ldiskfs %s\n", lmd->lmd_dev);
1269 mnt = ll_kern_mount("ldiskfs", s_flags, lmd->lmd_dev, 0);
1272 CERROR("premount %s:%#lx ldiskfs failed: %d "
1273 "Is the ldiskfs module available?\n",
1274 lmd->lmd_dev, s_flags, rc );
1278 OBD_SET_CTXT_MAGIC(&mount_ctxt);
1279 mount_ctxt.pwdmnt = mnt;
1280 mount_ctxt.pwd = mnt->mnt_root;
1281 mount_ctxt.fs = get_ds();
1283 rc = ldd_parse(&mount_ctxt, ldd);
1287 CERROR("premount parse options failed: rc = %d\n", rc);
1291 /* Done with our pre-mount, now do the real mount. */
1293 /* Glom up mount options */
1294 OBD_PAGE_ALLOC(__page, CFS_ALLOC_STD);
1296 GOTO(out_free, rc = -ENOMEM);
1297 page = (unsigned long)cfs_page_address(__page);
1299 options = (char *)page;
1300 memset(options, 0, CFS_PAGE_SIZE);
1301 strncpy(options, ldd->ldd_mount_opts, CFS_PAGE_SIZE - 2);
1303 /* Add in any mount-line options */
1304 if (lmd->lmd_opts && (*(lmd->lmd_opts) != 0)) {
1305 int len = CFS_PAGE_SIZE - strlen(options) - 2;
1307 strcat(options, ",");
1308 strncat(options, lmd->lmd_opts, len);
1311 /* Special permanent mount flags */
1313 s_flags |= MS_NOATIME | MS_NODIRATIME;
1315 CDEBUG(D_MOUNT, "kern_mount: %s %s %s\n",
1316 MT_STR(ldd), lmd->lmd_dev, options);
1317 mnt = ll_kern_mount(MT_STR(ldd), s_flags, lmd->lmd_dev,
1319 OBD_PAGE_FREE(__page);
1322 CERROR("ll_kern_mount failed: rc = %d\n", rc);
1326 lsi->lsi_ldd = ldd; /* freed at lsi cleanup */
1327 CDEBUG(D_SUPER, "%s: mnt = %p\n", lmd->lmd_dev, mnt);
1331 OBD_FREE(ldd, sizeof(*ldd));
1332 lsi->lsi_ldd = NULL;
1333 RETURN(ERR_PTR(rc));
/* Wait for other users of a vfsmount to let go before unmounting.
 * mnt: mount whose mnt_count is polled.
 * While mnt_count is above 1 and retries is still positive, a console
 * warning is printed and the caller sleeps on a private wait queue with
 * a 5 * HZ timeout.  The wait condition is the constant 0, so each sleep
 * ends by timeout rather than by wake-up.  If the count is still above 1
 * afterwards, an error is logged and the function gives up.
 * NOTE(review): the declaration, initial value and decrement of retries
 * are missing from this copy of the file. */
1336 static void server_wait_finished(struct vfsmount *mnt)
1338 wait_queue_head_t waitq;
1339 struct l_wait_info lwi;
1342 init_waitqueue_head(&waitq);
1344 while ((atomic_read(&mnt->mnt_count) > 1) && (retries > 0)) {
1345 LCONSOLE_WARN("Mount still busy with %d refs, waiting for "
1347 atomic_read(&mnt->mnt_count), retries);
1349 /* Wait for a bit */
1351 lwi = LWI_TIMEOUT(5 * HZ, NULL, NULL);
1352 l_wait_event(waitq, 0, &lwi);
1354 if (atomic_read(&mnt->mnt_count) > 1) {
1355 CERROR("Mount %p is still busy (%d refs), giving up.\n",
1356 mnt, atomic_read(&mnt->mnt_count));
/*
 * .put_super handler for a server superblock (see server_ops).
 *
 * Sequence visible in this listing:
 *  - copy the service name so it can still be printed at the very end;
 *  - for an MDT/OST disk mounted without LMD_FLG_NOSVC: end the config log,
 *    remember the profile's lp_dt name in `extraname`, stop the target obd
 *    with class_manual_cleanup() and deregister the mount;
 *  - for an MGS disk: stop the MGC, then the MGS unless LMD_FLG_NOMGS is set;
 *  - lustre_common_put_super(), server_wait_finished() on the backing mount,
 *    server_stop_servers();
 *  - clean up the obd named by `extraname`, if one is found;
 *  - free the temporary name copies.
 *
 * NOTE(review): several lines of this function (braces, short statements,
 * the declarations of rc and tmpname_sz) are absent from this listing, so
 * the exact branch structure must be checked against the full file.
 */
1360 static void server_put_super(struct super_block *sb)
1362 struct lustre_sb_info *lsi = s2lsi(sb);
1363 struct obd_device *obd;
1364 struct vfsmount *mnt = lsi->lsi_srv_mnt;
1365 char *tmpname, *extraname = NULL;
/* Flags are snapshotted up front; they are consumed by server_stop_servers()
 * near the end (presumably because lsi is gone by then -- confirm). */
1367 int lddflags = lsi->lsi_ldd->ldd_flags;
1368 int lsiflags = lsi->lsi_flags;
1372 LASSERT(lsiflags & LSI_SERVER);
/* NOTE(review): as listed, memcpy() writes to tmpname on the line directly
 * after OBD_ALLOC(); no NULL check appears between the two. */
1374 tmpname_sz = strlen(lsi->lsi_ldd->ldd_svname) + 1;
1375 OBD_ALLOC(tmpname, tmpname_sz);
1376 memcpy(tmpname, lsi->lsi_ldd->ldd_svname, tmpname_sz);
1377 CDEBUG(D_MOUNT, "server put_super %s\n", tmpname);
1379 /* Stop the target */
1380 if (!(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOSVC) &&
1381 (IS_MDT(lsi->lsi_ldd) || IS_OST(lsi->lsi_ldd))) {
1382 struct lustre_profile *lprof = NULL;
1384 /* tell the mgc to drop the config log */
1385 lustre_end_log(sb, lsi->lsi_ldd->ldd_svname, NULL);
1387 /* COMPAT_146 - profile may get deleted in mgc_cleanup.
1388 If there are any setup/cleanup errors, save the lov
1389 name for safety cleanup later. */
1390 lprof = class_get_profile(lsi->lsi_ldd->ldd_svname);
/* NOTE(review): extraname is strcpy()'d into on the line directly after
 * OBD_ALLOC(); no NULL check appears between the two. */
1391 if (lprof && lprof->lp_dt) {
1392 OBD_ALLOC(extraname, strlen(lprof->lp_dt) + 1);
1393 strcpy(extraname, lprof->lp_dt);
1396 obd = class_name2obd(lsi->lsi_ldd->ldd_svname);
1398 CDEBUG(D_MOUNT, "stopping %s\n", obd->obd_name);
1399 if (lsi->lsi_flags & LSI_UMOUNT_FAILOVER)
1401 /* We can't seem to give an error return code
1402 * to .put_super, so we better make sure we clean up! */
1404 class_manual_cleanup(obd);
1406 CERROR("no obd %s\n", lsi->lsi_ldd->ldd_svname);
1407 server_deregister_mount(lsi->lsi_ldd->ldd_svname);
1411 /* If they wanted the mgs to stop separately from the mdt, they
1412 should have put it on a different device. */
1413 if (IS_MGS(lsi->lsi_ldd)) {
1414 /* stop the mgc before the mgs so the connection gets cleaned
1416 lustre_stop_mgc(sb);
1417 /* if MDS start with --nomgs, don't stop MGS then */
1418 if (!(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOMGS))
1419 server_stop_mgs(sb);
1422 /* Clean the mgc and sb */
1423 rc = lustre_common_put_super(sb);
1424 /* FIXME how can I report a failure to umount? */
1426 /* Wait for the targets to really clean up - can't exit (and let the
1427 sb get destroyed) while the mount is still in use */
1428 server_wait_finished(mnt);
1430 /* drop the One True Mount */
1433 /* Stop the servers (MDS, OSS) if no longer needed. We must wait
1434 until the target is really gone so that our type refcount check
1436 server_stop_servers(lddflags, lsiflags);
1438 /* In case of startup or cleanup err, stop related obds */
1440 obd = class_name2obd(extraname);
1442 CWARN("Cleaning orphaned obd %s\n", extraname);
1444 class_manual_cleanup(obd);
/* The free size is recomputed with strlen(), matching the strlen()+1 used
 * when extraname was allocated above. */
1446 OBD_FREE(extraname, strlen(extraname) + 1);
1449 LCONSOLE_WARN("server umount %s complete\n", tmpname);
1450 OBD_FREE(tmpname, tmpname_sz);
/*
 * .umount_begin handler (see server_ops).
 *
 * Two prototypes are provided, selected by HAVE_UMOUNTBEGIN_VFSMOUNT: one
 * takes (vfsmount, flags) and derives the super_block from vfsmnt->mnt_sb,
 * the other takes the super_block directly.  In the vfsmount variant the
 * MNT_FORCE bit of `flags` is tested first; the body of that test is not
 * visible in this listing.
 *
 * The visible effect is to switch the superblock from "failover" to "force"
 * mode: LSI_UMOUNT_FAILOVER is cleared and LSI_UMOUNT_FORCE is set in
 * lsi->lsi_flags.  server_put_super() later tests LSI_UMOUNT_FAILOVER.
 */
1454 #ifdef HAVE_UMOUNTBEGIN_VFSMOUNT
1455 static void server_umount_begin(struct vfsmount *vfsmnt, int flags)
1457 struct super_block *sb = vfsmnt->mnt_sb;
1459 static void server_umount_begin(struct super_block *sb)
1462 struct lustre_sb_info *lsi = s2lsi(sb);
1465 #ifdef HAVE_UMOUNTBEGIN_VFSMOUNT
1466 if (!(flags & MNT_FORCE)) {
1472 CDEBUG(D_MOUNT, "umount -f\n");
1473 /* umount = failover
1475 no third way to do non-force, non-failover */
1476 lsi->lsi_flags &= ~LSI_UMOUNT_FAILOVER;
1477 lsi->lsi_flags |= LSI_UMOUNT_FORCE;
/*
 * .statfs handler (see server_ops).
 *
 * The prototype depends on HAVE_STATFS_DENTRY_PARAM: either (super_block,
 * kstatfs) or (dentry, kstatfs), the latter taking sb from dentry->d_sb.
 *
 * When the backing mount (lsi_srv_mnt) exists and its superblock provides a
 * statfs operation, that operation is called on the backing filesystem and
 * buf->f_type is then set to this superblock's s_magic.
 * The later assignments (f_type, f_bsize, f_namelen = NAME_MAX) appear to be
 * the fallback when no backing statfs is available -- the lines separating
 * the two paths are not visible in this listing, so confirm.
 */
1481 #ifndef HAVE_STATFS_DENTRY_PARAM
1482 static int server_statfs (struct super_block *sb, struct kstatfs *buf)
1485 static int server_statfs (struct dentry *dentry, struct kstatfs *buf)
1487 struct super_block *sb = dentry->d_sb;
1489 struct vfsmount *mnt = s2lsi(sb)->lsi_srv_mnt;
1492 if (mnt && mnt->mnt_sb && mnt->mnt_sb->s_op->statfs) {
1493 #ifdef HAVE_STATFS_DENTRY_PARAM
1494 int rc = mnt->mnt_sb->s_op->statfs(mnt->mnt_root, buf);
1496 int rc = mnt->mnt_sb->s_op->statfs(mnt->mnt_sb, buf);
1499 buf->f_type = sb->s_magic;
1505 buf->f_type = sb->s_magic;
1506 buf->f_bsize = sb->s_blocksize;
1512 buf->f_namelen = NAME_MAX;
/*
 * Superblock operations installed on server superblocks by
 * server_fill_super_common(): teardown, forced-umount notification and
 * statfs forwarding.
 */
1516 static struct super_operations server_ops =
1518 .put_super = server_put_super,
1519 .umount_begin = server_umount_begin, /* umount -f */
1520 .statfs = server_statfs,
/*
 * log2(n): implemented as ffz(~(n)), i.e. the position of the first zero bit
 * in the complement of n (kernel ffz = "find first zero").  This equals the
 * base-2 logarithm only when n is a power of two; the argument is expanded
 * unparenthesised inside ~( ), which the extra parentheses already cover.
 * LUSTRE_SUPER_MAGIC: value stored in sb->s_magic for server superblocks.
 */
1523 #define log2(n) ffz(~(n))
1524 #define LUSTRE_SUPER_MAGIC 0x0BD00BD1
/*
 * Fill in the generic VFS fields of a server superblock.
 *
 * Sets a fixed 4096-byte block size (and its bit count via log2()), the
 * Lustre magic, s_maxbytes = 0, forces MS_RDONLY into s_flags, installs
 * server_ops, then creates a root inode marked S_IFDIR and a root dentry
 * for it with d_alloc_root().
 *
 * Errors are logged with CERROR when the inode or dentry cannot be created;
 * the checks and return statements themselves are not visible in this
 * listing.
 */
1526 static int server_fill_super_common(struct super_block *sb)
1528 struct inode *root = 0;
1531 CDEBUG(D_MOUNT, "Server sb, dev=%d\n", (int)sb->s_dev);
1533 sb->s_blocksize = 4096;
1534 sb->s_blocksize_bits = log2(sb->s_blocksize);
1535 sb->s_magic = LUSTRE_SUPER_MAGIC;
1536 sb->s_maxbytes = 0; //PAGE_CACHE_MAXBYTES;
1537 sb->s_flags |= MS_RDONLY;
1538 sb->s_op = &server_ops;
1540 root = new_inode(sb);
1542 CERROR("Can't make root inode\n");
1546 /* returns -EIO for every operation */
1547 /* make_bad_inode(root); -- badness - can't umount */
1548 /* apparently we need to be a directory for the mount to finish */
1549 root->i_mode = S_IFDIR;
1551 sb->s_root = d_alloc_root(root);
1553 CERROR("Can't make root dentry\n");
/*
 * Bring up a Lustre server on this superblock.
 *
 * Visible sequence:
 *  1. server_kernel_mount() mounts the backing device; the result is stored
 *     in lsi->lsi_srv_mnt and lsi->lsi_ldd is asserted to be set afterwards.
 *  2. If an obd with the disk's service name already exists, the mount is
 *     refused with -EALREADY (double-mount protection).
 *  3. On an MGS disk without LMD_FLG_NOMGS, the MGS is started first.
 *  4. The MGC is started.
 *  5. Unless LMD_FLG_NOSVC is set, OST/MDT targets are started.
 *  6. server_fill_super_common() fills in the VFS superblock fields.
 *
 * The trailing server_put_super(sb) call is the cleanup used on failure
 * (the label and return statements are not visible in this listing).
 */
1561 static int server_fill_super(struct super_block *sb)
1563 struct lustre_sb_info *lsi = s2lsi(sb);
1564 struct vfsmount *mnt;
1568 /* the One True Mount */
1569 mnt = server_kernel_mount(sb);
1572 CERROR("Unable to mount device %s: %d\n",
1573 lsi->lsi_lmd->lmd_dev, rc);
1577 lsi->lsi_srv_mnt = mnt;
1579 LASSERT(lsi->lsi_ldd);
1580 CDEBUG(D_MOUNT, "Found service %s for fs '%s' on device %s\n",
1581 lsi->lsi_ldd->ldd_svname, lsi->lsi_ldd->ldd_fsname,
1582 lsi->lsi_lmd->lmd_dev);
/* Refuse to start a second instance of a target that is already running. */
1584 if (class_name2obd(lsi->lsi_ldd->ldd_svname)) {
1585 LCONSOLE_ERROR_MSG(0x161, "The target named %s is already "
1586 "running. Double-mount may have compromised"
1587 " the disk journal.\n",
1588 lsi->lsi_ldd->ldd_svname);
1591 GOTO(out, rc = -EALREADY);
1594 /* start MGS before MGC */
1595 if (IS_MGS(lsi->lsi_ldd) && !(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOMGS)) {
1596 rc = server_start_mgs(sb);
1601 rc = lustre_start_mgc(sb);
1605 /* Set up all obd devices for service */
1606 if (!(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOSVC) &&
1607 (IS_OST(lsi->lsi_ldd) || IS_MDT(lsi->lsi_ldd))) {
1608 rc = server_start_targets(sb, mnt);
1610 CERROR("Unable to start targets: %d\n", rc);
1613 /* FIXME overmount client here,
1614 or can we just start a client log and client_fill_super on this sb?
1615 We need to make sure server_put_super gets called too - ll_put_super
1616 calls lustre_common_put_super; check there for LSI_SERVER flag,
1618 Probably should start client from new thread so we can return.
1619 Client will not finish until all servers are connected.
1620 Note - MGS-only server does NOT get a client, since there is no
1621 lustre fs associated - the MGS is for all lustre fs's */
1624 rc = server_fill_super_common(sb);
1628 LCONSOLE_WARN("Server %s on device %s has started\n",
1629 lsi->lsi_ldd->ldd_svname, lsi->lsi_lmd->lmd_dev);
/* Failure cleanup: undo whatever part of the server was started. */
1634 server_put_super(sb);
/*
 * Parse a service name of the form "<fsname>-MDTxxxx" / "<fsname>-OSTxxxx".
 *
 * The first '-' in svname is located with strchr(); the three characters
 * after it select the type (LDD_F_SV_TYPE_MDT or LDD_F_SV_TYPE_OST), and the
 * text starting at dash + 4 is parsed as a hexadecimal number with
 * simple_strtoul(), which also fills *endptr when endptr is non-NULL.
 *
 * NOTE(review): the handling of a missing dash, of an unknown type, and the
 * store into *idx are on lines not visible in this listing.
 */
1639 /* Get the index from the obd name.
1640 rc = server type, or
1642 if endptr isn't NULL it is set to end of name */
1643 int server_name2index(char *svname, __u32 *idx, char **endptr)
1645 unsigned long index;
1647 char *dash = strchr(svname, '-');
1651 if (strncmp(dash + 1, "MDT", 3) == 0)
1652 rc = LDD_F_SV_TYPE_MDT;
1653 else if (strncmp(dash + 1, "OST", 3) == 0)
1654 rc = LDD_F_SV_TYPE_OST;
1658 index = simple_strtoul(dash + 4, endptr, 16);
1663 /*************** mount common between server and client ***************/
/*
 * Teardown shared by client and server superblocks.
 *
 * Drops this superblock's reference on the MGC via lustre_stop_mgc().  A
 * result other than 0 or -ENOENT enters the error branch; within it, the
 * visible code logs either a real failure (CERROR) or the "still in use"
 * case, which the original comment describes as BUSY and which is left for
 * the other user of the MGC to clean up.  The mounted-disk reference is
 * dropped afterwards (that call is not visible in this listing).
 */
1666 int lustre_common_put_super(struct super_block *sb)
1671 CDEBUG(D_MOUNT, "dropping sb %p\n", sb);
1673 /* Drop a ref to the MGC */
1674 rc = lustre_stop_mgc(sb);
1675 if (rc && (rc != -ENOENT)) {
1677 CERROR("Can't stop MGC: %d\n", rc);
1680 /* BUSY just means that there's some other obd that
1681 needs the mgc. Let him clean it up. */
1682 CDEBUG(D_MOUNT, "MGC still in use\n");
1684 /* Drop a ref to the mounted disk */
/*
 * Dump the parsed mount data through PRINT_CMD/PRINT_MASK (CDEBUG at
 * D_SUPER|D_CONFIG per the defines at the top of the file): profile (client
 * mounts only), device, flags, options and every excluded OST index.
 */
1690 static void lmd_print(struct lustre_mount_data *lmd)
1694 PRINT_CMD(PRINT_MASK, " mount data:\n");
1695 if (lmd_is_client(lmd))
1696 PRINT_CMD(PRINT_MASK, "profile: %s\n", lmd->lmd_profile);
1697 PRINT_CMD(PRINT_MASK, "device: %s\n", lmd->lmd_dev);
1698 PRINT_CMD(PRINT_MASK, "flags: %x\n", lmd->lmd_flags);
1700 PRINT_CMD(PRINT_MASK, "options: %s\n", lmd->lmd_opts);
/* Indices are printed as four hex digits, matching the OSTxxxx naming. */
1701 for (i = 0; i < lmd->lmd_exclude_count; i++) {
1702 PRINT_CMD(PRINT_MASK, "exclude %d: OST%04x\n", i,
1703 lmd->lmd_exclude[i]);
/*
 * Check whether `svname` appears in the exclude list parsed from the mount
 * options of `sb`.
 *
 * The name is decoded with server_name2index(); anything that is not an OST
 * is never excluded.  For an OST, lmd->lmd_exclude is scanned linearly for a
 * matching index and a warning is logged on a hit.
 * NOTE(review): the return statements are not visible in this listing.
 */
1708 /* Is this server on the exclusion list */
1709 int lustre_check_exclusion(struct super_block *sb, char *svname)
1711 struct lustre_sb_info *lsi = s2lsi(sb);
1712 struct lustre_mount_data *lmd = lsi->lsi_lmd;
1717 rc = server_name2index(svname, &index, NULL);
1718 if (rc != LDD_F_SV_TYPE_OST)
1719 /* Only exclude OSTs */
1722 CDEBUG(D_MOUNT, "Check exclusion %s (%d) in %d of %s\n", svname,
1723 index, lmd->lmd_exclude_count, lmd->lmd_dev);
1725 for(i = 0; i < lmd->lmd_exclude_count; i++) {
1726 if (index == lmd->lmd_exclude[i]) {
1727 CWARN("Excluding %s (on exclusion list)\n", svname);
/*
 * Build lmd->lmd_exclude from the value of an "exclude=" mount option.
 *
 * `ptr` points at the '=' (lmd_parse() passes s1 + 7).  Names are parsed one
 * at a time with server_name2index(); OST indices are collected in a
 * temporary array sized from the option length (at most one name per eight
 * characters, plus one), non-OST names are ignored with a debug message, and
 * parsing stops at end of string, a space or a comma, or once `devmax`
 * entries have been collected.
 *
 * When at least one index was collected, a permanent array of exactly that
 * size is allocated and filled; the `lmd_exclude_count = 0` assignment
 * appears to be the path taken when that allocation fails (the `else` line
 * is not visible in this listing -- confirm).  The temporary array is always
 * freed with the same size expression it was allocated with.
 */
1734 /* mount -v -o exclude=lustre-OST0001:lustre-OST0002 -t lustre ... */
1735 static int lmd_make_exclusion(struct lustre_mount_data *lmd, char *ptr)
1737 char *s1 = ptr, *s2;
1738 __u32 index, *exclude_list;
1742 /* The shortest an ost name can be is 8 chars: -OST0000.
1743 We don't actually know the fsname at this time, so in fact
1744 a user could specify any fsname. */
1745 devmax = strlen(ptr) / 8 + 1;
1747 /* temp storage until we figure out how many we have */
1748 OBD_ALLOC(exclude_list, sizeof(index) * devmax);
1752 /* we enter this fn pointing at the '=' */
1753 while (*s1 && *s1 != ' ' && *s1 != ',') {
1755 rc = server_name2index(s1, &index, &s2);
1757 CERROR("Can't parse server name '%s'\n", s1);
1760 if (rc == LDD_F_SV_TYPE_OST)
1761 exclude_list[lmd->lmd_exclude_count++] = index;
1763 CDEBUG(D_MOUNT, "ignoring exclude %.7s\n", s1);
1765 /* now we are pointing at ':' (next exclude)
1766 or ',' (end of excludes) */
/* Bound check that keeps the writes above inside exclude_list. */
1767 if (lmd->lmd_exclude_count >= devmax)
1770 if (rc >= 0) /* non-err */
1773 if (lmd->lmd_exclude_count) {
1774 /* permanent, freed in lustre_free_lsi */
1775 OBD_ALLOC(lmd->lmd_exclude, sizeof(index) *
1776 lmd->lmd_exclude_count);
1777 if (lmd->lmd_exclude) {
1778 memcpy(lmd->lmd_exclude, exclude_list,
1779 sizeof(index) * lmd->lmd_exclude_count);
1782 lmd->lmd_exclude_count = 0;
1785 OBD_FREE(exclude_list, sizeof(index) * devmax);
/*
 * Parse the mount option string into `lmd`.
 *
 * Visible behaviour:
 *  - a missing option string and an old binary lustre_mount_data (detected
 *    by comparing the top 24 bits of the first word with LMD_MAGIC) are
 *    reported on the console;
 *  - the string is walked option by option, skipping spaces and commas;
 *    "abort_recov", "nosvc" and "nomgs" set the corresponding LMD_FLG_* bit,
 *    "exclude=" is handed to lmd_make_exclusion() starting at the '=', and
 *    "device=" supplies the device name;
 *  - the memmove() removes text from the option string in place, so the
 *    caller's buffer is modified;
 *  - a device name containing ':' marks a client mount: lmd_flags is set to
 *    LMD_FLG_CLIENT and lmd_profile becomes "<fsname>-client" (the +8 in the
 *    allocation covers "-client" plus the terminating NUL);
 *  - lmd_dev and, when non-empty, lmd_opts receive allocated copies.
 *
 * NOTE(review): options are matched with strncmp() on the keyword length
 * only, so any option that merely begins with one of the keywords (for
 * example "nosvcX") is accepted as that keyword.
 * NOTE(review): several lines (loop header, clean-flag bookkeeping, returns)
 * are not visible in this listing.
 */
1789 /* mount -v -t lustre uml1:uml2:/lustre-client /mnt/lustre */
1790 static int lmd_parse(char *options, struct lustre_mount_data *lmd)
1792 char *s1, *s2, *devname = NULL;
1793 struct lustre_mount_data *raw = (struct lustre_mount_data *)options;
1799 LCONSOLE_ERROR_MSG(0x162, "Missing mount data: check that "
1800 "/sbin/mount.lustre is installed.\n");
1804 /* Options should be a string - try to detect old lmd data */
1805 if ((raw->lmd_magic & 0xffffff00) == (LMD_MAGIC & 0xffffff00)) {
1806 LCONSOLE_ERROR_MSG(0x163, "You're using an old version of "
1807 "/sbin/mount.lustre. Please install "
1808 "version %s\n", LUSTRE_VERSION_STRING);
1811 lmd->lmd_magic = LMD_MAGIC;
1813 /* Set default flags here */
1818 /* Skip whitespace and extra commas */
1819 while (*s1 == ' ' || *s1 == ',')
1822 /* Client options are parsed in ll_options: eg. flock,
1825 /* Parse non-ldiskfs options here. Rather than modifying
1826 ldiskfs, we just zero these out here */
1827 if (strncmp(s1, "abort_recov", 11) == 0) {
1828 lmd->lmd_flags |= LMD_FLG_ABORT_RECOV;
1830 } else if (strncmp(s1, "nosvc", 5) == 0) {
1831 lmd->lmd_flags |= LMD_FLG_NOSVC;
1833 } else if (strncmp(s1, "nomgs", 5) == 0) {
1834 lmd->lmd_flags |= LMD_FLG_NOMGS;
1836 /* ost exclusion list */
1837 } else if (strncmp(s1, "exclude=", 8) == 0) {
1838 rc = lmd_make_exclusion(lmd, s1 + 7);
1843 /* Linux 2.4 doesn't pass the device, so we stuck it at the
1844 end of the options. */
1845 else if (strncmp(s1, "device=", 7) == 0) {
1847 /* terminate options right before device. device
1848 must be the last one. */
1854 s2 = strchr(s1, ',');
1862 memmove(s1, s2, strlen(s2) + 1);
1868 LCONSOLE_ERROR_MSG(0x164, "Can't find the device name "
1869 "(need mount option 'device=...')\n");
/* The last ':' separates the MGS nid list from the filesystem name. */
1873 s1 = strrchr(devname, ':');
1875 lmd->lmd_flags = LMD_FLG_CLIENT;
1876 /* Remove leading /s from fsname */
1877 while (*++s1 == '/') ;
1878 /* Freed in lustre_free_lsi */
1879 OBD_ALLOC(lmd->lmd_profile, strlen(s1) + 8);
1880 if (!lmd->lmd_profile)
1882 sprintf(lmd->lmd_profile, "%s-client", s1);
1885 /* Freed in lustre_free_lsi */
1886 OBD_ALLOC(lmd->lmd_dev, strlen(devname) + 1);
1889 strcpy(lmd->lmd_dev, devname);
1891 /* Save mount options */
1892 s1 = options + strlen(options) - 1;
1893 while (s1 >= options && (*s1 == ',' || *s1 == ' '))
1895 if (*options != 0) {
1896 /* Freed in lustre_free_lsi */
1897 OBD_ALLOC(lmd->lmd_opts, strlen(options) + 1);
1900 strcpy(lmd->lmd_opts, options);
1903 lmd->lmd_magic = LMD_MAGIC;
1908 CERROR("Bad mount options %s\n", options);
/*
 * fill_super callback handed to get_sb_nodev() by lustre_get_sb().
 *
 * Creates the per-superblock lustre_sb_info, parses the mount options in
 * `data` with lmd_parse(), and then dispatches:
 *  - client mount (lmd_is_client()): requires a registered client_fill_super
 *    callback, starts the MGC and calls the callback;
 *  - otherwise: marks the superblock LSI_SERVER and calls
 *    server_fill_super().
 * The outcome is logged at the end.
 *
 * NOTE(review): per the comments below, both callees run their own
 * put_super-style cleanup on failure, yet the final error message still
 * reads lmd->lmd_dev (guarded only by s2lsi(sb) being non-NULL).  Confirm
 * that lmd is still valid on that path.
 */
1914 int lustre_fill_super(struct super_block *sb, void *data, int silent)
1916 struct lustre_mount_data *lmd;
1917 struct lustre_sb_info *lsi;
1921 CDEBUG(D_MOUNT|D_VFSTRACE, "VFS Op: sb %p\n", sb);
1923 lsi = lustre_init_lsi(sb);
1928 /* Figure out the lmd from the mount options */
1929 if (lmd_parse((char *)data, lmd)) {
1934 if (lmd_is_client(lmd)) {
1935 CDEBUG(D_MOUNT, "Mounting client %s\n", lmd->lmd_profile);
1936 if (!client_fill_super) {
1937 LCONSOLE_ERROR_MSG(0x165, "Nothing registered for "
1938 "client mount! Is the 'lustre' "
1939 "module loaded?\n");
1942 rc = lustre_start_mgc(sb);
1944 lustre_stop_mgc(sb);
1947 /* Connect and start */
1948 /* (should always be ll_fill_super) */
1949 rc = (*client_fill_super)(sb);
1950 /* c_f_s will call lustre_common_put_super on failure */
1953 CDEBUG(D_MOUNT, "Mounting server from %s\n", lmd->lmd_dev);
1954 lsi->lsi_flags |= LSI_SERVER;
1955 rc = server_fill_super(sb);
1956 /* s_f_s calls lustre_start_mgc after the mount because we need
1957 the MGS nids which are stored on disk. Plus, we may
1958 need to start the MGS first. */
1959 /* s_f_s will call server_put_super on failure */
1964 CERROR("Unable to mount %s (%d)\n",
1965 s2lsi(sb) ? lmd->lmd_dev : "", rc);
1967 CDEBUG(D_SUPER, "mount %s complete\n", lmd->lmd_dev);
/*
 * Register the client-side fill_super implementation.  The pointer is stored
 * in the file-scope client_fill_super variable and later invoked by
 * lustre_fill_super() for client mounts.
 */
1973 /* We can't call ll_fill_super by name because it lives in a module that
1974 must be loaded after this one. */
1975 void lustre_register_client_fill_super(int (*cfs)(struct super_block *sb))
1977 client_fill_super = cfs;
/*
 * Register the callback stored in the file-scope kill_super_cb variable;
 * lustre_kill_super() invokes it for superblocks flagged LSI_SERVER.
 */
1980 void lustre_register_kill_super_cb(void (*cfs)(struct super_block *sb))
1982 kill_super_cb = cfs;
1985 /***************** FS registration ******************/
/*
 * get_sb entry point of lustre_fs_type.
 *
 * Two prototypes are provided, selected by the kernel version: before 2.6.18
 * the function returns the super_block pointer; otherwise it returns int and
 * receives the vfsmount.  Both variants delegate to get_sb_nodev() with
 * lustre_fill_super() as the fill callback; `devname` is not used here (the
 * device name travels inside the option string, see lmd_parse()).
 */
1987 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18))
1988 struct super_block * lustre_get_sb(struct file_system_type *fs_type,
1989 int flags, const char *devname, void * data)
1991 /* calls back in fill super */
1992 /* we could append devname= onto options (*data) here,
1993 but 2.4 doesn't get devname. So we do it in mount_lustre.c */
1994 return get_sb_nodev(fs_type, flags, data, lustre_fill_super);
1997 int lustre_get_sb(struct file_system_type *fs_type,
1998 int flags, const char *devname, void * data,
1999 struct vfsmount *mnt)
2001 /* calls back in fill super */
2002 /* we could append devname= onto options (*data) here,
2003 but 2.4 doesn't get devname. So we do it in mount_lustre.c */
2004 return get_sb_nodev(fs_type, flags, data, lustre_fill_super, mnt);
/*
 * kill_sb entry point of lustre_fs_type.
 *
 * When a kill_super callback has been registered and the superblock carries
 * Lustre info flagged LSI_SERVER, the callback is invoked; kill_anon_super()
 * then releases the superblock.  lsi may be NULL here, hence the explicit
 * check before reading lsi_flags.
 */
2008 void lustre_kill_super(struct super_block *sb)
2010 struct lustre_sb_info *lsi = s2lsi(sb);
2012 if (kill_super_cb && lsi &&(lsi->lsi_flags & LSI_SERVER))
2013 (*kill_super_cb)(sb);
2015 kill_anon_super(sb);
/*
 * Filesystem type descriptor passed to register_filesystem() /
 * unregister_filesystem() below.  Mount data is declared binary
 * (FS_BINARY_MOUNTDATA) and the type is flagged FS_REQUIRES_DEV.
 */
2018 struct file_system_type lustre_fs_type = {
2019 .owner = THIS_MODULE,
2021 .get_sb = lustre_get_sb,
2022 .kill_sb = lustre_kill_super,
2023 .fs_flags = FS_BINARY_MOUNTDATA | FS_REQUIRES_DEV |
2024 LL_RENAME_DOES_D_MOVE,
/* Register lustre_fs_type with the VFS; returns register_filesystem()'s result. */
2027 int lustre_register_fs(void)
2029 return register_filesystem(&lustre_fs_type);
/* Remove lustre_fs_type from the VFS; returns unregister_filesystem()'s result. */
2032 int lustre_unregister_fs(void)
2034 return unregister_filesystem(&lustre_fs_type);
/*
 * Symbols made available to other kernel modules.  Several of them
 * (lustre_process_log, lustre_end_log, server_get_mount*, server_put_mount*,
 * server_register_target, server_mti_print, do_lcfg) are defined earlier in
 * the file, outside this section.
 */
2037 EXPORT_SYMBOL(lustre_register_client_fill_super);
2038 EXPORT_SYMBOL(lustre_register_kill_super_cb);
2039 EXPORT_SYMBOL(lustre_common_put_super);
2040 EXPORT_SYMBOL(lustre_process_log);
2041 EXPORT_SYMBOL(lustre_end_log);
2042 EXPORT_SYMBOL(server_get_mount);
2043 EXPORT_SYMBOL(server_get_mount_2);
2044 EXPORT_SYMBOL(server_put_mount);
2045 EXPORT_SYMBOL(server_put_mount_2);
2046 EXPORT_SYMBOL(server_register_target);
2047 EXPORT_SYMBOL(server_name2index);
2048 EXPORT_SYMBOL(server_mti_print);
2049 EXPORT_SYMBOL(do_lcfg);