1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * lustre/obdclass/obd_mount.c
5 * Client/server mount routines
7 * Copyright (c) 2006 Cluster File Systems, Inc.
8 * Author: Nathan Rutman <nathan@clusterfs.com>
10 * This file is part of Lustre, http://www.lustre.org/
12 * Lustre is free software; you can redistribute it and/or
13 * modify it under the terms of version 2 of the GNU General Public
14 * License as published by the Free Software Foundation.
16 * Lustre is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with Lustre; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27 #define DEBUG_SUBSYSTEM S_CLASS
28 #define D_MOUNT D_SUPER|D_CONFIG /*|D_WARNING */
29 #define PRINT_CMD CDEBUG
30 #define PRINT_MASK D_SUPER|D_CONFIG
34 #include <lustre_fsfilt.h>
35 #include <obd_class.h>
36 #include <lustre/lustre_user.h>
37 #include <linux/version.h>
38 #include <lustre_log.h>
39 #include <lustre_disk.h>
40 #include <lustre_param.h>
/* Function pointer with the client fill_super signature, initialised to NULL.
 * Nothing in this part of the file assigns or calls it; presumably it is set
 * when the client module registers itself -- TODO confirm. */
42 static int (*client_fill_super)(struct super_block *sb) = NULL;
44 /*********** mount lookup *********/
/* lustre_mount_info_lock is taken by the register/deregister/get/put helpers
 * around their lookups in, and updates of, server_mount_info_list.
 * server_mount_info_list is the head of a list of lustre_mount_info entries
 * linked through their lmi_list_chain member. */
46 DECLARE_MUTEX(lustre_mount_info_lock);
47 struct list_head server_mount_info_list = LIST_HEAD_INIT(server_mount_info_list);
/* Walk server_mount_info_list and compare each entry's lmi_name with @name
 * using strcmp().  The return statements are not visible in this excerpt;
 * callers in this file test the result against NULL.  This function takes no
 * lock itself: most callers hold lustre_mount_info_lock around the call. */
49 static struct lustre_mount_info *server_find_mount(const char *name)
51 struct list_head *tmp;
52 struct lustre_mount_info *lmi;
55 list_for_each(tmp, &server_mount_info_list) {
56 lmi = list_entry(tmp, struct lustre_mount_info, lmi_list_chain);
57 if (strcmp(name, lmi->lmi_name) == 0)
/* Add an entry for @name to server_mount_info_list.
 * A lustre_mount_info and a private copy of @name are allocated first; then,
 * with lustre_mount_info_lock held, a name that is already registered is
 * rejected (both allocations are freed and an error is logged), otherwise the
 * new entry is linked into the list.  The rest of the parameter list and the
 * stores into the entry's other fields are on lines missing from this
 * excerpt. */
63 /* we must register an obd for a mount before we call the setup routine.
64 *_setup will call lustre_get_mount to get the mnt struct
65 by obd_name, since we can't pass the pointer to setup. */
66 static int server_register_mount(const char *name, struct super_block *sb,
69 struct lustre_mount_info *lmi;
76 OBD_ALLOC(lmi, sizeof(*lmi));
79 OBD_ALLOC(name_cp, strlen(name) + 1);
/* name copy could not be allocated: release the entry allocated above */
81 OBD_FREE(lmi, sizeof(*lmi));
84 strcpy(name_cp, name);
86 down(&lustre_mount_info_lock);
/* duplicate registration: undo both allocations, outside the lock */
88 if (server_find_mount(name)) {
89 up(&lustre_mount_info_lock);
90 OBD_FREE(lmi, sizeof(*lmi));
91 OBD_FREE(name_cp, strlen(name) + 1);
92 CERROR("Already registered %s\n", name);
95 lmi->lmi_name = name_cp;
98 list_add(&lmi->lmi_list_chain, &server_mount_info_list);
100 up(&lustre_mount_info_lock);
102 CDEBUG(D_MOUNT, "reg_mnt %p from %s, vfscount=%d\n",
103 lmi->lmi_mnt, name, atomic_read(&lmi->lmi_mnt->mnt_count));
/* Remove the entry registered under @name from server_mount_info_list.
 * The lookup and the removal both happen with lustre_mount_info_lock held.
 * When no entry is found the lock is released and "not registered" is logged
 * (the test itself is on a line missing from this excerpt).  Otherwise the
 * name copy is freed, the entry is unlinked and freed, and the lock is
 * released. */
108 /* when an obd no longer needs a mount */
109 static int server_deregister_mount(const char *name)
111 struct lustre_mount_info *lmi;
114 down(&lustre_mount_info_lock);
115 lmi = server_find_mount(name);
117 up(&lustre_mount_info_lock);
118 CERROR("%s not registered\n", name);
122 CDEBUG(D_MOUNT, "dereg_mnt %p from %s, vfscount=%d\n",
123 lmi->lmi_mnt, name, atomic_read(&lmi->lmi_mnt->mnt_count));
/* size passed to OBD_FREE matches the strlen(name) + 1 used at registration */
125 OBD_FREE(lmi->lmi_name, strlen(lmi->lmi_name) + 1);
126 list_del(&lmi->lmi_list_chain);
127 OBD_FREE(lmi, sizeof(*lmi));
128 up(&lustre_mount_info_lock);
/* Find the mount registered under @name and take references on it:
 * mntget() on the entry's vfsmount and an increment of lsi_mounts in the
 * lustre_sb_info of the entry's superblock.  A failed lookup is logged with
 * "Can't find mount"; the value returned in either case is on lines missing
 * from this excerpt.
 * NOTE(review): lustre_mount_info_lock is released right after the lookup,
 * before lmi is dereferenced -- confirm that callers guarantee the entry
 * cannot be deregistered concurrently. */
133 /* obd's look up a registered mount using their obdname. This is just
134 for initial obd setup to find the mount struct. It should not be
135 called every time you want to mntget. */
136 struct lustre_mount_info *server_get_mount(const char *name)
138 struct lustre_mount_info *lmi;
139 struct lustre_sb_info *lsi;
142 down(&lustre_mount_info_lock);
143 lmi = server_find_mount(name);
144 up(&lustre_mount_info_lock);
146 CERROR("Can't find mount for %s\n", name);
149 lsi = s2lsi(lmi->lmi_sb);
150 mntget(lmi->lmi_mnt);
151 atomic_inc(&lsi->lsi_mounts);
153 CDEBUG(D_MOUNT, "get_mnt %p from %s, refs=%d, vfscount=%d\n",
154 lmi->lmi_mnt, name, atomic_read(&lsi->lsi_mounts),
155 atomic_read(&lmi->lmi_mnt->mnt_count));
161 * Used by mdt to get mount_info from obdname.
162 * There is no blocking when using the mount_info.
163 * Do not use server_get_mount for this purpose.
/* Lookup-only variant of server_get_mount(): finds the entry for @name under
 * lustre_mount_info_lock and logs an error when it is missing.  No mntget()
 * or lsi_mounts increment is visible in this excerpt. */
165 struct lustre_mount_info *server_get_mount_2(const char *name)
167 struct lustre_mount_info *lmi;
170 down(&lustre_mount_info_lock);
171 lmi = server_find_mount(name);
172 up(&lustre_mount_info_lock);
174 CERROR("Can't find mount for %s\n", name);
/* Helper that branches on kernel_locked().  The body is not visible in this
 * excerpt; judging by the name it presumably drops the big kernel lock around
 * mntput(mnt) -- TODO confirm against the full source. */
179 static void unlock_mntput(struct vfsmount *mnt)
181 if (kernel_locked()) {
/* Forward declaration: lustre_put_lsi() is defined further down the file but
 * is needed by server_put_mount(). */
190 static int lustre_put_lsi(struct super_block *sb);
/* Release the references an obd holds on the mount registered under @name.
 * Visible steps: snapshot mnt_count - 1 for logging, look the entry up under
 * lustre_mount_info_lock, assert that it refers to @mnt, drop one lsi
 * reference through lustre_put_lsi() and finally deregister @name.
 * The call that actually puts @mnt falls on a line missing from this excerpt,
 * presumably just after the "can't deref after this" comment -- TODO
 * confirm.
 * NOTE(review): as in server_get_mount(), the lock is dropped before lmi is
 * dereferenced. */
192 /* to be called from obd_cleanup methods */
193 int server_put_mount(const char *name, struct vfsmount *mnt)
195 struct lustre_mount_info *lmi;
196 struct lustre_sb_info *lsi;
197 int count = atomic_read(&mnt->mnt_count) - 1;
200 /* This might be the last one, can't deref after this */
203 down(&lustre_mount_info_lock);
204 lmi = server_find_mount(name);
205 up(&lustre_mount_info_lock);
207 CERROR("Can't find mount for %s\n", name);
210 lsi = s2lsi(lmi->lmi_sb);
211 LASSERT(lmi->lmi_mnt == mnt);
213 CDEBUG(D_MOUNT, "put_mnt %p from %s, refs=%d, vfscount=%d\n",
214 lmi->lmi_mnt, name, atomic_read(&lsi->lsi_mounts), count);
216 if (lustre_put_lsi(lmi->lmi_sb)) {
217 CDEBUG(D_MOUNT, "Last put of mnt %p from %s, vfscount=%d\n",
218 lmi->lmi_mnt, name, count);
219 /* last mount is the One True Mount */
221 CERROR("%s: mount busy, vfscount=%d!\n", name, count);
224 /* this obd should never need the mount again */
225 server_deregister_mount(name);
/* Counterpart of server_get_mount_2().  The body is not visible in this
 * excerpt, so what (if anything) it releases cannot be stated from here. */
230 /* Corresponding to server_get_mount_2 */
231 int server_put_mount_2(const char *name, struct vfsmount *mnt)
237 /******* mount helper utilities *********/
/* Dump the fields of @ldd, one per line, through PRINT_CMD(PRINT_MASK, ...).
 * This file defines PRINT_CMD as CDEBUG and PRINT_MASK as D_SUPER|D_CONFIG,
 * so the output goes to the debug log under those masks. */
240 static void ldd_print(struct lustre_disk_data *ldd)
242 PRINT_CMD(PRINT_MASK, " disk data:\n");
243 PRINT_CMD(PRINT_MASK, "server: %s\n", ldd->ldd_svname);
244 PRINT_CMD(PRINT_MASK, "uuid: %s\n", (char *)ldd->ldd_uuid);
245 PRINT_CMD(PRINT_MASK, "fs: %s\n", ldd->ldd_fsname);
246 PRINT_CMD(PRINT_MASK, "index: %04x\n", ldd->ldd_svindex);
247 PRINT_CMD(PRINT_MASK, "config: %d\n", ldd->ldd_config_ver);
248 PRINT_CMD(PRINT_MASK, "flags: %#x\n", ldd->ldd_flags);
249 PRINT_CMD(PRINT_MASK, "diskfs: %s\n", MT_STR(ldd));
250 PRINT_CMD(PRINT_MASK, "options: %s\n", ldd->ldd_mount_opts);
251 PRINT_CMD(PRINT_MASK, "params: %s\n", ldd->ldd_params);
252 PRINT_CMD(PRINT_MASK, "comment: %s\n", ldd->ldd_userdata);
/* Read MOUNT_DATA_FILE into @ldd while running in @mount_ctxt (entered with
 * push_ctxt(), left with pop_ctxt()).
 * The data is rejected with rc = -EINVAL (jumping to out_close) when:
 *  - the file size differs from sizeof(*ldd),
 *  - the read reports an error (the exact test is on a missing line),
 *  - ldd_magic is not LDD_MAGIC (no byte-swapping support, see FIXME),
 *  - ldd_feature_incompat or ldd_feature_rocompat has bits outside
 *    LDD_INCOMPAT_SUPP / LDD_ROCOMPAT_SUPP respectively.
 * A failure to open the file is logged with the rc taken from filp_open(). */
256 static int ldd_parse(struct lvfs_run_ctxt *mount_ctxt,
257 struct lustre_disk_data *ldd)
259 struct lvfs_run_ctxt saved;
266 push_ctxt(&saved, mount_ctxt, NULL);
268 file = filp_open(MOUNT_DATA_FILE, O_RDONLY, 0644);
271 CERROR("cannot open %s: rc = %d\n", MOUNT_DATA_FILE, rc);
275 len = i_size_read(file->f_dentry->d_inode);
276 CDEBUG(D_MOUNT, "Have %s, size %lu\n", MOUNT_DATA_FILE, len);
/* the on-disk record must be exactly one struct lustre_disk_data */
277 if (len != sizeof(*ldd)) {
278 CERROR("disk data size does not match: see %lu expect "LPSZ"\n",
280 GOTO(out_close, rc = -EINVAL);
283 rc = lustre_fread(file, ldd, len, &off);
285 CERROR("error reading %s: read %d of %lu\n",
286 MOUNT_DATA_FILE, rc, len);
287 GOTO(out_close, rc = -EINVAL);
291 if (ldd->ldd_magic != LDD_MAGIC) {
292 /* FIXME add swabbing support */
293 CERROR("Bad magic in %s: %x!=%x\n", MOUNT_DATA_FILE,
294 ldd->ldd_magic, LDD_MAGIC);
295 GOTO(out_close, rc = -EINVAL);
298 if (ldd->ldd_feature_incompat & ~LDD_INCOMPAT_SUPP) {
299 CERROR("%s: unsupported incompat filesystem feature(s) %x\n",
301 ldd->ldd_feature_incompat & ~LDD_INCOMPAT_SUPP);
302 GOTO(out_close, rc = -EINVAL);
304 if (ldd->ldd_feature_rocompat & ~LDD_ROCOMPAT_SUPP) {
305 CERROR("%s: unsupported read-only filesystem feature(s) %x\n",
307 ldd->ldd_feature_rocompat & ~LDD_ROCOMPAT_SUPP);
308 /* Do something like remount filesystem read-only */
309 GOTO(out_close, rc = -EINVAL);
315 pop_ctxt(&saved, mount_ctxt, NULL);
/* Write @ldd (sizeof(struct lustre_disk_data) bytes) to MOUNT_DATA_FILE while
 * running in @mount_ctxt.  The magic is asserted and ldd_config_ver is
 * incremented before the file is opened, so the counter advances even when
 * the open or the write fails afterwards.
 * NOTE(review): the write-failure message says "read %d of %lu" although
 * this is the write path -- looks like a copy from ldd_parse(); confirm
 * before changing the log text. */
319 static int ldd_write(struct lvfs_run_ctxt *mount_ctxt,
320 struct lustre_disk_data *ldd)
322 struct lvfs_run_ctxt saved;
325 unsigned long len = sizeof(struct lustre_disk_data);
329 LASSERT(ldd->ldd_magic == LDD_MAGIC);
331 ldd->ldd_config_ver++;
333 push_ctxt(&saved, mount_ctxt, NULL);
335 file = filp_open(MOUNT_DATA_FILE, O_RDWR, 0644);
338 CERROR("cannot open %s: rc = %d\n", MOUNT_DATA_FILE, rc);
342 rc = lustre_fwrite(file, ldd, len, &off);
344 CERROR("error writing %s: read %d of %lu\n",
345 MOUNT_DATA_FILE, rc, len);
346 GOTO(out_close, rc = -EINVAL);
354 pop_ctxt(&saved, mount_ctxt, NULL);
359 /**************** config llog ********************/
/* Ask the MGC attached to @sb (lsi_mgc) to start processing config log
 * @logname.  An LCFG_LOG_START lustre_cfg is built from a buffer set that is
 * reset with the MGC's obd_name and then given @logname (slot 1), a copy of
 * *@cfg (slot 2) and the @sb pointer value (slot 3); it is passed to
 * obd_process_config() and freed afterwards.
 * Two different console errors are emitted on failure; the conditions that
 * select between them are on lines missing from this excerpt.
 * NOTE(review): in the first console message the pieces "'%s'" and "failed"
 * are concatenated without a space between them. */
361 /* Get a config log from the MGS and process it.
362 This func is called for both clients and servers.
363 Continue to process new statements appended to the logs
364 (whenever the config lock is revoked) until lustre_end_log
366 int lustre_process_log(struct super_block *sb, char *logname,
367 struct config_llog_instance *cfg)
369 struct lustre_cfg *lcfg;
370 struct lustre_cfg_bufs bufs;
371 struct lustre_sb_info *lsi = s2lsi(sb);
372 struct obd_device *mgc = lsi->lsi_mgc;
379 /* mgc_process_config */
380 lustre_cfg_bufs_reset(&bufs, mgc->obd_name);
381 lustre_cfg_bufs_set_string(&bufs, 1, logname);
382 lustre_cfg_bufs_set(&bufs, 2, cfg, sizeof(*cfg));
383 lustre_cfg_bufs_set(&bufs, 3, &sb, sizeof(sb));
384 lcfg = lustre_cfg_new(LCFG_LOG_START, &bufs);
385 rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
386 lustre_cfg_free(lcfg);
389 LCONSOLE_ERROR_MSG(0x15b, "%s: The configuration from log '%s'"
390 "failed from the MGS (%d). Make sure this "
391 "client and the MGS are running compatible "
392 "versions of Lustre.\n",
393 mgc->obd_name, logname, rc);
396 LCONSOLE_ERROR_MSG(0x15c, "%s: The configuration from log '%s' "
397 "failed (%d). This may be the result of "
398 "communication errors between this node and "
399 "the MGS, a bad configuration, or other "
400 "errors. See the syslog for more "
401 "information.\n", mgc->obd_name, logname,
404 /* class_obd_list(); */
/* Counterpart of lustre_process_log(): builds an LCFG_LOG_END lustre_cfg
 * (buffer set reset with the MGC's obd_name, @logname in slot 1, a copy of
 * *@cfg in slot 2), hands it to obd_process_config() on lsi_mgc and frees it.
 * Any guard around the slot-2 assignment and the return are on lines missing
 * from this excerpt. */
408 /* Stop watching this config log for updates */
409 int lustre_end_log(struct super_block *sb, char *logname,
410 struct config_llog_instance *cfg)
412 struct lustre_cfg *lcfg;
413 struct lustre_cfg_bufs bufs;
414 struct lustre_sb_info *lsi = s2lsi(sb);
415 struct obd_device *mgc = lsi->lsi_mgc;
422 /* mgc_process_config */
423 lustre_cfg_bufs_reset(&bufs, mgc->obd_name);
424 lustre_cfg_bufs_set_string(&bufs, 1, logname);
426 lustre_cfg_bufs_set(&bufs, 2, cfg, sizeof(*cfg));
427 lcfg = lustre_cfg_new(LCFG_LOG_END, &bufs);
428 rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
429 lustre_cfg_free(lcfg);
433 /**************** obd start *******************/
/* Build and run a single configuration command.
 * The buffer set is reset with @cfgname, @s1..@s4 go into string slots 1..4,
 * a lustre_cfg for @cmd is created, @nid is stored in lcfg_nid and the
 * command is executed through class_process_config(); the cfg is freed
 * afterwards.  Callers in this file pass 0 for unused strings; the guards
 * that presumably skip those slots are on lines missing from this excerpt. */
435 int do_lcfg(char *cfgname, lnet_nid_t nid, int cmd,
436 char *s1, char *s2, char *s3, char *s4)
438 struct lustre_cfg_bufs bufs;
439 struct lustre_cfg * lcfg = NULL;
442 CDEBUG(D_TRACE, "lcfg %s %#x %s %s %s %s\n", cfgname,
443 cmd, s1, s2, s3, s4);
445 lustre_cfg_bufs_reset(&bufs, cfgname);
447 lustre_cfg_bufs_set_string(&bufs, 1, s1);
449 lustre_cfg_bufs_set_string(&bufs, 2, s2);
451 lustre_cfg_bufs_set_string(&bufs, 3, s3);
453 lustre_cfg_bufs_set_string(&bufs, 4, s4);
455 lcfg = lustre_cfg_new(cmd, &bufs);
456 lcfg->lcfg_nid = nid;
457 rc = class_process_config(lcfg);
458 lustre_cfg_free(lcfg);
/* Start an obd in two do_lcfg() steps: LCFG_ATTACH with @type and @uuid, then
 * LCFG_SETUP with s1 and s2.  When setup fails the obd is detached again with
 * LCFG_DETACH.  Both failures are logged with the obd name and rc. */
462 static int lustre_start_simple(char *obdname, char *type, char *uuid,
466 CDEBUG(D_MOUNT, "Starting obd %s (typ=%s)\n", obdname, type);
468 rc = do_lcfg(obdname, 0, LCFG_ATTACH, type, uuid, 0, 0);
470 CERROR("%s attach error %d\n", obdname, rc);
473 rc = do_lcfg(obdname, 0, LCFG_SETUP, s1, s2, 0, 0);
475 CERROR("%s setup error %d\n", obdname, rc);
476 do_lcfg(obdname, 0, LCFG_DETACH, 0, 0, 0, 0);
/* Start the MGS obd on the disk mounted for @sb.
 * A mount already registered under LUSTRE_MGS_OBDNAME means an MGS is running
 * on this node; that case is reported on the console with the svname of the
 * existing one.  Otherwise the server mount (lsi_srv_mnt) is registered under
 * LUSTRE_MGS_OBDNAME and the obd is started with lustre_start_simple(); when
 * the start fails the registration is undone.
 * NOTE(review): no lustre_mount_info_lock acquisition is visible around the
 * server_find_mount() call here, unlike the other callers -- confirm. */
481 /* Set up a MGS to serve startup logs */
482 static int server_start_mgs(struct super_block *sb)
484 struct lustre_sb_info *lsi = s2lsi(sb);
485 struct vfsmount *mnt = lsi->lsi_srv_mnt;
486 struct lustre_mount_info *lmi;
491 /* It is impossible to have more than 1 MGS per node, since
492 MGC wouldn't know which to connect to */
493 lmi = server_find_mount(LUSTRE_MGS_OBDNAME);
495 lsi = s2lsi(lmi->lmi_sb);
496 LCONSOLE_ERROR_MSG(0x15d, "The MGS service was already started"
498 lsi->lsi_ldd->ldd_svname);
502 CDEBUG(D_CONFIG, "Start MGS service %s\n", LUSTRE_MGS_OBDNAME);
504 rc = server_register_mount(LUSTRE_MGS_OBDNAME, sb, mnt);
507 ((rc = lustre_start_simple(LUSTRE_MGS_OBDNAME, LUSTRE_MGS_NAME,
508 LUSTRE_MGS_OBDNAME, 0, 0))))
509 server_deregister_mount(LUSTRE_MGS_OBDNAME);
512 LCONSOLE_ERROR_MSG(0x15e, "Failed to start MGS '%s' (%d). "
513 "Is the 'mgs' module loaded?\n",
514 LUSTRE_MGS_OBDNAME, rc);
/* Stop the MGS obd: look it up by LUSTRE_MGS_OBDNAME, log "not running" when
 * it does not exist, otherwise shut it down with class_manual_cleanup().
 * @sb is not referenced on any line visible in this excerpt. */
518 static int server_stop_mgs(struct super_block *sb)
520 struct obd_device *obd;
524 CDEBUG(D_MOUNT, "Stop MGS service %s\n", LUSTRE_MGS_OBDNAME);
526 /* There better be only one MGS */
527 obd = class_name2obd(LUSTRE_MGS_OBDNAME);
529 CDEBUG(D_CONFIG, "mgs %s not running\n", LUSTRE_MGS_OBDNAME);
533 /* The MGS should always stop when we say so */
535 rc = class_manual_cleanup(obd);
/* mgc_start_lock serialises MGC start-up (lustre_start_mgc) against MGC
 * shutdown (lustre_stop_mgc); both take it with mutex_down()/mutex_up().
 *
 * lustre_start_mgc(): start, or reuse, the MGC obd for @sb.  Visible steps:
 *  1. Pick the nid that names the MGC.  Servers use the PARAM_MGSNODE entry
 *     of ldd_params, or, on an MGS, walk the local LNet ids and test each for
 *     LOLND; clients parse the first nid of lmd_dev.
 *  2. mgcname = LUSTRE_MGC_OBDNAME followed by that nid's string form;
 *     niduuid is a scratch buffer two bytes longer.
 *  3. With mgc_start_lock held: when an obd called mgcname already exists its
 *     cl_mgc_refcount is incremented and KEY_INIT_RECOV_BACKUP is set on it.
 *  4. Otherwise the primary nids are added with LCFG_ADD_UUID under niduuid,
 *     a random uuid is generated, the obd is started via
 *     lustre_start_simple(), failover nids are added (LCFG_ADD_UUID and
 *     LCFG_ADD_CONN), cl_mgc_refcount is set to 1, KEY_INIT_RECOV_BACKUP is
 *     set, obd_connect() is called and the resulting export is stored in
 *     cl_mgc_mgsexp.
 *  5. The lock is released and mgcname / niduuid are freed.
 * Many conditions, labels and jumps are on lines missing from this excerpt,
 * so the exact control flow between these steps cannot be read from here.
 * NOTE(review): niduuid is formatted as "%s_%x" from mgcname (len - 1
 * characters) into len + 2 bytes, which leaves room for a single hex digit
 * plus the terminator -- confirm the failover index i can never reach 16. */
539 DECLARE_MUTEX(mgc_start_lock);
541 /* Set up a mgcobd to process startup logs */
542 static int lustre_start_mgc(struct super_block *sb)
544 struct lustre_handle mgc_conn = {0, };
545 struct obd_connect_data ocd = { 0 };
546 struct lustre_sb_info *lsi = s2lsi(sb);
547 struct obd_device *obd;
548 struct obd_export *exp;
549 struct obd_uuid *uuid;
552 char *mgcname, *niduuid;
555 int rc = 0, i = 0, j, len;
558 LASSERT(lsi->lsi_lmd);
560 /* Find the first non-lo MGS nid for our MGC name */
561 if (lsi->lsi_flags & LSI_SERVER) {
562 ptr = lsi->lsi_ldd->ldd_params;
563 /* Use mgsnode= nids */
564 if ((class_find_param(ptr, PARAM_MGSNODE, &ptr) == 0) &&
565 (class_parse_nid(ptr, &nid, &ptr) == 0)) {
567 } else if (IS_MGS(lsi->lsi_ldd)) {
568 lnet_process_id_t id;
569 while ((rc = LNetGetId(i++, &id)) != -ENOENT) {
570 if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND)
577 } else { /* client */
578 /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
579 ptr = lsi->lsi_lmd->lmd_dev;
580 if (class_parse_nid(ptr, &nid, &ptr) == 0)
584 CERROR("No valid MGS nids found.\n");
588 len = strlen(LUSTRE_MGC_OBDNAME) + strlen(libcfs_nid2str(nid)) + 1;
589 OBD_ALLOC(mgcname, len);
590 OBD_ALLOC(niduuid, len + 2);
591 if (!mgcname || !niduuid)
592 GOTO(out_free, rc = -ENOMEM);
593 sprintf(mgcname, "%s%s", LUSTRE_MGC_OBDNAME, libcfs_nid2str(nid));
595 mutex_down(&mgc_start_lock);
597 obd = class_name2obd(mgcname);
599 /* Re-using an existing MGC */
600 atomic_inc(&obd->u.cli.cl_mgc_refcount);
603 /* If we are restarting the MGS, don't try to keep the MGC's
604 old connection, or registration will fail. */
605 if ((lsi->lsi_flags & LSI_SERVER) && IS_MGS(lsi->lsi_ldd)) {
606 CDEBUG(D_MOUNT, "New MGS with live MGC\n");
610 /* Try all connections, but only once (again).
611 We don't want to block another target from starting
612 (using its local copy of the log), but we do want to connect
613 if at all possible. */
615 CDEBUG(D_MOUNT, "%s: Set MGC reconnect %d\n", mgcname,recov_bk);
616 rc = obd_set_info_async(obd->obd_self_export,
617 strlen(KEY_INIT_RECOV_BACKUP),
618 KEY_INIT_RECOV_BACKUP,
619 sizeof(recov_bk), &recov_bk, NULL);
623 CDEBUG(D_MOUNT, "Start MGC '%s'\n", mgcname);
625 /* Add the primary nids for the MGS */
627 sprintf(niduuid, "%s_%x", mgcname, i);
628 if (lsi->lsi_flags & LSI_SERVER) {
629 ptr = lsi->lsi_ldd->ldd_params;
630 if (IS_MGS(lsi->lsi_ldd)) {
631 /* Use local nids (including LO) */
632 lnet_process_id_t id;
633 while ((rc = LNetGetId(i++, &id)) != -ENOENT) {
634 rc = do_lcfg(mgcname, id.nid,
635 LCFG_ADD_UUID, niduuid, 0,0,0);
638 /* Use mgsnode= nids */
639 if (class_find_param(ptr, PARAM_MGSNODE, &ptr) != 0) {
640 CERROR("No MGS nids given.\n");
641 GOTO(out_free, rc = -EINVAL);
643 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
644 rc = do_lcfg(mgcname, nid,
645 LCFG_ADD_UUID, niduuid, 0,0,0);
649 } else { /* client */
650 /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
651 ptr = lsi->lsi_lmd->lmd_dev;
652 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
653 rc = do_lcfg(mgcname, nid,
654 LCFG_ADD_UUID, niduuid, 0,0,0);
656 /* Stop at the first failover nid */
662 CERROR("No valid MGS nids found.\n");
663 GOTO(out_free, rc = -EINVAL);
665 lsi->lsi_lmd->lmd_mgs_failnodes = 1;
667 /* Random uuid for MGC allows easier reconnects */
669 ll_generate_random_uuid(uuidc);
670 class_uuid_unparse(uuidc, uuid);
673 rc = lustre_start_simple(mgcname, LUSTRE_MGC_NAME,
674 (char *)uuid->uuid, LUSTRE_MGS_OBDNAME,
680 /* Add any failover MGS nids */
682 while ((*ptr == ':' ||
683 class_find_param(ptr, PARAM_MGSNODE, &ptr) == 0)) {
684 /* New failover node */
685 sprintf(niduuid, "%s_%x", mgcname, i);
687 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
689 rc = do_lcfg(mgcname, nid,
690 LCFG_ADD_UUID, niduuid, 0,0,0);
695 rc = do_lcfg(mgcname, 0, LCFG_ADD_CONN,
703 lsi->lsi_lmd->lmd_mgs_failnodes = i;
705 obd = class_name2obd(mgcname);
707 CERROR("Can't find mgcobd %s\n", mgcname);
708 GOTO(out_free, rc = -ENOTCONN);
711 /* Keep a refcount of servers/clients who started with "mount",
712 so we know when we can get rid of the mgc. */
713 atomic_set(&obd->u.cli.cl_mgc_refcount, 1);
715 /* Try all connections, but only once. */
717 rc = obd_set_info_async(obd->obd_self_export,
718 strlen(KEY_INIT_RECOV_BACKUP),
719 KEY_INIT_RECOV_BACKUP,
720 sizeof(recov_bk), &recov_bk, NULL);
723 CERROR("can't set %s %d\n", KEY_INIT_RECOV_BACKUP, rc);
724 /* We connect to the MGS at setup, and don't disconnect until cleanup */
725 rc = obd_connect(NULL, &mgc_conn, obd, &(obd->obd_uuid), &ocd);
727 CERROR("connect failed %d\n", rc);
731 exp = class_conn2export(&mgc_conn);
732 obd->u.cli.cl_mgc_mgsexp = exp;
735 /* Keep the mgc info in the sb. Note that many lsi's can point
739 mutex_up(&mgc_start_lock);
742 OBD_FREE(mgcname, len);
744 OBD_FREE(niduuid, len + 2);
/* Drop this superblock's use of the MGC and stop the MGC on the last user.
 * With mgc_start_lock held, cl_mgc_refcount is decremented; while other users
 * remain the function leaves through "out" with rc = -EBUSY, which the code
 * itself treats as non-fatal.  On the last reference: obd_no_recov is set,
 * the export in cl_mgc_mgsexp (if any) is disconnected, a scratch buffer of
 * strlen(obd_name) + 6 bytes is seeded with the obd name, the obd is cleaned
 * up with class_manual_cleanup(), and then one "_<hex index>" uuid per
 * lmd_mgs_failnodes is removed with LCFG_DEL_UUID before the buffer is freed.
 * How obd is obtained is on lines missing from this excerpt.
 * NOTE(review): the delete-failure message says "MDC UUID" although the
 * uuids belong to the MGC -- confirm before changing the log text. */
748 static int lustre_stop_mgc(struct super_block *sb)
750 struct lustre_sb_info *lsi = s2lsi(sb);
751 struct obd_device *obd;
752 char *niduuid, *ptr = 0;
763 mutex_down(&mgc_start_lock);
764 if (!atomic_dec_and_test(&obd->u.cli.cl_mgc_refcount)) {
765 /* This is not fatal, every client that stops
766 will call in here. */
767 CDEBUG(D_MOUNT, "mgc still has %d references.\n",
768 atomic_read(&obd->u.cli.cl_mgc_refcount));
769 GOTO(out, rc = -EBUSY);
772 /* MGC must always stop */
774 /* client_disconnect_export uses the no_recov flag to decide whether it
775 should disconnect or just invalidate. (The MGC has no
776 recoverable data in any case.) */
777 obd->obd_no_recov = 1;
779 if (obd->u.cli.cl_mgc_mgsexp)
780 obd_disconnect(obd->u.cli.cl_mgc_mgsexp);
782 /* Save the obdname for cleaning the nid uuids, which are
784 len = strlen(obd->obd_name) + 6;
785 OBD_ALLOC(niduuid, len);
787 strcpy(niduuid, obd->obd_name);
788 ptr = niduuid + strlen(niduuid);
791 rc = class_manual_cleanup(obd);
795 /* Clean the nid uuids */
798 for (i = 0; i < lsi->lsi_lmd->lmd_mgs_failnodes; i++) {
799 sprintf(ptr, "_%x", i);
800 rc = do_lcfg(LUSTRE_MGC_OBDNAME, 0, LCFG_DEL_UUID,
803 CERROR("del MDC UUID %s failed: rc = %d\n",
806 OBD_FREE(niduuid, len);
807 /* class_import_put will get rid of the additional connections */
810 mutex_up(&mgc_start_lock);
/* Point the MGC at the disk belonging to @sb by issuing the "set_fs" key
 * through obd_set_info_async() on the MGC's self export.  The value passed is
 * @sb itself with length sizeof(*sb).  A failure is logged with its rc. */
814 /* Since there's only one mgc per node, we have to change its fs to get
815 access to the right disk. */
816 static int server_mgc_set_fs(struct obd_device *mgc, struct super_block *sb)
818 struct lustre_sb_info *lsi = s2lsi(sb);
822 CDEBUG(D_MOUNT, "Set mgc disk for %s\n", lsi->lsi_lmd->lmd_dev);
824 /* cl_mgc_sem in mgc ensures we sleep if the mgc_fs is busy */
825 rc = obd_set_info_async(mgc->obd_self_export,
826 strlen("set_fs"), "set_fs",
827 sizeof(*sb), sb, NULL);
829 CERROR("can't set_fs %d\n", rc);
/* Counterpart of server_mgc_set_fs(): issues the "clear_fs" key through
 * obd_set_info_async() on the MGC's self export.  The remaining arguments of
 * that call are on lines missing from this excerpt. */
835 static int server_mgc_clear_fs(struct obd_device *mgc)
840 CDEBUG(D_MOUNT, "Unassign mgc disk\n");
842 rc = obd_set_info_async(mgc->obd_self_export,
843 strlen("clear_fs"), "clear_fs",
/* server_start_lock serialises starting the node-wide MDS/OSS obds (in
 * server_start_targets) against stopping them here.
 *
 * server_stop_servers(): with the lock held, pick the node-wide service that
 * matches @lddflags -- LUSTRE_MDS_OBDNAME with type LUSTRE_MDS_NAME for an
 * MDT, LUSTRE_OSS_OBDNAME with type LUSTRE_OST_NAME for an OST -- and clean
 * it up with class_manual_cleanup() when the obd exists and its type is
 * either not found or has a zero typ_refcnt.  @lsiflags is not referenced on
 * any line visible in this excerpt. */
848 DECLARE_MUTEX(server_start_lock);
850 /* Stop MDS/OSS if nobody is using them */
851 static int server_stop_servers(int lddflags, int lsiflags)
853 struct obd_device *obd = NULL;
854 struct obd_type *type = NULL;
858 mutex_down(&server_start_lock);
860 /* Either an MDT or an OST or neither */
861 /* if this was an MDT, and there are no more MDT's, clean up the MDS */
862 if ((lddflags & LDD_F_SV_TYPE_MDT) &&
863 (obd = class_name2obd(LUSTRE_MDS_OBDNAME))) {
864 /*FIXME pre-rename, should eventually be LUSTRE_MDT_NAME*/
865 type = class_search_type(LUSTRE_MDS_NAME);
867 /* if this was an OST, and there are no more OST's, clean up the OSS */
868 if ((lddflags & LDD_F_SV_TYPE_OST) &&
869 (obd = class_name2obd(LUSTRE_OSS_OBDNAME))) {
870 type = class_search_type(LUSTRE_OST_NAME);
873 if (obd && (!type || !type->typ_refcnt)) {
876 /* obd_fail doesn't mean much on a server obd */
877 err = class_manual_cleanup(obd);
882 mutex_up(&server_start_lock);
/* Dump the identifying fields of @mti (svname, fsname, uuid, config version
 * and flags) through PRINT_CMD(PRINT_MASK, ...), prefixed by @title. */
887 int server_mti_print(char *title, struct mgs_target_info *mti)
889 PRINT_CMD(PRINT_MASK, "mti %s\n", title);
890 PRINT_CMD(PRINT_MASK, "server: %s\n", mti->mti_svname);
891 PRINT_CMD(PRINT_MASK, "fs: %s\n", mti->mti_fsname);
892 PRINT_CMD(PRINT_MASK, "uuid: %s\n", mti->mti_uuid);
893 PRINT_CMD(PRINT_MASK, "ver: %d flags: %#x\n",
894 mti->mti_config_ver, mti->mti_flags);
/* Override the security parameters in mti->mti_params with the ones given on
 * the mount line (@lmd).
 * With neither lmd_sec_mdt nor lmd_sec_cli set, the on-disk parameters are
 * left alone.  Otherwise entries starting with PARAM_SEC_RPC_MDT or
 * PARAM_SEC_RPC_CLI are located with strncmp() and squeezed out with
 * memmove(), and the mount-line values are appended as
 * " <PARAM><value>" with strcat() after a size check against
 * sizeof(mti->mti_params).
 * The two size checks are written differently (+1 with >=, +2 with >) but
 * are arithmetically the same test.  The loop structure of the filter and
 * the return values are on lines missing from this excerpt. */
899 int mti_set_sec_opts(struct mgs_target_info *mti, struct lustre_mount_data *lmd)
903 if (lmd->lmd_sec_mdt == NULL && lmd->lmd_sec_cli == NULL) {
904 /* just let on-disk params do its work. but we have an
905 * assumption that any changes of on-disk data by tune2fs
906 * should lead to server rewrite log.
911 /* filter out existing sec options */
912 s1 = mti->mti_params;
919 if (strncmp(s1, PARAM_SEC_RPC_MDT,
920 sizeof(PARAM_SEC_RPC_MDT) - 1) == 0 ||
921 strncmp(s1, PARAM_SEC_RPC_CLI,
922 sizeof(PARAM_SEC_RPC_CLI) - 1) == 0)
927 s2 = strchr(s1, ' ');
935 memmove(s1, s2, strlen(s2) + 1);
940 /* append sec options from lmd */
941 /* FIXME add flag LDD_F_UPDATE after mountconf start supporting
944 if (lmd->lmd_sec_mdt) {
945 if (strlen(mti->mti_params) + strlen(lmd->lmd_sec_mdt) +
946 sizeof(PARAM_SEC_RPC_MDT) + 1 >= sizeof(mti->mti_params)) {
947 CERROR("security params too big for mti\n");
950 strcat(mti->mti_params, " "PARAM_SEC_RPC_MDT);
951 strcat(mti->mti_params, lmd->lmd_sec_mdt);
952 //mti->mti_flags |= LDD_F_UPDATE;
954 if (lmd->lmd_sec_cli) {
955 if (strlen(mti->mti_params) + strlen(lmd->lmd_sec_cli) +
956 sizeof(PARAM_SEC_RPC_CLI) + 2 > sizeof(mti->mti_params)) {
957 CERROR("security params too big for mti\n");
960 strcat(mti->mti_params, " "PARAM_SEC_RPC_CLI);
961 strcat(mti->mti_params, lmd->lmd_sec_cli);
962 //mti->mti_flags |= LDD_F_UPDATE;
/* Fill @mti from the on-disk data (lsi_ldd) and mount data (lsi_lmd) of a
 * server superblock.  Copies fsname and svname, collects up to MTI_NIDS_MAX
 * local LNet nids (each id is tested for LOLND; a warning is logged when the
 * limit is reached), records LUSTRE_VERSION_CODE, the ldd flags, index and
 * uuid, copies ldd_params and finally applies mount-line security options
 * through mti_set_sec_opts(), whose result is returned.
 * NOTE(review): strncpy() with the full destination size does not guarantee
 * NUL termination when the source fills the buffer.
 * NOTE(review): the params length check uses '>' so a string of exactly
 * sizeof(mti->mti_params) characters passes and is then copied without a
 * terminator; the memcpy() also reads sizeof(mti->mti_params) bytes from
 * ldd_params regardless of that buffer's own size -- confirm both sizes. */
968 static int server_sb2mti(struct super_block *sb, struct mgs_target_info *mti)
970 struct lustre_sb_info *lsi = s2lsi(sb);
971 struct lustre_disk_data *ldd = lsi->lsi_ldd;
972 struct lustre_mount_data *lmd = lsi->lsi_lmd;
973 lnet_process_id_t id;
977 if (!(lsi->lsi_flags & LSI_SERVER))
980 strncpy(mti->mti_fsname, ldd->ldd_fsname,
981 sizeof(mti->mti_fsname));
982 strncpy(mti->mti_svname, ldd->ldd_svname,
983 sizeof(mti->mti_svname));
985 mti->mti_nid_count = 0;
986 while (LNetGetId(i++, &id) != -ENOENT) {
987 if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND)
989 mti->mti_nids[mti->mti_nid_count] = id.nid;
990 mti->mti_nid_count++;
991 if (mti->mti_nid_count >= MTI_NIDS_MAX) {
992 CWARN("Only using first %d nids for %s\n",
993 mti->mti_nid_count, mti->mti_svname);
998 mti->mti_lustre_ver = LUSTRE_VERSION_CODE;
999 mti->mti_config_ver = 0;
1000 mti->mti_flags = ldd->ldd_flags;
1001 mti->mti_stripe_index = ldd->ldd_svindex;
1002 memcpy(mti->mti_uuid, ldd->ldd_uuid, sizeof(mti->mti_uuid));
1003 if (strlen(ldd->ldd_params) > sizeof(mti->mti_params)) {
1004 CERROR("params too big for mti\n");
1007 memcpy(mti->mti_params, ldd->ldd_params, sizeof(mti->mti_params));
1009 RETURN(mti_set_sec_opts(mti, lmd));
/* Register this server target with the MGS.
 * An mgs_target_info is filled by server_sb2mti() and sent with the
 * "register_target" key through obd_set_info_async() on the MGC's MGS export
 * (cl_mgc_mgsexp).  On success ldd_flags is replaced by the flags the MGS
 * sent back, minus LDD_F_REWRITE_LDD.  When the MGS set LDD_F_REWRITE_LDD,
 * the new index and svname are stored in the ldd, written out with
 * ldd_write(), the disk label is updated through fsfilt_set_label() and the
 * filesystem is synced.  Allocation and release of mti are on lines missing
 * from this excerpt.
 * NOTE(review): the return value of ldd_write() is ignored here. */
1012 /* Register an old or new target with the MGS. If needed MGS will construct
1013 startup logs and assign index */
1014 int server_register_target(struct super_block *sb)
1016 struct lustre_sb_info *lsi = s2lsi(sb);
1017 struct obd_device *mgc = lsi->lsi_mgc;
1018 struct lustre_disk_data *ldd = lsi->lsi_ldd;
1019 struct mgs_target_info *mti = NULL;
1025 if (!(lsi->lsi_flags & LSI_SERVER))
1031 rc = server_sb2mti(sb, mti);
1035 CDEBUG(D_MOUNT, "Registration %s, fs=%s, %s, index=%04x, flags=%#x\n",
1036 mti->mti_svname, mti->mti_fsname,
1037 libcfs_nid2str(mti->mti_nids[0]), mti->mti_stripe_index,
1040 /* Register the target */
1041 /* FIXME use mgc_process_config instead */
1042 rc = obd_set_info_async(mgc->u.cli.cl_mgc_mgsexp,
1043 strlen("register_target"), "register_target",
1044 sizeof(*mti), mti, NULL);
1046 CERROR("registration with the MGS failed (%d)\n", rc);
1050 /* Always update our flags */
1051 ldd->ldd_flags = mti->mti_flags & ~LDD_F_REWRITE_LDD;
1053 /* If this flag is set, it means the MGS wants us to change our
1054 on-disk data. (So far this means just the index.) */
1055 if (mti->mti_flags & LDD_F_REWRITE_LDD) {
1058 CDEBUG(D_MOUNT, "Changing on-disk index from %#x to %#x "
1059 "for %s\n", ldd->ldd_svindex, mti->mti_stripe_index,
1061 ldd->ldd_svindex = mti->mti_stripe_index;
1062 strncpy(ldd->ldd_svname, mti->mti_svname,
1063 sizeof(ldd->ldd_svname));
1064 /* or ldd_make_sv_name(ldd); */
1065 ldd_write(&mgc->obd_lvfs_ctxt, ldd);
1066 err = fsfilt_set_label(mgc, lsi->lsi_srv_mnt->mnt_sb,
1069 CERROR("Label set error %d\n", err);
1070 label = fsfilt_get_label(mgc, lsi->lsi_srv_mnt->mnt_sb);
1072 CDEBUG(D_MOUNT, "Disk label changed to %s\n", label);
1074 /* Flush the new ldd to disk */
1075 fsfilt_sync(mgc, lsi->lsi_srv_mnt->mnt_sb);
/* Bring up the server target stored on @sb.  Visible sequence:
 *  1. For an MDT (resp. OST), look up the node-wide MDS (resp. OSS) obd under
 *     server_start_lock and start it with lustre_start_simple(); the test
 *     that skips an obd which already exists is presumably on the lines
 *     missing from this excerpt.
 *  2. Point the MGC at this disk with server_mgc_set_fs().
 *  3. Register with the MGS.  A failure is reported as "required" when the
 *     ldd flags include LDD_F_NEED_INDEX, LDD_F_UPDATE or LDD_F_UPGRADE14;
 *     -EINVAL additionally produces the "MGS is refusing" console message.
 *  4. Register the mount under the target's svname so the target's setup can
 *     find it, then process the config log named after the target.
 *  5. Release the MGC disk with server_mgc_clear_fs().
 *  6. Look the started obd up by svname; when the mount asked for
 *     LMD_FLG_ABORT_RECOV and the obd has an iocontrol method, issue
 *     OBD_IOC_ABORT_RECOVERY; finally send OBD_NOTIFY_CONFIG.
 * Error-path jumps and labels are on lines missing from this excerpt. */
1085 static int server_start_targets(struct super_block *sb, struct vfsmount *mnt)
1087 struct obd_device *obd;
1088 struct lustre_sb_info *lsi = s2lsi(sb);
1089 struct config_llog_instance cfg;
1093 CDEBUG(D_MOUNT, "starting target %s\n", lsi->lsi_ldd->ldd_svname);
1096 /* If we're an MDT, make sure the global MDS is running */
1097 if (lsi->lsi_ldd->ldd_flags & LDD_F_SV_TYPE_MDT) {
1098 /* make sure the MDS is started */
1099 mutex_down(&server_start_lock);
1100 obd = class_name2obd(LUSTRE_MDS_OBDNAME);
1102 rc = lustre_start_simple(LUSTRE_MDS_OBDNAME,
1103 /* FIXME pre-rename, should eventually be LUSTRE_MDS_NAME */
1105 LUSTRE_MDS_OBDNAME"_uuid",
1108 mutex_up(&server_start_lock);
1109 CERROR("failed to start MDS: %d\n", rc);
1113 mutex_up(&server_start_lock);
1117 /* If we're an OST, make sure the global OSS is running */
1118 if (lsi->lsi_ldd->ldd_flags & LDD_F_SV_TYPE_OST) {
1119 /* make sure OSS is started */
1120 mutex_down(&server_start_lock);
1121 obd = class_name2obd(LUSTRE_OSS_OBDNAME);
1123 rc = lustre_start_simple(LUSTRE_OSS_OBDNAME,
1125 LUSTRE_OSS_OBDNAME"_uuid",
1128 mutex_up(&server_start_lock);
1129 CERROR("failed to start OSS: %d\n", rc);
1133 mutex_up(&server_start_lock);
1136 /* Set the mgc fs to our server disk. This allows the MGC
1137 to read and write configs locally. */
1138 rc = server_mgc_set_fs(lsi->lsi_mgc, sb);
1142 /* Register with MGS */
1143 rc = server_register_target(sb);
1144 if (rc && (lsi->lsi_ldd->ldd_flags &
1145 (LDD_F_NEED_INDEX | LDD_F_UPDATE | LDD_F_UPGRADE14))){
1146 CERROR("Required registration failed for %s: %d\n",
1147 lsi->lsi_ldd->ldd_svname, rc);
1149 LCONSOLE_ERROR_MSG(0x15f, "Communication error with "
1150 "the MGS. Is the MGS running?\n");
1154 if (rc == -EINVAL) {
1155 LCONSOLE_ERROR_MSG(0x160, "The MGS is refusing to allow this "
1156 "server (%s) to start. Please see messages"
1157 " on the MGS node.\n",
1158 lsi->lsi_ldd->ldd_svname);
1162 /* Let the target look up the mount using the target's name
1163 (we can't pass the sb or mnt through class_process_config.) */
1164 rc = server_register_mount(lsi->lsi_ldd->ldd_svname, sb, mnt);
1168 /* Start targets using the llog named for the target */
1169 memset(&cfg, 0, sizeof(cfg));
1170 rc = lustre_process_log(sb, lsi->lsi_ldd->ldd_svname, &cfg);
1172 CERROR("failed to start server %s: %d\n",
1173 lsi->lsi_ldd->ldd_svname, rc);
1178 /* Release the mgc fs for others to use */
1179 server_mgc_clear_fs(lsi->lsi_mgc);
1182 obd = class_name2obd(lsi->lsi_ldd->ldd_svname);
1184 CERROR("no server named %s was started\n",
1185 lsi->lsi_ldd->ldd_svname);
1189 if ((lsi->lsi_lmd->lmd_flags & LMD_FLG_ABORT_RECOV) &&
1190 (OBP(obd, iocontrol))) {
1191 obd_iocontrol(OBD_IOC_ABORT_RECOVERY,
1192 obd->obd_self_export, 0, NULL, NULL);
1195 /* log has been fully processed */
1196 obd_notify(obd, NULL, OBD_NOTIFY_CONFIG, (void *)CONFIG_LOG);
1202 /***************** lustre superblock **************/
/* Allocate the lustre_sb_info for @sb together with its lustre_mount_data
 * and attach it to the superblock through s2lsi_nocast().  When the mount
 * data cannot be allocated the lsi is freed again.  Initial state set here:
 * lmd_exclude_count = 0, lsi_mounts = 1 (the setup reference),
 * lsi_flags = LSI_UMOUNT_FAILOVER, and lmd_nllu / lmd_nllg set to
 * NOBODY_UID / NOBODY_GID.  The return statements are on lines missing from
 * this excerpt. */
1204 struct lustre_sb_info *lustre_init_lsi(struct super_block *sb)
1206 struct lustre_sb_info *lsi = NULL;
1209 OBD_ALLOC(lsi, sizeof(*lsi));
1212 OBD_ALLOC(lsi->lsi_lmd, sizeof(*lsi->lsi_lmd));
1213 if (!lsi->lsi_lmd) {
1214 OBD_FREE(lsi, sizeof(*lsi));
1218 lsi->lsi_lmd->lmd_exclude_count = 0;
1219 s2lsi_nocast(sb) = lsi;
1220 /* we take 1 extra ref for our setup */
1221 atomic_set(&lsi->lsi_mounts, 1);
1223 /* Default umount style */
1224 lsi->lsi_flags = LSI_UMOUNT_FAILOVER;
1226 lsi->lsi_lmd->lmd_nllu = NOBODY_UID;
1227 lsi->lsi_lmd->lmd_nllg = NOBODY_GID;
/* Free the lustre_sb_info attached to @sb and everything it owns: the ldd,
 * and, inside the lmd, the dev, profile, sec_mdt, sec_cli and opts strings
 * (each freed with strlen + 1) plus the exclude array, then the lmd and the
 * lsi itself; finally the superblock's lsi pointer is cleared.
 * Asserts that no mount references remain (lsi_mounts == 0) and that the
 * client-side info (lsi_llsbi) has already been released. */
1231 static int lustre_free_lsi(struct super_block *sb)
1233 struct lustre_sb_info *lsi = s2lsi(sb);
1239 CDEBUG(D_MOUNT, "Freeing lsi\n");
1241 /* someone didn't call server_put_mount. */
1242 LASSERT(atomic_read(&lsi->lsi_mounts) == 0);
1244 if (lsi->lsi_ldd != NULL)
1245 OBD_FREE(lsi->lsi_ldd, sizeof(*lsi->lsi_ldd));
1247 if (lsi->lsi_lmd != NULL) {
1248 if (lsi->lsi_lmd->lmd_dev != NULL)
1249 OBD_FREE(lsi->lsi_lmd->lmd_dev,
1250 strlen(lsi->lsi_lmd->lmd_dev) + 1);
1251 if (lsi->lsi_lmd->lmd_profile != NULL)
1252 OBD_FREE(lsi->lsi_lmd->lmd_profile,
1253 strlen(lsi->lsi_lmd->lmd_profile) + 1);
1254 if (lsi->lsi_lmd->lmd_sec_mdt != NULL)
1255 OBD_FREE(lsi->lsi_lmd->lmd_sec_mdt,
1256 strlen(lsi->lsi_lmd->lmd_sec_mdt) + 1);
1257 if (lsi->lsi_lmd->lmd_sec_cli != NULL)
1258 OBD_FREE(lsi->lsi_lmd->lmd_sec_cli,
1259 strlen(lsi->lsi_lmd->lmd_sec_cli) + 1);
1260 if (lsi->lsi_lmd->lmd_opts != NULL)
1261 OBD_FREE(lsi->lsi_lmd->lmd_opts,
1262 strlen(lsi->lsi_lmd->lmd_opts) + 1);
1263 if (lsi->lsi_lmd->lmd_exclude_count)
1264 OBD_FREE(lsi->lsi_lmd->lmd_exclude,
1265 sizeof(lsi->lsi_lmd->lmd_exclude[0]) *
1266 lsi->lsi_lmd->lmd_exclude_count);
1267 OBD_FREE(lsi->lsi_lmd, sizeof(*lsi->lsi_lmd));
1270 LASSERT(lsi->lsi_llsbi == NULL);
1271 OBD_FREE(lsi, sizeof(*lsi));
1272 s2lsi_nocast(sb) = NULL;
/* Drop one reference on the lsi of @sb; when lsi_mounts reaches zero the lsi
 * is freed with lustre_free_lsi().  server_put_mount() treats a non-zero
 * return as "this was the last put"; the return statements themselves are on
 * lines missing from this excerpt. */
1277 /* The lsi has one reference for every server that is using the disk -
1278 e.g. MDT, MGS, and potentially MGC */
1279 static int lustre_put_lsi(struct super_block *sb)
1281 struct lustre_sb_info *lsi = s2lsi(sb);
1286 CDEBUG(D_MOUNT, "put %p %d\n", sb, atomic_read(&lsi->lsi_mounts));
1288 if (atomic_dec_and_test(&lsi->lsi_mounts)) {
1289 lustre_free_lsi(sb);
1295 /*************** server mount ******************/
/* Mount the backing device of a server superblock in two passes.
 *  Pass 1 (pre-mount): mount lmd_dev as "ldiskfs" -- falling back to
 *  "ldiskfs2" on 2.6 kernels or "ext3" on 2.4 kernels -- and read
 *  MOUNT_DATA_FILE into a freshly allocated ldd with ldd_parse(), using a run
 *  context rooted at the pre-mount.
 *  Pass 2 (real mount): build the option string in a zeroed page from
 *  ldd_mount_opts plus, after a comma, any mount-line options (lmd_opts), add
 *  MS_NOATIME | MS_NODIRATIME to the flags and mount with the filesystem type
 *  given by MT_STR(ldd).
 * On success the ldd is stored in lsi_ldd (freed with the lsi).  On failure
 * the ldd is freed, lsi_ldd is reset to NULL and ERR_PTR(rc) is returned.
 * Releasing the pre-mount and the option page happens on lines missing from
 * this excerpt. */
1297 /* Kernel mount using mount options in MOUNT_DATA_FILE */
1298 static struct vfsmount *server_kernel_mount(struct super_block *sb)
1300 struct lvfs_run_ctxt mount_ctxt;
1301 struct lustre_sb_info *lsi = s2lsi(sb);
1302 struct lustre_disk_data *ldd;
1303 struct lustre_mount_data *lmd = lsi->lsi_lmd;
1304 struct vfsmount *mnt;
1305 char *options = NULL;
1306 unsigned long page, s_flags;
1310 OBD_ALLOC(ldd, sizeof(*ldd));
1312 RETURN(ERR_PTR(-ENOMEM));
1314 /* In the past, we have always used flags = 0.
1315 Note ext3/ldiskfs can't be mounted ro. */
1316 s_flags = sb->s_flags;
1318 /* Pre-mount ldiskfs to read the MOUNT_DATA_FILE */
1319 CDEBUG(D_MOUNT, "Pre-mount ldiskfs %s\n", lmd->lmd_dev);
1320 mnt = ll_kern_mount("ldiskfs", s_flags, lmd->lmd_dev, 0);
1323 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
1324 /* 2.6 kernels: if ldiskfs fails, try ldiskfs2 */
1325 mnt = ll_kern_mount("ldiskfs2", s_flags, lmd->lmd_dev, 0);
1327 int rc2 = PTR_ERR(mnt);
1328 CERROR("premount %s:%#lx ldiskfs failed: %d, ldiskfs2 "
1329 "failed: %d. Is the ldiskfs module available?\n",
1330 lmd->lmd_dev, s_flags, rc, rc2);
1334 /* 2.4 kernels: if ldiskfs fails, try ext3 */
1335 mnt = ll_kern_mount("ext3", s_flags, lmd->lmd_dev, 0);
1338 CERROR("premount ext3 failed: rc = %d\n", rc);
1344 OBD_SET_CTXT_MAGIC(&mount_ctxt);
1345 mount_ctxt.pwdmnt = mnt;
1346 mount_ctxt.pwd = mnt->mnt_root;
1347 mount_ctxt.fs = get_ds();
1349 rc = ldd_parse(&mount_ctxt, ldd);
1353 CERROR("premount parse options failed: rc = %d\n", rc);
1357 /* Done with our pre-mount, now do the real mount. */
1359 /* Glom up mount options */
1360 page = __get_free_page(GFP_KERNEL);
1362 GOTO(out_free, rc = -ENOMEM);
1364 options = (char *)page;
1365 memset(options, 0, CFS_PAGE_SIZE);
/* at most CFS_PAGE_SIZE - 2 bytes are copied into the zeroed page, so the
 * string stays terminated and there is room for the "," added below */
1366 strncpy(options, ldd->ldd_mount_opts, CFS_PAGE_SIZE - 2);
1368 /* Add in any mount-line options */
1369 if (lmd->lmd_opts && (*(lmd->lmd_opts) != 0)) {
1370 int len = CFS_PAGE_SIZE - strlen(options) - 2;
1372 strcat(options, ",");
1373 strncat(options, lmd->lmd_opts, len);
1376 /* Special permanent mount flags */
1378 s_flags |= MS_NOATIME | MS_NODIRATIME;
1380 CDEBUG(D_MOUNT, "kern_mount: %s %s %s\n",
1381 MT_STR(ldd), lmd->lmd_dev, options);
1382 mnt = ll_kern_mount(MT_STR(ldd), s_flags, lmd->lmd_dev,
1387 CERROR("ll_kern_mount failed: rc = %d\n", rc);
1391 lsi->lsi_ldd = ldd; /* freed at lsi cleanup */
1392 CDEBUG(D_SUPER, "%s: mnt = %p\n", lmd->lmd_dev, mnt);
1396 OBD_FREE(ldd, sizeof(*ldd));
1397 lsi->lsi_ldd = NULL;
1398 RETURN(ERR_PTR(rc));
1401 static void server_wait_finished(struct vfsmount *mnt)
1403 wait_queue_head_t waitq;
1404 struct l_wait_info lwi;
1407 init_waitqueue_head(&waitq);
1409 while ((atomic_read(&mnt->mnt_count) > 1) && (retries > 0)) {
1410 LCONSOLE_WARN("Mount still busy with %d refs, waiting for "
1412 atomic_read(&mnt->mnt_count), retries);
1414 /* Wait for a bit */
1416 lwi = LWI_TIMEOUT(5 * HZ, NULL, NULL);
1417 l_wait_event(waitq, 0, &lwi);
1419 if (atomic_read(&mnt->mnt_count) > 1) {
1420 CERROR("Mount %p is still busy (%d refs), giving up.\n",
1421 mnt, atomic_read(&mnt->mnt_count));
1425 static void server_put_super(struct super_block *sb)
1427 struct lustre_sb_info *lsi = s2lsi(sb);
1428 struct obd_device *obd;
1429 struct vfsmount *mnt = lsi->lsi_srv_mnt;
1430 char *tmpname, *extraname = NULL;
1432 int lddflags = lsi->lsi_ldd->ldd_flags;
1433 int lsiflags = lsi->lsi_flags;
1437 LASSERT(lsiflags & LSI_SERVER);
1439 tmpname_sz = strlen(lsi->lsi_ldd->ldd_svname) + 1;
1440 OBD_ALLOC(tmpname, tmpname_sz);
1441 memcpy(tmpname, lsi->lsi_ldd->ldd_svname, tmpname_sz);
1442 CDEBUG(D_MOUNT, "server put_super %s\n", tmpname);
1444 /* Stop the target */
1445 if (IS_MDT(lsi->lsi_ldd) || IS_OST(lsi->lsi_ldd)) {
1446 struct lustre_profile *lprof = NULL;
1448 /* tell the mgc to drop the config log */
1449 lustre_end_log(sb, lsi->lsi_ldd->ldd_svname, NULL);
1451 /* COMPAT_146 - profile may get deleted in mgc_cleanup.
1452 If there are any setup/cleanup errors, save the lov
1453 name for safety cleanup later. */
1454 lprof = class_get_profile(lsi->lsi_ldd->ldd_svname);
1455 if (lprof && lprof->lp_dt) {
1456 OBD_ALLOC(extraname, strlen(lprof->lp_dt) + 1);
1457 strcpy(extraname, lprof->lp_dt);
1460 obd = class_name2obd(lsi->lsi_ldd->ldd_svname);
1462 CDEBUG(D_MOUNT, "stopping %s\n", obd->obd_name);
1463 if (lsi->lsi_flags & LSI_UMOUNT_FORCE)
1465 if (lsi->lsi_flags & LSI_UMOUNT_FAILOVER)
1467 /* We can't seem to give an error return code
1468 to .put_super, so we better make sure we clean up! */
1470 class_manual_cleanup(obd);
1472 CERROR("no obd %s\n", lsi->lsi_ldd->ldd_svname);
1473 server_deregister_mount(lsi->lsi_ldd->ldd_svname);
1477 /* If they wanted the mgs to stop separately from the mdt, they
1478 should have put it on a different device. */
1479 if (IS_MGS(lsi->lsi_ldd)) {
1480 /* stop the mgc before the mgs so the connection gets cleaned
1482 lustre_stop_mgc(sb);
1483 server_stop_mgs(sb);
1486 /* Clean the mgc and sb */
1487 rc = lustre_common_put_super(sb);
1488 /* FIXME how can I report a failure to umount? */
1490 /* Wait for the targets to really clean up - can't exit (and let the
1491 sb get destroyed) while the mount is still in use */
1492 server_wait_finished(mnt);
1494 /* drop the One True Mount */
1497 /* Stop the servers (MDS, OSS) if no longer needed. We must wait
1498 until the target is really gone so that our type refcount check
1500 server_stop_servers(lddflags, lsiflags);
1502 /* In case of startup or cleanup err, stop related obds */
1504 obd = class_name2obd(extraname);
1506 CWARN("Cleaning orphaned obd %s\n", extraname);
1508 class_manual_cleanup(obd);
1510 OBD_FREE(extraname, strlen(extraname) + 1);
1513 LCONSOLE_WARN("server umount %s complete\n", tmpname);
1514 OBD_FREE(tmpname, tmpname_sz);
1518 #ifdef HAVE_UMOUNTBEGIN_VFSMOUNT
1519 static void server_umount_begin(struct vfsmount *vfsmnt, int flags)
1521 struct super_block *sb = vfsmnt->mnt_sb;
1523 static void server_umount_begin(struct super_block *sb)
1526 struct lustre_sb_info *lsi = s2lsi(sb);
1529 #ifdef HAVE_UMOUNTBEGIN_VFSMOUNT
1530 if (!(flags & MNT_FORCE)) {
1536 CDEBUG(D_MOUNT, "umount -f\n");
1537 /* umount = failover
1539 no third way to do non-force, non-failover */
1540 lsi->lsi_flags &= ~LSI_UMOUNT_FAILOVER;
1541 lsi->lsi_flags |= LSI_UMOUNT_FORCE;
1545 #ifndef HAVE_STATFS_DENTRY_PARAM
1546 static int server_statfs (struct super_block *sb, struct kstatfs *buf)
1549 static int server_statfs (struct dentry *dentry, struct kstatfs *buf)
1551 struct super_block *sb = dentry->d_sb;
1553 struct vfsmount *mnt = s2lsi(sb)->lsi_srv_mnt;
1556 if (mnt && mnt->mnt_sb && mnt->mnt_sb->s_op->statfs) {
1557 #ifdef HAVE_STATFS_DENTRY_PARAM
1558 int rc = mnt->mnt_sb->s_op->statfs(mnt->mnt_root, buf);
1560 int rc = mnt->mnt_sb->s_op->statfs(mnt->mnt_sb, buf);
1563 buf->f_type = sb->s_magic;
1569 buf->f_type = sb->s_magic;
1570 buf->f_bsize = sb->s_blocksize;
1576 buf->f_namelen = NAME_MAX;
1580 static struct super_operations server_ops =
1582 .put_super = server_put_super,
1583 .umount_begin = server_umount_begin, /* umount -f */
1584 .statfs = server_statfs,
1587 #define log2(n) ffz(~(n))
1588 #define LUSTRE_SUPER_MAGIC 0x0BD00BD1
1590 static int server_fill_super_common(struct super_block *sb)
1592 struct inode *root = 0;
1595 CDEBUG(D_MOUNT, "Server sb, dev=%d\n", (int)sb->s_dev);
1597 sb->s_blocksize = 4096;
1598 sb->s_blocksize_bits = log2(sb->s_blocksize);
1599 sb->s_magic = LUSTRE_SUPER_MAGIC;
1600 sb->s_maxbytes = 0; //PAGE_CACHE_MAXBYTES;
1601 sb->s_flags |= MS_RDONLY;
1602 sb->s_op = &server_ops;
1604 root = new_inode(sb);
1606 CERROR("Can't make root inode\n");
1610 /* returns -EIO for every operation */
1611 /* make_bad_inode(root); -- badness - can't umount */
1612 /* apparently we need to be a directory for the mount to finish */
1613 root->i_mode = S_IFDIR;
1615 sb->s_root = d_alloc_root(root);
1617 CERROR("Can't make root dentry\n");
1625 static int server_fill_super(struct super_block *sb)
1627 struct lustre_sb_info *lsi = s2lsi(sb);
1628 struct vfsmount *mnt;
1632 /* the One True Mount */
1633 mnt = server_kernel_mount(sb);
1636 CERROR("Unable to mount device %s: %d\n",
1637 lsi->lsi_lmd->lmd_dev, rc);
1641 lsi->lsi_srv_mnt = mnt;
1643 LASSERT(lsi->lsi_ldd);
1644 CDEBUG(D_MOUNT, "Found service %s for fs '%s' on device %s\n",
1645 lsi->lsi_ldd->ldd_svname, lsi->lsi_ldd->ldd_fsname,
1646 lsi->lsi_lmd->lmd_dev);
1648 if (class_name2obd(lsi->lsi_ldd->ldd_svname)) {
1649 LCONSOLE_ERROR_MSG(0x161, "The target named %s is already "
1650 "running. Double-mount may have compromised"
1651 " the disk journal.\n",
1652 lsi->lsi_ldd->ldd_svname);
1655 GOTO(out, rc = -EALREADY);
1658 /* start MGS before MGC */
1659 if (IS_MGS(lsi->lsi_ldd)) {
1660 rc = server_start_mgs(sb);
1665 rc = lustre_start_mgc(sb);
1669 /* Set up all obd devices for service */
1670 if (!(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOSVC) &&
1671 (IS_OST(lsi->lsi_ldd) || IS_MDT(lsi->lsi_ldd))) {
1672 rc = server_start_targets(sb, mnt);
1674 CERROR("Unable to start targets: %d\n", rc);
1677 /* FIXME overmount client here,
1678 or can we just start a client log and client_fill_super on this sb?
1679 We need to make sure server_put_super gets called too - ll_put_super
1680 calls lustre_common_put_super; check there for LSI_SERVER flag,
1682 Probably should start client from new thread so we can return.
1683 Client will not finish until all servers are connected.
1684 Note - MGS-only server does NOT get a client, since there is no
1685 lustre fs associated - the MGS is for all lustre fs's */
1688 rc = server_fill_super_common(sb);
1692 LCONSOLE_WARN("Server %s on device %s has started\n",
1693 lsi->lsi_ldd->ldd_svname, lsi->lsi_lmd->lmd_dev);
1698 server_put_super(sb);
1703 /* Get the index from the obd name.
1704 rc = server type, or
1706 if endptr isn't NULL it is set to end of name */
1707 int server_name2index(char *svname, __u32 *idx, char **endptr)
1709 unsigned long index;
1711 char *dash = strchr(svname, '-');
1715 if (strncmp(dash + 1, "MDT", 3) == 0)
1716 rc = LDD_F_SV_TYPE_MDT;
1717 else if (strncmp(dash + 1, "OST", 3) == 0)
1718 rc = LDD_F_SV_TYPE_OST;
1722 index = simple_strtoul(dash + 4, endptr, 16);
/*************** mount common between server and client ***************/
1730 int lustre_common_put_super(struct super_block *sb)
1735 CDEBUG(D_MOUNT, "dropping sb %p\n", sb);
1737 /* Drop a ref to the MGC */
1738 rc = lustre_stop_mgc(sb);
1739 if (rc && (rc != -ENOENT)) {
1741 CERROR("Can't stop MGC: %d\n", rc);
1744 /* BUSY just means that there's some other obd that
1745 needs the mgc. Let him clean it up. */
1746 CDEBUG(D_MOUNT, "MGC still in use\n");
1748 /* Drop a ref to the mounted disk */
1754 static void lmd_print(struct lustre_mount_data *lmd)
1758 PRINT_CMD(PRINT_MASK, " mount data:\n");
1759 if (lmd_is_client(lmd))
1760 PRINT_CMD(PRINT_MASK, "profile: %s\n", lmd->lmd_profile);
1761 PRINT_CMD(PRINT_MASK, "device: %s\n", lmd->lmd_dev);
1762 PRINT_CMD(PRINT_MASK, "flags: %x\n", lmd->lmd_flags);
1763 if (lmd->lmd_sec_mdt)
1764 PRINT_CMD(PRINT_MASK, "sec_mdt: %s\n", lmd->lmd_sec_mdt);
1765 if (lmd->lmd_sec_cli)
1766 PRINT_CMD(PRINT_MASK, "sec_cli: %s\n", lmd->lmd_sec_cli);
1768 PRINT_CMD(PRINT_MASK, "options: %s\n", lmd->lmd_opts);
1769 for (i = 0; i < lmd->lmd_exclude_count; i++) {
1770 PRINT_CMD(PRINT_MASK, "exclude %d: OST%04x\n", i,
1771 lmd->lmd_exclude[i]);
1776 /* Is this server on the exclusion list */
1777 int lustre_check_exclusion(struct super_block *sb, char *svname)
1779 struct lustre_sb_info *lsi = s2lsi(sb);
1780 struct lustre_mount_data *lmd = lsi->lsi_lmd;
1785 rc = server_name2index(svname, &index, NULL);
1786 if (rc != LDD_F_SV_TYPE_OST)
1787 /* Only exclude OSTs */
1790 CDEBUG(D_MOUNT, "Check exclusion %s (%d) in %d of %s\n", svname,
1791 index, lmd->lmd_exclude_count, lmd->lmd_dev);
1793 for(i = 0; i < lmd->lmd_exclude_count; i++) {
1794 if (index == lmd->lmd_exclude[i]) {
1795 CWARN("Excluding %s (on exclusion list)\n", svname);
1802 /* mount -v -o exclude=lustre-OST0001:lustre-OST0002 -t lustre ... */
1803 static int lmd_make_exclusion(struct lustre_mount_data *lmd, char *ptr)
1805 char *s1 = ptr, *s2;
1806 __u32 index, *exclude_list;
1810 /* The shortest an ost name can be is 8 chars: -OST0000.
1811 We don't actually know the fsname at this time, so in fact
1812 a user could specify any fsname. */
1813 devmax = strlen(ptr) / 8 + 1;
1815 /* temp storage until we figure out how many we have */
1816 OBD_ALLOC(exclude_list, sizeof(index) * devmax);
1820 /* we enter this fn pointing at the '=' */
1821 while (*s1 && *s1 != ' ' && *s1 != ',') {
1823 rc = server_name2index(s1, &index, &s2);
1825 CERROR("Can't parse server name '%s'\n", s1);
1828 if (rc == LDD_F_SV_TYPE_OST)
1829 exclude_list[lmd->lmd_exclude_count++] = index;
1831 CDEBUG(D_MOUNT, "ignoring exclude %.7s\n", s1);
1833 /* now we are pointing at ':' (next exclude)
1834 or ',' (end of excludes) */
1835 if (lmd->lmd_exclude_count >= devmax)
1838 if (rc >= 0) /* non-err */
1841 if (lmd->lmd_exclude_count) {
1842 /* permanent, freed in lustre_free_lsi */
1843 OBD_ALLOC(lmd->lmd_exclude, sizeof(index) *
1844 lmd->lmd_exclude_count);
1845 if (lmd->lmd_exclude) {
1846 memcpy(lmd->lmd_exclude, exclude_list,
1847 sizeof(index) * lmd->lmd_exclude_count);
1850 lmd->lmd_exclude_count = 0;
1853 OBD_FREE(exclude_list, sizeof(index) * devmax);
1858 int lmd_set_sec_opts(char **set, char *opts, int length)
1861 OBD_FREE(*set, strlen(*set) + 1);
1863 OBD_ALLOC(*set, length + 1);
1867 memcpy(*set, opts, length);
1868 (*set)[length] = '\0';
1874 int lmd_parse_sec_opts(struct lustre_mount_data *lmd, char *ptr)
1880 /* check peer name */
1881 if (strncmp(ptr, "sec_mdt=", 8) == 0) {
1882 set = &lmd->lmd_sec_mdt;
1884 } else if (strncmp(ptr, "sec_cli=", 8) == 0) {
1885 set = &lmd->lmd_sec_cli;
1887 } else if (strncmp(ptr, "sec=", 4) == 0) {
1888 /* leave 'set' be null */
1891 CERROR("invalid security options: %s\n", ptr);
1895 tail = strchr(ptr, ',');
1897 length = strlen(ptr);
1899 length = tail - ptr;
1902 if (lmd_set_sec_opts(set, ptr, length))
1905 if (lmd->lmd_sec_mdt == NULL &&
1906 lmd_set_sec_opts(&lmd->lmd_sec_mdt, ptr, length))
1909 if (lmd->lmd_sec_cli == NULL &&
1910 lmd_set_sec_opts(&lmd->lmd_sec_cli, ptr, length))
1917 /* mount -v -t lustre uml1:uml2:/lustre-client /mnt/lustre */
1918 static int lmd_parse(char *options, struct lustre_mount_data *lmd)
1920 char *s1, *s2, *devname = NULL;
1921 struct lustre_mount_data *raw = (struct lustre_mount_data *)options;
1927 LCONSOLE_ERROR_MSG(0x162, "Missing mount data: check that "
1928 "/sbin/mount.lustre is installed.\n");
1932 /* Options should be a string - try to detect old lmd data */
1933 if ((raw->lmd_magic & 0xffffff00) == (LMD_MAGIC & 0xffffff00)) {
1934 LCONSOLE_ERROR_MSG(0x163, "You're using an old version of "
1935 "/sbin/mount.lustre. Please install "
1936 "version %s\n", LUSTRE_VERSION_STRING);
1939 lmd->lmd_magic = LMD_MAGIC;
1941 /* Set default flags here */
1946 /* Skip whitespace and extra commas */
1947 while (*s1 == ' ' || *s1 == ',')
1950 /* Client options are parsed in ll_options: eg. flock,
1953 /* Parse non-ldiskfs options here. Rather than modifying
1954 ldiskfs, we just zero these out here */
1955 if (strncmp(s1, "abort_recov", 11) == 0) {
1956 lmd->lmd_flags |= LMD_FLG_ABORT_RECOV;
1958 } else if (strncmp(s1, "nosvc", 5) == 0) {
1959 lmd->lmd_flags |= LMD_FLG_NOSVC;
1961 /* ost exclusion list */
1962 } else if (strncmp(s1, "exclude=", 8) == 0) {
1963 rc = lmd_make_exclusion(lmd, s1 + 7);
1967 } else if (strncmp(s1, "nllu=", 5) == 0) {
1968 lmd->lmd_nllu = simple_strtoul(s1 + 5, NULL, 10);
1970 } else if (strncmp(s1, "nllg=", 5) == 0) {
1971 lmd->lmd_nllg = simple_strtoul(s1 + 5, NULL, 10);
1973 } else if (strncmp(s1, "sec", 3) == 0) {
1974 rc = lmd_parse_sec_opts(lmd, s1);
1979 /* Linux 2.4 doesn't pass the device, so we stuck it at the
1980 end of the options. */
1981 else if (strncmp(s1, "device=", 7) == 0) {
1983 /* terminate options right before device. device
1984 must be the last one. */
1990 s2 = strchr(s1, ',');
1998 memmove(s1, s2, strlen(s2) + 1);
2004 LCONSOLE_ERROR_MSG(0x164, "Can't find the device name "
2005 "(need mount option 'device=...')\n");
2009 s1 = strrchr(devname, ':');
2011 lmd->lmd_flags = LMD_FLG_CLIENT;
2012 /* Remove leading /s from fsname */
2013 while (*++s1 == '/') ;
2014 /* Freed in lustre_free_lsi */
2015 OBD_ALLOC(lmd->lmd_profile, strlen(s1) + 8);
2016 if (!lmd->lmd_profile)
2018 sprintf(lmd->lmd_profile, "%s-client", s1);
2021 /* Freed in lustre_free_lsi */
2022 OBD_ALLOC(lmd->lmd_dev, strlen(devname) + 1);
2025 strcpy(lmd->lmd_dev, devname);
2027 /* Save mount options */
2028 s1 = options + strlen(options) - 1;
2029 while (s1 >= options && (*s1 == ',' || *s1 == ' '))
2031 if (*options != 0) {
2032 /* Freed in lustre_free_lsi */
2033 OBD_ALLOC(lmd->lmd_opts, strlen(options) + 1);
2036 strcpy(lmd->lmd_opts, options);
2039 lmd->lmd_magic = LMD_MAGIC;
2044 CERROR("Bad mount options %s\n", options);
2050 int lustre_fill_super(struct super_block *sb, void *data, int silent)
2052 struct lustre_mount_data *lmd;
2053 struct lustre_sb_info *lsi;
2057 CDEBUG(D_MOUNT|D_VFSTRACE, "VFS Op: sb %p\n", sb);
2059 lsi = lustre_init_lsi(sb);
2064 /* Figure out the lmd from the mount options */
2065 if (lmd_parse((char *)data, lmd)) {
2070 if (lmd_is_client(lmd)) {
2071 CDEBUG(D_MOUNT, "Mounting client %s\n", lmd->lmd_profile);
2072 if (!client_fill_super) {
2073 LCONSOLE_ERROR_MSG(0x165, "Nothing registered for "
2074 "client mount! Is the 'lustre' "
2075 "module loaded?\n");
2078 rc = lustre_start_mgc(sb);
2080 lustre_stop_mgc(sb);
2083 /* Connect and start */
2084 /* (should always be ll_fill_super) */
2085 rc = (*client_fill_super)(sb);
2086 /* c_f_s will call lustre_common_put_super on failure */
2089 CDEBUG(D_MOUNT, "Mounting server from %s\n", lmd->lmd_dev);
2090 lsi->lsi_flags |= LSI_SERVER;
2091 rc = server_fill_super(sb);
2092 /* s_f_s calls lustre_start_mgc after the mount because we need
2093 the MGS nids which are stored on disk. Plus, we may
2094 need to start the MGS first. */
2095 /* s_f_s will call server_put_super on failure */
2100 CERROR("Unable to mount %s (%d)\n",
2101 s2lsi(sb) ? lmd->lmd_dev : "", rc);
2103 CDEBUG(D_SUPER, "mount %s complete\n", lmd->lmd_dev);
2109 /* We can't call ll_fill_super by name because it lives in a module that
2110 must be loaded after this one. */
2111 void lustre_register_client_fill_super(int (*cfs)(struct super_block *sb))
2113 client_fill_super = cfs;
2116 /***************** FS registration ******************/
2118 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
2120 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18))
2121 struct super_block * lustre_get_sb(struct file_system_type *fs_type,
2122 int flags, const char *devname, void * data)
2124 /* calls back in fill super */
2125 /* we could append devname= onto options (*data) here,
2126 but 2.4 doesn't get devname. So we do it in mount_lustre.c */
2127 return get_sb_nodev(fs_type, flags, data, lustre_fill_super);
2130 int lustre_get_sb(struct file_system_type *fs_type,
2131 int flags, const char *devname, void * data,
2132 struct vfsmount *mnt)
2134 /* calls back in fill super */
2135 /* we could append devname= onto options (*data) here,
2136 but 2.4 doesn't get devname. So we do it in mount_lustre.c */
2137 return get_sb_nodev(fs_type, flags, data, lustre_fill_super, mnt);
2141 struct file_system_type lustre_fs_type = {
2142 .owner = THIS_MODULE,
2144 .get_sb = lustre_get_sb,
2145 .kill_sb = kill_anon_super,
2146 .fs_flags = FS_BINARY_MOUNTDATA,
2151 static struct super_block *lustre_read_super(struct super_block *sb,
2152 void *data, int silent)
2157 rc = lustre_fill_super(sb, data, silent);
2163 static struct file_system_type lustre_fs_type = {
2164 .owner = THIS_MODULE,
2166 .fs_flags = FS_NFSEXP_FSID,
2167 .read_super = lustre_read_super,
2171 int lustre_register_fs(void)
2173 return register_filesystem(&lustre_fs_type);
2176 int lustre_unregister_fs(void)
2178 return unregister_filesystem(&lustre_fs_type);
2181 EXPORT_SYMBOL(lustre_register_client_fill_super);
2182 EXPORT_SYMBOL(lustre_common_put_super);
2183 EXPORT_SYMBOL(lustre_process_log);
2184 EXPORT_SYMBOL(lustre_end_log);
2185 EXPORT_SYMBOL(server_get_mount);
2186 EXPORT_SYMBOL(server_get_mount_2);
2187 EXPORT_SYMBOL(server_put_mount);
2188 EXPORT_SYMBOL(server_put_mount_2);
2189 EXPORT_SYMBOL(server_register_target);
2190 EXPORT_SYMBOL(server_name2index);
2191 EXPORT_SYMBOL(server_mti_print);
2192 EXPORT_SYMBOL(do_lcfg);