1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * lustre/obdclass/obd_mount.c
5 * Client/server mount routines
7 * Copyright (c) 2006 Cluster File Systems, Inc.
8 * Author: Nathan Rutman <nathan@clusterfs.com>
10 * This file is part of Lustre, http://www.lustre.org/
12 * Lustre is free software; you can redistribute it and/or
13 * modify it under the terms of version 2 of the GNU General Public
14 * License as published by the Free Software Foundation.
16 * Lustre is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with Lustre; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
// Subsystem selector; presumably consumed by the CDEBUG/CERROR macros used in
// this file (their definitions are not visible here).
27 #define DEBUG_SUBSYSTEM S_MGMT
// Mask passed to CDEBUG by the mount-path messages in this file.
// NOTE(review): the expansion is an unparenthesized OR of three masks, which
// is only safe if every user parenthesizes its mask argument. TODO confirm.
28 #define D_MOUNT D_SUPER|D_CONFIG|D_WARNING
// Print routine and mask used together by ldd_print and server_mti_print.
29 #define PRINT_CMD LCONSOLE
30 #define PRINT_MASK D_WARNING
32 #include <linux/obd.h>
33 #include <linux/lvfs.h>
34 #include <linux/lustre_fsfilt.h>
35 #include <linux/obd_class.h>
36 #include <lustre/lustre_user.h>
37 #include <linux/version.h>
38 #include <linux/lustre_log.h>
39 #include <linux/lustre_disk.h>
40 #include <linux/lustre_ver.h>
// File-scope function pointer taking a super_block; starts out NULL. No
// assignment or call is visible in this part of the file.
42 static int (*client_fill_super)(struct super_block *sb) = NULL;
45 /*********** mount lookup *********/
// Mutex taken with down()/up() by the register, deregister, get and put
// routines below while they search or modify server_mount_info_list.
47 DECLARE_MUTEX(lustre_mount_info_lock);
// List of struct lustre_mount_info entries, linked through lmi_list_chain and
// looked up by lmi_name.
48 struct list_head server_mount_info_list = LIST_HEAD_INIT(server_mount_info_list);
// Find the registered mount whose lmi_name matches name exactly (strcmp).
// Walks server_mount_info_list without taking lustre_mount_info_lock itself;
// most visible callers do down() on that mutex before calling.
// NOTE(review): the return statements fall on lines missing from this view.
// Callers test the result against NULL, so presumably the matching entry is
// returned on a hit and NULL otherwise. Confirm against the full source.
50 static struct lustre_mount_info *server_find_mount(char *name)
52 struct list_head *tmp;
53 struct lustre_mount_info *lmi;
55 list_for_each(tmp, &server_mount_info_list) {
56 lmi = list_entry(tmp, struct lustre_mount_info, lmi_list_chain);
57 if (strcmp(name, lmi->lmi_name) == 0)
63 /* we must register an obd for a mount before we call the setup routine.
64 *_setup will call lustre_get_mount to get the mnt struct
65 by obd_name, since we can't pass the pointer to setup. */
// Register a mount under name by adding a new lustre_mount_info to
// server_mount_info_list.
//   name - lookup key; copied into a private buffer that becomes lmi_name
//   sb   - super block of the mount
// NOTE(review): the rest of the parameter list, the lines that store sb and
// the mount in lmi, and the return statements are missing from this view.
// Visible callers pass (name, sb, mnt). The lookup routine visible in this
// file is server_get_mount.
// If name is already registered, both allocations are released and an error
// is logged.
66 static int server_register_mount(char *name, struct super_block *sb,
69 struct lustre_mount_info *lmi;
76 OBD_ALLOC(lmi, sizeof(*lmi));
// Private copy of name; it becomes lmi_name below.
79 OBD_ALLOC(name_cp, strlen(name) + 1);
// Presumably the name_cp allocation-failure path (the test is on a missing
// line): the already allocated lmi is released.
81 OBD_FREE(lmi, sizeof(*lmi));
84 strcpy(name_cp, name);
// The duplicate check and the insertion happen under one hold of the mutex.
86 down(&lustre_mount_info_lock);
88 if (server_find_mount(name)) {
89 up(&lustre_mount_info_lock);
90 OBD_FREE(lmi, sizeof(*lmi));
91 OBD_FREE(name_cp, strlen(name) + 1);
92 CERROR("Already registered %s\n", name);
95 lmi->lmi_name = name_cp;
98 list_add(&lmi->lmi_list_chain, &server_mount_info_list);
100 up(&lustre_mount_info_lock);
// NOTE(review): lmi is read here after the mutex has been released.
102 CDEBUG(D_MOUNT, "reg_mnt %p from %s, vfscount=%d\n",
103 lmi->lmi_mnt, name, atomic_read(&lmi->lmi_mnt->mnt_count));
108 /* when an obd no longer needs a mount */
// Remove the entry registered under name from server_mount_info_list and free
// both the entry and its name copy. Lookup, unlink and free all happen while
// lustre_mount_info_lock is held.
// If no entry is found, the mutex is released and an error is logged (the test
// and the return statements are on lines missing from this view).
109 static int server_deregister_mount(char *name)
111 struct lustre_mount_info *lmi;
114 down(&lustre_mount_info_lock);
115 lmi = server_find_mount(name);
117 up(&lustre_mount_info_lock);
118 CERROR("%s not registered\n", name);
122 CDEBUG(D_MOUNT, "dereg_mnt %p from %s, vfscount=%d\n",
123 lmi->lmi_mnt, name, atomic_read(&lmi->lmi_mnt->mnt_count));
// The name buffer was allocated with strlen + 1 bytes in
// server_register_mount, so it is freed with the same size.
125 OBD_FREE(lmi->lmi_name, strlen(lmi->lmi_name) + 1);
126 list_del(&lmi->lmi_list_chain);
127 OBD_FREE(lmi, sizeof(*lmi));
128 up(&lustre_mount_info_lock);
133 /* Deregister anyone referencing the mnt. Everyone should have
134 put_mount in *_cleanup, but this is a catch-all in case of err... */
// Walks server_mount_info_list under lustre_mount_info_lock and frees every
// entry whose lmi_mnt equals mnt, logging each one with CERROR.
//   mnt - the vfsmount whose registrations are to be dropped
135 static void server_deregister_mount_all(struct vfsmount *mnt)
137 struct list_head *tmp, *n;
138 struct lustre_mount_info *lmi;
143 down(&lustre_mount_info_lock);
// The _safe iterator is used because entries are unlinked and freed inside
// the loop body.
144 list_for_each_safe(tmp, n, &server_mount_info_list) {
145 lmi = list_entry(tmp, struct lustre_mount_info, lmi_list_chain);
146 if (lmi->lmi_mnt == mnt) {
147 CERROR("Deregister failsafe %s\n", lmi->lmi_name);
148 OBD_FREE(lmi->lmi_name, strlen(lmi->lmi_name) + 1);
149 list_del(&lmi->lmi_list_chain);
150 OBD_FREE(lmi, sizeof(*lmi));
153 up(&lustre_mount_info_lock);
156 /* obd's look up a registered mount using their name. This is just
157 for initial obd setup to find the mount struct. It should not be
158 called every time you want to mntget. */
// Looks up name in the registered mounts and, on a hit, takes two references
// while the mutex is held: mntget() on lmi_mnt and an increment of the
// lsi_mounts counter of the owning super block.
// On a miss the mutex is released and an error is logged. The test and the
// return statements are on lines missing from this view.
159 struct lustre_mount_info *server_get_mount(char *name)
161 struct lustre_mount_info *lmi;
162 struct lustre_sb_info *lsi;
165 down(&lustre_mount_info_lock);
167 lmi = server_find_mount(name);
169 up(&lustre_mount_info_lock);
170 CERROR("Can't find mount for %s\n", name);
173 lsi = s2lsi(lmi->lmi_sb);
174 mntget(lmi->lmi_mnt);
175 atomic_inc(&lsi->lsi_mounts);
177 up(&lustre_mount_info_lock);
// NOTE(review): lmi and lsi are read here after the mutex has been released.
179 CDEBUG(D_MOUNT, "get_mnt %p from %s, refs=%d, vfscount=%d\n",
180 lmi->lmi_mnt, name, atomic_read(&lsi->lsi_mounts),
181 atomic_read(&lmi->lmi_mnt->mnt_count));
// Called by server_put_mount with lmi->lmi_mnt.
// NOTE(review): only the kernel_locked() test is visible in this view; the
// rest of the body is missing. Judging by the name it presumably calls
// mntput(), treating the kernel-locked case specially. Confirm against the
// full source.
186 static void unlock_mntput(struct vfsmount *mnt)
188 if (kernel_locked()) {
// Forward declaration: lustre_put_lsi is defined later in the file but is
// called from server_put_mount.
197 static int lustre_put_lsi(struct super_block *sb);
199 /* to be called from obd_cleanup methods */
// Drops the references taken by server_get_mount for name, then deregisters
// the name.
//   name - registered mount name
//   mnt  - must equal the registered lmi_mnt (enforced with LASSERT)
// On a miss the mutex is released and an error is logged. The test and the
// return statements are on lines missing from this view.
200 int server_put_mount(char *name, struct vfsmount *mnt)
202 struct lustre_mount_info *lmi;
203 struct lustre_sb_info *lsi;
206 down(&lustre_mount_info_lock);
207 lmi = server_find_mount(name);
209 up(&lustre_mount_info_lock);
210 CERROR("Can't find mount for %s\n", name);
213 lsi = s2lsi(lmi->lmi_sb);
214 LASSERT(lmi->lmi_mnt == mnt);
215 unlock_mntput(lmi->lmi_mnt);
217 CDEBUG(D_MOUNT, "put_mnt %p from %s, refs=%d, vfscount=%d\n",
218 lmi->lmi_mnt, name, atomic_read(&lsi->lsi_mounts),
219 atomic_read(&lmi->lmi_mnt->mnt_count));
// NOTE(review): on the last reference lustre_put_lsi calls lustre_free_lsi,
// which calls server_deregister_mount_all, which does down() on
// lustre_mount_info_lock and frees entries whose lmi_mnt matches. In the
// visible lines the mutex still appears to be held at this point, and lmi is
// dereferenced again afterwards. Confirm against the full source.
221 if (lustre_put_lsi(lmi->lmi_sb)) {
222 CDEBUG(D_MOUNT, "Last put of mnt %p from %s, vfscount=%d\n",
224 atomic_read(&lmi->lmi_mnt->mnt_count));
225 /* last mount is the One True Mount */
226 if (atomic_read(&lmi->lmi_mnt->mnt_count) > 1)
227 CERROR("%s: mount busy, vfscount=%d!\n", name,
228 atomic_read(&lmi->lmi_mnt->mnt_count));
230 up(&lustre_mount_info_lock);
232 /* this obd should never need the mount again */
// server_deregister_mount takes the mutex itself, so it is called only after
// the up() above.
233 server_deregister_mount(name);
239 /******* mount helper utilities *********/
// Dumps the fields of a lustre_disk_data through PRINT_CMD at PRINT_MASK:
// config version, fs name, server name, index, flags, disk fs type, mount
// options, then every MGS nid and every failover nid (or a placeholder line
// when the corresponding count is zero).
//   ldd - disk data to print; not modified by the visible lines
241 static void ldd_print(struct lustre_disk_data *ldd)
245 PRINT_CMD(PRINT_MASK, " disk data:\n");
246 PRINT_CMD(PRINT_MASK, "config: %d\n", ldd->ldd_config_ver);
247 PRINT_CMD(PRINT_MASK, "fs: %s\n", ldd->ldd_fsname);
248 PRINT_CMD(PRINT_MASK, "server: %s\n", ldd->ldd_svname);
249 PRINT_CMD(PRINT_MASK, "index: %04x\n", ldd->ldd_svindex);
250 PRINT_CMD(PRINT_MASK, "flags: %#x\n", ldd->ldd_flags);
251 PRINT_CMD(PRINT_MASK, "diskfs: %s\n", MT_STR(ldd));
252 PRINT_CMD(PRINT_MASK, "options: %s\n", ldd->ldd_mount_opts);
// The else-for form runs the loop only when the count is non-zero.
253 if (!ldd->ldd_mgsnid_count)
254 PRINT_CMD(PRINT_MASK, "no MGS nids\n");
255 else for (i = 0; i < ldd->ldd_mgsnid_count; i++) {
256 PRINT_CMD(PRINT_MASK, "mgs nid %d: %s\n", i,
257 libcfs_nid2str(ldd->ldd_mgsnid[i]));
259 if (!ldd->ldd_failnid_count)
260 PRINT_CMD(PRINT_MASK, "no failover nids\n");
261 else for (i = 0; i < ldd->ldd_failnid_count; i++) {
262 PRINT_CMD(PRINT_MASK, "failover nid %d: %s\n", i,
263 libcfs_nid2str(ldd->ldd_failnid[i]));
// Reads MOUNT_DATA_FILE from the pre-mounted disk into ldd and validates it.
//   mount_ctxt - run context pushed around the file access and popped at the
//                end
//   ldd        - destination buffer; must be exactly the on-disk size
// Every visible validation failure jumps to out_close with rc = -EINVAL:
// size mismatch, short read, bad magic, unsupported incompat features, and
// unsupported rocompat features. The out_close label, the file close and the
// return are on lines missing from this view.
267 static int ldd_parse(struct lvfs_run_ctxt *mount_ctxt,
268 struct lustre_disk_data *ldd)
270 struct lvfs_run_ctxt saved;
277 push_ctxt(&saved, mount_ctxt, NULL);
279 file = filp_open(MOUNT_DATA_FILE, O_RDONLY, 0644);
282 CERROR("cannot open %s: rc = %d\n", MOUNT_DATA_FILE, rc);
// The file size must equal sizeof(*ldd) exactly; no partial or extended
// layouts are accepted.
286 len = file->f_dentry->d_inode->i_size;
287 CDEBUG(D_MOUNT, "Have %s, size %lu\n", MOUNT_DATA_FILE, len);
288 if (len != sizeof(*ldd)) {
289 CERROR("disk data size does not match: see %lu expect %u\n",
291 GOTO(out_close, rc = -EINVAL);
294 rc = lustre_fread(file, ldd, len, &off);
296 CERROR("error reading %s: read %d of %lu\n",
297 MOUNT_DATA_FILE, rc, len);
298 GOTO(out_close, rc = -EINVAL);
302 if (ldd->ldd_magic != LDD_MAGIC) {
303 /* FIXME add swabbing support */
304 CERROR("Bad magic in %s: %x!=%x\n", MOUNT_DATA_FILE,
305 ldd->ldd_magic, LDD_MAGIC);
306 GOTO(out_close, rc = -EINVAL);
// Any feature bit outside the supported set is rejected.
309 if (ldd->ldd_feature_incompat & ~LDD_INCOMPAT_SUPP) {
310 CERROR("%s: unsupported incompat filesystem feature(s) %x\n",
312 ldd->ldd_feature_incompat & ~LDD_INCOMPAT_SUPP);
313 GOTO(out_close, rc = -EINVAL);
315 if (ldd->ldd_feature_rocompat & ~LDD_ROCOMPAT_SUPP) {
316 CERROR("%s: unsupported read-only filesystem feature(s) %x\n",
318 ldd->ldd_feature_rocompat & ~LDD_ROCOMPAT_SUPP);
319 /* Do something like remount filesystem read-only */
320 GOTO(out_close, rc = -EINVAL);
328 pop_ctxt(&saved, mount_ctxt, NULL);
// Writes ldd back to MOUNT_DATA_FILE.
//   mount_ctxt - run context pushed around the file access and popped at the
//                end
//   ldd        - data to write; its magic must already be LDD_MAGIC (LASSERT)
// Side effect: ldd_config_ver is incremented before the write is attempted,
// so in the visible lines it stays bumped even if the open or write fails.
// A failed write jumps to out_close with rc = -EINVAL. The label, the file
// close and the return are on lines missing from this view.
332 static int ldd_write(struct lvfs_run_ctxt *mount_ctxt,
333 struct lustre_disk_data *ldd)
335 struct lvfs_run_ctxt saved;
338 unsigned long len = sizeof(struct lustre_disk_data);
342 LASSERT(ldd->ldd_magic == LDD_MAGIC);
344 ldd->ldd_config_ver++;
346 push_ctxt(&saved, mount_ctxt, NULL);
348 file = filp_open(MOUNT_DATA_FILE, O_RDWR, 0644);
351 CERROR("cannot open %s: rc = %d\n", MOUNT_DATA_FILE, rc);
355 rc = lustre_fwrite(file, ldd, len, &off);
// NOTE(review): the message below says "read" although this is the write
// path; it looks copied from ldd_parse.
357 CERROR("error writing %s: read %d of %lu\n",
358 MOUNT_DATA_FILE, rc, len);
359 GOTO(out_close, rc = -EINVAL);
368 pop_ctxt(&saved, mount_ctxt, NULL);
373 /**************** config llog ********************/
// Builds an LCFG_LOG_START config record and hands it to the MGC stored in
// the super block info (lsi_mgc) via obd_process_config.
//   sb      - super block; its address is passed along in buffer 3
//   logname - config log name, buffer 1
//   cfg     - config instance, copied into buffer 2 (sizeof(*cfg) bytes)
// Buffer 0 is set to the MGC obd name by lustre_cfg_bufs_reset.
// A console error is printed when processing fails (the test and the return
// are on lines missing from this view).
// NOTE(review): no NULL check on the lustre_cfg_new result is visible.
375 /* Get a config log from the MGS and process it.
376 This func is called for both clients and servers.
377 Continue to process new statements appended to the logs
378 (whenever the config lock is revoked) until lustre_end_log
380 int lustre_process_log(struct super_block *sb, char *logname,
381 struct config_llog_instance *cfg)
383 struct lustre_cfg *lcfg;
384 struct lustre_cfg_bufs bufs;
385 struct lustre_sb_info *lsi = s2lsi(sb);
386 struct obd_device *mgc = lsi->lsi_mgc;
393 /* mgc_process_config */
394 lustre_cfg_bufs_reset(&bufs, mgc->obd_name);
395 lustre_cfg_bufs_set_string(&bufs, 1, logname);
396 lustre_cfg_bufs_set(&bufs, 2, cfg, sizeof(*cfg));
// Buffer 3 carries the sb pointer value itself, hence &sb and sizeof(sb).
397 lustre_cfg_bufs_set(&bufs, 3, &sb, sizeof(sb));
398 lcfg = lustre_cfg_new(LCFG_LOG_START, &bufs);
399 rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
400 lustre_cfg_free(lcfg);
403 LCONSOLE_ERROR("%s: The configuration '%s' could not be read "
404 "(%d), mount will fail.\n",
405 mgc->obd_name, logname, rc);
411 /* Stop watching this config log for updates */
// Builds an LCFG_LOG_END config record (buffer 0 = MGC obd name, buffer 1 =
// logname, buffer 2 = cfg) and hands it to the MGC from lsi_mgc via
// obd_process_config.
// NOTE(review): server_put_super calls this with cfg == NULL. The line just
// before the buffer-2 assignment is missing from this view; presumably it
// guards that assignment. Confirm against the full source.
412 int lustre_end_log(struct super_block *sb, char *logname,
413 struct config_llog_instance *cfg)
415 struct lustre_cfg *lcfg;
416 struct lustre_cfg_bufs bufs;
417 struct lustre_sb_info *lsi = s2lsi(sb);
418 struct obd_device *mgc = lsi->lsi_mgc;
424 /* mgc_process_config */
425 lustre_cfg_bufs_reset(&bufs, mgc->obd_name);
426 lustre_cfg_bufs_set_string(&bufs, 1, logname);
428 lustre_cfg_bufs_set(&bufs, 2, cfg, sizeof(*cfg));
429 lcfg = lustre_cfg_new(LCFG_LOG_END, &bufs);
430 rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
431 lustre_cfg_free(lcfg);
435 /**************** obd start *******************/
// Builds a lustre_cfg record for command cmd and runs it through
// class_process_config.
//   cfgname   - buffer 0 (set by lustre_cfg_bufs_reset)
//   nid       - stored in lcfg_nid
//   cmd       - LCFG_* command code
//   s1 .. s4  - optional strings for buffers 1 to 4; visible callers pass 0
//               for unused slots
// NOTE(review): a line is missing before each of the four set_string calls;
// presumably each one tests its string for NULL. Confirm against the full
// source. The return statement is also not visible.
437 static int do_lcfg(char *cfgname, lnet_nid_t nid, int cmd,
438 char *s1, char *s2, char *s3, char *s4)
440 struct lustre_cfg_bufs bufs;
441 struct lustre_cfg * lcfg = NULL;
444 CDEBUG(D_TRACE, "lcfg %s %#x %s %s %s %s\n", cfgname,
445 cmd, s1, s2, s3, s4);
447 lustre_cfg_bufs_reset(&bufs, cfgname);
449 lustre_cfg_bufs_set_string(&bufs, 1, s1);
451 lustre_cfg_bufs_set_string(&bufs, 2, s2);
453 lustre_cfg_bufs_set_string(&bufs, 3, s3);
455 lustre_cfg_bufs_set_string(&bufs, 4, s4);
457 lcfg = lustre_cfg_new(cmd, &bufs);
458 lcfg->lcfg_nid = nid;
459 rc = class_process_config(lcfg);
460 lustre_cfg_free(lcfg);
// Starts an obd in two steps through do_lcfg: LCFG_ATTACH (type, uuid), then
// LCFG_SETUP (s1, s2). When setup fails the obd is detached again with
// LCFG_DETACH so no half-built device is left behind.
// NOTE(review): the rest of the parameter list (s1, s2), the rc tests and the
// return statements are on lines missing from this view.
464 static int lustre_start_simple(char *obdname, char *type, char *uuid,
468 CDEBUG(D_MOUNT, "Starting obd %s (typ=%s)\n", obdname, type);
470 rc = do_lcfg(obdname, 0, LCFG_ATTACH, type, uuid, 0, 0);
472 CERROR("%s attach error %d\n", obdname, rc);
475 rc = do_lcfg(obdname, 0, LCFG_SETUP, s1, s2, 0, 0);
477 CERROR("%s setup error %d\n", obdname, rc);
478 do_lcfg(obdname, 0, LCFG_DETACH, 0, 0, 0, 0);
483 /* Set up a MGS to serve startup logs */
// Starts the MGS obd for this super block:
//   1. refuses if a mount is already registered under LUSTRE_MGS_OBDNAME,
//      naming the server that owns it;
//   2. registers this sb and its lsi_srv_mnt under LUSTRE_MGS_OBDNAME;
//   3. starts the obd with lustre_start_simple, deregistering the mount
//      again if that fails.
// The tests and return statements are on lines missing from this view.
// NOTE(review): no down() on lustre_mount_info_lock is visible around the
// server_find_mount call here, unlike the other callers.
484 static int server_start_mgs(struct super_block *sb)
486 struct lustre_sb_info *lsi = s2lsi(sb);
487 struct vfsmount *mnt = lsi->lsi_srv_mnt;
488 struct lustre_mount_info *lmi;
493 /* It is impossible to have more than 1 MGS per node, since
494 MGC wouldn't know which to connect to */
495 lmi = server_find_mount(LUSTRE_MGS_OBDNAME);
// lsi is re-pointed at the super block that already owns the MGS, only to
// report its server name.
497 lsi = s2lsi(lmi->lmi_sb);
498 LCONSOLE_ERROR("The MGS service was already started from "
499 "server %s\n", lsi->lsi_ldd->ldd_svname);
503 CDEBUG(D_CONFIG, "Start MGS service %s\n", LUSTRE_MGS_OBDNAME);
505 rc = server_register_mount(LUSTRE_MGS_OBDNAME, sb, mnt);
508 ((rc = lustre_start_simple(LUSTRE_MGS_OBDNAME, LUSTRE_MGS_NAME,
509 LUSTRE_MGS_OBDNAME, 0, 0))))
510 server_deregister_mount(LUSTRE_MGS_OBDNAME);
513 LCONSOLE_ERROR("Failed to start MGS '%s' (%d). Is the 'mgs' "
514 "module loaded?\n", LUSTRE_MGS_OBDNAME, rc);
// Stops the MGS obd: looks it up by LUSTRE_MGS_OBDNAME and runs
// class_manual_cleanup on it. When the lookup fails a debug message is logged
// instead (the test and the return statements are on lines missing from this
// view).
//   sb - not referenced in the visible lines
519 static int server_stop_mgs(struct super_block *sb)
521 struct obd_device *obd;
525 CDEBUG(D_MOUNT, "Stop MGS service %s\n", LUSTRE_MGS_OBDNAME);
527 /* There better be only one MGS */
528 obd = class_name2obd(LUSTRE_MGS_OBDNAME);
530 CDEBUG(D_CONFIG, "mgs %s not running\n", LUSTRE_MGS_OBDNAME);
534 /* The MGS should always stop when we say so */
536 rc = class_manual_cleanup(obd);
540 /* Set up a mgcobd to process startup logs */
// Makes sure the MGC obd (LUSTRE_MGC_OBDNAME) exists and is connected.
// Visible steps, in order:
//   - look the MGC up by name; a reference is taken on cl_mgc_refcount and
//     KEY_INIT_RECOV_BACKUP is set (presumably only when the obd already
//     exists, the test is on a missing line);
//   - otherwise require at least one MGS nid in lsi_lmd, add a uuid for the
//     first nid, generate an MGC uuid, and start the obd with
//     lustre_start_simple;
//   - add a uuid and a connection for every further MGS nid;
//   - look the obd up again, set KEY_INIT_RECOV_BACKUP, and obd_connect;
//   - remember the export in cl_mgc_mgsexp and set cl_mgc_refcount to 1.
// NOTE(review): many tests, labels, declarations (nid, i, recov_bk, uuidc)
// and the return statements are on lines missing from this view.
541 static int lustre_start_mgc(struct super_block *sb)
543 struct lustre_handle mgc_conn = {0, };
544 struct obd_connect_data ocd = { 0 };
545 struct lustre_sb_info *lsi = s2lsi(sb);
546 struct obd_device *obd;
547 struct obd_export *exp;
548 struct obd_uuid *uuid;
555 LASSERT(lsi->lsi_lmd);
557 obd = class_name2obd(LUSTRE_MGC_OBDNAME);
559 atomic_inc(&obd->u.cli.cl_mgc_refcount);
560 /* FIXME There's only one MGC, but users could give different
561 MGS nids on the mount line. So now do we add new MGS uuids
562 or not? If there's truly one MGS per site, the MGS uuids
563 _should_ all be the same. Maybe check here?
566 /* Try all connections, but only once (again).
567 We don't want to block another target from starting
568 (using its local copy of the log), but we do want to connect
569 if at all possible. */
570 CDEBUG(D_MOUNT, "Set MGS reconnect\n");
572 rc = obd_set_info(obd->obd_self_export,
573 strlen(KEY_INIT_RECOV_BACKUP),
574 KEY_INIT_RECOV_BACKUP,
575 sizeof(recov_bk), &recov_bk);
579 if (lsi->lsi_lmd->lmd_mgsnid_count == 0) {
580 LCONSOLE_ERROR("No NIDs for the MGS were given.\n");
584 CDEBUG(D_MOUNT, "Start MGC '%s'\n", LUSTRE_MGC_OBDNAME);
586 /* Add the first uuid for the MGS */
587 nid = lsi->lsi_lmd->lmd_mgsnid[0];
588 rc = do_lcfg(LUSTRE_MGC_OBDNAME, nid, LCFG_ADD_UUID,
589 libcfs_nid2str(nid), 0,0,0);
593 /* Generate a unique uuid for each MGC */
// NOTE(review): two uuid schemes follow (nid-based and random); the lines
// that select between them are missing from this view.
596 /* use the 1st non-loopback nid */
597 lnet_process_id_t id;
599 while ((rc = LNetGetId(i++, &id)) != -ENOENT) {
600 if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND)
604 sprintf(uuid->uuid, "mgc_"LPX64, id.nid);
606 /* random makes reconnect easier */
607 class_generate_random_uuid(uuidc);
608 class_uuid_unparse(uuidc, uuid);
610 CDEBUG(D_MOUNT, "generated uuid: %s\n", uuid->uuid);
613 rc = lustre_start_simple(LUSTRE_MGC_OBDNAME, LUSTRE_MGC_NAME,
614 (char *)uuid->uuid, LUSTRE_MGS_OBDNAME,
615 libcfs_nid2str(nid));
620 /* Add the redundant MGS nids */
// Starts at index 1 because nid 0 was added before the obd was started.
621 for (i = 1; i < lsi->lsi_lmd->lmd_mgsnid_count; i++) {
622 nid = lsi->lsi_lmd->lmd_mgsnid[i];
623 rc = do_lcfg(LUSTRE_MGC_OBDNAME, nid, LCFG_ADD_UUID,
624 libcfs_nid2str(nid), 0, 0, 0);
626 CERROR("Add uuid for %s failed %d\n",
627 libcfs_nid2str(nid), rc);
630 rc = do_lcfg(LUSTRE_MGC_OBDNAME, 0, LCFG_ADD_CONN,
631 libcfs_nid2str(nid), 0, 0, 0);
633 CERROR("Add conn for %s failed %d\n",
634 libcfs_nid2str(nid), rc);
637 obd = class_name2obd(LUSTRE_MGC_OBDNAME);
639 CERROR("Can't find mgcobd %s\n", LUSTRE_MGC_OBDNAME);
643 /* Try all connections, but only once. */
645 rc = obd_set_info(obd->obd_self_export,
646 strlen(KEY_INIT_RECOV_BACKUP), KEY_INIT_RECOV_BACKUP,
647 sizeof(recov_bk), &recov_bk);
650 CERROR("can't set %s %d\n", KEY_INIT_RECOV_BACKUP, rc);
652 /* FIXME add ACL support? */
653 //ocd.ocd_connect_flags = OBD_CONNECT_ACL;
655 /* We connect to the MGS at setup, and don't disconnect until cleanup */
656 rc = obd_connect(&mgc_conn, obd, &(obd->obd_uuid), &ocd);
658 CERROR("connect failed %d\n", rc);
662 exp = class_conn2export(&mgc_conn);
663 obd->u.cli.cl_mgc_mgsexp = exp;
665 /* And keep a refcount of servers/clients who started with "mount",
666 so we know when we can get rid of the mgc. */
667 atomic_set(&obd->u.cli.cl_mgc_refcount, 1);
670 /* Keep the mgc info in the sb. Note that many lsi's can point
// Drops one reference on the MGC (cl_mgc_refcount). Only when that was the
// last reference does it go on to disconnect the MGS export, clean up the obd
// and delete the uuid of every MGS nid listed in lsi_lmd.
// With LSI_UMOUNT_FORCE set in lsi_flags, obd_no_recov is set first.
// NOTE(review): the line that assigns obd, the early-return paths and the
// final return are missing from this view.
676 static int lustre_stop_mgc(struct super_block *sb)
678 struct lustre_sb_info *lsi = s2lsi(sb);
679 struct obd_device *obd;
691 if (!atomic_dec_and_test(&obd->u.cli.cl_mgc_refcount)) {
692 /* This is not fatal, every client that stops
693 will call in here. */
694 CDEBUG(D_MOUNT, "mgc still has %d references.\n",
695 atomic_read(&obd->u.cli.cl_mgc_refcount));
699 /* MGC must always stop */
701 /* client_disconnect_export uses the no_recov flag to decide whether it
702 should disconnect or just invalidate. (The MGC has no
703 recoverable data in any case.)
704 Without no_recov, we wait for locks to be dropped, so if the
705 MGS is down, we might wait for an obd timeout. With no-recov,
706 if the MGS is up, we don't tell it we're disconnecting, so
707 we must wait until the MGS evicts the dead client before the
708 client can reconnect. So it's either slow disconnect, or a
709 slow reconnect. This could probably be fixed on the server side
710 by ignoring handle mismatches in target_handle_reconnect. */
711 if (lsi->lsi_flags & LSI_UMOUNT_FORCE) {
712 /* FIXME maybe always set this? */
713 obd->obd_no_recov = 1;
716 if (obd->u.cli.cl_mgc_mgsexp)
717 obd_disconnect(obd->u.cli.cl_mgc_mgsexp);
719 rc = class_manual_cleanup(obd);
723 /* class_add_uuid adds a nid even if the same uuid exists; we might
724 delete any copy here. So they all better match. */
// NOTE(review): obd->obd_name is read below after class_manual_cleanup(obd)
// has already run; confirm that obd is still valid at this point.
725 for (i = 0; i < lsi->lsi_lmd->lmd_mgsnid_count; i++) {
726 nid = lsi->lsi_lmd->lmd_mgsnid[i];
727 rc = do_lcfg(obd->obd_name, nid, LCFG_DEL_UUID,
728 libcfs_nid2str(nid), 0, 0, 0);
// NOTE(review): the message below says MDC although this function handles
// the MGC.
730 CERROR("del MDC UUID %s failed: rc = %d\n",
731 libcfs_nid2str(nid), rc);
733 /* class_import_put will get rid of the additional connections */
738 /* Since there's only one mgc per node, we have to change its fs to get
739 access to the right disk. */
// Sends the "set_fs" key to the MGC through obd_set_info on its self export,
// and logs an error when that fails.
//   mgc - the MGC obd
//   sb  - super block whose lmd_dev is named in the debug message
// NOTE(review): the value arguments of the obd_set_info call, the rc test and
// the return are on lines missing from this view.
740 static int server_mgc_set_fs(struct obd_device *mgc, struct super_block *sb)
742 struct lustre_sb_info *lsi = s2lsi(sb);
746 CDEBUG(D_MOUNT, "Set mgc disk for %s\n", lsi->lsi_lmd->lmd_dev);
748 /* cl_mgc_sem in mgc ensures we sleep if the mgc_fs is busy */
749 rc = obd_set_info(mgc->obd_self_export,
750 strlen("set_fs"), "set_fs",
753 CERROR("can't set_fs %d\n", rc);
// Counterpart of server_mgc_set_fs: sends the "clear_fs" key with no value
// (length 0, NULL) to the MGC through obd_set_info on its self export.
// The return statement is on a line missing from this view.
759 static int server_mgc_clear_fs(struct obd_device *mgc)
764 CDEBUG(D_MOUNT, "Unassign mgc disk\n");
766 rc = obd_set_info(mgc->obd_self_export,
767 strlen("clear_fs"), "clear_fs", 0, NULL);
771 /* Stop MDS/OSS if nobody is using them */
// Depending on the server-type bits in lddflags, looks up the obd named MDS
// or OSS together with the matching obd type, and runs class_manual_cleanup
// on the obd when the type is missing or its typ_refcnt is zero.
//   lddflags - LDD_F_SV_TYPE_* bits of the target being stopped
//   lsiflags - not referenced in the visible lines
// type has no initializer, but it is only read when obd is non-NULL, and both
// visible paths that set obd also set type.
// NOTE(review): the lines between the cleanup test and the cleanup call, and
// the return, are missing from this view.
772 static int server_stop_servers(int lddflags, int lsiflags)
774 struct obd_device *obd = NULL;
775 struct obd_type *type;
779 /* Either an MDT or an OST or neither */
781 /* if this was an MDT, and there are no more MDT's, clean up the MDS */
782 if ((lddflags & LDD_F_SV_TYPE_MDT) && (obd = class_name2obd("MDS"))) {
783 //FIXME pre-rename, should eventually be LUSTRE_MDT_NAME
784 type = class_search_type(LUSTRE_MDS_NAME);
786 /* if this was an OST, and there are no more OST's, clean up the OSS */
787 if ((lddflags & LDD_F_SV_TYPE_OST) && (obd = class_name2obd("OSS"))) {
788 type = class_search_type(LUSTRE_OST_NAME);
791 if (obd && (!type || !type->typ_refcnt)) {
794 /* obd_fail doesn't mean much on a server obd */
795 err = class_manual_cleanup(obd);
// Dumps an mgs_target_info through PRINT_CMD at PRINT_MASK: the caller's
// title, then server name, fs name, uuid, config version and flags.
//   title - free-form label printed on the first line
//   mti   - target info to print; not modified by the visible lines
// The return statement is on a line missing from this view.
803 int server_mti_print(char *title, struct mgs_target_info *mti)
805 PRINT_CMD(PRINT_MASK, "mti %s\n", title);
806 PRINT_CMD(PRINT_MASK, "server: %s\n", mti->mti_svname);
807 PRINT_CMD(PRINT_MASK, "fs: %s\n", mti->mti_fsname);
808 PRINT_CMD(PRINT_MASK, "uuid: %s\n", mti->mti_uuid);
809 PRINT_CMD(PRINT_MASK, "ver: %d flags: %#x\n",
810 mti->mti_config_ver, mti->mti_flags);
// Fills an mgs_target_info from the disk data of a server super block:
// fs name, server name, local nids from LNetGetId (at most MTI_NIDS_MAX, with
// a test for LOLND nids), failover nids, uuid, flags and stripe index.
// mti_config_ver is always set to 0.
//   sb  - must be a server super block (LSI_SERVER is tested)
//   mti - output
// NOTE(review): the bodies of the LSI_SERVER test and of the LOLND test, the
// loop exit and the return are on lines missing from this view.
814 static int server_sb2mti(struct super_block *sb, struct mgs_target_info *mti)
816 struct lustre_sb_info *lsi = s2lsi(sb);
817 struct lustre_disk_data *ldd = lsi->lsi_ldd;
818 lnet_process_id_t id;
824 if (!(lsi->lsi_flags & LSI_SERVER))
// NOTE(review): strncpy with the full destination size leaves the
// destination unterminated when the source fills it completely.
827 strncpy(mti->mti_fsname, ldd->ldd_fsname,
828 sizeof(mti->mti_fsname));
829 strncpy(mti->mti_svname, ldd->ldd_svname,
830 sizeof(mti->mti_svname));
832 mti->mti_nid_count = 0;
833 while (LNetGetId(i++, &id) != -ENOENT) {
834 if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND)
836 mti->mti_nids[mti->mti_nid_count] = id.nid;
837 mti->mti_nid_count++;
838 if (mti->mti_nid_count >= MTI_NIDS_MAX) {
839 CWARN("Only using first %d nids for %s\n",
840 mti->mti_nid_count, mti->mti_svname);
845 mti->mti_failnid_count = ldd->ldd_failnid_count;
// NOTE(review): both copies are sized by the destination; this assumes the
// ldd source arrays are at least as large. TODO confirm.
846 memcpy(mti->mti_failnids, ldd->ldd_failnid, sizeof(mti->mti_failnids));
847 memcpy(mti->mti_uuid, ldd->ldd_uuid, sizeof(mti->mti_uuid));
848 mti->mti_config_ver = 0;
849 mti->mti_flags = ldd->ldd_flags;
850 mti->mti_stripe_index = ldd->ldd_svindex;
854 /* Register an old or new target with the MGS. If needed MGS will construct
855 startup logs and assign index */
// Builds an mgs_target_info from sb (server_sb2mti), sends it to the MGS with
// the "register_target" key on the MGC export, then applies the reply:
//   - ldd_flags takes the returned flags minus LDD_F_REWRITE_LDD;
//   - when LDD_F_REWRITE_LDD was set in the reply, the index and server name
//     are copied back into ldd and written to disk with ldd_write.
// NOTE(review): the allocation and release of mti, the rc tests, part of the
// obd_set_info argument list and the return are on lines missing from this
// view.
856 int server_register_target(struct super_block *sb)
858 struct lustre_sb_info *lsi = s2lsi(sb);
859 struct obd_device *mgc = lsi->lsi_mgc;
860 struct lustre_disk_data *ldd = lsi->lsi_ldd;
861 struct mgs_target_info *mti = NULL;
867 if (!(lsi->lsi_flags & LSI_SERVER))
871 rc = server_sb2mti(sb, mti);
875 CDEBUG(D_MOUNT, "Registration %s, fs=%s, %s, index=%04x, flags=%#x\n",
876 mti->mti_svname, mti->mti_fsname,
877 libcfs_nid2str(mti->mti_nids[0]), mti->mti_stripe_index,
880 /* Register the target */
881 /* FIXME use mdc_process_config instead */
882 rc = obd_set_info(mgc->u.cli.cl_mgc_mgsexp,
883 strlen("register_target"), "register_target",
886 CERROR("registration with the MGS failed (%d)\n", rc);
890 /* Always update our flags */
891 ldd->ldd_flags = mti->mti_flags & ~LDD_F_REWRITE_LDD;
893 /* If this flag is set, it means the MGS wants us to change our
894 on-disk data. (So far this means just the index.) */
895 if (mti->mti_flags & LDD_F_REWRITE_LDD) {
896 CDEBUG(D_MOUNT, "Must change on-disk index from %#x to %#x for "
898 ldd->ldd_svindex, mti->mti_stripe_index,
900 ldd->ldd_svindex = mti->mti_stripe_index;
901 strncpy(ldd->ldd_svname, mti->mti_svname,
902 sizeof(ldd->ldd_svname));
903 /* or ldd_make_sv_name(ldd); */
// NOTE(review): the result of ldd_write is not used in the visible lines.
904 ldd_write(&mgc->obd_lvfs_ctxt, ldd);
906 /* FIXME write last_rcvd?, disk label? */
// Brings up the target described by the disk data of sb. Visible steps:
//   1. for an MDT, look up the MDS obd and start it with lustre_start_simple;
//      likewise the OSS for an OST (presumably only when the lookup finds
//      nothing, the test is on a missing line);
//   2. point the MGC at this disk (server_mgc_set_fs);
//   3. register with the MGS; failure is reported as an error when the disk
//      flags contain NEED_INDEX, UPDATE or UPGRADE14;
//   4. register the mount under the target name so the target can find it;
//   5. process the config log named after the target, then check that an obd
//      with that name now exists;
//   6. release the MGC disk (server_mgc_clear_fs).
// NOTE(review): the rc tests, the out_servers label and other labels, and the
// return are on lines missing from this view.
916 static int server_start_targets(struct super_block *sb, struct vfsmount *mnt)
918 struct obd_device *obd;
919 struct lustre_sb_info *lsi = s2lsi(sb);
920 struct config_llog_instance cfg;
924 CDEBUG(D_MOUNT, "starting target %s\n", lsi->lsi_ldd->ldd_svname);
926 /* If we're an MDT, make sure the global MDS is running */
927 if (lsi->lsi_ldd->ldd_flags & LDD_F_SV_TYPE_MDT) {
928 /* make sure (what will be called) the MDS is started */
929 obd = class_name2obd("MDS");
931 //FIXME pre-rename, should eventually be LUSTRE_MDS_NAME
932 rc = lustre_start_simple("MDS", LUSTRE_MDT_NAME,
935 CERROR("failed to start MDS: %d\n", rc);
936 GOTO(out_servers, rc);
941 /* If we're an OST, make sure the global OSS is running */
942 if (lsi->lsi_ldd->ldd_flags & LDD_F_SV_TYPE_OST) {
943 /* make sure OSS is started */
944 obd = class_name2obd("OSS");
946 rc = lustre_start_simple("OSS", LUSTRE_OSS_NAME,
949 CERROR("failed to start OSS: %d\n", rc);
950 GOTO(out_servers, rc);
955 /* Set the mgc fs to our server disk. This allows the MGC
956 to read and write configs locally. */
// NOTE(review): the result of server_mgc_set_fs is not used here.
957 server_mgc_set_fs(lsi->lsi_mgc, sb);
959 /* Register with MGS */
960 rc = server_register_target(sb);
961 if (rc && (lsi->lsi_ldd->ldd_flags &
962 (LDD_F_NEED_INDEX | LDD_F_UPDATE | LDD_F_UPGRADE14))){
963 CERROR("Required registration failed for %s: %d\n",
964 lsi->lsi_ldd->ldd_svname, rc);
966 LCONSOLE_ERROR("Communication error with the MGS. Is "
967 "the MGS running?\n");
972 /* Let the target look up the mount using the target's name
973 (we can't pass the sb or mnt through class_process_config.) */
974 rc = server_register_mount(lsi->lsi_ldd->ldd_svname, sb, mnt);
978 /* Start targets using the llog named for the target */
979 memset(&cfg, 0, sizeof(cfg));
980 rc = lustre_process_log(sb, lsi->lsi_ldd->ldd_svname, &cfg);
982 CERROR("failed to start server %s: %d\n",
983 lsi->lsi_ldd->ldd_svname, rc);
// A successful log run is not trusted on its own: the target obd must exist.
987 if (!class_name2obd(lsi->lsi_ldd->ldd_svname)) {
988 CERROR("no server named %s was started\n",
989 lsi->lsi_ldd->ldd_svname);
994 /* Release the mgc fs for others to use */
995 server_mgc_clear_fs(lsi->lsi_mgc);
1001 /***************** lustre superblock **************/
// Allocates a lustre_sb_info together with its lustre_mount_data and attaches
// the lsi to sb through s2lsi_nocast. Initial state set by the visible lines:
//   - lmd_exclude_count = 0
//   - lsi_mounts = 1 (the setup reference)
//   - lsi_flags = LSI_UMOUNT_FAILOVER
// When the lmd allocation fails the lsi is released again.
// NOTE(review): the check on the lsi allocation and the return statements are
// on lines missing from this view.
1003 struct lustre_sb_info *lustre_init_lsi(struct super_block *sb)
1005 struct lustre_sb_info *lsi = NULL;
1008 OBD_ALLOC(lsi, sizeof(*lsi));
1011 OBD_ALLOC(lsi->lsi_lmd, sizeof(*lsi->lsi_lmd));
1012 if (!lsi->lsi_lmd) {
1013 OBD_FREE(lsi, sizeof(*lsi));
1017 lsi->lsi_lmd->lmd_exclude_count = 0;
1018 s2lsi_nocast(sb) = lsi;
1019 /* we take 1 extra ref for our setup */
1020 atomic_set(&lsi->lsi_mounts, 1);
1022 /* Default umount style */
1023 lsi->lsi_flags = LSI_UMOUNT_FAILOVER;
// Tears down the lustre_sb_info attached to sb. Requires lsi_mounts == 0 and
// lsi_llsbi == NULL (both LASSERTed). Frees lsi_ldd, the strings and exclude
// array inside lsi_lmd, lsi_lmd itself, drops every mount registration that
// still points at lsi_srv_mnt, frees the lsi and clears the pointer in sb.
// Note: server_deregister_mount_all takes lustre_mount_info_lock.
// The return statement is on a line missing from this view.
1027 static int lustre_free_lsi(struct super_block *sb)
1029 struct lustre_sb_info *lsi = s2lsi(sb);
1035 CDEBUG(D_MOUNT, "Freeing lsi\n");
1037 /* someone didn't call server_put_mount. */
1038 LASSERT(atomic_read(&lsi->lsi_mounts) == 0);
1040 if (lsi->lsi_ldd != NULL)
1041 OBD_FREE(lsi->lsi_ldd, sizeof(*lsi->lsi_ldd));
1043 if (lsi->lsi_lmd != NULL) {
// The strings are freed with strlen + 1, so this assumes they were allocated
// with exactly that size. TODO confirm at the allocation site.
1044 if (lsi->lsi_lmd->lmd_dev != NULL)
1045 OBD_FREE(lsi->lsi_lmd->lmd_dev,
1046 strlen(lsi->lsi_lmd->lmd_dev) + 1);
1047 if (lsi->lsi_lmd->lmd_opts != NULL)
1048 OBD_FREE(lsi->lsi_lmd->lmd_opts,
1049 strlen(lsi->lsi_lmd->lmd_opts) + 1);
1050 if (lsi->lsi_lmd->lmd_exclude_count)
1051 OBD_FREE(lsi->lsi_lmd->lmd_exclude,
1052 sizeof(lsi->lsi_lmd->lmd_exclude[0]) *
1053 lsi->lsi_lmd->lmd_exclude_count);
1054 OBD_FREE(lsi->lsi_lmd, sizeof(*lsi->lsi_lmd));
1057 LASSERT(lsi->lsi_llsbi == NULL);
1059 server_deregister_mount_all(lsi->lsi_srv_mnt);
1061 OBD_FREE(lsi, sizeof(*lsi));
1062 s2lsi_nocast(sb) = NULL;
// Drops one reference on lsi_mounts and frees the lsi (lustre_free_lsi) when
// the count reaches zero.
// NOTE(review): the return statements are on lines missing from this view.
// server_put_mount treats a non-zero result as the last put.
1067 static int lustre_put_lsi(struct super_block *sb)
1069 struct lustre_sb_info *lsi = s2lsi(sb);
1074 CDEBUG(D_MOUNT, "put %p %d\n", sb, atomic_read(&lsi->lsi_mounts));
1076 if (atomic_dec_and_test(&lsi->lsi_mounts)) {
1077 lustre_free_lsi(sb);
1083 /*************** server mount ******************/
1085 /* Kernel mount using mount options in MOUNT_DATA_FILE */
// Mounts the backing device of a server super block in two phases:
//   1. a pre-mount (ext3 first, then ldiskfs as the fallback) used only to
//      read the on-disk data through ldd_parse;
//   2. the real mount with the fs type from MT_STR(ldd), using the on-disk
//      mount options followed by any mount-line options, plus MS_NOATIME and
//      MS_NODIRATIME.
// On success ldd is stored in lsi_ldd, which owns it from then on. On the
// visible error path ldd is freed, lsi_ldd is cleared and ERR_PTR(rc) is
// returned.
// NOTE(review): the rc tests, the release of the pre-mount, the release of
// the options page, the labels and the success return are on lines missing
// from this view.
1086 static struct vfsmount *server_kernel_mount(struct super_block *sb)
1088 struct lvfs_run_ctxt mount_ctxt;
1089 struct lustre_sb_info *lsi = s2lsi(sb);
1090 struct lustre_disk_data *ldd;
1091 struct lustre_mount_data *lmd = lsi->lsi_lmd;
1092 struct vfsmount *mnt;
1093 char *options = NULL;
1094 unsigned long page, s_flags;
1098 OBD_ALLOC(ldd, sizeof(*ldd));
1100 RETURN(ERR_PTR(-ENOMEM));
1102 /* In the past, we have always used flags = 0.
1103 Note ext3/ldiskfs can't be mounted ro. */
1104 s_flags = sb->s_flags;
1106 /* Pre-mount ext3 to read the MOUNT_DATA_FILE */
1107 CDEBUG(D_MOUNT, "Pre-mount ext3 %s\n", lmd->lmd_dev);
1108 mnt = do_kern_mount("ext3", s_flags, lmd->lmd_dev, 0);
1111 CERROR("premount ext3 failed (%d), trying ldiskfs\n", rc);
1112 /* If ext3 fails (bec. of mballoc, extents), try ldiskfs */
1113 mnt = do_kern_mount("ldiskfs", s_flags, lmd->lmd_dev, 0);
1116 CERROR("premount ldiskfs failed: rc = %d\n", rc);
// Run context rooted at the pre-mount, so ldd_parse opens MOUNT_DATA_FILE
// relative to that mount.
1121 OBD_SET_CTXT_MAGIC(&mount_ctxt);
1122 mount_ctxt.pwdmnt = mnt;
1123 mount_ctxt.pwd = mnt->mnt_root;
1124 mount_ctxt.fs = get_ds();
1126 rc = ldd_parse(&mount_ctxt, ldd);
1130 CERROR("premount parse options failed: rc = %d\n", rc);
1134 /* Done with our pre-mount, now do the real mount. */
1136 /* Glom up mount options */
1137 page = __get_free_page(GFP_KERNEL);
1139 GOTO(out_free, rc = -ENOMEM);
// The page is zeroed first, so the bounded strncpy below always leaves a
// terminated string.
1141 options = (char *)page;
1142 memset(options, 0, PAGE_SIZE);
1143 strncpy(options, ldd->ldd_mount_opts, PAGE_SIZE - 2);
1145 /* Add in any mount-line options */
1146 if (lmd->lmd_opts && (*(lmd->lmd_opts) != 0)) {
// len reserves room for the comma and the terminator: current length + 1 +
// len + 1 comes to at most PAGE_SIZE.
1147 int len = PAGE_SIZE - strlen(options) - 2;
1149 strcat(options, ",");
1150 strncat(options, lmd->lmd_opts, len);
1153 /* Special permanent mount flags */
1155 s_flags |= MS_NOATIME | MS_NODIRATIME;
1157 CDEBUG(D_MOUNT, "kern_mount: %s %s %s\n",
1158 MT_STR(ldd), lmd->lmd_dev, options);
1159 mnt = do_kern_mount(MT_STR(ldd), s_flags, lmd->lmd_dev,
1164 CERROR("do_kern_mount failed: rc = %d\n", rc);
1168 lsi->lsi_ldd = ldd; /* freed at lsi cleanup */
1169 CDEBUG(D_SUPER, "%s: mnt = %p\n", lmd->lmd_dev, mnt);
1173 OBD_FREE(ldd, sizeof(*ldd));
1174 lsi->lsi_ldd = NULL;
1175 RETURN(ERR_PTR(rc));
// Polls mnt_count of mnt until it drops to zero or the retry budget runs out,
// warning on every pass and logging an error if the mount is still busy at
// the end.
// The wait uses a private wait queue with the constant condition 0 and a
// 2 * HZ timeout, so each pass presumably just sleeps for the timeout.
// NOTE(review): the declaration and initial value of retries are on a line
// missing from this view.
1178 static void server_wait_finished(struct vfsmount *mnt)
1180 wait_queue_head_t waitq;
1181 struct l_wait_info lwi;
1184 init_waitqueue_head(&waitq);
1186 while ((atomic_read(&mnt->mnt_count) > 0) && retries--) {
1187 CWARN("Mount still busy with %d refs\n",
1188 atomic_read(&mnt->mnt_count));
1190 /* Wait for a bit */
1191 lwi = LWI_TIMEOUT(2 * HZ, NULL, NULL);
1192 l_wait_event(waitq, 0, &lwi);
1194 if (atomic_read(&mnt->mnt_count)) {
1195 CERROR("Mount is still busy, giving up.\n");
// put_super handler for server super blocks (installed through server_ops).
// Visible steps, in order:
//   1. for an MDT or OST: end the config log, look the target obd up by its
//      server name, clean it up, and deregister the mount under that name;
//   2. for an MGS: stop the MGC first, then the MGS;
//   3. lustre_common_put_super(sb);
//   4. wait for the backing mount to become idle (server_wait_finished);
//   5. stop the MDS or OSS service obds if unused (server_stop_servers).
// mnt, lddflags and lsiflags are copied into locals at entry and those copies
// are what steps 4 and 5 use, presumably because the lsi may already be gone
// after step 3. TODO confirm.
// NOTE(review): the obd NULL test, the bodies of the two umount-flag tests
// and the drop of the One True Mount are on lines missing from this view.
1199 static void server_put_super(struct super_block *sb)
1201 struct lustre_sb_info *lsi = s2lsi(sb);
1202 struct obd_device *obd;
1203 struct vfsmount *mnt = lsi->lsi_srv_mnt;
1204 int lddflags = lsi->lsi_ldd->ldd_flags;
1205 int lsiflags = lsi->lsi_flags;
1209 LASSERT(lsiflags & LSI_SERVER);
1211 CDEBUG(D_MOUNT, "server put_super %s\n", lsi->lsi_ldd->ldd_svname);
1213 /* Stop the target */
1214 if (IS_MDT(lsi->lsi_ldd) || IS_OST(lsi->lsi_ldd)) {
1216 /* tell the mgc to drop the config log */
1217 lustre_end_log(sb, lsi->lsi_ldd->ldd_svname, NULL);
1219 obd = class_name2obd(lsi->lsi_ldd->ldd_svname);
1221 CDEBUG(D_MOUNT, "stopping %s\n", obd->obd_name);
1222 if (lsi->lsi_flags & LSI_UMOUNT_FORCE)
1224 if (lsi->lsi_flags & LSI_UMOUNT_FAILOVER)
1226 /* We can't seem to give an error return code
1227 to .put_super, so we better make sure we clean up!
1228 FIXME is there a way to get around this? */
1230 class_manual_cleanup(obd);
1232 CERROR("no obd %s\n", lsi->lsi_ldd->ldd_svname);
1233 server_deregister_mount(lsi->lsi_ldd->ldd_svname);
1237 /* If they wanted the mgs to stop separately from the mdt, they
1238 should have put it on a different device. */
1239 if (IS_MGS(lsi->lsi_ldd)) {
1240 /* stop the mgc before the mgs so the connection gets cleaned
1242 lustre_stop_mgc(sb);
1243 server_stop_mgs(sb);
1246 /* clean the mgc and sb */
1247 rc = lustre_common_put_super(sb);
1248 // FIXME how do I return a failure?
1250 /* drop the One True Mount */
1253 /* Wait for the targets to really clean up - can't exit (and let the
1254 sb get destroyed) while the mount is still in use */
1255 server_wait_finished(mnt);
1257 /* Stop the servers (MDS, OSS) if no longer needed. We must wait
1258 until the target is really gone so that our type refcount check
1260 server_stop_servers(lddflags, lsiflags);
1262 CDEBUG(D_MOUNT|D_WARNING, "umount done\n");
// umount_begin handler (installed through server_ops): switches the umount
// style recorded in lsi_flags from failover to force by clearing
// LSI_UMOUNT_FAILOVER and setting LSI_UMOUNT_FORCE.
// lustre_stop_mgc and server_put_super test these flags later.
1266 static void server_umount_begin(struct super_block *sb)
1268 struct lustre_sb_info *lsi = s2lsi(sb);
1271 CDEBUG(D_MOUNT, "umount -f\n");
1272 /* umount = failover
1274 no third way to do non-force, non-failover */
1275 lsi->lsi_flags &= ~LSI_UMOUNT_FAILOVER;
1276 lsi->lsi_flags |= LSI_UMOUNT_FORCE;
// statfs handler (installed through server_ops).
// When the backing mount, its super block and that super block's statfs
// operation all exist, the call is forwarded to the backing fs and f_type is
// then overwritten with the magic of this sb. Otherwise buf is filled from sb
// directly: f_type, f_bsize and f_namelen are visible here.
// NOTE(review): the rc handling, the other buf fields of the fallback path
// and the returns are on lines missing from this view.
1280 static int server_statfs (struct super_block *sb, struct kstatfs *buf)
1282 struct vfsmount *mnt = s2lsi(sb)->lsi_srv_mnt;
1285 if (mnt && mnt->mnt_sb && mnt->mnt_sb->s_op->statfs) {
1286 int rc = mnt->mnt_sb->s_op->statfs(mnt->mnt_sb, buf);
1288 buf->f_type = sb->s_magic;
1294 buf->f_type = sb->s_magic;
1295 buf->f_bsize = sb->s_blocksize;
1301 buf->f_namelen = NAME_MAX;
// Super block operations for server mounts; installed into sb->s_op by
// server_fill_super_common. Only these three handlers are set in the visible
// lines.
1305 static struct super_operations server_ops =
1307 .put_super = server_put_super,
1308 .umount_begin = server_umount_begin, /* umount -f */
1309 .statfs = server_statfs,
// Assuming ffz is the kernel find-first-zero-bit helper, this yields the
// index of the lowest set bit of n, which equals log2(n) only when n is a
// power of two. The one visible use passes 4096.
1312 #define log2(n) ffz(~(n))
// Magic number stored in sb->s_magic for server super blocks.
1313 #define LUSTRE_SUPER_MAGIC 0x0BD00BD1
// Fills in the generic fields of a server super block: 4096-byte block size,
// LUSTRE_SUPER_MAGIC, s_maxbytes of 0, MS_RDONLY added to s_flags, and
// server_ops as s_op. It then creates a root inode marked as a directory and
// a root dentry for it.
// NOTE(review): the NULL tests on the inode and the dentry, their cleanup and
// the return statements are on lines missing from this view.
1315 static int server_fill_super_common(struct super_block *sb)
1317 struct inode *root = 0;
1320 CDEBUG(D_MOUNT, "Server sb, dev=%d\n", (int)sb->s_dev);
1322 sb->s_blocksize = 4096;
1323 sb->s_blocksize_bits = log2(sb->s_blocksize);
1324 sb->s_magic = LUSTRE_SUPER_MAGIC;
1325 sb->s_maxbytes = 0; //PAGE_CACHE_MAXBYTES;
1326 sb->s_flags |= MS_RDONLY;
1327 sb->s_op = &server_ops;
1329 root = new_inode(sb);
1331 CERROR("Can't make root inode\n");
1335 /* returns -EIO for every operation */
1336 /* make_bad_inode(root); -- badness - can't umount */
1337 /* apparently we need to be a directory for the mount to finish */
1338 root->i_mode = S_IFDIR;
1340 sb->s_root = d_alloc_root(root);
1342 CERROR("Can't make root dentry\n");
/* Set up the superblock for a server mount.
 * sb: superblock whose lustre_sb_info already holds the parsed mount data.
 * Steps visible in this function, in order:
 *  - mount the backing device with server_kernel_mount() and remember the
 *    vfsmount in lsi_srv_mnt;
 *  - fail with -EALREADY when an obd with the target's service name already
 *    exists;
 *  - append the on-disk MGS NIDs to the mount-line MGS NIDs, stopping once
 *    the combined count reaches MTI_NIDS_MAX;
 *  - when the disk data marks the device as an MGS, start the MGS before
 *    the MGC and add NIDs obtained from LNetGetId() to the MGS NID list,
 *    again bounded by MTI_NIDS_MAX;
 *  - start the MGC;
 *  - unless LMD_FLG_NOSVC is set, start the targets when the device is an
 *    OST or an MDT;
 *  - finish with server_fill_super_common().
 * server_put_super() is called on the cleanup line near the end.
 * NOTE(review): several error checks, the jump labels and the return
 * statements are not visible in this listing; the description above is
 * limited to the lines that are. */
1350 static int server_fill_super(struct super_block *sb)
1352 struct lustre_sb_info *lsi = s2lsi(sb);
1353 struct vfsmount *mnt;
1354 int mgs_service = 0, i = 0, rc;
1357 /* the One True Mount */
1358 mnt = server_kernel_mount(sb);
1361 CERROR("Unable to mount device %s: %d\n",
1362 lsi->lsi_lmd->lmd_dev, rc);
1365 lsi->lsi_srv_mnt = mnt;
/* The disk data is required from here on. */
1367 LASSERT(lsi->lsi_ldd);
1368 CDEBUG(D_MOUNT, "Found service %s for fs '%s' on device %s\n",
1369 lsi->lsi_ldd->ldd_svname, lsi->lsi_ldd->ldd_fsname,
1370 lsi->lsi_lmd->lmd_dev);
/* An existing obd with this service name means the target is already
 * running; the mount is refused. */
1372 if (class_name2obd(lsi->lsi_ldd->ldd_svname)) {
1373 LCONSOLE_ERROR("The target named %s is already running. "
1374 "Double-mount may have compromised the disk "
1375 "journal.\n", lsi->lsi_ldd->ldd_svname);
1378 GOTO(out, rc = -EALREADY);
1381 /* append on-disk MGS nids to mount-line MGS nids */
1382 for (i = 0; (i < lsi->lsi_ldd->ldd_mgsnid_count) &&
1383 (lsi->lsi_lmd->lmd_mgsnid_count < MTI_NIDS_MAX); i++) {
1384 lsi->lsi_lmd->lmd_mgsnid[lsi->lsi_lmd->lmd_mgsnid_count++] =
1385 lsi->lsi_ldd->ldd_mgsnid[i];
1388 /* start MGS before MGC */
1389 if (IS_MGS(lsi->lsi_ldd)) {
1390 rc = server_start_mgs(sb);
/* A failed MGS start is only logged; the GOTO below is commented out. */
1392 CERROR("ignoring Failed MGS start!!\n");
1393 //GOTO(out_mnt, rc);
1395 /* add local nids (including LO) to MGS nids */
1396 lnet_process_id_t id;
1397 int j = lsi->lsi_lmd->lmd_mgsnid_count;
/* Walk LNetGetId() results until it reports -ENOENT; j tracks the next
 * free slot in lmd_mgsnid and is checked against MTI_NIDS_MAX.
 * NOTE(review): the initial value of i for this loop is not visible in
 * this listing (i was also the index of the loop above) -- confirm. */
1399 while ((rc = LNetGetId(i++, &id)) != -ENOENT) {
1400 if (j >= MTI_NIDS_MAX)
1402 lsi->lsi_lmd->lmd_mgsnid[j++] = id.nid;
1404 lsi->lsi_lmd->lmd_mgsnid_count = j;
1410 rc = lustre_start_mgc(sb);
1414 /* Set up all obd devices for service */
1415 if (!(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOSVC) &&
1416 (IS_OST(lsi->lsi_ldd) || IS_MDT(lsi->lsi_ldd))) {
1417 rc = server_start_targets(sb, mnt);
1419 CERROR("Unable to start targets: %d\n", rc);
1422 /* FIXME overmount client here,
1423 or can we just start a client log and client_fill_super on this sb?
1424 We need to make sure server_put_super gets called too - ll_put_super
1425 calls lustre_common_put_super; check there for LSI_SERVER flag,
1427 Probably should start client from new thread so we can return.
1428 Client will not finish until all servers are connected.
1429 Note - MGMT-only server does NOT get a client, since there is no
1430 lustre fs associated - the MGMT is for all lustre fs's */
1433 rc = server_fill_super_common(sb);
/* Cleanup: undo the work done so far through the normal put_super path. */
1440 server_put_super(sb);
1445 /* Get the index from the obd name.
1446 rc = server type, or rc < 0 on error */
/* Parse a server name whose first '-' is followed by a three-letter type
 * tag ("MDT" or "OST") and then a hexadecimal number.
 * svname: name to parse.
 * idx:    receives the number that follows the type tag, parsed in base 16
 *         by simple_strtoul().
 * endptr: handed to simple_strtoul() unchanged; callers in this file pass
 *         either NULL or a pointer they use to continue scanning.
 * The result code is LDD_F_SV_TYPE_MDT or LDD_F_SV_TYPE_OST according to
 * the tag.  A name without '-' is reported with CERROR.
 * NOTE(review): the error returns (missing '-', unknown tag) are not
 * visible in this listing; callers here treat rc < 0 as failure. */
1448 int server_name2index(char *svname, unsigned long *idx, char **endptr)
1451 char *dash = strchr(svname, '-');
1453 CERROR("Can't understand server name %s\n", svname);
/* The type tag is the three characters right after the dash. */
1457 if (strncmp(dash + 1, "MDT", 3) == 0)
1458 rc = LDD_F_SV_TYPE_MDT;
1459 else if (strncmp(dash + 1, "OST", 3) == 0)
1460 rc = LDD_F_SV_TYPE_OST;
/* dash + 4 skips the dash and the three-letter tag. */
1464 *idx = simple_strtoul(dash + 4, endptr, 16);
1468 /*************** mount common between server and client ***************/
/* Superblock teardown shared by the client and server paths; in this file
 * it is called from the server put_super code and from the client branch
 * of lustre_fill_super().
 * sb: superblock being dropped.
 * Stops the MGC with lustre_stop_mgc().  A non-zero result other than
 * -ENOENT enters the handling branch, where the failure is either reported
 * with CERROR or, for a busy MGC, only logged at debug level because
 * another obd still needs it (see the comment below).
 * NOTE(review): the test that separates those two cases and the return
 * statements are not visible in this listing. */
1471 int lustre_common_put_super(struct super_block *sb)
1476 CDEBUG(D_MOUNT, "dropping sb %p\n", sb);
1478 rc = lustre_stop_mgc(sb);
1479 if (rc && (rc != -ENOENT)) {
1481 CERROR("Can't stop MGC: %d\n", rc);
1484 /* BUSY just means that there's some other obd that
1485 needs the mgc. Let him clean it up. */
1486 CDEBUG(D_MOUNT, "MGC still in use\n");
/* Print the contents of a lustre_mount_data through PRINT_CMD at
 * PRINT_MASK level (defined at the top of the file as LCONSOLE and
 * D_WARNING).
 * lmd: mount data to print.
 * Output covers the MGS NIDs (or "no MGS nids" when the count is zero),
 * the name held in lmd_dev (labelled "fsname" for client mounts), the
 * flags in hex, the saved option string and every excluded OST index.
 * NOTE(review): the lines that choose between the "fsname" and "device"
 * labels and that guard the options line are not visible in this
 * listing. */
1492 static void lmd_print(struct lustre_mount_data *lmd)
1496 PRINT_CMD(PRINT_MASK, " mount data:\n");
1497 if (!lmd->lmd_mgsnid_count)
1498 PRINT_CMD(PRINT_MASK, "no MGS nids\n");
1499 else for (i = 0; i < lmd->lmd_mgsnid_count; i++) {
1500 PRINT_CMD(PRINT_MASK, "nid %d: %s\n", i,
1501 libcfs_nid2str(lmd->lmd_mgsnid[i]));
1503 if (lmd_is_client(lmd))
1504 PRINT_CMD(PRINT_MASK, "fsname: %s\n", lmd->lmd_dev);
1506 PRINT_CMD(PRINT_MASK, "device: %s\n", lmd->lmd_dev);
1507 PRINT_CMD(PRINT_MASK, "flags: %x\n", lmd->lmd_flags);
1509 PRINT_CMD(PRINT_MASK, "options: %s\n", lmd->lmd_opts);
/* One line per excluded OST, index shown as four hex digits. */
1510 for (i = 0; i < lmd->lmd_exclude_count; i++) {
1511 PRINT_CMD(PRINT_MASK, "exclude %d: OST%04x\n", i,
1512 lmd->lmd_exclude[i]);
/* Check a server name against the exclusion list stored in the mount data.
 * sb:     superblock whose lustre_mount_data holds the exclusion list.
 * svname: server name to test; parsed with server_name2index().
 * Only names that parse as an OST are compared.  The parsed index is
 * matched against each of the lmd_exclude_count entries of lmd_exclude,
 * and a match is logged with CWARN.
 * NOTE(review): the return values are not visible in this listing.
 * NOTE(review): index is unsigned long but is printed with %ld in the
 * CDEBUG below -- %lu would match the type. */
1516 /* Is this server on the exclusion list */
1517 int lustre_check_exclusion(struct super_block *sb, char *svname)
1519 struct lustre_sb_info *lsi = s2lsi(sb);
1520 struct lustre_mount_data *lmd = lsi->lsi_lmd;
1521 unsigned long index;
1525 rc = server_name2index(svname, &index, NULL);
/* Anything that is not an OST is outside the scope of the list. */
1526 if (rc != LDD_F_SV_TYPE_OST)
1529 CDEBUG(D_MOUNT, "Check exclusion %s (%ld) in %d of %s\n", svname,
1530 index, lmd->lmd_exclude_count, lmd->lmd_dev);
/* Linear scan of the exclusion list. */
1532 for(i = 0; i < lmd->lmd_exclude_count; i++) {
1533 if (index == lmd->lmd_exclude[i]) {
1534 CWARN("Excluding %s (on exclusion list)\n", svname);
/* Build the OST exclusion list from the value of an "exclude=" option.
 * lmd: mount data; lmd_exclude and lmd_exclude_count are filled in.
 * ptr: points at the '=' of the option (lmd_parse passes s1 + 7).
 * Indices are first collected in a temporary array sized for
 * MAX_OBD_DEVICES entries.  Each name is parsed with server_name2index();
 * OST entries are recorded and other entries are logged as ignored.
 * Scanning ends at end of string, a space or a comma, and the count is
 * checked against MAX_OBD_DEVICES.  When at least one index was collected,
 * a permanent array of exactly that size is allocated and the indices are
 * copied into it; the count is reset to 0 on the visible fallback line.
 * The temporary array is always freed.
 * NOTE(review): the pointer advances, allocation-failure checks and return
 * statements are not visible in this listing.
 * NOTE(review): elements are sized with sizeof(index), i.e. unsigned long,
 * while lmd_print formats lmd_exclude[i] with %x -- confirm that the
 * element type of lmd_exclude matches what is allocated and copied here. */
1541 /* mount -v -o exclude=lustre-OST0001:lustre-OST0002 -t lustre ... */
1542 static int lmd_make_exclusion(struct lustre_mount_data *lmd, char *ptr)
1544 char *s1 = ptr, *s2;
1545 unsigned long index, *exclude_list;
1549 /* temp storage until we figure out how many we have */
1550 OBD_ALLOC(exclude_list, sizeof(index) * MAX_OBD_DEVICES);
1554 /* we enter this fn pointing at the '=' */
1555 while (*s1 && *s1 != ' ' && *s1 != ',') {
/* s2 receives the position where number parsing stopped. */
1557 rc = server_name2index(s1, &index, &s2);
1559 CERROR("Can't parse %s\n", s1);
1562 if (rc == LDD_F_SV_TYPE_OST)
1563 exclude_list[lmd->lmd_exclude_count++] = index;
1565 CDEBUG(D_MOUNT, "ignoring exclude %.7s\n", s1);
1567 /* now we are pointing at ':' (next exclude)
1568 or ',' (end of excludes) */
/* Checked after the increment above, so the temporary array is never
 * indexed at MAX_OBD_DEVICES or beyond on the next pass. */
1570 if (lmd->lmd_exclude_count >= MAX_OBD_DEVICES)
1573 if (rc >= 0) /* non-err */
1576 if (lmd->lmd_exclude_count) {
1577 /* permanent, freed in lustre_free_lsi */
1578 OBD_ALLOC(lmd->lmd_exclude, sizeof(index) *
1579 lmd->lmd_exclude_count);
1580 if (lmd->lmd_exclude) {
1581 memcpy(lmd->lmd_exclude, exclude_list,
1582 sizeof(index) * lmd->lmd_exclude_count);
1585 lmd->lmd_exclude_count = 0;
/* Release the temporary array with the same size it was allocated with. */
1588 OBD_FREE(exclude_list, sizeof(index) * MAX_OBD_DEVICES);
/* Parse the mount option string into *lmd.
 * options: option string given to the mount.  It is also viewed as raw
 *          lustre_mount_data so that the old binary format can be detected
 *          (magic compared on its upper 24 bits) and reported as an
 *          outdated /sbin/mount.lustre.
 * lmd:     structure to fill.
 * LMD_FLG_RECOVER is set before the options are scanned.  Options are
 * matched by prefix with strncmp(): "recov", "norecov", "nosvc",
 * "exclude=" (handed to lmd_make_exclusion starting at the '=') and
 * "device=".  A missing device, or a device followed by further options,
 * is reported as an error.
 * The device string is then examined: each leading ':'-terminated element
 * is converted with libcfs_str2nid() and stored as an MGS NID, at most
 * MTI_NIDS_MAX of them, and the mount is flagged as a client mount.  For
 * client mounts, leading '/' characters are skipped to reach the fsname.
 * The resulting name is copied into a newly allocated lmd_dev and the
 * option text into a newly allocated lmd_opts; the comments below note
 * that both are freed in lustre_free_lsi.
 * NOTE(review): return statements and several loop and branch lines are
 * not visible in this listing.  In addition, the comment that begins
 * "Client options are parsed in ll_options" has no visible terminator, so
 * as listed it runs to the end of the FIXME comment and covers the "recov"
 * test -- confirm against the full file. */
1592 /* mount -v -t lustre uml1:uml2:/lustre-client /mnt/lustre */
1593 static int lmd_parse(char *options, struct lustre_mount_data *lmd)
1595 char *s1, *s2, *devname = NULL;
1596 struct lustre_mount_data *raw = (struct lustre_mount_data *)options;
1602 LCONSOLE_ERROR("Missing mount data: check that "
1603 "/sbin/mount.lustre is installed.\n");
1607 /* Options should be a string - try to detect old lmd data */
1608 if ((raw->lmd_magic & 0xffffff00) == (LMD_MAGIC & 0xffffff00)) {
1609 LCONSOLE_ERROR("You're using an old version of "
1610 "/sbin/mount.lustre. Please install version "
1611 "%s\n", LUSTRE_VERSION_STRING);
1614 lmd->lmd_magic = LMD_MAGIC;
/* Recovery is on by default; "norecov" below turns it off. */
1617 lmd->lmd_flags |= LMD_FLG_RECOVER;
1621 /* Skip whitespace and extra commas */
1622 while (*s1 == ' ' || *s1 == ',')
1625 /* Client options are parsed in ll_options: eg. flock,
1628 if (strncmp(s1, "recov", 5) == 0)
1629 /* FIXME do something with the RECOVER flag - see lconf */
1630 lmd->lmd_flags |= LMD_FLG_RECOVER;
1631 else if (strncmp(s1, "norecov", 7) == 0)
1632 lmd->lmd_flags &= ~LMD_FLG_RECOVER;
1633 else if (strncmp(s1, "nosvc", 5) == 0)
1634 lmd->lmd_flags |= LMD_FLG_NOSVC;
1636 /* ost exclusion list */
1637 else if (strncmp(s1, "exclude=", 8) == 0) {
/* s1 + 7 is the '=' character, which is where lmd_make_exclusion expects
 * to start. */
1638 rc = lmd_make_exclusion(lmd, s1 + 7);
1643 /* Linux 2.4 doesn't pass the device, so we stuck it at the
1644 end of the options. */
1645 else if (strncmp(s1, "device=", 7) == 0) {
1647 /* terminate options right before device. device
1648 must be the last one. */
/* Find the separator that ends the current option. */
1653 s2 = strchr(s1, ',');
1660 LCONSOLE_ERROR("Can't find the device name "
1661 "(need mount option 'device=...')\n");
1665 if (strchr(devname, ',')) {
1666 LCONSOLE_ERROR("Device name must be the final option\n");
1671 /* Get MGS nids if client mount: uml1@tcp:uml2@tcp:/fsname-client */
1672 while ((s2 = strchr(s1, ':'))) {
/* NOTE(review): plain assignment -- this replaces lmd_flags instead of
 * OR-ing into it, so LMD_FLG_RECOVER and LMD_FLG_NOSVC set earlier in this
 * function are dropped as soon as this loop body runs.  Confirm whether
 * that is intended. */
1675 lmd->lmd_flags = LMD_FLG_CLIENT;
1676 nid = libcfs_str2nid(s1);
1677 if (nid == LNET_NID_ANY) {
1678 LCONSOLE_ERROR("Can't parse NID '%s'\n", s1);
/* Bounds check before the NID is stored below. */
1681 if (lmd->lmd_mgsnid_count >= MTI_NIDS_MAX) {
1682 LCONSOLE_ERROR("Too many NIDs: '%s'\n", s1);
1685 lmd->lmd_mgsnid[lmd->lmd_mgsnid_count++] = nid;
1689 if (lmd_is_client(lmd)) {
1690 /* Remove leading /s from fsname */
1691 while (*++s1 == '/')
1696 LCONSOLE_ERROR("No filesytem specified\n");
1700 /* freed in lustre_free_lsi */
1701 OBD_ALLOC(lmd->lmd_dev, strlen(s1) + 1);
1704 strcpy(lmd->lmd_dev, s1);
1706 /* save mount options */
/* Start at the last character of options and walk backwards over
 * trailing commas and spaces. */
1707 s1 = options + strlen(options) - 1;
1708 while (s1 >= options && (*s1 == ',' || *s1 == ' '))
1710 if (*options != 0) {
1711 /* freed in lustre_free_lsi */
1712 OBD_ALLOC(lmd->lmd_opts, strlen(options) + 1);
1715 strcpy(lmd->lmd_opts, options);
1718 lmd->lmd_magic = LMD_MAGIC;
1724 CERROR("Bad mount options %s\n", options);
/* Common fill_super entry point for client and server mounts; passed to
 * get_sb_nodev() by lustre_get_sb() and called by lustre_read_super().
 * sb:     superblock being set up.
 * data:   mount option string, parsed by lmd_parse().
 * silent: not referenced in the visible lines.
 * After lustre_init_lsi() and option parsing the function branches:
 *  - client mount: requires a callback registered through
 *    lustre_register_client_fill_super(); the MGC is started and the
 *    callback is invoked, with lustre_common_put_super() on the visible
 *    cleanup line;
 *  - otherwise: LSI_SERVER is set in lsi_flags and server_fill_super()
 *    does the work, including its own cleanup on failure.
 * The outcome is logged as "Unable to mount" or "Successfully mounted".
 * NOTE(review): the error checks, the assignment of lmd and the return
 * statements are not visible in this listing. */
1730 int lustre_fill_super(struct super_block *sb, void *data, int silent)
1732 struct lustre_mount_data *lmd;
1733 struct lustre_sb_info *lsi;
1737 CDEBUG(D_MOUNT|D_VFSTRACE, "VFS Op: sb %p\n", sb);
1739 lsi = lustre_init_lsi(sb);
1744 /* Figure out the lmd from the mount options */
1745 if (lmd_parse((char *)data, lmd)) {
1750 if (lmd_is_client(lmd)) {
1751 CDEBUG(D_MOUNT, "Mounting client for fs %s\n", lmd->lmd_dev);
/* The client entry point lives in another module and may not be
 * registered yet. */
1752 if (!client_fill_super) {
1753 LCONSOLE_ERROR("Nothing registered for client mount!"
1754 " Is llite module loaded?\n");
1757 rc = lustre_start_mgc(sb);
1760 /* Connect and start */
1761 /* (should always be ll_fill_super) */
1762 rc = (*client_fill_super)(sb);
1764 lustre_common_put_super(sb);
1767 CDEBUG(D_MOUNT, "Mounting server from %s\n", lmd->lmd_dev);
1768 lsi->lsi_flags |= LSI_SERVER;
1769 rc = server_fill_super(sb);
1770 /* s_f_s calls lustre_start_mgc after the mount because we need
1771 the MGS nids which are stored on disk. Plus, we may
1772 need to start the MGS first. */
1773 /* s_f_s will call server_put_super on failure */
/* Final status report; the device name is printed only while the
 * superblock still has its lustre_sb_info. */
1778 CERROR("Unable to mount %s\n",
1779 s2lsi(sb) ? lmd->lmd_dev : "");
1781 CDEBUG(D_MOUNT, "Successfully mounted %s\n", lmd->lmd_dev);
/* Register the client-side fill_super callback.
 * cfs: function stored in the file-scope client_fill_super pointer, which
 *      lustre_fill_super() checks and calls for client mounts.  No
 *      validation is done on the value. */
1787 /* We can't call ll_fill_super by name because it lives in a module that
1788 must be loaded after this one. */
1789 void lustre_register_client_fill_super(int (*cfs)(struct super_block *sb))
1791 client_fill_super = cfs;
1794 /***************** FS registration ******************/
1796 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
/* get_sb callback used when LINUX_VERSION_CODE is at least 2.5.0.
 * fs_type, flags and data are handed to get_sb_nodev() together with
 * lustre_fill_super as the fill callback; devname is not used here.
 * Returns whatever get_sb_nodev() returns. */
1798 struct super_block * lustre_get_sb(struct file_system_type *fs_type,
1799 int flags, const char *devname, void * data)
1801 /* calls back in fill super */
1802 /* we could append devname= onto options (*data) here,
1803 but 2.4 doesn't get devname. So we do it in mount_lustre.c */
1804 return get_sb_nodev(fs_type, flags, data, lustre_fill_super);
/* Filesystem type descriptor for the kernel-version branch that provides
 * lustre_get_sb: superblocks are obtained through lustre_get_sb and
 * released with kill_anon_super.  FS_BINARY_MOUNTDATA is set in fs_flags.
 * NOTE(review): the .name field is not visible in this listing. */
1807 struct file_system_type lustre_fs_type = {
1808 .owner = THIS_MODULE,
1810 .get_sb = lustre_get_sb,
1811 .kill_sb = kill_anon_super,
1812 .fs_flags = FS_BINARY_MOUNTDATA,
/* read_super callback used by the second lustre_fs_type definition.
 * Delegates to lustre_fill_super() with the same arguments.
 * NOTE(review): presumably this is the branch for kernels older than
 * 2.5.0; the preprocessor directive and the handling of rc and the return
 * value are not visible in this listing. */
1817 static struct super_block *lustre_read_super(struct super_block *sb,
1818 void *data, int silent)
1823 rc = lustre_fill_super(sb, data, silent);
/* Filesystem type descriptor for the read_super-based variant: sets
 * FS_NFSEXP_FSID in fs_flags and uses lustre_read_super to set up the
 * superblock.
 * NOTE(review): the .name field and the enclosing preprocessor directives
 * are not visible in this listing. */
1829 static struct file_system_type lustre_fs_type = {
1830 .owner = THIS_MODULE,
1832 .fs_flags = FS_NFSEXP_FSID,
1833 .read_super = lustre_read_super,
/* Register lustre_fs_type with the kernel; returns the result of
 * register_filesystem() unchanged. */
1837 int lustre_register_fs(void)
1839 return register_filesystem(&lustre_fs_type);
/* Unregister lustre_fs_type; returns the result of
 * unregister_filesystem() unchanged. */
1842 int lustre_unregister_fs(void)
1844 return unregister_filesystem(&lustre_fs_type);
/* Symbols exported from this file for use by other modules: the client
 * fill_super registration hook, the shared put_super helper, the log
 * start/end helpers, and the server mount and target helpers. */
1847 EXPORT_SYMBOL(lustre_register_client_fill_super);
1848 EXPORT_SYMBOL(lustre_common_put_super);
1849 EXPORT_SYMBOL(lustre_process_log);
1850 EXPORT_SYMBOL(lustre_end_log);
1851 EXPORT_SYMBOL(server_get_mount);
1852 EXPORT_SYMBOL(server_put_mount);
1853 EXPORT_SYMBOL(server_register_target);
1854 EXPORT_SYMBOL(server_name2index);
1855 EXPORT_SYMBOL(server_mti_print);