4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2013, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lustre/obdclass/obd_mount.c
38 * Client mount routines
40 * Author: Nathan Rutman <nathan@clusterfs.com>
/* Debug configuration for this file: DEBUG_SUBSYSTEM is set to S_CLASS,
 * D_MOUNT is the mask D_SUPER|D_CONFIG (D_WARNING is commented out) used by
 * the mount-path CDEBUG calls, and PRINT_CMD is an alias for CDEBUG. */
44 #define DEBUG_SUBSYSTEM S_CLASS
45 #define D_MOUNT (D_SUPER|D_CONFIG/*|D_WARNING */)
46 #define PRINT_CMD CDEBUG
49 #include <obd_class.h>
50 #include <lustre/lustre_user.h>
51 #include <linux/version.h>
52 #include <lustre_log.h>
53 #include <lustre_disk.h>
54 #include <lustre_param.h>
/* Callbacks installed at run time by lustre_register_client_fill_super()
 * and lustre_register_kill_super_cb().  client_fill_super is invoked from
 * lustre_fill_super() for client mounts; kill_super_cb is invoked from
 * lustre_kill_super() when the superblock is not a server one. */
56 static int (*client_fill_super)(struct super_block *sb,
57 struct vfsmount *mnt);
59 static void (*kill_super_cb)(struct super_block *sb);
61 /**************** config llog ********************/
63 /** Get a config log from the MGS and process it.
64 * This func is called for both clients and servers.
65 * Continue to process new statements appended to the logs
66 * (whenever the config lock is revoked) until lustre_end_log
68 * @param sb The superblock is used by the MGC to write to the local copy of
70 * @param logname The name of the llog to replicate from the MGS
71 * @param cfg Since the same mgc may be used to follow multiple config logs
72 * (e.g. ost1, ost2, client), the config_llog_instance keeps the state for
73 * this log, and is added to the mgc's list of logs to follow.
/* Ask the MGC attached to this superblock to start following a config log.
 *
 * The lustre_cfg buffers are reset with the MGC obd name, then logname,
 * cfg (sizeof(*cfg) bytes) and the address of sb are placed in buffers 1-3.
 * The buffers are wrapped in an LCFG_LOG_START command that is handed to
 * obd_process_config() on lsi->lsi_mgc and freed afterwards.  On failure
 * one of two console errors (0x15b or 0x15c) is printed; the conditions
 * selecting between them are not visible in this listing.
 */
75 int lustre_process_log(struct super_block *sb, char *logname,
76 struct config_llog_instance *cfg)
78 struct lustre_cfg *lcfg;
79 struct lustre_cfg_bufs *bufs;
80 struct lustre_sb_info *lsi = s2lsi(sb);
81 struct obd_device *mgc = lsi->lsi_mgc;
92 /* mgc_process_config */
93 lustre_cfg_bufs_reset(bufs, mgc->obd_name);
94 lustre_cfg_bufs_set_string(bufs, 1, logname);
95 lustre_cfg_bufs_set(bufs, 2, cfg, sizeof(*cfg));
96 lustre_cfg_bufs_set(bufs, 3, &sb, sizeof(sb));
97 lcfg = lustre_cfg_new(LCFG_LOG_START, bufs);
98 rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
99 lustre_cfg_free(lcfg);
/* The trailing space after '%s' keeps the log name and "failed" apart once
 * the adjacent string literals are concatenated. */
104 LCONSOLE_ERROR_MSG(0x15b, "%s: The configuration from log '%s' "
105 "failed from the MGS (%d). Make sure this "
106 "client and the MGS are running compatible "
107 "versions of Lustre.\n",
108 mgc->obd_name, logname, rc);
110 LCONSOLE_ERROR_MSG(0x15c, "%s: The configuration from log '%s' "
111 "failed (%d). This may be the result of "
112 "communication errors between this node and "
113 "the MGS, a bad configuration, or other "
114 "errors. See the syslog for more "
115 "information.\n", mgc->obd_name, logname,
118 /* class_obd_list(); */
121 EXPORT_SYMBOL(lustre_process_log);
123 /* Stop watching this config log for updates */
/* Builds an LCFG_LOG_END command carrying logname and cfg and passes it to
 * obd_process_config() on the MGC recorded in the superblock info
 * (lsi->lsi_mgc); the lustre_cfg is freed afterwards.  The buffer set used
 * to build the command is a local variable of this function. */
124 int lustre_end_log(struct super_block *sb, char *logname,
125 struct config_llog_instance *cfg)
127 struct lustre_cfg *lcfg;
128 struct lustre_cfg_bufs bufs;
129 struct lustre_sb_info *lsi = s2lsi(sb);
130 struct obd_device *mgc = lsi->lsi_mgc;
137 /* mgc_process_config */
138 lustre_cfg_bufs_reset(&bufs, mgc->obd_name);
139 lustre_cfg_bufs_set_string(&bufs, 1, logname);
141 lustre_cfg_bufs_set(&bufs, 2, cfg, sizeof(*cfg));
142 lcfg = lustre_cfg_new(LCFG_LOG_END, &bufs);
143 rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
144 lustre_cfg_free(lcfg);
147 EXPORT_SYMBOL(lustre_end_log);
149 /**************** obd start *******************/
151 /** lustre_cfg_bufs are a holdover from 1.4; we can still set these up from
152 * lctl (and do for echo cli/srv).
/* Build a lustre_cfg for command cmd on cfgname with up to four string
 * arguments (buffers 1-4), stamp it with nid, run it through
 * class_process_config() and free it.
 * NOTE(review): callers in this file pass 0 for unused s1..s4 slots, so the
 * setters are presumably guarded by NULL checks on lines missing from this
 * listing -- confirm. */
154 int do_lcfg(char *cfgname, lnet_nid_t nid, int cmd,
155 char *s1, char *s2, char *s3, char *s4)
157 struct lustre_cfg_bufs bufs;
158 struct lustre_cfg * lcfg = NULL;
161 CDEBUG(D_TRACE, "lcfg %s %#x %s %s %s %s\n", cfgname,
162 cmd, s1, s2, s3, s4);
164 lustre_cfg_bufs_reset(&bufs, cfgname);
166 lustre_cfg_bufs_set_string(&bufs, 1, s1);
168 lustre_cfg_bufs_set_string(&bufs, 2, s2);
170 lustre_cfg_bufs_set_string(&bufs, 3, s3);
172 lustre_cfg_bufs_set_string(&bufs, 4, s4);
174 lcfg = lustre_cfg_new(cmd, &bufs);
175 lcfg->lcfg_nid = nid;
176 rc = class_process_config(lcfg);
177 lustre_cfg_free(lcfg);
180 EXPORT_SYMBOL(do_lcfg);
182 /** Call class_attach and class_setup. These methods in turn call
183 * obd type-specific methods.
/* Attach obd obdname of the given type and uuid (LCFG_ATTACH), then set it
 * up with s1..s4 (LCFG_SETUP).  Each step has a CERROR for its failure; the
 * LCFG_DETACH call directly follows the setup-error message, undoing the
 * attach. */
185 int lustre_start_simple(char *obdname, char *type, char *uuid,
186 char *s1, char *s2, char *s3, char *s4)
189 CDEBUG(D_MOUNT, "Starting obd %s (typ=%s)\n", obdname, type);
191 rc = do_lcfg(obdname, 0, LCFG_ATTACH, type, uuid, 0, 0);
193 CERROR("%s attach error %d\n", obdname, rc);
196 rc = do_lcfg(obdname, 0, LCFG_SETUP, s1, s2, s3, s4);
198 CERROR("%s setup error %d\n", obdname, rc);
199 do_lcfg(obdname, 0, LCFG_DETACH, 0, 0, 0, 0);
/* Held by lustre_start_mgc() and lustre_stop_mgc() around their MGC
 * lookup, setup and teardown work. */
204 DEFINE_MUTEX(mgc_start_lock);
206 /** Set up a mgc obd to process startup logs
208 * \param sb [in] super block of the mgc obd
210 * \retval 0 success, otherwise error code
/* Find or create the MGC obd for this mount and connect it to the MGS.
 *
 * 1. Pick the first MGS nid: on servers from the mgsnode= list
 *    (lmd_mgs) or, on an MGS, from the local non-loopback nids; on clients
 *    from the device string (lmd_dev).
 * 2. Under mgc_start_lock, build mgcname from LUSTRE_MGC_OBDNAME plus the
 *    nid string and look the obd up by name.
 * 3. If an obd exists and is not stopping it is re-used: KEY_MGSSEC is
 *    pushed, cl_mgc_refcount is incremented, client mounts have
 *    LMD_FLG_NOIR forced to match the existing connection data, and
 *    KEY_INIT_RECOV_BACKUP is set.
 * 4. Otherwise a new MGC is started: primary nids are added with
 *    LCFG_ADD_UUID under "<mgcname>_<i>", lustre_start_simple() attaches
 *    and sets up the obd with a random uuid, failover nids get
 *    LCFG_ADD_UUID/LCFG_ADD_CONN, lmd_mgs_failnodes records the count,
 *    cl_mgc_refcount is set to 1 and obd_connect() stores the export in
 *    cl_mgc_mgsexp.
 * 5. mgcname and niduuid are freed before returning.
 *
 * NOTE(review): niduuid is allocated as len + 2 bytes, which holds
 * "<mgcname>_<i>" only while i prints as a single hex digit; confirm that
 * the failover index cannot reach 16 or enlarge the buffer.
 */
212 int lustre_start_mgc(struct super_block *sb)
214 struct obd_connect_data *data = NULL;
215 struct lustre_sb_info *lsi = s2lsi(sb);
216 struct obd_device *obd;
217 struct obd_export *exp;
218 struct obd_uuid *uuid;
221 char *mgcname = NULL, *niduuid = NULL, *mgssec = NULL;
224 int rc = 0, i = 0, j, len;
227 LASSERT(lsi->lsi_lmd);
229 /* Find the first non-lo MGS nid for our MGC name */
230 if (IS_SERVER(lsi)) {
231 /* mount -o mgsnode=nid */
232 ptr = lsi->lsi_lmd->lmd_mgs;
233 if (lsi->lsi_lmd->lmd_mgs &&
234 (class_parse_nid(lsi->lsi_lmd->lmd_mgs, &nid, &ptr) == 0)) {
236 } else if (IS_MGS(lsi)) {
237 lnet_process_id_t id;
238 while ((rc = LNetGetId(i++, &id)) != -ENOENT) {
239 if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND)
246 } else { /* client */
247 /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
248 ptr = lsi->lsi_lmd->lmd_dev;
249 if (class_parse_nid(ptr, &nid, &ptr) == 0)
253 CERROR("No valid MGS nids found.\n");
257 mutex_lock(&mgc_start_lock);
259 len = strlen(LUSTRE_MGC_OBDNAME) + strlen(libcfs_nid2str(nid)) + 1;
260 OBD_ALLOC(mgcname, len);
261 OBD_ALLOC(niduuid, len + 2);
262 if (!mgcname || !niduuid)
263 GOTO(out_free, rc = -ENOMEM);
264 sprintf(mgcname, "%s%s", LUSTRE_MGC_OBDNAME, libcfs_nid2str(nid));
266 mgssec = lsi->lsi_lmd->lmd_mgssec ? lsi->lsi_lmd->lmd_mgssec : "";
270 GOTO(out_free, rc = -ENOMEM);
272 obd = class_name2obd(mgcname);
273 if (obd && !obd->obd_stopping) {
274 rc = obd_set_info_async(NULL, obd->obd_self_export,
275 strlen(KEY_MGSSEC), KEY_MGSSEC,
276 strlen(mgssec), mgssec, NULL);
280 /* Re-using an existing MGC */
281 atomic_inc(&obd->u.cli.cl_mgc_refcount);
283 /* IR compatibility check, only for clients */
284 if (lmd_is_client(lsi->lsi_lmd)) {
286 int vallen = sizeof(*data);
287 __u32 *flags = &lsi->lsi_lmd->lmd_flags;
289 rc = obd_get_info(NULL, obd->obd_self_export,
290 strlen(KEY_CONN_DATA), KEY_CONN_DATA,
291 &vallen, data, NULL);
293 has_ir = OCD_HAS_FLAG(data, IMP_RECOV);
294 if (has_ir ^ !(*flags & LMD_FLG_NOIR)) {
295 /* LMD_FLG_NOIR is for test purpose only */
297 "Trying to mount a client with IR setting "
298 "not compatible with current mgc. "
299 "Force to use current mgc setting that is "
301 has_ir ? "enabled" : "disabled");
303 *flags &= ~LMD_FLG_NOIR;
305 *flags |= LMD_FLG_NOIR;
310 /* If we are restarting the MGS, don't try to keep the MGC's
311 old connection, or registration will fail. */
313 CDEBUG(D_MOUNT, "New MGS with live MGC\n");
317 /* Try all connections, but only once (again).
318 We don't want to block another target from starting
319 (using its local copy of the log), but we do want to connect
320 if at all possible. */
322 CDEBUG(D_MOUNT, "%s: Set MGC reconnect %d\n", mgcname,recov_bk);
323 rc = obd_set_info_async(NULL, obd->obd_self_export,
324 sizeof(KEY_INIT_RECOV_BACKUP),
325 KEY_INIT_RECOV_BACKUP,
326 sizeof(recov_bk), &recov_bk, NULL);
330 CDEBUG(D_MOUNT, "Start MGC '%s'\n", mgcname);
332 /* Add the primary nids for the MGS */
334 sprintf(niduuid, "%s_%x", mgcname, i);
335 if (IS_SERVER(lsi)) {
336 ptr = lsi->lsi_lmd->lmd_mgs;
337 CDEBUG(D_MOUNT, "mgs nids %s.\n", ptr);
339 /* Use local nids (including LO) */
340 lnet_process_id_t id;
341 while ((rc = LNetGetId(i++, &id)) != -ENOENT) {
342 rc = do_lcfg(mgcname, id.nid, LCFG_ADD_UUID,
346 /* Use mgsnode= nids */
347 /* mount -o mgsnode=nid */
348 if (lsi->lsi_lmd->lmd_mgs) {
349 ptr = lsi->lsi_lmd->lmd_mgs;
350 } else if (class_find_param(ptr, PARAM_MGSNODE,
352 CERROR("No MGS nids given.\n");
353 GOTO(out_free, rc = -EINVAL);
357 * Here we only take the first mgsnid as its primary
358 * serving mgs node, the rest mgsnid will be taken as
359 * failover mgs node, otherwise they would be takens
360 * as multiple nids of a single mgs node.
362 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
363 rc = do_lcfg(mgcname, nid, LCFG_ADD_UUID,
371 } else { /* client */
372 /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
373 ptr = lsi->lsi_lmd->lmd_dev;
374 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
375 rc = do_lcfg(mgcname, nid, LCFG_ADD_UUID,
379 /* Stop at the first failover nid */
385 CERROR("No valid MGS nids found.\n");
386 GOTO(out_free, rc = -EINVAL);
388 lsi->lsi_lmd->lmd_mgs_failnodes = 1;
390 /* Random uuid for MGC allows easier reconnects */
392 ll_generate_random_uuid(uuidc);
393 class_uuid_unparse(uuidc, uuid);
396 rc = lustre_start_simple(mgcname, LUSTRE_MGC_NAME,
397 (char *)uuid->uuid, LUSTRE_MGS_OBDNAME,
403 /* Add any failover MGS nids */
405 while (ptr && ((*ptr == ':' ||
406 class_find_param(ptr, PARAM_MGSNODE, &ptr) == 0))) {
407 /* New failover node */
408 sprintf(niduuid, "%s_%x", mgcname, i);
410 while (class_parse_nid_quiet(ptr, &nid, &ptr) == 0) {
411 rc = do_lcfg(mgcname, nid, LCFG_ADD_UUID,
419 rc = do_lcfg(mgcname, 0, LCFG_ADD_CONN,
428 lsi->lsi_lmd->lmd_mgs_failnodes = i;
430 obd = class_name2obd(mgcname);
432 CERROR("Can't find mgcobd %s\n", mgcname);
433 GOTO(out_free, rc = -ENOTCONN);
436 rc = obd_set_info_async(NULL, obd->obd_self_export,
437 strlen(KEY_MGSSEC), KEY_MGSSEC,
438 strlen(mgssec), mgssec, NULL);
442 /* Keep a refcount of servers/clients who started with "mount",
443 so we know when we can get rid of the mgc. */
444 atomic_set(&obd->u.cli.cl_mgc_refcount, 1);
446 /* Try all connections, but only once. */
448 rc = obd_set_info_async(NULL, obd->obd_self_export,
449 sizeof(KEY_INIT_RECOV_BACKUP),
450 KEY_INIT_RECOV_BACKUP,
451 sizeof(recov_bk), &recov_bk, NULL);
454 CWARN("can't set %s %d\n", KEY_INIT_RECOV_BACKUP, rc);
456 /* We connect to the MGS at setup, and don't disconnect until cleanup */
457 data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_AT |
458 OBD_CONNECT_FULL20 | OBD_CONNECT_IMP_RECOV |
459 OBD_CONNECT_LVB_TYPE;
461 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 2, 50, 0)
462 data->ocd_connect_flags |= OBD_CONNECT_MNE_SWAB;
464 #warning "LU-1644: Remove old OBD_CONNECT_MNE_SWAB fixup and imp_need_mne_swab"
467 if (lmd_is_client(lsi->lsi_lmd) &&
468 lsi->lsi_lmd->lmd_flags & LMD_FLG_NOIR)
469 data->ocd_connect_flags &= ~OBD_CONNECT_IMP_RECOV;
470 data->ocd_version = LUSTRE_VERSION_CODE;
471 rc = obd_connect(NULL, &exp, obd, &(obd->obd_uuid), data, NULL);
473 CERROR("connect failed %d\n", rc);
477 obd->u.cli.cl_mgc_mgsexp = exp;
480 /* Keep the mgc info in the sb. Note that many lsi's can point
484 mutex_unlock(&mgc_start_lock);
489 OBD_FREE(mgcname, len);
491 OBD_FREE(niduuid, len + 2);
/* Drop this superblock's reference on its MGC and tear the MGC down when
 * that was the last reference.
 *
 * Under mgc_start_lock: if cl_mgc_refcount does not reach zero the function
 * leaves with -EBUSY.  Otherwise recovery is disabled on the obd, the MGS
 * export is disconnected (a failure is only logged), the obd is cleaned up
 * with class_manual_cleanup(), and the "<obd_name>_<i>" nid uuids are
 * removed with LCFG_DEL_UUID for every i below lmd_mgs_failnodes.  The obd
 * name is copied into niduuid before the cleanup so it is still available
 * for building those uuid names.
 */
495 static int lustre_stop_mgc(struct super_block *sb)
497 struct lustre_sb_info *lsi = s2lsi(sb);
498 struct obd_device *obd;
499 char *niduuid = 0, *ptr = 0;
500 int i, rc = 0, len = 0;
510 mutex_lock(&mgc_start_lock);
511 LASSERT(atomic_read(&obd->u.cli.cl_mgc_refcount) > 0);
512 if (!atomic_dec_and_test(&obd->u.cli.cl_mgc_refcount)) {
513 /* This is not fatal, every client that stops
514 will call in here. */
515 CDEBUG(D_MOUNT, "mgc still has %d references.\n",
516 atomic_read(&obd->u.cli.cl_mgc_refcount));
517 GOTO(out, rc = -EBUSY);
520 /* The MGC has no recoverable data in any case.
521 * force shutdown set in umount_begin */
522 obd->obd_no_recov = 1;
524 if (obd->u.cli.cl_mgc_mgsexp) {
525 /* An error is not fatal, if we are unable to send the
526 disconnect mgs ping evictor cleans up the export */
527 rc = obd_disconnect(obd->u.cli.cl_mgc_mgsexp);
529 CDEBUG(D_MOUNT, "disconnect failed %d\n", rc);
532 /* Save the obdname for cleaning the nid uuids, which are
534 len = strlen(obd->obd_name) + 6;
535 OBD_ALLOC(niduuid, len);
537 strcpy(niduuid, obd->obd_name);
538 ptr = niduuid + strlen(niduuid);
541 rc = class_manual_cleanup(obd);
545 /* Clean the nid uuids */
547 GOTO(out, rc = -ENOMEM);
549 for (i = 0; i < lsi->lsi_lmd->lmd_mgs_failnodes; i++) {
550 sprintf(ptr, "_%x", i);
551 rc = do_lcfg(LUSTRE_MGC_OBDNAME, 0, LCFG_DEL_UUID,
554 CERROR("del MGC UUID %s failed: rc = %d\n",
559 OBD_FREE(niduuid, len);
561 /* class_import_put will get rid of the additional connections */
562 mutex_unlock(&mgc_start_lock);
566 /***************** lustre superblock **************/
/* Set up the lustre_sb_info for sb.  Allocates lsi->lsi_lmd, zeroes the
 * exclude count and both recovery-time limits, and publishes the lsi
 * through s2lsi_nocast(sb).  lsi_mounts starts at 1 (the extra reference
 * held during setup), lsi_flags defaults to LSI_UMOUNT_FAILOVER, and the
 * LWP list head and its spinlock are initialised. */
568 struct lustre_sb_info *lustre_init_lsi(struct super_block *sb)
570 struct lustre_sb_info *lsi;
576 OBD_ALLOC_PTR(lsi->lsi_lmd);
582 lsi->lsi_lmd->lmd_exclude_count = 0;
583 lsi->lsi_lmd->lmd_recovery_time_soft = 0;
584 lsi->lsi_lmd->lmd_recovery_time_hard = 0;
585 s2lsi_nocast(sb) = lsi;
586 /* we take 1 extra ref for our setup */
587 atomic_set(&lsi->lsi_mounts, 1);
589 /* Default umount style */
590 lsi->lsi_flags = LSI_UMOUNT_FAILOVER;
591 INIT_LIST_HEAD(&lsi->lsi_lwp_list);
592 spin_lock_init(&lsi->lsi_lwp_lock);
/* Release the lustre_sb_info attached to sb.  Asserts that the lsi exists
 * and that lsi_mounts has dropped to 0, frees every string hanging off
 * lsi_lmd (each sized as strlen + 1), the exclude array, the 4096-byte
 * lmd_params buffer and the lmd itself, then frees the lsi and clears the
 * superblock's pointer to it.  lsi_llsbi is asserted to be NULL before the
 * lsi is freed. */
597 static int lustre_free_lsi(struct super_block *sb)
599 struct lustre_sb_info *lsi = s2lsi(sb);
602 LASSERT(lsi != NULL);
603 CDEBUG(D_MOUNT, "Freeing lsi %p\n", lsi);
605 /* someone didn't call server_put_mount. */
606 LASSERT(atomic_read(&lsi->lsi_mounts) == 0);
608 if (lsi->lsi_lmd != NULL) {
609 if (lsi->lsi_lmd->lmd_dev != NULL)
610 OBD_FREE(lsi->lsi_lmd->lmd_dev,
611 strlen(lsi->lsi_lmd->lmd_dev) + 1);
612 if (lsi->lsi_lmd->lmd_profile != NULL)
613 OBD_FREE(lsi->lsi_lmd->lmd_profile,
614 strlen(lsi->lsi_lmd->lmd_profile) + 1);
615 if (lsi->lsi_lmd->lmd_mgssec != NULL)
616 OBD_FREE(lsi->lsi_lmd->lmd_mgssec,
617 strlen(lsi->lsi_lmd->lmd_mgssec) + 1);
618 if (lsi->lsi_lmd->lmd_opts != NULL)
619 OBD_FREE(lsi->lsi_lmd->lmd_opts,
620 strlen(lsi->lsi_lmd->lmd_opts) + 1);
621 if (lsi->lsi_lmd->lmd_exclude_count)
622 OBD_FREE(lsi->lsi_lmd->lmd_exclude,
623 sizeof(lsi->lsi_lmd->lmd_exclude[0]) *
624 lsi->lsi_lmd->lmd_exclude_count);
625 if (lsi->lsi_lmd->lmd_mgs != NULL)
626 OBD_FREE(lsi->lsi_lmd->lmd_mgs,
627 strlen(lsi->lsi_lmd->lmd_mgs) + 1);
628 if (lsi->lsi_lmd->lmd_osd_type != NULL)
629 OBD_FREE(lsi->lsi_lmd->lmd_osd_type,
630 strlen(lsi->lsi_lmd->lmd_osd_type) + 1);
631 if (lsi->lsi_lmd->lmd_params != NULL)
632 OBD_FREE(lsi->lsi_lmd->lmd_params, 4096);
634 OBD_FREE(lsi->lsi_lmd, sizeof(*lsi->lsi_lmd));
637 LASSERT(lsi->lsi_llsbi == NULL);
638 OBD_FREE(lsi, sizeof(*lsi));
639 s2lsi_nocast(sb) = NULL;
644 /* The lsi has one reference for every server that is using the disk -
645 e.g. MDT, MGS, and potentially MGC */
/* Drop one reference on lsi_mounts.  When the count reaches zero on a
 * server that has an OSD export, the dt device reference is put, the
 * export's obd_lvfs_ctxt.dt and lsi_dt_dev are cleared, the export is
 * disconnected and obd_zombie_barrier() is called to wait for the OSD to
 * go away.  The remainder of the function is not visible in this
 * listing. */
646 int lustre_put_lsi(struct super_block *sb)
648 struct lustre_sb_info *lsi = s2lsi(sb);
651 LASSERT(lsi != NULL);
653 CDEBUG(D_MOUNT, "put %p %d\n", sb, atomic_read(&lsi->lsi_mounts));
654 if (atomic_dec_and_test(&lsi->lsi_mounts)) {
655 if (IS_SERVER(lsi) && lsi->lsi_osd_exp) {
656 lu_device_put(&lsi->lsi_dt_dev->dd_lu_dev);
657 lsi->lsi_osd_exp->exp_obd->obd_lvfs_ctxt.dt = NULL;
658 lsi->lsi_dt_dev = NULL;
659 obd_disconnect(lsi->lsi_osd_exp);
660 /* wait till OSD is gone */
661 obd_zombie_barrier();
670 * <FSNAME><SEPARATOR><TYPE><INDEX>
671 * FSNAME is between 1 and 8 characters (inclusive).
672 * Excluded characters are '/' and ':'
673 * SEPARATOR is either ':' or '-'
674 * TYPE: "OST", "MDT", etc.
675 * INDEX: Hex representation of the index
678 /** Get the fsname ("lustre") from the server name ("lustre-OST003F").
679 * @param [in] svname server name including type and index
680 * @param [out] fsname Buffer to copy filesystem name prefix into.
681 * Must have at least 'strlen(fsname) + 1' chars.
682 * @param [out] endptr if endptr isn't NULL it is set to end of fsname
/* Locate the '-' or ':' separator by starting at most 8 characters into
 * svname and scanning backwards towards its start; when fsname is non-NULL
 * the characters before the separator are copied into it and
 * NUL-terminated. */
685 int server_name2fsname(const char *svname, char *fsname, const char **endptr)
689 dash = svname + strnlen(svname, 8); /* max fsname length is 8 */
690 for (; dash > svname && *dash != '-' && *dash != ':'; dash--)
695 if (fsname != NULL) {
696 strncpy(fsname, svname, dash - svname);
697 fsname[dash - svname] = '\0';
705 EXPORT_SYMBOL(server_name2fsname);
708 * Get service name (svname) from string
710 * if endptr isn't NULL it is set to end of fsname *
/* Uses server_name2fsname() to find the separator in label, then copies
 * the text following it (dash + 1) into svname with strlcpy(); the
 * strlcpy() result is compared against svsize to detect truncation. */
712 int server_name2svname(const char *label, char *svname, const char **endptr,
718 /* We use server_name2fsname() just for parsing */
719 rc = server_name2fsname(label, NULL, &dash);
726 if (strlcpy(svname, dash + 1, svsize) >= svsize)
731 EXPORT_SYMBOL(server_name2svname);
734 * check server name is OST.
/* Parses svname with server_name2fsname() and compares the three
 * characters at the resulting position against "OST". */
736 int server_name_is_ost(const char *svname)
741 /* We use server_name2fsname() just for parsing */
742 rc = server_name2fsname(svname, NULL, &dash);
748 if (strncmp(dash, "OST", 3) == 0)
752 EXPORT_SYMBOL(server_name_is_ost);
755 * Get the index from the target name MDTXXXX/OSTXXXX
756 * rc = server type, or rc < 0 on error
/* Classify tgtname by its three-letter prefix ("MDT" gives
 * LDD_F_SV_TYPE_MDT, "OST" gives LDD_F_SV_TYPE_OST).  The literal "all" in
 * place of an index returns the type ORed with LDD_F_SV_ALL; otherwise the
 * index is parsed as hexadecimal with simple_strtoul(), which receives
 * endptr to report where parsing stopped. */
758 int target_name2index(const char *tgtname, __u32 *idx, const char **endptr)
760 const char *dash = tgtname;
764 if (strncmp(dash, "MDT", 3) == 0)
765 rc = LDD_F_SV_TYPE_MDT;
766 else if (strncmp(dash, "OST", 3) == 0)
767 rc = LDD_F_SV_TYPE_OST;
773 if (strncmp(dash, "all", 3) == 0) {
776 return rc | LDD_F_SV_ALL;
779 index = simple_strtoul(dash, (char **)endptr, 16);
784 EXPORT_SYMBOL(target_name2index);
786 /* Get the index from the obd name.
789 if endptr isn't NULL it is set to end of name */
/* Splits svname with server_name2fsname() and hands the remainder to
 * target_name2index().  If endptr was requested and the text one character
 * past *endptr starts with LUSTRE_MDC_NAME, *endptr is advanced by
 * sizeof(LUSTRE_MDC_NAME) so that it also skips that suffix. */
790 int server_name2index(const char *svname, __u32 *idx, const char **endptr)
795 /* We use server_name2fsname() just for parsing */
796 rc = server_name2fsname(svname, NULL, &dash);
801 rc = target_name2index(dash, idx, endptr);
805 /* Account for -mdc after index that is possible when specifying mdt */
806 if (endptr != NULL && strncmp(LUSTRE_MDC_NAME, *endptr + 1,
807 sizeof(LUSTRE_MDC_NAME)-1) == 0)
808 *endptr += sizeof(LUSTRE_MDC_NAME);
812 EXPORT_SYMBOL(server_name2index);
814 /*************** mount common between server and client ***************/
/* Common unmount tail: drops this superblock's MGC reference through
 * lustre_stop_mgc().  Results other than 0 and -ENOENT enter the error
 * branch, which holds both a CERROR and a CDEBUG noting that the MGC is
 * still in use by another obd (the -EBUSY case).  The reference on the
 * mounted disk is dropped afterwards; that call is not visible in this
 * listing. */
817 int lustre_common_put_super(struct super_block *sb)
822 CDEBUG(D_MOUNT, "dropping sb %p\n", sb);
824 /* Drop a ref to the MGC */
825 rc = lustre_stop_mgc(sb);
826 if (rc && (rc != -ENOENT)) {
828 CERROR("Can't stop MGC: %d\n", rc);
831 /* BUSY just means that there's some other obd that
832 needs the mgc. Let him clean it up. */
833 CDEBUG(D_MOUNT, "MGC still in use\n");
835 /* Drop a ref to the mounted disk */
840 EXPORT_SYMBOL(lustre_common_put_super);
/* Dump the parsed mount data at D_MOUNT level: profile (clients only),
 * device, flags, options, the soft and hard recovery times when non-zero,
 * and each excluded OST index. */
842 static void lmd_print(struct lustre_mount_data *lmd)
846 PRINT_CMD(D_MOUNT, " mount data:\n");
847 if (lmd_is_client(lmd))
848 PRINT_CMD(D_MOUNT, "profile: %s\n", lmd->lmd_profile);
849 PRINT_CMD(D_MOUNT, "device: %s\n", lmd->lmd_dev);
850 PRINT_CMD(D_MOUNT, "flags: %x\n", lmd->lmd_flags);
853 PRINT_CMD(D_MOUNT, "options: %s\n", lmd->lmd_opts);
855 if (lmd->lmd_recovery_time_soft)
856 PRINT_CMD(D_MOUNT, "recovery time soft: %d\n",
857 lmd->lmd_recovery_time_soft);
859 if (lmd->lmd_recovery_time_hard)
860 PRINT_CMD(D_MOUNT, "recovery time hard: %d\n",
861 lmd->lmd_recovery_time_hard);
863 for (i = 0; i < lmd->lmd_exclude_count; i++) {
864 PRINT_CMD(D_MOUNT, "exclude %d: OST%04x\n", i,
865 lmd->lmd_exclude[i]);
869 /* Is this server on the exclusion list */
/* Resolve svname to a (type, index) pair with server_name2index(); only
 * names reported as LDD_F_SV_TYPE_OST are considered.  The index is then
 * searched for in lmd->lmd_exclude[] and a CWARN is emitted on a match. */
870 int lustre_check_exclusion(struct super_block *sb, char *svname)
872 struct lustre_sb_info *lsi = s2lsi(sb);
873 struct lustre_mount_data *lmd = lsi->lsi_lmd;
878 rc = server_name2index(svname, &index, NULL);
879 if (rc != LDD_F_SV_TYPE_OST)
880 /* Only exclude OSTs */
883 CDEBUG(D_MOUNT, "Check exclusion %s (%d) in %d of %s\n", svname,
884 index, lmd->lmd_exclude_count, lmd->lmd_dev);
886 for(i = 0; i < lmd->lmd_exclude_count; i++) {
887 if (index == lmd->lmd_exclude[i]) {
888 CWARN("Excluding %s (on exclusion list)\n", svname);
895 /* mount -v -o exclude=lustre-OST0001:lustre-OST0002 -t lustre ... */
/* Parse the exclude= list (ptr arrives pointing at the '=') into a
 * temporary array of strlen(ptr) / 8 + 1 entries, keeping only names that
 * server_name2index() reports as OSTs and logging the others as ignored.
 * The collected indexes are then copied into a right-sized
 * lmd->lmd_exclude allocation and the temporary array is freed.
 * lmd_exclude_count is reset to 0 on a path next to the copy; the exact
 * condition is not visible in this listing. */
896 static int lmd_make_exclusion(struct lustre_mount_data *lmd, const char *ptr)
898 const char *s1 = ptr, *s2;
899 __u32 index, *exclude_list;
903 /* The shortest an ost name can be is 8 chars: -OST0000.
904 We don't actually know the fsname at this time, so in fact
905 a user could specify any fsname. */
906 devmax = strlen(ptr) / 8 + 1;
908 /* temp storage until we figure out how many we have */
909 OBD_ALLOC(exclude_list, sizeof(index) * devmax);
913 /* we enter this fn pointing at the '=' */
914 while (*s1 && *s1 != ' ' && *s1 != ',') {
916 rc = server_name2index(s1, &index, &s2);
918 CERROR("Can't parse server name '%s': rc = %d\n",
922 if (rc == LDD_F_SV_TYPE_OST)
923 exclude_list[lmd->lmd_exclude_count++] = index;
925 CDEBUG(D_MOUNT, "ignoring exclude %.*s: type = %#x\n",
926 (uint)(s2-s1), s1, rc);
928 /* now we are pointing at ':' (next exclude)
929 or ',' (end of excludes) */
930 if (lmd->lmd_exclude_count >= devmax)
933 if (rc >= 0) /* non-err */
936 if (lmd->lmd_exclude_count) {
937 /* permanent, freed in lustre_free_lsi */
938 OBD_ALLOC(lmd->lmd_exclude, sizeof(index) *
939 lmd->lmd_exclude_count);
940 if (lmd->lmd_exclude) {
941 memcpy(lmd->lmd_exclude, exclude_list,
942 sizeof(index) * lmd->lmd_exclude_count);
945 lmd->lmd_exclude_count = 0;
948 OBD_FREE(exclude_list, sizeof(index) * devmax);
/* Replace lmd->lmd_mgssec with a freshly allocated copy of the option
 * value at ptr.  Any previous value is freed first; strchr() looks for the
 * ',' that would end the value, and length bytes are copied and
 * NUL-terminated. */
952 static int lmd_parse_mgssec(struct lustre_mount_data *lmd, char *ptr)
957 if (lmd->lmd_mgssec != NULL) {
958 OBD_FREE(lmd->lmd_mgssec, strlen(lmd->lmd_mgssec) + 1);
959 lmd->lmd_mgssec = NULL;
962 tail = strchr(ptr, ',');
964 length = strlen(ptr);
968 OBD_ALLOC(lmd->lmd_mgssec, length + 1);
969 if (lmd->lmd_mgssec == NULL)
972 memcpy(lmd->lmd_mgssec, ptr, length);
973 lmd->lmd_mgssec[length] = '\0';
/* Store in *handle a newly allocated, NUL-terminated copy of the option
 * value at ptr.  handle and ptr are checked for NULL, an existing *handle
 * is freed first (sized as strlen + 1), and strchr() looks for the ','
 * that would end the value. */
977 static int lmd_parse_string(char **handle, char *ptr)
982 if ((handle == NULL) || (ptr == NULL))
985 if (*handle != NULL) {
986 OBD_FREE(*handle, strlen(*handle) + 1);
990 tail = strchr(ptr, ',');
992 length = strlen(ptr);
996 OBD_ALLOC(*handle, length + 1);
1000 memcpy(*handle, ptr, length);
1001 (*handle)[length] = '\0';
1006 /* Collect multiple values for mgsnid specifiers */
/* Consume the run of NIDs at *ptr (class_parse_nid_quiet() is called until
 * it fails) and append that text to lmd->lmd_mgs.  When lmd_mgs already
 * holds a value, the old string is copied into the new buffer first and
 * its terminator replaced by ':', so repeated options accumulate as
 * failover locations; the old buffer is then freed. */
1007 static int lmd_parse_mgs(struct lustre_mount_data *lmd, char **ptr)
1015 /* Find end of nidlist */
1016 while (class_parse_nid_quiet(tail, &nid, &tail) == 0) {}
1017 length = tail - *ptr;
1019 LCONSOLE_ERROR_MSG(0x159, "Can't parse NID '%s'\n", *ptr);
1023 if (lmd->lmd_mgs != NULL)
1024 oldlen = strlen(lmd->lmd_mgs) + 1;
1026 OBD_ALLOC(mgsnid, oldlen + length + 1);
1030 if (lmd->lmd_mgs != NULL) {
1031 /* Multiple mgsnid= are taken to mean failover locations */
1032 memcpy(mgsnid, lmd->lmd_mgs, oldlen);
1033 mgsnid[oldlen - 1] = ':';
1034 OBD_FREE(lmd->lmd_mgs, oldlen);
1036 memcpy(mgsnid + oldlen, *ptr, length);
1037 mgsnid[oldlen + length] = '\0';
1038 lmd->lmd_mgs = mgsnid;
1044 /** Parse mount line options
1045 * e.g. mount -v -t lustre -o abort_recov uml1:uml2:/lustre-client /mnt/lustre
1046 * dev is passed as device=uml1:/lustre by mount.lustre
/* Parse the comma/space separated mount option string into *lmd.
 *
 * Rejects what looks like old binary lmd data (magic check on the raw
 * buffer), allocates the 4096-byte lmd_params buffer, then walks the
 * options: flag options set LMD_FLG_* bits, recovery_time_soft/hard are
 * raised to at least OBD_RECOVERY_TIME_MIN, and PARAM_MGSNODE, mgssec=,
 * exclude=, svname=, param=, osd= and device= are handed to helpers or
 * copied.  Options handled here are removed from the string with
 * memmove().  A device name is required; a ":/" in it marks a client mount
 * and yields the "<fsname>-client" profile.  lmd_dev is duplicated from
 * devname and lmd_opts from whatever remains of the option string.
 *
 * NOTE(review): the param= branch appends to lmd_params with strncat() and
 * strcat(), and no length check against the 4096-byte buffer is visible in
 * this listing -- confirm one exists.
 * NOTE(review): options are matched by prefix with strncmp(), so the order
 * of the tests matters (e.g. "mgs" is tested after "mgssec=").
 */
1048 static int lmd_parse(char *options, struct lustre_mount_data *lmd)
1050 char *s1, *s2, *s3, *devname = NULL;
1051 struct lustre_mount_data *raw = (struct lustre_mount_data *)options;
1057 LCONSOLE_ERROR_MSG(0x162, "Missing mount data: check that "
1058 "/sbin/mount.lustre is installed.\n");
1062 /* Options should be a string - try to detect old lmd data */
1063 if ((raw->lmd_magic & 0xffffff00) == (LMD_MAGIC & 0xffffff00)) {
1064 LCONSOLE_ERROR_MSG(0x163, "You're using an old version of "
1065 "/sbin/mount.lustre. Please install "
1066 "version %s\n", LUSTRE_VERSION_STRING);
1069 lmd->lmd_magic = LMD_MAGIC;
1071 OBD_ALLOC(lmd->lmd_params, 4096);
1072 if (lmd->lmd_params == NULL)
1074 lmd->lmd_params[0] = '\0';
1076 /* Set default flags here */
1081 int time_min = OBD_RECOVERY_TIME_MIN;
1083 /* Skip whitespace and extra commas */
1084 while (*s1 == ' ' || *s1 == ',')
1088 /* Client options are parsed in ll_options: eg. flock,
1091 /* Parse non-ldiskfs options here. Rather than modifying
1092 ldiskfs, we just zero these out here */
1093 if (strncmp(s1, "abort_recov", 11) == 0) {
1094 lmd->lmd_flags |= LMD_FLG_ABORT_RECOV;
1096 } else if (strncmp(s1, "recovery_time_soft=", 19) == 0) {
1097 lmd->lmd_recovery_time_soft = max_t(int,
1098 simple_strtoul(s1 + 19, NULL, 10), time_min);
1100 } else if (strncmp(s1, "recovery_time_hard=", 19) == 0) {
1101 lmd->lmd_recovery_time_hard = max_t(int,
1102 simple_strtoul(s1 + 19, NULL, 10), time_min);
1104 } else if (strncmp(s1, "noir", 4) == 0) {
1105 lmd->lmd_flags |= LMD_FLG_NOIR; /* test purpose only. */
1107 } else if (strncmp(s1, "nosvc", 5) == 0) {
1108 lmd->lmd_flags |= LMD_FLG_NOSVC;
1110 } else if (strncmp(s1, "nomgs", 5) == 0) {
1111 lmd->lmd_flags |= LMD_FLG_NOMGS;
1113 } else if (strncmp(s1, "noscrub", 7) == 0) {
1114 lmd->lmd_flags |= LMD_FLG_NOSCRUB;
1116 } else if (strncmp(s1, PARAM_MGSNODE,
1117 sizeof(PARAM_MGSNODE) - 1) == 0) {
1118 s2 = s1 + sizeof(PARAM_MGSNODE) - 1;
1119 /* Assume the next mount opt is the first
1120 invalid nid we get to. */
1121 rc = lmd_parse_mgs(lmd, &s2);
1126 } else if (strncmp(s1, "writeconf", 9) == 0) {
1127 lmd->lmd_flags |= LMD_FLG_WRITECONF;
1129 } else if (strncmp(s1, "update", 6) == 0) {
1130 lmd->lmd_flags |= LMD_FLG_UPDATE;
1132 } else if (strncmp(s1, "virgin", 6) == 0) {
1133 lmd->lmd_flags |= LMD_FLG_VIRGIN;
1135 } else if (strncmp(s1, "noprimnode", 10) == 0) {
1136 lmd->lmd_flags |= LMD_FLG_NO_PRIMNODE;
1138 } else if (strncmp(s1, "mgssec=", 7) == 0) {
1139 rc = lmd_parse_mgssec(lmd, s1 + 7);
1143 /* ost exclusion list */
1144 } else if (strncmp(s1, "exclude=", 8) == 0) {
1145 rc = lmd_make_exclusion(lmd, s1 + 7);
1149 } else if (strncmp(s1, "mgs", 3) == 0) {
1151 lmd->lmd_flags |= LMD_FLG_MGS;
1153 } else if (strncmp(s1, "svname=", 7) == 0) {
1154 rc = lmd_parse_string(&lmd->lmd_profile, s1 + 7);
1158 } else if (strncmp(s1, "param=", 6) == 0) {
1160 char *tail = strchr(s1 + 6, ',');
1162 length = strlen(s1);
1165 char *param_str = tail + 1;
1166 int supplementary = 1;
1168 while (class_parse_nid_quiet(param_str, &nid,
1172 length = param_str - s1 - supplementary;
1175 strncat(lmd->lmd_params, s1 + 6, length);
1176 strcat(lmd->lmd_params, " ");
1177 s3 = s1 + 6 + length;
1179 } else if (strncmp(s1, "osd=", 4) == 0) {
1180 rc = lmd_parse_string(&lmd->lmd_osd_type, s1 + 4);
1185 /* Linux 2.4 doesn't pass the device, so we stuck it at the
1186 end of the options. */
1187 else if (strncmp(s1, "device=", 7) == 0) {
1189 /* terminate options right before device. device
1190 must be the last one. */
1196 s2 = strchr(s3, ',');
1204 memmove(s1, s2, strlen(s2) + 1);
1210 LCONSOLE_ERROR_MSG(0x164, "Can't find the device name "
1211 "(need mount option 'device=...')\n");
1215 s1 = strstr(devname, ":/");
1218 lmd->lmd_flags |= LMD_FLG_CLIENT;
1219 /* Remove leading /s from fsname */
1220 while (*++s1 == '/') ;
1221 /* Freed in lustre_free_lsi */
1222 OBD_ALLOC(lmd->lmd_profile, strlen(s1) + 8);
1223 if (!lmd->lmd_profile)
1225 sprintf(lmd->lmd_profile, "%s-client", s1);
1228 /* Freed in lustre_free_lsi */
1229 OBD_ALLOC(lmd->lmd_dev, strlen(devname) + 1);
1232 strcpy(lmd->lmd_dev, devname);
1234 /* Save mount options */
1235 s1 = options + strlen(options) - 1;
1236 while (s1 >= options && (*s1 == ',' || *s1 == ' '))
1238 if (*options != 0) {
1239 /* Freed in lustre_free_lsi */
1240 OBD_ALLOC(lmd->lmd_opts, strlen(options) + 1);
1243 strcpy(lmd->lmd_opts, options);
1247 lmd->lmd_magic = LMD_MAGIC;
1252 CERROR("Bad mount options %s\n", options);
/* Bundle handed to lustre_fill_super() as its data argument.  It is built
 * by lustre_mount() and lustre_get_sb() from the raw mount data and the
 * vfsmount (NULL when built by lustre_mount()). */
1256 struct lustre_mount_data2 {
1258 struct vfsmount *lmd2_mnt;
1261 /** This is the entry point for the mount call into Lustre.
1262 * This is called when a server or client is mounted,
1263 * and this is where we start setting things up.
1264 * @param data Mount options (e.g. -o flock,abort_recov)
/* Mount entry point shared by clients and servers.  Creates the lsi with
 * lustre_init_lsi(), waits for leftover obd cleanup with
 * obd_zombie_barrier(), and parses the option string from lmd2->lmd2_data
 * with lmd_parse() (failure gives -EINVAL).  Client mounts require a
 * registered client_fill_super hook, start the MGC with lustre_start_mgc()
 * and then call the hook; otherwise, when HAVE_SERVER_SUPPORT is defined,
 * server_fill_super() is called.  The outcome is reported with CERROR or
 * CDEBUG(D_SUPER). */
1266 int lustre_fill_super(struct super_block *sb, void *data, int silent)
1268 struct lustre_mount_data *lmd;
1269 struct lustre_mount_data2 *lmd2 = data;
1270 struct lustre_sb_info *lsi;
1274 CDEBUG(D_MOUNT|D_VFSTRACE, "VFS Op: sb %p\n", sb);
1276 lsi = lustre_init_lsi(sb);
1282 * Disable lockdep during mount, because mount locking patterns are
1288 * LU-639: the obd cleanup of last mount may not finish yet, wait here.
1290 obd_zombie_barrier();
1292 /* Figure out the lmd from the mount options */
1293 if (lmd_parse((char *)(lmd2->lmd2_data), lmd)) {
1295 GOTO(out, rc = -EINVAL);
1298 if (lmd_is_client(lmd)) {
1299 CDEBUG(D_MOUNT, "Mounting client %s\n", lmd->lmd_profile);
1300 if (!client_fill_super) {
1301 LCONSOLE_ERROR_MSG(0x165, "Nothing registered for "
1302 "client mount! Is the 'lustre' "
1303 "module loaded?\n");
1307 rc = lustre_start_mgc(sb);
1312 /* Connect and start */
1313 /* (should always be ll_fill_super) */
1314 rc = (*client_fill_super)(sb, lmd2->lmd2_mnt);
1315 /* c_f_s will call lustre_common_put_super on failure */
1318 #ifdef HAVE_SERVER_SUPPORT
1319 CDEBUG(D_MOUNT, "Mounting server from %s\n", lmd->lmd_dev);
1320 rc = server_fill_super(sb);
1321 /* s_f_s calls lustre_start_mgc after the mount because we need
1322 the MGS nids which are stored on disk. Plus, we may
1323 need to start the MGS first. */
1324 /* s_f_s will call server_put_super on failure */
1326 CERROR("This is client-side-only module, "
1327 "cannot handle server mount.\n");
1332 /* If error happens in fill_super() call, @lsi will be killed there.
1333 * This is why we do not put it here. */
1337 CERROR("Unable to mount %s (%d)\n",
1338 s2lsi(sb) ? lmd->lmd_dev : "", rc);
1340 CDEBUG(D_SUPER, "Mount %s complete\n",
1348 /* We can't call ll_fill_super by name because it lives in a module that
1349 must be loaded after this one. */
/* Store cfs in the file-scope client_fill_super pointer, which
 * lustre_fill_super() calls for client mounts. */
1350 void lustre_register_client_fill_super(int (*cfs)(struct super_block *sb,
1351 struct vfsmount *mnt))
1353 client_fill_super = cfs;
1355 EXPORT_SYMBOL(lustre_register_client_fill_super);
/* Store cfs in the file-scope kill_super_cb pointer, which
 * lustre_kill_super() calls for non-server superblocks. */
1357 void lustre_register_kill_super_cb(void (*cfs)(struct super_block *sb))
1359 kill_super_cb = cfs;
1361 EXPORT_SYMBOL(lustre_register_kill_super_cb);
1363 /***************** FS registration ******************/
1364 #ifdef HAVE_FSTYPE_MOUNT
/* Mount entry point used when HAVE_FSTYPE_MOUNT is defined: wraps the raw
 * mount data in a lustre_mount_data2 with a NULL vfsmount and lets
 * mount_nodev() call lustre_fill_super(). */
1365 struct dentry *lustre_mount(struct file_system_type *fs_type, int flags,
1366 const char *devname, void *data)
1368 struct lustre_mount_data2 lmd2 = { data, NULL };
1370 return mount_nodev(fs_type, flags, &lmd2, lustre_fill_super);
/* get_sb-style entry point: wraps the raw mount data and mnt in a
 * lustre_mount_data2 and lets get_sb_nodev() call lustre_fill_super(). */
1373 int lustre_get_sb(struct file_system_type *fs_type, int flags,
1374 const char *devname, void * data, struct vfsmount *mnt)
1376 struct lustre_mount_data2 lmd2 = { data, mnt };
1378 return get_sb_nodev(fs_type, flags, &lmd2, lustre_fill_super, mnt);
/* kill_sb hook: runs the registered kill_super_cb for superblocks that
 * have an lsi and are not servers, then calls kill_anon_super(). */
1382 void lustre_kill_super(struct super_block *sb)
1384 struct lustre_sb_info *lsi = s2lsi(sb);
1386 if (kill_super_cb && lsi && !IS_SERVER(lsi))
1387 (*kill_super_cb)(sb);
1389 kill_anon_super(sb);
1392 /** Register the "lustre" fs type
/* file_system_type descriptor passed to register_filesystem() and
 * unregister_filesystem() below.  The mount entry point is .mount
 * (lustre_mount) when HAVE_FSTYPE_MOUNT is defined, with .get_sb
 * (lustre_get_sb) as the alternative; teardown goes through
 * lustre_kill_super(). */
1394 struct file_system_type lustre_fs_type = {
1395 .owner = THIS_MODULE,
1397 #ifdef HAVE_FSTYPE_MOUNT
1398 .mount = lustre_mount,
1400 .get_sb = lustre_get_sb,
1402 .kill_sb = lustre_kill_super,
1403 .fs_flags = FS_BINARY_MOUNTDATA | FS_REQUIRES_DEV |
1404 FS_HAS_FIEMAP | FS_RENAME_DOES_D_MOVE,
/* Register lustre_fs_type with the VFS; returns register_filesystem()'s
 * result. */
1407 int lustre_register_fs(void)
1409 return register_filesystem(&lustre_fs_type);
/* Remove lustre_fs_type from the VFS; returns unregister_filesystem()'s
 * result. */
1412 int lustre_unregister_fs(void)
1414 return unregister_filesystem(&lustre_fs_type);