4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright (c) 2011, 2016, Intel Corporation.
29 * This file is part of Lustre, http://www.lustre.org/
30 * Lustre is a trademark of Sun Microsystems, Inc.
32 * lustre/obdclass/obd_mount.c
34 * Client mount routines
36 * Author: Nathan Rutman <nathan@clusterfs.com>
/* Debug subsystem tag for this file (presumably read by the CDEBUG/CERROR
 * macros - confirm against the libcfs debug headers). */
40 #define DEBUG_SUBSYSTEM S_CLASS
/* Debug mask used for the mount-related CDEBUG()/PRINT_CMD() calls below. */
41 #define D_MOUNT (D_SUPER|D_CONFIG/*|D_WARNING */)
/* Output macro used by lmd_print(); currently an alias for CDEBUG. */
42 #define PRINT_CMD CDEBUG
45 #include <obd_class.h>
46 #include <linux/version.h>
47 #include <lustre_log.h>
48 #include <lustre_disk.h>
49 #include <uapi/linux/lustre/lustre_param.h>
/* Client mount entry point.  Set by lustre_register_client_fill_super() and
 * called from lustre_fill_super() when the mount data describes a client. */
51 static int (*client_fill_super)(struct super_block *sb,
52 struct vfsmount *mnt);
/* Optional callback.  Set by lustre_register_kill_super_cb() and invoked from
 * lustre_kill_super() for superblocks that are not servers. */
54 static void (*kill_super_cb)(struct super_block *sb);
56 /**************** config llog ********************/
58 /** Get a config log from the MGS and process it.
59 * This func is called for both clients and servers.
60 * Continue to process new statements appended to the logs
61 * (whenever the config lock is revoked) until lustre_end_log is called.
63 * @param sb The superblock is used by the MGC to write to the local copy of the config log
65 * @param logname The name of the llog to replicate from the MGS
66 * @param cfg Since the same mgc may be used to follow multiple config logs
67 * (e.g. ost1, ost2, client), the config_llog_instance keeps the state for
68 * this log, and is added to the mgc's list of logs to follow.
/*
 * Start following the config log "logname" through the MGC attached to this
 * superblock (lsi->lsi_mgc).  An LCFG_LOG_START lustre_cfg is assembled from
 * the MGC name (buffer 0), the log name (buffer 1), *cfg (buffer 2) and the
 * sb pointer (buffer 3), handed to obd_process_config() and then freed.
 * A failing rc is reported on the console with one of the two messages below.
 */
70 int lustre_process_log(struct super_block *sb, char *logname,
71 struct config_llog_instance *cfg)
73 struct lustre_cfg *lcfg;
74 struct lustre_cfg_bufs *bufs;
75 struct lustre_sb_info *lsi = s2lsi(sb);
76 struct obd_device *mgc = lsi->lsi_mgc;
87 /* mgc_process_config */
88 lustre_cfg_bufs_reset(bufs, mgc->obd_name);
89 lustre_cfg_bufs_set_string(bufs, 1, logname);
90 lustre_cfg_bufs_set(bufs, 2, cfg, sizeof(*cfg));
/* Buffer 3 carries the superblock pointer value itself, not the superblock. */
91 lustre_cfg_bufs_set(bufs, 3, &sb, sizeof(sb));
/* lcfg is sized from the buffer count and lengths collected in bufs. */
92 OBD_ALLOC(lcfg, lustre_cfg_len(bufs->lcfg_bufcount, bufs->lcfg_buflen));
94 GOTO(out, rc = -ENOMEM);
95 lustre_cfg_init(lcfg, LCFG_LOG_START, bufs);
97 rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
98 OBD_FREE(lcfg, lustre_cfg_len(lcfg->lcfg_bufcount, lcfg->lcfg_buflens));
/* NOTE(review): the first two literals below concatenate to "'%s'failed" -
 * there is no space after the closing quote, unlike the 0x15c message. */
103 LCONSOLE_ERROR_MSG(0x15b, "%s: The configuration from log '%s'"
104 "failed from the MGS (%d). Make sure this "
105 "client and the MGS are running compatible "
106 "versions of Lustre.\n",
107 mgc->obd_name, logname, rc);
109 LCONSOLE_ERROR_MSG(0x15c, "%s: The configuration from log '%s' "
110 "failed (%d). This may be the result of "
111 "communication errors between this node and "
112 "the MGS, a bad configuration, or other "
113 "errors. See the syslog for more "
114 "information.\n", mgc->obd_name, logname,
117 /* class_obd_list(); */
120 EXPORT_SYMBOL(lustre_process_log);
122 /* Stop watching this config log for updates.
 * Counterpart of lustre_process_log(): an LCFG_LOG_END lustre_cfg built from
 * the MGC name, the log name and *cfg is passed to the MGC through
 * obd_process_config() and freed afterwards. */
123 int lustre_end_log(struct super_block *sb, char *logname,
124 struct config_llog_instance *cfg)
126 struct lustre_cfg *lcfg;
127 struct lustre_cfg_bufs bufs;
128 struct lustre_sb_info *lsi = s2lsi(sb);
129 struct obd_device *mgc = lsi->lsi_mgc;
136 /* mgc_process_config */
137 lustre_cfg_bufs_reset(&bufs, mgc->obd_name);
138 lustre_cfg_bufs_set_string(&bufs, 1, logname);
140 lustre_cfg_bufs_set(&bufs, 2, cfg, sizeof(*cfg));
/* Unlike lustre_process_log(), bufs lives on the stack here. */
141 OBD_ALLOC(lcfg, lustre_cfg_len(bufs.lcfg_bufcount, bufs.lcfg_buflen));
144 lustre_cfg_init(lcfg, LCFG_LOG_END, &bufs);
145 rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
146 OBD_FREE(lcfg, lustre_cfg_len(lcfg->lcfg_bufcount, lcfg->lcfg_buflens));
149 EXPORT_SYMBOL(lustre_end_log);
151 /**************** obd start *******************/
153 /** lustre_cfg_bufs are a holdover from 1.4; we can still set these up from
154 * lctl (and do for echo cli/srv).
/*
 * Build and run one configuration command.
 * cfgname becomes buffer 0 of the lustre_cfg, s1..s4 go into string buffers
 * 1..4, cmd is the LCFG_* command and nid is stored in lcfg_nid.  The record
 * is processed by class_process_config() and freed before returning.
 */
156 static int do_lcfg(char *cfgname, lnet_nid_t nid, int cmd,
157 char *s1, char *s2, char *s3, char *s4)
159 struct lustre_cfg_bufs bufs;
160 struct lustre_cfg *lcfg = NULL;
163 CDEBUG(D_TRACE, "lcfg %s %#x %s %s %s %s\n", cfgname,
164 cmd, s1, s2, s3, s4);
166 lustre_cfg_bufs_reset(&bufs, cfgname);
168 lustre_cfg_bufs_set_string(&bufs, 1, s1);
170 lustre_cfg_bufs_set_string(&bufs, 2, s2);
172 lustre_cfg_bufs_set_string(&bufs, 3, s3);
174 lustre_cfg_bufs_set_string(&bufs, 4, s4);
/* Allocation size is derived from the buffers registered above. */
176 OBD_ALLOC(lcfg, lustre_cfg_len(bufs.lcfg_bufcount, bufs.lcfg_buflen));
179 lustre_cfg_init(lcfg, cmd, &bufs);
180 lcfg->lcfg_nid = nid;
181 rc = class_process_config(lcfg);
182 OBD_FREE(lcfg, lustre_cfg_len(lcfg->lcfg_bufcount, lcfg->lcfg_buflens));
186 /** Call class_attach and class_setup. These methods in turn call
187 * obd type-specific methods.
/*
 * Bring up an obd in two do_lcfg() steps: LCFG_ATTACH with (type, uuid),
 * then LCFG_SETUP with s1..s4 as setup arguments.  Both steps log with
 * CERROR on failure; the setup error path also issues LCFG_DETACH so that
 * the attach is undone.
 */
189 int lustre_start_simple(char *obdname, char *type, char *uuid,
190 char *s1, char *s2, char *s3, char *s4)
193 CDEBUG(D_MOUNT, "Starting obd %s (typ=%s)\n", obdname, type);
195 rc = do_lcfg(obdname, 0, LCFG_ATTACH, type, uuid, NULL, NULL);
197 CERROR("%s attach error %d\n", obdname, rc);
200 rc = do_lcfg(obdname, 0, LCFG_SETUP, s1, s2, s3, s4);
202 CERROR("%s setup error %d\n", obdname, rc);
/* The return value of the detach is deliberately not checked here. */
203 do_lcfg(obdname, 0, LCFG_DETACH, NULL, NULL, NULL, NULL);
/* Taken by both lustre_start_mgc() and lustre_stop_mgc() around their work on
 * the MGC obd and its cl_mgc_refcount. */
208 static DEFINE_MUTEX(mgc_start_lock);
210 /** Set up a mgc obd to process startup logs
212 * \param sb [in] super block of the mgc obd
214 * \retval 0 success, otherwise error code
/*
 * Find or create the MGC obd this mount uses to talk to the MGS.
 *
 * 1. Pick one MGS nid to derive the MGC name (LUSTRE_MGC_OBDNAME + nid
 *    string): from lmd_mgs or a local non-loopback id on servers, from the
 *    device string (lmd_dev) on clients.
 * 2. Under mgc_start_lock, look the name up.  An existing, non-stopping MGC
 *    is reused: its cl_mgc_refcount is incremented, clients get an imperative
 *    recovery (IR) compatibility check, and KEY_INIT_RECOV_BACKUP is set.
 * 3. Otherwise the MGS nids are registered with LCFG_ADD_UUID, the obd is
 *    started with lustre_start_simple(), failover nids/connections are added,
 *    cl_mgc_refcount is set to 1 and obd_connect() establishes the export
 *    stored in cl_mgc_mgsexp.
 *
 * mgcname and niduuid are temporary and freed before returning.
 */
216 int lustre_start_mgc(struct super_block *sb)
218 struct obd_connect_data *data = NULL;
219 struct lustre_sb_info *lsi = s2lsi(sb);
220 struct obd_device *obd;
221 struct obd_export *exp;
222 struct obd_uuid *uuid;
225 char nidstr[LNET_NIDSTR_SIZE];
226 char *mgcname = NULL, *niduuid = NULL, *mgssec = NULL;
228 int rc = 0, i = 0, j;
232 LASSERT(lsi->lsi_lmd);
234 /* Find the first non-lo MGS nid for our MGC name */
235 if (IS_SERVER(lsi)) {
236 /* mount -o mgsnode=nid */
237 ptr = lsi->lsi_lmd->lmd_mgs;
238 if (lsi->lsi_lmd->lmd_mgs &&
239 (class_parse_nid(lsi->lsi_lmd->lmd_mgs, &nid, &ptr) == 0)) {
241 } else if (IS_MGS(lsi)) {
242 struct lnet_process_id id;
244 while ((rc = LNetGetId(i++, &id)) != -ENOENT) {
245 if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND)
252 } else { /* client */
253 /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
254 ptr = lsi->lsi_lmd->lmd_dev;
255 if (class_parse_nid(ptr, &nid, &ptr) == 0)
259 CERROR("No valid MGS nids found.\n");
263 mutex_lock(&mgc_start_lock);
265 libcfs_nid2str_r(nid, nidstr, sizeof(nidstr));
/* len covers the MGC name plus its NUL; niduuid gets two more bytes, which is
 * room for the "_" separator and a single hex digit. */
266 len = strlen(LUSTRE_MGC_OBDNAME) + strlen(nidstr) + 1;
267 OBD_ALLOC(mgcname, len);
268 OBD_ALLOC(niduuid, len + 2);
269 if (mgcname == NULL || niduuid == NULL)
270 GOTO(out_free, rc = -ENOMEM);
271 snprintf(mgcname, len, "%s%s", LUSTRE_MGC_OBDNAME, nidstr);
/* An absent mgssec option is passed on as the empty string. */
273 mgssec = lsi->lsi_lmd->lmd_mgssec ? lsi->lsi_lmd->lmd_mgssec : "";
277 GOTO(out_free, rc = -ENOMEM);
/* Reuse path: an MGC with this name already exists and is not stopping. */
279 obd = class_name2obd(mgcname);
280 if (obd && !obd->obd_stopping) {
283 rc = obd_set_info_async(NULL, obd->obd_self_export,
284 strlen(KEY_MGSSEC), KEY_MGSSEC,
285 strlen(mgssec), mgssec, NULL);
289 /* Re-using an existing MGC */
290 atomic_inc(&obd->u.cli.cl_mgc_refcount);
292 /* IR compatibility check, only for clients */
293 if (lmd_is_client(lsi->lsi_lmd)) {
295 int vallen = sizeof(*data);
296 __u32 *flags = &lsi->lsi_lmd->lmd_flags;
298 rc = obd_get_info(NULL, obd->obd_self_export,
299 strlen(KEY_CONN_DATA), KEY_CONN_DATA,
302 has_ir = OCD_HAS_FLAG(data, IMP_RECOV);
/* True when the existing MGC's IR state differs from what this mount asked
 * for; the mount's LMD_FLG_NOIR flag is then forced to match the MGC. */
303 if (has_ir ^ !(*flags & LMD_FLG_NOIR)) {
304 /* LMD_FLG_NOIR is for test purpose only */
306 "Trying to mount a client with IR setting "
307 "not compatible with current mgc. "
308 "Force to use current mgc setting that is "
310 has_ir ? "enabled" : "disabled");
312 *flags &= ~LMD_FLG_NOIR;
314 *flags |= LMD_FLG_NOIR;
319 /* If we are restarting the MGS, don't try to keep the MGC's
320 old connection, or registration will fail. */
322 CDEBUG(D_MOUNT, "New MGS with live MGC\n");
326 /* Try all connections, but only once (again).
327 We don't want to block another target from starting
328 (using its local copy of the log), but we do want to connect
329 if at all possible. */
331 CDEBUG(D_MOUNT, "%s: Set MGC reconnect %d\n", mgcname,recov_bk);
332 rc = obd_set_info_async(NULL, obd->obd_self_export,
333 sizeof(KEY_INIT_RECOV_BACKUP),
334 KEY_INIT_RECOV_BACKUP,
335 sizeof(recov_bk), &recov_bk, NULL);
/* Creation path: no usable MGC was found, so one is configured from scratch. */
339 CDEBUG(D_MOUNT, "Start MGC '%s'\n", mgcname);
341 /* Add the primary nids for the MGS */
/* niduuid ("<mgcname>_<i>") is the uuid the nids below are registered under. */
343 snprintf(niduuid, len + 2, "%s_%x", mgcname, i);
344 if (IS_SERVER(lsi)) {
345 ptr = lsi->lsi_lmd->lmd_mgs;
346 CDEBUG(D_MOUNT, "mgs nids %s.\n", ptr);
348 /* Use local nids (including LO) */
349 struct lnet_process_id id;
351 while ((rc = LNetGetId(i++, &id)) != -ENOENT) {
352 rc = do_lcfg(mgcname, id.nid, LCFG_ADD_UUID,
353 niduuid, NULL, NULL, NULL);
356 /* Use mgsnode= nids */
357 /* mount -o mgsnode=nid */
358 if (lsi->lsi_lmd->lmd_mgs) {
359 ptr = lsi->lsi_lmd->lmd_mgs;
360 } else if (class_find_param(ptr, PARAM_MGSNODE,
362 CERROR("No MGS nids given.\n");
363 GOTO(out_free, rc = -EINVAL);
366 * Add primary MGS nid(s).
367 * Multiple nids on one MGS node are separated
370 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
371 rc = do_lcfg(mgcname, nid, LCFG_ADD_UUID,
372 niduuid, NULL, NULL, NULL);
375 /* Stop at the first failover nid */
380 } else { /* client */
381 /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
382 ptr = lsi->lsi_lmd->lmd_dev;
383 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
384 rc = do_lcfg(mgcname, nid, LCFG_ADD_UUID,
385 niduuid, NULL, NULL, NULL);
388 /* Stop at the first failover nid */
394 CERROR("No valid MGS nids found.\n");
395 GOTO(out_free, rc = -EINVAL);
397 lsi->lsi_lmd->lmd_mgs_failnodes = 1;
399 /* Random uuid for MGC allows easier reconnects */
402 GOTO(out_free, rc = -ENOMEM);
404 ll_generate_random_uuid(uuidc);
405 class_uuid_unparse(uuidc, uuid);
408 rc = lustre_start_simple(mgcname, LUSTRE_MGC_NAME,
409 (char *)uuid->uuid, LUSTRE_MGS_OBDNAME,
410 niduuid, NULL, NULL);
415 /* Add any failover MGS nids */
417 while (ptr && ((*ptr == ':' ||
418 class_find_param(ptr, PARAM_MGSNODE, &ptr) == 0))) {
419 /* New failover node */
/* NOTE(review): unbounded sprintf into a len + 2 byte buffer, which only fits
 * one hex digit of i - confirm i stays below 0x10 or switch to snprintf. */
420 sprintf(niduuid, "%s_%x", mgcname, i);
422 while (class_parse_nid_quiet(ptr, &nid, &ptr) == 0) {
423 rc = do_lcfg(mgcname, nid, LCFG_ADD_UUID,
424 niduuid, NULL, NULL, NULL);
431 rc = do_lcfg(mgcname, 0, LCFG_ADD_CONN,
432 niduuid, NULL, NULL, NULL);
/* Recorded so lustre_stop_mgc() knows how many "<name>_<i>" uuids to delete. */
440 lsi->lsi_lmd->lmd_mgs_failnodes = i;
442 obd = class_name2obd(mgcname);
444 CERROR("Can't find mgcobd %s\n", mgcname);
445 GOTO(out_free, rc = -ENOTCONN);
448 rc = obd_set_info_async(NULL, obd->obd_self_export,
449 strlen(KEY_MGSSEC), KEY_MGSSEC,
450 strlen(mgssec), mgssec, NULL);
454 /* Keep a refcount of servers/clients who started with "mount",
455 so we know when we can get rid of the mgc. */
456 atomic_set(&obd->u.cli.cl_mgc_refcount, 1);
458 /* We connect to the MGS at setup, and don't disconnect until cleanup */
459 data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_AT |
460 OBD_CONNECT_FULL20 | OBD_CONNECT_IMP_RECOV |
461 OBD_CONNECT_LVB_TYPE |
462 OBD_CONNECT_BULK_MBITS | OBD_CONNECT_BARRIER;
464 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 0, 53, 0)
465 data->ocd_connect_flags |= OBD_CONNECT_MNE_SWAB;
/* A client mounted with "noir" does not advertise imperative recovery. */
468 if (lmd_is_client(lsi->lsi_lmd) &&
469 lsi->lsi_lmd->lmd_flags & LMD_FLG_NOIR)
470 data->ocd_connect_flags &= ~OBD_CONNECT_IMP_RECOV;
471 data->ocd_version = LUSTRE_VERSION_CODE;
472 rc = obd_connect(NULL, &exp, obd, &(obd->obd_uuid), data, NULL);
474 CERROR("connect failed %d\n", rc);
478 obd->u.cli.cl_mgc_mgsexp = exp;
481 /* Keep the mgc info in the sb. Note that many lsi's can point
485 mutex_unlock(&mgc_start_lock);
490 OBD_FREE(mgcname, len);
492 OBD_FREE(niduuid, len + 2);
/*
 * Drop this mount's reference on the MGC obd.
 * While other references remain the function leaves with rc = -EBUSY.  When
 * the last reference goes away the MGC is marked obd_no_recov, its MGS export
 * is disconnected, the obd is cleaned up with class_manual_cleanup(), and the
 * "<obd_name>_<i>" nid uuids (one per lmd_mgs_failnodes) are deleted.
 * Everything after the initial checks runs under mgc_start_lock.
 */
496 static int lustre_stop_mgc(struct super_block *sb)
498 struct lustre_sb_info *lsi = s2lsi(sb);
499 struct obd_device *obd;
500 char *niduuid = NULL, *ptr = NULL;
501 int i, rc = 0, len = 0;
511 mutex_lock(&mgc_start_lock);
512 LASSERT(atomic_read(&obd->u.cli.cl_mgc_refcount) > 0);
513 if (!atomic_dec_and_test(&obd->u.cli.cl_mgc_refcount)) {
514 /* This is not fatal, every client that stops
515 will call in here. */
516 CDEBUG(D_MOUNT, "mgc still has %d references.\n",
517 atomic_read(&obd->u.cli.cl_mgc_refcount));
518 GOTO(out, rc = -EBUSY);
521 /* The MGC has no recoverable data in any case.
522 * force shutdown set in umount_begin */
523 obd->obd_no_recov = 1;
525 if (obd->u.cli.cl_mgc_mgsexp) {
526 /* An error is not fatal, if we are unable to send the
527 disconnect mgs ping evictor cleans up the export */
528 rc = obd_disconnect(obd->u.cli.cl_mgc_mgsexp);
530 CDEBUG(D_MOUNT, "disconnect failed %d\n", rc);
533 /* Save the obdname for cleaning the nid uuids, which are
535 len = strlen(obd->obd_name) + 6;
536 OBD_ALLOC(niduuid, len);
538 strcpy(niduuid, obd->obd_name);
539 ptr = niduuid + strlen(niduuid);
542 rc = class_manual_cleanup(obd);
546 /* Clean the nid uuids */
548 GOTO(out, rc = -ENOMEM);
/* ptr points at the end of the saved obd name inside niduuid; the buffer was
 * sized strlen(obd_name) + 6, i.e. room for "_", up to four hex digits and
 * the NUL. */
550 for (i = 0; i < lsi->lsi_lmd->lmd_mgs_failnodes; i++) {
551 sprintf(ptr, "_%x", i);
552 rc = do_lcfg(LUSTRE_MGC_OBDNAME, 0, LCFG_DEL_UUID,
553 niduuid, NULL, NULL, NULL);
/* NOTE(review): the message says "MDC" although this code handles the MGC -
 * presumably a typo in the log text; confirm before changing it. */
555 CERROR("del MDC UUID %s failed: rc = %d\n",
560 OBD_FREE(niduuid, len);
562 /* class_import_put will get rid of the additional connections */
563 mutex_unlock(&mgc_start_lock);
567 /***************** lustre superblock **************/
/*
 * Create the lustre_sb_info for sb and its embedded lustre_mount_data.
 * The visible initialisation zeroes the lmd exclusion count and recovery
 * times, hooks the lsi into the superblock through s2lsi_nocast(), starts
 * lsi_mounts at 1, selects LSI_UMOUNT_FAILOVER as the default umount style
 * and prepares the lwp list and its spinlock.
 */
569 static struct lustre_sb_info *lustre_init_lsi(struct super_block *sb)
571 struct lustre_sb_info *lsi;
577 OBD_ALLOC_PTR(lsi->lsi_lmd);
583 lsi->lsi_lmd->lmd_exclude_count = 0;
584 lsi->lsi_lmd->lmd_recovery_time_soft = 0;
585 lsi->lsi_lmd->lmd_recovery_time_hard = 0;
586 s2lsi_nocast(sb) = lsi;
587 /* we take 1 extra ref for our setup */
588 atomic_set(&lsi->lsi_mounts, 1);
590 /* Default umount style */
591 lsi->lsi_flags = LSI_UMOUNT_FAILOVER;
592 INIT_LIST_HEAD(&lsi->lsi_lwp_list);
593 spin_lock_init(&lsi->lsi_lwp_lock);
/*
 * Release the lustre_sb_info attached to sb.
 * Asserts that no mount references remain (lsi_mounts == 0) and that the
 * client-side lsi_llsbi is already gone, frees every string owned by
 * lsi->lsi_lmd plus the exclusion array, frees the lmd itself and clears the
 * superblock's lsi pointer.
 *
 * The strings are freed with strlen() + 1 as size, so each one must have been
 * allocated with exactly that size.
 */
598 static int lustre_free_lsi(struct super_block *sb)
600 struct lustre_sb_info *lsi = s2lsi(sb);
603 LASSERT(lsi != NULL);
604 CDEBUG(D_MOUNT, "Freeing lsi %p\n", lsi);
606 /* someone didn't call server_put_mount. */
607 LASSERT(atomic_read(&lsi->lsi_mounts) == 0);
609 if (lsi->lsi_lmd != NULL) {
610 if (lsi->lsi_lmd->lmd_dev != NULL)
611 OBD_FREE(lsi->lsi_lmd->lmd_dev,
612 strlen(lsi->lsi_lmd->lmd_dev) + 1);
613 if (lsi->lsi_lmd->lmd_profile != NULL)
614 OBD_FREE(lsi->lsi_lmd->lmd_profile,
615 strlen(lsi->lsi_lmd->lmd_profile) + 1);
616 if (lsi->lsi_lmd->lmd_fileset != NULL)
617 OBD_FREE(lsi->lsi_lmd->lmd_fileset,
618 strlen(lsi->lsi_lmd->lmd_fileset) + 1);
619 if (lsi->lsi_lmd->lmd_mgssec != NULL)
620 OBD_FREE(lsi->lsi_lmd->lmd_mgssec,
621 strlen(lsi->lsi_lmd->lmd_mgssec) + 1);
622 if (lsi->lsi_lmd->lmd_opts != NULL)
623 OBD_FREE(lsi->lsi_lmd->lmd_opts,
624 strlen(lsi->lsi_lmd->lmd_opts) + 1);
/* The exclusion array is sized by its element count, not by a string length. */
625 if (lsi->lsi_lmd->lmd_exclude_count)
626 OBD_FREE(lsi->lsi_lmd->lmd_exclude,
627 sizeof(lsi->lsi_lmd->lmd_exclude[0]) *
628 lsi->lsi_lmd->lmd_exclude_count);
629 if (lsi->lsi_lmd->lmd_mgs != NULL)
630 OBD_FREE(lsi->lsi_lmd->lmd_mgs,
631 strlen(lsi->lsi_lmd->lmd_mgs) + 1);
632 if (lsi->lsi_lmd->lmd_osd_type != NULL)
633 OBD_FREE(lsi->lsi_lmd->lmd_osd_type,
634 strlen(lsi->lsi_lmd->lmd_osd_type) + 1);
/* NOTE(review): lmd_parse() allocates lmd_params with LMD_PARAMS_MAXLEN but
 * the size freed here is a hard-coded 4096 - confirm the two are equal. */
635 if (lsi->lsi_lmd->lmd_params != NULL)
636 OBD_FREE(lsi->lsi_lmd->lmd_params, 4096);
637 if (lsi->lsi_lmd->lmd_nidnet != NULL)
638 OBD_FREE(lsi->lsi_lmd->lmd_nidnet,
639 strlen(lsi->lsi_lmd->lmd_nidnet) + 1);
641 OBD_FREE_PTR(lsi->lsi_lmd);
644 LASSERT(lsi->lsi_llsbi == NULL);
646 s2lsi_nocast(sb) = NULL;
651 /* The lsi has one reference for every server that is using the disk -
652 e.g. MDT, MGS, and potentially MGC */
/* Drop one lsi_mounts reference.  When it was the last one and this is a
 * server with an OSD export, the dt device reference is put, the export's
 * obd_lvfs_ctxt.dt and lsi_dt_dev are cleared, and the export is disconnected
 * before waiting for zombie obds to be destroyed. */
653 int lustre_put_lsi(struct super_block *sb)
655 struct lustre_sb_info *lsi = s2lsi(sb);
658 LASSERT(lsi != NULL);
660 CDEBUG(D_MOUNT, "put %p %d\n", sb, atomic_read(&lsi->lsi_mounts));
661 if (atomic_dec_and_test(&lsi->lsi_mounts)) {
662 if (IS_SERVER(lsi) && lsi->lsi_osd_exp) {
663 lu_device_put(&lsi->lsi_dt_dev->dd_lu_dev);
664 lsi->lsi_osd_exp->exp_obd->obd_lvfs_ctxt.dt = NULL;
665 lsi->lsi_dt_dev = NULL;
666 obd_disconnect(lsi->lsi_osd_exp);
667 /* wait till OSD is gone */
668 obd_zombie_barrier();
677 * <FSNAME><SEPARATOR><TYPE><INDEX>
678 * FSNAME is between 1 and 8 characters (inclusive).
679 * Excluded characters are '/' and ':'
680 * SEPARATOR is either ':' or '-'
681 * TYPE: "OST", "MDT", etc.
682 * INDEX: Hex representation of the index
685 /** Get the fsname ("lustre") from the server name ("lustre-OST003F").
686 * @param [in] svname server name including type and index
687 * @param [out] fsname Buffer to copy filesystem name prefix into.
688 * Must have at least 'strlen(fsname) + 1' chars.
689 * @param [out] endptr if endptr isn't NULL it is set to end of fsname
/*
 * Split the filesystem name off the front of a server name.
 * The search starts at most LUSTRE_MAXFSNAME characters into svname and
 * walks backwards until it finds a '-' or ':' separator (or reaches the
 * start of the string).  When fsname is non-NULL the characters before the
 * separator are copied into it and NUL-terminated explicitly, because
 * strncpy() does not terminate a truncated copy.
 */
692 int server_name2fsname(const char *svname, char *fsname, const char **endptr)
696 dash = svname + strnlen(svname, LUSTRE_MAXFSNAME);
697 for (; dash > svname && *dash != '-' && *dash != ':'; dash--)
702 if (fsname != NULL) {
703 strncpy(fsname, svname, dash - svname);
704 fsname[dash - svname] = '\0';
712 EXPORT_SYMBOL(server_name2fsname);
715 * Get service name (svname) from string
717 * if endptr isn't NULL it is set to end of fsname *
/*
 * Copy the part of label that follows the fsname separator into svname.
 * server_name2fsname() is used only to locate the separator.  strlcpy()
 * returns the length of the source string, so a result >= svsize means the
 * name did not fit into the caller's buffer.
 */
719 int server_name2svname(const char *label, char *svname, const char **endptr,
725 /* We use server_name2fsname() just for parsing */
726 rc = server_name2fsname(label, NULL, &dash);
733 if (strlcpy(svname, dash + 1, svsize) >= svsize)
738 EXPORT_SYMBOL(server_name2svname);
741 * check server name is OST.
/*
 * Test whether a server name denotes an OST: the name is parsed with
 * server_name2fsname() and the type field is compared against "OST".
 */
743 int server_name_is_ost(const char *svname)
748 /* We use server_name2fsname() just for parsing */
749 rc = server_name2fsname(svname, NULL, &dash);
755 if (strncmp(dash, "OST", 3) == 0)
759 EXPORT_SYMBOL(server_name_is_ost);
762 * Get the index from the target name MDTXXXX/OSTXXXX
763 * rc = server type, or rc < 0 on error
/*
 * Parse a target name of the form <TYPE><INDEX>.
 * The type prefix selects LDD_F_SV_TYPE_MDT or LDD_F_SV_TYPE_OST.  An index
 * field of "all" returns the type with LDD_F_SV_ALL or-ed in; otherwise the
 * index is read as a hexadecimal number and endptr receives the position
 * where parsing stopped.
 */
765 int target_name2index(const char *tgtname, __u32 *idx, const char **endptr)
767 const char *dash = tgtname;
771 if (strncmp(dash, "MDT", 3) == 0)
772 rc = LDD_F_SV_TYPE_MDT;
773 else if (strncmp(dash, "OST", 3) == 0)
774 rc = LDD_F_SV_TYPE_OST;
780 if (strncmp(dash, "all", 3) == 0) {
783 return rc | LDD_F_SV_ALL;
/* The cast drops the const qualifier only to match simple_strtoul()'s
 * char ** parameter. */
786 index = simple_strtoul(dash, (char **)endptr, 16);
791 EXPORT_SYMBOL(target_name2index);
793 /* Get the index from the obd name.
 * The fsname is skipped with server_name2fsname() and the remainder is
 * handed to target_name2index(), whose result is the return code.
796 if endptr isn't NULL it is set to end of name */
797 int server_name2index(const char *svname, __u32 *idx, const char **endptr)
802 /* We use server_name2fsname() just for parsing */
803 rc = server_name2fsname(svname, NULL, &dash);
808 rc = target_name2index(dash, idx, endptr);
812 /* Account for -mdc after index that is possible when specifying mdt */
/* The comparison starts one character past *endptr to step over the
 * separator; sizeof() counts the NUL, so the advance covers the separator
 * plus the whole LUSTRE_MDC_NAME string. */
813 if (endptr != NULL && strncmp(LUSTRE_MDC_NAME, *endptr + 1,
814 sizeof(LUSTRE_MDC_NAME)-1) == 0)
815 *endptr += sizeof(LUSTRE_MDC_NAME);
819 EXPORT_SYMBOL(server_name2index);
821 /*************** mount common between server and client ***************/
/*
 * Teardown shared by client and server unmount: drop this superblock's
 * reference on the MGC through lustre_stop_mgc().  A non-zero result other
 * than -ENOENT enters the handling below, which logs a real failure with
 * CERROR and treats "still in use" as informational only.
 */
824 int lustre_common_put_super(struct super_block *sb)
829 CDEBUG(D_MOUNT, "dropping sb %p\n", sb);
831 /* Drop a ref to the MGC */
832 rc = lustre_stop_mgc(sb);
833 if (rc && (rc != -ENOENT)) {
835 CERROR("Can't stop MGC: %d\n", rc);
838 /* BUSY just means that there's some other obd that
839 needs the mgc. Let him clean it up. */
840 CDEBUG(D_MOUNT, "MGC still in use\n");
842 /* Drop a ref to the mounted disk */
847 EXPORT_SYMBOL(lustre_common_put_super);
/*
 * Dump the parsed mount data at D_MOUNT debug level: profile (clients only),
 * device, flags, options, the recovery times when they are non-zero, and one
 * line per excluded OST index.
 */
849 static void lmd_print(struct lustre_mount_data *lmd)
853 PRINT_CMD(D_MOUNT, " mount data:\n");
854 if (lmd_is_client(lmd))
855 PRINT_CMD(D_MOUNT, "profile: %s\n", lmd->lmd_profile);
856 PRINT_CMD(D_MOUNT, "device: %s\n", lmd->lmd_dev);
857 PRINT_CMD(D_MOUNT, "flags: %x\n", lmd->lmd_flags);
860 PRINT_CMD(D_MOUNT, "options: %s\n", lmd->lmd_opts);
862 if (lmd->lmd_recovery_time_soft)
863 PRINT_CMD(D_MOUNT, "recovery time soft: %d\n",
864 lmd->lmd_recovery_time_soft);
866 if (lmd->lmd_recovery_time_hard)
867 PRINT_CMD(D_MOUNT, "recovery time hard: %d\n",
868 lmd->lmd_recovery_time_hard);
870 for (i = 0; i < lmd->lmd_exclude_count; i++) {
871 PRINT_CMD(D_MOUNT, "exclude %d: OST%04x\n", i,
872 lmd->lmd_exclude[i]);
876 /* Is this server on the exclusion list
 * The index parsed from svname is compared against every entry of the
 * superblock's lmd_exclude array; names that do not parse as an OST are
 * never excluded. */
877 int lustre_check_exclusion(struct super_block *sb, char *svname)
879 struct lustre_sb_info *lsi = s2lsi(sb);
880 struct lustre_mount_data *lmd = lsi->lsi_lmd;
885 rc = server_name2index(svname, &index, NULL);
886 if (rc != LDD_F_SV_TYPE_OST)
887 /* Only exclude OSTs */
890 CDEBUG(D_MOUNT, "Check exclusion %s (%d) in %d of %s\n", svname,
891 index, lmd->lmd_exclude_count, lmd->lmd_dev);
893 for(i = 0; i < lmd->lmd_exclude_count; i++) {
894 if (index == lmd->lmd_exclude[i]) {
895 CWARN("Excluding %s (on exclusion list)\n", svname);
902 /* mount -v -o exclude=lustre-OST0001:lustre-OST0002 -t lustre ... */
/* Parse the value of an "exclude=" option into lmd->lmd_exclude.
 * Indices are first collected in a temporary array sized for the worst case,
 * then copied into an exactly sized permanent array.  Only OST names are
 * recorded; other server types are logged and ignored. */
903 static int lmd_make_exclusion(struct lustre_mount_data *lmd, const char *ptr)
905 const char *s1 = ptr, *s2;
911 /* The shortest an ost name can be is 8 chars: -OST0000.
912 We don't actually know the fsname at this time, so in fact
913 a user could specify any fsname. */
/* Hence at most strlen / 8 + 1 names can be present in the string. */
914 devmax = strlen(ptr) / 8 + 1;
916 /* temp storage until we figure out how many we have */
917 OBD_ALLOC(exclude_list, sizeof(index) * devmax);
921 /* we enter this fn pointing at the '=' */
922 while (*s1 && *s1 != ' ' && *s1 != ',') {
924 rc = server_name2index(s1, &index, &s2);
926 CERROR("Can't parse server name '%s': rc = %d\n",
930 if (rc == LDD_F_SV_TYPE_OST)
931 exclude_list[lmd->lmd_exclude_count++] = index;
933 CDEBUG(D_MOUNT, "ignoring exclude %.*s: type = %#x\n",
934 (uint)(s2-s1), s1, rc);
936 /* now we are pointing at ':' (next exclude)
937 or ',' (end of excludes) */
/* Guard against writing past the temporary array. */
938 if (lmd->lmd_exclude_count >= devmax)
941 if (rc >= 0) /* non-err */
944 if (lmd->lmd_exclude_count) {
945 /* permanent, freed in lustre_free_lsi */
946 OBD_ALLOC(lmd->lmd_exclude, sizeof(index) *
947 lmd->lmd_exclude_count);
948 if (lmd->lmd_exclude) {
949 memcpy(lmd->lmd_exclude, exclude_list,
950 sizeof(index) * lmd->lmd_exclude_count);
/* The count is reset so it never describes an array that does not exist. */
953 lmd->lmd_exclude_count = 0;
956 OBD_FREE(exclude_list, sizeof(index) * devmax);
/*
 * Store the value of an "mgssec=" option in lmd->lmd_mgssec.
 * A previously stored value is freed first.  The value's length is derived
 * from the position of the next ',' or, without one, from the length of the
 * remaining string; the copy is NUL-terminated explicitly.
 */
960 static int lmd_parse_mgssec(struct lustre_mount_data *lmd, char *ptr)
965 if (lmd->lmd_mgssec != NULL) {
966 OBD_FREE(lmd->lmd_mgssec, strlen(lmd->lmd_mgssec) + 1);
967 lmd->lmd_mgssec = NULL;
970 tail = strchr(ptr, ',');
972 length = strlen(ptr);
976 OBD_ALLOC(lmd->lmd_mgssec, length + 1);
977 if (lmd->lmd_mgssec == NULL)
980 memcpy(lmd->lmd_mgssec, ptr, length);
981 lmd->lmd_mgssec[length] = '\0';
/*
 * Store the value of a "network=" option in lmd->lmd_nidnet.
 * Same scheme as lmd_parse_mgssec(): free any earlier value, measure the
 * value up to the next ',' or the end of the string, then allocate and copy
 * it with an explicit NUL terminator.
 */
985 static int lmd_parse_network(struct lustre_mount_data *lmd, char *ptr)
990 if (lmd->lmd_nidnet != NULL) {
991 OBD_FREE(lmd->lmd_nidnet, strlen(lmd->lmd_nidnet) + 1);
992 lmd->lmd_nidnet = NULL;
995 tail = strchr(ptr, ',');
997 length = strlen(ptr);
1001 OBD_ALLOC(lmd->lmd_nidnet, length + 1);
1002 if (lmd->lmd_nidnet == NULL)
1005 memcpy(lmd->lmd_nidnet, ptr, length);
1006 lmd->lmd_nidnet[length] = '\0';
/*
 * Generic helper for string-valued options: replace *handle with a newly
 * allocated copy of the value starting at ptr.  The value ends at the next
 * ',' or, if there is none, at the end of the string.  A string already held
 * in *handle is freed first.  Both handle and ptr must be non-NULL.
 */
1010 static int lmd_parse_string(char **handle, char *ptr)
1015 if ((handle == NULL) || (ptr == NULL))
1018 if (*handle != NULL) {
1019 OBD_FREE(*handle, strlen(*handle) + 1);
1023 tail = strchr(ptr, ',');
1025 length = strlen(ptr);
1027 length = tail - ptr;
1029 OBD_ALLOC(*handle, length + 1);
1030 if (*handle == NULL)
1033 memcpy(*handle, ptr, length);
1034 (*handle)[length] = '\0';
1039 /* Collect multiple values for mgsnid specifiers
 * The nid list starting at *ptr is appended to lmd->lmd_mgs; an existing
 * list is kept and joined to the new one with ':'. */
1040 static int lmd_parse_mgs(struct lustre_mount_data *lmd, char **ptr)
1048 /* Find end of nidlist */
1049 while (class_parse_nid_quiet(tail, &nid, &tail) == 0) {}
1050 length = tail - *ptr;
1052 LCONSOLE_ERROR_MSG(0x159, "Can't parse NID '%s'\n", *ptr);
/* oldlen includes the old string's NUL, which later becomes the ':' joint. */
1056 if (lmd->lmd_mgs != NULL)
1057 oldlen = strlen(lmd->lmd_mgs) + 1;
1059 OBD_ALLOC(mgsnid, oldlen + length + 1);
1063 if (lmd->lmd_mgs != NULL) {
1064 /* Multiple mgsnid= are taken to mean failover locations */
1065 memcpy(mgsnid, lmd->lmd_mgs, oldlen);
1066 mgsnid[oldlen - 1] = ':';
1067 OBD_FREE(lmd->lmd_mgs, oldlen);
1069 memcpy(mgsnid + oldlen, *ptr, length);
1070 mgsnid[oldlen + length] = '\0';
1071 lmd->lmd_mgs = mgsnid;
1078 * Find the first delimiter (comma or colon) from the specified \a buf and
1079 * make \a *endh point to the string starting with the delimiter. The commas
1080 * in expression list [...] will be skipped.
1082 * \param[in] buf a delimiter-separated string
1083 * \param[in] endh a pointer to a pointer that will point to the string
1084 * starting with the delimiter
1086 * \retval 0 if delimiter is found
1087 * \retval 1 if delimiter is not found
/*
 * Scan buf character by character for the first ',' or ':' that counts as a
 * delimiter.  A candidate is accepted only while skip is zero; presumably
 * skip is non-zero inside a [...] expression list - confirm against the full
 * loop body.
 */
1089 static int lmd_find_delimiter(char *buf, char **endh)
1097 while (*c != '\0') {
1103 if ((*c == ',' || *c == ':') && skip == 0) {
1116 * Find the first valid string delimited by comma or colon from the specified
1117 * \a buf and parse it to see whether it's a valid nid list. If yes, \a *endh
1118 * will point to the next string starting with the delimiter.
1120 * \param[in] buf a delimiter-separated string
1121 * \param[in] endh a pointer to a pointer that will point to the string
1122 * starting with the delimiter
1124 * \retval 0 if the string is a valid nid list
1125 * \retval 1 if the string is not a valid nid list
/*
 * Check whether the next delimiter-separated token of buf is a valid nid
 * list.  Leading ',' and ':' characters are skipped, and a token starting
 * with ' ', '/' or the end of the string is handled separately.  The token
 * ends at the delimiter found by lmd_find_delimiter() or at the end of the
 * string.  cfs_parse_nidlist() does the validation; its temporary list is
 * released again with cfs_free_nidlist().
 */
1127 static int lmd_parse_nidlist(char *buf, char **endh)
1129 struct list_head nidlist;
1136 while (*buf == ',' || *buf == ':')
1138 if (*buf == ' ' || *buf == '/' || *buf == '\0')
1141 if (lmd_find_delimiter(buf, &endp) != 0)
1142 endp = buf + strlen(buf);
1147 INIT_LIST_HEAD(&nidlist);
/* A result <= 0 from cfs_parse_nidlist() is treated as "not a nid list". */
1148 if (cfs_parse_nidlist(buf, strlen(buf), &nidlist) <= 0)
1150 cfs_free_nidlist(&nidlist);
1160 /** Parse mount line options
1161 * e.g. mount -v -t lustre -o abort_recov uml1:uml2:/lustre-client /mnt/lustre
1162 * dev is passed as device=uml1:/lustre by mount.lustre
/*
 * Parse the mount option string into *lmd.
 *
 * - Rejects missing options and old binary lustre_mount_data (detected by
 *   its magic) with a console error.
 * - Walks the comma/space separated options and handles the Lustre-specific
 *   ones: flag options set bits in lmd_flags, valued options are stored via
 *   the lmd_parse_*() helpers, "param=" values are accumulated in lmd_params.
 * - Requires a "device=" option.  A device containing ":/" marks a client
 *   mount, for which lmd_profile ("<fsname>-client") and, where present,
 *   lmd_fileset are derived from the device string.
 * - Saves the device name in lmd_dev and the option string in lmd_opts.
 *
 * All strings allocated here are released in lustre_free_lsi().
 */
1164 static int lmd_parse(char *options, struct lustre_mount_data *lmd)
1166 char *s1, *s2, *devname = NULL;
1167 struct lustre_mount_data *raw = (struct lustre_mount_data *)options;
1173 LCONSOLE_ERROR_MSG(0x162, "Missing mount data: check that "
1174 "/sbin/mount.lustre is installed.\n");
1178 /* Options should be a string - try to detect old lmd data */
1179 if ((raw->lmd_magic & 0xffffff00) == (LMD_MAGIC & 0xffffff00)) {
1180 LCONSOLE_ERROR_MSG(0x163, "You're using an old version of "
1181 "/sbin/mount.lustre. Please install "
1182 "version %s\n", LUSTRE_VERSION_STRING);
1185 lmd->lmd_magic = LMD_MAGIC;
/* Accumulator for "param=" values; starts out as the empty string. */
1187 OBD_ALLOC(lmd->lmd_params, LMD_PARAMS_MAXLEN);
1188 if (lmd->lmd_params == NULL)
1190 lmd->lmd_params[0] = '\0';
1192 /* Set default flags here */
1197 int time_min = OBD_RECOVERY_TIME_MIN;
1200 /* Skip whitespace and extra commas */
1201 while (*s1 == ' ' || *s1 == ',')
1205 /* Client options are parsed in ll_options: eg. flock,
1208 /* Parse non-ldiskfs options here. Rather than modifying
1209 ldiskfs, we just zero these out here */
/* Options are recognised by strncmp() prefix match, so the order of the
 * branches matters wherever one keyword is a prefix of another (for example
 * "mgssec=" is tested before "mgs"). */
1210 if (strncmp(s1, "abort_recov", 11) == 0) {
1211 lmd->lmd_flags |= LMD_FLG_ABORT_RECOV;
1213 } else if (strncmp(s1, "recovery_time_soft=", 19) == 0) {
1214 lmd->lmd_recovery_time_soft =
1215 max_t(int, simple_strtoul(s1 + 19, NULL, 10),
1218 } else if (strncmp(s1, "recovery_time_hard=", 19) == 0) {
1219 lmd->lmd_recovery_time_hard =
1220 max_t(int, simple_strtoul(s1 + 19, NULL, 10),
1223 } else if (strncmp(s1, "noir", 4) == 0) {
1224 lmd->lmd_flags |= LMD_FLG_NOIR; /* test purpose only. */
1226 } else if (strncmp(s1, "nosvc", 5) == 0) {
1227 lmd->lmd_flags |= LMD_FLG_NOSVC;
1229 } else if (strncmp(s1, "nomgs", 5) == 0) {
1230 lmd->lmd_flags |= LMD_FLG_NOMGS;
1232 } else if (strncmp(s1, "noscrub", 7) == 0) {
1233 lmd->lmd_flags |= LMD_FLG_NOSCRUB;
1235 } else if (strncmp(s1, "skip_lfsck", 10) == 0) {
1236 lmd->lmd_flags |= LMD_FLG_SKIP_LFSCK;
1238 } else if (strncmp(s1, "rdonly_dev", 10) == 0) {
1239 lmd->lmd_flags |= LMD_FLG_DEV_RDONLY;
1241 } else if (strncmp(s1, PARAM_MGSNODE,
1242 sizeof(PARAM_MGSNODE) - 1) == 0) {
1243 s2 = s1 + sizeof(PARAM_MGSNODE) - 1;
1244 /* Assume the next mount opt is the first
1245 invalid nid we get to. */
1246 rc = lmd_parse_mgs(lmd, &s2);
1251 } else if (strncmp(s1, "writeconf", 9) == 0) {
1252 lmd->lmd_flags |= LMD_FLG_WRITECONF;
1254 } else if (strncmp(s1, "update", 6) == 0) {
1255 lmd->lmd_flags |= LMD_FLG_UPDATE;
1257 } else if (strncmp(s1, "virgin", 6) == 0) {
1258 lmd->lmd_flags |= LMD_FLG_VIRGIN;
1260 } else if (strncmp(s1, "noprimnode", 10) == 0) {
1261 lmd->lmd_flags |= LMD_FLG_NO_PRIMNODE;
1263 } else if (strncmp(s1, "mgssec=", 7) == 0) {
1264 rc = lmd_parse_mgssec(lmd, s1 + 7);
1268 /* ost exclusion list */
1269 } else if (strncmp(s1, "exclude=", 8) == 0) {
/* s1 + 7 deliberately points at the '=': lmd_make_exclusion() expects to be
 * entered there. */
1270 rc = lmd_make_exclusion(lmd, s1 + 7);
1274 } else if (strncmp(s1, "mgs", 3) == 0) {
1276 lmd->lmd_flags |= LMD_FLG_MGS;
1278 } else if (strncmp(s1, "svname=", 7) == 0) {
1279 rc = lmd_parse_string(&lmd->lmd_profile, s1 + 7);
1283 } else if (strncmp(s1, "param=", 6) == 0) {
1284 size_t length, params_length;
1286 if (lmd_find_delimiter(s1 + 6, &tail) != 0)
1287 length = strlen(s1);
1289 char *param_str = tail + 1;
1290 int supplementary = 1;
1291 while (lmd_parse_nidlist(param_str,
1295 length = param_str - s1 - supplementary;
/* The new value plus the separating blank must fit in the fixed-size
 * lmd_params buffer. */
1298 params_length = strlen(lmd->lmd_params);
1299 if (params_length + length + 1 >= LMD_PARAMS_MAXLEN)
1301 strncat(lmd->lmd_params, s1 + 6, length);
1302 lmd->lmd_params[params_length + length] = '\0';
1303 strlcat(lmd->lmd_params, " ", LMD_PARAMS_MAXLEN);
1304 s3 = s1 + 6 + length;
1306 } else if (strncmp(s1, "osd=", 4) == 0) {
1307 rc = lmd_parse_string(&lmd->lmd_osd_type, s1 + 4);
1312 /* Linux 2.4 doesn't pass the device, so we stuck it at the
1313 end of the options. */
1314 else if (strncmp(s1, "device=", 7) == 0) {
1316 /* terminate options right before device. device
1317 must be the last one. */
1320 } else if (strncmp(s1, "network=", 8) == 0) {
1321 rc = lmd_parse_network(lmd, s1 + 8);
1328 s2 = strchr(s3, ',');
/* memmove() because source and destination overlap within the option string;
 * presumably this cuts the option just handled out of it - confirm. */
1336 memmove(s1, s2, strlen(s2) + 1);
1342 LCONSOLE_ERROR_MSG(0x164, "Can't find the device name "
1343 "(need mount option 'device=...')\n");
/* A ":/" in the device string identifies a client mount. */
1347 s1 = strstr(devname, ":/");
1350 lmd->lmd_flags |= LMD_FLG_CLIENT;
1351 /* Remove leading /s from fsname */
1352 while (*++s1 == '/')
1355 while (*s2 != '/' && *s2 != '\0')
1357 /* Freed in lustre_free_lsi */
/* Size: fsname (s2 - s1) + strlen("-client") (7) + NUL.  The strncat() calls
 * assume OBD_ALLOC returned zero-filled memory - TODO confirm. */
1358 OBD_ALLOC(lmd->lmd_profile, s2 - s1 + 8);
1359 if (!lmd->lmd_profile)
1362 strncat(lmd->lmd_profile, s1, s2 - s1);
1363 strncat(lmd->lmd_profile, "-client", 7);
1366 s2 = s1 + strlen(s1) - 1;
1367 /* Remove padding /s from fileset */
1371 OBD_ALLOC(lmd->lmd_fileset, s2 - s1 + 2);
1372 if (lmd->lmd_fileset == NULL) {
1373 OBD_FREE(lmd->lmd_profile,
1374 strlen(lmd->lmd_profile) + 1);
1377 strncat(lmd->lmd_fileset, s1, s2 - s1 + 1);
1381 if (lmd->lmd_nidnet != NULL) {
1382 /* 'network=' mount option forbidden for server */
1383 OBD_FREE(lmd->lmd_nidnet, strlen(lmd->lmd_nidnet) + 1);
1384 lmd->lmd_nidnet = NULL;
1386 CERROR("%s: option 'network=' not allowed for Lustre "
1387 "servers: rc = %d\n", devname, rc);
1392 /* Freed in lustre_free_lsi */
1393 OBD_ALLOC(lmd->lmd_dev, strlen(devname) + 1);
1396 strncpy(lmd->lmd_dev, devname, strlen(devname)+1);
1398 /* Save mount options */
/* s1 scans back from the end of the string over trailing ',' and ' '. */
1399 s1 = options + strlen(options) - 1;
1400 while (s1 >= options && (*s1 == ',' || *s1 == ' '))
1402 if (*options != 0) {
1403 /* Freed in lustre_free_lsi */
1404 OBD_ALLOC(lmd->lmd_opts, strlen(options) + 1);
1407 strncpy(lmd->lmd_opts, options, strlen(options)+1);
1411 lmd->lmd_magic = LMD_MAGIC;
1416 CERROR("Bad mount options %s\n", options);
/* Wrapper passed as the "data" argument to lustre_fill_super(): it carries
 * the raw mount data (lmd2_data) together with the vfsmount, when one is
 * available. */
1420 struct lustre_mount_data2 {
1422 struct vfsmount *lmd2_mnt;
1425 /** This is the entry point for the mount call into Lustre.
1426 * This is called when a server or client is mounted,
1427 * and this is where we start setting things up.
1428 * @param data Mount options (e.g. -o flock,abort_recov)
/*
 * fill_super callback for the "lustre" filesystem type.
 * Creates the lsi for sb, waits for obd cleanup of an earlier mount to
 * finish, parses the option string into the lmd and then branches:
 *  - client: ensure the client module has registered client_fill_super
 *    (loading "lustre" on demand), start the MGC, then call the registered
 *    client fill function;
 *  - server: call server_fill_super() when built with HAVE_SERVER_SUPPORT,
 *    otherwise report that servers are not supported.
 * The outcome is logged with CERROR or CDEBUG.
 */
1430 static int lustre_fill_super(struct super_block *sb, void *data, int silent)
1432 struct lustre_mount_data *lmd;
1433 struct lustre_mount_data2 *lmd2 = data;
1434 struct lustre_sb_info *lsi;
1438 CDEBUG(D_MOUNT|D_VFSTRACE, "VFS Op: sb %p\n", sb);
1440 lsi = lustre_init_lsi(sb);
1446 * Disable lockdep during mount, because mount locking patterns are
1452 * LU-639: the obd cleanup of last mount may not finish yet, wait here.
1454 obd_zombie_barrier();
1456 /* Figure out the lmd from the mount options */
1457 if (lmd_parse((char *)(lmd2->lmd2_data), lmd)) {
1459 GOTO(out, rc = -EINVAL);
1462 if (lmd_is_client(lmd)) {
1463 CDEBUG(D_MOUNT, "Mounting client %s\n", lmd->lmd_profile);
/* The pointer is checked twice: once to trigger the module load, once to see
 * whether loading actually registered a handler. */
1464 if (client_fill_super == NULL)
1465 request_module("lustre");
1466 if (client_fill_super == NULL) {
1467 LCONSOLE_ERROR_MSG(0x165, "Nothing registered for "
1468 "client mount! Is the 'lustre' "
1469 "module loaded?\n");
1473 rc = lustre_start_mgc(sb);
1475 lustre_common_put_super(sb);
1478 /* Connect and start */
1479 /* (should always be ll_fill_super) */
1480 rc = (*client_fill_super)(sb, lmd2->lmd2_mnt);
1481 /* c_f_s will call lustre_common_put_super on failure */
1484 #ifdef HAVE_SERVER_SUPPORT
1485 CDEBUG(D_MOUNT, "Mounting server from %s\n", lmd->lmd_dev);
1486 rc = server_fill_super(sb);
1487 /* s_f_s calls lustre_start_mgc after the mount because we need
1488 the MGS nids which are stored on disk. Plus, we may
1489 need to start the MGS first. */
1490 /* s_f_s will call server_put_super on failure */
1492 CERROR("This is client-side-only module, "
1493 "cannot handle server mount.\n");
1498 /* If error happens in fill_super() call, @lsi will be killed there.
1499 * This is why we do not put it here. */
/* s2lsi(sb) is re-checked because the lsi, and with it lmd, may already be
 * gone after a failed fill. */
1503 CERROR("Unable to mount %s (%d)\n",
1504 s2lsi(sb) ? lmd->lmd_dev : "", rc);
1506 CDEBUG(D_SUPER, "Mount %s complete\n",
1514 /* We can't call ll_fill_super by name because it lives in a module that
1515 must be loaded after this one.
 * The client module therefore registers its fill_super function here;
 * lustre_fill_super() calls it through the stored pointer. */
1516 void lustre_register_client_fill_super(int (*cfs)(struct super_block *sb,
1517 struct vfsmount *mnt))
1519 client_fill_super = cfs;
1521 EXPORT_SYMBOL(lustre_register_client_fill_super);
/* Register the callback that lustre_kill_super() runs for non-server
 * superblocks before the generic teardown. */
1523 void lustre_register_kill_super_cb(void (*cfs)(struct super_block *sb))
1525 kill_super_cb = cfs;
1527 EXPORT_SYMBOL(lustre_register_kill_super_cb);
1529 /***************** FS registration ******************/
1530 #ifdef HAVE_FSTYPE_MOUNT
/* Mount entry point for kernels with file_system_type::mount.  The mount data
 * is wrapped in a lustre_mount_data2 and the superblock is created with
 * mount_nodev(), using lustre_fill_super() to fill it. */
1531 static struct dentry *lustre_mount(struct file_system_type *fs_type, int flags,
1532 const char *devname, void *data)
1534 struct lustre_mount_data2 lmd2 = {
1538 return mount_nodev(fs_type, flags, &lmd2, lustre_fill_super);
/* Mount entry point for kernels that still use file_system_type::get_sb.
 * Same wrapping as above, but through get_sb_nodev(), which also receives
 * the vfsmount. */
1541 static int lustre_get_sb(struct file_system_type *fs_type, int flags,
1542 const char *devname, void *data, struct vfsmount *mnt)
1544 struct lustre_mount_data2 lmd2 = {
1549 return get_sb_nodev(fs_type, flags, &lmd2, lustre_fill_super, mnt);
/* kill_sb callback: give the registered client callback a chance to run for
 * superblocks that have an lsi and are not servers, then hand the superblock
 * to kill_anon_super(). */
1553 static void lustre_kill_super(struct super_block *sb)
1555 struct lustre_sb_info *lsi = s2lsi(sb);
1557 if (kill_super_cb && lsi && !IS_SERVER(lsi))
1558 (*kill_super_cb)(sb);
1560 kill_anon_super(sb);
1563 /** Register the "lustre" fs type
/* Filesystem type descriptor handed to register_filesystem().  Depending on
 * HAVE_FSTYPE_MOUNT either .mount or .get_sb is wired up as mount entry
 * point; teardown always goes through lustre_kill_super(). */
1565 static struct file_system_type lustre_fs_type = {
1566 .owner = THIS_MODULE,
1568 #ifdef HAVE_FSTYPE_MOUNT
1569 .mount = lustre_mount,
1571 .get_sb = lustre_get_sb,
1573 .kill_sb = lustre_kill_super,
1574 .fs_flags = FS_REQUIRES_DEV | FS_HAS_FIEMAP | FS_RENAME_DOES_D_MOVE,
1576 MODULE_ALIAS_FS("lustre");
/* Register lustre_fs_type with the VFS; returns register_filesystem()'s
 * result unchanged. */
1578 int lustre_register_fs(void)
1580 return register_filesystem(&lustre_fs_type);
/* Remove lustre_fs_type from the VFS; returns unregister_filesystem()'s
 * result unchanged. */
1583 int lustre_unregister_fs(void)
1585 return unregister_filesystem(&lustre_fs_type);