/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * lustre/obdclass/obd_mount.c
- * Client/server mount routines
+ * GPL HEADER START
*
- * Copyright (c) 2006 Cluster File Systems, Inc.
- * Author: Nathan Rutman <nathan@clusterfs.com>
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
- * This file is part of Lustre, http://www.lustre.org/
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
*
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
*
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/obdclass/obd_mount.c
+ *
+ * Client/server mount routines
+ *
+ * Author: Nathan Rutman <nathan@clusterfs.com>
*/
/*********** mount lookup *********/
-DECLARE_MUTEX(lustre_mount_info_lock);
-struct list_head server_mount_info_list = LIST_HEAD_INIT(server_mount_info_list);
+CFS_DECLARE_MUTEX(lustre_mount_info_lock);
+static CFS_LIST_HEAD(server_mount_info_list);
static struct lustre_mount_info *server_find_mount(const char *name)
{
- struct list_head *tmp;
+ cfs_list_t *tmp;
struct lustre_mount_info *lmi;
ENTRY;
- list_for_each(tmp, &server_mount_info_list) {
- lmi = list_entry(tmp, struct lustre_mount_info, lmi_list_chain);
+ cfs_list_for_each(tmp, &server_mount_info_list) {
+ lmi = cfs_list_entry(tmp, struct lustre_mount_info,
+ lmi_list_chain);
if (strcmp(name, lmi->lmi_name) == 0)
RETURN(lmi);
}
}
strcpy(name_cp, name);
- down(&lustre_mount_info_lock);
+ cfs_down(&lustre_mount_info_lock);
if (server_find_mount(name)) {
- up(&lustre_mount_info_lock);
+ cfs_up(&lustre_mount_info_lock);
OBD_FREE(lmi, sizeof(*lmi));
OBD_FREE(name_cp, strlen(name) + 1);
CERROR("Already registered %s\n", name);
lmi->lmi_name = name_cp;
lmi->lmi_sb = sb;
lmi->lmi_mnt = mnt;
- list_add(&lmi->lmi_list_chain, &server_mount_info_list);
+ cfs_list_add(&lmi->lmi_list_chain, &server_mount_info_list);
- up(&lustre_mount_info_lock);
+ cfs_up(&lustre_mount_info_lock);
CDEBUG(D_MOUNT, "reg_mnt %p from %s, vfscount=%d\n",
- lmi->lmi_mnt, name, atomic_read(&lmi->lmi_mnt->mnt_count));
+ lmi->lmi_mnt, name, cfs_atomic_read(&lmi->lmi_mnt->mnt_count));
RETURN(0);
}
struct lustre_mount_info *lmi;
ENTRY;
- down(&lustre_mount_info_lock);
+ cfs_down(&lustre_mount_info_lock);
lmi = server_find_mount(name);
if (!lmi) {
- up(&lustre_mount_info_lock);
+ cfs_up(&lustre_mount_info_lock);
CERROR("%s not registered\n", name);
RETURN(-ENOENT);
}
CDEBUG(D_MOUNT, "dereg_mnt %p from %s, vfscount=%d\n",
- lmi->lmi_mnt, name, atomic_read(&lmi->lmi_mnt->mnt_count));
+ lmi->lmi_mnt, name, cfs_atomic_read(&lmi->lmi_mnt->mnt_count));
OBD_FREE(lmi->lmi_name, strlen(lmi->lmi_name) + 1);
- list_del(&lmi->lmi_list_chain);
+ cfs_list_del(&lmi->lmi_list_chain);
OBD_FREE(lmi, sizeof(*lmi));
- up(&lustre_mount_info_lock);
+ cfs_up(&lustre_mount_info_lock);
RETURN(0);
}
struct lustre_sb_info *lsi;
ENTRY;
- down(&lustre_mount_info_lock);
+ cfs_down(&lustre_mount_info_lock);
lmi = server_find_mount(name);
- up(&lustre_mount_info_lock);
+ cfs_up(&lustre_mount_info_lock);
if (!lmi) {
CERROR("Can't find mount for %s\n", name);
RETURN(NULL);
}
lsi = s2lsi(lmi->lmi_sb);
mntget(lmi->lmi_mnt);
- atomic_inc(&lsi->lsi_mounts);
+ cfs_atomic_inc(&lsi->lsi_mounts);
CDEBUG(D_MOUNT, "get_mnt %p from %s, refs=%d, vfscount=%d\n",
- lmi->lmi_mnt, name, atomic_read(&lsi->lsi_mounts),
- atomic_read(&lmi->lmi_mnt->mnt_count));
+ lmi->lmi_mnt, name, cfs_atomic_read(&lsi->lsi_mounts),
+ cfs_atomic_read(&lmi->lmi_mnt->mnt_count));
RETURN(lmi);
}
struct lustre_mount_info *lmi;
ENTRY;
- down(&lustre_mount_info_lock);
+ cfs_down(&lustre_mount_info_lock);
lmi = server_find_mount(name);
- up(&lustre_mount_info_lock);
+ cfs_up(&lustre_mount_info_lock);
if (!lmi)
CERROR("Can't find mount for %s\n", name);
static void unlock_mntput(struct vfsmount *mnt)
{
if (kernel_locked()) {
- unlock_kernel();
+ cfs_unlock_kernel();
mntput(mnt);
- lock_kernel();
+ cfs_lock_kernel();
} else {
mntput(mnt);
}
/* This might be the last one, can't deref after this */
unlock_mntput(mnt);
- down(&lustre_mount_info_lock);
+ cfs_down(&lustre_mount_info_lock);
lmi = server_find_mount(name);
- up(&lustre_mount_info_lock);
+ cfs_up(&lustre_mount_info_lock);
if (!lmi) {
CERROR("Can't find mount for %s\n", name);
RETURN(-ENOENT);
LASSERT(lmi->lmi_mnt == mnt);
CDEBUG(D_MOUNT, "put_mnt %p from %s, refs=%d, vfscount=%d\n",
- lmi->lmi_mnt, name, atomic_read(&lsi->lsi_mounts), count);
+ lmi->lmi_mnt, name, cfs_atomic_read(&lsi->lsi_mounts), count);
if (lustre_put_lsi(lmi->lmi_sb)) {
CDEBUG(D_MOUNT, "Last put of mnt %p from %s, vfscount=%d\n",
len = i_size_read(file->f_dentry->d_inode);
CDEBUG(D_MOUNT, "Have %s, size %lu\n", MOUNT_DATA_FILE, len);
if (len != sizeof(*ldd)) {
- CERROR("disk data size does not match: see %lu expect "LPSZ"\n",
- len, sizeof(*ldd));
+ CERROR("disk data size does not match: see %lu expect %u\n",
+ len, (int)sizeof(*ldd));
GOTO(out_close, rc = -EINVAL);
}
/**************** config llog ********************/
-/* Get a config log from the MGS and process it.
- This func is called for both clients and servers.
- Continue to process new statements appended to the logs
- (whenever the config lock is revoked) until lustre_end_log
- is called. */
+/** Get a config log from the MGS and process it.
+ * This func is called for both clients and servers.
+ * Continue to process new statements appended to the logs
+ * (whenever the config lock is revoked) until lustre_end_log
+ * is called.
+ * @param sb The superblock is used by the MGC to write to the local copy of
+ * the config log
+ * @param logname The name of the llog to replicate from the MGS
+ * @param cfg Since the same mgc may be used to follow multiple config logs
+ * (e.g. ost1, ost2, client), the config_llog_instance keeps the state for
+ * this log, and is added to the mgc's list of logs to follow.
+ */
int lustre_process_log(struct super_block *sb, char *logname,
struct config_llog_instance *cfg)
{
"communication errors between this node and "
"the MGS, a bad configuration, or other "
"errors. See the syslog for more "
- "information.\n", mgc->obd_name, logname,
+ "information.\n", mgc->obd_name, logname,
rc);
/* class_obd_list(); */
/**************** obd start *******************/
+/** lustre_cfg_bufs are a holdover from 1.4; we can still set these up from
+ * lctl (and do for echo cli/srv).
+ */
int do_lcfg(char *cfgname, lnet_nid_t nid, int cmd,
char *s1, char *s2, char *s3, char *s4)
{
return(rc);
}
+/** Call class_attach and class_setup. These methods in turn call
+ * obd type-specific methods.
+ */
static int lustre_start_simple(char *obdname, char *type, char *uuid,
char *s1, char *s2)
{
rc = server_register_mount(LUSTRE_MGS_OBDNAME, sb, mnt);
- if (!rc &&
- ((rc = lustre_start_simple(LUSTRE_MGS_OBDNAME, LUSTRE_MGS_NAME,
- LUSTRE_MGS_OBDNAME, 0, 0))))
- server_deregister_mount(LUSTRE_MGS_OBDNAME);
+ if (!rc) {
+ rc = lustre_start_simple(LUSTRE_MGS_OBDNAME, LUSTRE_MGS_NAME,
+ LUSTRE_MGS_OBDNAME, 0, 0);
+ /* Do NOT call server_deregister_mount() here. Doing so makes it
+ * impossible to clean up and free lsi and other stuff when the
+ * mgs calls server_put_mount() in the error handling case. -umka */
+ }
if (rc)
LCONSOLE_ERROR_MSG(0x15e, "Failed to start MGS '%s' (%d). "
RETURN(rc);
}
-DECLARE_MUTEX(mgc_start_lock);
+CFS_DECLARE_MUTEX(mgc_start_lock);
-/* Set up a mgcobd to process startup logs */
+/** Set up a mgc obd to process startup logs
+ *
+ * \param sb [in] super block of the mgc obd
+ *
+ * \retval 0 success, otherwise error code
+ */
static int lustre_start_mgc(struct super_block *sb)
{
- struct lustre_handle mgc_conn = {0, };
- struct obd_connect_data ocd = { 0 };
+ struct obd_connect_data *data = NULL;
struct lustre_sb_info *lsi = s2lsi(sb);
struct obd_device *obd;
struct obd_export *exp;
struct obd_uuid *uuid;
class_uuid_t uuidc;
lnet_nid_t nid;
- char *mgcname, *niduuid;
+ char *mgcname, *niduuid, *mgssec;
char *ptr;
int recov_bk;
int rc = 0, i = 0, j, len;
GOTO(out_free, rc = -ENOMEM);
sprintf(mgcname, "%s%s", LUSTRE_MGC_OBDNAME, libcfs_nid2str(nid));
- mutex_down(&mgc_start_lock);
+ mgssec = lsi->lsi_lmd->lmd_mgssec ? lsi->lsi_lmd->lmd_mgssec : "";
+
+ cfs_mutex_down(&mgc_start_lock);
obd = class_name2obd(mgcname);
- if (obd) {
+ if (obd && !obd->obd_stopping) {
+ rc = obd_set_info_async(obd->obd_self_export,
+ strlen(KEY_MGSSEC), KEY_MGSSEC,
+ strlen(mgssec), mgssec, NULL);
+ if (rc)
+ GOTO(out_free, rc);
+
/* Re-using an existing MGC */
- atomic_inc(&obd->u.cli.cl_mgc_refcount);
+ cfs_atomic_inc(&obd->u.cli.cl_mgc_refcount);
recov_bk = 0;
/* If we are restarting the MGS, don't try to keep the MGC's
recov_bk++;
CDEBUG(D_MOUNT, "%s: Set MGC reconnect %d\n", mgcname,recov_bk);
rc = obd_set_info_async(obd->obd_self_export,
- strlen(KEY_INIT_RECOV_BACKUP),
+ sizeof(KEY_INIT_RECOV_BACKUP),
KEY_INIT_RECOV_BACKUP,
sizeof(recov_bk), &recov_bk, NULL);
GOTO(out, rc = 0);
GOTO(out_free, rc = -ENOTCONN);
}
+ rc = obd_set_info_async(obd->obd_self_export,
+ strlen(KEY_MGSSEC), KEY_MGSSEC,
+ strlen(mgssec), mgssec, NULL);
+ if (rc)
+ GOTO(out_free, rc);
+
/* Keep a refcount of servers/clients who started with "mount",
so we know when we can get rid of the mgc. */
- atomic_set(&obd->u.cli.cl_mgc_refcount, 1);
+ cfs_atomic_set(&obd->u.cli.cl_mgc_refcount, 1);
/* Try all connections, but only once. */
recov_bk = 1;
rc = obd_set_info_async(obd->obd_self_export,
- strlen(KEY_INIT_RECOV_BACKUP),
+ sizeof(KEY_INIT_RECOV_BACKUP),
KEY_INIT_RECOV_BACKUP,
sizeof(recov_bk), &recov_bk, NULL);
if (rc)
/* nonfatal */
- CERROR("can't set %s %d\n", KEY_INIT_RECOV_BACKUP, rc);
+ CWARN("can't set %s %d\n", KEY_INIT_RECOV_BACKUP, rc);
/* We connect to the MGS at setup, and don't disconnect until cleanup */
-
- ocd.ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_FID;
- ocd.ocd_version = LUSTRE_VERSION_CODE;
-
- rc = obd_connect(NULL, &mgc_conn, obd, &(obd->obd_uuid), &ocd);
+ OBD_ALLOC_PTR(data);
+ if (data == NULL)
+ GOTO(out, rc = -ENOMEM);
+ data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_FID |
+ OBD_CONNECT_AT | OBD_CONNECT_FULL20;
+ data->ocd_version = LUSTRE_VERSION_CODE;
+ rc = obd_connect(NULL, &exp, obd, &(obd->obd_uuid), data, NULL);
+ OBD_FREE_PTR(data);
if (rc) {
CERROR("connect failed %d\n", rc);
GOTO(out, rc);
}
- exp = class_conn2export(&mgc_conn);
obd->u.cli.cl_mgc_mgsexp = exp;
out:
to the same mgc.*/
lsi->lsi_mgc = obd;
out_free:
- mutex_up(&mgc_start_lock);
+ cfs_mutex_up(&mgc_start_lock);
if (mgcname)
OBD_FREE(mgcname, len);
obd = lsi->lsi_mgc;
if (!obd)
RETURN(-ENOENT);
-
lsi->lsi_mgc = NULL;
- mutex_down(&mgc_start_lock);
- if (!atomic_dec_and_test(&obd->u.cli.cl_mgc_refcount)) {
+
+ cfs_mutex_down(&mgc_start_lock);
+ LASSERT(cfs_atomic_read(&obd->u.cli.cl_mgc_refcount) > 0);
+ if (!cfs_atomic_dec_and_test(&obd->u.cli.cl_mgc_refcount)) {
/* This is not fatal, every client that stops
will call in here. */
CDEBUG(D_MOUNT, "mgc still has %d references.\n",
- atomic_read(&obd->u.cli.cl_mgc_refcount));
+ cfs_atomic_read(&obd->u.cli.cl_mgc_refcount));
GOTO(out, rc = -EBUSY);
}
- /* The MGC has no recoverable data in any case.
+ /* The MGC has no recoverable data in any case.
* force shotdown set in umount_begin */
obd->obd_no_recov = 1;
- if (obd->u.cli.cl_mgc_mgsexp)
- obd_disconnect(obd->u.cli.cl_mgc_mgsexp);
+ if (obd->u.cli.cl_mgc_mgsexp) {
+ /* An error is not fatal: if we are unable to send the
+ disconnect, the mgs ping evictor cleans up the export */
+ rc = obd_disconnect(obd->u.cli.cl_mgc_mgsexp);
+ if (rc)
+ CDEBUG(D_MOUNT, "disconnect failed %d\n", rc);
+ }
/* Save the obdname for cleaning the nid uuids, which are
obdname_XX */
/* Clean the nid uuids */
if (!niduuid)
- RETURN(-ENOMEM);
+ GOTO(out, rc = -ENOMEM);
+
for (i = 0; i < lsi->lsi_lmd->lmd_mgs_failnodes; i++) {
sprintf(ptr, "_%x", i);
rc = do_lcfg(LUSTRE_MGC_OBDNAME, 0, LCFG_DEL_UUID,
CERROR("del MDC UUID %s failed: rc = %d\n",
niduuid, rc);
}
- OBD_FREE(niduuid, len);
- /* class_import_put will get rid of the additional connections */
-
out:
- mutex_up(&mgc_start_lock);
+ if (niduuid)
+ OBD_FREE(niduuid, len);
+
+ /* class_import_put will get rid of the additional connections */
+ cfs_mutex_up(&mgc_start_lock);
RETURN(rc);
}
/* cl_mgc_sem in mgc insures we sleep if the mgc_fs is busy */
rc = obd_set_info_async(mgc->obd_self_export,
- strlen("set_fs"), "set_fs",
+ sizeof(KEY_SET_FS), KEY_SET_FS,
sizeof(*sb), sb, NULL);
if (rc) {
CERROR("can't set_fs %d\n", rc);
CDEBUG(D_MOUNT, "Unassign mgc disk\n");
rc = obd_set_info_async(mgc->obd_self_export,
- strlen("clear_fs"), "clear_fs",
+ sizeof(KEY_CLEAR_FS), KEY_CLEAR_FS,
0, NULL, NULL);
RETURN(rc);
}
-DECLARE_MUTEX(server_start_lock);
+CFS_DECLARE_MUTEX(server_start_lock);
/* Stop MDS/OSS if nobody is using them */
static int server_stop_servers(int lddflags, int lsiflags)
int rc = 0;
ENTRY;
- mutex_down(&server_start_lock);
+ cfs_mutex_down(&server_start_lock);
/* Either an MDT or an OST or neither */
/* if this was an MDT, and there are no more MDT's, clean up the MDS */
rc = err;
}
- mutex_up(&server_start_lock);
+ cfs_mutex_up(&server_start_lock);
RETURN(rc);
}
struct lustre_sb_info *lsi = s2lsi(sb);
struct lustre_disk_data *ldd = lsi->lsi_ldd;
lnet_process_id_t id;
- int i = 0;
+ int i = 0;
ENTRY;
if (!(lsi->lsi_flags & LSI_SERVER))
while (LNetGetId(i++, &id) != -ENOENT) {
if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND)
continue;
+
+ if (class_find_param(ldd->ldd_params,
+ PARAM_NETWORK, NULL) == 0 &&
+ !class_match_net(ldd->ldd_params, id.nid)) {
+ /* can't match specified network */
+ continue;
+ }
+
mti->mti_nids[mti->mti_nid_count] = id.nid;
mti->mti_nid_count++;
if (mti->mti_nid_count >= MTI_NIDS_MAX) {
/* Register the target */
/* FIXME use mgc_process_config instead */
rc = obd_set_info_async(mgc->u.cli.cl_mgc_mgsexp,
- strlen("register_target"), "register_target",
+ sizeof(KEY_REGISTER_TARGET), KEY_REGISTER_TARGET,
sizeof(*mti), mti, NULL);
- if (rc) {
- CERROR("registration with the MGS failed (%d)\n", rc);
+ if (rc)
GOTO(out, rc);
- }
/* Always update our flags */
ldd->ldd_flags = mti->mti_flags & ~LDD_F_REWRITE_LDD;
RETURN(rc);
}
-/* Start targets */
+/** Start server targets: MDTs and OSTs
+ */
static int server_start_targets(struct super_block *sb, struct vfsmount *mnt)
{
struct obd_device *obd;
/* If we're an MDT, make sure the global MDS is running */
if (lsi->lsi_ldd->ldd_flags & LDD_F_SV_TYPE_MDT) {
/* make sure the MDS is started */
- mutex_down(&server_start_lock);
+ cfs_mutex_down(&server_start_lock);
obd = class_name2obd(LUSTRE_MDS_OBDNAME);
if (!obd) {
rc = lustre_start_simple(LUSTRE_MDS_OBDNAME,
LUSTRE_MDS_OBDNAME"_uuid",
0, 0);
if (rc) {
- mutex_up(&server_start_lock);
+ cfs_mutex_up(&server_start_lock);
CERROR("failed to start MDS: %d\n", rc);
RETURN(rc);
}
}
- mutex_up(&server_start_lock);
+ cfs_mutex_up(&server_start_lock);
}
#endif
/* If we're an OST, make sure the global OSS is running */
if (lsi->lsi_ldd->ldd_flags & LDD_F_SV_TYPE_OST) {
/* make sure OSS is started */
- mutex_down(&server_start_lock);
+ cfs_mutex_down(&server_start_lock);
obd = class_name2obd(LUSTRE_OSS_OBDNAME);
if (!obd) {
rc = lustre_start_simple(LUSTRE_OSS_OBDNAME,
LUSTRE_OSS_OBDNAME"_uuid",
0, 0);
if (rc) {
- mutex_up(&server_start_lock);
+ cfs_mutex_up(&server_start_lock);
CERROR("failed to start OSS: %d\n", rc);
RETURN(rc);
}
}
- mutex_up(&server_start_lock);
+ cfs_mutex_up(&server_start_lock);
}
- /* Set the mgc fs to our server disk. This allows the MGC
- to read and write configs locally. */
+ /* Set the mgc fs to our server disk. This allows the MGC to
+ * read and write configs locally, in case it can't talk to the MGS. */
rc = server_mgc_set_fs(lsi->lsi_mgc, sb);
if (rc)
RETURN(rc);
lsi->lsi_ldd->ldd_svname);
GOTO(out_mgc, rc);
}
+ /* non-fatal error of registration with MGS */
+ if (rc)
+ CDEBUG(D_MOUNT, "Cannot register with MGS: %d\n", rc);
/* Let the target look up the mount using the target's name
(we can't pass the sb or mnt through class_process_config.) */
if (rc) {
CERROR("failed to start server %s: %d\n",
lsi->lsi_ldd->ldd_svname, rc);
+ /* Do NOT call server_deregister_mount() here. This makes it
+ * impossible to find the mount later at cleanup time and leaves
+ * @lsi and other stuff leaked. -umka */
GOTO(out_mgc, rc);
}
struct lustre_sb_info *lustre_init_lsi(struct super_block *sb)
{
- struct lustre_sb_info *lsi = NULL;
+ struct lustre_sb_info *lsi;
ENTRY;
- OBD_ALLOC(lsi, sizeof(*lsi));
+ OBD_ALLOC_PTR(lsi);
if (!lsi)
RETURN(NULL);
- OBD_ALLOC(lsi->lsi_lmd, sizeof(*lsi->lsi_lmd));
+ OBD_ALLOC_PTR(lsi->lsi_lmd);
if (!lsi->lsi_lmd) {
- OBD_FREE(lsi, sizeof(*lsi));
+ OBD_FREE_PTR(lsi);
RETURN(NULL);
}
lsi->lsi_lmd->lmd_exclude_count = 0;
+ lsi->lsi_lmd->lmd_recovery_time_soft = 0;
+ lsi->lsi_lmd->lmd_recovery_time_hard = 0;
s2lsi_nocast(sb) = lsi;
/* we take 1 extra ref for our setup */
- atomic_set(&lsi->lsi_mounts, 1);
+ cfs_atomic_set(&lsi->lsi_mounts, 1);
/* Default umount style */
lsi->lsi_flags = LSI_UMOUNT_FAILOVER;
struct lustre_sb_info *lsi = s2lsi(sb);
ENTRY;
- if (!lsi)
- RETURN(0);
-
- CDEBUG(D_MOUNT, "Freeing lsi\n");
+ LASSERT(lsi != NULL);
+ CDEBUG(D_MOUNT, "Freeing lsi %p\n", lsi);
/* someone didn't call server_put_mount. */
- LASSERT(atomic_read(&lsi->lsi_mounts) == 0);
+ LASSERT(cfs_atomic_read(&lsi->lsi_mounts) == 0);
if (lsi->lsi_ldd != NULL)
OBD_FREE(lsi->lsi_ldd, sizeof(*lsi->lsi_ldd));
if (lsi->lsi_lmd->lmd_profile != NULL)
OBD_FREE(lsi->lsi_lmd->lmd_profile,
strlen(lsi->lsi_lmd->lmd_profile) + 1);
+ if (lsi->lsi_lmd->lmd_mgssec != NULL)
+ OBD_FREE(lsi->lsi_lmd->lmd_mgssec,
+ strlen(lsi->lsi_lmd->lmd_mgssec) + 1);
if (lsi->lsi_lmd->lmd_opts != NULL)
OBD_FREE(lsi->lsi_lmd->lmd_opts,
strlen(lsi->lsi_lmd->lmd_opts) + 1);
struct lustre_sb_info *lsi = s2lsi(sb);
ENTRY;
- LASSERT(lsi);
+ LASSERT(lsi != NULL);
- CDEBUG(D_MOUNT, "put %p %d\n", sb, atomic_read(&lsi->lsi_mounts));
-
- if (atomic_dec_and_test(&lsi->lsi_mounts)) {
+ CDEBUG(D_MOUNT, "put %p %d\n", sb, cfs_atomic_read(&lsi->lsi_mounts));
+ if (cfs_atomic_dec_and_test(&lsi->lsi_mounts)) {
lustre_free_lsi(sb);
RETURN(1);
}
/*************** server mount ******************/
-/* Kernel mount using mount options in MOUNT_DATA_FILE */
+/** Kernel mount using mount options in MOUNT_DATA_FILE.
+ * Since this file lives on the disk, we pre-mount using a common
+ * type, read the file, then re-mount using the type specified in the
+ * file.
+ */
static struct vfsmount *server_kernel_mount(struct super_block *sb)
{
struct lvfs_run_ctxt mount_ctxt;
Note ext3/ldiskfs can't be mounted ro. */
s_flags = sb->s_flags;
+ /* allocate memory for options */
+ OBD_PAGE_ALLOC(__page, CFS_ALLOC_STD);
+ if (!__page)
+ GOTO(out_free, rc = -ENOMEM);
+ page = (unsigned long)cfs_page_address(__page);
+ options = (char *)page;
+ memset(options, 0, CFS_PAGE_SIZE);
+
+ /* mount-line options must be added for pre-mount because they may
+ * contain mount options such as journal_dev which are required
+ * to successfully mount the underlying filesystem */
+ if (lmd->lmd_opts && (*(lmd->lmd_opts) != 0))
+ strncat(options, lmd->lmd_opts, CFS_PAGE_SIZE - 1);
+
/* Pre-mount ldiskfs to read the MOUNT_DATA_FILE */
CDEBUG(D_MOUNT, "Pre-mount ldiskfs %s\n", lmd->lmd_dev);
- mnt = ll_kern_mount("ldiskfs", s_flags, lmd->lmd_dev, 0);
+ mnt = ll_kern_mount("ldiskfs", s_flags, lmd->lmd_dev, (void *)options);
if (IS_ERR(mnt)) {
rc = PTR_ERR(mnt);
CERROR("premount %s:%#lx ldiskfs failed: %d "
/* Done with our pre-mount, now do the real mount. */
/* Glom up mount options */
- OBD_PAGE_ALLOC(__page, CFS_ALLOC_STD);
- if (!__page)
- GOTO(out_free, rc = -ENOMEM);
- page = (unsigned long)cfs_page_address(__page);
-
- options = (char *)page;
memset(options, 0, CFS_PAGE_SIZE);
strncpy(options, ldd->ldd_mount_opts, CFS_PAGE_SIZE - 2);
MT_STR(ldd), lmd->lmd_dev, options);
mnt = ll_kern_mount(MT_STR(ldd), s_flags, lmd->lmd_dev,
(void *)options);
- OBD_PAGE_FREE(__page);
if (IS_ERR(mnt)) {
rc = PTR_ERR(mnt);
CERROR("ll_kern_mount failed: rc = %d\n", rc);
GOTO(out_free, rc);
}
+ if (lmd->lmd_flags & LMD_FLG_ABORT_RECOV)
+ simple_truncate(mnt->mnt_sb->s_root, mnt, LAST_RCVD,
+ LR_CLIENT_START);
+
+ OBD_PAGE_FREE(__page);
lsi->lsi_ldd = ldd; /* freed at lsi cleanup */
CDEBUG(D_SUPER, "%s: mnt = %p\n", lmd->lmd_dev, mnt);
RETURN(mnt);
out_free:
+ if (__page)
+ OBD_PAGE_FREE(__page);
OBD_FREE(ldd, sizeof(*ldd));
lsi->lsi_ldd = NULL;
RETURN(ERR_PTR(rc));
}
+/** Wait here, with no timeout, until the mount refcount drops to 1 before
+ * completing umount, else we risk dereferencing a null pointer. The loop
+ * ends early only if the wait itself is interrupted (rc < 0).
+ * LNET may take e.g. 165s before killing zombies.
+ */
static void server_wait_finished(struct vfsmount *mnt)
{
- wait_queue_head_t waitq;
- struct l_wait_info lwi;
- int retries = 330;
-
- init_waitqueue_head(&waitq);
-
- while ((atomic_read(&mnt->mnt_count) > 1) && (retries > 0)) {
- LCONSOLE_WARN("Mount still busy with %d refs, waiting for "
- "%d secs...\n",
- atomic_read(&mnt->mnt_count), retries);
-
- /* Wait for a bit */
- retries -= 5;
- lwi = LWI_TIMEOUT(5 * HZ, NULL, NULL);
- l_wait_event(waitq, 0, &lwi);
- }
- if (atomic_read(&mnt->mnt_count) > 1) {
- CERROR("Mount %p is still busy (%d refs), giving up.\n",
- mnt, atomic_read(&mnt->mnt_count));
- }
+ cfs_waitq_t waitq;
+ int rc, waited = 0;
+ cfs_sigset_t blocked;
+
+ cfs_waitq_init(&waitq);
+
+ while (atomic_read(&mnt->mnt_count) > 1) {
+ if (waited && (waited % 30 == 0))
+ LCONSOLE_WARN("Mount still busy with %d refs after "
+ "%d secs.\n",
+ atomic_read(&mnt->mnt_count),
+ waited);
+ /* Cannot use l_wait_event() for an interruptible sleep. */
+ waited += 3;
+ blocked = l_w_e_set_sigs(sigmask(SIGKILL));
+ cfs_waitq_wait_event_interruptible_timeout(
+ waitq,
+ (atomic_read(&mnt->mnt_count) == 1),
+ cfs_time_seconds(3),
+ rc);
+ cfs_block_sigs(blocked);
+ if (rc < 0) {
+ LCONSOLE_EMERG("Danger: interrupted umount %s with "
+ "%d refs!\n",
+ mnt->mnt_devname,
+ atomic_read(&mnt->mnt_count));
+ break;
+ }
+
+ }
}
+/** Start the shutdown of servers at umount.
+ */
static void server_put_super(struct super_block *sb)
{
struct lustre_sb_info *lsi = s2lsi(sb);
int tmpname_sz;
int lddflags = lsi->lsi_ldd->ldd_flags;
int lsiflags = lsi->lsi_flags;
- int rc;
ENTRY;
LASSERT(lsiflags & LSI_SERVER);
OBD_ALLOC(tmpname, tmpname_sz);
memcpy(tmpname, lsi->lsi_ldd->ldd_svname, tmpname_sz);
CDEBUG(D_MOUNT, "server put_super %s\n", tmpname);
+ if (IS_MDT(lsi->lsi_ldd) && (lsi->lsi_lmd->lmd_flags & LMD_FLG_NOSVC))
+ snprintf(tmpname, tmpname_sz, "MGS");
/* Stop the target */
- if (IS_MDT(lsi->lsi_ldd) || IS_OST(lsi->lsi_ldd)) {
+ if (!(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOSVC) &&
+ (IS_MDT(lsi->lsi_ldd) || IS_OST(lsi->lsi_ldd))) {
struct lustre_profile *lprof = NULL;
/* tell the mgc to drop the config log */
/* If they wanted the mgs to stop separately from the mdt, they
should have put it on a different device. */
if (IS_MGS(lsi->lsi_ldd)) {
- /* stop the mgc before the mgs so the connection gets cleaned
- up */
- lustre_stop_mgc(sb);
- server_stop_mgs(sb);
+ /* if the MDS was started with --nomgs, don't stop the MGS */
+ if (!(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOMGS))
+ server_stop_mgs(sb);
}
/* Clean the mgc and sb */
- rc = lustre_common_put_super(sb);
- /* FIXME how can I report a failure to umount? */
+ lustre_common_put_super(sb);
/* Wait for the targets to really clean up - can't exit (and let the
sb get destroyed) while the mount is still in use */
EXIT;
}
+/** Called only for 'umount -f'
+ */
#ifdef HAVE_UMOUNTBEGIN_VFSMOUNT
static void server_umount_begin(struct vfsmount *vfsmnt, int flags)
{
}
#ifndef HAVE_STATFS_DENTRY_PARAM
-static int server_statfs (struct super_block *sb, struct kstatfs *buf)
+static int server_statfs (struct super_block *sb, cfs_kstatfs_t *buf)
{
#else
-static int server_statfs (struct dentry *dentry, struct kstatfs *buf)
+static int server_statfs (struct dentry *dentry, cfs_kstatfs_t *buf)
{
struct super_block *sb = dentry->d_sb;
#endif
RETURN(0);
}
+/** The operations we support directly on the superblock:
+ * mount, umount, and df.
+ */
static struct super_operations server_ops =
{
.put_super = server_put_super,
.statfs = server_statfs,
};
-#define log2(n) ffz(~(n))
+#define log2(n) cfs_ffz(~(n))
#define LUSTRE_SUPER_MAGIC 0x0BD00BD1
static int server_fill_super_common(struct super_block *sb)
RETURN(0);
}
+/** Fill in the superblock info for a Lustre server.
+ * Mount the device with the correct options.
+ * Read the on-disk config file.
+ * Start the services.
+ */
static int server_fill_super(struct super_block *sb)
{
struct lustre_sb_info *lsi = s2lsi(sb);
if (IS_ERR(mnt)) {
rc = PTR_ERR(mnt);
CERROR("Unable to mount device %s: %d\n",
- lsi->lsi_lmd->lmd_dev, rc);
+ lsi->lsi_lmd->lmd_dev, rc);
lustre_put_lsi(sb);
- GOTO(out, rc);
+ RETURN(rc);
}
lsi->lsi_srv_mnt = mnt;
"running. Double-mount may have compromised"
" the disk journal.\n",
lsi->lsi_ldd->ldd_svname);
- unlock_mntput(mnt);
lustre_put_lsi(sb);
- GOTO(out, rc = -EALREADY);
+ unlock_mntput(mnt);
+ RETURN(-EALREADY);
}
- /* start MGS before MGC */
- if (IS_MGS(lsi->lsi_ldd)) {
+ /* Start MGS before MGC */
+ if (IS_MGS(lsi->lsi_ldd) && !(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOMGS)){
rc = server_start_mgs(sb);
if (rc)
GOTO(out_mnt, rc);
}
+ /* Start MGC before servers */
rc = lustre_start_mgc(sb);
if (rc)
GOTO(out_mnt, rc);
if (rc)
GOTO(out_mnt, rc);
- LCONSOLE_WARN("Server %s on device %s has started\n",
- lsi->lsi_ldd->ldd_svname, lsi->lsi_lmd->lmd_dev);
-
RETURN(0);
-
out_mnt:
+ /* We jump here in case of failure while starting targets or MGS.
+ * In this case we can't just put @mnt and have to do real cleanup
+ * with stopping targets, etc. */
server_put_super(sb);
-out:
- RETURN(rc);
+ return rc;
}
/* Get the index from the obd name.
{
unsigned long index;
int rc;
- char *dash = strchr(svname, '-');
+ char *dash = strrchr(svname, '-');
if (!dash)
return(-EINVAL);
+ /* interpret <fsname>-MDTXXXXX-mdc as mdt, the better way is to pass
+ * in the fsname, then determine the server index */
+ if (!strcmp(LUSTRE_MDC_NAME, dash + 1)) {
+ dash--;
+ for (; dash > svname && *dash != '-'; dash--);
+ if (dash == svname)
+ return(-EINVAL);
+ }
+
if (strncmp(dash + 1, "MDT", 3) == 0)
rc = LDD_F_SV_TYPE_MDT;
else if (strncmp(dash + 1, "OST", 3) == 0)
rc = LDD_F_SV_TYPE_OST;
else
return(-EINVAL);
+ if (strcmp(dash + 4, "all") == 0)
+ return rc | LDD_F_SV_ALL;
index = simple_strtoul(dash + 4, endptr, 16);
*idx = index;
}
/* Drop a ref to the mounted disk */
lustre_put_lsi(sb);
+ lu_types_stop();
RETURN(rc);
}
-#if 0
static void lmd_print(struct lustre_mount_data *lmd)
{
int i;
PRINT_CMD(PRINT_MASK, "profile: %s\n", lmd->lmd_profile);
PRINT_CMD(PRINT_MASK, "device: %s\n", lmd->lmd_dev);
PRINT_CMD(PRINT_MASK, "flags: %x\n", lmd->lmd_flags);
+
if (lmd->lmd_opts)
PRINT_CMD(PRINT_MASK, "options: %s\n", lmd->lmd_opts);
+
+ if (lmd->lmd_recovery_time_soft)
+ PRINT_CMD(PRINT_MASK, "recovery time soft: %d\n",
+ lmd->lmd_recovery_time_soft);
+
+ if (lmd->lmd_recovery_time_hard)
+ PRINT_CMD(PRINT_MASK, "recovery time hard: %d\n",
+ lmd->lmd_recovery_time_hard);
+
for (i = 0; i < lmd->lmd_exclude_count; i++) {
PRINT_CMD(PRINT_MASK, "exclude %d: OST%04x\n", i,
lmd->lmd_exclude[i]);
}
}
-#endif
/* Is this server on the exclusion list */
int lustre_check_exclusion(struct super_block *sb, char *svname)
RETURN(rc);
}
-/* mount -v -t lustre uml1:uml2:/lustre-client /mnt/lustre */
+static int lmd_parse_mgssec(struct lustre_mount_data *lmd, char *ptr)
+{ /* Store a private, NUL-terminated copy of one option value in
+   * lmd->lmd_mgssec. ptr is the text that follows "mgssec=" (lmd_parse
+   * passes s1 + 7); the value ends at the first ',' or at the end of
+   * the string. Returns 0 on success, -ENOMEM if the copy cannot be
+   * allocated. */
+ char *tail;
+ int length;
+
+ if (lmd->lmd_mgssec != NULL) { /* a value is already stored: drop it first */
+ OBD_FREE(lmd->lmd_mgssec, strlen(lmd->lmd_mgssec) + 1); /* same size as the length + 1 allocation below */
+ lmd->lmd_mgssec = NULL;
+ }
+
+ tail = strchr(ptr, ','); /* lmd_parse treats ',' as the option separator */
+ if (tail == NULL)
+ length = strlen(ptr); /* last option: value runs to end of string */
+ else
+ length = tail - ptr; /* value stops just before the ',' */
+
+ OBD_ALLOC(lmd->lmd_mgssec, length + 1); /* +1 for the terminating NUL */
+ if (lmd->lmd_mgssec == NULL)
+ return -ENOMEM;
+
+ memcpy(lmd->lmd_mgssec, ptr, length);
+ lmd->lmd_mgssec[length] = '\0';
+ return 0;
+}
+
+/** Parse mount line options
+ * e.g. mount -v -t lustre -o abort_recov uml1:uml2:/lustre-client /mnt/lustre
+ * dev is passed as device=uml1:/lustre by mount.lustre
+ */
static int lmd_parse(char *options, struct lustre_mount_data *lmd)
{
char *s1, *s2, *devname = NULL;
s1 = options;
while (*s1) {
int clear = 0;
+ int time_min = 2 * (CONNECTION_SWITCH_MAX +
+ 2 * INITIAL_CONNECT_TIMEOUT);
+
/* Skip whitespace and extra commas */
while (*s1 == ' ' || *s1 == ',')
s1++;
if (strncmp(s1, "abort_recov", 11) == 0) {
lmd->lmd_flags |= LMD_FLG_ABORT_RECOV;
clear++;
+ } else if (strncmp(s1, "recovery_time_soft=", 19) == 0) {
+ lmd->lmd_recovery_time_soft = max_t(int,
+ simple_strtoul(s1 + 19, NULL, 10), time_min);
+ clear++;
+ } else if (strncmp(s1, "recovery_time_hard=", 19) == 0) {
+ lmd->lmd_recovery_time_hard = max_t(int,
+ simple_strtoul(s1 + 19, NULL, 10), time_min);
+ clear++;
} else if (strncmp(s1, "nosvc", 5) == 0) {
lmd->lmd_flags |= LMD_FLG_NOSVC;
clear++;
+ } else if (strncmp(s1, "nomgs", 5) == 0) {
+ lmd->lmd_flags |= LMD_FLG_NOMGS;
+ clear++;
+ } else if (strncmp(s1, "mgssec=", 7) == 0) {
+ rc = lmd_parse_mgssec(lmd, s1 + 7);
+ if (rc)
+ goto invalid;
+ clear++;
/* ost exclusion list */
} else if (strncmp(s1, "exclude=", 8) == 0) {
rc = lmd_make_exclusion(lmd, s1 + 7);
goto invalid;
}
- s1 = strrchr(devname, ':');
+ s1 = strstr(devname, ":/");
if (s1) {
+ ++s1;
lmd->lmd_flags = LMD_FLG_CLIENT;
/* Remove leading /s from fsname */
while (*++s1 == '/') ;
strcpy(lmd->lmd_opts, options);
}
+ lmd_print(lmd);
lmd->lmd_magic = LMD_MAGIC;
RETURN(rc);
}
-/* Common mount */
+/** This is the entry point for the mount call into Lustre.
+ * This is called when a server or client is mounted,
+ * and this is where we start setting things up.
+ * @param data Mount options (e.g. -o flock,abort_recov)
+ */
int lustre_fill_super(struct super_block *sb, void *data, int silent)
{
struct lustre_mount_data *lmd;
RETURN(-ENOMEM);
lmd = lsi->lsi_lmd;
+ /*
+ * Disable lockdep during mount, because mount locking patterns are
+ * `special'.
+ */
+ cfs_lockdep_off();
+
/* Figure out the lmd from the mount options */
if (lmd_parse((char *)data, lmd)) {
lustre_put_lsi(sb);
- RETURN(-EINVAL);
+ GOTO(out, rc = -EINVAL);
}
if (lmd_is_client(lmd)) {
LCONSOLE_ERROR_MSG(0x165, "Nothing registered for "
"client mount! Is the 'lustre' "
"module loaded?\n");
+ lustre_put_lsi(sb);
rc = -ENODEV;
} else {
rc = lustre_start_mgc(sb);
if (rc) {
- lustre_stop_mgc(sb);
- goto out;
+ lustre_put_lsi(sb);
+ GOTO(out, rc);
}
/* Connect and start */
/* (should always be ll_fill_super) */
/* s_f_s will call server_put_super on failure */
}
+ /* If an error happens in the fill_super() call, @lsi will be killed there.
+ * This is why we do not put it here. */
+ GOTO(out, rc);
out:
- if (rc){
+ if (rc) {
CERROR("Unable to mount %s (%d)\n",
s2lsi(sb) ? lmd->lmd_dev : "", rc);
} else {
- CDEBUG(D_SUPER, "mount %s complete\n", lmd->lmd_dev);
+ CDEBUG(D_SUPER, "Mount %s complete\n",
+ lmd->lmd_dev);
}
- RETURN(rc);
+ cfs_lockdep_on();
+ return rc;
}
struct super_block * lustre_get_sb(struct file_system_type *fs_type,
int flags, const char *devname, void * data)
{
- /* calls back in fill super */
- /* we could append devname= onto options (*data) here,
- but 2.4 doesn't get devname. So we do it in mount_lustre.c */
return get_sb_nodev(fs_type, flags, data, lustre_fill_super);
}
#else
int flags, const char *devname, void * data,
struct vfsmount *mnt)
{
- /* calls back in fill super */
- /* we could append devname= onto options (*data) here,
- but 2.4 doesn't get devname. So we do it in mount_lustre.c */
return get_sb_nodev(fs_type, flags, data, lustre_fill_super, mnt);
}
#endif
{
struct lustre_sb_info *lsi = s2lsi(sb);
- if (kill_super_cb && lsi &&(lsi->lsi_flags & LSI_SERVER))
+ if (kill_super_cb && lsi && !(lsi->lsi_flags & LSI_SERVER))
(*kill_super_cb)(sb);
kill_anon_super(sb);
}
+/** Register the "lustre" fs type
+ */
struct file_system_type lustre_fs_type = {
.owner = THIS_MODULE,
.name = "lustre",
.get_sb = lustre_get_sb,
.kill_sb = lustre_kill_super,
- .fs_flags = FS_BINARY_MOUNTDATA | FS_REQUIRES_DEV,
+ .fs_flags = FS_BINARY_MOUNTDATA | FS_REQUIRES_DEV |
+#ifdef FS_HAS_FIEMAP
+ FS_HAS_FIEMAP |
+#endif
+ LL_RENAME_DOES_D_MOVE,
};
int lustre_register_fs(void)
EXPORT_SYMBOL(server_name2index);
EXPORT_SYMBOL(server_mti_print);
EXPORT_SYMBOL(do_lcfg);
-
-