* GPL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*/
/*
/*********** mount lookup *********/
-DECLARE_MUTEX(lustre_mount_info_lock);
+CFS_DECLARE_MUTEX(lustre_mount_info_lock);
static CFS_LIST_HEAD(server_mount_info_list);
static struct lustre_mount_info *server_find_mount(const char *name)
{
- struct list_head *tmp;
+ cfs_list_t *tmp;
struct lustre_mount_info *lmi;
ENTRY;
- list_for_each(tmp, &server_mount_info_list) {
- lmi = list_entry(tmp, struct lustre_mount_info, lmi_list_chain);
+ cfs_list_for_each(tmp, &server_mount_info_list) {
+ lmi = cfs_list_entry(tmp, struct lustre_mount_info,
+ lmi_list_chain);
if (strcmp(name, lmi->lmi_name) == 0)
RETURN(lmi);
}
}
strcpy(name_cp, name);
- down(&lustre_mount_info_lock);
+ cfs_down(&lustre_mount_info_lock);
if (server_find_mount(name)) {
- up(&lustre_mount_info_lock);
+ cfs_up(&lustre_mount_info_lock);
OBD_FREE(lmi, sizeof(*lmi));
OBD_FREE(name_cp, strlen(name) + 1);
CERROR("Already registered %s\n", name);
lmi->lmi_name = name_cp;
lmi->lmi_sb = sb;
lmi->lmi_mnt = mnt;
- list_add(&lmi->lmi_list_chain, &server_mount_info_list);
+ cfs_list_add(&lmi->lmi_list_chain, &server_mount_info_list);
- up(&lustre_mount_info_lock);
+ cfs_up(&lustre_mount_info_lock);
CDEBUG(D_MOUNT, "reg_mnt %p from %s, vfscount=%d\n",
- lmi->lmi_mnt, name, atomic_read(&lmi->lmi_mnt->mnt_count));
+ lmi->lmi_mnt, name, cfs_atomic_read(&lmi->lmi_mnt->mnt_count));
RETURN(0);
}
struct lustre_mount_info *lmi;
ENTRY;
- down(&lustre_mount_info_lock);
+ cfs_down(&lustre_mount_info_lock);
lmi = server_find_mount(name);
if (!lmi) {
- up(&lustre_mount_info_lock);
+ cfs_up(&lustre_mount_info_lock);
CERROR("%s not registered\n", name);
RETURN(-ENOENT);
}
CDEBUG(D_MOUNT, "dereg_mnt %p from %s, vfscount=%d\n",
- lmi->lmi_mnt, name, atomic_read(&lmi->lmi_mnt->mnt_count));
+ lmi->lmi_mnt, name, cfs_atomic_read(&lmi->lmi_mnt->mnt_count));
OBD_FREE(lmi->lmi_name, strlen(lmi->lmi_name) + 1);
- list_del(&lmi->lmi_list_chain);
+ cfs_list_del(&lmi->lmi_list_chain);
OBD_FREE(lmi, sizeof(*lmi));
- up(&lustre_mount_info_lock);
+ cfs_up(&lustre_mount_info_lock);
RETURN(0);
}
struct lustre_sb_info *lsi;
ENTRY;
- down(&lustre_mount_info_lock);
+ cfs_down(&lustre_mount_info_lock);
lmi = server_find_mount(name);
- up(&lustre_mount_info_lock);
+ cfs_up(&lustre_mount_info_lock);
if (!lmi) {
CERROR("Can't find mount for %s\n", name);
RETURN(NULL);
}
lsi = s2lsi(lmi->lmi_sb);
mntget(lmi->lmi_mnt);
- atomic_inc(&lsi->lsi_mounts);
+ cfs_atomic_inc(&lsi->lsi_mounts);
CDEBUG(D_MOUNT, "get_mnt %p from %s, refs=%d, vfscount=%d\n",
- lmi->lmi_mnt, name, atomic_read(&lsi->lsi_mounts),
- atomic_read(&lmi->lmi_mnt->mnt_count));
+ lmi->lmi_mnt, name, cfs_atomic_read(&lsi->lsi_mounts),
+ cfs_atomic_read(&lmi->lmi_mnt->mnt_count));
RETURN(lmi);
}
struct lustre_mount_info *lmi;
ENTRY;
- down(&lustre_mount_info_lock);
+ cfs_down(&lustre_mount_info_lock);
lmi = server_find_mount(name);
- up(&lustre_mount_info_lock);
+ cfs_up(&lustre_mount_info_lock);
if (!lmi)
CERROR("Can't find mount for %s\n", name);
static void unlock_mntput(struct vfsmount *mnt)
{
if (kernel_locked()) {
- unlock_kernel();
+ cfs_unlock_kernel();
mntput(mnt);
- lock_kernel();
+ cfs_lock_kernel();
} else {
mntput(mnt);
}
/* This might be the last one, can't deref after this */
unlock_mntput(mnt);
- down(&lustre_mount_info_lock);
+ cfs_down(&lustre_mount_info_lock);
lmi = server_find_mount(name);
- up(&lustre_mount_info_lock);
+ cfs_up(&lustre_mount_info_lock);
if (!lmi) {
CERROR("Can't find mount for %s\n", name);
RETURN(-ENOENT);
LASSERT(lmi->lmi_mnt == mnt);
CDEBUG(D_MOUNT, "put_mnt %p from %s, refs=%d, vfscount=%d\n",
- lmi->lmi_mnt, name, atomic_read(&lsi->lsi_mounts), count);
+ lmi->lmi_mnt, name, cfs_atomic_read(&lsi->lsi_mounts), count);
if (lustre_put_lsi(lmi->lmi_sb)) {
CDEBUG(D_MOUNT, "Last put of mnt %p from %s, vfscount=%d\n",
len = i_size_read(file->f_dentry->d_inode);
CDEBUG(D_MOUNT, "Have %s, size %lu\n", MOUNT_DATA_FILE, len);
if (len != sizeof(*ldd)) {
- CERROR("disk data size does not match: see %lu expect "LPSZ"\n",
- len, sizeof(*ldd));
+ CERROR("disk data size does not match: see %lu expect %u\n",
+ len, (int)sizeof(*ldd));
GOTO(out_close, rc = -EINVAL);
}
/**************** config llog ********************/
-/* Get a config log from the MGS and process it.
- This func is called for both clients and servers.
- Continue to process new statements appended to the logs
- (whenever the config lock is revoked) until lustre_end_log
- is called. */
+/** Get a config log from the MGS and process it.
+ * This func is called for both clients and servers.
+ * Continue to process new statements appended to the logs
+ * (whenever the config lock is revoked) until lustre_end_log
+ * is called.
+ * @param sb The superblock is used by the MGC to write to the local copy of
+ * the config log
+ * @param logname The name of the llog to replicate from the MGS
+ * @param cfg Since the same mgc may be used to follow multiple config logs
+ * (e.g. ost1, ost2, client), the config_llog_instance keeps the state for
+ * this log, and is added to the mgc's list of logs to follow.
+ */
int lustre_process_log(struct super_block *sb, char *logname,
struct config_llog_instance *cfg)
{
/**************** obd start *******************/
+/** lustre_cfg_bufs are a holdover from 1.4; we can still set these up from
+ * lctl (and do for echo cli/srv).
+ */
int do_lcfg(char *cfgname, lnet_nid_t nid, int cmd,
char *s1, char *s2, char *s3, char *s4)
{
return(rc);
}
+/** Call class_attach and class_setup. These methods in turn call
+ * obd type-specific methods.
+ */
static int lustre_start_simple(char *obdname, char *type, char *uuid,
char *s1, char *s2)
{
RETURN(rc);
}
-DECLARE_MUTEX(mgc_start_lock);
+CFS_DECLARE_MUTEX(mgc_start_lock);
/** Set up a mgc obd to process startup logs
*
mgssec = lsi->lsi_lmd->lmd_mgssec ? lsi->lsi_lmd->lmd_mgssec : "";
- mutex_down(&mgc_start_lock);
+ cfs_mutex_down(&mgc_start_lock);
obd = class_name2obd(mgcname);
if (obd && !obd->obd_stopping) {
GOTO(out_free, rc);
/* Re-using an existing MGC */
- atomic_inc(&obd->u.cli.cl_mgc_refcount);
+ cfs_atomic_inc(&obd->u.cli.cl_mgc_refcount);
recov_bk = 0;
/* If we are restarting the MGS, don't try to keep the MGC's
/* Keep a refcount of servers/clients who started with "mount",
so we know when we can get rid of the mgc. */
- atomic_set(&obd->u.cli.cl_mgc_refcount, 1);
+ cfs_atomic_set(&obd->u.cli.cl_mgc_refcount, 1);
/* Try all connections, but only once. */
recov_bk = 1;
if (data == NULL)
GOTO(out, rc = -ENOMEM);
data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_FID |
- OBD_CONNECT_AT;
+ OBD_CONNECT_AT | OBD_CONNECT_FULL20;
data->ocd_version = LUSTRE_VERSION_CODE;
rc = obd_connect(NULL, &exp, obd, &(obd->obd_uuid), data, NULL);
OBD_FREE_PTR(data);
to the same mgc.*/
lsi->lsi_mgc = obd;
out_free:
- mutex_up(&mgc_start_lock);
+ cfs_mutex_up(&mgc_start_lock);
if (mgcname)
OBD_FREE(mgcname, len);
RETURN(-ENOENT);
lsi->lsi_mgc = NULL;
- mutex_down(&mgc_start_lock);
- LASSERT(atomic_read(&obd->u.cli.cl_mgc_refcount) > 0);
- if (!atomic_dec_and_test(&obd->u.cli.cl_mgc_refcount)) {
+ cfs_mutex_down(&mgc_start_lock);
+ LASSERT(cfs_atomic_read(&obd->u.cli.cl_mgc_refcount) > 0);
+ if (!cfs_atomic_dec_and_test(&obd->u.cli.cl_mgc_refcount)) {
/* This is not fatal, every client that stops
will call in here. */
CDEBUG(D_MOUNT, "mgc still has %d references.\n",
- atomic_read(&obd->u.cli.cl_mgc_refcount));
+ cfs_atomic_read(&obd->u.cli.cl_mgc_refcount));
GOTO(out, rc = -EBUSY);
}
OBD_FREE(niduuid, len);
/* class_import_put will get rid of the additional connections */
- mutex_up(&mgc_start_lock);
+ cfs_mutex_up(&mgc_start_lock);
RETURN(rc);
}
RETURN(rc);
}
-DECLARE_MUTEX(server_start_lock);
+CFS_DECLARE_MUTEX(server_start_lock);
/* Stop MDS/OSS if nobody is using them */
static int server_stop_servers(int lddflags, int lsiflags)
int rc = 0;
ENTRY;
- mutex_down(&server_start_lock);
+ cfs_mutex_down(&server_start_lock);
/* Either an MDT or an OST or neither */
/* if this was an MDT, and there are no more MDT's, clean up the MDS */
rc = err;
}
- mutex_up(&server_start_lock);
+ cfs_mutex_up(&server_start_lock);
RETURN(rc);
}
struct lustre_sb_info *lsi = s2lsi(sb);
struct lustre_disk_data *ldd = lsi->lsi_ldd;
lnet_process_id_t id;
- int i = 0;
+ int i = 0;
ENTRY;
if (!(lsi->lsi_flags & LSI_SERVER))
while (LNetGetId(i++, &id) != -ENOENT) {
if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND)
continue;
+
+ if (class_find_param(ldd->ldd_params,
+ PARAM_NETWORK, NULL) == 0 &&
+ !class_match_net(ldd->ldd_params, id.nid)) {
+ /* can't match specified network */
+ continue;
+ }
+
mti->mti_nids[mti->mti_nid_count] = id.nid;
mti->mti_nid_count++;
if (mti->mti_nid_count >= MTI_NIDS_MAX) {
RETURN(rc);
}
-/* Start targets */
+/** Start server targets: MDTs and OSTs
+ */
static int server_start_targets(struct super_block *sb, struct vfsmount *mnt)
{
struct obd_device *obd;
/* If we're an MDT, make sure the global MDS is running */
if (lsi->lsi_ldd->ldd_flags & LDD_F_SV_TYPE_MDT) {
/* make sure the MDS is started */
- mutex_down(&server_start_lock);
+ cfs_mutex_down(&server_start_lock);
obd = class_name2obd(LUSTRE_MDS_OBDNAME);
if (!obd) {
rc = lustre_start_simple(LUSTRE_MDS_OBDNAME,
LUSTRE_MDS_OBDNAME"_uuid",
0, 0);
if (rc) {
- mutex_up(&server_start_lock);
+ cfs_mutex_up(&server_start_lock);
CERROR("failed to start MDS: %d\n", rc);
RETURN(rc);
}
}
- mutex_up(&server_start_lock);
+ cfs_mutex_up(&server_start_lock);
}
#endif
/* If we're an OST, make sure the global OSS is running */
if (lsi->lsi_ldd->ldd_flags & LDD_F_SV_TYPE_OST) {
/* make sure OSS is started */
- mutex_down(&server_start_lock);
+ cfs_mutex_down(&server_start_lock);
obd = class_name2obd(LUSTRE_OSS_OBDNAME);
if (!obd) {
rc = lustre_start_simple(LUSTRE_OSS_OBDNAME,
LUSTRE_OSS_OBDNAME"_uuid",
0, 0);
if (rc) {
- mutex_up(&server_start_lock);
+ cfs_mutex_up(&server_start_lock);
CERROR("failed to start OSS: %d\n", rc);
RETURN(rc);
}
}
- mutex_up(&server_start_lock);
+ cfs_mutex_up(&server_start_lock);
}
- /* Set the mgc fs to our server disk. This allows the MGC
- to read and write configs locally. */
+ /* Set the mgc fs to our server disk. This allows the MGC to
+ * read and write configs locally, in case it can't talk to the MGS. */
rc = server_mgc_set_fs(lsi->lsi_mgc, sb);
if (rc)
RETURN(rc);
}
lsi->lsi_lmd->lmd_exclude_count = 0;
+ lsi->lsi_lmd->lmd_recovery_time_soft = 0;
+ lsi->lsi_lmd->lmd_recovery_time_hard = 0;
s2lsi_nocast(sb) = lsi;
/* we take 1 extra ref for our setup */
- atomic_set(&lsi->lsi_mounts, 1);
+ cfs_atomic_set(&lsi->lsi_mounts, 1);
/* Default umount style */
lsi->lsi_flags = LSI_UMOUNT_FAILOVER;
CDEBUG(D_MOUNT, "Freeing lsi %p\n", lsi);
/* someone didn't call server_put_mount. */
- LASSERT(atomic_read(&lsi->lsi_mounts) == 0);
+ LASSERT(cfs_atomic_read(&lsi->lsi_mounts) == 0);
if (lsi->lsi_ldd != NULL)
OBD_FREE(lsi->lsi_ldd, sizeof(*lsi->lsi_ldd));
LASSERT(lsi != NULL);
- CDEBUG(D_MOUNT, "put %p %d\n", sb, atomic_read(&lsi->lsi_mounts));
- if (atomic_dec_and_test(&lsi->lsi_mounts)) {
+ CDEBUG(D_MOUNT, "put %p %d\n", sb, cfs_atomic_read(&lsi->lsi_mounts));
+ if (cfs_atomic_dec_and_test(&lsi->lsi_mounts)) {
lustre_free_lsi(sb);
RETURN(1);
}
/*************** server mount ******************/
-/* Kernel mount using mount options in MOUNT_DATA_FILE */
+/** Kernel mount using mount options in MOUNT_DATA_FILE.
+ * Since this file lives on the disk, we pre-mount using a common
+ * type, read the file, then re-mount using the type specified in the
+ * file.
+ */
static struct vfsmount *server_kernel_mount(struct super_block *sb)
{
struct lvfs_run_ctxt mount_ctxt;
RETURN(ERR_PTR(rc));
}
-/* Wait here forever until the mount refcount is 0 before completing umount,
+/** Wait here forever until the mount refcount is 0 before completing umount,
* else we risk dereferencing a null pointer.
* LNET may take e.g. 165s before killing zombies.
*/
cfs_waitq_init(&waitq);
- while (cfs_atomic_read(&mnt->mnt_count) > 1) {
+ while (atomic_read(&mnt->mnt_count) > 1) {
if (waited && (waited % 30 == 0))
LCONSOLE_WARN("Mount still busy with %d refs after "
"%d secs.\n",
blocked = l_w_e_set_sigs(sigmask(SIGKILL));
cfs_waitq_wait_event_interruptible_timeout(
waitq,
- (cfs_atomic_read(&mnt->mnt_count) == 1),
+ (atomic_read(&mnt->mnt_count) == 1),
cfs_time_seconds(3),
rc);
cfs_block_sigs(blocked);
}
}
+/** Start the shutdown of servers at umount.
+ */
static void server_put_super(struct super_block *sb)
{
struct lustre_sb_info *lsi = s2lsi(sb);
EXIT;
}
+/** Called only for 'umount -f'
+ */
#ifdef HAVE_UMOUNTBEGIN_VFSMOUNT
static void server_umount_begin(struct vfsmount *vfsmnt, int flags)
{
}
#ifndef HAVE_STATFS_DENTRY_PARAM
-static int server_statfs (struct super_block *sb, struct kstatfs *buf)
+static int server_statfs (struct super_block *sb, cfs_kstatfs_t *buf)
{
#else
-static int server_statfs (struct dentry *dentry, struct kstatfs *buf)
+static int server_statfs (struct dentry *dentry, cfs_kstatfs_t *buf)
{
struct super_block *sb = dentry->d_sb;
#endif
RETURN(0);
}
+/** The operations we support directly on the superblock:
+ * mount, umount, and df.
+ */
static struct super_operations server_ops =
{
.put_super = server_put_super,
.statfs = server_statfs,
};
-#define log2(n) ffz(~(n))
+#define log2(n) cfs_ffz(~(n))
#define LUSTRE_SUPER_MAGIC 0x0BD00BD1
static int server_fill_super_common(struct super_block *sb)
RETURN(0);
}
+/** Fill in the superblock info for a Lustre server.
+ * Mount the device with the correct options.
+ * Read the on-disk config file.
+ * Start the services.
+ */
static int server_fill_super(struct super_block *sb)
{
struct lustre_sb_info *lsi = s2lsi(sb);
GOTO(out_mnt, rc);
}
+ /* Start MGC before servers */
rc = lustre_start_mgc(sb);
if (rc)
GOTO(out_mnt, rc);
if (rc)
GOTO(out_mnt, rc);
- LCONSOLE_WARN("Server %s on device %s has started\n",
- ((lsi->lsi_lmd->lmd_flags & LMD_FLG_NOSVC) &&
- (IS_MDT(lsi->lsi_ldd))) ? "MGS" : lsi->lsi_ldd->ldd_svname,
- lsi->lsi_lmd->lmd_dev);
-
RETURN(0);
out_mnt:
/* We jump here in case of failure while starting targets or MGS.
RETURN(rc);
}
-#if 0
static void lmd_print(struct lustre_mount_data *lmd)
{
int i;
PRINT_CMD(PRINT_MASK, "profile: %s\n", lmd->lmd_profile);
PRINT_CMD(PRINT_MASK, "device: %s\n", lmd->lmd_dev);
PRINT_CMD(PRINT_MASK, "flags: %x\n", lmd->lmd_flags);
+
if (lmd->lmd_opts)
PRINT_CMD(PRINT_MASK, "options: %s\n", lmd->lmd_opts);
+
+ if (lmd->lmd_recovery_time_soft)
+ PRINT_CMD(PRINT_MASK, "recovery time soft: %d\n",
+ lmd->lmd_recovery_time_soft);
+
+ if (lmd->lmd_recovery_time_hard)
+ PRINT_CMD(PRINT_MASK, "recovery time hard: %d\n",
+ lmd->lmd_recovery_time_hard);
+
for (i = 0; i < lmd->lmd_exclude_count; i++) {
PRINT_CMD(PRINT_MASK, "exclude %d: OST%04x\n", i,
lmd->lmd_exclude[i]);
}
}
-#endif
/* Is this server on the exclusion list */
int lustre_check_exclusion(struct super_block *sb, char *svname)
return 0;
}
-/* mount -v -t lustre uml1:uml2:/lustre-client /mnt/lustre */
+/** Parse mount line options
+ * e.g. mount -v -t lustre -o abort_recov uml1:uml2:/lustre-client /mnt/lustre
+ * dev is passed as device=uml1:/lustre by mount.lustre
+ */
static int lmd_parse(char *options, struct lustre_mount_data *lmd)
{
char *s1, *s2, *devname = NULL;
s1 = options;
while (*s1) {
int clear = 0;
+ int time_min = 2 * (CONNECTION_SWITCH_MAX +
+ 2 * INITIAL_CONNECT_TIMEOUT);
+
/* Skip whitespace and extra commas */
while (*s1 == ' ' || *s1 == ',')
s1++;
if (strncmp(s1, "abort_recov", 11) == 0) {
lmd->lmd_flags |= LMD_FLG_ABORT_RECOV;
clear++;
+ } else if (strncmp(s1, "recovery_time_soft=", 19) == 0) {
+ lmd->lmd_recovery_time_soft = max_t(int,
+ simple_strtoul(s1 + 19, NULL, 10), time_min);
+ clear++;
+ } else if (strncmp(s1, "recovery_time_hard=", 19) == 0) {
+ lmd->lmd_recovery_time_hard = max_t(int,
+ simple_strtoul(s1 + 19, NULL, 10), time_min);
+ clear++;
} else if (strncmp(s1, "nosvc", 5) == 0) {
lmd->lmd_flags |= LMD_FLG_NOSVC;
clear++;
strcpy(lmd->lmd_opts, options);
}
+ lmd_print(lmd);
lmd->lmd_magic = LMD_MAGIC;
RETURN(rc);
}
-/* Common mount */
+/** This is the entry point for the mount call into Lustre.
+ * This is called when a server or client is mounted,
+ * and this is where we start setting things up.
+ * @param data Mount options (e.g. -o flock,abort_recov)
+ */
int lustre_fill_super(struct super_block *sb, void *data, int silent)
{
struct lustre_mount_data *lmd;
* Disable lockdep during mount, because mount locking patterns are
* `special'.
*/
- lockdep_off();
+ cfs_lockdep_off();
/* Figure out the lmd from the mount options */
if (lmd_parse((char *)data, lmd)) {
CDEBUG(D_SUPER, "Mount %s complete\n",
lmd->lmd_dev);
}
- lockdep_on();
+ cfs_lockdep_on();
return rc;
}
kill_anon_super(sb);
}
+/** Register the "lustre" fs type
+ */
struct file_system_type lustre_fs_type = {
.owner = THIS_MODULE,
.name = "lustre",
.get_sb = lustre_get_sb,
.kill_sb = lustre_kill_super,
.fs_flags = FS_BINARY_MOUNTDATA | FS_REQUIRES_DEV |
+#ifdef FS_HAS_FIEMAP
+ FS_HAS_FIEMAP |
+#endif
LL_RENAME_DOES_D_MOVE,
};