subdir-m += ptlrpc
subdir-m += osc
subdir-m += obdecho
+subdir-m += mgc
-@SERVER_TRUE@subdir-m += mds obdfilter ost
-@CLIENT_TRUE@subdir-m += mdc llite
+@SERVER_TRUE@subdir-m += mds obdfilter ost mgs
+@CLIENT_TRUE@subdir-m += mdc llite
@QUOTA_TRUE@subdir-m += quota
@INCLUDE_RULES@
AUTOMAKE_OPTIONS = foreign
ALWAYS_SUBDIRS := include lvfs obdclass ldlm ptlrpc osc lov obdecho \
- doc utils tests conf scripts autoconf
+ mgc doc utils tests conf scripts autoconf
-SERVER_SUBDIRS := ldiskfs obdfilter ost mds
+SERVER_SUBDIRS := ldiskfs obdfilter ost mds mgs
CLIENT_SUBDIRS := mdc llite
lustre/osc/autoMakefile
lustre/ost/Makefile
lustre/ost/autoMakefile
+lustre/mgc/Makefile
+lustre/mgc/autoMakefile
+lustre/mgs/Makefile
+lustre/mgs/autoMakefile
lustre/ptlrpc/Makefile
lustre/ptlrpc/autoMakefile
lustre/quota/Makefile
#ifndef min_t
#define min_t(type,x,y) \
- ({ type __x = (x); type __y = (y); __x < __y ? __x: __y; })
+ ({ type __x = (x); type __y = (y); __x < __y ? __x: __y; })
#endif
#ifndef max_t
#define max_t(type,x,y) \
- ({ type __x = (x); type __y = (y); __x > __y ? __x: __y; })
+ ({ type __x = (x); type __y = (y); __x > __y ? __x: __y; })
#endif
+#define simple_strtol strtol
+
/* registering symbols */
#ifndef ERESTARTSYS
#define ERESTARTSYS ERESTART
})
#define time_after(a, b) ((long)(b) - (long)(a) < 0)
#define time_before(a, b) time_after(b,a)
-#define time_after_eq(a,b) ((long)(a) - (long)(b) >= 0)
+#define time_after_eq(a,b) ((long)(a) - (long)(b) >= 0)
struct timer_list {
struct list_head tl_list;
lustre_dlm.h lustre_handles.h lustre_net.h obd_class.h obd_support.h \
lustre_log.h lustre_compat25.h lustre_fsfilt.h lustre_mds.h obd.h \
lvfs.h lvfs_linux.h lustre_lite.h lustre_quota.h \
- lustre_disk.h lustre_user.h lustre_types.h
+ lustre_disk.h lustre_user.h lustre_types.h lustre_param.h
+
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * This file is part of Lustre, http://www.lustre.org
- *
- * Lustre disk format definitions.
- */
-#ifndef _LUSTRE_DISK_H
-#define _LUSTRE_DISK_H_
-
-#include <linux/types.h>
-
-#include <lnet/types.h>
-
-/****************** last_rcvd file *********************/
-
-#define LAST_RCVD "last_rcvd"
-#define LOV_OBJID "lov_objid"
-
-#define LR_SERVER_SIZE 512
-#define LR_CLIENT_START 8192
-#define LR_CLIENT_SIZE 128
-#if LR_CLIENT_START < LR_SERVER_SIZE
-#error "Can't have LR_CLIENT_START < LR_SERVER_SIZE"
-#endif
-/* This limit is arbitrary (32k clients on x86), but it is convenient to use
- * 2^n * PAGE_SIZE * 8 for the number of bits that fit an order-n allocation. */
-#define LR_MAX_CLIENTS (PAGE_SIZE * 8)
-
-#define OBD_COMPAT_OST 0x00000002 /* this is an OST (temporary) */
-#define OBD_COMPAT_MDT 0x00000004 /* this is an MDT (temporary) */
-
-#define OBD_ROCOMPAT_LOVOBJID 0x00000001 /* MDS handles LOV_OBJID file */
-#define OBD_ROCOMPAT_CROW 0x00000002 /* OST will CROW create objects */
-
-#define OBD_INCOMPAT_GROUPS 0x00000001 /* OST handles group subdirs */
-#define OBD_INCOMPAT_OST 0x00000002 /* this is an OST (permanent) */
-#define OBD_INCOMPAT_MDT 0x00000004 /* this is an MDT (permanent) */
-
-/* Data stored per client in the last_rcvd file. In le32 order. */
-struct lsd_client_data {
- __u8 lcd_uuid[40]; /* client UUID */
- __u64 lcd_last_transno; /* last completed transaction ID */
- __u64 lcd_last_xid; /* xid for the last transaction */
- __u32 lcd_last_result; /* result from last RPC */
- __u32 lcd_last_data; /* per-op data (disposition for open &c.) */
- /* for MDS_CLOSE requests */
- __u64 lcd_last_close_transno; /* last completed transaction ID */
- __u64 lcd_last_close_xid; /* xid for the last transaction */
- __u32 lcd_last_close_result; /* result from last RPC */
- __u32 lcd_last_close_data; /* per-op data */
- __u8 lcd_padding[LR_CLIENT_SIZE - 88];
-};
-
-#endif /* _LUSTRE_DISK_H_ */
struct list_head fs_list;
struct module *fs_owner;
char *fs_type;
- char *(* fs_label)(struct super_block *sb);
+ char *(* fs_getlabel)(struct super_block *sb);
+ int (* fs_setlabel)(struct super_block *sb, char *label);
char *(* fs_uuid)(struct super_block *sb);
void *(* fs_start)(struct inode *inode, int op, void *desc_private,
int logs);
extern struct fsfilt_operations *fsfilt_get_ops(const char *type);
extern void fsfilt_put_ops(struct fsfilt_operations *fs_ops);
-static inline char *fsfilt_label(struct obd_device *obd, struct super_block *sb)
+static inline char *fsfilt_get_label(struct obd_device *obd,
+ struct super_block *sb)
{
- if (obd->obd_fsops->fs_label == NULL)
+ if (obd->obd_fsops->fs_getlabel == NULL)
return NULL;
- if (obd->obd_fsops->fs_label(sb)[0] == '\0')
+ if (obd->obd_fsops->fs_getlabel(sb)[0] == '\0')
return NULL;
- return obd->obd_fsops->fs_label(sb);
+ return obd->obd_fsops->fs_getlabel(sb);
+}
+
+static inline int fsfilt_set_label(struct obd_device *obd,
+ struct super_block *sb, char *label)
+{
+ if (obd->obd_fsops->fs_setlabel == NULL)
+ return -ENOSYS;
+ return (obd->obd_fsops->fs_setlabel(sb, label));
}
static inline __u8 *fsfilt_uuid(struct obd_device *obd, struct super_block *sb)
extern int llapi_target_check(int num_types, char **obd_types, char *dir);
extern int llapi_catinfo(char *dir, char *keyword, char *node_name);
extern int llapi_lov_get_uuids(int fd, struct obd_uuid *uuidp, int *ost_count);
-extern int llapi_is_lustre_mnttype(char *type);
+extern int llapi_is_lustre_mnttype(struct mntent *mnt);
extern int llapi_quotachown(char *path, int flag);
extern int llapi_quotacheck(char *mnt, int check_type);
extern int llapi_poll_quotacheck(char *mnt, struct if_quotacheck *qchk);
#error Unsupported operating system.
#endif
+#include <lnet/types.h> /* for lnet_nid_t */
+
/* Defn's shared with user-space. */
#include <lustre/lustre_user.h>
#define MDS_SETATTR_PORTAL 22
#define MDS_READPAGE_PORTAL 23
+#define MGC_REPLY_PORTAL 25
+#define MGS_REQUEST_PORTAL 26
+#define MGS_REPLY_PORTAL 27
#define OST_REQUEST_PORTAL 28
#define SVC_KILLED 1
#define LUSTRE_OST_VERSION 0x00030000
#define LUSTRE_DLM_VERSION 0x00040000
#define LUSTRE_LOG_VERSION 0x00050000
+#define LUSTRE_MGS_VERSION 0x00060000
+
struct lustre_handle {
__u64 cookie;
#define OBD_CONNECT_IBITS 0x1000ULL /* support for inodebits locks */
#define OBD_CONNECT_JOIN 0x2000ULL /* files can be concatenated */
#define OBD_CONNECT_NODEVOH 0x8000ULL /* No open handle for special nodes */
+#define OBD_CONNECT_EMPTY 0x80000000ULL /* fake: these are empty connect flags*/
+
/* also update obd_connect_names[] for lprocfs_rd_connect_flags() */
#define MDS_CONNECT_SUPPORTED (OBD_CONNECT_RDONLY | OBD_CONNECT_VERSION | \
OBD_CONNECT_REQPORTAL | OBD_CONNECT_VERSION | \
OBD_CONNECT_TRUNCLOCK | OBD_CONNECT_INDEX)
#define ECHO_CONNECT_SUPPORTED (0)
+#define MGS_CONNECT_SUPPORTED (OBD_CONNECT_VERSION)
#define OBD_OCD_VERSION(major,minor,patch,fix) (((major)<<24) + ((minor)<<16) +\
((patch)<<8) + (fix))
extern void lustre_swab_ldlm_reply (struct ldlm_reply *r);
+
+/*
+ * Opcodes for mountconf (mgs and mgc)
+ */
+typedef enum {
+ MGS_CONNECT = 250,
+ MGS_DISCONNECT,
+ MGS_EXCEPTION, /* node died, etc. */
+ MGS_TARGET_REG, /* whenever target starts up */
+ MGS_TARGET_DEL,
+ MGS_LAST_OPC
+} mgs_cmd_t;
+
+#define MTI_NAME_MAXLEN 64
+#define MTI_UUID_MAXLEN MTI_NAME_MAXLEN + 5
+/* each host can have multiple nids, and multiple failover hosts, and I don't
+ want to run out of room... */
+#define MTI_NIDS_MAX 64 /* match lustre_disk.h */
+
+struct mgs_target_info {
+ char mti_fsname[MTI_NAME_MAXLEN];
+ char mti_svname[MTI_NAME_MAXLEN];
+ char mti_uuid[sizeof(struct obd_uuid)];
+ lnet_nid_t mti_nids[MTI_NIDS_MAX]; /* host nids */
+ lnet_nid_t mti_failnids[MTI_NIDS_MAX]; /* partner nids */
+ __u16 mti_failnodes[8]; /* last nid index of each partner */
+ __u32 mti_stripe_index;
+ __u32 mti_nid_count;
+ __u32 mti_failnid_count;
+ __u32 mti_config_ver;
+ __u32 mti_flags;
+ char mti_params[2048];
+};
+
+extern void lustre_swab_mgs_target_info(struct mgs_target_info *oinfo);
+
+#define CM_START 0x01
+#define CM_END 0x02
+#define CM_SKIP 0x04
+#define CM_UPGRADE146 0x08
+#define CM_START_SKIP (CM_START | CM_SKIP)
+
+struct cfg_marker {
+ __u32 cm_step; /* aka config version */
+ __u32 cm_flags;
+ time_t cm_createtime; /*when this record was first created */
+ time_t cm_canceltime; /*when this record is no longer valid*/
+ char cm_svname[16];
+ char cm_comment[40];
+};
+
/*
* Opcodes for multiple servers.
*/
uuid->uuid[sizeof(*uuid) - 1] = '\0';
}
+/* For printf's only, make sure uuid is terminated */
+static inline char *obd_uuid2str(struct obd_uuid *uuid)
+{
+ if (uuid->uuid[sizeof(*uuid) - 1] != '\0') {
+ /* Obviously not safe, but for printfs, no real harm done...*/
+ static char temp[sizeof(*uuid)];
+ memcpy(temp, uuid->uuid, sizeof(*uuid));
+ temp[sizeof(*uuid) - 1] = '\0';
+ return temp;
+ }
+ return (char *)(uuid->uuid);
+}
+
#define LUSTRE_Q_QUOTAON 0x800002 /* turn quotas on */
#define LUSTRE_Q_QUOTAOFF 0x800003 /* turn quotas off */
#define LUSTRE_Q_GETINFO 0x800005 /* get information about quota files */
#define LCFG_HDR_SIZE(count) \
size_round(offsetof (struct lustre_cfg, lcfg_buflens[(count)]))
+/* If not LCFG_REQUIRED, we can ignore this cmd and go on. */
+#define LCFG_REQUIRED 0x0001000
+
enum lcfg_command_type {
LCFG_ATTACH = 0x00cf001,
LCFG_DETACH = 0x00cf002,
LCFG_DEL_CONN = 0x00cf00c,
LCFG_LOV_ADD_OBD = 0x00cf00d,
LCFG_LOV_DEL_OBD = 0x00cf00e,
- LCFG_PARAM = 0x00cf00f,
- LCFG_MARKER = 0x00cf010
+ LCFG_PARAM = 0x00ce00f,
+ LCFG_MARKER = 0x00ce010,
+ LCFG_LOG_START = 0x00ce011,
+ LCFG_LOG_END = 0x00ce012,
+ LCFG_LOV_ADD_INA = 0x00ce013,
};
struct lustre_cfg_bufs {
return NULL;
/* make sure it's NULL terminated, even if this kills a char
- * of data
+ * of data. Try to use the padding first though.
*/
- s[lcfg->lcfg_buflens[index] - 1] = '\0';
+ if (s[lcfg->lcfg_buflens[index] - 1] != '\0') {
+ int last = min((int)lcfg->lcfg_buflens[index],
+ size_round(lcfg->lcfg_buflens[index]) - 1);
+ s[last] = '\0';
+ CWARN("Truncating buf %d to '%s'\n", index, s);
+ }
return s;
}
if (lcfg->lcfg_version != LUSTRE_CFG_VERSION)
RETURN(-EINVAL);
+
if (lcfg->lcfg_bufcount >= LUSTRE_CFG_MAX_BUFCOUNT)
RETURN(-EINVAL);
RETURN(0);
}
-
-#define LMD_MAGIC 0xbdacbd03
-#define LMD_MAGIC_MASK (0xffffff00 & LMD_MAGIC)
-
-#define lmd_bad_magic(LMDP) \
-({ \
- struct lustre_mount_data *_lmd__ = (LMDP); \
- int _ret__ = 0; \
- if (!_lmd__) { \
- LCONSOLE_ERROR("Missing mount data: " \
- "check that /sbin/mount.lustre is installed.\n");\
- _ret__ = 1; \
- } else if (_lmd__->lmd_magic == LMD_MAGIC) { \
- _ret__ = 0; \
- } else if ((_lmd__->lmd_magic & LMD_MAGIC_MASK) == LMD_MAGIC_MASK) { \
- LCONSOLE_ERROR("You're using an old version of " \
- "/sbin/mount.lustre. Please install version " \
- "1.%d\n", LMD_MAGIC & 0xFF); \
- _ret__ = 1; \
- } else { \
- LCONSOLE_ERROR("Invalid mount data (%#x != %#x): " \
- "check that /sbin/mount.lustre is installed\n", \
- _lmd__->lmd_magic, LMD_MAGIC); \
- _ret__ = 1; \
- } \
- _ret__; \
-})
-
-#define MAX_FAILOVER_NIDS 10
-
-/* Passed by mount */
-/* Any changes in the alignment of elements in this stuct require a change to
- LMD_MAGIC */
-struct lustre_mount_data {
- uint32_t lmd_magic;
- uint32_t lmd_flags;
- uint16_t lmd_nid_count; /* how many failover nids we have for the MDS */
- lnet_nid_t lmd_nid[MAX_FAILOVER_NIDS];
- char lmd_mds[64];
- char lmd_profile[64];
-};
-
-#define LMD_FLG_FLOCK 0x0001
-#define LMD_FLG_USER_XATTR 0x0002
-#define LMD_FLG_ACL 0x0004
-
#endif // _LUSTRE_CFG_H
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ * Author: Nathan Rutman <nathan@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *
+ * Lustre disk format definitions.
+ */
+
+#ifndef _LUSTRE_DISK_H
+#define _LUSTRE_DISK_H
+
+#include <linux/types.h>
+#include <lnet/types.h>
+
+
+/****************** persistent mount data *********************/
+
+/* Persistent mount data are stored on the disk in this file.
+ Used before the setup llog can be read. */
+#define MOUNT_CONFIGS_DIR "CONFIGS"
+#define MOUNT_DATA_FILE MOUNT_CONFIGS_DIR"/mountdata"
+#define MDT_LOGS_DIR "LOGS" /* COMPAT_146 */
+
+#define LDD_F_SV_TYPE_MDT 0x0001
+#define LDD_F_SV_TYPE_OST 0x0002
+#define LDD_F_SV_TYPE_MGS 0x0004
+#define LDD_F_NEED_INDEX 0x0010 /* need an index assignment */
+#define LDD_F_VIRGIN 0x0020 /* never registered */
+#define LDD_F_UPDATE 0x0040 /* update the config logs for this server*/
+#define LDD_F_REWRITE_LDD 0x0080 /* rewrite the LDD */
+#define LDD_F_WRITECONF 0x0100 /* regenerate all logs for this fs */
+#define LDD_F_UPGRADE14 0x0200 /* COMPAT_14 */
+#define MTI_F_IOCTL 0x0400 /* only used in mti */
+
+
+enum ldd_mount_type {
+ LDD_MT_EXT3 = 0,
+ LDD_MT_LDISKFS,
+ LDD_MT_SMFS,
+ LDD_MT_REISERFS,
+ LDD_MT_LAST
+};
+
+static inline char *mt_str(enum ldd_mount_type mt)
+{
+ static char *mount_type_string[] = {
+ "ext3",
+ "ldiskfs",
+ "smfs",
+ "reiserfs",
+ };
+ //LASSERT(mt < LDD_MT_LAST);
+ return mount_type_string[mt];
+}
+
+#ifndef MTI_NIDS_MAX /* match lustre_idl.h */
+#define MTI_NIDS_MAX 64
+#endif
+
+#define LDD_INCOMPAT_SUPP 0
+#define LDD_ROCOMPAT_SUPP 0
+
+#define LDD_MAGIC 0x1dd00001
+
+/* FIXME does on-disk ldd have to be a fixed endianness? (like last_rcvd) */
+struct lustre_disk_data {
+ __u32 ldd_magic;
+ __u32 ldd_feature_compat; /* compatible feature flags */
+ __u32 ldd_feature_rocompat;/* read-only compatible feature flags */
+ __u32 ldd_feature_incompat;/* incompatible feature flags */
+
+ __u32 ldd_config_ver; /* config rewrite count - not used */
+ __u32 ldd_flags; /* LDD_SV_TYPE */
+ __u32 ldd_svindex; /* server index (0001), must match
+ svname */
+ __u32 ldd_mount_type; /* target fs type LDD_MT_* */
+ char ldd_fsname[64]; /* filesystem this server is part of */
+ char ldd_svname[64]; /* this server's name (lustre-mdt0001)*/
+ __u8 ldd_uuid[40]; /* server UUID (COMPAT_146) */
+
+/*200*/ __u8 ldd_padding[4096 - 200];
+/*4096*/char ldd_mount_opts[4096]; /* target fs mount opts */
+/*8192*/char ldd_params[4096]; /* key=value pairs */
+};
+
+#define IS_MDT(data) ((data)->ldd_flags & LDD_F_SV_TYPE_MDT)
+#define IS_OST(data) ((data)->ldd_flags & LDD_F_SV_TYPE_OST)
+#define IS_MGS(data) ((data)->ldd_flags & LDD_F_SV_TYPE_MGS)
+#define MT_STR(data) mt_str((data)->ldd_mount_type)
+
+/* Make the mdt/ost server obd name based on the filesystem name */
+static inline int server_make_name(__u32 flags, __u16 index, char *fs,
+ char *name)
+{
+ if (flags & (LDD_F_SV_TYPE_MDT | LDD_F_SV_TYPE_OST)) {
+ sprintf(name, "%.8s-%s%04x", fs,
+ (flags & LDD_F_SV_TYPE_MDT) ? "MDT" : "OST",
+ index);
+ } else if (flags & LDD_F_SV_TYPE_MGS) {
+ sprintf(name, "MGS");
+ } else {
+ CERROR("unknown server type %#x\n", flags);
+ return 1;
+ }
+ return 0;
+}
+
+/* Get the index from the obd name */
+int server_name2index(char *svname, __u32 *idx, char **endptr);
+
+
+/****************** mount command *********************/
+
+/* The lmd is only used internally by Lustre; mount simply passes
+ everything as string options */
+
+#define LMD_MAGIC 0xbdacbd03
+
+/* gleaned from the mount command - no persistent info here */
+struct lustre_mount_data {
+ __u32 lmd_magic;
+ __u32 lmd_flags; /* lustre mount flags */
+ int lmd_mgs_failnodes; /* mgs failover node count */
+ int lmd_exclude_count;
+ char *lmd_dev; /* device name */
+ char *lmd_profile; /* client only */
+ char *lmd_opts; /* lustre mount options (as opposed to
+ _device_ mount options) */
+ __u32 *lmd_exclude; /* array of OSTs to ignore */
+};
+
+#define LMD_FLG_CLIENT 0x0002 /* Mounting a client only */
+#define LMD_FLG_RECOVER 0x0004 /* Allow recovery */
+#define LMD_FLG_NOSVC 0x0008 /* Only start MGS/MGC for servers,
+ no other services */
+
+#define lmd_is_client(x) ((x)->lmd_flags & LMD_FLG_CLIENT)
+
+/****************** mkfs command *********************/
+
+#define MO_IS_LOOP 0x01
+#define MO_FORCEFORMAT 0x02
+
+/* used to describe the options to format the lustre disk, not persistent */
+struct mkfs_opts {
+ struct lustre_disk_data mo_ldd; /* to be written in MOUNT_DATA_FILE */
+ char mo_mount_type_string[20]; /* "ext3", "ldiskfs", ... */
+ char mo_device[128]; /* disk device name */
+ char mo_mkfsopts[128]; /* options to the backing-store mkfs */
+ char mo_loopdev[128]; /* in case a loop dev is needed */
+ __u64 mo_device_sz; /* in KB */
+ int mo_stripe_count;
+ int mo_flags;
+ int mo_mgs_failnodes;
+};
+
+/****************** on-disk files *********************/
+
+#define LAST_RCVD "last_rcvd"
+#define LOV_OBJID "lov_objid"
+#define HEALTH_CHECK "health_check"
+
+/****************** last_rcvd file *********************/
+
+#define LR_SERVER_SIZE 512
+#define LR_CLIENT_START 8192
+#define LR_CLIENT_SIZE 128
+#if LR_CLIENT_START < LR_SERVER_SIZE
+#error "Can't have LR_CLIENT_START < LR_SERVER_SIZE"
+#endif
+/* This limit is arbitrary (32k clients on x86), but it is convenient to use
+ * 2^n * PAGE_SIZE * 8 for the number of bits that fit an order-n allocation. */
+#define LR_MAX_CLIENTS (PAGE_SIZE * 8)
+
+
+/* COMPAT_146 */
+#define OBD_COMPAT_OST 0x00000002 /* this is an OST (temporary) */
+#define OBD_COMPAT_MDT 0x00000004 /* this is an MDT (temporary) */
+/* end COMPAT_146 */
+
+#define OBD_ROCOMPAT_LOVOBJID 0x00000001 /* MDS handles LOV_OBJID file */
+#define OBD_ROCOMPAT_CROW 0x00000002 /* OST will CROW create objects */
+
+#define OBD_INCOMPAT_GROUPS 0x00000001 /* OST handles group subdirs */
+#define OBD_INCOMPAT_OST 0x00000002 /* this is an OST */
+#define OBD_INCOMPAT_MDT 0x00000004 /* this is an MDT */
+#define OBD_INCOMPAT_COMMON_LR 0x00000008 /* common last_rvcd format */
+
+
+/* Data stored per server at the head of the last_rcvd file. In le32 order.
+ This should be common to filter_internal.h, lustre_mds.h */
+struct lr_server_data {
+ __u8 lsd_uuid[40]; /* server UUID */
+ __u64 lsd_unused; /* was fsd_last_objid - don't use for now */
+ __u64 lsd_last_transno; /* last completed transaction ID */
+ __u64 lsd_mount_count; /* incarnation number */
+ __u32 lsd_feature_compat; /* compatible feature flags */
+ __u32 lsd_feature_rocompat;/* read-only compatible feature flags */
+ __u32 lsd_feature_incompat;/* incompatible feature flags */
+ __u32 lsd_server_size; /* size of server data area */
+ __u32 lsd_client_start; /* start of per-client data area */
+ __u16 lsd_client_size; /* size of per-client data area */
+ __u16 lsd_subdir_count; /* number of subdirectories for objects */
+ __u64 lsd_catalog_oid; /* recovery catalog object id */
+ __u32 lsd_catalog_ogen; /* recovery catalog inode generation */
+ __u8 lsd_peeruuid[40]; /* UUID of MDS associated with this OST */
+ __u32 lsd_ost_index; /* index number of OST in LOV */
+ __u32 lsd_mdt_index; /* index number of MDT in LMV */
+ __u8 lsd_padding[LR_SERVER_SIZE - 148];
+};
+
+/* Data stored per client in the last_rcvd file. In le32 order. */
+struct lsd_client_data {
+ __u8 lcd_uuid[40]; /* client UUID */
+ __u64 lcd_last_transno; /* last completed transaction ID */
+ __u64 lcd_last_xid; /* xid for the last transaction */
+ __u32 lcd_last_result; /* result from last RPC */
+ __u32 lcd_last_data; /* per-op data (disposition for open &c.) */
+ /* for MDS_CLOSE requests */
+ __u64 lcd_last_close_transno; /* last completed transaction ID */
+ __u64 lcd_last_close_xid; /* xid for the last transaction */
+ __u32 lcd_last_close_result; /* result from last RPC */
+ __u32 lcd_last_close_data; /* per-op data */
+ __u8 lcd_padding[LR_CLIENT_SIZE - 88];
+};
+
+
+#ifdef __KERNEL__
+/****************** superblock additional info *********************/
+struct ll_sb_info;
+
+struct lustre_sb_info {
+ int lsi_flags;
+ struct obd_device *lsi_mgc; /* mgc obd */
+ struct lustre_mount_data *lsi_lmd; /* mount command info */
+ struct lustre_disk_data *lsi_ldd; /* mount info on-disk */
+ struct ll_sb_info *lsi_llsbi; /* add'l client sbi info */
+ struct vfsmount *lsi_srv_mnt; /* the one server mount */
+ atomic_t lsi_mounts; /* references to the srv_mnt */
+};
+
+#define LSI_SERVER 0x00000001
+#define LSI_UMOUNT_FORCE 0x00000010
+#define LSI_UMOUNT_FAILOVER 0x00000020
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+# define s2lsi(sb) ((struct lustre_sb_info *)((sb)->s_fs_info))
+# define s2lsi_nocast(sb) ((sb)->s_fs_info)
+#else /* 2.4 here */
+# define s2lsi(sb) ((struct lustre_sb_info *)((sb)->u.generic_sbp))
+# define s2lsi_nocast(sb) ((sb)->u.generic_sbp)
+#endif
+
+#define get_profile_name(sb) (s2lsi(sb)->lsi_lmd->lmd_profile)
+
+#endif /* __KERNEL__ */
+
+/****************** mount lookup info *********************/
+
+struct lustre_mount_info {
+ char *lmi_name;
+ struct super_block *lmi_sb;
+ struct vfsmount *lmi_mnt;
+ struct list_head lmi_list_chain;
+};
+
+/****************** prototypes *********************/
+
+#ifdef __KERNEL__
+#include <obd_class.h>
+
+/* obd_mount.c */
+void lustre_register_client_fill_super(int (*cfs)(struct super_block *sb));
+int lustre_common_put_super(struct super_block *sb);
+int lustre_process_log(struct super_block *sb, char *logname,
+ struct config_llog_instance *cfg);
+int lustre_end_log(struct super_block *sb, char *logname,
+ struct config_llog_instance *cfg);
+struct lustre_mount_info *server_get_mount(char *name);
+int server_put_mount(char *name, struct vfsmount *mnt);
+int server_register_target(struct super_block *sb);
+struct mgs_target_info;
+int server_mti_print(char *title, struct mgs_target_info *mti);
+
+/* mgc_request.c */
+int mgc_logname2resid(char *logname, struct ldlm_res_id *res_id);
+
+#endif
+
+#endif // _LUSTRE_DISK_H
#include <lustre/lustre_idl.h>
#include <lustre_dlm.h>
+/* Data stored per client in the last_rcvd file. In le32 order. */
struct mds_client_data;
struct mds_export_data {
spinlock_t imp_lock;
/* flags */
- unsigned int imp_invalid:1, imp_replayable:1,
- imp_dlm_fake:1, imp_server_timeout:1,
- imp_initial_recov:1, imp_initial_recov_bk:1,
- imp_force_verify:1, imp_pingable:1,
- imp_resend_replay:1, imp_deactive:1;
+ unsigned int
+ imp_invalid:1, /* evicted */
+ imp_replayable:1, /* try to recover the import */
+ imp_dlm_fake:1, /* don't run recovery (timeout instead) */
+ imp_server_timeout:1, /* use 1/2 timeout on MDS' OSCs */
+ imp_initial_recov:1, /* retry the initial connection */
+ imp_initial_recov_bk:1, /* turn off init_recov after trying all failover nids */
+ imp_force_verify:1, /* force an immidiate ping */
+ imp_pingable:1, /* pingable */
+ imp_resend_replay:1, /* resend for replay */
+ imp_deactive:1; /* administratively disabled */
__u32 imp_connect_op;
struct obd_connect_data imp_connect_data;
__u64 imp_connect_flags_orig;
#define OBD_IOC_PROCESS_CFG _IOWR('f', 184, OBD_IOC_DATA_TYPE)
#define OBD_IOC_DUMP_LOG _IOWR('f', 185, OBD_IOC_DATA_TYPE)
#define OBD_IOC_CLEAR_LOG _IOWR('f', 186, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_PARAM _IOW ('f', 187, OBD_IOC_DATA_TYPE)
#define OBD_IOC_CATLOGLIST _IOWR('f', 190, OBD_IOC_DATA_TYPE)
#define OBD_IOC_LLOG_INFO _IOWR('f', 191, OBD_IOC_DATA_TYPE)
void *data, void *catdata);
extern int llog_cancel_rec(struct llog_handle *loghandle, int index);
extern int llog_close(struct llog_handle *cathandle);
+extern int llog_get_size(struct llog_handle *loghandle);
/* llog_cat.c - catalog api */
struct llog_process_data {
if (!strcmp(obd->obd_type->typ_name, LUSTRE_MDS_NAME))
ctxt->loc_gen.mnt_cnt = obd->u.mds.mds_mount_count;
- else if (!strstr(obd->obd_type->typ_name, LUSTRE_FILTER_NAME))
+ else if (!strstr(obd->obd_type->typ_name, LUSTRE_OST_NAME))
ctxt->loc_gen.mnt_cnt = obd->u.filter.fo_mount_count;
else
ctxt->loc_gen.mnt_cnt = 0;
#define MDS_MAXREQSIZE (5 * 1024)
#define MDS_MAXREPSIZE max(9 * 1024, 280 + LOV_MAX_STRIPE_COUNT * 56)
+/* FIXME fix all constants here. Andreas suggests dyamically adding threads. */
+#define MGS_MAX_THREADS 8UL
+#define MGS_NUM_THREADS max(2UL, min_t(unsigned long, MGS_MAX_THREADS, \
+ num_physpages * smp_num_cpus >> (26 - PAGE_SHIFT)))
+
+#define MGS_NBUFS (64 * smp_num_cpus)
+#define MGS_BUFSIZE (8 * 1024)
+#define MGS_MAXREQSIZE (5 * 1024)
+#define MGS_MAXREPSIZE (9 * 1024)
+
#define OST_MAX_THREADS 512UL
#define OST_DEF_THREADS max_t(unsigned long, 2, \
(num_physpages >> (26-PAGE_SHIFT)) * smp_num_cpus)
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2006 Cluster File Systems, Inc.
+ * Author: Nathan Rutman <nathan@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *
+ * User-settable parameter keys
+ */
+
+#ifndef _LUSTRE_PARAM_H
+#define _LUSTRE_PARAM_H
+
+/* obd_mount.c */
+int class_find_param(char *buf, char *key, char **valp);
+int class_match_param(char *buf, char *key, char **valp);
+int class_parse_nid(char *buf, lnet_nid_t *nid, char **endh);
+
+
+/****************** User-settable parameter keys *********************/
+
+#define PARAM_MGSNODE "mgsnode="
+#define PARAM_FAILNODE "failnode="
+#define PARAM_OBD_TIMEOUT "obd_timeout="
+#define PARAM_DEFAULT_STRIPE "default_stripe_"
+#define PARAM_D_STRIPE_SIZE PARAM_DEFAULT_STRIPE"size"
+#define PARAM_D_STRIPE_COUNT PARAM_DEFAULT_STRIPE"count"
+#define PARAM_D_STRIPE_OFFSET PARAM_DEFAULT_STRIPE"offset"
+#define PARAM_D_STRIPE_PATTERN PARAM_DEFAULT_STRIPE"pattern"
+
+#endif // _LUSTRE_PARAM_H
struct {
/* Public members. */
__u64 lw_object_id; /* lov object id */
- __u64 lw_object_gr; /* lov object id */
+ __u64 lw_object_gr; /* lov object group */
__u64 lw_maxbytes; /* maximum possible file size */
unsigned long lw_xfersize; /* optimal transfer size */
spinlock_t fo_translock; /* protect fsd_last_transno */
struct file *fo_rcvd_filp;
struct file *fo_health_check_filp;
- struct filter_server_data *fo_fsd;
+ struct lr_server_data *fo_fsd;
unsigned long *fo_last_rcvd_slots;
__u64 fo_mount_count;
atomic_t fo_quotachecking;
};
-struct mds_server_data;
-
#define OSC_MAX_RIF_DEFAULT 8
#define OSC_MAX_RIF_MAX 256
#define OSC_MAX_DIRTY_DEFAULT (OSC_MAX_RIF_DEFAULT * 4)
struct mdc_rpc_lock *cl_setattr_lock;
struct osc_creator cl_oscc;
+ /* mgc datastruct */
+ struct semaphore cl_mgc_sem;
+ struct vfsmount *cl_mgc_vfsmnt;
+ struct dentry *cl_mgc_configs_dir;
+ atomic_t cl_mgc_refcount;
+ struct obd_export *cl_mgc_mgsexp;
+
/* Flags section */
unsigned int cl_checksum:1; /* debug checksums */
#define CL_NOT_QUOTACHECKED 1 /* client->cl_qchk_stat init value */
+struct mgs_obd {
+ struct ptlrpc_service *mgs_service;
+ struct vfsmount *mgs_vfsmnt;
+ struct super_block *mgs_sb;
+ struct dentry *mgs_configs_dir;
+ struct dentry *mgs_fid_de;
+ struct list_head mgs_fs_db_list;
+ struct semaphore mgs_sem;
+};
+
struct mds_obd {
/* NB this field MUST be first */
struct obd_device_target mds_obt;
unsigned long mds_atime_diff;
struct semaphore mds_epoch_sem;
struct ll_fid mds_rootfid;
- struct mds_server_data *mds_server_data;
+ struct lr_server_data *mds_server_data;
cfs_dentry_t *mds_pending_dir;
cfs_dentry_t *mds_logs_dir;
cfs_dentry_t *mds_objects_dir;
struct obd_uuid mds_lov_uuid;
char *mds_profile;
struct obd_export *mds_osc_exp; /* XXX lov_exp */
- int mds_has_lov_desc;
struct lov_desc mds_lov_desc;
obd_id *mds_lov_objids;
+ int mds_lov_objids_size;
+ __u32 mds_lov_objids_in_file;
+ unsigned int mds_lov_objids_dirty:1;
int mds_lov_nextid_set;
struct file *mds_lov_objid_filp;
struct file *mds_health_check_filp;
struct semaphore lov_lock;
atomic_t refcount;
struct lov_desc desc;
+ struct obd_connect_data ocd;
int bufsize;
int connects;
int death_row; /* Do we have tgts scheduled to be deleted?
};
/* obd device type names */
+ /* FIXME all the references to LUSTRE_MDS_NAME should be swapped with LUSTRE_MDT_NAME */
#define LUSTRE_MDS_NAME "mds"
#define LUSTRE_MDT_NAME "mdt"
#define LUSTRE_MDC_NAME "mdc"
-#define LUSTRE_FILTER_NAME "obdfilter"
-#define LUSTRE_OST_NAME "ost"
+#define LUSTRE_OSS_NAME "ost" /*FIXME change name to oss*/
+#define LUSTRE_OST_NAME "obdfilter" /* FIXME change name to ost*/
#define LUSTRE_OSC_NAME "osc"
+#define LUSTRE_LOV_NAME "lov"
+#define LUSTRE_MGS_NAME "mgs"
+#define LUSTRE_MGC_NAME "mgc"
+
+#define LUSTRE_OSTSAN_NAME "sanobdfilter"
#define LUSTRE_SANOSC_NAME "sanosc"
#define LUSTRE_SANOST_NAME "sanost"
-#define LUSTRE_LOV_NAME "lov"
#define LUSTRE_CACHEOBD_NAME "cobd"
#define LUSTRE_ECHO_NAME "obdecho"
#define LUSTRE_ECHO_CLIENT_NAME "echo_client"
+/* Constant obd names */
+#define LUSTRE_MGS_OBDNAME "MGS"
+#define LUSTRE_MGC_OBDNAME "MGC"
/* Don't conflict with on-wire flags OBD_BRW_WRITE, etc */
#define N_LOCAL_TEMP_PAGE 0x10000000
/* Device deactivated */
OBD_NOTIFY_INACTIVE,
/* Connect data for import were changed */
- OBD_NOTIFY_OCD
+ OBD_NOTIFY_OCD,
+ /* Sync request */
+ OBD_NOTIFY_SYNC_NONBLOCK,
+ OBD_NOTIFY_SYNC
};
/*
/* corresponds to one of the obd's */
struct obd_device {
- struct obd_type *obd_type;
-
+ struct obd_type *obd_type;
/* common and UUID name of this device */
- char *obd_name;
- struct obd_uuid obd_uuid;
+ char *obd_name;
+ struct obd_uuid obd_uuid;
- int obd_minor;
+ int obd_minor;
unsigned int obd_attached:1, obd_set_up:1, obd_recovering:1,
obd_abort_recovery:1, obd_replayable:1, obd_no_transno:1,
obd_no_recov:1, obd_stopping:1, obd_starting:1,
struct echo_client_obd echo_client;
struct echo_obd echo;
struct lov_obd lov;
+ struct mgs_obd mgs;
} u;
/* Fields used by LProcFS */
unsigned int obd_cntr_base;
OBD_CLEANUP_OBD,
};
+/* get/set_info keys */
+#define KEY_MDS_CONN "mds_conn"
+#define KEY_NEXT_ID "next_id"
+#define KEY_LOVDESC "lovdesc"
+#define KEY_INIT_RECOV "initial_recov"
+#define KEY_INIT_RECOV_BACKUP "init_recov_bk"
+
struct obd_ops {
struct module *o_owner;
int (*o_iocontrol)(unsigned int cmd, struct obd_export *exp, int len,
enum obd_import_event);
int (*o_notify)(struct obd_device *obd, struct obd_device *watched,
- enum obd_notify_event ev);
+ enum obd_notify_event ev, void *data);
int (*o_health_check)(struct obd_device *);
struct obd_device *class_name2obd(char *name);
int class_uuid2dev(struct obd_uuid *uuid);
struct obd_device *class_uuid2obd(struct obd_uuid *uuid);
+void class_obd_list(void);
struct obd_device * class_find_client_obd(struct obd_uuid *tgt_uuid,
char * typ_name,
struct obd_uuid *grp_uuid);
int obd_export_evict_by_nid(struct obd_device *obd, char *nid);
int obd_export_evict_by_uuid(struct obd_device *obd, char *uuid);
-/* config.c */
+/* obd_config.c */
int class_process_config(struct lustre_cfg *lcfg);
int class_attach(struct lustre_cfg *lcfg);
int class_setup(struct obd_device *obd, struct lustre_cfg *lcfg);
struct obd_device *class_incref(struct obd_device *obd);
void class_decref(struct obd_device *obd);
+#define CFG_F_START 0x01 /* Set when we start updating from a log */
+#define CFG_F_MARKER 0x02 /* We are within a maker */
+#define CFG_F_SKIP 0x04 /* We should ignore this cfg command */
+#define CFG_F_COMPAT146 0x08 /* Translation to new obd names required */
+#define CFG_F_EXCLUDE 0x10 /* OST exclusion list */
+
+
/* Passed as data param to class_config_parse_llog */
struct config_llog_instance {
- char * cfg_instance;
- struct obd_uuid cfg_uuid;
+ char * cfg_instance;
+ struct super_block *cfg_sb;
+ struct obd_uuid cfg_uuid;
+ int cfg_last_idx; /* for partial llog processing */
+ int cfg_flags;
};
int class_config_parse_llog(struct llog_ctxt *ctxt, char *name,
struct config_llog_instance *cfg);
int class_config_dump_llog(struct llog_ctxt *ctxt, char *name,
struct config_llog_instance *cfg);
+/* list of active configuration logs */
+struct config_llog_data {
+ char *cld_logname;
+ struct ldlm_res_id cld_resid;
+ struct config_llog_instance cld_cfg;
+ struct list_head cld_list_chain;
+ atomic_t cld_refcount;
+ unsigned int cld_stopping:1;
+};
+
struct lustre_profile {
struct list_head lp_list;
char * lp_profile;
struct lustre_profile *class_get_profile(char * prof);
void class_del_profile(char *prof);
+/* genops.c */
#define class_export_get(exp) \
({ \
struct obd_export *exp_ = exp; \
struct obd_import *class_new_import(struct obd_device *obd);
void class_destroy_import(struct obd_import *exp);
+struct obd_type *class_search_type(char *name);
struct obd_type *class_get_type(char *name);
void class_put_type(struct obd_type *type);
int class_connect(struct lustre_handle *conn, struct obd_device *obd,
void class_fail_export(struct obd_export *exp);
void class_disconnect_exports(struct obd_device *obddev);
void class_disconnect_stale_exports(struct obd_device *obddev);
-void class_manual_cleanup(struct obd_device *obd);
+int class_manual_cleanup(struct obd_device *obd);
/* obdo.c */
void obdo_cpy_md(struct obdo *dst, struct obdo *src, obd_flag valid);
static inline int obd_notify(struct obd_device *obd,
struct obd_device *watched,
- enum obd_notify_event ev)
+ enum obd_notify_event ev, void *data)
{
ENTRY;
OBD_CHECK_DEV(obd);
- if (!obd->obd_set_up) {
+
+ /* the check for async_recov is a complete hack - I'm hereby
+ overloading the meaning to also mean "this was called from
+ mds_postsetup". I know that my mds is able to handle notifies
+ by this point, and it needs to get them to execute mds_postrecov. */
+ if (!obd->obd_set_up && !obd->obd_async_recov) {
CDEBUG(D_HA, "obd %s not set up\n", obd->obd_name);
RETURN(-EINVAL);
}
}
OBD_COUNTER_INCREMENT(obd, notify);
- RETURN(OBP(obd, notify)(obd, watched, ev));
+ RETURN(OBP(obd, notify)(obd, watched, ev, data));
}
static inline int obd_notify_observer(struct obd_device *observer,
struct obd_device *observed,
- enum obd_notify_event ev)
+ enum obd_notify_event ev, void *data)
{
int rc1;
int rc2;
struct obd_notify_upcall *onu;
if (observer->obd_observer)
- rc1 = obd_notify(observer->obd_observer, observed, ev);
+ rc1 = obd_notify(observer->obd_observer, observed, ev, data);
else
rc1 = 0;
/*
/* uuid.c */
typedef __u8 class_uuid_t[16];
-//int class_uuid_parse(struct obd_uuid in, class_uuid_t out);
+void class_generate_random_uuid(class_uuid_t uuid);
void class_uuid_unparse(class_uuid_t in, struct obd_uuid *out);
/* lustre_peer.c */
#define OBD_FAIL_MDC_REVALIDATE_PAUSE 0x800
+#define OBD_FAIL_MGS 0x900
+#define OBD_FAIL_MGS_ALL_REQUEST_NET 0x901
+#define OBD_FAIL_MGS_ALL_REPLY_NET 0x902
+
/* preparation for a more advanced failure testbed (not functional yet) */
#define OBD_FAIL_MASK_SYS 0x0000FF00
#define OBD_FAIL_MASK_LOC (0x000000FF | OBD_FAIL_MASK_SYS)
CONFIG_DEBUG_SPINLOCK=y
CONFIG_DEBUG_SPINLOCK_SLEEP=y
CONFIG_DEBUG_HIGHMEM=y
-CONFIG_DEBUG_INFO=y
+# CONFIG_DEBUG_INFO is not set
# CONFIG_FRAME_POINTER is not set
CONFIG_EARLY_PRINTK=y
CONFIG_DEBUG_STACKOVERFLOW=y
int compat = 1;
ENTRY;
- LASSERT(req_bits); /* There is no sence in lock with no bits set,
+ LASSERT(req_bits); /* There is no sense in lock with no bits set,
I think. Also such a lock would be compatible
with any other bit lock */
list_for_each(tmp, queue) {
int ldlm_process_flock_lock(struct ldlm_lock *lock, int *flags, int first_enq,
ldlm_error_t *err);
-
/* ldlm_inodebits.c */
int ldlm_process_inodebits_lock(struct ldlm_lock *lock, int *flags,
int first_enq, ldlm_error_t *err);
rq_portal = MDS_REQUEST_PORTAL;
rp_portal = MDC_REPLY_PORTAL;
connect_op = MDS_CONNECT;
+ } else if (!strcmp(name, LUSTRE_MGC_NAME)) {
+ rq_portal = MGS_REQUEST_PORTAL;
+ rp_portal = MGC_REPLY_PORTAL;
+ connect_op = MGS_CONNECT;
} else {
CERROR("unknown client OBD type \"%s\", can't setup\n",
name);
}
sema_init(&cli->cl_sem, 1);
+ sema_init(&cli->cl_mgc_sem, 1);
cli->cl_conn_count = 0;
memcpy(server_uuid.uuid, lustre_cfg_buf(lcfg, 2),
min_t(unsigned int, LUSTRE_CFG_BUFLEN(lcfg, 2),
imp->imp_client = &obddev->obd_ldlm_client;
imp->imp_connect_op = connect_op;
imp->imp_initial_recov = 1;
+ imp->imp_initial_recov_bk = 0;
CFS_INIT_LIST_HEAD(&imp->imp_pinger_chain);
memcpy(cli->cl_target_uuid.uuid, lustre_cfg_buf(lcfg, 1),
LUSTRE_CFG_BUFLEN(lcfg, 1));
RETURN(0);
}
-/* ->o_connect() method for client side (OSC and MDC) */
+/* ->o_connect() method for client side (OSC and MDC and MGC) */
int client_connect_import(struct lustre_handle *dlm_handle,
struct obd_device *obd, struct obd_uuid *cluuid,
struct obd_connect_data *data)
obd_str2uuid (&tgtuuid, str);
target = class_uuid2obd(&tgtuuid);
+ /* COMPAT_146 */
+ /* old (pre 1.6) lustre_process_log tries to connect to mdsname
+ (eg. mdsA) instead of uuid. */
+ if (!target) {
+ snprintf((char *)tgtuuid.uuid, sizeof(tgtuuid), "%s_UUID", str);
+ target = class_uuid2obd(&tgtuuid);
+ }
if (!target)
target = class_name2obd(str);
+ /* end COMPAT_146 */
if (!target || target->obd_stopping || !target->obd_set_up) {
DEBUG_REQ(D_ERROR, req, "UUID '%s' is not available "
if (lustre_msg_get_op_flags(req->rq_reqmsg) & MSG_CONNECT_LIBCLIENT) {
if (!data) {
- DEBUG_REQ(D_INFO, req, "Refusing old (unversioned) "
+ DEBUG_REQ(D_WARNING, req, "Refusing old (unversioned) "
"libclient connection attempt\n");
GOTO(out, rc = -EPROTO);
} else if (data->ocd_version < LUSTRE_VERSION_CODE -
LUSTRE_VERSION_ALLOWED_OFFSET) {
- DEBUG_REQ(D_INFO, req, "Refusing old (%d.%d.%d.%d) "
+ DEBUG_REQ(D_WARNING, req, "Refusing old (%d.%d.%d.%d) "
"libclient connection attempt\n",
OBD_OCD_VERSION_MAJOR(data->ocd_version),
OBD_OCD_VERSION_MINOR(data->ocd_version),
GOTO(out, rc = -EPROTO);
}
+#if 0
+ /* FIXME this makes it impossible to use LDLM_PLAIN locks -- check
+ against server's _CONNECT_SUPPORTED flags? (I don't want to use
+ ibits for mgc/mgs) */
+
/* INODEBITS_INTEROP: Perform conversion from plain lock to
* inodebits lock if client does not support them. */
if (!(req->rq_export->exp_connect_flags & OBD_CONNECT_IBITS) &&
if (dlm_req->lock_desc.l_req_mode == LCK_PR)
dlm_req->lock_desc.l_req_mode = LCK_CR;
}
+#endif
if (flags & LDLM_FL_REPLAY) {
lock = find_existing_lock(req->rq_export,
spin_lock_init(&waiting_locks_spinlock);
cfs_timer_init(&waiting_locks_timer, waiting_locks_callback, 0);
+ /* Using CLONE_FILES instead of CLONE_FS here causes failures in
+ conf-sanity test 21. But using CLONE_FS can cause problems
+ if the daemonize happens between push/pop_ctxt... */
rc = cfs_kernel_thread(expired_lock_main, NULL, CLONE_VM | CLONE_FS);
if (rc < 0) {
CERROR("Cannot start ldlm expired-lock thread: %d\n", rc);
GOTO(out, rc);
lustre_cfg_bufs_reset(&bufs, name);
- lustre_cfg_bufs_set_string(&bufs, 1, LUSTRE_MDC_NAME);
+ lustre_cfg_bufs_set_string(&bufs, 1, LUSTRE_MDC_NAME);//FIXME connect to mgc
lustre_cfg_bufs_set_string(&bufs, 2, mdc_uuid.uuid);
lcfg = lustre_cfg_new(LCFG_ATTACH, &bufs);
rc = class_process_config(lcfg);
/* Disable initial recovery on this import */
rc = obd_set_info_async(obd->obd_self_export,
- strlen("initial_recov"), "initial_recov",
+ strlen(KEY_INIT_RECOV), KEY_INIT_RECOV,
sizeof(allow_recov), &allow_recov, NULL);
rc = obd_connect(&mdc_conn, obd, &mdc_uuid, ocd);
char *lli_symlink_name;
struct semaphore lli_open_sem;
__u64 lli_maxbytes;
- unsigned long lli_flags;
+ unsigned long lli_flags;
/* for libsysio */
struct file_identifier lli_sysio_fid;
static struct inode* llu_new_inode(struct filesys *fs,
struct ll_fid *fid)
{
- struct inode *inode;
+ struct inode *inode;
struct llu_inode_info *lli;
struct intnl_stat st = {
.st_dev = 0,
lli->lli_fid = *fid;
/* file identifier is needed by functions like _sysio_i_find() */
- inode = _sysio_i_new(fs, &lli->lli_sysio_fid,
+ inode = _sysio_i_new(fs, &lli->lli_sysio_fid,
&st, 0, &llu_inode_ops, lli);
- if (!inode)
- OBD_FREE(lli, sizeof(*lli));
+ if (!inode)
+ OBD_FREE(lli, sizeof(*lli));
return inode;
}
(rc = ll_permission(inode, MAY_WRITE)) != 0)
RETURN(rc);
} else {
- /* from inode_change_ok() */
- if (current->fsuid != st->st_uid &&
- !capable(CAP_FOWNER))
- RETURN(-EPERM);
+ /* from inode_change_ok() */
+ if (current->fsuid != st->st_uid &&
+ !capable(CAP_FOWNER))
+ RETURN(-EPERM);
}
}
struct config_llog_instance cfg;
char ll_instance[sizeof(sbi) * 2 + 1];
struct lustre_profile *lprof;
- char *zconf_mdsnid, *zconf_mdsname, *zconf_profile;
+ char *zconf_mdsnid, *zconf_mdsname, *zconf_profile;
char *osc = NULL, *mdc = NULL;
int async = 1, err = -EINVAL;
struct obd_connect_data ocd = {0,};
GOTO(out_request, err = -EBADF);
}
- /*
- * Generate base path-node for root.
- */
- rootpb = _sysio_pb_new(&noname, NULL, root);
- if (!rootpb) {
- err = -ENOMEM;
- goto out_inode;
- }
+ /*
+ * Generate base path-node for root.
+ */
+ rootpb = _sysio_pb_new(&noname, NULL, root);
+ if (!rootpb) {
+ err = -ENOMEM;
+ goto out_inode;
+ }
- err = _sysio_do_mount(fs, rootpb, flags, tocover, mntp);
- if (err) {
+ err = _sysio_do_mount(fs, rootpb, flags, tocover, mntp);
+ if (err) {
_sysio_pb_gone(rootpb);
- goto out_inode;
+ goto out_inode;
}
ptlrpc_req_finished(request);
#include <lustre_debug.h>
#include <lustre_ver.h>
#include <linux/lustre_version.h>
-
+#include <lustre_disk.h> /* for s2sbi */
+
/*
struct lustre_intent_data {
__u64 it_lock_handle[2];
struct proc_dir_entry* ll_proc_root;
obd_id ll_rootino; /* number of root inode */
- struct lustre_mount_data *ll_lmd;
-
int ll_flags;
struct list_head ll_conn_chain; /* per-conn chain of SBs */
struct lustre_client_ocd ll_lco;
extern struct super_operations lustre_super_operations;
char *ll_read_opt(const char *opt, char *data);
-int ll_set_opt(const char *opt, char *data, int fl);
-void ll_options(char *options, char **ost, char **mds, int *flags);
+void ll_options(char *options, int *flags);
void ll_lli_init(struct ll_inode_info *lli);
-int ll_fill_super(struct super_block *sb, void *data, int silent);
-int lustre_fill_super(struct super_block *sb, void *data, int silent);
-void lustre_put_super(struct super_block *sb);
+int ll_fill_super(struct super_block *sb);
+void ll_put_super(struct super_block *sb);
struct inode *ll_inode_from_lock(struct ldlm_lock *lock);
void ll_clear_inode(struct inode *inode);
int ll_setattr_raw(struct inode *inode, struct iattr *attr);
int ll_iocontrol(struct inode *inode, struct file *file,
unsigned int cmd, unsigned long arg);
void ll_umount_begin(struct super_block *sb);
-int lustre_remount_fs(struct super_block *sb, int *flags, char *data);
+int ll_remount_fs(struct super_block *sb, int *flags, char *data);
int ll_prep_inode(struct obd_export *exp, struct inode **inode,
struct ptlrpc_request *req, int offset, struct super_block *);
void lustre_dump_dentry(struct dentry *, int recur);
#define LL_MAX_BLKSIZE (4UL * 1024 * 1024)
+#define ll_s2sbi(sb) (s2lsi(sb)->lsi_llsbi)
+
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-#define ll_s2sbi_nocast(sb) ((sb)->s_fs_info)
void __d_rehash(struct dentry * entry, int lock);
static inline __u64 ll_ts2u64(struct timespec *time)
{
return t;
}
#else /* 2.4 here */
-#define ll_s2sbi_nocast(sb) ((sb)->u.generic_sbp)
static inline __u64 ll_ts2u64(time_t *time)
{
return *time;
}
#endif
-#define ll_s2sbi(sb) ((struct ll_sb_info *)ll_s2sbi_nocast(sb))
/* don't need an addref as the sb_info should be holding one */
static inline struct obd_export *ll_s2obdexp(struct super_block *sb)
#include <lustre_ha.h>
#include <lustre_dlm.h>
#include <lprocfs_status.h>
+#include <lustre_disk.h>
#include "llite_internal.h"
kmem_cache_t *ll_file_data_slab;
#define log2(n) ffz(~(n))
#endif
-/* We need to have some extra twiddling here because some systems have
- * no random state when they start up. */
-static void
-lustre_generate_random_uuid(class_uuid_t uuid)
-{
- struct timeval t;
- int *i, j, k;
-
- ENTRY;
- LASSERT(sizeof(class_uuid_t) % sizeof(*i) == 0);
-
- j = jiffies;
- do_gettimeofday(&t);
- k = t.tv_usec;
-
- generate_random_uuid(uuid);
-
- for (i = (int *)uuid; (char *)i < (char *)uuid + sizeof(class_uuid_t); i++) {
- *i ^= j ^ k;
- j = ((j << 8) & 0xffffff00) | ((j >> 24) & 0x000000ff);
- k = ((k >> 8) & 0x00ffffff) | ((k << 24) & 0xff000000);
- }
-
- EXIT;
-}
-struct ll_sb_info *lustre_init_sbi(struct super_block *sb)
+struct ll_sb_info *ll_init_sbi(void)
{
struct ll_sb_info *sbi = NULL;
class_uuid_t uuid;
INIT_LIST_HEAD(&sbi->ll_conn_chain);
INIT_HLIST_HEAD(&sbi->ll_orphan_dentry_list);
- ll_s2sbi_nocast(sb) = sbi;
- lustre_generate_random_uuid(uuid);
+ class_generate_random_uuid(uuid);
class_uuid_unparse(uuid, &sbi->ll_sb_uuid);
CDEBUG(D_HA, "generated uuid: %s\n", sbi->ll_sb_uuid.uuid);
RETURN(sbi);
}
-void lustre_free_sbi(struct super_block *sb)
+void ll_free_sbi(struct super_block *sb)
{
struct ll_sb_info *sbi = ll_s2sbi(sb);
ENTRY;
spin_unlock(&ll_sb_lock);
OBD_FREE(sbi, sizeof(*sbi));
}
- ll_s2sbi_nocast(sb) = NULL;
EXIT;
}
.d_compare = ll_dcompare,
};
-int lustre_common_fill_super(struct super_block *sb, char *mdc, char *osc)
+int client_common_fill_super(struct super_block *sb, char *mdc, char *osc)
{
struct inode *root = 0;
struct ll_sb_info *sbi = ll_s2sbi(sb);
err = obd_connect(&mdc_conn, obd, &sbi->ll_sb_uuid, data);
if (err == -EBUSY) {
- CERROR("An MDS (mdc %s) is performing recovery, of which this"
+ CERROR("An MDT (mdc %s) is performing recovery, of which this"
" client is not a part. Please wait for recovery to "
"complete, abort, or time out.\n", mdc);
GOTO(out, err);
mdc_init_ea_size(sbi->ll_mdc_exp, sbi->ll_osc_exp);
+ err = obd_prep_async_page(sbi->ll_osc_exp, NULL, NULL, NULL,
+ 0, NULL, NULL, NULL);
+ if (err < 0) {
+ LCONSOLE_ERROR("There are no OST's in this filesystem. "
+ "There must be at least one active OST for "
+ "a client to start.\n");
+ GOTO(out_osc, err);
+ }
+
if (!ll_async_page_slab) {
ll_async_page_slab_size =
- size_round(sizeof(struct ll_async_page)) +
- obd_prep_async_page(sbi->ll_osc_exp, NULL, NULL, NULL,
- 0, NULL, NULL, NULL);
+ size_round(sizeof(struct ll_async_page)) + err;
ll_async_page_slab = kmem_cache_create("ll_async_page",
ll_async_page_slab_size,
0, 0, NULL, NULL);
EXIT;
}
-void lustre_common_put_super(struct super_block *sb)
+void client_common_put_super(struct super_block *sb)
{
struct ll_sb_info *sbi = ll_s2sbi(sb);
ENTRY;
RETURN(retval);
}
-int ll_set_opt(const char *opt, char *data, int fl)
+static inline int ll_set_opt(const char *opt, char *data, int fl)
{
- ENTRY;
-
- CDEBUG(D_SUPER, "option: %s, data %s\n", opt, data);
- if (strncmp(opt, data, strlen(opt)))
- RETURN(0);
+ if (strncmp(opt, data, strlen(opt)) != 0)
+ return(0);
else
- RETURN(fl);
+ return(fl);
}
-void ll_options(char *options, char **ost, char **mdc, int *flags)
+/* non-client-specific mount options are parsed in lmd_parse */
+void ll_options(char *options, int *flags)
{
int tmp;
- char *this_char;
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
- char *opt_ptr = options;
-#endif
+ char *s1 = options, *s2;
ENTRY;
if (!options) {
return;
}
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
- for (this_char = strtok (options, ",");
- this_char != NULL;
- this_char = strtok (NULL, ","))
-#else
- while ((this_char = strsep (&opt_ptr, ",")) != NULL)
-#endif
- {
- CDEBUG(D_SUPER, "this_char %s\n", this_char);
- if (!*ost && (*ost = ll_read_opt(LUSTRE_OSC_NAME, this_char)))
- continue;
- if (!*mdc && (*mdc = ll_read_opt(LUSTRE_MDC_NAME, this_char)))
- continue;
- tmp = ll_set_opt("nolock", this_char, LL_SBI_NOLCK);
+ CDEBUG(D_CONFIG, "Parsing opts %s\n", options);
+
+ while (*s1) {
+ CDEBUG(D_SUPER, "next opt=%s\n", s1);
+ tmp = ll_set_opt("nolock", s1, LL_SBI_NOLCK);
if (tmp) {
*flags |= tmp;
- continue;
+ goto next;
}
- tmp = ll_set_opt("flock", this_char, LL_SBI_FLOCK);
+ tmp = ll_set_opt("flock", s1, LL_SBI_FLOCK);
if (tmp) {
*flags |= tmp;
- continue;
+ goto next;
}
- tmp = ll_set_opt("noflock", this_char, LL_SBI_FLOCK);
+ tmp = ll_set_opt("noflock", s1, LL_SBI_FLOCK);
if (tmp) {
*flags &= ~tmp;
- continue;
+ goto next;
}
- tmp = ll_set_opt("user_xattr", this_char, LL_SBI_USER_XATTR);
+ tmp = ll_set_opt("user_xattr", s1, LL_SBI_USER_XATTR);
if (tmp) {
*flags |= tmp;
- continue;
+ goto next;
}
- tmp = ll_set_opt("nouser_xattr", this_char, LL_SBI_USER_XATTR);
+ tmp = ll_set_opt("nouser_xattr", s1, LL_SBI_USER_XATTR);
if (tmp) {
*flags &= ~tmp;
- continue;
+ goto next;
}
- tmp = ll_set_opt("acl", this_char, LL_SBI_ACL);
+ tmp = ll_set_opt("acl", s1, LL_SBI_ACL);
if (tmp) {
/* Ignore deprecated mount option. The client will
* always try to mount with ACL support, whether this
* is used depends on whether server supports it. */
- continue;
+ goto next;
}
- tmp = ll_set_opt("noacl", this_char, LL_SBI_ACL);
+ tmp = ll_set_opt("noacl", s1, LL_SBI_ACL);
if (tmp) {
- continue;
+ goto next;
}
+
+next:
+ /* Find next opt */
+ s2 = strchr(s1, ',');
+ if (s2 == NULL)
+ break;
+ s1 = s2 + 1;
}
EXIT;
}
-
+
void ll_lli_init(struct ll_inode_info *lli)
{
sema_init(&lli->lli_open_sem, 1);
INIT_LIST_HEAD(&lli->lli_dead_list);
}
-int ll_fill_super(struct super_block *sb, void *data, int silent)
+int ll_fill_super(struct super_block *sb)
{
+ struct lustre_profile *lprof;
+ struct lustre_sb_info *lsi = s2lsi(sb);
struct ll_sb_info *sbi;
- char *osc = NULL;
- char *mdc = NULL;
- int err;
+ char *osc = NULL;
+ char *mdc = NULL;
+ char *profilenm = get_profile_name(sb);
+ struct config_llog_instance cfg;
+ char ll_instance[sizeof(sb) * 2 + 1];
+ int err;
ENTRY;
-
+
CDEBUG(D_VFSTRACE, "VFS Op: sb %p\n", sb);
- sbi = lustre_init_sbi(sb);
- if (!sbi)
+ /* client additional sb info */
+ lsi->lsi_llsbi = sbi = ll_init_sbi();
+ if (!sbi)
RETURN(-ENOMEM);
- ll_options(data, &osc, &mdc, &sbi->ll_flags);
-
- if (!osc) {
- CERROR("no osc\n");
- GOTO(out, err = -EINVAL);
- }
-
- if (!mdc) {
- CERROR("no mdc\n");
- GOTO(out, err = -EINVAL);
- }
-
- err = lustre_common_fill_super(sb, mdc, osc);
-out:
- if (err)
- lustre_free_sbi(sb);
-
- if (mdc)
- OBD_FREE(mdc, strlen(mdc) + 1);
- if (osc)
- OBD_FREE(osc, strlen(osc) + 1);
-
- RETURN(err);
-} /* ll_read_super */
-
-static int do_lcfg(char *cfgname, lnet_nid_t nid, int cmd,
- char *s1, char *s2)
-{
- struct lustre_cfg_bufs bufs;
- struct lustre_cfg * lcfg = NULL;
- int err;
-
- CDEBUG(D_TRACE, "lcfg %s %#x %s %s\n", cfgname, cmd, s1, s2);
-
- lustre_cfg_bufs_reset(&bufs, cfgname);
- if (s1)
- lustre_cfg_bufs_set_string(&bufs, 1, s1);
- if (s2)
- lustre_cfg_bufs_set_string(&bufs, 2, s2);
-
- lcfg = lustre_cfg_new(cmd, &bufs);
- lcfg->lcfg_nid = nid;
- err = class_process_config(lcfg);
- lustre_cfg_free(lcfg);
- return(err);
-}
-
-static int lustre_process_log(struct lustre_mount_data *lmd, char * profile,
- struct config_llog_instance *cfg)
-{
- struct obd_device *obd;
- struct lustre_handle mdc_conn = {0, };
- struct obd_export *exp;
- char * name = "mdc_dev";
- class_uuid_t uuid;
- struct obd_uuid mdc_uuid;
- struct llog_ctxt *ctxt;
- struct obd_connect_data ocd = { 0 };
- lnet_nid_t nid;
- int i, rc = 0, recov_bk = 1;
- int err;
- ENTRY;
-
- if (lmd_bad_magic(lmd))
- RETURN(-EINVAL);
-
- lustre_generate_random_uuid(uuid);
- class_uuid_unparse(uuid, &mdc_uuid);
- CDEBUG(D_HA, "generated uuid: %s\n", mdc_uuid.uuid);
+ ll_options(lsi->lsi_lmd->lmd_opts, &sbi->ll_flags);
- nid = lmd->lmd_nid[0];
- LASSERT(nid != LNET_NID_ANY);
- rc = do_lcfg(name, nid, LCFG_ADD_UUID, libcfs_nid2str(nid), 0);
- if (rc < 0)
- GOTO(out, rc);
-
- rc = do_lcfg(name, 0, LCFG_ATTACH, LUSTRE_MDC_NAME, mdc_uuid.uuid);
- if (rc < 0)
- GOTO(out_del_uuid, rc);
-
- rc = do_lcfg(name, 0, LCFG_SETUP, lmd->lmd_mds, libcfs_nid2str(nid));
- if (rc < 0) {
- LCONSOLE_ERROR("I couldn't establish a connection with the MDS."
- " Check that the MDS host NID is correct and the"
- " networks are up.\n");
- GOTO(out_detach, rc);
- }
-
- obd = class_name2obd(name);
- if (obd == NULL)
- GOTO(out_cleanup, rc = -EINVAL);
-
- /* Add the redundant MDS nids */
- for (i = 1; i < lmd->lmd_nid_count; i++) {
- nid = lmd->lmd_nid[i];
- rc = do_lcfg(name, nid, LCFG_ADD_UUID, libcfs_nid2str(nid), 0);
- if (rc) {
- CERROR("Add uuid for %s failed %d\n",
- libcfs_nid2str(nid), rc);
- continue;
- }
- rc = do_lcfg(name, 0, LCFG_ADD_CONN, libcfs_nid2str(nid), 0);
- if (rc)
- CERROR("Add conn for %s failed %d\n",
- libcfs_nid2str(nid), rc);
- }
-
- /* Try all connections, but only once. */
- rc = obd_set_info_async(obd->obd_self_export,
- strlen("init_recov_bk"), "init_recov_bk",
- sizeof(recov_bk), &recov_bk, NULL);
- if (rc)
- GOTO(out_cleanup, rc);
-
- ocd.ocd_connect_flags = OBD_CONNECT_ACL;
-
- rc = obd_connect(&mdc_conn, obd, &mdc_uuid, &ocd);
- if (rc) {
- CERROR("cannot connect to %s: rc = %d\n", lmd->lmd_mds, rc);
- GOTO(out_cleanup, rc);
- }
-
- exp = class_conn2export(&mdc_conn);
-
- ctxt = llog_get_context(exp->exp_obd, LLOG_CONFIG_REPL_CTXT);
-#if 1
- rc = class_config_parse_llog(ctxt, profile, cfg);
-#else
- /*
- * For debugging, it's useful to just dump the log
- */
- rc = class_config_dump_llog(ctxt, profile, cfg);
-#endif
- switch (rc) {
- case 0:
- break;
- case -EINVAL:
- LCONSOLE_ERROR("%s: The configuration '%s' could not be read "
- "from the MDS '%s'. Make sure this client and "
- "the MDS are running compatible versions of "
- "Lustre.\n",
- obd->obd_name, profile, lmd->lmd_mds);
- /* fall through */
- default:
- LCONSOLE_ERROR("%s: The configuration '%s' could not be read "
- "from the MDS '%s'. This may be the result of "
- "communication errors between the client and "
- "the MDS, or if the MDS is not running.\n",
- obd->obd_name, profile, lmd->lmd_mds);
- break;
- }
-
- /* We don't so much care about errors in cleaning up the config llog
- * connection, as we have already read the config by this point. */
- err = obd_disconnect(exp);
- if (err)
- CERROR("obd_disconnect failed: rc = %d\n", err);
-
-out_cleanup:
- err = do_lcfg(name, 0, LCFG_CLEANUP, 0, 0);
- if (err)
- CERROR("mdc_cleanup failed: rc = %d\n", err);
-
-out_detach:
- err = do_lcfg(name, 0, LCFG_DETACH, 0, 0);
- if (err)
- CERROR("mdc_detach failed: rc = %d\n", err);
-
-out_del_uuid:
- /* class_add_uuid adds a nid even if the same uuid exists; we might
- delete any copy here. So they all better match. */
- for (i = 0; i < lmd->lmd_nid_count; i++) {
- nid = lmd->lmd_nid[i];
- err = do_lcfg(name, nid, LCFG_DEL_UUID, libcfs_nid2str(nid), 0);
- if (err)
- CERROR("del MDC UUID %s failed: rc = %d\n",
- libcfs_nid2str(nid), err);
- }
- /* class_import_put will get rid of the additional connections */
-out:
- RETURN(rc);
-}
-
-static void lustre_manual_cleanup(struct ll_sb_info *sbi)
-{
- struct obd_device *obd;
- int next = 0;
-
- while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)) !=NULL) {
- class_manual_cleanup(obd);
- }
-
- if (sbi->ll_lmd != NULL)
- class_del_profile(sbi->ll_lmd->lmd_profile);
-}
-
-int lustre_fill_super(struct super_block *sb, void *data, int silent)
-{
- struct lustre_mount_data * lmd = data;
- struct ll_sb_info *sbi;
- char *osc = NULL;
- char *mdc = NULL;
- int err;
- ENTRY;
-
- CDEBUG(D_VFSTRACE, "VFS Op: sb %p\n", sb);
- if (lmd_bad_magic(lmd))
- RETURN(-EINVAL);
-
- sbi = lustre_init_sbi(sb);
- if (!sbi)
- RETURN(-ENOMEM);
-
- if (lmd->lmd_profile) {
- struct lustre_profile *lprof;
- struct config_llog_instance cfg;
- char ll_instance[sizeof(sb) * 2 + 1];
-
- if (lmd->lmd_mds[0] == '\0') {
- CERROR("no mds name\n");
- GOTO(out_free, err = -EINVAL);
- }
-
- OBD_ALLOC(sbi->ll_lmd, sizeof(*sbi->ll_lmd));
- if (sbi->ll_lmd == NULL)
- GOTO(out_free, err = -ENOMEM);
- memcpy(sbi->ll_lmd, lmd, sizeof(*lmd));
- if (lmd->lmd_flags & LMD_FLG_FLOCK)
- sbi->ll_flags |= LL_SBI_FLOCK;
- if (lmd->lmd_flags & LMD_FLG_USER_XATTR)
- sbi->ll_flags |= LL_SBI_USER_XATTR;
-
- /* generate a string unique to this super, let's try
- the address of the super itself.*/
- sprintf(ll_instance, "%p", sb);
-
- cfg.cfg_instance = ll_instance;
- cfg.cfg_uuid = sbi->ll_sb_uuid;
- err = lustre_process_log(lmd, lmd->lmd_profile, &cfg);
- if (err < 0) {
- CERROR("Unable to process log: %s\n", lmd->lmd_profile);
- GOTO(out_free, err);
- }
-
- lprof = class_get_profile(lmd->lmd_profile);
- if (lprof == NULL) {
- CERROR("No profile found: %s\n", lmd->lmd_profile);
- GOTO(out_free, err = -EINVAL);
- }
- if (osc)
- OBD_FREE(osc, strlen(osc) + 1);
- OBD_ALLOC(osc, strlen(lprof->lp_osc) +
- strlen(ll_instance) + 2);
- sprintf(osc, "%s-%s", lprof->lp_osc, ll_instance);
-
- if (mdc)
- OBD_FREE(mdc, strlen(mdc) + 1);
- OBD_ALLOC(mdc, strlen(lprof->lp_mdc) +
- strlen(ll_instance) + 2);
- sprintf(mdc, "%s-%s", lprof->lp_mdc, ll_instance);
- }
-
- if (!osc) {
- CERROR("no osc\n");
- GOTO(out_free, err = -EINVAL);
+ /* Generate a string unique to this super, in case some joker tries
+ to mount the same fs at two mount points.
+ Use the address of the super itself.*/
+ sprintf(ll_instance, "%p", sb);
+ cfg.cfg_instance = ll_instance;
+ cfg.cfg_uuid = lsi->lsi_llsbi->ll_sb_uuid;
+ cfg.cfg_last_idx = 0;
+
+ /* set up client obds */
+ err = lustre_process_log(sb, profilenm, &cfg);
+ if (err < 0) {
+ CERROR("Unable to process log: %d\n", err);
+ GOTO(out_free, err);
}
- if (!mdc) {
- CERROR("no mdc\n");
+ lprof = class_get_profile(profilenm);
+ if (lprof == NULL) {
+ CERROR("No profile found: %s\n", profilenm);
GOTO(out_free, err = -EINVAL);
}
-
- err = lustre_common_fill_super(sb, mdc, osc);
-
- if (err)
- GOTO(out_free, err);
-
-out_dev:
+ CDEBUG(D_CONFIG, "Found profile %s: mdc=%s osc=%s\n", profilenm,
+ lprof->lp_mdc, lprof->lp_osc);
+
+ OBD_ALLOC(osc, strlen(lprof->lp_osc) +
+ strlen(ll_instance) + 2);
+ if (!osc)
+ GOTO(out_free, err = -ENOMEM);
+ sprintf(osc, "%s-%s", lprof->lp_osc, ll_instance);
+
+ OBD_ALLOC(mdc, strlen(lprof->lp_mdc) +
+ strlen(ll_instance) + 2);
+ if (!mdc)
+ GOTO(out_free, err = -ENOMEM);
+ sprintf(mdc, "%s-%s", lprof->lp_mdc, ll_instance);
+
+ /* connections, registrations, sb setup */
+ err = client_common_fill_super(sb, mdc, osc);
+
+out_free:
if (mdc)
OBD_FREE(mdc, strlen(mdc) + 1);
if (osc)
OBD_FREE(osc, strlen(osc) + 1);
-
- RETURN(err);
-
-out_free:
- if (sbi->ll_lmd) {
- lustre_manual_cleanup(sbi);
- OBD_FREE(sbi->ll_lmd, sizeof(*sbi->ll_lmd));
+ if (err) {
+ struct obd_device *obd;
+ int next = 0;
+ /* like ll_put_super below */
+ lustre_end_log(sb, NULL, &cfg);
+ while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next))
+ != NULL) {
+ class_manual_cleanup(obd);
+ }
+ class_del_profile(profilenm);
+ ll_free_sbi(sb);
+ lsi->lsi_llsbi = NULL;
+ lustre_common_put_super(sb);
}
- lustre_free_sbi(sb);
+ RETURN(err);
+} /* ll_fill_super */
- goto out_dev;
-} /* lustre_fill_super */
-void lustre_put_super(struct super_block *sb)
+void ll_put_super(struct super_block *sb)
{
+ struct config_llog_instance cfg;
+ char ll_instance[sizeof(sb) * 2 + 1];
struct obd_device *obd;
+ struct lustre_sb_info *lsi = s2lsi(sb);
struct ll_sb_info *sbi = ll_s2sbi(sb);
- int force = 0;
+ char *profilenm = get_profile_name(sb);
+ int next;
ENTRY;
- CDEBUG(D_VFSTRACE, "VFS Op: sb %p\n", sb);
+ CDEBUG(D_VFSTRACE, "VFS Op: sb %p - %s\n", sb, profilenm);
+
+ sprintf(ll_instance, "%p", sb);
+ cfg.cfg_instance = ll_instance;
+ lustre_end_log(sb, NULL, &cfg);
+
obd = class_exp2obd(sbi->ll_mdc_exp);
if (obd) {
- int next = 0;
- /* We need to set force before the lov_disconnect in
+ int force = obd->obd_no_recov;
+ /* We need to set force before the lov_disconnect in
lustre_common_put_super, since l_d cleans up osc's as well. */
- force = obd->obd_no_recov;
- while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next))
- !=NULL) {
+ next = 0;
+ while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next))
+ != NULL) {
obd->obd_force = force;
- }
+ }
}
- lustre_common_put_super(sb);
+ client_common_put_super(sb);
+
+ next = 0;
+ while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)) !=NULL) {
+ class_manual_cleanup(obd);
+ }
+
+ if (profilenm)
+ class_del_profile(profilenm);
- if (sbi->ll_lmd != NULL) {
- lustre_manual_cleanup(sbi);
- OBD_FREE(sbi->ll_lmd, sizeof(*sbi->ll_lmd));
- }
+ ll_free_sbi(sb);
+ lsi->lsi_llsbi = NULL;
- lustre_free_sbi(sb);
+ lustre_common_put_super(sb);
+ LCONSOLE_WARN("client umount complete\n");
EXIT;
-} /* lustre_put_super */
+} /* client_put_super */
#ifdef HAVE_REGISTER_CACHE
#include <linux/cache_def.h>
RETURN(0);
}
+/* umount -f client means force down, don't save state */
void ll_umount_begin(struct super_block *sb)
{
+ struct lustre_sb_info *lsi = s2lsi(sb);
struct ll_sb_info *sbi = ll_s2sbi(sb);
struct obd_device *obd;
struct obd_ioctl_data ioc_data = { 0 };
ENTRY;
+
+ /* Tell the MGC we got umount -f */
+ lsi->lsi_flags |= LSI_UMOUNT_FORCE;
+
CDEBUG(D_VFSTRACE, "VFS Op: superblock %p count %d active %d\n", sb,
sb->s_count, atomic_read(&sb->s_active));
EXIT;
}
-int lustre_remount_fs(struct super_block *sb, int *flags, char *data)
+int ll_remount_fs(struct super_block *sb, int *flags, char *data)
{
struct ll_sb_info *sbi = ll_s2sbi(sb);
int err;
__u32 read_only;
-
+
if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
read_only = *flags & MS_RDONLY;
err = obd_set_info_async(sbi->ll_mdc_exp, strlen("read-only"),
"remount: %d\n", err);
return err;
}
-
+
if (read_only)
sb->s_flags |= MS_RDONLY;
else
ll_teardown_mmaps(page->mapping,
(__u64)page->index<<PAGE_CACHE_SHIFT,
((__u64)page->index<<PAGE_CACHE_SHIFT)|
- ~PAGE_CACHE_MASK);
+ ~PAGE_CACHE_MASK);
if (!PageDirty(page) && !page_mapped(page)) {
ll_ra_accounting(llap, page->mapping);
ll_truncate_complete_page(page);
extern struct address_space_operations ll_aops;
extern struct address_space_operations ll_dir_aops;
-static struct super_block *ll_read_super(struct super_block *sb,
- void *data, int silent)
-{
- int err;
- ENTRY;
- err = ll_fill_super(sb, data, silent);
- if (err)
- RETURN(NULL);
- RETURN(sb);
-}
-
-static struct super_block *lustre_read_super(struct super_block *sb,
- void *data, int silent)
-{
- int err;
- ENTRY;
- err = lustre_fill_super(sb, data, silent);
- if (err)
- RETURN(NULL);
- RETURN(sb);
-}
-
-static struct file_system_type lustre_lite_fs_type = {
- .owner = THIS_MODULE,
- .name = "lustre_lite",
- .fs_flags = FS_NFSEXP_FSID,
- .read_super = ll_read_super,
-};
/* exported operations */
struct super_operations lustre_super_operations =
{
.read_inode2 = ll_read_inode2,
.clear_inode = ll_clear_inode,
- .put_super = lustre_put_super,
+ .put_super = ll_put_super,
.statfs = ll_statfs,
.umount_begin = ll_umount_begin,
.fh_to_dentry = ll_fh_to_dentry,
.dentry_to_fh = ll_dentry_to_fh,
- .remount_fs = lustre_remount_fs,
-};
-
-static struct file_system_type lustre_fs_type = {
- .owner = THIS_MODULE,
- .name = "lustre",
- .fs_flags = FS_NFSEXP_FSID,
- .read_super = lustre_read_super,
+ .remount_fs = ll_remount_fs,
};
static int __init init_lustre_lite(void)
{
int rc, seed[2];
- printk(KERN_INFO "Lustre: Lustre Lite Client File System; "
+ printk(KERN_INFO "Lustre: Lustre Client File System; "
"info@clusterfs.com\n");
ll_file_data_slab = kmem_cache_create("ll_file_data",
sizeof(struct ll_file_data), 0,
ll_register_cache(&ll_cache_definition);
- rc = register_filesystem(&lustre_lite_fs_type);
- if (rc == 0)
- rc = register_filesystem(&lustre_fs_type);
- if (rc) {
- /* This is safe even if lustre_lite_fs_type isn't registered */
- unregister_filesystem(&lustre_lite_fs_type);
- ll_unregister_cache(&ll_cache_definition);
- }
+ lustre_register_client_fill_super(ll_fill_super);
get_random_bytes(seed, sizeof(seed));
ll_srand(seed[0], seed[1]);
{
int rc;
- unregister_filesystem(&lustre_lite_fs_type);
- unregister_filesystem(&lustre_fs_type);
-
+ lustre_register_client_fill_super(NULL);
+
ll_unregister_cache(&ll_cache_definition);
rc = kmem_cache_destroy(ll_file_data_slab);
#include <lprocfs_status.h>
#include "llite_internal.h"
-struct super_block * ll_get_sb(struct file_system_type *fs_type,
- int flags, const char *devname, void * data)
-{
- /* calls back in fill super */
- return get_sb_nodev(fs_type, flags, data, ll_fill_super);
-}
-
-struct super_block * lustre_get_sb(struct file_system_type *fs_type,
- int flags, const char *devname, void * data)
-{
- /* calls back in fill super */
- return get_sb_nodev(fs_type, flags, data, lustre_fill_super);
-}
-
static kmem_cache_t *ll_inode_cachep;
static struct inode *ll_alloc_inode(struct super_block *sb)
.alloc_inode = ll_alloc_inode,
.destroy_inode = ll_destroy_inode,
.clear_inode = ll_clear_inode,
- .put_super = lustre_put_super,
+ .put_super = ll_put_super,
.statfs = ll_statfs,
.umount_begin = ll_umount_begin,
- .remount_fs = lustre_remount_fs,
-};
-
-
-struct file_system_type lustre_lite_fs_type = {
- .owner = THIS_MODULE,
- .name = "lustre_lite",
- .get_sb = ll_get_sb,
- .kill_sb = kill_anon_super,
- .fs_flags = FS_BINARY_MOUNTDATA,
+ .remount_fs = ll_remount_fs,
};
-struct file_system_type lustre_fs_type = {
- .owner = THIS_MODULE,
- .name = "lustre",
- .get_sb = lustre_get_sb,
- .kill_sb = kill_anon_super,
- .fs_flags = FS_BINARY_MOUNTDATA,
-};
static int __init init_lustre_lite(void)
{
int rc, seed[2];
- printk(KERN_INFO "Lustre: Lustre Lite Client File System; "
+ printk(KERN_INFO "Lustre: Lustre Client File System; "
"info@clusterfs.com\n");
rc = ll_init_inodecache();
if (rc)
proc_mkdir("llite", proc_lustre_root) : NULL;
ll_register_cache(&ll_cache_definition);
-
- rc = register_filesystem(&lustre_lite_fs_type);
- if (rc == 0)
- rc = register_filesystem(&lustre_fs_type);
- if (rc) {
- /* This is safe even if lustre_lite_fs_type isn't registered */
- unregister_filesystem(&lustre_lite_fs_type);
- ll_unregister_cache(&ll_cache_definition);
- }
-
+
+ lustre_register_client_fill_super(ll_fill_super);
+
get_random_bytes(seed, sizeof(seed));
ll_srand(seed[0], seed[1]);
-
+
return rc;
}
{
int rc;
- unregister_filesystem(&lustre_fs_type);
- unregister_filesystem(&lustre_lite_fs_type);
+ lustre_register_client_fill_super(NULL);
ll_unregister_cache(&ll_cache_definition);
int i, rc = 0;
ENTRY;
- LASSERT(lov->desc.ld_tgt_count == count);
- for (i = 0, tgt = lov->tgts; i < lov->desc.ld_tgt_count; i++, tgt++) {
+ /* We might have added an osc and not told the mds yet */
+ if (count != lov->desc.ld_tgt_count)
+ CERROR("Origin connect mds cnt %d != lov cnt %d\n", count,
+ lov->desc.ld_tgt_count);
+
+ for (i = 0, tgt = lov->tgts; i < count; i++, tgt++) {
struct obd_device *child;
struct llog_ctxt *cctxt;
rc = llog_connect(cctxt, 1, logid, gen, uuid);
if (rc) {
- CERROR("error osc_llog_connect %d\n", i);
+ CERROR("error osc_llog_connect tgt %d (%d)\n", i, rc);
break;
}
}
if (rc)
RETURN(rc);
+ CDEBUG(D_CONFIG, "llog init with %d targets\n", count);
LASSERT(lov->desc.ld_tgt_count == count);
for (i = 0, ctgt = lov->tgts; i < lov->desc.ld_tgt_count; i++, ctgt++) {
struct obd_device *child;
child = ctgt->ltd_exp->exp_obd;
rc = obd_llog_init(child, tgt, 1, logid + i);
if (rc) {
- CERROR("error osc_llog_init %d\n", i);
+ CERROR("error osc_llog_init %d (%d)\n", i, rc);
break;
}
}
#include <obd_lov.h>
#include <obd_ost.h>
#include <lprocfs_status.h>
+#include <lustre_param.h>
#include "lov_internal.h"
&obd->obd_uuid);
if (!tgt_obd) {
- CERROR("Target %s not attached\n", tgt_uuid->uuid);
+ CERROR("Target %s not attached\n", obd_uuid2str(tgt_uuid));
RETURN(-EINVAL);
}
+
+ CDEBUG(D_CONFIG, "Connect tgt %s (%s)\n", obd_uuid2str(tgt_uuid),
+ tgt_obd->obd_name);
if (!tgt_obd->obd_set_up) {
- CERROR("Target %s not set up\n", tgt_uuid->uuid);
+ CERROR("Target %s not set up\n", obd_uuid2str(tgt_uuid));
RETURN(-EINVAL);
}
if (activate) {
tgt_obd->obd_no_recov = 0;
+ /* FIXME this is probably supposed to be
+ ptlrpc_set_import_active. Horrible naming. */
ptlrpc_activate_import(tgt_obd->u.cli.cl_import);
}
if (imp->imp_invalid) {
CERROR("not connecting OSC %s; administratively "
- "disabled\n", tgt_uuid->uuid);
+ "disabled\n", obd_uuid2str(tgt_uuid));
rc = obd_register_observer(tgt_obd, obd);
if (rc) {
CERROR("Target %s register_observer error %d; "
"will not be able to reactivate\n",
- tgt_uuid->uuid, rc);
+ obd_uuid2str(tgt_uuid), rc);
}
RETURN(0);
}
rc = obd_connect(&conn, tgt_obd, &lov_osc_uuid, data);
if (rc) {
- CERROR("Target %s connect error %d\n", tgt_uuid->uuid, rc);
+ CERROR("Target %s connect error %d\n",
+ obd_uuid2str(tgt_uuid), rc);
RETURN(rc);
}
tgt->ltd_exp = class_conn2export(&conn);
rc = obd_register_observer(tgt_obd, obd);
if (rc) {
CERROR("Target %s register_observer error %d\n",
- tgt_uuid->uuid, rc);
+ obd_uuid2str(tgt_uuid), rc);
obd_disconnect(tgt->ltd_exp);
tgt->ltd_exp = NULL;
RETURN(rc);
struct obd_uuid *cluuid, struct obd_connect_data *data)
{
struct lov_obd *lov = &obd->u.lov;
- struct lov_tgt_desc *tgt;
- struct obd_export *exp;
- __u64 connect_flags = data ? data->ocd_connect_flags : 0;
- int rc, rc2, i;
+ int rc;
ENTRY;
- rc = class_connect(conn, obd, cluuid);
- if (rc)
- RETURN(rc);
-
- exp = class_conn2export(conn);
+ lov->ocd.ocd_connect_flags = OBD_CONNECT_EMPTY;
+ if (data)
+ lov->ocd = *data;
- /* We don't want to actually do the underlying connections more than
- * once, so keep track. */
- lov->connects++;
- if (lov->connects > 1) {
- class_export_put(exp);
- RETURN(0);
- }
-
- for (i = 0, tgt = lov->tgts; i < lov->desc.ld_tgt_count; i++, tgt++) {
- if (obd_uuid_empty(&tgt->uuid))
- continue;
- if (connect_flags & OBD_CONNECT_INDEX)
- data->ocd_index = i;
- rc = lov_connect_obd(obd, tgt, 0, data);
- if (rc)
- GOTO(out_disc, rc);
- if (data)
- connect_flags &= data->ocd_connect_flags;
- }
-
- if (data)
- data->ocd_connect_flags = connect_flags;
+ rc = class_connect(conn, obd, cluuid);
+ if (!rc)
+ lov->connects++;
+ CDEBUG(D_CONFIG, "connect #%d\n", lov->connects);
- class_export_put(exp);
- RETURN (0);
+ /* target connects are done in lov_add_target */
- out_disc:
- while (i-- > 0) {
- struct obd_uuid uuid;
- --tgt;
- --lov->desc.ld_active_tgt_count;
- tgt->active = 0;
- /* save for CERROR below; (we know it's terminated) */
- uuid = tgt->uuid;
- rc2 = obd_disconnect(tgt->ltd_exp);
- if (rc2)
- CERROR("error: LOV target %s disconnect on OST idx %d: "
- "rc = %d\n", uuid.uuid, i, rc2);
- }
- class_disconnect(exp);
RETURN (rc);
}
int rc;
ENTRY;
- CDEBUG(D_CONFIG, "Disconnecting lov target %s\n", obd->obd_uuid.uuid);
+ CDEBUG(D_CONFIG, "%s: disconnecting target %s\n",
+ obd->obd_name, osc_obd->obd_name);
lov_proc_dir = lprocfs_srch(obd->obd_proc_entry, "target_obds");
if (lov_proc_dir) {
RETURN(0);
}
-static int lov_del_obd(struct obd_device *obd, struct obd_uuid *uuidp,
- int index, int gen);
+static int lov_del_target(struct obd_device *obd, struct obd_uuid *uuidp,
+ int index, int gen);
static int lov_disconnect(struct obd_export *exp)
{
/* Only disconnect the underlying layers on the final disconnect. */
lov->connects--;
- if (lov->connects != 0)
+ if (lov->connects != 0) {
+ /* why should there be more than 1 connect? */
+ CERROR("disconnect #%d\n", lov->connects);
goto out;
+ }
/* Let's hold another reference so lov_del_obd doesn't spin through
putref every time */
for (i = 0, tgt = lov->tgts; i < lov->desc.ld_tgt_count; i++, tgt++) {
if (tgt->ltd_exp) {
/* Disconnection is the last we know about an obd */
- lov_del_obd(obd, &tgt->uuid, i, tgt->ltd_gen);
+ lov_del_target(obd, &tgt->uuid, i, tgt->ltd_gen);
}
}
lov_putref(obd);
out:
- rc = class_disconnect(exp);
+ rc = class_disconnect(exp); /* bz 9811 */
RETURN(rc);
}
}
static int lov_notify(struct obd_device *obd, struct obd_device *watched,
- enum obd_notify_event ev)
+ enum obd_notify_event ev, void *data)
{
- struct obd_uuid *uuid;
- int rc;
+ int rc = 0;
ENTRY;
- if (strcmp(watched->obd_type->typ_name, LUSTRE_OSC_NAME)) {
- CERROR("unexpected notification of %s %s!\n",
- watched->obd_type->typ_name,
- watched->obd_name);
- RETURN(-EINVAL);
- }
- uuid = &watched->u.cli.cl_target_uuid;
-
if (ev == OBD_NOTIFY_ACTIVE || ev == OBD_NOTIFY_INACTIVE) {
+ struct obd_uuid *uuid;
+
+ LASSERT(watched);
+
+ if (strcmp(watched->obd_type->typ_name, LUSTRE_OSC_NAME)) {
+ CERROR("unexpected notification of %s %s!\n",
+ watched->obd_type->typ_name,
+ watched->obd_name);
+ RETURN(-EINVAL);
+ }
+ uuid = &watched->u.cli.cl_target_uuid;
+
/* Set OSC as active before notifying the observer, so the
* observer can use the OSC normally.
*/
if (rc) {
CERROR("%sactivation of %s failed: %d\n",
(ev == OBD_NOTIFY_ACTIVE) ? "" : "de",
- uuid->uuid, rc);
+ obd_uuid2str(uuid), rc);
RETURN(rc);
}
}
/* Pass the notification up the chain. */
- rc = obd_notify_observer(obd, watched, ev);
+ if (watched) {
+ rc = obd_notify_observer(obd, watched, ev, data);
+ } else {
+ /* NULL watched means all osc's in the lov (only for syncs) */
+ struct lov_obd *lov = &obd->u.lov;
+ struct lov_tgt_desc *tgt;
+ struct obd_device *tgt_obd;
+ int i;
+ for (i = 0, tgt = lov->tgts; i < lov->desc.ld_tgt_count;
+ i++, tgt++) {
+ if (obd_uuid_empty(&tgt->uuid))
+ continue;
+ tgt_obd = class_exp2obd(tgt->ltd_exp);
+ rc = obd_notify_observer(obd, tgt_obd, ev, data);
+ if (rc) {
+ CERROR("%s: notify %s of %s failed %d\n",
+ obd->obd_name,
+ obd->obd_observer->obd_name,
+ tgt_obd->obd_name, rc);
+ break;
+ }
+ }
+ }
RETURN(rc);
}
-static int
-lov_add_obd(struct obd_device *obd, struct obd_uuid *uuidp, int index, int gen)
+static int lov_add_target(struct obd_device *obd, struct obd_uuid *uuidp,
+ int index, int gen, int active)
{
struct lov_obd *lov = &obd->u.lov;
struct lov_tgt_desc *tgt;
- obd_id params[2];
- int rc, old_count;
- __u32 bufsize, size = 2;
+ struct obd_connect_data *ocd = NULL;
+ __u32 bufsize, idx;
+ int rc;
ENTRY;
- CDEBUG(D_CONFIG, "uuid: %s idx: %d gen: %d\n",
- uuidp->uuid, index, gen);
+ CDEBUG(D_CONFIG, "uuid:%s idx:%d gen:%d active:%d\n",
+ uuidp->uuid, index, gen, active);
if (index < 0) {
CERROR("request to add OBD %s at invalid index: %d\n",
tgt = &lov->tgts[index];
if (!obd_uuid_empty(&tgt->uuid)) {
- CERROR("OBD already assigned at LOV target index %d\n",
- index);
+ CERROR("UUID %s already assigned at LOV target index %d\n",
+ obd_uuid2str(&tgt->uuid), index);
RETURN(-EEXIST);
}
tgt->index = index;
INIT_LIST_HEAD(&tgt->qos_bavail_list);
- old_count = lov->desc.ld_tgt_count;
if (index >= lov->desc.ld_tgt_count)
lov->desc.ld_tgt_count = index + 1;
CDEBUG(D_CONFIG, "idx=%d ltd_gen=%d ld_tgt_count=%d\n",
index, tgt->ltd_gen, lov->desc.ld_tgt_count);
- if (lov->connects == 0)
- /* lov_connect hasn't been called yet. So we'll do the
- lov_connect_obd on this obd when that fn first runs. */
- RETURN(0);
-
if (tgt->ltd_exp) {
struct obd_device *osc_obd;
osc_obd->obd_no_recov = 0;
}
- /* NULL may need to change when we use flags for osc's */
- rc = lov_connect_obd(obd, tgt, 1, NULL);
- if (rc || !obd->obd_observer)
- RETURN(rc);
-
- /* tell the mds_lov about the new target */
- obd_llog_finish(obd->obd_observer, old_count);
- llog_cat_initialize(obd->obd_observer, lov->desc.ld_tgt_count);
-
- params[0] = index;
- rc = obd_get_info(tgt->ltd_exp, strlen("last_id"), "last_id", &size,
- ¶ms[1]);
+ if (lov->ocd.ocd_connect_flags != OBD_CONNECT_EMPTY) {
+ /* Keep the original connect flags pristine */
+ OBD_ALLOC(ocd, sizeof(*ocd));
+ if (!ocd)
+ RETURN(-ENOMEM);
+ *ocd = lov->ocd;
+ }
+ rc = lov_connect_obd(obd, tgt, active, ocd);
+ if (ocd)
+ OBD_FREE(ocd, sizeof(*ocd));
if (rc)
GOTO(out, rc);
- rc = obd_set_info_async(obd->obd_observer->obd_self_export,
- strlen("next_id"),"next_id", 2, params, NULL);
- if (rc)
- GOTO(out, rc);
+ idx = index;
+ rc = lov_notify(obd, tgt->ltd_exp->exp_obd,
+ active ? OBD_NOTIFY_ACTIVE : OBD_NOTIFY_INACTIVE,
+ (void *)&idx);
- rc = lov_notify(obd, tgt->ltd_exp->exp_obd, OBD_NOTIFY_ACTIVE);
- GOTO(out, rc);
- out:
- if (rc && tgt->ltd_exp != NULL)
- lov_disconnect_obd(obd, tgt);
- return rc;
+out:
+ if (rc) {
+ CERROR("add failed (%d), deleting %s\n", rc,
+ (char *)tgt->uuid.uuid);
+ lov_del_target(obd, &tgt->uuid, index, 0);
+ }
+ RETURN(rc);
}
/* Schedule a target for deletion */
-static int
-lov_del_obd(struct obd_device *obd, struct obd_uuid *uuidp, int index, int gen)
+static int lov_del_target(struct obd_device *obd, struct obd_uuid *uuidp,
+ int index, int gen)
{
struct lov_obd *lov = &obd->u.lov;
struct lov_tgt_desc *tgt;
RETURN(-EINVAL);
}
- if (strncmp(uuidp->uuid, tgt->uuid.uuid, sizeof uuidp->uuid) != 0) {
+ if (!obd_uuid_equals(uuidp, &tgt->uuid)) {
CERROR("LOV target UUID %s at index %d doesn't match %s.\n",
tgt->uuid.uuid, index, uuidp->uuid);
RETURN(-EINVAL);
LASSERT(tgt->reap);
osc_obd = class_exp2obd(tgt->ltd_exp);
+ CDEBUG(D_CONFIG, "Removing tgt %s : %s\n", tgt->uuid.uuid,
+ osc_obd ? osc_obd->obd_name : "<no obd>");
+
if (tgt->ltd_exp)
lov_disconnect_obd(obd, tgt);
}
}
+static void lov_fix_desc(struct lov_desc *desc)
+{
+ if (desc->ld_default_stripe_size < PTLRPC_MAX_BRW_SIZE) {
+ CWARN("Increasing default_stripe_size "LPU64" to %u\n",
+ desc->ld_default_stripe_size, PTLRPC_MAX_BRW_SIZE);
+ desc->ld_default_stripe_size = PTLRPC_MAX_BRW_SIZE;
+ } else if (desc->ld_default_stripe_size & (LOV_MIN_STRIPE_SIZE - 1)) {
+ CWARN("default_stripe_size "LPU64" isn't a multiple of %u\n",
+ desc->ld_default_stripe_size, LOV_MIN_STRIPE_SIZE);
+ desc->ld_default_stripe_size &= ~(LOV_MIN_STRIPE_SIZE - 1);
+ CWARN("changing to "LPU64"\n", desc->ld_default_stripe_size);
+ }
+
+ if (desc->ld_default_stripe_count == 0)
+ desc->ld_default_stripe_count = 1;
+
+ /* from lov_setstripe */
+ if ((desc->ld_pattern != 0) &&
+ (desc->ld_pattern != LOV_PATTERN_RAID0)) {
+ CDEBUG(D_IOCTL, "bad userland stripe pattern: %#x\n",
+ desc->ld_pattern);
+ desc->ld_pattern = 0;
+ }
+}
+
static int lov_setup(struct obd_device *obd, obd_count len, void *buf)
{
struct lprocfs_static_vars lvars;
}
}
- if (desc->ld_default_stripe_size < PTLRPC_MAX_BRW_SIZE) {
- CWARN("Increasing default_stripe_size "LPU64" to %u\n",
- desc->ld_default_stripe_size, PTLRPC_MAX_BRW_SIZE);
- CWARN("Please update config and run --write-conf on MDS\n");
-
- desc->ld_default_stripe_size = PTLRPC_MAX_BRW_SIZE;
- } else if (desc->ld_default_stripe_size & (LOV_MIN_STRIPE_SIZE - 1)) {
- CWARN("default_stripe_size "LPU64" isn't a multiple of %u\n",
- desc->ld_default_stripe_size, LOV_MIN_STRIPE_SIZE);
- CWARN("Please update config and run --write-conf on MDS\n");
-
- desc->ld_default_stripe_size &= ~(LOV_MIN_STRIPE_SIZE - 1);
- }
-
- if (desc->ld_default_stripe_count == 0)
- desc->ld_default_stripe_count = 1;
+ lov_fix_desc(desc);
/* Because of 64-bit divide/mod operations only work with a 32-bit
* divisor in a 32-bit kernel, we cannot support a stripe width
/* We should never get here - these should have
been removed in the disconnect. */
if (!obd_uuid_empty(&tgt->uuid)) {
- CERROR("lov tgt %d not cleaned!\n", i);
- lov_del_obd(obd, &tgt->uuid, i, 0);
+ CERROR("lov tgt %d not cleaned!"
+ " deathrow=%d, lovrc=%d\n",
+ i, lov->death_row,
+ atomic_read(&lov->refcount));
+ lov_del_target(obd, &tgt->uuid, i, 0);
}
}
OBD_FREE(lov->tgts, lov->bufsize);
switch(cmd = lcfg->lcfg_command) {
case LCFG_LOV_ADD_OBD:
+ case LCFG_LOV_ADD_INA:
case LCFG_LOV_DEL_OBD: {
+ /* lov_modify_tgts add 0:lov_mdsA 1:ost1_UUID 2:0 3:1 */
if (LUSTRE_CFG_BUFLEN(lcfg, 1) > sizeof(obd_uuid.uuid))
GOTO(out, rc = -EINVAL);
if (sscanf(lustre_cfg_buf(lcfg, 3), "%d", &gen) != 1)
GOTO(out, rc = -EINVAL);
if (cmd == LCFG_LOV_ADD_OBD)
- rc = lov_add_obd(obd, &obd_uuid, index, gen);
+ rc = lov_add_target(obd, &obd_uuid, index, gen, 1);
+ else if (cmd == LCFG_LOV_ADD_INA)
+ rc = lov_add_target(obd, &obd_uuid, index, gen, 0);
else
- rc = lov_del_obd(obd, &obd_uuid, index, gen);
+ rc = lov_del_target(obd, &obd_uuid, index, gen);
+ GOTO(out, rc);
+ }
+ case LCFG_PARAM: {
+ int i;
+ struct lov_obd *lov = &obd->u.lov;
+ struct lov_desc *desc = &(lov->desc);
+ if (!desc)
+ GOTO(out, rc = -EINVAL);
+ /* see jt_obd_lov_getconfig for variable names */
+ /* setparam 0:lov_mdsA 1:default_stripe_size=1048576
+ 2:default_stripe_pattern=0 3:default_stripe_offset=0 */
+ for (i = 1; i < lcfg->lcfg_bufcount; i++) {
+ char *key, *sval;
+ long val;
+ key = lustre_cfg_buf(lcfg, i);
+ sval = strchr(key, '=');
+ if (!sval || (*(sval + 1) == 0)) {
+ CERROR("Can't parse param %s\n", key);
+ rc = -EINVAL;
+ /* continue parsing other params */
+ continue;
+ }
+ *sval = 0;
+ val = simple_strtol(sval + 1, NULL, 0);
+ if (strcmp(key, PARAM_D_STRIPE_SIZE) == 0)
+ desc->ld_default_stripe_size = val;
+ else if (strcmp(key, PARAM_D_STRIPE_COUNT) == 0)
+ desc->ld_default_stripe_count = val;
+ else if (strcmp(key, PARAM_D_STRIPE_OFFSET) == 0)
+ desc->ld_default_stripe_offset = val;
+ else if (strcmp(key, PARAM_D_STRIPE_PATTERN) == 0)
+ desc->ld_pattern = val;
+ else {
+ CERROR("Unknown param %s\n", key);
+ rc = -EINVAL;
+ /* continue parsing other params */
+ continue;
+ }
+ LCONSOLE_INFO("set %s to %ld\n", key, val);
+ }
+ lov_fix_desc(desc);
GOTO(out, rc);
}
default: {
if (ost_uuid && !obd_uuid_equals(ost_uuid, &lov->tgts[i].uuid))
continue;
+ CDEBUG(D_CONFIG,"Clear orphans for %d:%s\n", i, ost_uuid->uuid);
+
memcpy(tmp_oa, src_oa, sizeof(*tmp_oa));
LASSERT(lov->tgts[i].ltd_exp);
{
struct lov_obd *lov = &exp->exp_obd->u.lov;
struct lov_async_page *lap;
- int rc;
+ int rc = 0;
ENTRY;
- if (!page)
- return size_round(sizeof(*lap)) +
- obd_prep_async_page(lov->tgts[0].ltd_exp, NULL, NULL,
- NULL, 0, NULL, NULL, NULL);
-
+ if (!page) {
+ int i = 0;
+ /* Find an existing osc so we can get it's stupid sizeof(*oap).
+ Only because of this layering limitation will a client
+ mount with no osts fail */
+ while (!lov->tgts[i].ltd_exp) {
+ i++;
+ if (i >= lov->desc.ld_tgt_count)
+ RETURN(-ENOTBLK);
+ }
+ rc = size_round(sizeof(*lap)) +
+ obd_prep_async_page(lov->tgts[i].ltd_exp, NULL, NULL,
+ NULL, 0, NULL, NULL, NULL);
+ RETURN(rc);
+ }
ASSERT_LSM_MAGIC(lsm);
LASSERT(loi == NULL);
GOTO(out, rc);
}
GOTO(out, rc = 0);
- } else if (keylen >= strlen("lovdesc") && strcmp(key, "lovdesc") == 0) {
+ } else if (keylen >= strlen(KEY_LOVDESC) &&
+ strcmp(key, KEY_LOVDESC) == 0) {
struct lov_desc *desc_ret = val;
*desc_ret = lov->desc;
RETURN(-ENOMEM);
}
- if (KEY_IS("next_id")) {
- if (vallen != lov->desc.ld_tgt_count)
+ if (KEY_IS(KEY_NEXT_ID)) {
+ if (vallen > lov->desc.ld_tgt_count)
RETURN(-EINVAL);
vallen = sizeof(obd_id);
}
lov_getref(obddev);
- if (KEY_IS("next_id") || KEY_IS("checksum")) {
+ if (KEY_IS(KEY_NEXT_ID) || KEY_IS("checksum")) {
for (i = 0; i < lov->desc.ld_tgt_count; i++) {
/* OST was disconnected */
if (!lov->tgts[i].ltd_exp)
GOTO(out, rc);
}
- if (KEY_IS("mds_conn") || KEY_IS("unlinked")) {
+ if (KEY_IS(KEY_MDS_CONN) || KEY_IS("unlinked")) {
if (vallen != 0)
GOTO(out, rc = -EINVAL);
} else {
if ((found = fsfilt_search_type(fs_ops->fs_type))) {
if (found != fs_ops) {
CERROR("different operations for type %s\n",
- fs_ops->fs_type);
+ fs_ops->fs_type);
/* unlock fsfilt_types list */
RETURN(-EEXIST);
}
} else {
PORTAL_MODULE_USE;
- list_add(&fs_ops->fs_list, &fsfilt_types);
- }
+ list_add(&fs_ops->fs_list, &fsfilt_types);
+ }
- /* unlock fsfilt_types list */
+ /* unlock fsfilt_types list */
return 0;
}
/* lock fsfilt_types list */
list_for_each(p, &fsfilt_types) {
- struct fsfilt_operations *found;
+ struct fsfilt_operations *found;
found = list_entry(p, typeof(*found), fs_list);
if (found == fs_ops) {
}
if (rc) {
- CERROR("Can't find fsfilt_%s interface\n", name);
- RETURN(ERR_PTR(rc));
- /* unlock fsfilt_types list */
+ CERROR("Can't find %s interface\n", name);
+ RETURN(ERR_PTR(rc < 0 ? rc : -rc));
+ /* unlock fsfilt_types list */
}
}
try_module_get(fs_ops->fs_owner);
#define EXT3_XATTR_INDEX_TRUSTED 4
#endif
-static char *fsfilt_ext3_label(struct super_block *sb)
+static char *fsfilt_ext3_get_label(struct super_block *sb)
{
return EXT3_SB(sb)->s_es->s_volume_name;
}
+static int fsfilt_ext3_set_label(struct super_block *sb, char *label)
+{
+ /* see e.g. fsfilt_ext3_write_record() */
+ journal_t *journal;
+ handle_t *handle;
+ int err;
+
+ journal = EXT3_SB(sb)->s_journal;
+ lock_24kernel();
+ handle = journal_start(journal, 1);
+ unlock_24kernel();
+ if (IS_ERR(handle)) {
+ CERROR("can't start transaction\n");
+ return(PTR_ERR(handle));
+ }
+
+ err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh);
+ if (err)
+ goto out;
+
+ memcpy(EXT3_SB(sb)->s_es->s_volume_name, label,
+ sizeof(EXT3_SB(sb)->s_es->s_volume_name));
+
+ err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
+
+out:
+ lock_24kernel();
+ journal_stop(handle);
+ unlock_24kernel();
+
+ return(err);
+}
+
static char *fsfilt_ext3_uuid(struct super_block *sb)
{
return EXT3_SB(sb)->s_es->s_uuid;
#undef EXT3_MULTIBLOCK_ALLOCATOR
#endif
#ifndef EXT3_EXTENTS_FL
-#define EXT3_EXTENTS_FL 0x00080000 /* Inode uses extents */
+#define EXT3_EXTENTS_FL 0x00080000 /* Inode uses extents */
#endif
#ifdef EXT3_MULTIBLOCK_ALLOCATOR
static struct fsfilt_operations fsfilt_ext3_ops = {
.fs_type = "ext3",
.fs_owner = THIS_MODULE,
- .fs_label = fsfilt_ext3_label,
+ .fs_getlabel = fsfilt_ext3_get_label,
+ .fs_setlabel = fsfilt_ext3_set_label,
.fs_uuid = fsfilt_ext3_uuid,
.fs_start = fsfilt_ext3_start,
.fs_brw_start = fsfilt_ext3_brw_start,
/* Fixup directory permissions if necessary */
if (fix && (old_mode & S_IALLUGO) != (mode & S_IALLUGO)) {
- CWARN("fixing permissions on %s from %o to %o\n",
- name, old_mode, mode);
+ CDEBUG(D_CONFIG,
+ "fixing permissions on %s from %o to %o\n",
+ name, old_mode, mode);
dchild->d_inode->i_mode = (mode & S_IALLUGO) |
(old_mode & ~S_IALLUGO);
mark_inode_dirty(dchild->d_inode);
struct obd_import *imp = class_exp2cliimp(exp);
int rc = -EINVAL;
- if (KEY_IS("initial_recov")) {
+ if (KEY_IS(KEY_INIT_RECOV)) {
if (vallen != sizeof(int))
RETURN(-EINVAL);
imp->imp_initial_recov = *(int *)val;
- CDEBUG(D_HA, "%s: set imp_no_init_recov = %d\n",
+ CDEBUG(D_HA, "%s: set imp_initial_recov = %d\n",
exp->exp_obd->obd_name, imp->imp_initial_recov);
RETURN(0);
}
/* Turn off initial_recov after we try all backup servers once */
- if (KEY_IS("init_recov_bk")) {
+ if (KEY_IS(KEY_INIT_RECOV_BACKUP)) {
if (vallen != sizeof(int))
RETURN(-EINVAL);
imp->imp_initial_recov_bk = *(int *)val;
break;
}
case IMP_EVENT_INACTIVE: {
- rc = obd_notify_observer(obd, obd, OBD_NOTIFY_INACTIVE);
+ rc = obd_notify_observer(obd, obd, OBD_NOTIFY_INACTIVE, NULL);
break;
}
case IMP_EVENT_INVALIDATE: {
break;
}
case IMP_EVENT_ACTIVE: {
- rc = obd_notify_observer(obd, obd, OBD_NOTIFY_ACTIVE);
+ rc = obd_notify_observer(obd, obd, OBD_NOTIFY_ACTIVE, NULL);
break;
}
case IMP_EVENT_OCD:
int rc, size;
ENTRY;
- rc = obd_get_info(lov_exp, strlen("lovdesc") + 1, "lovdesc",
+ rc = obd_get_info(lov_exp, strlen(KEY_LOVDESC) + 1, KEY_LOVDESC,
&valsize, &desc);
if (rc)
RETURN(rc);
int rc = 0;
ENTRY;
- if (stage < OBD_CLEANUP_SELF_EXP)
- RETURN(0);
-
- rc = obd_llog_finish(obd, 0);
- if (rc != 0)
- CERROR("failed to cleanup llogging subsystems\n");
-
+ switch (stage) {
+ case OBD_CLEANUP_EARLY:
+ case OBD_CLEANUP_EXPORTS:
+ break;
+ case OBD_CLEANUP_SELF_EXP:
+ rc = obd_llog_finish(obd, 0);
+ if (rc != 0)
+ CERROR("failed to cleanup llogging subsystems\n");
+ case OBD_CLEANUP_OBD:
+ break;
+ }
RETURN(rc);
}
#endif
#define DEBUG_SUBSYSTEM S_MDS
-#include <linux/module.h>
#include <lustre_mds.h>
-#include <lustre_dlm.h>
+#include <linux/module.h>
#include <linux/init.h>
-#include <obd_class.h>
#include <linux/random.h>
#include <linux/fs.h>
#include <linux/jbd.h>
#else
# include <linux/locks.h>
#endif
+
+#include <obd_class.h>
+#include <lustre_dlm.h>
#include <obd_lov.h>
-#include <lustre_mds.h>
#include <lustre_fsfilt.h>
#include <lprocfs_status.h>
#include <lustre_commit_confd.h>
#include <lustre_quota.h>
+#include <lustre_disk.h>
#include <lustre_ver.h>
#include "mds_internal.h"
return rc;
}
-
static int mds_obd_statfs(struct obd_device *obd, struct obd_statfs *osfs,
unsigned long max_age)
{
[REINT_OPEN] "open",
};
-static int mds_set_info(struct obd_export *exp, struct ptlrpc_request *req)
+static int mds_set_info_rpc(struct obd_export *exp, struct ptlrpc_request *req)
{
char *key;
__u32 *val;
case MDS_SET_INFO:
DEBUG_REQ(D_INODE, req, "set_info");
- rc = mds_set_info(req->rq_export, req);
+ rc = mds_set_info_rpc(req->rq_export, req);
break;
case MDS_QUOTACHECK:
int mds_update_server_data(struct obd_device *obd, int force_sync)
{
struct mds_obd *mds = &obd->u.mds;
- struct mds_server_data *msd = mds->mds_server_data;
+ struct lr_server_data *lsd = mds->mds_server_data;
+ struct lr_server_data *lsd_copy = NULL;
struct file *filp = mds->mds_rcvd_filp;
struct lvfs_run_ctxt saved;
loff_t off = 0;
int rc;
ENTRY;
- push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
- msd->msd_last_transno = cpu_to_le64(mds->mds_last_transno);
-
CDEBUG(D_SUPER, "MDS mount_count is "LPU64", last_transno is "LPU64"\n",
mds->mds_mount_count, mds->mds_last_transno);
- rc = fsfilt_write_record(obd, filp, msd, sizeof(*msd), &off,force_sync);
+
+ lsd->lsd_last_transno = cpu_to_le64(mds->mds_last_transno);
+
+ if (!(lsd->lsd_feature_incompat & cpu_to_le32(OBD_INCOMPAT_COMMON_LR))){
+ /* Swap to the old mds_server_data format, in case
+ someone wants to revert to a pre-1.6 lustre */
+ CDEBUG(D_CONFIG, "writing old last_rcvd format\n");
+ /* malloc new struct instead of swap in-place because
+ we don't have a lock on the last_trasno or mount count -
+ someone may modify it while we're here, and we don't want
+ them to inc the wrong thing. */
+ OBD_ALLOC(lsd_copy, sizeof(*lsd_copy));
+ if (!lsd_copy)
+ RETURN(-ENOMEM);
+ *lsd_copy = *lsd;
+ lsd_copy->lsd_unused = lsd->lsd_last_transno;
+ lsd_copy->lsd_last_transno = lsd->lsd_mount_count;
+ lsd = lsd_copy;
+ }
+
+ push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ rc = fsfilt_write_record(obd, filp, lsd, sizeof(*lsd), &off,force_sync);
+ pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
if (rc)
CERROR("error writing MDS server data: rc = %d\n", rc);
- pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+
+ if (lsd_copy)
+ OBD_FREE(lsd_copy, sizeof(*lsd_copy));
RETURN(rc);
}
struct lprocfs_static_vars lvars;
struct lustre_cfg* lcfg = buf;
struct mds_obd *mds = &obd->u.mds;
+ struct lustre_mount_info *lmi;
struct vfsmount *mnt;
struct obd_uuid uuid;
__u8 *uuid_ptr;
int rc = 0;
ENTRY;
+ /* setup 1:/dev/loop/0 2:ext3 3:mdsA 4:errors=remount-ro,iopen_nopriv */
+
CLASSERT(offsetof(struct obd_device, u.obt) ==
offsetof(struct obd_device, u.mds.mds_obt));
if (LUSTRE_CFG_BUFLEN(lcfg, 1) == 0 || LUSTRE_CFG_BUFLEN(lcfg, 2) == 0)
RETURN(rc = -EINVAL);
- obd->obd_fsops = fsfilt_get_ops(lustre_cfg_string(lcfg, 2));
- if (IS_ERR(obd->obd_fsops))
- RETURN(rc = PTR_ERR(obd->obd_fsops));
-
- page = __get_free_page(GFP_KERNEL);
- if (!page)
- RETURN(-ENOMEM);
-
- options = (char *)page;
- memset(options, 0, PAGE_SIZE);
-
- /* here we use "iopen_nopriv" hardcoded, because it affects MDS utility
- * and the rest of options are passed by mount options. Probably this
- * should be moved to somewhere else like startup scripts or lconf. */
- strcpy(options, "iopen_nopriv");
-
- if (LUSTRE_CFG_BUFLEN(lcfg, 4) > 0 && lustre_cfg_buf(lcfg, 4)) {
- sprintf(options + strlen(options), ",%s",
- lustre_cfg_string(lcfg, 4));
- fsoptions_to_mds_flags(mds, options);
- }
+ lmi = server_get_mount(obd->obd_name);
+ if (lmi) {
+ /* We already mounted in lustre_fill_super.
+ lcfg bufs 1, 2, 4 (device, fstype, mount opts) are ignored.*/
+ struct lustre_sb_info *lsi = s2lsi(lmi->lmi_sb);
+ mnt = lmi->lmi_mnt;
+ obd->obd_fsops = fsfilt_get_ops(MT_STR(lsi->lsi_ldd));
+ } else {
+ /* old path - used by lctl */
+ CERROR("Using old MDS mount method\n");
+ page = __get_free_page(GFP_KERNEL);
+ if (!page)
+ RETURN(-ENOMEM);
+
+ options = (char *)page;
+ memset(options, 0, PAGE_SIZE);
+
+ /* here we use "iopen_nopriv" hardcoded, because it affects
+ * MDS utility and the rest of options are passed by mount
+ * options. Probably this should be moved to somewhere else
+ * like startup scripts or lconf. */
+ strcpy(options, "iopen_nopriv");
+
+ if (LUSTRE_CFG_BUFLEN(lcfg, 4) > 0 && lustre_cfg_buf(lcfg, 4)) {
+ sprintf(options + strlen(options), ",%s",
+ lustre_cfg_string(lcfg, 4));
+ fsoptions_to_mds_flags(mds, options);
+ }
+
+ mnt = do_kern_mount(lustre_cfg_string(lcfg, 2), 0,
+ lustre_cfg_string(lcfg, 1),
+ (void *)options);
+ free_page(page);
+ if (IS_ERR(mnt)) {
+ rc = PTR_ERR(mnt);
+ LCONSOLE_ERROR("Can't mount disk %s (%d)\n",
+ lustre_cfg_string(lcfg, 1), rc);
+ RETURN(rc);
+ }
- mnt = do_kern_mount(lustre_cfg_string(lcfg, 2), 0,
- lustre_cfg_string(lcfg, 1), (void *)options);
- free_page(page);
- if (IS_ERR(mnt)) {
- rc = PTR_ERR(mnt);
- LCONSOLE_ERROR("Can't mount disk %s (%d)\n",
- lustre_cfg_string(lcfg, 1), rc);
- GOTO(err_ops, rc);
+ obd->obd_fsops = fsfilt_get_ops(lustre_cfg_string(lcfg, 2));
}
+ if (IS_ERR(obd->obd_fsops))
+ GOTO(err_put, rc = PTR_ERR(obd->obd_fsops));
CDEBUG(D_SUPER, "%s: mnt = %p\n", lustre_cfg_string(lcfg, 1), mnt);
obd->obd_namespace = ldlm_namespace_new(ns_name, LDLM_NAMESPACE_SERVER);
if (obd->obd_namespace == NULL) {
mds_cleanup(obd);
- GOTO(err_put, rc = -ENOMEM);
+ GOTO(err_ops, rc = -ENOMEM);
}
ldlm_register_intent(obd->obd_namespace, mds_intent_policy);
/* Don't wait for mds_postrecov trying to clear orphans */
obd->obd_async_recov = 1;
rc = mds_postsetup(obd);
+ obd->obd_async_recov = 0;
if (rc)
GOTO(err_qctxt, rc);
- obd->obd_async_recov = 0;
lprocfs_init_vars(mds, &lvars);
lprocfs_obd_setup(obd, lvars.obd_vars);
str = "no UUID";
}
- label = fsfilt_label(obd, obd->u.obt.obt_sb);
+ label = fsfilt_get_label(obd, obd->u.obt.obt_sb);
if (obd->obd_recovering) {
LCONSOLE_WARN("MDT %s now serving %s (%s%s%s), but will be in "
"recovery until %d %s reconnect, or if no clients"
err_ns:
ldlm_namespace_free(obd->obd_namespace, 0);
obd->obd_namespace = NULL;
-err_put:
- unlock_kernel();
- mntput(mds->mds_vfsmnt);
- obd->u.obt.obt_sb = NULL;
- lock_kernel();
err_ops:
fsfilt_put_ops(obd->obd_fsops);
+err_put:
+ if (lmi) {
+ server_put_mount(obd->obd_name, mds->mds_vfsmnt);
+ } else {
+ /* old method */
+ unlock_kernel();
+ mntput(mds->mds_vfsmnt);
+ lock_kernel();
+ }
+ obd->u.obt.obt_sb = NULL;
return rc;
}
/* There better be a lov */
if (!osc)
RETURN(0);
-
if (IS_ERR(osc))
RETURN(PTR_ERR(osc));
RETURN(rc);
if (mds->mds_profile) {
- struct lvfs_run_ctxt saved;
struct lustre_profile *lprof;
- struct config_llog_instance cfg;
-
- cfg.cfg_instance = NULL;
- cfg.cfg_uuid = mds->mds_lov_uuid;
- push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
- rc = class_config_parse_llog(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT),
- mds->mds_profile, &cfg);
- pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
- switch (rc) {
- case 0:
- break;
- case -EINVAL:
- LCONSOLE_ERROR("%s: the profile %s could not be read. "
- "If you recently installed a new "
- "version of Lustre, you may need to "
- "re-run 'lconf --write_conf "
- "<yourconfig>.xml' command line before "
- "restarting the MDS.\n",
- obd->obd_name, mds->mds_profile);
- /* fall through */
- default:
- GOTO(err_llog, rc);
- break;
- }
-
+ /* The profile defines which osc and mdc to connect to, for a
+ client. We reuse that here to figure out the name of the
+ lov to use (and ignore lprof->lp_mdc).
+ The profile was set in the config log with
+ LCFG_MOUNTOPT profilenm oscnm mdcnm */
lprof = class_get_profile(mds->mds_profile);
if (lprof == NULL) {
CERROR("No profile found: %s\n", mds->mds_profile);
err_cleanup:
mds_lov_clean(obd);
-err_llog:
llog_cleanup(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT));
llog_cleanup(llog_get_context(obd, LLOG_LOVEA_ORIG_CTXT));
RETURN(rc);
LASSERT(!obd->obd_recovering);
LASSERT(llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT) != NULL);
+ /* FIXME why not put this in the synchronize? */
/* set nextid first, so we are sure it happens */
rc = mds_lov_set_nextid(obd);
if (rc) {
- CERROR("%s: mds_lov_set_nextid failed\n",
- obd->obd_name);
+ CERROR("%s: mds_lov_set_nextid failed %d\n",
+ obd->obd_name, rc);
GOTO(out, rc);
}
if (rc < 0)
GOTO(out, rc);
- /* Does anyone need this to be synchronous ever? */
- mds_lov_start_synchronize(obd, NULL, obd->obd_async_recov);
+ /* FIXME Does target_finish_recovery really need this to block? */
+ /* Notify the LOV, which will in turn call mds_notify for each tgt */
+ /* This means that we have to hack obd_notify to think we're obd_set_up
+ during mds_lov_connect. */
+ obd_notify(obd->u.mds.mds_osc_obd, NULL,
+ obd->obd_async_recov ? OBD_NOTIFY_SYNC_NONBLOCK :
+ OBD_NOTIFY_SYNC, NULL);
/* quota recovery */
lquota_recovery(quota_interface, obd);
{
struct mds_obd *mds = &obd->u.mds;
lvfs_sbdev_type save_dev;
+ int must_put = 0;
int must_relock = 0;
ENTRY;
lquota_cleanup(quota_interface, obd);
mds_update_server_data(obd, 1);
- if (mds->mds_lov_objids != NULL) {
- OBD_FREE(mds->mds_lov_objids,
- mds->mds_lov_desc.ld_tgt_count * sizeof(obd_id));
- }
+ if (mds->mds_lov_objids != NULL)
+ OBD_FREE(mds->mds_lov_objids, mds->mds_lov_objids_size);
mds_fs_cleanup(obd);
upcall_cache_cleanup(mds->mds_group_hash);
mds->mds_group_hash = NULL;
- /* 2 seems normal on mds, (may_umount() also expects 2
- fwiw), but we only see 1 at this point in obdfilter. */
- if (atomic_read(&obd->u.mds.mds_vfsmnt->mnt_count) > 2)
- CERROR("%s: mount busy, mnt_count %d != 2\n", obd->obd_name,
- atomic_read(&obd->u.mds.mds_vfsmnt->mnt_count));
+ must_put = server_put_mount(obd->obd_name, mds->mds_vfsmnt);
+ /* must_put is for old method (l_p_m returns non-0 on err) */
/* We can only unlock kernel if we are in the context of sys_ioctl,
otherwise we never called lock_kernel */
unlock_kernel();
must_relock++;
}
-
- mntput(mds->mds_vfsmnt);
+
+ if (must_put)
+ /* In case we didn't mount with lustre_get_mount -- old method*/
+ mntput(mds->mds_vfsmnt);
obd->u.obt.obt_sb = NULL;
ldlm_namespace_free(obd->obd_namespace, obd->obd_force);
#include <obd_support.h>
#include <lustre_lib.h>
#include <lustre_fsfilt.h>
+#include <lustre_disk.h>
#include <libcfs/list.h>
#include "mds_internal.h"
-#define HEALTH_CHECK "health_check"
/* Add client data to the MDS. We use a bitmap to locate a free space
* in the last_rcvd file if cl_off is -1 (i.e. a new client).
cl_idx, med->med_mcd->mcd_uuid);
med->med_lr_idx = cl_idx;
- med->med_lr_off = le32_to_cpu(mds->mds_server_data->msd_client_start) +
- (cl_idx * le16_to_cpu(mds->mds_server_data->msd_client_size));
+ med->med_lr_off = le32_to_cpu(mds->mds_server_data->lsd_client_start) +
+ (cl_idx * le16_to_cpu(mds->mds_server_data->lsd_client_size));
LASSERTF(med->med_lr_off > 0, "med_lr_off = %llu\n", med->med_lr_off);
if (new_client) {
static int mds_init_server_data(struct obd_device *obd, struct file *file)
{
struct mds_obd *mds = &obd->u.mds;
- struct mds_server_data *msd;
+ struct lr_server_data *lsd;
struct mds_client_data *mcd = NULL;
loff_t off = 0;
unsigned long last_rcvd_size = file->f_dentry->d_inode->i_size;
ENTRY;
/* ensure padding in the struct is the correct size */
- LASSERT(offsetof(struct mds_server_data, msd_padding) +
- sizeof(msd->msd_padding) == LR_SERVER_SIZE);
+ LASSERT(offsetof(struct lr_server_data, lsd_padding) +
+ sizeof(lsd->lsd_padding) == LR_SERVER_SIZE);
LASSERT(offsetof(struct mds_client_data, mcd_padding) +
sizeof(mcd->mcd_padding) == LR_CLIENT_SIZE);
- OBD_ALLOC_WAIT(msd, sizeof(*msd));
- if (!msd)
+ OBD_ALLOC_WAIT(lsd, sizeof(*lsd));
+ if (!lsd)
RETURN(-ENOMEM);
OBD_ALLOC_WAIT(mds->mds_client_bitmap, LR_MAX_CLIENTS / 8);
if (!mds->mds_client_bitmap) {
- OBD_FREE(msd, sizeof(*msd));
+ OBD_FREE(lsd, sizeof(*lsd));
RETURN(-ENOMEM);
}
- mds->mds_server_data = msd;
+ mds->mds_server_data = lsd;
if (last_rcvd_size == 0) {
- CWARN("%s: initializing new %s\n", obd->obd_name, LAST_RCVD);
-
- memcpy(msd->msd_uuid, obd->obd_uuid.uuid,sizeof(msd->msd_uuid));
- msd->msd_last_transno = 0;
- mount_count = msd->msd_mount_count = 0;
- msd->msd_server_size = cpu_to_le32(LR_SERVER_SIZE);
- msd->msd_client_start = cpu_to_le32(LR_CLIENT_START);
- msd->msd_client_size = cpu_to_le16(LR_CLIENT_SIZE);
- msd->msd_feature_rocompat = cpu_to_le32(OBD_ROCOMPAT_LOVOBJID);
+ LCONSOLE_WARN("%s: new disk, initializing\n", obd->obd_name);
+
+ memcpy(lsd->lsd_uuid, obd->obd_uuid.uuid,sizeof(lsd->lsd_uuid));
+ lsd->lsd_last_transno = 0;
+ mount_count = lsd->lsd_mount_count = 0;
+ lsd->lsd_server_size = cpu_to_le32(LR_SERVER_SIZE);
+ lsd->lsd_client_start = cpu_to_le32(LR_CLIENT_START);
+ lsd->lsd_client_size = cpu_to_le16(LR_CLIENT_SIZE);
+ lsd->lsd_feature_rocompat = cpu_to_le32(OBD_ROCOMPAT_LOVOBJID);
+ lsd->lsd_feature_incompat = cpu_to_le32(OBD_INCOMPAT_MDT |
+ OBD_INCOMPAT_COMMON_LR);
} else {
- rc = fsfilt_read_record(obd, file, msd, sizeof(*msd), &off);
+ rc = fsfilt_read_record(obd, file, lsd, sizeof(*lsd), &off);
if (rc) {
CERROR("error reading MDS %s: rc %d\n", LAST_RCVD, rc);
GOTO(err_msd, rc);
}
- if (strcmp(msd->msd_uuid, obd->obd_uuid.uuid) != 0) {
+ if (strcmp(lsd->lsd_uuid, obd->obd_uuid.uuid) != 0) {
LCONSOLE_ERROR("Trying to start OBD %s using the wrong"
" disk %s. Were the /dev/ assignments "
"rearranged?\n",
- obd->obd_uuid.uuid, msd->msd_uuid);
+ obd->obd_uuid.uuid, lsd->lsd_uuid);
GOTO(err_msd, rc = -EINVAL);
}
- mount_count = le64_to_cpu(msd->msd_mount_count);
+ mount_count = le64_to_cpu(lsd->lsd_mount_count);
}
- if (msd->msd_feature_incompat & ~cpu_to_le32(MDT_INCOMPAT_SUPP)) {
+
+ if (lsd->lsd_feature_incompat & ~cpu_to_le32(MDT_INCOMPAT_SUPP)) {
CERROR("%s: unsupported incompat filesystem feature(s) %x\n",
- obd->obd_name, le32_to_cpu(msd->msd_feature_incompat) &
+ obd->obd_name, le32_to_cpu(lsd->lsd_feature_incompat) &
~MDT_INCOMPAT_SUPP);
GOTO(err_msd, rc = -EINVAL);
}
-
- if (msd->msd_feature_rocompat & ~cpu_to_le32(MDT_ROCOMPAT_SUPP)) {
+ if (lsd->lsd_feature_rocompat & ~cpu_to_le32(MDT_ROCOMPAT_SUPP)) {
CERROR("%s: unsupported read-only filesystem feature(s) %x\n",
- obd->obd_name, le32_to_cpu(msd->msd_feature_rocompat) &
+ obd->obd_name, le32_to_cpu(lsd->lsd_feature_rocompat) &
~MDT_ROCOMPAT_SUPP);
/* Do something like remount filesystem read-only */
GOTO(err_msd, rc = -EINVAL);
}
+ if (!(lsd->lsd_feature_incompat & cpu_to_le32(OBD_INCOMPAT_COMMON_LR))){
+ CDEBUG(D_WARNING, "using old last_rcvd format\n");
+ lsd->lsd_mount_count = lsd->lsd_last_transno;
+ lsd->lsd_last_transno = lsd->lsd_unused;
+ /* If we update the last_rcvd, we can never go back to
+ an old install, so leave this in the old format for now.
+ lsd->lsd_feature_incompat |= cpu_to_le32(LR_INCOMPAT_COMMON_LR);
+ */
+ }
+ lsd->lsd_feature_compat = cpu_to_le32(OBD_COMPAT_MDT);
+
+ mds->mds_last_transno = le64_to_cpu(lsd->lsd_last_transno);
- mds->mds_last_transno = le64_to_cpu(msd->msd_last_transno);
-
- msd->msd_feature_compat = cpu_to_le32(OBD_COMPAT_MDT);
CDEBUG(D_INODE, "%s: server last_transno: "LPU64"\n",
obd->obd_name, mds->mds_last_transno);
CDEBUG(D_INODE, "%s: server mount_count: "LPU64"\n",
obd->obd_name, mount_count + 1);
CDEBUG(D_INODE, "%s: server data size: %u\n",
- obd->obd_name, le32_to_cpu(msd->msd_server_size));
+ obd->obd_name, le32_to_cpu(lsd->lsd_server_size));
CDEBUG(D_INODE, "%s: per-client data start: %u\n",
- obd->obd_name, le32_to_cpu(msd->msd_client_start));
+ obd->obd_name, le32_to_cpu(lsd->lsd_client_start));
CDEBUG(D_INODE, "%s: per-client data size: %u\n",
- obd->obd_name, le32_to_cpu(msd->msd_client_size));
+ obd->obd_name, le32_to_cpu(lsd->lsd_client_size));
CDEBUG(D_INODE, "%s: last_rcvd size: %lu\n",
obd->obd_name, last_rcvd_size);
CDEBUG(D_INODE, "%s: last_rcvd clients: %lu\n", obd->obd_name,
- last_rcvd_size <= le32_to_cpu(msd->msd_client_start) ? 0 :
- (last_rcvd_size - le32_to_cpu(msd->msd_client_start)) /
- le16_to_cpu(msd->msd_client_size));
+ last_rcvd_size <= le32_to_cpu(lsd->lsd_client_start) ? 0 :
+ (last_rcvd_size - le32_to_cpu(lsd->lsd_client_start)) /
+ le16_to_cpu(lsd->lsd_client_size));
+
+ if (!lsd->lsd_server_size || !lsd->lsd_client_start ||
+ !lsd->lsd_client_size) {
+ CERROR("Bad last_rcvd contents!\n");
+ GOTO(err_msd, rc = -EINVAL);
+ }
/* When we do a clean MDS shutdown, we save the last_transno into
* the header. If we find clients with higher last_transno values
* then those clients may need recovery done. */
- for (cl_idx = 0, off = le32_to_cpu(msd->msd_client_start);
+ for (cl_idx = 0, off = le32_to_cpu(lsd->lsd_client_start);
off < last_rcvd_size; cl_idx++) {
__u64 last_transno;
struct obd_export *exp;
/* Don't assume off is incremented properly by
* fsfilt_read_record(), in case sizeof(*mcd)
- * isn't the same as msd->msd_client_size. */
- off = le32_to_cpu(msd->msd_client_start) +
- cl_idx * le16_to_cpu(msd->msd_client_size);
+ * isn't the same as lsd->lsd_client_size. */
+ off = le32_to_cpu(lsd->lsd_client_start) +
+ cl_idx * le16_to_cpu(lsd->lsd_client_size);
rc = fsfilt_read_record(obd, file, mcd, sizeof(*mcd), &off);
if (rc) {
CERROR("error reading MDS %s idx %d, off %llu: rc %d\n",
*/
CDEBUG(D_HA, "RCVRNG CLIENT uuid: %s idx: %d lr: "LPU64
" srv lr: "LPU64" lx: "LPU64"\n", mcd->mcd_uuid, cl_idx,
- last_transno, le64_to_cpu(msd->msd_last_transno),
+ last_transno, le64_to_cpu(lsd->lsd_last_transno),
le64_to_cpu(mcd->mcd_last_xid));
exp = class_new_export(obd, (struct obd_uuid *)mcd->mcd_uuid);
}
mds->mds_mount_count = mount_count + 1;
- msd->msd_mount_count = cpu_to_le64(mds->mds_mount_count);
+ lsd->lsd_mount_count = cpu_to_le64(mds->mds_mount_count);
/* save it, so mount count and last_transno is current */
rc = mds_update_server_data(obd, 1);
RETURN(rc);
mds->mds_vfsmnt = mnt;
+ /* why not mnt->mnt_sb instead of mnt->mnt_root->d_inode->i_sb? */
obd->u.obt.obt_sb = mnt->mnt_root->d_inode->i_sb;
fsfilt_setup(obd, obd->u.obt.obt_sb);
}
mds->mds_pending_dir = dentry;
- dentry = simple_mkdir(current->fs->pwd, "LOGS", 0777, 1);
+ /* COMPAT_146 */
+ dentry = simple_mkdir(current->fs->pwd, MDT_LOGS_DIR, 0777, 1);
if (IS_ERR(dentry)) {
rc = PTR_ERR(dentry);
- CERROR("cannot create LOGS directory: rc = %d\n", rc);
+ CERROR("cannot create %s directory: rc = %d\n",
+ MDT_LOGS_DIR, rc);
GOTO(err_pending, rc);
}
mds->mds_logs_dir = dentry;
+ /* end COMPAT_146 */
dentry = simple_mkdir(current->fs->pwd, "OBJECTS", 0777, 1);
if (IS_ERR(dentry)) {
int rc = 0;
if (obd->obd_fail)
- CWARN("%s: shutting down for failover; client state will "
- "be preserved.\n", obd->obd_name);
+ LCONSOLE_WARN("%s: shutting down for failover; client state "
+ "will be preserved.\n", obd->obd_name);
class_disconnect_exports(obd); /* cleans up client info too */
mds_server_free_data(mds);
#ifndef _MDS_INTERNAL_H
#define _MDS_INTERNAL_H
-#include <linux/lustre_disk.h>
+#include <lustre_disk.h>
#include <lustre_mds.h>
#define MDT_ROCOMPAT_SUPP (OBD_ROCOMPAT_LOVOBJID)
-
-#define MDT_INCOMPAT_SUPP (OBD_INCOMPAT_MDT)
-
-/* Data stored per server at the head of the last_rcvd file. In le32 order.
- * Try to keep this the same as fsd_server_data so we might one day merge. */
-struct mds_server_data {
- __u8 msd_uuid[40]; /* server UUID */
- __u64 msd_last_transno; /* last completed transaction ID */
- __u64 msd_mount_count; /* MDS incarnation number */
- __u64 msd_mount_count_new; /* future MDS incarnation number */
- __u32 msd_feature_compat; /* compatible feature flags */
- __u32 msd_feature_rocompat;/* read-only compatible feature flags */
- __u32 msd_feature_incompat;/* incompatible feature flags */
- __u32 msd_server_size; /* size of server data area */
- __u32 msd_client_start; /* start of per-client data area */
- __u16 msd_client_size; /* size of per-client data area */
- __u16 msd_subdir_count; /* number of subdirectories for objects */
- __u64 msd_catalog_oid; /* recovery catalog object id */
- __u32 msd_catalog_ogen; /* recovery catalog inode generation */
- __u8 msd_peeruuid[40]; /* UUID of LOV/OSC associated with MDS */
- __u32 msd_ost_index; /* index number of OST in LOV */
- __u32 msd_mds_index; /* index number of MDS in LMV */
- __u8 msd_padding[LR_SERVER_SIZE - 148];
-};
+#define MDT_INCOMPAT_SUPP (OBD_INCOMPAT_MDT | OBD_INCOMPAT_COMMON_LR)
/* Data stored per client in the last_rcvd file. In le32 order. */
struct mds_client_data {
void mds_lov_update_objids(struct obd_device *obd, obd_id *ids);
int mds_lov_clear_orphans(struct mds_obd *mds, struct obd_uuid *ost_uuid);
int mds_lov_set_nextid(struct obd_device *obd);
-int mds_lov_start_synchronize(struct obd_device *obd, struct obd_uuid *uuid,
- int nonblock);
+int mds_lov_start_synchronize(struct obd_device *obd,
+ struct obd_device *watched,
+ void *data, int nonblock);
int mds_post_mds_lovconf(struct obd_device *obd);
int mds_notify(struct obd_device *obd, struct obd_device *watched,
- enum obd_notify_event ev);
+ enum obd_notify_event ev, void *data);
int mds_convert_lov_ea(struct obd_device *obd, struct inode *inode,
struct lov_mds_md *lmm, int lmm_size);
void mds_objids_from_lmm(obd_id *ids, struct lov_mds_md *lmm,
ENTRY;
lctxt = llog_get_context(lov_obd, ctxt->loc_idx);
- rc = llog_cancel(lctxt, lsm, count, cookies,flags);
+ rc = llog_cancel(lctxt, lsm, count, cookies, flags);
RETURN(rc);
}
lock_kernel();
for (i = 0; i < mds->mds_lov_desc.ld_tgt_count; i++)
- if (ids[i] > (mds->mds_lov_objids)[i])
+ if (ids[i] > (mds->mds_lov_objids)[i]) {
(mds->mds_lov_objids)[i] = ids[i];
+ mds->mds_lov_objids_dirty = 1;
+ }
unlock_kernel();
EXIT;
}
struct mds_obd *mds = &obd->u.mds;
obd_id *ids;
loff_t off = 0;
- int i, rc, size = mds->mds_lov_desc.ld_tgt_count * sizeof(*ids);
+ int i, rc, size;
ENTRY;
- if (mds->mds_lov_objids != NULL)
+ LASSERT(!mds->mds_lov_objids_size);
+ LASSERT(!mds->mds_lov_objids_dirty);
+
+ /* Read everything in the file, even if our current lov desc
+ has fewer targets. Old targets not in the lov descriptor
+ during mds setup may still have valid objids. */
+ size = mds->mds_lov_objid_filp->f_dentry->d_inode->i_size;
+ if (size == 0)
RETURN(0);
OBD_ALLOC(ids, size);
if (ids == NULL)
RETURN(-ENOMEM);
mds->mds_lov_objids = ids;
+ mds->mds_lov_objids_size = size;
- if (mds->mds_lov_objid_filp->f_dentry->d_inode->i_size == 0)
- RETURN(0);
rc = fsfilt_read_record(obd, mds->mds_lov_objid_filp, ids, size, &off);
if (rc < 0) {
CERROR("Error reading objids %d\n", rc);
- } else {
- mds->mds_lov_objids_valid = 1;
- rc = 0;
+ RETURN(rc);
}
-
- for (i = 0; i < mds->mds_lov_desc.ld_tgt_count; i++)
+
+ mds->mds_lov_objids_in_file = size / sizeof(*ids);
+
+ for (i = 0; i < mds->mds_lov_objids_in_file; i++) {
CDEBUG(D_INFO, "read last object "LPU64" for idx %d\n",
mds->mds_lov_objids[i], i);
-
- RETURN(rc);
+ }
+ RETURN(0);
}
int mds_lov_write_objids(struct obd_device *obd)
{
struct mds_obd *mds = &obd->u.mds;
loff_t off = 0;
- int i, rc, size = mds->mds_lov_desc.ld_tgt_count * sizeof(obd_id);
+ int i, rc, tgts;
ENTRY;
- for (i = 0; i < mds->mds_lov_desc.ld_tgt_count; i++)
+ if (!mds->mds_lov_objids_dirty)
+ RETURN(0);
+
+ tgts = max(mds->mds_lov_desc.ld_tgt_count, mds->mds_lov_objids_in_file);
+
+ if (!tgts)
+ RETURN(0);
+
+ for (i = 0; i < tgts; i++)
CDEBUG(D_INFO, "writing last object "LPU64" for idx %d\n",
mds->mds_lov_objids[i], i);
rc = fsfilt_write_record(obd, mds->mds_lov_objid_filp,
- mds->mds_lov_objids, size, &off, 0);
+ mds->mds_lov_objids, tgts * sizeof(obd_id),
+ &off, 0);
+ if (rc >= 0) {
+ mds->mds_lov_objids_dirty = 0;
+ rc = 0;
+ }
+
RETURN(rc);
}
LASSERT(mds->mds_lov_objids != NULL);
- rc = obd_set_info_async(mds->mds_osc_exp, strlen("next_id"), "next_id",
+ rc = obd_set_info_async(mds->mds_osc_exp, strlen(KEY_NEXT_ID),
+ KEY_NEXT_ID,
mds->mds_lov_desc.ld_tgt_count,
mds->mds_lov_objids, NULL);
+
+ if (rc)
+ CERROR ("%s: mds_lov_set_nextid failed (%d)\n",
+ obd->obd_name, rc);
+
RETURN(rc);
}
-int mds_init_lov_desc(struct obd_device *obd, struct obd_export *osc_exp)
+/* Update the lov desc for a new size lov. */
+static int mds_lov_update_desc(struct obd_device *obd, struct obd_export *lov)
{
struct mds_obd *mds = &obd->u.mds;
- int valsize, rc, tgt_count;
- __u32 stripes;
+ struct lov_desc *ld;
+ __u32 size, stripes, valsize = sizeof(mds->mds_lov_desc);
+ int rc = 0;
ENTRY;
- mds->mds_has_lov_desc = 0;
- valsize = sizeof(mds->mds_lov_desc);
- rc = obd_get_info(mds->mds_osc_exp, strlen("lovdesc") + 1,
- "lovdesc", &valsize, &mds->mds_lov_desc);
- if (rc) {
- CERROR("can't get lov_desc, rc %d\n", rc);
- RETURN(rc);
+ OBD_ALLOC(ld, sizeof(*ld));
+ if (!ld)
+ RETURN(-ENOMEM);
+
+ rc = obd_get_info(lov, strlen(KEY_LOVDESC) + 1, KEY_LOVDESC,
+ &valsize, ld);
+ if (rc)
+ GOTO(out, rc);
+
+ /* The size of the LOV target table may have increased. */
+ size = ld->ld_tgt_count * sizeof(obd_id);
+ if ((mds->mds_lov_objids_size == 0) ||
+ (size > mds->mds_lov_objids_size)) {
+ obd_id *ids;
+
+ /* add room by powers of 2 */
+ size = 1;
+ while (size < ld->ld_tgt_count)
+ size = size << 1;
+ size = size * sizeof(obd_id);
+
+ OBD_ALLOC(ids, size);
+ if (ids == NULL)
+ GOTO(out, rc = -ENOMEM);
+ memset(ids, 0, size);
+ if (mds->mds_lov_objids_size) {
+ obd_id *old_ids = mds->mds_lov_objids;
+ memcpy(ids, mds->mds_lov_objids,
+ mds->mds_lov_objids_size);
+ mds->mds_lov_objids = ids;
+ OBD_FREE(old_ids, mds->mds_lov_objids_size);
+ }
+ mds->mds_lov_objids = ids;
+ mds->mds_lov_objids_size = size;
}
- mds->mds_has_lov_desc = 1;
- tgt_count = mds->mds_lov_desc.ld_tgt_count;
- stripes = min(tgt_count, LOV_MAX_STRIPE_COUNT);
+ /* Don't change the mds_lov_desc until the objids size matches the
+ count (paranoia) */
+ mds->mds_lov_desc = *ld;
+ CDEBUG(D_CONFIG, "updated lov_desc, tgt_count: %d\n",
+ mds->mds_lov_desc.ld_tgt_count);
+ stripes = min((__u32)LOV_MAX_STRIPE_COUNT,
+ max(mds->mds_lov_desc.ld_tgt_count,
+ mds->mds_lov_objids_in_file));
mds->mds_max_mdsize = lov_mds_md_size(stripes);
mds->mds_max_cookiesize = stripes * sizeof(struct llog_cookie);
+ CDEBUG(D_CONFIG, "updated max_mdsize/max_cookiesize: %d/%d\n",
+ mds->mds_max_mdsize, mds->mds_max_cookiesize);
+
+out:
+ OBD_FREE(ld, sizeof(*ld));
+ RETURN(rc);
+}
- CDEBUG(D_HA, "updated lov_desc, tgt_count: %d\n", tgt_count);
- CDEBUG(D_HA, "updating max_mdsize/max_cookiesize: %d/%d\n",
- mds->mds_max_mdsize, mds->mds_max_cookiesize);
+#define MDSLOV_NO_INDEX -1
- RETURN(0);
+/* Inform MDS about new/updated target */
+static int mds_lov_update_mds(struct obd_device *obd,
+ struct obd_device *watched,
+ __u32 idx)
+{
+ struct mds_obd *mds = &obd->u.mds;
+ int old_count;
+ int rc = 0;
+ ENTRY;
+
+ old_count = mds->mds_lov_desc.ld_tgt_count;
+ rc = mds_lov_update_desc(obd, mds->mds_osc_exp);
+ if (rc)
+ RETURN(rc);
+
+ CDEBUG(D_CONFIG, "idx=%d, recov=%d/%d, cnt=%d/%d\n",
+ idx, obd->obd_recovering, obd->obd_async_recov, old_count,
+ mds->mds_lov_desc.ld_tgt_count);
+
+ /* idx is set as data from lov_notify. */
+ if (idx != MDSLOV_NO_INDEX && !obd->obd_recovering) {
+ if (idx >= mds->mds_lov_desc.ld_tgt_count) {
+ CERROR("index %d > count %d!\n", idx,
+ mds->mds_lov_desc.ld_tgt_count);
+ RETURN(-EINVAL);
+ }
+
+ if (idx >= mds->mds_lov_objids_in_file) {
+ /* We never read this lastid; ask the osc */
+ obd_id lastid;
+ __u32 size = sizeof(lastid);
+ rc = obd_get_info(watched->obd_self_export,
+ strlen("last_id"),
+ "last_id", &size, &lastid);
+ if (rc)
+ RETURN(rc);
+ mds->mds_lov_objids[idx] = lastid;
+ mds->mds_lov_objids_dirty = 1;
+ mds_lov_write_objids(obd);
+ } else {
+ /* We have read this lastid from disk; tell the osc.
+ Don't call this during recovery. */
+ rc = mds_lov_set_nextid(obd);
+ }
+
+ CDEBUG(D_CONFIG, "last object "LPU64" from OST %d\n",
+ mds->mds_lov_objids[idx], idx);
+ }
+
+ /* If we added a target we have to reconnect the llogs */
+ /* Only do this at first add (idx), or the first time after recovery */
+ if (idx != MDSLOV_NO_INDEX || 1/*FIXME*/) {
+ CDEBUG(D_CONFIG, "reset llogs idx=%d\n", idx);
+ /* These two must be atomic */
+ down(&mds->mds_orphan_recovery_sem);
+ obd_llog_finish(obd, old_count);
+ llog_cat_initialize(obd, mds->mds_lov_desc.ld_tgt_count);
+ up(&mds->mds_orphan_recovery_sem);
+ }
+
+ RETURN(rc);
}
/* update the LOV-OSC knowledge of the last used object id's */
GOTO(err_discon, rc);
}
- /* init lov_desc + easize */
- rc = mds_init_lov_desc(obd, mds->mds_osc_exp);
- if (rc)
- GOTO(err_reg, rc);
-
rc = mds_lov_read_objids(obd);
if (rc) {
CERROR("cannot read %s: rc = %d\n", "lov_objids", rc);
GOTO(err_reg, rc);
}
+ rc = mds_lov_update_desc(obd, mds->mds_osc_exp);
+ if (rc)
+ GOTO(err_reg, rc);
+
+ /* tgt_count may be 0! */
rc = llog_cat_initialize(obd, mds->mds_lov_desc.ld_tgt_count);
if (rc) {
CERROR("failed to initialize catalog %d\n", rc);
/* If we're mounting this code for the first time on an existing FS,
* we need to populate the objids array from the real OST values */
- if (!mds->mds_lov_objids_valid) {
+ if (mds->mds_lov_desc.ld_tgt_count > mds->mds_lov_objids_in_file) {
int size = sizeof(obd_id) * mds->mds_lov_desc.ld_tgt_count;
rc = obd_get_info(mds->mds_osc_exp, strlen("last_id"),
"last_id", &size, mds->mds_lov_objids);
for (i = 0; i < mds->mds_lov_desc.ld_tgt_count; i++)
CWARN("got last object "LPU64" from OST %d\n",
mds->mds_lov_objids[i], i);
- mds->mds_lov_objids_valid = 1;
+ mds->mds_lov_objids_dirty = 1;
rc = mds_lov_write_objids(obd);
if (rc)
CERROR("got last objids from OSTs, but error "
rc = llog_ioctl(ctxt, cmd, data);
pop_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL);
llog_cat_initialize(obd, mds->mds_lov_desc.ld_tgt_count);
- rc2 = obd_set_info_async(mds->mds_osc_exp, strlen("mds_conn"),
- "mds_conn", 0, NULL, NULL);
+ rc2 = obd_set_info_async(mds->mds_osc_exp,
+ strlen(KEY_MDS_CONN), KEY_MDS_CONN,
+ 0, NULL, NULL);
if (!rc)
rc = rc2;
RETURN(rc);
}
struct mds_lov_sync_info {
- struct obd_device *mlsi_obd; /* the lov device to sync */
- struct obd_uuid *mlsi_uuid; /* target to sync */
+ struct obd_device *mlsi_obd; /* the lov device to sync */
+ struct obd_device *mlsi_watched; /* target osc */
+ __u32 mlsi_index; /* index of target */
};
-static int __mds_lov_syncronize(void *data)
+/* We only sync one osc at a time, so that we don't have to hold
+ any kind of lock on the whole mds_lov_desc, which may change
+ (grow) as a result of mds_lov_add_ost. This also avoids any
+ kind of mismatch between the lov_desc and the mds_lov_desc,
+ which are not in lock-step during lov_add_obd */
+static int __mds_lov_synchronize(void *data)
{
struct mds_lov_sync_info *mlsi = data;
- struct obd_device *obd;
+ struct obd_device *obd = mlsi->mlsi_obd;
+ struct obd_device *watched = mlsi->mlsi_watched;
+ struct mds_obd *mds = &obd->u.mds;
struct obd_uuid *uuid;
+ __u32 idx = mlsi->mlsi_index;
int rc = 0;
ENTRY;
- obd = mlsi->mlsi_obd;
- uuid = mlsi->mlsi_uuid;
-
OBD_FREE(mlsi, sizeof(*mlsi));
- LASSERT(obd != NULL);
+ LASSERT(obd);
+ LASSERT(watched);
+ uuid = &watched->u.cli.cl_target_uuid;
+ LASSERT(uuid);
- rc = obd_set_info_async(obd->u.mds.mds_osc_exp, strlen("mds_conn"),
- "mds_conn", 0, uuid, NULL);
+ rc = mds_lov_update_mds(obd, watched, idx);
+ if (rc != 0)
+ GOTO(out, rc);
+
+ rc = obd_set_info_async(mds->mds_osc_exp, strlen(KEY_MDS_CONN),
+ KEY_MDS_CONN, 0, uuid, NULL);
if (rc != 0)
GOTO(out, rc);
rc = llog_connect(llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT),
- obd->u.mds.mds_lov_desc.ld_tgt_count,
+ mds->mds_lov_desc.ld_tgt_count,
NULL, NULL, uuid);
+
if (rc != 0) {
CERROR("%s: failed at llog_origin_connect: %d\n",
obd->obd_name, rc);
}
LCONSOLE_INFO("MDS %s: %s now active, resetting orphans\n",
- obd->obd_name, uuid ? (char *)uuid->uuid : "All OSCs");
+ obd->obd_name, obd_uuid2str(uuid));
if (obd->obd_stopping)
GOTO(out, rc = -ENODEV);
- rc = mds_lov_clear_orphans(&obd->u.mds, uuid);
+ rc = mds_lov_clear_orphans(mds, uuid);
if (rc != 0) {
CERROR("%s: failed at mds_lov_clear_orphans: %d\n",
obd->obd_name, rc);
GOTO(out, rc);
}
- EXIT;
out:
class_decref(obd);
- return rc;
+ RETURN(rc);
}
int mds_lov_synchronize(void *data)
{
- ptlrpc_daemonize("mds_lov_sync");
+ struct mds_lov_sync_info *mlsi = data;
+ char name[20];
- return (__mds_lov_syncronize(data));
+ sprintf(name, "ll_mlov_sync_%02u", mlsi->mlsi_index);
+ ptlrpc_daemonize(name);
+
+ RETURN(__mds_lov_synchronize(data));
}
-int mds_lov_start_synchronize(struct obd_device *obd, struct obd_uuid *uuid,
- int nonblock)
+int mds_lov_start_synchronize(struct obd_device *obd,
+ struct obd_device *watched,
+ void *data, int nonblock)
{
struct mds_lov_sync_info *mlsi;
int rc;
ENTRY;
+ LASSERT(watched);
+
OBD_ALLOC(mlsi, sizeof(*mlsi));
if (mlsi == NULL)
RETURN(-ENOMEM);
mlsi->mlsi_obd = obd;
- mlsi->mlsi_uuid = uuid;
+ mlsi->mlsi_watched = watched;
+ if (data)
+ mlsi->mlsi_index = *(__u32 *)data;
+ else
+ mlsi->mlsi_index = MDSLOV_NO_INDEX;
/* Although class_export_get(obd->obd_self_export) would lock
the MDS in place, since it's only a self-export
it doesn't lock the LOV in place. The LOV can be disconnected
- during MDS precleanup, leaving nothing for __mds_lov_syncronize.
+ during MDS precleanup, leaving nothing for __mds_lov_synchronize.
Simply taking an export ref on the LOV doesn't help, because it's
still disconnected. Taking an obd reference insures that we don't
disconnect the LOV. This of course means a cleanup won't
class_incref(obd);
if (nonblock) {
- /* Syncronize in the background */
- rc = kernel_thread(mds_lov_synchronize, mlsi, CLONE_VM | CLONE_FILES);
+ /* Synchronize in the background */
+ rc = cfs_kernel_thread(mds_lov_synchronize, mlsi,
+ CLONE_VM | CLONE_FILES);
if (rc < 0) {
CERROR("%s: error starting mds_lov_synchronize: %d\n",
obd->obd_name, rc);
class_decref(obd);
} else {
- CDEBUG(D_HA, "%s: mds_lov_synchronize thread: %d\n",
- obd->obd_name, rc);
+ CDEBUG(D_HA, "%s: mds_lov_synchronize idx=%d "
+ "thread=%d\n", obd->obd_name,
+ mlsi->mlsi_index, rc);
rc = 0;
}
} else {
- rc = __mds_lov_syncronize((void *)mlsi);
+ rc = __mds_lov_synchronize((void *)mlsi);
}
RETURN(rc);
}
int mds_notify(struct obd_device *obd, struct obd_device *watched,
- enum obd_notify_event ev)
+ enum obd_notify_event ev, void *data)
{
- struct mds_obd *mds = &obd->u.mds;
- struct obd_uuid *uuid;
int rc = 0;
ENTRY;
- if (ev != OBD_NOTIFY_ACTIVE)
+ switch (ev) {
+ /* We only handle these: */
+ case OBD_NOTIFY_ACTIVE:
+ case OBD_NOTIFY_SYNC:
+ case OBD_NOTIFY_SYNC_NONBLOCK:
+ break;
+ default:
RETURN(0);
+ }
+
+ CDEBUG(D_CONFIG, "notify %s ev=%d\n", watched->obd_name, ev);
- if (strcmp(watched->obd_type->typ_name, LUSTRE_OSC_NAME)) {
+ if (strcmp(watched->obd_type->typ_name, LUSTRE_OSC_NAME) != 0) {
CERROR("unexpected notification of %s %s!\n",
watched->obd_type->typ_name, watched->obd_name);
RETURN(-EINVAL);
}
- uuid = &watched->u.cli.cl_target_uuid;
if (obd->obd_recovering) {
- /* in the case OBD is in recovery we do not reinit desc and
- * easize, as that will be done in mds_lov_connect() after
- * recovery is finished. */
CWARN("MDS %s: in recovery, not resetting orphans on %s\n",
- obd->obd_name, uuid->uuid);
- } else {
- LASSERT(llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT) != NULL);
-
- /* this may be called also in case of adding new OST, thus, we
- * have to update MDS lov_desc and re-init MDS easize. The same
- * should be done on clients. */
- rc = mds_init_lov_desc(obd, mds->mds_osc_exp);
- if (rc)
- RETURN(rc);
-
- rc = mds_lov_start_synchronize(obd, uuid, 1);
- lquota_recovery(quota_interface, obd);
+ obd->obd_name,
+ obd_uuid2str(&watched->u.cli.cl_target_uuid));
+ /* We still have to fix the lov descriptor for ost's added
+ after the mdt in the config log. They didn't make it into
+ mds_lov_connect. */
+ rc = mds_lov_update_desc(obd, obd->u.mds.mds_osc_exp);
+ RETURN(rc);
}
+
+ LASSERT(llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT) != NULL);
+ rc = mds_lov_start_synchronize(obd, watched, data,
+ !(ev == OBD_NOTIFY_SYNC));
+
+ lquota_recovery(quota_interface, obd);
+
RETURN(rc);
}
struct obd_trans_info oti = { 0 };
struct lov_stripe_md *lsm = NULL;
struct lov_mds_md *lmm = NULL;
- int rc, lmm_bufsize, lmm_size;
+ int rc, lmm_size;
struct mds_body *body;
struct obdo *oa;
void *lmm_buf;
mds_objids_from_lmm(*ids, lmm, &mds->mds_lov_desc);
- lmm_buf = lustre_msg_buf(req->rq_repmsg, offset, 0);
- lmm_bufsize = req->rq_repmsg->buflens[offset];
+ rc = fsfilt_set_md(obd, inode, *handle, lmm, lmm_size, "lov");
+ lmm_buf = lustre_msg_buf(req->rq_repmsg, offset, lmm_size);
LASSERT(lmm_buf);
- LASSERT(lmm_bufsize >= lmm_size);
memcpy(lmm_buf, lmm, lmm_size);
- rc = fsfilt_set_md(obd, inode, *handle, lmm, lmm_size, "lov");
if (rc)
CERROR("open replay failed to set md:%d\n", rc);
- RETURN(0);
+ RETURN(rc);
}
if (OBD_FAIL_CHECK_ONCE(OBD_FAIL_MDS_ALLOC_OBDO))
}
rc = fsfilt_set_md(obd, inode, *handle, lmm, lmm_size, "lov");
- lmm_buf = lustre_msg_buf(req->rq_repmsg, offset, 0);
- lmm_bufsize = req->rq_repmsg->buflens[offset];
+ lmm_buf = lustre_msg_buf(req->rq_repmsg, offset, lmm_size);
LASSERT(lmm_buf);
- LASSERT(lmm_bufsize >= lmm_size);
-
memcpy(lmm_buf, lmm, lmm_size);
obd_free_diskmd(mds->mds_osc_exp, &lmm);
out_oa:
--- /dev/null
+.Xrefs
+config.log
+config.status
+configure
+Makefile
+.deps
+TAGS
+.*.cmd
+autoMakefile.in
+autoMakefile
+*.ko
+*.mod.c
+.*.o.flags
+.tmp_versions
+.depend
--- /dev/null
+MODULES := mgc
+mgc-objs := mgc_request.o
+
+@INCLUDE_RULES@
--- /dev/null
+# Copyright (C) 2001 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+if MODULES
+modulefs_DATA = mgc$(KMODEXT)
+endif
+
+MOSTLYCLEANFILES := @MOSTLYCLEANFILES@
+DIST_SOURCES := $(mgc-objs:%.o=%.c)
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * lustre/mgc/mgc_request.c
+ * Lustre Management Client
+ *
+ * Copyright (C) 2006 Cluster File Systems, Inc.
+ * Author: Nathan Rutman <nathan@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+#define DEBUG_SUBSYSTEM S_MGC
+#define D_MGC D_CONFIG /*|D_WARNING*/
+
+#ifdef __KERNEL__
+# include <linux/module.h>
+# include <linux/pagemap.h>
+# include <linux/miscdevice.h>
+# include <linux/init.h>
+#else
+# include <liblustre.h>
+#endif
+
+#include <obd_class.h>
+#include <lustre_dlm.h>
+#include <lustre_log.h>
+#include <lustre_fsfilt.h>
+#include <lustre_disk.h>
+
+
+int mgc_logname2resid(char *logname, struct ldlm_res_id *res_id)
+{
+ char *name_end;
+ int len;
+ __u64 resname = 0;
+
+ /* fsname is at most 8 chars long at the beginning of the logname
+ e.g. "lustre-MDT0001" or "lustre" */
+ name_end = strrchr(logname, '-');
+ if (name_end)
+ len = name_end - logname;
+ else
+ len = strlen(logname);
+ if (len > 8) {
+ CERROR("fsname too long: %s\n", logname);
+ return -EINVAL;
+ }
+ if (len <= 0) {
+ CERROR("missing fsname: %s\n", logname);
+ return -EINVAL;
+ }
+ memcpy(&resname, logname, len);
+
+ memset(res_id, 0, sizeof(*res_id));
+ /* FIXME are resid names swabbed across the wire? */
+ res_id->name[0] = cpu_to_le64(resname);
+ CDEBUG(D_MGC, "log %s to resid "LPX64"/"LPX64" (%.8s)\n", logname,
+ res_id->name[0], res_id->name[1], (char *)&res_id->name[0]);
+ return 0;
+}
+EXPORT_SYMBOL(mgc_logname2resid);
+
+/********************** config llog list **********************/
+static struct list_head config_llog_list = LIST_HEAD_INIT(config_llog_list);
+static spinlock_t config_list_lock = SPIN_LOCK_UNLOCKED;
+
+/* Take a reference to a config log */
+static int config_log_get(struct config_llog_data *cld)
+{
+ ENTRY;
+ CDEBUG(D_INFO, "log %s refs %d\n", cld->cld_logname,
+ atomic_read(&cld->cld_refcount));
+ if (cld->cld_stopping)
+ RETURN(1);
+ atomic_inc(&cld->cld_refcount);
+ RETURN(0);
+}
+
+/* Drop a reference to a config log. When no longer referenced,
+ we can free the config log data */
+static void config_log_put(struct config_llog_data *cld)
+{
+ ENTRY;
+ CDEBUG(D_INFO, "log %s refs %d\n", cld->cld_logname,
+ atomic_read(&cld->cld_refcount));
+ if (atomic_dec_and_test(&cld->cld_refcount)) {
+ CDEBUG(D_MGC, "dropping config log %s\n", cld->cld_logname);
+ OBD_FREE(cld->cld_logname, strlen(cld->cld_logname) + 1);
+ if (cld->cld_cfg.cfg_instance != NULL)
+ OBD_FREE(cld->cld_cfg.cfg_instance,
+ strlen(cld->cld_cfg.cfg_instance) + 1);
+ OBD_FREE(cld, sizeof(*cld));
+ }
+ EXIT;
+}
+
+/* Find a config log by name */
+static struct config_llog_data *config_log_find(char *logname,
+ struct config_llog_instance *cfg)
+{
+ struct list_head *tmp;
+ struct config_llog_data *cld;
+ char *logid = logname;
+ int match_instance = 0;
+ ENTRY;
+
+ if (cfg && cfg->cfg_instance) {
+ match_instance++;
+ logid = cfg->cfg_instance;
+ }
+ if (!logid) {
+ CERROR("No log specified\n");
+ RETURN(ERR_PTR(-EINVAL));
+ }
+
+ spin_lock(&config_list_lock);
+ list_for_each(tmp, &config_llog_list) {
+ cld = list_entry(tmp, struct config_llog_data, cld_list_chain);
+ if (match_instance && cld->cld_cfg.cfg_instance &&
+ strcmp(logid, cld->cld_cfg.cfg_instance) == 0)
+ goto out_found;
+ if (!match_instance &&
+ strcmp(logid, cld->cld_logname) == 0)
+ goto out_found;
+ }
+ spin_unlock(&config_list_lock);
+
+ CERROR("can't get log %s\n", logid);
+ RETURN(ERR_PTR(-ENOENT));
+out_found:
+ atomic_inc(&cld->cld_refcount);
+ spin_unlock(&config_list_lock);
+ RETURN(cld);
+}
+
+/* Add this log to our list of active logs.
+ We have one active log per "mount" - client instance or servername.
+ Each instance may be at a different point in the log. */
+static int config_log_add(char *logname, struct config_llog_instance *cfg,
+ struct super_block *sb)
+{
+ struct config_llog_data *cld;
+ int rc;
+ ENTRY;
+
+ CDEBUG(D_MGC, "adding config log %s:%s\n", logname, cfg->cfg_instance);
+
+ OBD_ALLOC(cld, sizeof(*cld));
+ if (!cld)
+ RETURN(-ENOMEM);
+ OBD_ALLOC(cld->cld_logname, strlen(logname) + 1);
+ if (!cld->cld_logname) {
+ OBD_FREE(cld, sizeof(*cld));
+ RETURN(-ENOMEM);
+ }
+ strcpy(cld->cld_logname, logname);
+ cld->cld_cfg = *cfg;
+ cld->cld_cfg.cfg_last_idx = 0;
+ cld->cld_cfg.cfg_flags = 0;
+ cld->cld_cfg.cfg_sb = sb;
+ atomic_set(&cld->cld_refcount, 1);
+ if (cfg->cfg_instance != NULL) {
+ OBD_ALLOC(cld->cld_cfg.cfg_instance,
+ strlen(cfg->cfg_instance) + 1);
+ strcpy(cld->cld_cfg.cfg_instance, cfg->cfg_instance);
+ }
+ rc = mgc_logname2resid(logname, &cld->cld_resid);
+ if (rc) {
+ config_log_put(cld);
+ RETURN(rc);
+ }
+ spin_lock(&config_list_lock);
+ list_add(&cld->cld_list_chain, &config_llog_list);
+ spin_unlock(&config_list_lock);
+
+ RETURN(rc);
+}
+
+/* Stop watching for updates on this log. */
+static int config_log_end(char *logname, struct config_llog_instance *cfg)
+{
+ struct config_llog_data *cld;
+ int rc = 0;
+ ENTRY;
+
+ cld = config_log_find(logname, cfg);
+ if (IS_ERR(cld))
+ RETURN(PTR_ERR(cld));
+ /* drop the ref from the find */
+ config_log_put(cld);
+
+ cld->cld_stopping = 1;
+ spin_lock(&config_list_lock);
+ list_del(&cld->cld_list_chain);
+ spin_unlock(&config_list_lock);
+ /* drop the start ref */
+ config_log_put(cld);
+ CDEBUG(D_MGC, "end config log %s (%d)\n", logname ? logname : "client",
+ rc);
+ RETURN(rc);
+}
+
+/* Failsafe */
+static void config_log_end_all(void)
+{
+ struct list_head *tmp, *n;
+ struct config_llog_data *cld;
+ ENTRY;
+
+ spin_lock(&config_list_lock);
+ list_for_each_safe(tmp, n, &config_llog_list) {
+ cld = list_entry(tmp, struct config_llog_data, cld_list_chain);
+ CERROR("conflog failsafe %s\n", cld->cld_logname);
+ list_del(&cld->cld_list_chain);
+ config_log_put(cld);
+ }
+ spin_unlock(&config_list_lock);
+ EXIT;
+}
+
+
+/********************** class fns **********************/
+
+static int mgc_fs_setup(struct obd_device *obd, struct super_block *sb,
+ struct vfsmount *mnt)
+{
+ struct lvfs_run_ctxt saved;
+ struct lustre_sb_info *lsi = s2lsi(sb);
+ struct client_obd *cli = &obd->u.cli;
+ struct dentry *dentry;
+ char *label;
+ int err = 0;
+ ENTRY;
+
+ LASSERT(lsi);
+ LASSERT(lsi->lsi_srv_mnt == mnt);
+
+ /* The mgc fs exclusion sem. Only one fs can be setup at a time. */
+ down(&cli->cl_mgc_sem);
+
+ cleanup_group_info();
+
+ obd->obd_fsops = fsfilt_get_ops(MT_STR(lsi->lsi_ldd));
+ if (IS_ERR(obd->obd_fsops)) {
+ up(&cli->cl_mgc_sem);
+ CERROR("No fstype %s rc=%ld\n", MT_STR(lsi->lsi_ldd),
+ PTR_ERR(obd->obd_fsops));
+ RETURN(PTR_ERR(obd->obd_fsops));
+ }
+
+ cli->cl_mgc_vfsmnt = mnt;
+ fsfilt_setup(obd, mnt->mnt_sb);
+
+ OBD_SET_CTXT_MAGIC(&obd->obd_lvfs_ctxt);
+ obd->obd_lvfs_ctxt.pwdmnt = mnt;
+ obd->obd_lvfs_ctxt.pwd = mnt->mnt_root;
+ obd->obd_lvfs_ctxt.fs = get_ds();
+
+ push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ dentry = lookup_one_len(MOUNT_CONFIGS_DIR, current->fs->pwd,
+ strlen(MOUNT_CONFIGS_DIR));
+ pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ if (IS_ERR(dentry)) {
+ err = PTR_ERR(dentry);
+ CERROR("cannot lookup %s directory: rc = %d\n",
+ MOUNT_CONFIGS_DIR, err);
+ GOTO(err_ops, err);
+ }
+ cli->cl_mgc_configs_dir = dentry;
+
+ /* We take an obd ref to insure that we can't get to mgc_cleanup
+ without calling mgc_fs_cleanup first. */
+ class_incref(obd);
+
+ label = fsfilt_get_label(obd, mnt->mnt_sb);
+ if (label)
+ CDEBUG(D_MGC, "MGC using disk labelled=%s\n", label);
+
+ /* We keep the cl_mgc_sem until mgc_fs_cleanup */
+ RETURN(0);
+
+err_ops:
+ fsfilt_put_ops(obd->obd_fsops);
+ obd->obd_fsops = NULL;
+ cli->cl_mgc_vfsmnt = NULL;
+ up(&cli->cl_mgc_sem);
+ RETURN(err);
+}
+
+static int mgc_fs_cleanup(struct obd_device *obd)
+{
+ struct client_obd *cli = &obd->u.cli;
+ int rc = 0;
+ ENTRY;
+
+ LASSERT(cli->cl_mgc_vfsmnt != NULL);
+
+ if (cli->cl_mgc_configs_dir != NULL) {
+ struct lvfs_run_ctxt saved;
+ push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ l_dput(cli->cl_mgc_configs_dir);
+ cli->cl_mgc_configs_dir = NULL;
+ pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ class_decref(obd);
+ }
+
+ cli->cl_mgc_vfsmnt = NULL;
+ if (obd->obd_fsops)
+ fsfilt_put_ops(obd->obd_fsops);
+
+ up(&cli->cl_mgc_sem);
+ RETURN(rc);
+}
+
+static int mgc_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
+{
+ int rc = 0;
+ ENTRY;
+
+ switch (stage) {
+ case OBD_CLEANUP_EARLY:
+ case OBD_CLEANUP_EXPORTS:
+ break;
+ case OBD_CLEANUP_SELF_EXP:
+ rc = obd_llog_finish(obd, 0);
+ if (rc != 0)
+ CERROR("failed to cleanup llogging subsystems\n");
+ break;
+ case OBD_CLEANUP_OBD:
+ break;
+ }
+ RETURN(rc);
+}
+
+static int mgc_cleanup(struct obd_device *obd)
+{
+ struct client_obd *cli = &obd->u.cli;
+ int rc;
+ ENTRY;
+
+ LASSERT(cli->cl_mgc_vfsmnt == NULL);
+
+ config_log_end_all();
+
+ ptlrpcd_decref();
+
+ rc = client_obd_cleanup(obd);
+ RETURN(rc);
+}
+
+static struct obd_device *the_mgc;
+
+static int mgc_setup(struct obd_device *obd, obd_count len, void *buf)
+{
+ int rc;
+ ENTRY;
+
+ ptlrpcd_addref();
+
+ rc = client_obd_setup(obd, len, buf);
+ if (rc)
+ GOTO(err_decref, rc);
+
+ rc = obd_llog_init(obd, obd, 0, NULL);
+ if (rc) {
+ CERROR("failed to setup llogging subsystems\n");
+ GOTO(err_cleanup, rc);
+ }
+
+ the_mgc = obd;
+ RETURN(rc);
+
+err_cleanup:
+ client_obd_cleanup(obd);
+err_decref:
+ ptlrpcd_decref();
+ RETURN(rc);
+}
+
+static int mgc_process_log(struct obd_device *mgc,
+ struct config_llog_data *cld);
+
+/* FIXME I don't want a thread for every cld; make a list of cld's to requeue
+ and use only 1 thread. */
+/* reenqueue the lock, reparse the log */
+static int mgc_async_requeue(void *data)
+{
+ wait_queue_head_t waitq;
+ struct l_wait_info lwi;
+ struct config_llog_data *cld = (struct config_llog_data *)data;
+ char name[24];
+ int rc = 0;
+ ENTRY;
+
+ if (!data)
+ RETURN(-EINVAL);
+ if (cld->cld_stopping)
+ GOTO(out, rc = 0);
+
+ snprintf(name, sizeof(name), "ll_log_%s", cld->cld_logname);
+ name[sizeof(name)-1] = '\0';
+ ptlrpc_daemonize(name);
+
+ CDEBUG(D_MGC, "requeue "LPX64" %s:%s\n",
+ cld->cld_resid.name[0], cld->cld_logname,
+ cld->cld_cfg.cfg_instance);
+
+ /* Sleep a few seconds to allow the server who caused
+ the lock revocation to finish its setup, plus some random
+ so everyone doesn't try to reconnect at once. */
+ init_waitqueue_head(&waitq);
+ lwi = LWI_TIMEOUT(3 * HZ + (ll_rand() & 0x7f), NULL, NULL);
+ l_wait_event(waitq, 0, &lwi);
+
+ LASSERT(the_mgc);
+
+ class_export_get(the_mgc->obd_self_export);
+#if 0
+ /* Re-send server info every time, in case MGS needs to regen its
+ logs (for write_conf). Do we need this? It's extra RPCs for
+ every server at every update. Turning it off until I'm sure
+ it's needed. */
+ server_register_target(cld->cld_cfg.cfg_sb);
+#endif
+ rc = mgc_process_log(the_mgc, cld);
+ class_export_put(the_mgc->obd_self_export);
+out:
+ /* Whether we enqueued again or not in mgc_process_log,
+ we're done with the ref from the old mgc_blocking_ast */
+ config_log_put(cld);
+
+ RETURN(rc);
+}
+
+/* based on ll_mdc_blocking_ast */
+static int mgc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
+ void *data, int flag)
+{
+ struct lustre_handle lockh;
+ struct config_llog_data *cld = (struct config_llog_data *)data;
+ int rc = 0;
+ ENTRY;
+
+ switch (flag) {
+ case LDLM_CB_BLOCKING:
+ /* mgs wants the lock, give it up... */
+ LDLM_DEBUG(lock, "MGC blocking CB");
+ ldlm_lock2handle(lock, &lockh);
+ rc = ldlm_cli_cancel(&lockh);
+ break;
+ case LDLM_CB_CANCELING: {
+ /* We've given up the lock, prepare ourselves to update. */
+ LDLM_DEBUG(lock, "MGC cancel CB");
+
+ CDEBUG(D_MGC, "Lock res "LPX64" (%.8s)\n",
+ lock->l_resource->lr_name.name[0],
+ (char *)&lock->l_resource->lr_name.name[0]);
+
+ /* Make sure not to re-enqueue when the mgc is stopping
+ (we get called from client_disconnect_export) */
+ if (!lock->l_conn_export ||
+ !lock->l_conn_export->exp_obd->u.cli.cl_conn_count) {
+ CDEBUG(D_MGC, "Disconnecting, don't requeue\n");
+ goto out_drop;
+ }
+ if (lock->l_req_mode != lock->l_granted_mode) {
+ CERROR("original grant failed, won't requeue\n");
+ goto out_drop;
+ }
+ if (!data) {
+ CERROR("missing data, won't requeue\n");
+ goto out_drop;
+ }
+ if (cld->cld_stopping) {
+ CERROR("stopping, won't requeue\n");
+ goto out_drop;
+ }
+
+ /* Re-enqueue the lock in a separate thread, because we must
+ return from this fn before that lock can be taken. */
+ rc = cfs_kernel_thread(mgc_async_requeue, data,
+ CLONE_VM | CLONE_FILES);
+ if (rc < 0) {
+ CERROR("Cannot re-enqueue thread: %d\n", rc);
+ } else {
+ rc = 0;
+ break;
+ }
+out_drop:
+ /* Drop this here or in mgc_async_requeue,
+ in either case, we're done with the reference
+ after this. */
+ config_log_put(cld);
+ break;
+ }
+ default:
+ LBUG();
+ }
+
+
+ if (rc) {
+ CERROR("%s CB failed %d:\n", flag == LDLM_CB_BLOCKING ?
+ "blocking" : "cancel", rc);
+ LDLM_ERROR(lock, "MGC ast");
+ }
+ RETURN(rc);
+}
+
+/* Take a config lock so we can get cancel notifications */
+static int mgc_enqueue(struct obd_export *exp, struct lov_stripe_md *lsm,
+ __u32 type, ldlm_policy_data_t *policy, __u32 mode,
+ int *flags, void *bl_cb, void *cp_cb, void *gl_cb,
+ void *data, __u32 lvb_len, void *lvb_swabber,
+ struct lustre_handle *lockh)
+{
+ struct config_llog_data *cld = (struct config_llog_data *)data;
+ struct obd_device *obd = class_exp2obd(exp);
+ int rc;
+ ENTRY;
+
+ CDEBUG(D_MGC, "Enqueue for %s (res "LPX64")\n", cld->cld_logname,
+ cld->cld_resid.name[0]);
+
+ /* We can only drop this config log ref when we drop the lock */
+ if (config_log_get(cld))
+ RETURN(ELDLM_LOCK_ABORTED);
+
+ /* We need a callback for every lockholder, so don't try to
+ ldlm_lock_match (see rev 1.1.2.11.2.47) */
+
+ rc = ldlm_cli_enqueue(exp, NULL, obd->obd_namespace, cld->cld_resid,
+ type, NULL, mode, flags,
+ mgc_blocking_ast, ldlm_completion_ast, NULL,
+ data, NULL, 0, NULL, lockh);
+
+ RETURN(rc);
+}
+
+static int mgc_cancel(struct obd_export *exp, struct lov_stripe_md *md,
+ __u32 mode, struct lustre_handle *lockh)
+{
+ ENTRY;
+
+ ldlm_lock_decref(lockh, mode);
+
+ RETURN(0);
+}
+
+#if 0
+static int mgc_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
+ void *karg, void *uarg)
+{
+ struct obd_device *obd = exp->exp_obd;
+ struct obd_ioctl_data *data = karg;
+ struct llog_ctxt *ctxt;
+ struct lvfs_run_ctxt saved;
+ int rc;
+ ENTRY;
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+ MOD_INC_USE_COUNT;
+#else
+ if (!try_module_get(THIS_MODULE)) {
+ CERROR("Can't get module. Is it alive?");
+ return -EINVAL;
+ }
+#endif
+ switch (cmd) {
+ /* REPLicator context */
+ case OBD_IOC_PARSE: {
+ CERROR("MGC parsing llog %s\n", data->ioc_inlbuf1);
+ ctxt = llog_get_context(exp->exp_obd, LLOG_CONFIG_REPL_CTXT);
+ rc = class_config_parse_llog(ctxt, data->ioc_inlbuf1, NULL);
+ GOTO(out, rc);
+ }
+#ifdef __KERNEL__
+ case OBD_IOC_LLOG_INFO:
+ case OBD_IOC_LLOG_PRINT: {
+ ctxt = llog_get_context(obd, LLOG_CONFIG_REPL_CTXT);
+ rc = llog_ioctl(ctxt, cmd, data);
+
+ GOTO(out, rc);
+ }
+#endif
+ /* ORIGinator context */
+ case OBD_IOC_DUMP_LOG: {
+ ctxt = llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT);
+ push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ rc = class_config_dump_llog(ctxt, data->ioc_inlbuf1, NULL);
+ pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ if (rc)
+ RETURN(rc);
+
+ GOTO(out, rc);
+ }
+ default:
+ CERROR("mgc_ioctl(): unrecognised ioctl %#x\n", cmd);
+ GOTO(out, rc = -ENOTTY);
+ }
+out:
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+ MOD_DEC_USE_COUNT;
+#else
+ module_put(THIS_MODULE);
+#endif
+
+ return rc;
+}
+#endif
+
+/* Send target_reg message to MGS */
+static int mgc_target_register(struct obd_export *exp,
+ struct mgs_target_info *mti)
+{
+ struct ptlrpc_request *req;
+ struct mgs_target_info *req_mti, *rep_mti;
+ int size = sizeof(*req_mti);
+ int rep_size = sizeof(*mti);
+ int rc;
+ ENTRY;
+
+ req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MGS_VERSION,
+ MGS_TARGET_REG, 1, &size, NULL);
+ if (!req)
+ RETURN(-ENOMEM);
+
+ req_mti = lustre_msg_buf(req->rq_reqmsg, 0, sizeof(*req_mti));
+ if (!req_mti)
+ RETURN(-ENOMEM);
+ memcpy(req_mti, mti, sizeof(*req_mti));
+
+ req->rq_replen = lustre_msg_size(1, &rep_size);
+
+ CDEBUG(D_MGC, "register %s\n", mti->mti_svname);
+
+ rc = ptlrpc_queue_wait(req);
+ if (!rc) {
+ rep_mti = lustre_swab_repbuf(req, 0, sizeof(*rep_mti),
+ lustre_swab_mgs_target_info);
+ memcpy(mti, rep_mti, sizeof(*rep_mti));
+ CDEBUG(D_MGC, "register %s got index = %d\n",
+ mti->mti_svname, mti->mti_stripe_index);
+ } else {
+ CERROR("register failed. rc=%d\n", rc);
+ }
+ ptlrpc_req_finished(req);
+
+ RETURN(rc);
+}
+
+int mgc_set_info_async(struct obd_export *exp, obd_count keylen,
+ void *key, obd_count vallen, void *val,
+ struct ptlrpc_request_set *set)
+{
+ struct obd_import *imp = class_exp2cliimp(exp);
+ int rc = -EINVAL;
+ ENTRY;
+
+ /* Try to "recover" the initial connection; i.e. retry */
+ if (KEY_IS(KEY_INIT_RECOV)) {
+ if (vallen != sizeof(int))
+ RETURN(-EINVAL);
+ imp->imp_initial_recov = *(int *)val;
+ CDEBUG(D_HA, "%s: set imp_initial_recov = %d\n",
+ exp->exp_obd->obd_name, imp->imp_initial_recov);
+ RETURN(0);
+ }
+ /* Turn off initial_recov after we try all backup servers once */
+ if (KEY_IS(KEY_INIT_RECOV_BACKUP)) {
+ int value;
+ if (vallen != sizeof(int))
+ RETURN(-EINVAL);
+ value = *(int *)val;
+ imp->imp_initial_recov_bk = value > 0;
+ if (imp->imp_invalid || value > 1) {
+ /* Resurrect if we previously died */
+ CDEBUG(D_MGC, "Reactivate %s %d:%d:%d:%s\n",
+ imp->imp_obd->obd_name, value,
+ imp->imp_deactive, imp->imp_invalid,
+ ptlrpc_import_state_name(imp->imp_state));
+ /* can't put this in obdclass, module loop with ptlrpc*/
+ /* This seems to be necessary when restarting a
+ combo mgs/mdt while the mgc is alive */
+ ptlrpc_invalidate_import(imp);
+ /* Remove 'invalid' flag */
+ ptlrpc_activate_import(imp);
+ /* Attempt a new connect */
+ ptlrpc_recover_import(imp, NULL);
+ }
+ RETURN(0);
+ }
+ /* Hack alert */
+ if (KEY_IS("register_target")) {
+ struct mgs_target_info *mti;
+ if (vallen != sizeof(struct mgs_target_info))
+ RETURN(-EINVAL);
+ mti = (struct mgs_target_info *)val;
+ CDEBUG(D_MGC, "register_target %s %#x\n",
+ mti->mti_svname, mti->mti_flags);
+ rc = mgc_target_register(exp, mti);
+ RETURN(rc);
+ }
+ if (KEY_IS("set_fs")) {
+ struct super_block *sb = (struct super_block *)val;
+ struct lustre_sb_info *lsi;
+ if (vallen != sizeof(struct super_block))
+ RETURN(-EINVAL);
+ lsi = s2lsi(sb);
+ rc = mgc_fs_setup(exp->exp_obd, sb, lsi->lsi_srv_mnt);
+ if (rc) {
+ CERROR("set_fs got %d\n", rc);
+ }
+ RETURN(rc);
+ }
+ if (KEY_IS("clear_fs")) {
+ if (vallen != 0)
+ RETURN(-EINVAL);
+ rc = mgc_fs_cleanup(exp->exp_obd);
+ if (rc) {
+ CERROR("clear_fs got %d\n", rc);
+ }
+ RETURN(rc);
+ }
+
+ RETURN(rc);
+}
+
+static int mgc_import_event(struct obd_device *obd,
+ struct obd_import *imp,
+ enum obd_import_event event)
+{
+ int rc = 0;
+
+ LASSERT(imp->imp_obd == obd);
+ CDEBUG(D_MGC, "import event %#x\n", event);
+
+ switch (event) {
+ case IMP_EVENT_INVALIDATE: {
+ struct ldlm_namespace *ns = obd->obd_namespace;
+ ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY);
+ break;
+ }
+ case IMP_EVENT_DISCON:
+ /* MGC imports should not wait for recovery */
+ ptlrpc_invalidate_import(imp);
+ break;
+ case IMP_EVENT_INACTIVE:
+ case IMP_EVENT_ACTIVE:
+ case IMP_EVENT_OCD:
+ break;
+ default:
+ CERROR("Unknown import event %#x\n", event);
+ LBUG();
+ }
+ RETURN(rc);
+}
+
+static int mgc_llog_init(struct obd_device *obd, struct obd_device *tgt,
+ int count, struct llog_catid *logid)
+{
+ struct llog_ctxt *ctxt;
+ int rc;
+ ENTRY;
+
+ rc = llog_setup(obd, LLOG_CONFIG_ORIG_CTXT, tgt, 0, NULL,
+ &llog_lvfs_ops);
+ if (rc)
+ RETURN(rc);
+
+ rc = llog_setup(obd, LLOG_CONFIG_REPL_CTXT, tgt, 0, NULL,
+ &llog_client_ops);
+ if (rc == 0) {
+ ctxt = llog_get_context(obd, LLOG_CONFIG_REPL_CTXT);
+ ctxt->loc_imp = obd->u.cli.cl_import;
+ }
+
+ RETURN(rc);
+}
+
+static int mgc_llog_finish(struct obd_device *obd, int count)
+{
+ int rc;
+ ENTRY;
+
+ rc = llog_cleanup(llog_get_context(obd, LLOG_CONFIG_REPL_CTXT));
+ rc = llog_cleanup(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT));
+
+ RETURN(rc);
+}
+
+/* identical to mgs_log_is_empty */
+static int mgc_llog_is_empty(struct obd_device *obd, struct llog_ctxt *ctxt,
+ char *name)
+{
+ struct lvfs_run_ctxt saved;
+ struct llog_handle *llh;
+ int rc = 0;
+
+ push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ rc = llog_create(ctxt, &llh, NULL, name);
+ if (rc == 0) {
+ llog_init_handle(llh, LLOG_F_IS_PLAIN, NULL);
+ rc = llog_get_size(llh);
+ llog_close(llh);
+ }
+ pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ /* header is record 1 */
+ return(rc <= 1);
+}
+
+static int mgc_copy_handler(struct llog_handle *llh, struct llog_rec_hdr *rec,
+ void *data)
+{
+ struct llog_rec_hdr local_rec = *rec;
+ struct llog_handle *local_llh = (struct llog_handle *)data;
+ char *cfg_buf = (char*) (rec + 1);
+ struct lustre_cfg *lcfg;
+ int rc = 0;
+ ENTRY;
+
+ lcfg = (struct lustre_cfg *)cfg_buf;
+
+ /* FIXME we should always write to an empty log, so remove this check.*/
+ /* append new records */
+ if (rec->lrh_index >= llog_get_size(local_llh)) {
+ rc = llog_write_rec(local_llh, &local_rec, NULL, 0,
+ (void *)cfg_buf, -1);
+
+ CDEBUG(D_INFO, "idx=%d, rc=%d, len=%d, cmd %x %s %s\n",
+ rec->lrh_index, rc, rec->lrh_len, lcfg->lcfg_command,
+ lustre_cfg_string(lcfg, 0), lustre_cfg_string(lcfg, 1));
+ } else {
+ CDEBUG(D_INFO, "skip idx=%d\n", rec->lrh_index);
+ }
+
+ RETURN(rc);
+}
+
+static int mgc_copy_llog(struct obd_device *obd, struct llog_ctxt *rctxt,
+ struct llog_ctxt *lctxt, char *logname)
+{
+ struct llog_handle *local_llh, *remote_llh;
+ struct obd_uuid *uuid;
+ int rc, rc2;
+ ENTRY;
+
+ /* open local log */
+ rc = llog_create(lctxt, &local_llh, NULL, logname);
+ if (rc)
+ RETURN(rc);
+ /* set the log header uuid for fun */
+ OBD_ALLOC_PTR(uuid);
+ obd_str2uuid(uuid, logname);
+ rc = llog_init_handle(local_llh, LLOG_F_IS_PLAIN, uuid);
+ OBD_FREE_PTR(uuid);
+ if (rc)
+ GOTO(out_closel, rc);
+
+ /* FIXME write new log to a temp name, then vfs_rename over logname
+ upon successful completion. */
+
+ /* open remote log */
+ rc = llog_create(rctxt, &remote_llh, NULL, logname);
+ if (rc)
+ GOTO(out_closel, rc);
+ rc = llog_init_handle(remote_llh, LLOG_F_IS_PLAIN, NULL);
+ if (rc)
+ GOTO(out_closer, rc);
+
+ rc = llog_process(remote_llh, mgc_copy_handler,(void *)local_llh, NULL);
+
+out_closer:
+ rc2 = llog_close(remote_llh);
+ if (!rc)
+ rc = rc2;
+out_closel:
+ rc2 = llog_close(local_llh);
+ if (!rc)
+ rc = rc2;
+
+ CDEBUG(D_MGC, "Copied remote log %s (%d)\n", logname, rc);
+ RETURN(rc);
+}
+
+DECLARE_MUTEX(llog_process_lock);
+
+/* Get a config log from the MGS and process it.
+ This func is called for both clients and servers. */
+static int mgc_process_log(struct obd_device *mgc,
+ struct config_llog_data *cld)
+{
+ struct llog_ctxt *ctxt, *lctxt;
+ struct lustre_handle lockh;
+ struct client_obd *cli = &mgc->u.cli;
+ struct lvfs_run_ctxt saved;
+ struct lustre_sb_info *lsi;
+ int rc = 0, rcl, flags = 0, must_pop = 0;
+ ENTRY;
+
+ if (!cld || !cld->cld_cfg.cfg_sb) {
+ /* This should never happen */
+ CERROR("Missing cld, aborting log update\n");
+ RETURN(-EINVAL);
+ }
+ if (cld->cld_stopping)
+ RETURN(0);
+
+ lsi = s2lsi(cld->cld_cfg.cfg_sb);
+
+ CDEBUG(D_MGC, "Process log %s:%s from %d\n", cld->cld_logname,
+ cld->cld_cfg.cfg_instance, cld->cld_cfg.cfg_last_idx + 1);
+
+ ctxt = llog_get_context(mgc, LLOG_CONFIG_REPL_CTXT);
+ if (!ctxt) {
+ CERROR("missing llog context\n");
+ RETURN(-EINVAL);
+ }
+
+ /* I don't want mutliple processes running process_log at once --
+ sounds like badness. It actually might be fine, as long as
+ we're not trying to update from the same log
+ simultaneously (in which case we should use a per-log sem.) */
+ down(&llog_process_lock);
+
+ /* Get the cfg lock on the llog */
+ rcl = mgc_enqueue(mgc->u.cli.cl_mgc_mgsexp, NULL, LDLM_PLAIN, NULL,
+ LCK_CR, &flags, NULL, NULL, NULL,
+ cld, 0, NULL, &lockh);
+ if (rcl)
+ CERROR("Can't get cfg lock: %d\n", rcl);
+
+ lctxt = llog_get_context(mgc, LLOG_CONFIG_ORIG_CTXT);
+
+ /* Copy the setup log locally if we can. Don't mess around if we're
+ running an MGS though (logs are already local). */
+ if (lctxt && lsi && (lsi->lsi_flags & LSI_SERVER) &&
+ (lsi->lsi_srv_mnt == cli->cl_mgc_vfsmnt) &&
+ !IS_MGS(lsi->lsi_ldd)) {
+ push_ctxt(&saved, &mgc->obd_lvfs_ctxt, NULL);
+ must_pop++;
+ if (rcl == 0)
+ /* Only try to copy log if we have the lock. */
+ rc = mgc_copy_llog(mgc, ctxt, lctxt, cld->cld_logname);
+ if (rcl || rc) {
+ if (mgc_llog_is_empty(mgc, lctxt, cld->cld_logname)) {
+ LCONSOLE_ERROR("Failed to get MGS log %s "
+ "and no local copy.\n",
+ cld->cld_logname);
+ GOTO(out_pop, rc = -ENOTCONN);
+ }
+ LCONSOLE_WARN("Failed to get MGS log %s, using "
+ "local copy.\n", cld->cld_logname);
+ }
+ /* Now, whether we copied or not, start using the local llog.
+ If we failed to copy, we'll start using whatever the old
+ log has. */
+ ctxt = lctxt;
+ }
+
+ /* logname and instance info should be the same, so use our
+ copy of the instance for the update. The cfg_last_idx will
+ be updated here. */
+ rc = class_config_parse_llog(ctxt, cld->cld_logname, &cld->cld_cfg);
+
+ out_pop:
+ if (must_pop)
+ pop_ctxt(&saved, &mgc->obd_lvfs_ctxt, NULL);
+
+ /* Now drop the lock so MGS can revoke it */
+ if (!rcl) {
+ rcl = mgc_cancel(mgc->u.cli.cl_mgc_mgsexp, NULL,
+ LCK_CR, &lockh);
+ if (rcl)
+ CERROR("Can't drop cfg lock: %d\n", rcl);
+ }
+
+ if (rc) {
+ CERROR("%s: the configuration '%s' could not be read "
+ "(%d) from the MGS.\n",
+ mgc->obd_name, cld->cld_logname, rc);
+ }
+
+ up(&llog_process_lock);
+
+ RETURN(rc);
+}
+
+static int mgc_process_config(struct obd_device *obd, obd_count len, void *buf)
+{
+ struct lustre_cfg *lcfg = buf;
+ int cmd;
+ int rc = 0;
+ ENTRY;
+
+ switch(cmd = lcfg->lcfg_command) {
+ case LCFG_LOV_ADD_OBD: {
+ struct mgs_target_info *mti;
+
+ if (LUSTRE_CFG_BUFLEN(lcfg, 1) !=
+ sizeof(struct mgs_target_info))
+ GOTO(out, rc = -EINVAL);
+
+ mti = (struct mgs_target_info *)lustre_cfg_buf(lcfg, 1);
+ CDEBUG(D_MGC, "add_target %s %#x\n",
+ mti->mti_svname, mti->mti_flags);
+ rc = mgc_target_register(obd->u.cli.cl_mgc_mgsexp, mti);
+ break;
+ }
+ case LCFG_LOV_DEL_OBD:
+ /* FIXME */
+ CERROR("lov_del_obd unimplemented\n");
+ rc = -ENOSYS;
+ break;
+ case LCFG_LOG_START: {
+ struct config_llog_data *cld;
+ struct config_llog_instance *cfg;
+ struct super_block *sb;
+ char *logname = lustre_cfg_string(lcfg, 1);
+ cfg = (struct config_llog_instance *)lustre_cfg_buf(lcfg, 2);
+ sb = *(struct super_block **)lustre_cfg_buf(lcfg, 3);
+
+ CDEBUG(D_MGC, "parse_log %s from %d\n", logname,
+ cfg->cfg_last_idx);
+
+ /* We're only called through here on the initial mount */
+ rc = config_log_add(logname, cfg, sb);
+ if (rc)
+ break;
+ cld = config_log_find(logname, cfg);
+ if (IS_ERR(cld)) {
+ rc = PTR_ERR(cld);
+ break;
+ }
+
+ /* COMPAT_146 */
+ /* For old logs, there was no start marker. */
+ /* FIXME only set this for old logs! */
+ cld->cld_cfg.cfg_flags |= CFG_F_MARKER;
+
+ rc = mgc_process_log(obd, cld);
+ config_log_put(cld);
+
+ break;
+ }
+ case LCFG_LOG_END: {
+ struct config_llog_instance *cfg = NULL;
+ char *logname = lustre_cfg_string(lcfg, 1);
+ if (lcfg->lcfg_bufcount >= 2)
+ cfg = (struct config_llog_instance *)lustre_cfg_buf(
+ lcfg, 2);
+ rc = config_log_end(logname, cfg);
+ break;
+ }
+ default: {
+ CERROR("Unknown command: %d\n", lcfg->lcfg_command);
+ GOTO(out, rc = -EINVAL);
+
+ }
+ }
+out:
+ RETURN(rc);
+}
+
+struct obd_ops mgc_obd_ops = {
+ .o_owner = THIS_MODULE,
+ .o_setup = mgc_setup,
+ .o_precleanup = mgc_precleanup,
+ .o_cleanup = mgc_cleanup,
+ .o_add_conn = client_import_add_conn,
+ .o_del_conn = client_import_del_conn,
+ .o_connect = client_connect_import,
+ .o_disconnect = client_disconnect_export,
+ //.o_enqueue = mgc_enqueue,
+ .o_cancel = mgc_cancel,
+ //.o_iocontrol = mgc_iocontrol,
+ .o_set_info_async = mgc_set_info_async,
+ .o_import_event = mgc_import_event,
+ .o_llog_init = mgc_llog_init,
+ .o_llog_finish = mgc_llog_finish,
+ .o_process_config = mgc_process_config,
+};
+
+int __init mgc_init(void)
+{
+ return class_register_type(&mgc_obd_ops, NULL, LUSTRE_MGC_NAME);
+}
+
+#ifdef __KERNEL__
+static void /*__exit*/ mgc_exit(void)
+{
+ class_unregister_type(LUSTRE_MGC_NAME);
+}
+
+MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
+MODULE_DESCRIPTION("Lustre Management Client");
+MODULE_LICENSE("GPL");
+
+module_init(mgc_init);
+module_exit(mgc_exit);
+#endif
--- /dev/null
+.Xrefs
+config.log
+config.status
+configure
+Makefile
+.deps
+TAGS
+.*.cmd
+autoMakefile.in
+autoMakefile
+*.ko
+*.mod.c
+.*.o.flags
+.tmp_versions
+.depend
--- /dev/null
+MODULES := mgs
+mgs-objs := mgs_handler.o mgs_fs.o mgs_llog.o lproc_mgs.o
+
+@INCLUDE_RULES@
--- /dev/null
+# Copyright (C) 2001 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+if MODULES
+modulefs_DATA = mgs$(KMODEXT)
+endif
+
+MOSTLYCLEANFILES := @MOSTLYCLEANFILES@
+DIST_SOURCES := $(mgs-objs:%.o=%.c) mgs_internal.h
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ *
+ * This file is part of the Lustre file system, http://www.lustre.org
+ * Lustre is a trademark of Cluster File Systems, Inc.
+ *
+ * You may have signed or agreed to another license before downloading
+ * this software. If so, you are bound by the terms and conditions
+ * of that agreement, and the following does not apply to you. See the
+ * LICENSE file included with this distribution for more information.
+ *
+ * If you did not agree to a different license, then this copy of Lustre
+ * is open source software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * In either case, Lustre is distributed in the hope that it will be
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * license text for more details.
+ *
+ */
+#define DEBUG_SUBSYSTEM S_CLASS
+
+#include <linux/version.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+#include <asm/statfs.h>
+#endif
+#include <obd.h>
+#include <obd_class.h>
+#include <lprocfs_status.h>
+#include "mgs_internal.h"
+
+#ifdef LPROCFS
+struct lprocfs_vars lprocfs_mgs_obd_vars[] = {
+ { 0 }
+};
+
+struct lprocfs_vars lprocfs_mgs_module_vars[] = {
+ { 0 }
+};
+
+struct lprocfs_vars lprocfs_mgt_obd_vars[] = {
+ { 0 }
+};
+
+struct lprocfs_vars lprocfs_mgt_module_vars[] = {
+ { 0 }
+};
+
+LPROCFS_INIT_VARS(mgs, lprocfs_mgs_module_vars, lprocfs_mgs_obd_vars);
+LPROCFS_INIT_VARS(mgt, lprocfs_mgt_module_vars, lprocfs_mgt_obd_vars);
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * lustre/mgs/mgs_fs.c
+ * Lustre Management Server (MGS) filesystem interface code
+ *
+ * Copyright (C) 2006 Cluster File Systems, Inc.
+ * Author: Nathan Rutman <nathan@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+#define DEBUG_SUBSYSTEM S_MGS
+
+#include <linux/module.h>
+#include <linux/kmod.h>
+#include <linux/version.h>
+#include <linux/sched.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+#include <linux/mount.h>
+#endif
+#include <obd_class.h>
+#include <obd_support.h>
+#include <lustre_disk.h>
+#include <lustre_lib.h>
+#include <lustre_fsfilt.h>
+#include <libcfs/list.h>
+#include "mgs_internal.h"
+
+/* Same as mds_fid2dentry */
+/* Look up an entry by inode number. */
+/* this function ONLY returns valid dget'd dentries with an initialized inode
+ or errors */
+static struct dentry *mgs_fid2dentry(struct mgs_obd *mgs, struct ll_fid *fid)
+{
+ char fid_name[32];
+ unsigned long ino = fid->id;
+ __u32 generation = fid->generation;
+ struct inode *inode;
+ struct dentry *result;
+
+ CDEBUG(D_DENTRY, "--> mgs_fid2dentry: ino/gen %lu/%u, sb %p\n",
+ ino, generation, mgs->mgs_sb);
+
+ if (ino == 0)
+ RETURN(ERR_PTR(-ESTALE));
+
+ snprintf(fid_name, sizeof(fid_name), "0x%lx", ino);
+
+ /* under ext3 this is neither supposed to return bad inodes
+ nor NULL inodes. */
+ result = ll_lookup_one_len(fid_name, mgs->mgs_fid_de, strlen(fid_name));
+ if (IS_ERR(result))
+ RETURN(result);
+
+ inode = result->d_inode;
+ if (!inode)
+ RETURN(ERR_PTR(-ENOENT));
+
+ if (inode->i_generation == 0 || inode->i_nlink == 0) {
+ LCONSOLE_WARN("Found inode with zero generation or link -- this"
+ " may indicate disk corruption (inode: %lu, link:"
+ " %lu, count: %d)\n", inode->i_ino,
+ (unsigned long)inode->i_nlink,
+ atomic_read(&inode->i_count));
+ l_dput(result);
+ RETURN(ERR_PTR(-ENOENT));
+ }
+
+ if (generation && inode->i_generation != generation) {
+ /* we didn't find the right inode.. */
+ CDEBUG(D_INODE, "found wrong generation: inode %lu, link: %lu, "
+ "count: %d, generation %u/%u\n", inode->i_ino,
+ (unsigned long)inode->i_nlink,
+ atomic_read(&inode->i_count), inode->i_generation,
+ generation);
+ l_dput(result);
+ RETURN(ERR_PTR(-ENOENT));
+ }
+
+ RETURN(result);
+}
+
+static struct dentry *mgs_lvfs_fid2dentry(__u64 id, __u32 gen, __u64 gr,
+ void *data)
+{
+ struct obd_device *obd = data;
+ struct ll_fid fid;
+ fid.id = id;
+ fid.generation = gen;
+ return mgs_fid2dentry(&obd->u.mgs, &fid);
+}
+
+struct lvfs_callback_ops mgs_lvfs_ops = {
+ l_fid2dentry: mgs_lvfs_fid2dentry,
+};
+
+int mgs_fs_setup(struct obd_device *obd, struct vfsmount *mnt)
+{
+ struct mgs_obd *mgs = &obd->u.mgs;
+ struct lvfs_run_ctxt saved;
+ struct dentry *dentry;
+ int rc;
+ ENTRY;
+
+ // FIXME what's this?
+ rc = cleanup_group_info();
+ if (rc)
+ RETURN(rc);
+
+ mgs->mgs_vfsmnt = mnt;
+ mgs->mgs_sb = mnt->mnt_root->d_inode->i_sb;
+
+ fsfilt_setup(obd, mgs->mgs_sb);
+
+ OBD_SET_CTXT_MAGIC(&obd->obd_lvfs_ctxt);
+ obd->obd_lvfs_ctxt.pwdmnt = mnt;
+ obd->obd_lvfs_ctxt.pwd = mnt->mnt_root;
+ obd->obd_lvfs_ctxt.fs = get_ds();
+ obd->obd_lvfs_ctxt.cb_ops = mgs_lvfs_ops;
+
+ push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+
+ /* Setup the configs dir */
+ dentry = simple_mkdir(current->fs->pwd, MOUNT_CONFIGS_DIR, 0777, 1);
+ if (IS_ERR(dentry)) {
+ rc = PTR_ERR(dentry);
+ CERROR("cannot create %s directory: rc = %d\n",
+ MOUNT_CONFIGS_DIR, rc);
+ GOTO(err_pop, rc);
+ }
+ mgs->mgs_configs_dir = dentry;
+
+ /* Need the iopen dir for fid2dentry, required by
+ LLOG_ORIGIN_HANDLE_READ_HEADER */
+ dentry = lookup_one_len("__iopen__", current->fs->pwd,
+ strlen("__iopen__"));
+ if (IS_ERR(dentry)) {
+ rc = PTR_ERR(dentry);
+ CERROR("cannot lookup __iopen__ directory: rc = %d\n", rc);
+ GOTO(err_configs, rc);
+ }
+ mgs->mgs_fid_de = dentry;
+ if (!dentry->d_inode || is_bad_inode(dentry->d_inode)) {
+ rc = -ENOENT;
+ CERROR("__iopen__ directory has no inode? rc = %d\n", rc);
+ GOTO(err_fid, rc);
+ }
+
+err_pop:
+ pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ return rc;
+err_fid:
+ dput(mgs->mgs_fid_de);
+err_configs:
+ dput(mgs->mgs_configs_dir);
+ goto err_pop;
+}
+
+int mgs_fs_cleanup(struct obd_device *obd)
+{
+ struct mgs_obd *mgs = &obd->u.mgs;
+ struct lvfs_run_ctxt saved;
+ int rc = 0;
+
+ class_disconnect_exports(obd); /* cleans up client info too */
+
+ push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+
+ if (mgs->mgs_configs_dir) {
+ /*CERROR("configs dir dcount=%d\n",
+ atomic_read(&mgs->mgs_configs_dir->d_count));*/
+ l_dput(mgs->mgs_configs_dir);
+ mgs->mgs_configs_dir = NULL;
+ }
+
+ shrink_dcache_parent(mgs->mgs_fid_de);
+ /*CERROR("fid dir dcount=%d\n",
+ atomic_read(&mgs->mgs_fid_de->d_count));*/
+ dput(mgs->mgs_fid_de);
+
+ pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+
+ return rc;
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * lustre/mgs/mgs_handler.c
+ * Lustre Management Server (mgs) request handler
+ *
+ * Copyright (C) 2006 Cluster File Systems, Inc.
+ * Author: Nathan Rutman <nathan@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+#define DEBUG_SUBSYSTEM S_MGS
+#define D_MGS D_CONFIG/*|D_WARNING*/
+
+#ifdef __KERNEL__
+# include <linux/module.h>
+# include <linux/pagemap.h>
+# include <linux/miscdevice.h>
+# include <linux/init.h>
+#else
+# include <liblustre.h>
+#endif
+
+#include <obd_class.h>
+#include <lustre_dlm.h>
+#include <lprocfs_status.h>
+#include <lustre_fsfilt.h>
+#include <lustre_commit_confd.h>
+#include <lustre_disk.h>
+#include <lustre_ver.h>
+#include "mgs_internal.h"
+
+
+/* Establish a connection to the MGS.*/
+static int mgs_connect(struct lustre_handle *conn, struct obd_device *obd,
+ struct obd_uuid *cluuid, struct obd_connect_data *data)
+{
+ struct obd_export *exp;
+ int rc;
+ ENTRY;
+
+ if (!conn || !obd || !cluuid)
+ RETURN(-EINVAL);
+
+ rc = class_connect(conn, obd, cluuid);
+ if (rc)
+ RETURN(rc);
+ exp = class_conn2export(conn);
+ LASSERT(exp);
+
+ if (data != NULL) {
+ data->ocd_connect_flags &= MGS_CONNECT_SUPPORTED;
+ exp->exp_connect_flags = data->ocd_connect_flags;
+ data->ocd_version = LUSTRE_VERSION_CODE;
+ }
+
+ if (rc) {
+ class_disconnect(exp);
+ } else {
+ class_export_put(exp);
+ }
+
+ RETURN(rc);
+}
+
+static int mgs_disconnect(struct obd_export *exp)
+{
+ unsigned long irqflags;
+ int rc;
+ ENTRY;
+
+ LASSERT(exp);
+ class_export_get(exp);
+
+ /* Disconnect early so that clients can't keep using export */
+ rc = class_disconnect(exp);
+ ldlm_cancel_locks_for_export(exp);
+
+ /* complete all outstanding replies */
+ spin_lock_irqsave(&exp->exp_lock, irqflags);
+ while (!list_empty(&exp->exp_outstanding_replies)) {
+ struct ptlrpc_reply_state *rs =
+ list_entry(exp->exp_outstanding_replies.next,
+ struct ptlrpc_reply_state, rs_exp_list);
+ struct ptlrpc_service *svc = rs->rs_service;
+
+ spin_lock(&svc->srv_lock);
+ list_del_init(&rs->rs_exp_list);
+ ptlrpc_schedule_difficult_reply(rs);
+ spin_unlock(&svc->srv_lock);
+ }
+ spin_unlock_irqrestore(&exp->exp_lock, irqflags);
+
+ class_export_put(exp);
+ RETURN(rc);
+}
+
+static int mgs_cleanup(struct obd_device *obd);
+static int mgs_handle(struct ptlrpc_request *req);
+
+/* Start the MGS obd */
+static int mgs_setup(struct obd_device *obd, obd_count len, void *buf)
+{
+ struct lprocfs_static_vars lvars;
+ struct mgs_obd *mgs = &obd->u.mgs;
+ struct lustre_mount_info *lmi;
+ struct lustre_sb_info *lsi;
+ struct vfsmount *mnt;
+ int rc = 0;
+ ENTRY;
+
+ CDEBUG(D_CONFIG, "Starting MGS\n");
+
+ /* Find our disk */
+ lmi = server_get_mount(obd->obd_name);
+ if (!lmi)
+ RETURN(rc = -EINVAL);
+
+ mnt = lmi->lmi_mnt;
+ lsi = s2lsi(lmi->lmi_sb);
+ obd->obd_fsops = fsfilt_get_ops(MT_STR(lsi->lsi_ldd));
+ if (IS_ERR(obd->obd_fsops))
+ GOTO(err_put, rc = PTR_ERR(obd->obd_fsops));
+
+ /* namespace for mgs llog */
+ obd->obd_namespace = ldlm_namespace_new("MGS", LDLM_NAMESPACE_SERVER);
+ if (obd->obd_namespace == NULL) {
+ mgs_cleanup(obd);
+ GOTO(err_ops, rc = -ENOMEM);
+ }
+
+ /* ldlm setup */
+ ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL,
+ "mgs_ldlm_client", &obd->obd_ldlm_client);
+
+ LASSERT(!lvfs_check_rdonly(lvfs_sbdev(mnt->mnt_sb)));
+
+ rc = mgs_fs_setup(obd, mnt);
+ if (rc) {
+ CERROR("%s: MGS filesystem method init failed: rc = %d\n",
+ obd->obd_name, rc);
+ GOTO(err_ns, rc);
+ }
+
+ rc = llog_start_commit_thread();
+ if (rc < 0)
+ GOTO(err_fs, rc);
+
+ rc = llog_setup(obd, LLOG_CONFIG_ORIG_CTXT, obd, 0, NULL,
+ &llog_lvfs_ops);
+ if (rc)
+ GOTO(err_fs, rc);
+
+ /* Allow reconnect attempts */
+ obd->obd_replayable = 1;
+
+ /* Internal mgs setup */
+ mgs_init_fsdb_list(obd);
+ sema_init(&mgs->mgs_sem, 1);
+
+ /* Start the service threads */
+ mgs->mgs_service =
+ ptlrpc_init_svc(MGS_NBUFS, MGS_BUFSIZE, MGS_MAXREQSIZE,
+ MGS_MAXREPSIZE, MGS_REQUEST_PORTAL,
+ MGC_REPLY_PORTAL, MGS_SERVICE_WATCHDOG_TIMEOUT,
+ mgs_handle, LUSTRE_MGS_NAME,
+ obd->obd_proc_entry, NULL, MGS_NUM_THREADS);
+
+ if (!mgs->mgs_service) {
+ CERROR("failed to start service\n");
+ GOTO(err_fs, rc = -ENOMEM);
+ }
+
+ rc = ptlrpc_start_threads(obd, mgs->mgs_service, "ll_mgs");
+ if (rc)
+ GOTO(err_thread, rc);
+
+ /* Setup proc */
+ lprocfs_init_vars(mgs, &lvars);
+ lprocfs_obd_setup(obd, lvars.obd_vars);
+
+ ping_evictor_start();
+
+ LCONSOLE_INFO("MGS %s started\n", obd->obd_name);
+
+ RETURN(0);
+
+err_thread:
+ ptlrpc_unregister_service(mgs->mgs_service);
+err_fs:
+ /* No extra cleanup needed for llog_init_commit_thread() */
+ mgs_fs_cleanup(obd);
+err_ns:
+ ldlm_namespace_free(obd->obd_namespace, 0);
+ obd->obd_namespace = NULL;
+err_ops:
+ fsfilt_put_ops(obd->obd_fsops);
+err_put:
+ server_put_mount(obd->obd_name, mgs->mgs_vfsmnt);
+ mgs->mgs_sb = 0;
+ return rc;
+}
+
+static int mgs_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
+{
+ int rc = 0;
+ ENTRY;
+
+ switch (stage) {
+ case OBD_CLEANUP_EARLY:
+ case OBD_CLEANUP_EXPORTS:
+ break;
+ case OBD_CLEANUP_SELF_EXP:
+ llog_cleanup(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT));
+ rc = obd_llog_finish(obd, 0);
+ break;
+ case OBD_CLEANUP_OBD:
+ break;
+ }
+ RETURN(rc);
+}
+
+static int mgs_ldlm_nsfree(void *data)
+{
+ struct ldlm_namespace *ns = (struct ldlm_namespace *)data;
+ int rc;
+ ENTRY;
+
+ ptlrpc_daemonize("ll_mgs_nsfree");
+ rc = ldlm_namespace_free(ns, 1 /* obd_force should always be on */);
+ RETURN(rc);
+}
+
+static int mgs_cleanup(struct obd_device *obd)
+{
+ struct mgs_obd *mgs = &obd->u.mgs;
+ lvfs_sbdev_type save_dev;
+ ENTRY;
+
+ ping_evictor_stop();
+
+ if (mgs->mgs_sb == NULL)
+ RETURN(0);
+
+ save_dev = lvfs_sbdev(mgs->mgs_sb);
+
+ ptlrpc_unregister_service(mgs->mgs_service);
+
+ lprocfs_obd_cleanup(obd);
+
+ mgs_cleanup_fsdb_list(obd);
+
+ mgs_fs_cleanup(obd);
+
+ server_put_mount(obd->obd_name, mgs->mgs_vfsmnt);
+ mgs->mgs_sb = NULL;
+
+ /* Free the namespace in it's own thread, so that if the
+ ldlm_cancel_handler put the last mgs obd ref, we won't
+ deadlock here. */
+ cfs_kernel_thread(mgs_ldlm_nsfree, obd->obd_namespace,
+ CLONE_VM | CLONE_FILES);
+
+ lvfs_clear_rdonly(save_dev);
+
+ fsfilt_put_ops(obd->obd_fsops);
+
+ LCONSOLE_INFO("%s has stopped.\n", obd->obd_name);
+ RETURN(0);
+}
+
+/* similar to filter_prepare_destroy */
+static int mgs_get_cfg_lock(struct obd_device *obd, char *fsname,
+ struct lustre_handle *lockh)
+{
+ struct ldlm_res_id res_id;
+ int rc, flags = 0;
+ ENTRY;
+
+ rc = mgc_logname2resid(fsname, &res_id);
+ if (!rc)
+ rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, res_id,
+ LDLM_PLAIN, NULL, LCK_EX, &flags,
+ ldlm_blocking_ast, ldlm_completion_ast,
+ NULL, fsname, NULL, 0, NULL, lockh);
+ if (rc)
+ CERROR("can't take cfg lock for %s (%d)\n", fsname, rc);
+
+ RETURN(rc);
+}
+
+static int mgs_put_cfg_lock(struct lustre_handle *lockh)
+{
+ ENTRY;
+ ldlm_lock_decref(lockh, LCK_EX);
+ RETURN(0);
+}
+
+/* rc=0 means ok */
+static int mgs_check_target(struct obd_device *obd, struct mgs_target_info *mti)
+{
+ int rc;
+ ENTRY;
+
+ rc = mgs_check_index(obd, mti);
+ if (rc == 0) {
+ LCONSOLE_ERROR("Index for %s has disappeared! "
+ "Regenerating this portion of the logs."
+ "\n", mti->mti_svname);
+ mti->mti_flags |= LDD_F_UPDATE;
+ rc = 1;
+ } else if (rc == -1) {
+ LCONSOLE_ERROR("Client log %s-client has disappeared! "
+ "Regenerating all logs.\n",
+ mti->mti_fsname);
+ mti->mti_flags |= LDD_F_WRITECONF;
+ rc = 1;
+ } else {
+ /* Index is correctly marked as used */
+
+ /* If the logs don't contain the mti_nids then add
+ them as failover nids */
+ rc = mgs_check_failnid(obd, mti);
+ }
+
+
+ RETURN(rc);
+}
+
+/* Called whenever a target starts up. Flags indicate first connect, etc. */
+static int mgs_handle_target_reg(struct ptlrpc_request *req)
+{
+ struct obd_device *obd = req->rq_export->exp_obd;
+ struct lustre_handle lockh;
+ struct mgs_target_info *mti, *rep_mti;
+ int rep_size = sizeof(*mti);
+ int rc = 0, lockrc;
+ ENTRY;
+
+ mti = lustre_swab_reqbuf(req, 0, sizeof(*mti),
+ lustre_swab_mgs_target_info);
+
+ if (!(mti->mti_flags & (LDD_F_WRITECONF | LDD_F_UPGRADE14 |
+ LDD_F_UPDATE))) {
+ /* We're just here as a startup ping. */
+ CDEBUG(D_MGS, "Server %s is running on %s\n",
+ mti->mti_svname, obd_export_nid2str(req->rq_export));
+ rc = mgs_check_target(obd, mti);
+ /* above will set appropriate mti flags */
+ if (!rc)
+ /* Nothing wrong, don't revoke lock */
+ GOTO(out_nolock, rc);
+ }
+
+ /* Revoke the config lock to make sure nobody is reading. */
+ /* Although actually I think it should be alright if
+ someone was reading while we were updating the logs - if we
+ revoke at the end they will just update from where they left off. */
+ lockrc = mgs_get_cfg_lock(obd, mti->mti_fsname, &lockh);
+ if (lockrc != ELDLM_OK) {
+ LCONSOLE_ERROR("%s: Can't signal other nodes to update "
+ "their configuration (%d). Updating local logs "
+ "anyhow; you might have to manually restart "
+ "other nodes to get the latest configuration.\n",
+ obd->obd_name, lockrc);
+ }
+
+ /* Log writing contention is handled by the fsdb_sem */
+
+ if (mti->mti_flags & LDD_F_WRITECONF) {
+ rc = mgs_erase_logs(obd, mti->mti_fsname);
+ mti->mti_flags |= LDD_F_UPDATE;
+ LCONSOLE_WARN("%s: Logs for fs %s were removed by user request."
+ " All servers must re-register in order to "
+ "regenerate the client log.\n",
+ obd->obd_name, mti->mti_fsname);
+ mti->mti_flags &= ~LDD_F_WRITECONF;
+ }
+
+ /* COMPAT_146 */
+ if (mti->mti_flags & LDD_F_UPGRADE14) {
+ rc = mgs_upgrade_sv_14(obd, mti);
+ if (rc) {
+ CERROR("Can't upgrade from 1.4 (%d)\n", rc);
+ GOTO(out, rc);
+ }
+
+ mti->mti_flags &= ~LDD_F_UPGRADE14;
+ /* Turn off the upgrade flag permanently */
+ mti->mti_flags |= LDD_F_REWRITE_LDD;
+ }
+ /* end COMPAT_146 */
+
+ if (mti->mti_flags & LDD_F_UPDATE) {
+ CDEBUG(D_MGS, "adding %s, index=%d\n", mti->mti_svname,
+ mti->mti_stripe_index);
+
+ /* create the log for the new target
+ and update the client/mdt logs */
+ rc = mgs_write_log_target(obd, mti);
+ if (rc) {
+ CERROR("Failed to write %s log (%d)\n",
+ mti->mti_svname, rc);
+ GOTO(out, rc);
+ }
+
+ mti->mti_flags &= ~(LDD_F_VIRGIN | LDD_F_UPDATE |
+ LDD_F_NEED_INDEX);
+ mti->mti_flags |= LDD_F_REWRITE_LDD;
+ }
+
+out:
+ /* done with log update */
+ if (lockrc == ELDLM_OK)
+ mgs_put_cfg_lock(&lockh);
+out_nolock:
+ CDEBUG(D_MGS, "replying with %s, index=%d, rc=%d\n", mti->mti_svname,
+ mti->mti_stripe_index, rc);
+ lustre_pack_reply(req, 1, &rep_size, NULL);
+ /* send back the whole mti in the reply */
+ rep_mti = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*rep_mti));
+ memcpy(rep_mti, mti, sizeof(*rep_mti));
+ RETURN(rc);
+}
+
+int mgs_handle(struct ptlrpc_request *req)
+{
+ int fail = OBD_FAIL_MGS_ALL_REPLY_NET;
+ int rc = 0;
+ ENTRY;
+
+ OBD_FAIL_RETURN(OBD_FAIL_MGS_ALL_REQUEST_NET | OBD_FAIL_ONCE, 0);
+
+ LASSERT(current->journal_info == NULL);
+ if (req->rq_reqmsg->opc != MGS_CONNECT) {
+ if (req->rq_export == NULL) {
+ CERROR("lustre_mgs: operation %d on unconnected MGS\n",
+ req->rq_reqmsg->opc);
+ req->rq_status = -ENOTCONN;
+ GOTO(out, rc = -ENOTCONN);
+ }
+ }
+
+ switch (req->rq_reqmsg->opc) {
+ case MGS_CONNECT:
+ DEBUG_REQ(D_MGS, req, "connect");
+ rc = target_handle_connect(req, mgs_handle);
+ if (!rc && (req->rq_reqmsg->conn_cnt > 1))
+ /* Make clients trying to reconnect after a MGS restart
+ happy; also requires obd_replayable */
+ lustre_msg_add_op_flags(req->rq_repmsg,
+ MSG_CONNECT_RECONNECT);
+ break;
+ case MGS_DISCONNECT:
+ DEBUG_REQ(D_MGS, req, "disconnect");
+ rc = target_handle_disconnect(req);
+ req->rq_status = rc; /* superfluous? */
+ break;
+ case MGS_TARGET_REG:
+ DEBUG_REQ(D_MGS, req, "target add\n");
+ rc = mgs_handle_target_reg(req);
+ break;
+ case MGS_TARGET_DEL:
+ DEBUG_REQ(D_MGS, req, "target del\n");
+ //rc = mgs_handle_target_del(req);
+ break;
+
+ case LDLM_ENQUEUE:
+ DEBUG_REQ(D_MGS, req, "enqueue");
+ rc = ldlm_handle_enqueue(req, ldlm_server_completion_ast,
+ ldlm_server_blocking_ast, NULL);
+ fail = OBD_FAIL_LDLM_REPLY;
+ break;
+ case LDLM_BL_CALLBACK:
+ case LDLM_CP_CALLBACK:
+ DEBUG_REQ(D_MGS, req, "callback");
+ CERROR("callbacks should not happen on MGS\n");
+ LBUG();
+ break;
+
+ case OBD_PING:
+ DEBUG_REQ(D_INFO, req, "ping");
+ rc = target_handle_ping(req);
+ break;
+ case OBD_LOG_CANCEL:
+ DEBUG_REQ(D_MGS, req, "log cancel\n");
+ rc = -ENOTSUPP; /* la la la */
+ break;
+
+ case LLOG_ORIGIN_HANDLE_CREATE:
+ DEBUG_REQ(D_MGS, req, "llog_init");
+ rc = llog_origin_handle_create(req);
+ break;
+ case LLOG_ORIGIN_HANDLE_NEXT_BLOCK:
+ DEBUG_REQ(D_MGS, req, "llog next block");
+ rc = llog_origin_handle_next_block(req);
+ break;
+ case LLOG_ORIGIN_HANDLE_READ_HEADER:
+ DEBUG_REQ(D_MGS, req, "llog read header");
+ rc = llog_origin_handle_read_header(req);
+ break;
+ case LLOG_ORIGIN_HANDLE_CLOSE:
+ DEBUG_REQ(D_MGS, req, "llog close");
+ rc = llog_origin_handle_close(req);
+ break;
+ case LLOG_CATINFO:
+ DEBUG_REQ(D_MGS, req, "llog catinfo");
+ rc = llog_catinfo(req);
+ break;
+ default:
+ req->rq_status = -ENOTSUPP;
+ rc = ptlrpc_error(req);
+ RETURN(rc);
+ }
+
+ LASSERT(current->journal_info == NULL);
+
+ CDEBUG(D_CONFIG | (rc?D_ERROR:0), "MGS handle cmd=%d rc=%d\n",
+ req->rq_reqmsg->opc, rc);
+
+ out:
+ target_send_reply(req, rc, fail);
+ RETURN(0);
+}
+
+static inline int mgs_destroy_export(struct obd_export *exp)
+{
+ ENTRY;
+
+ target_destroy_export(exp);
+
+ RETURN(0);
+}
+
+/* from mdt_iocontrol */
+int mgs_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
+ void *karg, void *uarg)
+{
+ struct obd_device *obd = exp->exp_obd;
+ struct obd_ioctl_data *data = karg;
+ struct lvfs_run_ctxt saved;
+ int rc = 0;
+
+ ENTRY;
+ CDEBUG(D_IOCTL, "handling ioctl cmd %#x\n", cmd);
+
+ switch (cmd) {
+
+ case OBD_IOC_PARAM: {
+ struct lustre_handle lockh;
+ struct lustre_cfg *lcfg;
+ struct llog_rec_hdr rec;
+ char fsname[32], *devname;
+ int lockrc;
+
+ CERROR("MGS param\n");
+
+ rec.lrh_len = llog_data_len(data->ioc_plen1);
+
+ if (data->ioc_type == LUSTRE_CFG_TYPE) {
+ rec.lrh_type = OBD_CFG_REC;
+ } else {
+ CERROR("unknown cfg record type:%d \n", data->ioc_type);
+ RETURN(-EINVAL);
+ }
+
+ OBD_ALLOC(lcfg, data->ioc_plen1);
+ if (lcfg == NULL)
+ RETURN(-ENOMEM);
+ rc = copy_from_user(lcfg, data->ioc_pbuf1, data->ioc_plen1);
+ if (rc)
+ GOTO(out_free, rc);
+
+ if (lcfg->lcfg_bufcount < 1)
+ GOTO(out_free, rc = -EINVAL);
+
+ /* Extract fsname */
+ memset(fsname, 0, sizeof(fsname));
+ devname = lustre_cfg_string(lcfg, 0);
+ if (devname) {
+ char *ptr = strchr(devname, '-');
+ if (!ptr) {
+ /* assume devname is the fsname */
+ strncpy(fsname, devname, sizeof(fsname));
+ } else {
+ strncpy(fsname, devname, ptr - devname);
+ }
+ CDEBUG(D_MGS, "set param on fs %s device %s\n",
+ fsname, devname);
+ } else {
+ CDEBUG(D_MGS, "set global param\n");
+ }
+
+ rc = mgs_setparam(obd, fsname, lcfg);
+ if (rc) {
+ CERROR("setparam err %d\n", rc);
+ GOTO(out_free, rc);
+ }
+
+ /* Revoke lock so everyone updates. Should be alright if
+ someone was already reading while we were updating the logs,
+ so we don't really need to hold the lock while we're
+ writing (above). */
+ if (fsname) {
+ lockrc = mgs_get_cfg_lock(obd, fsname, &lockh);
+ if (lockrc != ELDLM_OK)
+ CERROR("lock error %d for fs %s\n", lockrc,
+ fsname);
+ else
+ mgs_put_cfg_lock(&lockh);
+ }
+out_free:
+ OBD_FREE(lcfg, data->ioc_plen1);
+ RETURN(rc);
+ }
+
+ case OBD_IOC_DUMP_LOG: {
+ struct llog_ctxt *ctxt =
+ llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT);
+ push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ rc = class_config_dump_llog(ctxt, data->ioc_inlbuf1, NULL);
+ pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ if (rc)
+ RETURN(rc);
+
+ RETURN(rc);
+ }
+
+ case OBD_IOC_LLOG_CHECK:
+ case OBD_IOC_LLOG_INFO:
+ case OBD_IOC_LLOG_PRINT: {
+ struct llog_ctxt *ctxt =
+ llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT);
+
+ push_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL);
+ rc = llog_ioctl(ctxt, cmd, data);
+ pop_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL);
+
+ RETURN(rc);
+ }
+
+ default:
+ CDEBUG(D_INFO, "unknown command %x\n", cmd);
+ RETURN(-EINVAL);
+ }
+ RETURN(0);
+}
+
+/* use obd ops to offer management infrastructure */
+static struct obd_ops mgs_obd_ops = {
+ .o_owner = THIS_MODULE,
+ .o_connect = mgs_connect,
+ .o_disconnect = mgs_disconnect,
+ .o_setup = mgs_setup,
+ .o_precleanup = mgs_precleanup,
+ .o_cleanup = mgs_cleanup,
+ .o_destroy_export = mgs_destroy_export,
+ .o_iocontrol = mgs_iocontrol,
+};
+
+static int __init mgs_init(void)
+{
+ struct lprocfs_static_vars lvars;
+
+ lprocfs_init_vars(mgs, &lvars);
+ class_register_type(&mgs_obd_ops, lvars.module_vars, LUSTRE_MGS_NAME);
+
+ return 0;
+}
+
+static void /*__exit*/ mgs_exit(void)
+{
+ class_unregister_type(LUSTRE_MGS_NAME);
+}
+
+MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
+MODULE_DESCRIPTION("Lustre Management Server (MGS)");
+MODULE_LICENSE("GPL");
+
+module_init(mgs_init);
+module_exit(mgs_exit);
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+
+#ifndef _MGS_INTERNAL_H
+#define _MGS_INTERNAL_H
+
+#ifdef __KERNEL__
+# include <linux/fs.h>
+#endif
+#include <libcfs/kp30.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_lib.h>
+#include <lustre_dlm.h>
+#include <lustre_log.h>
+#include <lustre_export.h>
+
+
+/* MDS has o_t * 1000 */
+#define MGS_SERVICE_WATCHDOG_TIMEOUT (obd_timeout * 10)
+
+/* mgs_llog.c */
+#define FSDB_EMPTY 0x0001
+
+struct fs_db {
+ char fsdb_name[8];
+ struct list_head fsdb_list;
+ struct semaphore fsdb_sem;
+ void* fsdb_ost_index_map;
+ void* fsdb_mdt_index_map;
+ __u32 fsdb_flags;
+ __u32 fsdb_gen;
+};
+
+int mgs_init_fsdb_list(struct obd_device *obd);
+int mgs_cleanup_fsdb_list(struct obd_device *obd);
+int mgs_check_index(struct obd_device *obd, struct mgs_target_info *mti);
+int mgs_check_failnid(struct obd_device *obd, struct mgs_target_info *mti);
+int mgs_write_log_target(struct obd_device *obd, struct mgs_target_info *mti);
+int mgs_upgrade_sv_14(struct obd_device *obd, struct mgs_target_info *mti);
+int mgs_erase_logs(struct obd_device *obd, char *fsname);
+int mgs_setparam(struct obd_device *obd, char *fsname, struct lustre_cfg *lcfg);
+
+/* mgs_fs.c */
+int mgs_fs_setup(struct obd_device *obd, struct vfsmount *mnt);
+int mgs_fs_cleanup(struct obd_device *obddev);
+
+
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * lustre/mgs/mgs_llog.c
+ * Lustre Management Server (mgs) config llog creation
+ *
+ * Copyright (C) 2006 Cluster File Systems, Inc.
+ * Author: Nathan Rutman <nathan@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef EXPORT_SYMTAB
+#define EXPORT_SYMTAB
+#endif
+#define DEBUG_SUBSYSTEM S_MGS
+#define D_MGS D_CONFIG/*|D_WARNING*/
+
+#ifdef __KERNEL__
+#include <linux/module.h>
+#include <linux/pagemap.h>
+#include <linux/fs.h>
+#endif
+
+#include <obd.h>
+#include <obd_class.h>
+#include <lustre_log.h>
+#include <obd_ost.h>
+#include <libcfs/list.h>
+#include <linux/lvfs.h>
+#include <lustre_fsfilt.h>
+#include <lustre_disk.h>
+#include <lustre_param.h>
+#include "mgs_internal.h"
+
+/********************** Class fns ********************/
+
+static int class_dentry_readdir(struct obd_device *obd, struct dentry *dir,
+ struct vfsmount *inmnt,
+ struct list_head *dentry_list){
+ /* see mds_cleanup_pending */
+ struct lvfs_run_ctxt saved;
+ struct file *file;
+ struct dentry *dentry;
+ struct vfsmount *mnt;
+ int rc = 0;
+ ENTRY;
+
+ push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ dentry = dget(dir);
+ if (IS_ERR(dentry))
+ GOTO(out_pop, rc = PTR_ERR(dentry));
+ mnt = mntget(inmnt);
+ if (IS_ERR(mnt)) {
+ l_dput(dentry);
+ GOTO(out_pop, rc = PTR_ERR(mnt));
+ }
+
+ file = dentry_open(dentry, mnt, O_RDONLY);
+ if (IS_ERR(file))
+ /* dentry_open_it() drops the dentry, mnt refs */
+ GOTO(out_pop, rc = PTR_ERR(file));
+
+ INIT_LIST_HEAD(dentry_list);
+ rc = l_readdir(file, dentry_list);
+ filp_close(file, 0);
+ /* filp_close->fput() drops the dentry, mnt refs */
+
+out_pop:
+ pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ RETURN(rc);
+}
+
+/******************** DB functions *********************/
+
+/* from the (client) config log, figure out:
+ 1. which ost's/mdt's are configured (by index)
+ 2. what the last config step is
+*/
+/* FIXME is it better to have a separate db file, instead of parsing the info
+ out of the client log? */
+static int mgs_fsdb_handler(struct llog_handle *llh, struct llog_rec_hdr *rec,
+ void *data)
+{
+ struct fs_db *fsdb = (struct fs_db *)data;
+ int cfg_len = rec->lrh_len;
+ char *cfg_buf = (char*) (rec + 1);
+ struct lustre_cfg *lcfg;
+ __u32 index;
+ int rc = 0;
+ ENTRY;
+
+ if (rec->lrh_type != OBD_CFG_REC) {
+ CERROR("unhandled lrh_type: %#x\n", rec->lrh_type);
+ RETURN(-EINVAL);
+ }
+
+ rc = lustre_cfg_sanity_check(cfg_buf, cfg_len);
+ if (rc) {
+ CERROR("Insane cfg\n");
+ RETURN(rc);
+ }
+
+ lcfg = (struct lustre_cfg *)cfg_buf;
+
+ CDEBUG(D_INFO, "cmd %x %s %s\n", lcfg->lcfg_command,
+ lustre_cfg_string(lcfg, 0), lustre_cfg_string(lcfg, 1));
+
+ /* Figure out ost indicies */
+ /* lov_modify_tgts add 0:lov1 1:ost1_UUID 2(index):0 3(gen):1 */
+ if (lcfg->lcfg_command == LCFG_LOV_ADD_OBD ||
+ lcfg->lcfg_command == LCFG_LOV_DEL_OBD) {
+ index = simple_strtoul(lustre_cfg_string(lcfg, 2),
+ NULL, 10);
+ CDEBUG(D_MGS, "OST index for %s is %u (%s)\n",
+ lustre_cfg_string(lcfg, 1), index,
+ lustre_cfg_string(lcfg, 2));
+ set_bit(index, fsdb->fsdb_ost_index_map);
+ }
+
+ /* Figure out mdt indicies */
+ /* attach 0:MDC_uml1_mdsA_MNT_client 1:mdc 2:1d834_MNT_client_03f */
+ if ((lcfg->lcfg_command == LCFG_ATTACH) &&
+ (strcmp(lustre_cfg_string(lcfg, 1), LUSTRE_MDC_NAME) == 0)) {
+ rc = server_name2index(lustre_cfg_string(lcfg, 0),
+ &index, NULL);
+ if (rc != LDD_F_SV_TYPE_MDT) {
+ CWARN("Unparsable MDC name %s, assuming index 0\n",
+ lustre_cfg_string(lcfg, 0));
+ index = 0;
+ }
+ rc = 0;
+ CDEBUG(D_MGS, "MDT index is %u\n", index);
+ set_bit(index, fsdb->fsdb_mdt_index_map);
+ }
+
+ /* Keep track of the latest marker step */
+ if (lcfg->lcfg_command == LCFG_MARKER) {
+ struct cfg_marker *marker;
+ marker = lustre_cfg_buf(lcfg, 1);
+ fsdb->fsdb_gen = max(fsdb->fsdb_gen, marker->cm_step);
+ }
+
+ RETURN(rc);
+}
+
+static int mgs_get_fsdb_from_llog(struct obd_device *obd, char *logname,
+ struct fs_db *fsdb)
+{
+ struct llog_handle *loghandle;
+ struct lvfs_run_ctxt saved;
+ int rc, rc2;
+ ENTRY;
+
+ down(&fsdb->fsdb_sem);
+ push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+
+ rc = llog_create(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT),
+ &loghandle, NULL, logname);
+ if (rc)
+ GOTO(out_pop, rc);
+
+ rc = llog_init_handle(loghandle, LLOG_F_IS_PLAIN, NULL);
+ if (rc)
+ GOTO(out_close, rc);
+
+ if (llog_get_size(loghandle) <= 1)
+ fsdb->fsdb_flags |= FSDB_EMPTY;
+
+ rc = llog_process(loghandle, mgs_fsdb_handler, (void *)fsdb, NULL);
+ CDEBUG(D_MGS, "get_db = %d\n", rc);
+out_close:
+ rc2 = llog_close(loghandle);
+ if (!rc)
+ rc = rc2;
+
+out_pop:
+ pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ up(&fsdb->fsdb_sem);
+
+ RETURN(rc);
+}
+
+static int next_index(void *index_map, int map_len)
+{
+ int i;
+ for (i = 0; i < map_len * 8; i++)
+ if (!test_bit(i, index_map)) {
+ return i;
+ }
+ CERROR("max index %d exceeded.\n", i);
+ return -1;
+}
+
+#if 0
+static int count_osts(void *index_map, int map_len)
+{
+ int i, num;
+ for (i = 0, num = 0; i < map_len * 8; i++)
+ if (test_bit(i, index_map))
+ num++;
+ return num;
+}
+#endif
+
+static struct fs_db *mgs_find_fsdb(struct obd_device *obd, char *fsname)
+{
+ struct mgs_obd *mgs = &obd->u.mgs;
+ struct fs_db *fsdb;
+ struct list_head *tmp;
+
+ list_for_each(tmp, &mgs->mgs_fs_db_list) {
+ fsdb = list_entry(tmp, struct fs_db, fsdb_list);
+ if (strcmp(fsdb->fsdb_name, fsname) == 0)
+ return fsdb;
+ }
+ return NULL;
+}
+
+#define INDEX_MAP_SIZE 4096
+
+/* caller must hold the mgs->mgs_fs_db_lock */
+static struct fs_db *mgs_new_fsdb(struct obd_device *obd, char *fsname)
+{
+ struct mgs_obd *mgs = &obd->u.mgs;
+ struct fs_db *fsdb;
+ ENTRY;
+
+ OBD_ALLOC_PTR(fsdb);
+ if (!fsdb)
+ RETURN(NULL);
+
+ OBD_ALLOC(fsdb->fsdb_ost_index_map, INDEX_MAP_SIZE);
+ OBD_ALLOC(fsdb->fsdb_mdt_index_map, INDEX_MAP_SIZE);
+ if (!fsdb->fsdb_ost_index_map || !fsdb->fsdb_mdt_index_map) {
+ CERROR("No memory for index maps\n");
+ GOTO(err, 0);
+ }
+
+ strncpy(fsdb->fsdb_name, fsname, sizeof(fsdb->fsdb_name));
+ sema_init(&fsdb->fsdb_sem, 1);
+ list_add(&fsdb->fsdb_list, &mgs->mgs_fs_db_list);
+
+ RETURN(fsdb);
+err:
+ if (fsdb->fsdb_ost_index_map)
+ OBD_FREE(fsdb->fsdb_ost_index_map, INDEX_MAP_SIZE);
+ if (fsdb->fsdb_mdt_index_map)
+ OBD_FREE(fsdb->fsdb_mdt_index_map, INDEX_MAP_SIZE);
+ OBD_FREE_PTR(fsdb);
+ RETURN(NULL);
+}
+
+static void mgs_free_fsdb(struct fs_db *fsdb)
+{
+ /* wait for anyone with the sem */
+ down(&fsdb->fsdb_sem);
+ list_del(&fsdb->fsdb_list);
+ OBD_FREE(fsdb->fsdb_ost_index_map, INDEX_MAP_SIZE);
+ OBD_FREE(fsdb->fsdb_mdt_index_map, INDEX_MAP_SIZE);
+ OBD_FREE_PTR(fsdb);
+}
+
+int mgs_init_fsdb_list(struct obd_device *obd)
+{
+ struct mgs_obd *mgs = &obd->u.mgs;
+ INIT_LIST_HEAD(&mgs->mgs_fs_db_list);
+ return 0;
+}
+
+int mgs_cleanup_fsdb_list(struct obd_device *obd)
+{
+ struct mgs_obd *mgs = &obd->u.mgs;
+ struct fs_db *fsdb;
+ struct list_head *tmp, *tmp2;
+ down(&mgs->mgs_sem);
+ list_for_each_safe(tmp, tmp2, &mgs->mgs_fs_db_list) {
+ fsdb = list_entry(tmp, struct fs_db, fsdb_list);
+ mgs_free_fsdb(fsdb);
+ }
+ up(&mgs->mgs_sem);
+ return 0;
+}
+
+static inline int name_create(char *prefix, char *suffix, char **newname)
+{
+ LASSERT(newname);
+ OBD_ALLOC(*newname, strlen(prefix) + strlen(suffix) + 1);
+ if (!*newname)
+ return -ENOMEM;
+ sprintf(*newname, "%s%s", prefix, suffix);
+ return 0;
+}
+
+static inline void name_destroy(char *name)
+{
+ if (name)
+ OBD_FREE(name, strlen(name) + 1);
+}
+
+
+static int mgs_find_or_make_fsdb(struct obd_device *obd, char *name,
+ struct fs_db **dbh)
+{
+ struct mgs_obd *mgs = &obd->u.mgs;
+ struct fs_db *fsdb;
+ char *cliname;
+ int rc = 0;
+
+ down(&mgs->mgs_sem);
+ fsdb = mgs_find_fsdb(obd, name);
+ if (fsdb) {
+ up(&mgs->mgs_sem);
+ *dbh = fsdb;
+ return 0;
+ }
+
+ CDEBUG(D_MGS, "Creating new db\n");
+ fsdb = mgs_new_fsdb(obd, name);
+ up(&mgs->mgs_sem);
+ if (!fsdb)
+ return -ENOMEM;
+
+ /* populate the db from the client llog */
+ name_create(name, "-client", &cliname);
+ rc = mgs_get_fsdb_from_llog(obd, cliname, fsdb);
+ name_destroy(cliname);
+ if (rc) {
+ CERROR("Can't get db from llog %d\n", rc);
+ mgs_free_fsdb(fsdb);
+ return rc;
+ }
+
+ *dbh = fsdb;
+
+ return 0;
+}
+
+/* 1 = index in use
+ 0 = index unused
+ -1= empty client log */
+int mgs_check_index(struct obd_device *obd, struct mgs_target_info *mti)
+{
+ struct fs_db *fsdb;
+ void *imap;
+ int rc = 0;
+ ENTRY;
+
+ LASSERT(!(mti->mti_flags & LDD_F_NEED_INDEX));
+
+ rc = mgs_find_or_make_fsdb(obd, mti->mti_fsname, &fsdb);
+ if (rc) {
+ CERROR("Can't get db for %s\n", mti->mti_fsname);
+ RETURN(rc);
+ }
+
+ if (fsdb->fsdb_flags & FSDB_EMPTY)
+ RETURN(-1);
+
+ if (mti->mti_flags & LDD_F_SV_TYPE_OST)
+ imap = fsdb->fsdb_ost_index_map;
+ else if (mti->mti_flags & LDD_F_SV_TYPE_MDT)
+ imap = fsdb->fsdb_mdt_index_map;
+ else
+ RETURN(-EINVAL);
+
+ if (test_bit(mti->mti_stripe_index, imap))
+ RETURN(1);
+ RETURN(0);
+}
+
+
+int mgs_set_index(struct obd_device *obd, struct mgs_target_info *mti)
+{
+ struct fs_db *fsdb;
+ void *imap;
+ int rc = 0;
+ ENTRY;
+
+ rc = mgs_find_or_make_fsdb(obd, mti->mti_fsname, &fsdb);
+ if (rc) {
+ CERROR("Can't get db for %s\n", mti->mti_fsname);
+ RETURN(rc);
+ }
+
+ if (mti->mti_flags & LDD_F_SV_TYPE_OST)
+ imap = fsdb->fsdb_ost_index_map;
+ else if (mti->mti_flags & LDD_F_SV_TYPE_MDT)
+ imap = fsdb->fsdb_mdt_index_map;
+ else
+ RETURN(-EINVAL);
+
+ if (mti->mti_flags & LDD_F_NEED_INDEX) {
+ rc = next_index(imap, INDEX_MAP_SIZE);
+ if (rc == -1)
+ RETURN(-ERANGE);
+ mti->mti_stripe_index = rc;
+ }
+
+ /* Remove after CMD */
+ if ((mti->mti_flags & LDD_F_SV_TYPE_MDT) &&
+ (mti->mti_stripe_index > 0)) {
+ LCONSOLE_ERROR("MDT index must = 0 (until Clustered MetaData "
+ "feature is ready.)\n");
+ mti->mti_stripe_index = 0;
+ }
+
+ if (mti->mti_stripe_index >= INDEX_MAP_SIZE * 8) {
+ LCONSOLE_ERROR("Server %s requested index %d, but the"
+ "max index is %d.\n",
+ mti->mti_svname, mti->mti_stripe_index,
+ INDEX_MAP_SIZE * 8);
+ RETURN(-ERANGE);
+ }
+
+ if (test_bit(mti->mti_stripe_index, imap)) {
+ if (mti->mti_flags & LDD_F_VIRGIN) {
+ LCONSOLE_ERROR("Server %s requested index %d, but that "
+ "index is already in use\n",
+ mti->mti_svname, mti->mti_stripe_index);
+ RETURN(-EADDRINUSE);
+ } else {
+ CERROR("Server %s updating index %d\n",
+ mti->mti_svname, mti->mti_stripe_index);
+ RETURN(EALREADY);
+ }
+ }
+
+ set_bit(mti->mti_stripe_index, imap);
+ fsdb->fsdb_flags &= ~FSDB_EMPTY;
+ server_make_name(mti->mti_flags, mti->mti_stripe_index,
+ mti->mti_fsname, mti->mti_svname);
+
+ CDEBUG(D_MGS, "Set index for %s to %d\n", mti->mti_svname,
+ mti->mti_stripe_index);
+
+ RETURN(0);
+}
+
+/******************** config log recording functions *********************/
+
+static int record_lcfg(struct obd_device *obd, struct llog_handle *llh,
+ struct lustre_cfg *lcfg)
+{
+ struct lvfs_run_ctxt saved;
+ struct llog_rec_hdr rec;
+ int buflen, rc;
+
+ LASSERT(llh);
+ LASSERT(llh->lgh_ctxt);
+
+ buflen = lustre_cfg_len(lcfg->lcfg_bufcount,
+ lcfg->lcfg_buflens);
+ rec.lrh_len = llog_data_len(buflen);
+ rec.lrh_type = OBD_CFG_REC;
+
+ push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ /* idx = -1 means append */
+ rc = llog_write_rec(llh, &rec, NULL, 0, (void *)lcfg, -1);
+ pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ if (rc) {
+ CERROR("failed %d\n", rc);
+ }
+ LASSERT(!rc);
+ return rc;
+}
+
+static int record_base(struct obd_device *obd, struct llog_handle *llh,
+ char *cfgname, lnet_nid_t nid, int cmd,
+ char *s1, char *s2, char *s3, char *s4)
+{
+ struct lustre_cfg_bufs bufs;
+ struct lustre_cfg *lcfg;
+ int rc;
+
+ CDEBUG(D_MGS, "lcfg %s %#x %s %s %s %s\n", cfgname,
+ cmd, s1, s2, s3, s4);
+
+ lustre_cfg_bufs_reset(&bufs, cfgname);
+ if (s1)
+ lustre_cfg_bufs_set_string(&bufs, 1, s1);
+ if (s2)
+ lustre_cfg_bufs_set_string(&bufs, 2, s2);
+ if (s3)
+ lustre_cfg_bufs_set_string(&bufs, 3, s3);
+ if (s4)
+ lustre_cfg_bufs_set_string(&bufs, 4, s4);
+
+ lcfg = lustre_cfg_new(cmd, &bufs);
+ lcfg->lcfg_nid = nid;
+
+ rc = record_lcfg(obd, llh, lcfg);
+
+ lustre_cfg_free(lcfg);
+
+ if (rc) {
+ CERROR("error %d: lcfg %s %#x %s %s %s %s\n", rc, cfgname,
+ cmd, s1, s2, s3, s4);
+ }
+ return(rc);
+}
+
+
+static inline int record_add_uuid(struct obd_device *obd,
+ struct llog_handle *llh,
+ uint64_t nid, char *uuid)
+{
+ return record_base(obd,llh,NULL,nid,LCFG_ADD_UUID,uuid,0,0,0);
+
+}
+
+static inline int record_add_conn(struct obd_device *obd,
+ struct llog_handle *llh,
+ char *devname,
+ char *uuid)
+{
+ return record_base(obd,llh,devname,0,LCFG_ADD_CONN,uuid,0,0,0);
+}
+
+static inline int record_attach(struct obd_device *obd, struct llog_handle *llh,
+ char *devname, char *type, char *uuid)
+{
+ return record_base(obd,llh,devname,0,LCFG_ATTACH,type,uuid,0,0);
+}
+
+static inline int record_setup(struct obd_device *obd, struct llog_handle *llh,
+ char *devname,
+ char *s1, char *s2, char *s3, char *s4)
+{
+ return record_base(obd,llh,devname,0,LCFG_SETUP,s1,s2,s3,s4);
+}
+
+static int record_lov_setup(struct obd_device *obd, struct llog_handle *llh,
+ char *devname, struct lov_desc *desc)
+{
+ struct lustre_cfg_bufs bufs;
+ struct lustre_cfg *lcfg;
+ int rc;
+
+ lustre_cfg_bufs_reset(&bufs, devname);
+ lustre_cfg_bufs_set(&bufs, 1, desc, sizeof(*desc));
+ lcfg = lustre_cfg_new(LCFG_SETUP, &bufs);
+
+ rc = record_lcfg(obd, llh, lcfg);
+
+ lustre_cfg_free(lcfg);
+ return rc;
+}
+
+static inline int record_lov_add(struct obd_device *obd,
+ struct llog_handle *llh,
+ char *lov_name, char *ost_uuid,
+ char *index, char *gen)
+{
+ return record_base(obd,llh,lov_name,0,LCFG_LOV_ADD_OBD,
+ ost_uuid,index,gen,0);
+}
+
+static inline int record_mount_opt(struct obd_device *obd,
+ struct llog_handle *llh,
+ char *profile, char *lov_name,
+ char *mdc_name)
+{
+ return record_base(obd,llh,NULL,0,LCFG_MOUNTOPT,
+ profile,lov_name,mdc_name,0);
+}
+
+static int record_marker(struct obd_device *obd, struct llog_handle *llh,
+ struct fs_db *fsdb, __u32 flags,
+ char *svname, char *comment)
+{
+ struct cfg_marker marker;
+ struct timeval tv;
+ struct lustre_cfg_bufs bufs;
+ struct lustre_cfg *lcfg;
+ int rc;
+
+ if (flags & CM_START)
+ fsdb->fsdb_gen++;
+ marker.cm_step = fsdb->fsdb_gen;
+ marker.cm_flags = flags;
+ strncpy(marker.cm_svname, svname, sizeof(marker.cm_svname));
+ strncpy(marker.cm_comment, comment, sizeof(marker.cm_comment));
+ do_gettimeofday(&tv);
+ marker.cm_createtime = tv.tv_sec;
+ marker.cm_canceltime = 0;
+ lustre_cfg_bufs_reset(&bufs, NULL);
+ lustre_cfg_bufs_set(&bufs, 1, &marker, sizeof(marker));
+ lcfg = lustre_cfg_new(LCFG_MARKER, &bufs);
+
+ rc = record_lcfg(obd, llh, lcfg);
+
+ lustre_cfg_free(lcfg);
+ return rc;
+}
+
+static int record_start_log(struct obd_device *obd,
+ struct llog_handle **llh, char *name)
+{
+ static struct obd_uuid cfg_uuid = { .uuid = "config_uuid" };
+ struct lvfs_run_ctxt saved;
+ int rc = 0;
+
+ if (*llh) {
+ GOTO(out, rc = -EBUSY);
+ }
+
+ push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+
+ rc = llog_create(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT),
+ llh, NULL, name);
+ if (rc == 0)
+ llog_init_handle(*llh, LLOG_F_IS_PLAIN, &cfg_uuid);
+ else
+ *llh = NULL;
+
+ pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+
+out:
+ if (rc) {
+ CERROR("Can't start log %s: %d\n", name, rc);
+ }
+ RETURN(rc);
+}
+
+static int record_end_log(struct obd_device *obd, struct llog_handle **llh)
+{
+ struct lvfs_run_ctxt saved;
+ int rc = 0;
+
+ push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+
+ rc = llog_close(*llh);
+ *llh = NULL;
+
+ pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ RETURN(rc);
+}
+
+static int mgs_log_is_empty(struct obd_device *obd, char *name)
+{
+ struct lvfs_run_ctxt saved;
+ struct llog_handle *llh;
+ int rc = 0;
+
+ /* FIXME cache the empty state in the db */
+
+ push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ rc = llog_create(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT),
+ &llh, NULL, name);
+ if (rc == 0) {
+ llog_init_handle(llh, LLOG_F_IS_PLAIN, NULL);
+ rc = llog_get_size(llh);
+ llog_close(llh);
+ }
+ pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ /* header is record 1 */
+ return(rc <= 1);
+}
+
+/******************** config "macros" *********************/
+
+/* write an lcfg directly into a log (with markers) */
+static int mgs_write_log_direct(struct obd_device *obd, struct fs_db *fsdb,
+ char *logname, char *obdname,
+ struct lustre_cfg *lcfg)
+{
+ struct llog_handle *llh = NULL;
+ int rc;
+ ENTRY;
+
+ rc = record_start_log(obd, &llh, logname);
+ rc = record_marker(obd, llh, fsdb, CM_START, obdname, "param");
+
+ rc = record_lcfg(obd, llh, lcfg);
+
+ rc = record_marker(obd, llh, fsdb, CM_END, obdname, "param");
+ rc = record_end_log(obd, &llh);
+
+ RETURN(rc);
+}
+
+/* write the lcfg in all logs for the given fs */
+int mgs_write_log_direct_all(struct obd_device *obd, struct fs_db *fsdb,
+ char *fsname, struct lustre_cfg *lcfg)
+{
+ struct mgs_obd *mgs = &obd->u.mgs;
+ struct list_head dentry_list;
+ struct l_linux_dirent *dirent, *n;
+ char *logname;
+ int rc, len = strlen(fsname);
+ ENTRY;
+
+ /* We need to set params for any future logs
+ as well. FIXME Append this file to every new log. */
+ name_create(fsname, "-params", &logname);
+ if (mgs_log_is_empty(obd, logname)) {
+ struct llog_handle *llh = NULL;
+ rc = record_start_log(obd, &llh, logname);
+ rc = record_end_log(obd, &llh);
+ }
+ name_destroy(logname);
+
+ /* Find all the logs in the CONFIGS directory */
+ rc = class_dentry_readdir(obd, mgs->mgs_configs_dir,
+ mgs->mgs_vfsmnt, &dentry_list);
+ if (rc) {
+ CERROR("Can't read %s dir\n", MOUNT_CONFIGS_DIR);
+ RETURN(rc);
+ }
+
+ /* Could use fsdb index maps instead of directory listing */
+ list_for_each_entry_safe(dirent, n, &dentry_list, lld_list) {
+ list_del(&dirent->lld_list);
+ if (strncmp(fsname, dirent->lld_name, len) == 0) {
+ CDEBUG(D_MGS, "Changing log %s\n", dirent->lld_name);
+ rc = mgs_write_log_direct(obd, fsdb, dirent->lld_name,
+ dirent->lld_name, lcfg);
+ }
+ OBD_FREE(dirent, sizeof(*dirent));
+ }
+
+ RETURN(rc);
+}
+
+/* lov is the first thing in the mdt and client logs */
+static int mgs_write_log_lov(struct obd_device *obd, struct fs_db *fsdb,
+ struct mgs_target_info *mti,
+ char *logname, char *lovname)
+{
+ struct llog_handle *llh = NULL;
+ struct lov_desc *lovdesc;
+ char *uuid;
+ int rc = 0;
+ ENTRY;
+
+ CDEBUG(D_MGS, "Writing log %s\n", logname);
+
+ /*
+ #01 L attach 0:lov_mdsA 1:lov 2:71ccb_lov_mdsA_19f961a9e1
+ #02 L lov_setup 0:lov_mdsA 1:(struct lov_desc)
+ uuid=lov1_UUID, stripe count=1, size=1048576, offset=0, pattern=0
+ */
+
+ /* FIXME just make lov_setup accept empty desc (put uuid in buf 2) */
+ OBD_ALLOC(lovdesc, sizeof(*lovdesc));
+ if (lovdesc == NULL)
+ RETURN(-ENOMEM);
+ lovdesc->ld_magic = LOV_DESC_MAGIC;
+ lovdesc->ld_tgt_count = 0;
+ /* Defaults. Can be changed later by lcfg config_param */
+ lovdesc->ld_default_stripe_count = 1;
+ lovdesc->ld_pattern = LOV_PATTERN_RAID0;
+ lovdesc->ld_default_stripe_size = 1024 * 1024;
+ lovdesc->ld_default_stripe_offset = 0;
+ sprintf((char*)lovdesc->ld_uuid.uuid, "%s_UUID", lovname);
+ /* can these be the same? */
+ uuid = (char *)lovdesc->ld_uuid.uuid;
+
+ /* This should always be the first entry in a log.
+ rc = mgs_clear_log(obd, logname); */
+ rc = record_start_log(obd, &llh, logname);
+ rc = record_marker(obd, llh, fsdb, CM_START, lovname, "lov setup");
+ rc = record_attach(obd, llh, lovname, "lov", uuid);
+ rc = record_lov_setup(obd, llh, lovname, lovdesc);
+ rc = record_marker(obd, llh, fsdb, CM_END, lovname, "lov setup");
+ rc = record_end_log(obd, &llh);
+
+ OBD_FREE(lovdesc, sizeof(*lovdesc));
+ RETURN(rc);
+}
+
+/* add failnids to open log */
+static int mgs_write_log_failnids(struct obd_device *obd,
+ struct mgs_target_info *mti,
+ struct llog_handle *llh,
+ char *cliname)
+{
+ char *failnodeuuid = NULL;
+ char *ptr = mti->mti_params;
+ lnet_nid_t nid;
+ int rc = 0;
+
+ /*
+ #03 L add_uuid nid=uml1@tcp(0x20000c0a80201) nal=90 0: 1:uml1_UUID
+ #04 L add_uuid nid=1@elan(0x1000000000001) nal=90 0: 1:uml1_UUID
+ #05 L setup 0:OSC_uml1_ost1_mdsA 1:ost1_UUID 2:uml1_UUID
+ #06 L add_uuid nid=uml2@tcp(0x20000c0a80202) nal=90 0: 1:uml2_UUID
+ #0x L add_uuid nid=2@elan(0x1000000000002) nal=90 0: 1:uml2_UUID
+ #07 L add_conn 0:OSC_uml1_ost1_mdsA 1:uml2_UUID
+ */
+
+ /* Pull failnid info out of params string */
+ while (class_find_param(ptr, PARAM_FAILNODE, &ptr) == 0) {
+ while (class_parse_nid(ptr, &nid, &ptr) == 0) {
+ if (failnodeuuid == NULL) {
+ /* We don't know the failover node name,
+ so just use the first nid as the uuid */
+ rc = name_create(libcfs_nid2str(nid), "",
+ &failnodeuuid);
+ if (rc)
+ return rc;
+ }
+ CDEBUG(D_MGS, "add nid %s for failover uuid %s, "
+ "client %s\n", libcfs_nid2str(nid),
+ failnodeuuid, cliname);
+ rc = record_add_uuid(obd, llh, nid, failnodeuuid);
+ }
+ if (failnodeuuid) {
+ rc = record_add_conn(obd, llh, cliname, failnodeuuid);
+ name_destroy(failnodeuuid);
+ failnodeuuid = NULL;
+ }
+ }
+
+ return rc;
+}
+
+static int mgs_write_log_mdt(struct obd_device *obd, struct fs_db *fsdb,
+ struct mgs_target_info *mti)
+{
+ struct llog_handle *llh = NULL;
+ char *cliname, *mdcname, *lovname, *nodeuuid, *mdcuuid;
+ int rc, i, first_log = 0;
+ ENTRY;
+
+ CDEBUG(D_MGS, "writing new mdt %s\n", mti->mti_svname);
+
+ /* COMPAT_146 */
+ if (mti->mti_flags & LDD_F_UPGRADE14) {
+ char *ptr, oldname[sizeof(mti->mti_uuid)];
+ /* We're starting with an old uuid. Assume old name for lov
+ as well since the lov entry already exists in the log. */
+ CERROR("old mds uuid %s\n", mti->mti_uuid);
+ strcpy(oldname, mti->mti_uuid);
+ ptr = strstr(oldname, "_UUID");
+ if (!ptr) {
+ CERROR("Can't get old MDT name from %s\n",
+ mti->mti_uuid);
+ RETURN(-EINVAL);
+ }
+ *ptr = '\0';
+ name_create("lov_", oldname, &lovname);
+ CERROR("lov name: %s\n", lovname);
+ } else {
+ /* Make up our own uuid and lov name */
+ snprintf(mti->mti_uuid, sizeof(mti->mti_uuid),
+ "%s_UUID", mti->mti_svname);
+ name_create(mti->mti_fsname, "-mdtlov", &lovname);
+ }
+
+ /* Append mdt info to mdt log */
+ if (mgs_log_is_empty(obd, mti->mti_svname)) {
+ /* This is the first time for all logs for this fs,
+ since any ost should have already started the mdt log. */
+ first_log++;
+ rc = mgs_write_log_lov(obd, fsdb, mti, mti->mti_svname,
+ lovname);
+ }
+ /* else there's already some ost entries in the mdt log. */
+
+ /* We added the lov, maybe some osc's, now for the mdt.
+ We might add more ost's after this. Note that during the parsing
+ of this log, this is when the mdt will start. (This was not
+ formerly part of the old mds log, it was directly executed by
+ lconf.) */
+ /*
+ #09 L mount_option 0: 1:mdsA 2:lov_mdsA
+ attach mds mdsA mdsA_UUID
+ setup /dev/loop2 ldiskfs mdsA errors=remount-ro,user_xattr
+ */
+ rc = record_start_log(obd, &llh, mti->mti_svname);
+ rc = record_marker(obd, llh, fsdb, CM_START, mti->mti_svname,"add mdt");
+ rc = record_mount_opt(obd, llh, mti->mti_svname, lovname, 0);
+ rc = record_attach(obd, llh, mti->mti_svname, LUSTRE_MDS_NAME,
+ mti->mti_uuid);
+ rc = record_setup(obd, llh, mti->mti_svname,
+ "dev"/*ignored*/, "type"/*ignored*/,
+ mti->mti_svname, 0/*options*/);
+ rc = record_marker(obd, llh, fsdb, CM_END, mti->mti_svname, "add mdt");
+ rc = record_end_log(obd, &llh);
+
+ /* Append the mdt info to the client log */
+ name_create(mti->mti_fsname, "-client", &cliname);
+ name_destroy(lovname);
+ name_create(mti->mti_fsname, "-clilov", &lovname);
+ if (first_log ||
+ /* If we're upgrading, the MDT log will exist but not the client. */
+ ((mti->mti_flags & LDD_F_UPGRADE14) &&
+ mgs_log_is_empty(obd, cliname))) {
+ /* Start client log */
+ rc = mgs_write_log_lov(obd, fsdb, mti, cliname, lovname);
+ }
+
+ name_create(libcfs_nid2str(mti->mti_nids[0]), /*"_UUID"*/"", &nodeuuid);
+ name_create(mti->mti_svname, "-mdc", &mdcname);
+ name_create(mdcname, "_UUID", &mdcuuid);
+ /*
+ #09 L add_uuid nid=uml1@tcp(0x20000c0a80201) 0: 1:uml1_UUID
+ #10 L attach 0:MDC_uml1_mdsA_MNT_client 1:mdc 2:1d834_MNT_client_03f
+ #11 L setup 0:MDC_uml1_mdsA_MNT_client 1:mdsA_UUID 2:uml1_UUID
+ #12 L add_uuid nid=uml2@tcp(0x20000c0a80202) 0: 1:uml2_UUID
+ #13 L add_conn 0:MDC_uml1_mdsA_MNT_client 1:uml2_UUID
+ #14 L mount_option 0: 1:client 2:lov1 3:MDC_uml1_mdsA_MNT_client
+ */
+ rc = record_start_log(obd, &llh, cliname);
+ rc = record_marker(obd, llh, fsdb, CM_START, mti->mti_svname,"add mdc");
+ for (i = 0; i < mti->mti_nid_count; i++) {
+ CDEBUG(D_MGS, "add nid %s\n", libcfs_nid2str(mti->mti_nids[i]));
+ rc = record_add_uuid(obd, llh, mti->mti_nids[i], nodeuuid);
+ }
+ rc = record_attach(obd, llh, mdcname, LUSTRE_MDC_NAME, mdcuuid);
+ rc = record_setup(obd, llh, mdcname, mti->mti_uuid,nodeuuid, 0, 0);
+ rc = mgs_write_log_failnids(obd, mti, llh, mdcname);
+ rc = record_mount_opt(obd, llh, cliname, lovname, mdcname);
+ rc = record_marker(obd, llh, fsdb, CM_END, mti->mti_svname, "add mdc");
+ rc = record_end_log(obd, &llh);
+
+ name_destroy(mdcuuid);
+ name_destroy(mdcname);
+ name_destroy(nodeuuid);
+ name_destroy(cliname);
+ name_destroy(lovname);
+ RETURN(rc);
+}
+
+/* Add the ost info to the client/mdt lov */
+static int mgs_write_log_osc(struct obd_device *obd, struct fs_db *fsdb,
+ struct mgs_target_info *mti,
+ char *logname, char *lovname, int flags)
+{
+ struct llog_handle *llh = NULL;
+ char *nodeuuid, *oscname, *oscuuid, *lovuuid;
+ char index[5];
+ int i, rc;
+
+ if (mgs_log_is_empty(obd, logname)) {
+ /* The first item in the log must be the lov, so we have
+ somewhere to add our osc. */
+ rc = mgs_write_log_lov(obd, fsdb, mti, logname, lovname);
+ }
+
+ CDEBUG(D_MGS, "adding osc for %s to log %s\n",
+ mti->mti_svname, logname);
+
+ name_create(libcfs_nid2str(mti->mti_nids[0]), "", &nodeuuid);
+ name_create(mti->mti_svname, "-osc", &oscname);
+ name_create(oscname, "_UUID", &oscuuid);
+ name_create(lovname, "_UUID", &lovuuid);
+
+ /*
+ #03 L add_uuid nid=uml1@tcp(0x20000c0a80201) 0: 1:uml1_UUID
+ multihomed (#4)
+ #04 L add_uuid nid=1@elan(0x1000000000001) nal=90 0: 1:uml1_UUID
+ #04 L attach 0:OSC_uml1_ost1_MNT_client 1:osc 2:89070_lov1_a41dff51a
+ #05 L setup 0:OSC_uml1_ost1_MNT_client 1:ost1_UUID 2:uml1_UUID
+ failover (#6,7)
+ #06 L add_uuid nid=uml2@tcp(0x20000c0a80202) 0: 1:uml2_UUID
+ #07 L add_conn 0:OSC_uml1_ost1_MNT_client 1:uml2_UUID
+ #08 L lov_modify_tgts add 0:lov1 1:ost1_UUID 2(index):0 3(gen):1
+ */
+ rc = record_start_log(obd, &llh, logname);
+ rc = record_marker(obd, llh, fsdb, CM_START | flags, mti->mti_svname,
+ "add osc");
+ for (i = 0; i < mti->mti_nid_count; i++) {
+ CDEBUG(D_MGS, "add nid %s\n", libcfs_nid2str(mti->mti_nids[i]));
+ rc = record_add_uuid(obd, llh, mti->mti_nids[i], nodeuuid);
+ }
+ rc = record_attach(obd, llh, oscname, LUSTRE_OSC_NAME, lovuuid);
+ rc = record_setup(obd, llh, oscname, mti->mti_uuid, nodeuuid, 0, 0);
+ rc = mgs_write_log_failnids(obd, mti, llh, oscname);
+ snprintf(index, sizeof(index), "%d", mti->mti_stripe_index);
+ rc = record_lov_add(obd, llh, lovname, mti->mti_uuid, index, "1");
+ rc = record_marker(obd, llh, fsdb, CM_END | flags, mti->mti_svname,
+ "add osc");
+ rc = record_end_log(obd, &llh);
+
+ name_destroy(lovuuid);
+ name_destroy(oscuuid);
+ name_destroy(oscname);
+ name_destroy(nodeuuid);
+ return rc;
+}
+
+static int mgs_write_log_ost(struct obd_device *obd, struct fs_db *fsdb,
+ struct mgs_target_info *mti)
+{
+ struct llog_handle *llh = NULL;
+ char *logname, *lovname;
+ int rc, flags = 0;
+ ENTRY;
+
+ CDEBUG(D_MGS, "writing new ost %s\n", mti->mti_svname);
+
+ /* The ost startup log */
+
+ /* If the ost log already exists, that means that someone reformatted
+ the ost and it called target_add again.
+ FIXME check and warn here, maybe inc config ver #? Or abort,
+ and claim there's already a server with that name? Maybe need
+ another flag to say it's okay to rewrite.
+ Heck, what do we do about the client and mds logs? We better
+ abort. */
+ if (!mgs_log_is_empty(obd, mti->mti_svname)) {
+ LCONSOLE_ERROR("The config log for %s already exists, yet the "
+ "server claims it never registered. It may have"
+ " been reformatted, or the index changed. Use "
+ " tunefs.lustre --writeconf to regenerate "
+ " all logs.\n", mti->mti_svname);
+ return -EALREADY;
+ }
+ /*
+ attach obdfilter ost1 ost1_UUID
+ setup /dev/loop2 ldiskfs f|n errors=remount-ro,user_xattr
+ */
+ rc = record_start_log(obd, &llh, mti->mti_svname);
+ rc = record_marker(obd, llh, fsdb, CM_START, mti->mti_svname,"add ost");
+ if (*mti->mti_uuid == '\0')
+ snprintf(mti->mti_uuid, sizeof(mti->mti_uuid),
+ "%s_UUID", mti->mti_svname);
+ rc = record_attach(obd, llh, mti->mti_svname,
+ "obdfilter"/*LUSTRE_OST_NAME*/, mti->mti_uuid);
+ rc = record_setup(obd,llh,mti->mti_svname,
+ "dev"/*ignored*/,"type"/*ignored*/,
+ "f", 0/*options*/);
+ rc = record_marker(obd, llh, fsdb, CM_END, mti->mti_svname, "add ost");
+ rc = record_end_log(obd, &llh);
+
+ /* We also have to update the other logs where this osc is part of
+ the lov */
+
+ /* Append ost info to mdt log */
+ if (mti->mti_flags & LDD_F_UPGRADE14)
+ /* If we're upgrading, the old mdt log already has our
+ entry. Let's do a fake one for fun. */
+ flags = CM_SKIP | CM_UPGRADE146;
+ /* FIXME add to all mdt logs for CMD */
+ // FIXME need real mdt name -- but MDT may not have registered yet!
+ name_create(mti->mti_fsname, "-MDT0000", &logname);
+ name_create(mti->mti_fsname, "-mdtlov", &lovname);
+ mgs_write_log_osc(obd, fsdb, mti, logname, lovname, flags);
+ name_destroy(lovname);
+ name_destroy(logname);
+
+ /* Append ost info to the client log */
+ name_create(mti->mti_fsname, "-client", &logname);
+ name_create(mti->mti_fsname, "-clilov", &lovname);
+ mgs_write_log_osc(obd, fsdb, mti, logname, lovname, 0);
+ name_destroy(lovname);
+ name_destroy(logname);
+
+ RETURN(rc);
+}
+
+/* Add additional failnids to an existing log.
+ The mdc/osc must have been added to logs first */
+/* tcp nids must be in dotted-quad ascii -
+ we can't resolve hostnames from the kernel. */
+static int mgs_write_log_add_failnid(struct obd_device *obd, struct fs_db *fsdb,
+ struct mgs_target_info *mti)
+{
+ char *logname, *cliname;
+ struct llog_handle *llh = NULL;
+ int rc;
+ ENTRY;
+
+ /* Verify that we know about this target */
+ if (mgs_log_is_empty(obd, mti->mti_svname)) {
+ LCONSOLE_ERROR("The target %s has not registered yet. "
+ "It must be started before failnids can "
+ "be added.\n", mti->mti_svname);
+ RETURN(-ENOENT);
+ }
+
+ /* Create mdc/osc client name (e.g. lustre-OST0001-osc) */
+ if (mti->mti_flags & LDD_F_SV_TYPE_MDT) {
+ name_create(mti->mti_svname, "-mdc", &cliname);
+ } else if (mti->mti_flags & LDD_F_SV_TYPE_OST) {
+ name_create(mti->mti_svname, "-osc", &cliname);
+ } else {
+ RETURN(-EINVAL);
+ }
+
+ /* Add failover nids to client log */
+ name_create(mti->mti_fsname, "-client", &logname);
+ rc = record_start_log(obd, &llh, logname);
+ rc = record_marker(obd, llh, fsdb, CM_START, mti->mti_svname,
+ "add failnid");
+ rc = mgs_write_log_failnids(obd, mti, llh, cliname);
+ rc = record_marker(obd, llh, fsdb, CM_END, mti->mti_svname,
+ "add failnid");
+ rc = record_end_log(obd, &llh);
+ name_destroy(logname);
+
+ if (mti->mti_flags & LDD_F_SV_TYPE_OST) {
+ /* Add OST failover nids to the MDT log as well */
+ name_create(mti->mti_fsname, "-MDT0000", &logname);
+ rc = record_start_log(obd, &llh, logname);
+ rc = record_marker(obd, llh, fsdb, CM_START, mti->mti_svname,
+ "add failnid");
+ rc = mgs_write_log_failnids(obd, mti, llh, cliname);
+ rc = record_marker(obd, llh, fsdb, CM_END, mti->mti_svname,
+ "add failnid");
+ rc = record_end_log(obd, &llh);
+ name_destroy(logname);
+ }
+
+ name_destroy(cliname);
+ RETURN(rc);
+}
+
+static int mgs_write_log_params(struct obd_device *obd, struct fs_db *fsdb,
+ struct mgs_target_info *mti)
+{
+ struct lustre_cfg_bufs bufs;
+ struct lustre_cfg *lcfg;
+ char *ptr = mti->mti_params;
+ char *endptr;
+ char *end = mti->mti_params + sizeof(mti->mti_params);
+ int rc = 0, len;
+ ENTRY;
+
+ if (!mti->mti_params)
+ RETURN(0);
+
+ while (ptr < end) {
+ while (*ptr == ' ')
+ ptr++;
+ if (*ptr == '\0' || (ptr >= end))
+ break;
+ endptr = strchr(ptr, ' ');
+ if (endptr)
+ len = endptr - ptr;
+ else
+ len = strlen(ptr);
+ CDEBUG(D_MGS, "next param '%.*s'\n", len, ptr);
+
+ if (class_match_param(ptr, PARAM_MGSNODE, &endptr) == 0)
+ GOTO(end_while, rc);
+
+ if (class_match_param(ptr, PARAM_FAILNODE, &endptr) == 0) {
+ /* Add a failover nidlist */
+ rc = 0;
+ /* We already processed failovers params for new
+ targets in mgs_write_log_target */
+ if (mti->mti_flags & MTI_F_IOCTL) {
+ CDEBUG(D_MGS, "Adding failnode\n");
+ rc = mgs_write_log_add_failnid(obd, fsdb, mti);
+ }
+ GOTO(end_while, rc);
+ }
+
+ if (class_match_param(ptr, PARAM_OBD_TIMEOUT, &endptr) == 0) {
+ /* Change obd timeout */
+ int timeout;
+ timeout = simple_strtoul(endptr, &endptr, 0);
+
+ CDEBUG(D_MGS, "obd timeout %d\n", timeout);
+ lustre_cfg_bufs_reset(&bufs, NULL);
+ lcfg = lustre_cfg_new(LCFG_SET_TIMEOUT, &bufs);
+ lcfg->lcfg_num = timeout;
+ /* modify all servers and clients */
+ rc = mgs_write_log_direct_all(obd, fsdb, mti->mti_fsname,
+ lcfg);
+ lustre_cfg_free(lcfg);
+ GOTO(end_while, rc);
+ }
+
+ if (class_match_param(ptr, PARAM_DEFAULT_STRIPE, &endptr) == 0){
+ /* Change lov default stripe params */
+ char *lovname, *logname;
+ CDEBUG(D_MGS, "lov param %s\n", ptr);
+ if (!(mti->mti_flags & LDD_F_SV_TYPE_MDT)) {
+ LCONSOLE_ERROR("Default stripe params must be "
+ "set on the MDT, not %s. "
+ "Ignoring.\n",
+ mti->mti_svname);
+ GOTO(end_while, rc = 0);
+ }
+
+ /* Modify mdtlov */
+ name_create(mti->mti_fsname, "-mdtlov", &lovname);
+ if (mgs_log_is_empty(obd, mti->mti_svname)) {
+ name_destroy(lovname);
+ GOTO(end_while, rc = -ENODEV);
+ }
+ lustre_cfg_bufs_reset(&bufs, lovname);
+ lustre_cfg_bufs_set(&bufs, 1, ptr, len);
+ lcfg = lustre_cfg_new(LCFG_PARAM, &bufs);
+ rc = mgs_write_log_direct(obd, fsdb, mti->mti_svname,
+ lovname, lcfg);
+ lustre_cfg_free(lcfg);
+ name_destroy(lovname);
+ if (rc)
+ GOTO(end_while, rc);
+
+ /* Modify clilov */
+ name_create(mti->mti_fsname, "-client", &logname);
+ name_create(mti->mti_fsname, "-clilov", &lovname);
+ lustre_cfg_bufs_reset(&bufs, lovname);
+ lustre_cfg_bufs_set(&bufs, 1, ptr, len);
+ lcfg = lustre_cfg_new(LCFG_PARAM, &bufs);
+ rc = mgs_write_log_direct(obd, fsdb, logname,
+ lovname, lcfg);
+ lustre_cfg_free(lcfg);
+ name_destroy(lovname);
+ name_destroy(logname);
+ GOTO(end_while, rc);
+ }
+
+ LCONSOLE_WARN("Ignoring unrecognized param '%.*s'\n", len, ptr);
+
+end_while:
+ if (rc) {
+ CERROR("err %d on param '%.*s\n", rc, len, ptr);
+ break;
+ }
+ ptr += len;
+ }
+
+ RETURN(rc);
+}
+
+int mgs_check_failnid(struct obd_device *obd, struct mgs_target_info *mti)
+{
+ /* Not implementing automatic failover nid addition at this time. */
+ return 0;
+#if 0
+ struct fs_db *fsdb;
+ int rc;
+ ENTRY;
+
+ rc = mgs_find_or_make_fsdb(obd, fsname, &fsdb);
+ if (rc)
+ RETURN(rc);
+
+ if (mgs_log_is_empty(obd, mti->mti_svname))
+ /* should never happen */
+ RETURN(-ENOENT);
+
+ CDEBUG(D_MGS, "Checking for new failnids for %s\n", mti->mti_svname);
+ // FIXME check logs
+ /* FIXME we need a real database lookup. Create on-disk db of known
+ size, lookup by index */
+ /* Check each nid, or check only nid0 and add all if nid0 is missing?
+ What if someone adds a net to a node? Better check everything. */
+ /* if nid 0 is missing, mgs_write_log_add_failnid.
+ if just one nid is missing, add uuid for nodeuuid[nid0]).
+ */
+
+ /* Hey, we can just check mti->params to see if we're already in
+ the failover list */
+
+ down(&fsdb->fsdb_sem);
+ rc = mgs_write_log_add_failnid(obd, fsdb, mti);
+ up(&fsdb->fsdb_sem);
+
+ RETURN(rc);
+#endif
+}
+
+int mgs_write_log_target(struct obd_device *obd,
+ struct mgs_target_info *mti)
+{
+ struct fs_db *fsdb;
+ int rc = -EINVAL;
+ ENTRY;
+
+ /* set/check the new target index */
+ rc = mgs_set_index(obd, mti);
+ if (rc < 0) {
+ CERROR("Can't get index (%d)\n", rc);
+ RETURN(rc);
+ }
+ if (rc == EALREADY) {
+ // FIXME mark old log sections as invalid, add new.
+ CERROR("updates not yet implemented\n");
+ RETURN(-EALREADY);
+ }
+
+ rc = mgs_find_or_make_fsdb(obd, mti->mti_fsname, &fsdb);
+ if (rc) {
+ CERROR("Can't get db for %s\n", mti->mti_fsname);
+ RETURN(rc);
+ }
+
+ down(&fsdb->fsdb_sem);
+
+ if (mti->mti_flags & LDD_F_SV_TYPE_MDT) {
+ rc = mgs_write_log_mdt(obd, fsdb, mti);
+ } else if (mti->mti_flags & LDD_F_SV_TYPE_OST) {
+ rc = mgs_write_log_ost(obd, fsdb, mti);
+ } else {
+ CERROR("Unknown target type %#x, can't create log for %s\n",
+ mti->mti_flags, mti->mti_svname);
+ }
+ if (rc) {
+ CERROR("Can't write logs for %s (%d)\n", mti->mti_svname, rc);
+ GOTO(out_up, rc);
+ }
+
+ rc = mgs_write_log_params(obd, fsdb, mti);
+
+out_up:
+ up(&fsdb->fsdb_sem);
+ RETURN(rc);
+}
+
+
+/* COMPAT_146 */
+/***************** upgrade pre-mountconf logs to mountconf *****************/
+
+#if 0
+int mgs_upgrade_logs_14(struct obd_device *obd, struct fs_db *fsdb,
+ struct mgs_target_info *mti)
+{
+ int rc = 0;
+ ENTRY;
+
+ CDEBUG(D_MGS, "Upgrading old logs for %s\n", mti->mti_fsname);
+
+ /* If we get here, we know:
+ the client log fsname-client exists
+ the logs have not been updated
+ so
+ 1. parse the old client log (client log name?) to find out UUIDs for
+ all servers
+ 2. regen all ost logs: servers will get new
+ name based on index, but will keep their old uuids.
+ 3. append mdt startup to the end of the mdt log
+ 4. append marker to old client log signifying we did the upgrade
+ ? translate mds/client logs to new names?
+ 2 UP mdt MDS MDS_uuid 3
+ 3 UP lov lov_mdsA 47d06_lov_mdsA_61f31f85bc 4
+ 4 UP osc OSC_uml1_ost1_mdsA 47d06_lov_mdsA_61f31f85bc 4
+ 5 UP osc OSC_uml1_ost2_mdsA 47d06_lov_mdsA_61f31f85bc 4
+ 6 UP mds lustre-MDT0000 mdsA_UUID 3
+ to
+ ? update server uuids?
+ */
+
+
+ /* old mdt log:
+ old osc's were part of old lov,
+ mount opt connects mdt to lov
+ so need to use old lov name.
+ old client logs starts old mdc and lov,
+ so need to use old lov,mdc names in mount opt - but new client name
+ *-client instead of just "client" */
+
+
+ if ((mti->mti_flags & LDD_F_SV_TYPE_MDT)) {
+ CDEBUG(D_MGS, "Upgrade MDT\n");
+ if (mgs_log_is_empty(obd, mti->mti_svname)) {
+ CERROR("The MDT log %s is missing.\n", mti->mti_svname);
+ RETURN(-ENOENT);
+ }
+ /* Append the MDT startup sequence to the old log
+ (lconf used to start the MDT directly) */
+ rc = mgs_write_log_mdt(obd, fsdb, mti);
+ if (rc)
+ RETURN(rc);
+
+ /* this would be for trying to update an old client log */
+ struct llog_handle *llh = NULL;
+ char *cliname;
+ CDEBUG(D_MGS, "Upgrade client\n");
+
+ name_create(mti->mti_fsname, "-client", &cliname);
+
+ rc = record_start_log(obd, &llh, cliname);
+ rc = record_marker(obd, llh, fsdb, CM_START, "client",
+ "upgrade from 1.4");
+ /* FIXME find the old lovname and mdcname from old log */
+ /* old: mount_option 0: 1:client 2:lov1 3:MDC_uml1_mdsA_MNT_client */
+ /* new: mount_option 0: 1:lustre-client 2:lustre-clilov 3:lustre-MDT0000-mdc */
+ rc = record_mount_opt(obd, llh, cliname, "lov1",
+ "MDC_uml1_mdsA_MNT_client");
+ rc = record_marker(obd, llh, fsdb, CM_END, "client",
+ "upgrade to 1.6");
+ rc = record_end_log(obd, &llh);
+ name_destroy(cliname);
+ }
+
+ if ((mti->mti_flags & LDD_F_SV_TYPE_OST)) {
+ CDEBUG(D_MGS, "Upgrade OST\n");
+ /* A regular new ost log, but don't update client or MDT logs */
+ rc = mgs_write_log_ost(obd, fsdb, mti);
+ }
+
+ RETURN(rc);
+}
+#endif
+
+/* first connect of upgraded servers */
+int mgs_upgrade_sv_14(struct obd_device *obd, struct mgs_target_info *mti)
+{
+ struct fs_db *fsdb;
+ int rc = 0;
+ ENTRY;
+
+ /* Create client and ost log normally, as servers register.
+ That way logs are modern (except have old uuids (from last_rcvd))
+ - Old clients can continue to use upgraded OSTs
+ - New clients will only start with upgraded OSTs
+ - MDT won't know about old OSTs, only upgraded, so we need the old
+ MDT log in order for old clients to work. (Old clients connect to
+ the MDT, not the MGS, for their logs, and will therefore receive
+ the old client log from the MDT /LOGS dir.) */
+
+ CDEBUG(D_MGS, "upgrading server %s from pre-1.6\n",
+ mti->mti_svname);
+ server_mti_print("upgrade", mti);
+
+ rc = mgs_find_or_make_fsdb(obd, mti->mti_fsname, &fsdb);
+ if (rc)
+ RETURN(rc);
+
+ if (fsdb->fsdb_flags & FSDB_EMPTY)
+ /* First server to upgrade sees this */
+ CWARN("info: missing client log\n");
+
+ if (!(fsdb->fsdb_flags & FSDB_EMPTY) && (fsdb->fsdb_gen == 0)) {
+ /* There were no markers in the client log, meaning we have
+ not updated the logs for this fs */
+ CWARN("info: found old, unupdated client log\n");
+ }
+
+ if ((mti->mti_flags & LDD_F_SV_TYPE_MDT) &&
+ mgs_log_is_empty(obd, mti->mti_svname)) {
+ LCONSOLE_ERROR("The old MDT log %s is missing. Was "
+ "tunefs.lustre successful?\n",
+ mti->mti_svname);
+ RETURN(-ENOENT);
+ }
+
+ /* FIXME Old MDT log already has an old mount opt
+ which we should drop */
+ rc = mgs_write_log_target(obd, mti);
+ RETURN(rc);
+}
+/* end COMPAT_146 */
+
+static int mgs_clear_log(struct obd_device *obd, char *name)
+{
+ struct lvfs_run_ctxt saved;
+ struct llog_handle *llh;
+ int rc = 0;
+
+ push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ rc = llog_create(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT),
+ &llh, NULL, name);
+ if (rc == 0) {
+ llog_init_handle(llh, LLOG_F_IS_PLAIN, NULL);
+ rc = llog_destroy(llh);
+ llog_free_handle(llh);
+ }
+ pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+
+ if (rc)
+ CERROR("failed to clear log %s: %d\n", name, rc);
+
+ return(rc);
+}
+
+/* erase all logs for the given fs */
+int mgs_erase_logs(struct obd_device *obd, char *fsname)
+{
+ struct mgs_obd *mgs = &obd->u.mgs;
+ static struct fs_db *fsdb;
+ struct list_head dentry_list;
+ struct l_linux_dirent *dirent, *n;
+ int rc, len = strlen(fsname);
+ ENTRY;
+
+ /* Find all the logs in the CONFIGS directory */
+ rc = class_dentry_readdir(obd, mgs->mgs_configs_dir,
+ mgs->mgs_vfsmnt, &dentry_list);
+ if (rc) {
+ CERROR("Can't read %s dir\n", MOUNT_CONFIGS_DIR);
+ RETURN(rc);
+ }
+
+ /* Delete the fs db */
+ down(&mgs->mgs_sem);
+ fsdb = mgs_find_fsdb(obd, fsname);
+ if (fsdb)
+ mgs_free_fsdb(fsdb);
+ up(&mgs->mgs_sem);
+
+ list_for_each_entry_safe(dirent, n, &dentry_list, lld_list) {
+ list_del(&dirent->lld_list);
+ if (strncmp(fsname, dirent->lld_name, len) == 0) {
+ CDEBUG(D_MGS, "Removing log %s\n", dirent->lld_name);
+ mgs_clear_log(obd, dirent->lld_name);
+ }
+ OBD_FREE(dirent, sizeof(*dirent));
+ }
+
+ RETURN(rc);
+}
+
+/* from llog_swab */
+static void print_lustre_cfg(struct lustre_cfg *lcfg)
+{
+ int i;
+ ENTRY;
+
+ CDEBUG(D_MGS, "lustre_cfg: %p\n", lcfg);
+ CDEBUG(D_MGS, "\tlcfg->lcfg_version: %#x\n", lcfg->lcfg_version);
+
+ CDEBUG(D_MGS, "\tlcfg->lcfg_command: %#x\n", lcfg->lcfg_command);
+ CDEBUG(D_MGS, "\tlcfg->lcfg_num: %#x\n", lcfg->lcfg_num);
+ CDEBUG(D_MGS, "\tlcfg->lcfg_flags: %#x\n", lcfg->lcfg_flags);
+ CDEBUG(D_MGS, "\tlcfg->lcfg_nid: %s\n", libcfs_nid2str(lcfg->lcfg_nid));
+
+ CDEBUG(D_MGS, "\tlcfg->lcfg_bufcount: %d\n", lcfg->lcfg_bufcount);
+ if (lcfg->lcfg_bufcount < LUSTRE_CFG_MAX_BUFCOUNT)
+ for (i = 0; i < lcfg->lcfg_bufcount; i++) {
+ CDEBUG(D_MGS, "\tlcfg->lcfg_buflens[%d]: %d %s\n",
+ i, lcfg->lcfg_buflens[i],
+ lustre_cfg_string(lcfg, i));
+ }
+ EXIT;
+}
+
+/* Set a permanent (config log) param for a target or fs */
+int mgs_setparam(struct obd_device *obd, char *fsname, struct lustre_cfg *lcfg)
+{
+ struct fs_db *fsdb;
+ struct mgs_target_info *mti;
+ char *devname;
+ int rc = 0;
+ ENTRY;
+
+ print_lustre_cfg(lcfg);
+
+ /* lustre, lustre-mdtlov, lustre-client, lustre-MDT0000 */
+ devname = lustre_cfg_string(lcfg, 0);
+
+ if (devname == NULL) {
+ /* Global setting across all fs's? */
+ LCONSOLE_ERROR("Global settings not implemented yet!\n");
+ RETURN(-ENOSYS);
+ }
+
+ CDEBUG(D_MGS, "target: %s\n", devname);
+
+ rc = mgs_find_or_make_fsdb(obd, fsname, &fsdb);
+ if (rc)
+ RETURN(rc);
+ if (fsdb->fsdb_flags & FSDB_EMPTY) {
+ CERROR("No filesystem targets for %s\n", fsname);
+ RETURN(-EINVAL);
+ }
+
+ /* Create a fake mti to hold everything */
+ OBD_ALLOC_PTR(mti);
+ if (!mti)
+ GOTO(out, rc = -ENOMEM);
+ strcpy(mti->mti_fsname, fsname);
+ strcpy(mti->mti_svname, devname);
+ rc = server_name2index(devname, &mti->mti_stripe_index, NULL);
+ if (rc < 0)
+ GOTO(out, rc);
+ mti->mti_flags = rc | MTI_F_IOCTL;
+ strncpy(mti->mti_params, lustre_cfg_string(lcfg, 1),
+ sizeof(mti->mti_params));
+
+ down(&fsdb->fsdb_sem);
+ rc = mgs_write_log_params(obd, fsdb, mti);
+ up(&fsdb->fsdb_sem);
+
+out:
+ OBD_FREE_PTR(mti);
+ RETURN(rc);
+}
+
+
+#if 0
+/******************** unused *********************/
+static int mgs_backup_llog(struct obd_device *obd, char* fsname)
+{
+ struct file *filp, *bak_filp;
+ struct lvfs_run_ctxt saved;
+ char *logname, *buf;
+ loff_t soff = 0 , doff = 0;
+ int count = 4096, len;
+ int rc = 0;
+
+ OBD_ALLOC(logname, PATH_MAX);
+ if (logname == NULL)
+ return -ENOMEM;
+
+ OBD_ALLOC(buf, count);
+ if (!buf)
+ GOTO(out , rc = -ENOMEM);
+
+ len = snprintf(logname, PATH_MAX, "%s/%s.bak",
+ MOUNT_CONFIGS_DIR, fsname);
+
+ if (len >= PATH_MAX - 1) {
+ GOTO(out, -ENAMETOOLONG);
+ }
+
+ push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+
+ bak_filp = l_filp_open(logname, O_RDWR|O_CREAT|O_TRUNC, 0660);
+ if (IS_ERR(bak_filp)) {
+ rc = PTR_ERR(bak_filp);
+ CERROR("backup logfile open %s: %d\n", logname, rc);
+ GOTO(pop, rc);
+ }
+ sprintf(logname, "%s/%s", MOUNT_CONFIGS_DIR, fsname);
+ filp = l_filp_open(logname, O_RDONLY, 0);
+ if (IS_ERR(filp)) {
+ rc = PTR_ERR(filp);
+ CERROR("logfile open %s: %d\n", logname, rc);
+ GOTO(close1f, rc);
+ }
+
+ while ((rc = lustre_fread(filp, buf, count, &soff)) > 0) {
+ rc = lustre_fwrite(bak_filp, buf, count, &doff);
+ break;
+ }
+
+ filp_close(filp, 0);
+close1f:
+ filp_close(bak_filp, 0);
+pop:
+ pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+out:
+ if (buf)
+ OBD_FREE(buf, count);
+ OBD_FREE(logname, PATH_MAX);
+ return rc;
+}
+
+
+
+#endif
obdclass-all-objs += class_obd.o
obdclass-all-objs += debug.o genops.o uuid.o llog_ioctl.o
obdclass-all-objs += lprocfs_status.o lustre_handles.o lustre_peer.o
-obdclass-all-objs += statfs_pack.o obdo.o obd_config.o prng.o
+obdclass-all-objs += statfs_pack.o obdo.o obd_config.o obd_mount.o prng.o
obdclass-objs := $(obdclass-linux-objs) $(obdclass-all-objs)
EXPORT_SYMBOL(class_register_type);
EXPORT_SYMBOL(class_unregister_type);
+EXPORT_SYMBOL(class_search_type);
EXPORT_SYMBOL(class_get_type);
EXPORT_SYMBOL(class_put_type);
EXPORT_SYMBOL(class_name2dev);
EXPORT_SYMBOL(class_name2obd);
EXPORT_SYMBOL(class_uuid2dev);
EXPORT_SYMBOL(class_uuid2obd);
+EXPORT_SYMBOL(class_obd_list);
EXPORT_SYMBOL(class_find_client_obd);
EXPORT_SYMBOL(class_find_client_notype);
EXPORT_SYMBOL(class_devices_in_group);
EXPORT_SYMBOL(class_disconnect);
/* uuid.c */
+EXPORT_SYMBOL(class_generate_random_uuid);
EXPORT_SYMBOL(class_uuid_unparse);
EXPORT_SYMBOL(lustre_uuid_to_peer);
EXPORT_SYMBOL(class_handle_unhash);
EXPORT_SYMBOL(class_handle2object);
-/* config.c */
+/* obd_config.c */
EXPORT_SYMBOL(class_incref);
EXPORT_SYMBOL(class_decref);
EXPORT_SYMBOL(class_get_profile);
int init_obdclass(void)
#endif
{
+ int i, err;
struct obd_device *obd;
- int err;
- int i;
-
#ifdef __KERNEL__
+ int lustre_register_fs(void);
+
printk(KERN_INFO "Lustre: OBD class driver Build Version: "
BUILD_VERSION", info@clusterfs.com\n");
#else
CDEBUG(D_INFO, "Lustre: OBD class driver Build Version: "
BUILD_VERSION", info@clusterfs.com\n");
#endif
+
spin_lock_init(&obd_types_lock);
spin_lock_init(&handle_lock);
cfs_waitq_init(&obd_race_waitq);
return err;
#ifdef __KERNEL__
err = class_procfs_init();
+ lustre_register_fs();
#endif
return err;
static void cleanup_obdclass(void)
{
int i;
+ int lustre_unregister_fs(void);
ENTRY;
+ lustre_unregister_fs();
+
cfs_psdev_deregister(&obd_psdev);
for (i = 0; i < MAX_OBD_DEVICES; i++) {
struct obd_device *obd = &obd_dev[i];
* support functions: we could use inter-module communication, but this
* is more portable to other OS's
*/
-static struct obd_type *class_search_type(char *name)
+struct obd_type *class_search_type(char *name)
{
struct list_head *tmp;
struct obd_type *type;
#ifdef CONFIG_KMOD
if (!type) {
- if (!request_module(name)) {
- CDEBUG(D_INFO, "Loaded module '%s'\n", name);
+ char *modname = name;
+ if (strcmp(modname, LUSTRE_MDT_NAME) == 0)
+ modname = LUSTRE_MDS_NAME;
+ if (!request_module(modname)) {
+ CDEBUG(D_INFO, "Loaded module '%s'\n", modname);
type = class_search_type(name);
- } else
- CDEBUG(D_INFO, "Can't load module '%s'\n", name);
+ } else {
+ LCONSOLE_ERROR("Can't load module '%s'\n", modname);
+ }
}
#endif
if (type)
return &obd_dev[dev];
}
+void class_obd_list(void)
+{
+ char *status;
+ int i;
+
+ spin_lock(&obd_dev_lock);
+ for (i = 0; i < MAX_OBD_DEVICES; i++) {
+ struct obd_device *obd = &obd_dev[i];
+ if (obd->obd_type == NULL)
+ continue;
+ if (obd->obd_stopping)
+ status = "ST";
+ else if (obd->obd_set_up)
+ status = "UP";
+ else if (obd->obd_attached)
+ status = "AT";
+ else
+ status = "--";
+ LCONSOLE(D_CONFIG, "%3d %s %s %s %s %d\n",
+ i, status, obd->obd_type->typ_name,
+ obd->obd_name, obd->obd_uuid.uuid,
+ atomic_read(&obd->obd_refcount));
+ }
+ spin_unlock(&obd_dev_lock);
+ return;
+}
+
/* Search for a client OBD connected to tgt_uuid. If grp_uuid is
specified, then only the client with that uuid is returned,
otherwise any client connected to the tgt is returned. */
rc = llog_read_header(handle);
if (rc == 0) {
flags = llh->llh_flags;
- if (uuid)
- LASSERT(obd_uuid_equals(uuid, &llh->llh_tgtuuid));
+ if (uuid && !obd_uuid_equals(uuid, &llh->llh_tgtuuid)) {
+ CERROR("uuid mismatch: %s/%s\n", (char *)uuid->uuid,
+ (char *)llh->llh_tgtuuid.uuid);
+ rc = -EEXIST;
+ }
GOTO(out, rc);
} else if (rc != LLOG_EEMPTY || !flags) {
/* set a pesudo flag for initialization */
char *buf;
__u64 cur_offset = LLOG_CHUNK_SIZE;
int rc = 0, index = 1, last_index;
- int saved_index = 0;
+ int saved_index = 0, last_called_index = 0;
ENTRY;
+ LASSERT(llh);
+
OBD_ALLOC(buf, LLOG_CHUNK_SIZE);
if (!buf)
RETURN(-ENOMEM);
- if (cd != NULL)
+ if (cd != NULL) {
+ last_called_index = cd->first_idx;
index = cd->first_idx + 1;
+ }
if (cd != NULL && cd->last_idx)
last_index = cd->last_idx;
else
/* if set, process the callback on this record */
if (ext2_test_bit(index, llh->llh_bitmap)) {
rc = cb(loghandle, rec, data);
+ last_called_index = index;
if (rc == LLOG_PROC_BREAK) {
CWARN("recovery from log: "LPX64":%x"
" stopped\n",
}
out:
+ if (cd != NULL)
+ cd->last_idx = last_called_index;
if (buf)
OBD_FREE(buf, LLOG_CHUNK_SIZE);
RETURN(rc);
}
EXPORT_SYMBOL(llog_process);
+inline int llog_get_size(struct llog_handle *loghandle)
+{
+ if (loghandle && loghandle->lgh_hdr)
+ return loghandle->lgh_hdr->llh_count;
+ return 0;
+}
+EXPORT_SYMBOL(llog_get_size);
+
int llog_reverse_process(struct llog_handle *loghandle, llog_cb_t cb,
void *data, void *catdata)
{
case MDS_UNLINK_REC:
case MDS_SETATTR_REC:
case OBD_CFG_REC:
- case PTL_CFG_REC: /* obsolete */
case LLOG_HDR_MAGIC: {
l = snprintf(out, remains, "[index]: %05d [type]: "
"%02x [len]: %04d ok\n",
#include <libcfs/list.h>
#include <lvfs.h>
#include <lustre_fsfilt.h>
+#include <lustre_disk.h>
#include "llog_internal.h"
#if defined(__KERNEL__) && defined(LLOG_LVFS)
RETURN(-EIO);
}
-static struct file *llog_filp_open(char *name, int flags, int mode)
+static struct file *llog_filp_open(char *dir, char *name, int flags, int mode)
{
char *logname;
struct file *filp;
if (logname == NULL)
return ERR_PTR(-ENOMEM);
- len = snprintf(logname, PATH_MAX, "LOGS/%s", name);
+ len = snprintf(logname, PATH_MAX, "%s/%s", dir, name);
if (len >= PATH_MAX - 1) {
filp = ERR_PTR(-ENAMETOOLONG);
} else {
CERROR("logfile creation %s: %ld\n", logname,
PTR_ERR(filp));
}
-
OBD_FREE(logname, PATH_MAX);
return filp;
}
handle->lgh_id = *logid;
} else if (name) {
- handle->lgh_file = llog_filp_open(name, open_flags, 0644);
+ /* COMPAT_146 */
+ if (strcmp(obd->obd_type->typ_name, LUSTRE_MDS_NAME) == 0) {
+ handle->lgh_file = llog_filp_open(MDT_LOGS_DIR, name,
+ open_flags, 0644);
+ } else {
+ /* end COMPAT_146 */
+ handle->lgh_file = llog_filp_open(MOUNT_CONFIGS_DIR,
+ name, open_flags,
+ 0644);
+ }
if (IS_ERR(handle->lgh_file))
GOTO(cleanup, rc = PTR_ERR(handle->lgh_file));
{
struct dentry *fdentry;
struct obdo *oa;
+ struct obd_device *obd = handle->lgh_ctxt->loc_exp->exp_obd;
+ char *dir;
int rc;
ENTRY;
+ /* COMPAT_146 */
+ if (strcmp(obd->obd_type->typ_name, LUSTRE_MDS_NAME) == 0)
+ dir = MDT_LOGS_DIR;
+ else
+ /* end COMPAT_146 */
+ dir = MOUNT_CONFIGS_DIR;
+
fdentry = handle->lgh_file->f_dentry;
- if (!strcmp(fdentry->d_parent->d_name.name, "LOGS")) {
- struct obd_device *obd = handle->lgh_ctxt->loc_exp->exp_obd;
+ if (strcmp(fdentry->d_parent->d_name.name, dir) == 0) {
struct inode *inode = fdentry->d_parent->d_inode;
struct lvfs_run_ctxt saved;
int size = sizeof(*idarray) * count;
loff_t off = 0;
- LASSERT(count);
+ if (!count)
+ return (0);
push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
file = filp_open(name, O_RDWR | O_CREAT | O_LARGEFILE, 0700);
name, rc);
GOTO(out, rc);
}
-
+
if (!S_ISREG(file->f_dentry->d_inode->i_mode)) {
CERROR("%s is not a regular file!: mode = %o\n", name,
file->f_dentry->d_inode->i_mode);
GOTO(out, rc = -ENOENT);
}
+ CDEBUG(D_CONFIG, "cat list: disk size=%d, read=%d\n",
+ (int)file->f_dentry->d_inode->i_size, size);
+
rc = fsfilt_read_record(disk_obd, file, idarray, size, &off);
if (rc) {
- CDEBUG(D_INODE,"OBD filter: error reading %s: rc %d\n",
- name, rc);
+ CERROR("OBD filter: error reading %s: rc %d\n", name, rc);
GOTO(out, rc);
}
int size = sizeof(*idarray) * count;
loff_t off = 0;
- LASSERT(count);
+ if (!count)
+ return (0);
push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
file = filp_open(name, O_RDWR | O_CREAT | O_LARGEFILE, 0700);
/* helper functions for calling the llog obd methods */
+int llog_cleanup(struct llog_ctxt *ctxt)
+{
+ int rc = 0;
+ ENTRY;
+
+ if (!ctxt) {
+ CERROR("No ctxt\n");
+ RETURN(-ENODEV);
+ }
+
+ if (CTXTP(ctxt, cleanup))
+ rc = CTXTP(ctxt, cleanup)(ctxt);
+
+ ctxt->loc_obd->obd_llog_ctxt[ctxt->loc_idx] = NULL;
+ if (ctxt->loc_exp)
+ class_export_put(ctxt->loc_exp);
+ OBD_FREE(ctxt, sizeof(*ctxt));
+
+ RETURN(rc);
+}
+EXPORT_SYMBOL(llog_cleanup);
+
int llog_setup(struct obd_device *obd, int index, struct obd_device *disk_obd,
int count, struct llog_logid *logid, struct llog_operations *op)
{
if (index < 0 || index >= LLOG_MAX_CTXTS)
RETURN(-EFAULT);
+ if (obd->obd_llog_ctxt[index]) {
+ /* During an mds_lov_add_ost, we try to tear down and resetup llogs.
+ But the mdt teardown does not flow down to the lov/osc's as the
+ setup does, because the lov/osc must clean up only when they are
+ done, not when the mdt is done. So instead, we just assume that
+ if the lov llogs are already set up then we must cleanup first. */
+ CDEBUG(D_CONFIG, "obd %s ctxt %d already set up\n",
+ obd->obd_name, index);
+ llog_cleanup(obd->obd_llog_ctxt[index]);
+ }
+
OBD_ALLOC(ctxt, sizeof(*ctxt));
if (!ctxt)
RETURN(-ENOMEM);
}
EXPORT_SYMBOL(llog_setup);
-int llog_cleanup(struct llog_ctxt *ctxt)
-{
- int rc = 0;
- ENTRY;
-
- if (!ctxt) {
- CERROR("No ctxt\n");
- RETURN(-ENODEV);
- }
-
- if (CTXTP(ctxt, cleanup))
- rc = CTXTP(ctxt, cleanup)(ctxt);
-
- ctxt->loc_obd->obd_llog_ctxt[ctxt->loc_idx] = NULL;
- if (ctxt->loc_exp)
- class_export_put(ctxt->loc_exp);
- OBD_FREE(ctxt, sizeof(*ctxt));
-
- RETURN(rc);
-}
-EXPORT_SYMBOL(llog_cleanup);
-
int llog_sync(struct llog_ctxt *ctxt, struct obd_export *exp)
{
int rc = 0;
}
__swab32s(&lcfg->lcfg_command);
-
__swab32s(&lcfg->lcfg_num);
__swab32s(&lcfg->lcfg_flags);
__swab64s(&lcfg->lcfg_nid);
-
__swab32s(&lcfg->lcfg_bufcount);
for (i = 0; i < lcfg->lcfg_bufcount && i < LUSTRE_CFG_MAX_BUFCOUNT; i++)
__swab32s(&lcfg->lcfg_buflens[i]);
return -ENOENT;
}
+/* Add a nid to a niduuid. Multiple nids can be added to a single uuid;
+ LNET will choose the best one. */
int class_add_uuid(char *uuid, __u64 nid)
{
struct uuid_nid_data *data;
return 0;
}
-/* delete only one entry if uuid is specified, otherwise delete all */
+/* Delete the nids for one uuid if specified, otherwise delete all */
int class_del_uuid (char *uuid)
{
struct list_head deathrow;
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * Copyright (c) 2001-2003 Cluster File Systems, Inc.
+ * Copyright (c) 2001-2006 Cluster File Systems, Inc.
*
* This file is part of the Lustre file system, http://www.lustre.org
* Lustre is a trademark of Cluster File Systems, Inc.
#include <libcfs/list.h>
+/********************** class fns **********************/
+
/* Create a new device and set the type, name and uuid. If
* successful, the new device can be accessed by either name or uuid.
*/
RETURN(0);
err_exp:
+ CERROR("setup %s failed (%d)\n", obd->obd_name, err);
class_unlink_export(obd->obd_self_export);
obd->obd_self_export = NULL;
obd->obd_starting = 0;
obd->obd_force = 1;
break;
case 'A':
- LCONSOLE_WARN("Failing %s by user command\n",
- obd->obd_name);
+ LCONSOLE_WARN("Failing over %s\n",
+ obd->obd_name);
obd->obd_fail = 1;
obd->obd_no_transno = 1;
obd->obd_no_recov = 1;
RETURN(-EINVAL);
}
if (strcmp(obd->obd_type->typ_name, LUSTRE_MDC_NAME) &&
- strcmp(obd->obd_type->typ_name, LUSTRE_OSC_NAME)) {
+ strcmp(obd->obd_type->typ_name, LUSTRE_OSC_NAME) &&
+ strcmp(obd->obd_type->typ_name, LUSTRE_MGC_NAME)) {
CERROR("can't add connection on non-client dev\n");
RETURN(-EINVAL);
}
ENTRY;
OBD_ALLOC(lprof, sizeof(*lprof));
if (lprof == NULL)
- GOTO(out, err = -ENOMEM);
+ RETURN(-ENOMEM);
CFS_INIT_LIST_HEAD(&lprof->lp_list);
LASSERT(proflen == (strlen(prof) + 1));
LASSERT(osclen == (strlen(osc) + 1));
OBD_ALLOC(lprof->lp_osc, osclen);
- if (lprof->lp_profile == NULL)
+ if (lprof->lp_osc == NULL)
GOTO(out, err = -ENOMEM);
memcpy(lprof->lp_osc, osc, osclen);
}
list_add(&lprof->lp_list, &lustre_profile_list);
+ RETURN(err);
out:
+ if (lprof->lp_mdc)
+ OBD_FREE(lprof->lp_mdc, mdclen);
+ if (lprof->lp_osc)
+ OBD_FREE(lprof->lp_osc, osclen);
+ if (lprof->lp_profile)
+ OBD_FREE(lprof->lp_profile, proflen);
+ OBD_FREE(lprof, sizeof(*lprof));
RETURN(err);
}
case LCFG_DEL_MOUNTOPT: {
CDEBUG(D_IOCTL, "mountopt: profile %s\n",
lustre_cfg_string(lcfg, 1));
- /* set these mount options somewhere, so ll_fill_super
- * can find them. */
class_del_profile(lustre_cfg_string(lcfg, 1));
GOTO(out, err = 0);
}
sizeof (obd_lustre_upcall));
GOTO(out, err = 0);
}
- case LCFG_PARAM:
case LCFG_MARKER: {
- LCONSOLE_WARN("LCFG_MARKER not yet implemented.\n");
+ struct cfg_marker *marker;
+ marker = lustre_cfg_buf(lcfg, 1);
+ CDEBUG(D_IOCTL, "marker %d (%#x) %.16s %s\n", marker->cm_step,
+ marker->cm_flags, marker->cm_svname, marker->cm_comment);
GOTO(out, err = 0);
}
}
}
}
out:
+ if ((err == -ENOSYS || err == -EINVAL) &&
+ !(lcfg->lcfg_command & LCFG_REQUIRED)) {
+ CWARN("Skipping optional command %#x\n", lcfg->lcfg_command);
+ err = 0;
+ }
return err;
}
+int class_config_dump_handler(struct llog_handle * handle,
+ struct llog_rec_hdr *rec, void *data);
+
+#ifdef __KERNEL__
+extern int lustre_check_exclusion(struct super_block *sb, char *svname);
+#else
+#define lustre_check_exclusion(a,b) 0
+#endif
+
static int class_config_llog_handler(struct llog_handle * handle,
struct llog_rec_hdr *rec, void *data)
{
- struct config_llog_instance *cfg = data;
+ struct config_llog_instance *clli = data;
int cfg_len = rec->lrh_len;
char *cfg_buf = (char*) (rec + 1);
int rc = 0;
ENTRY;
+
+ //class_config_dump_handler(handle, rec, data);
+
switch (rec->lrh_type) {
case OBD_CFG_REC: {
struct lustre_cfg *lcfg, *lcfg_new;
if (rc)
GOTO(out, rc);
+ /* Figure out config state info */
+ if (lcfg->lcfg_command == LCFG_MARKER) {
+ struct cfg_marker *marker = lustre_cfg_buf(lcfg, 1);
+ CDEBUG(D_CONFIG, "Marker, cfg_flg=%#x\n",
+ clli->cfg_flags);
+ if (marker->cm_flags & CM_START) {
+ /* all previous flags off */
+ clli->cfg_flags = CFG_F_MARKER;
+ if (marker->cm_flags & CM_SKIP) {
+ clli->cfg_flags |= CFG_F_SKIP;
+ CDEBUG(D_CONFIG, "SKIP #%d\n",
+ marker->cm_step);
+ } else if (lustre_check_exclusion(clli->cfg_sb,
+ marker->cm_svname)) {
+ clli->cfg_flags |= CFG_F_EXCLUDE;
+ CDEBUG(D_CONFIG, "EXCLUDE %d\n",
+ marker->cm_step);
+ }
+ } else if (marker->cm_flags & CM_END) {
+ clli->cfg_flags = 0;
+ }
+ }
+ /* A config command without a start marker before it is
+ illegal (1.4.6. compat must set it artificially) */
+ if (!(clli->cfg_flags & CFG_F_MARKER) &&
+ (lcfg->lcfg_command != LCFG_MARKER)) {
+ CWARN("Config not inside markers, ignoring! (%#x)\n",
+ clli->cfg_flags);
+ clli->cfg_flags |= CFG_F_SKIP;
+ }
+
+ if (clli->cfg_flags & CFG_F_SKIP) {
+ // FIXME warning
+ CDEBUG(D_CONFIG|D_WARNING, "skipping %#x\n",
+ clli->cfg_flags);
+ rc = 0;
+ /* No processing! */
+ break;
+ }
+
+ if ((clli->cfg_flags & CFG_F_EXCLUDE) &&
+ (lcfg->lcfg_command == LCFG_LOV_ADD_OBD))
+ /* Add inactive instead */
+ lcfg->lcfg_command = LCFG_LOV_ADD_INA;
+
lustre_cfg_bufs_init(&bufs, lcfg);
- if (cfg && cfg->cfg_instance && LUSTRE_CFG_BUFLEN(lcfg, 0) > 0) {
+ if (clli && clli->cfg_instance && LUSTRE_CFG_BUFLEN(lcfg, 0) > 0){
inst = 1;
inst_len = LUSTRE_CFG_BUFLEN(lcfg, 0) +
- strlen(cfg->cfg_instance) + 1;
+ strlen(clli->cfg_instance) + 1;
OBD_ALLOC(inst_name, inst_len);
if (inst_name == NULL)
GOTO(out, rc = -ENOMEM);
sprintf(inst_name, "%s-%s",
lustre_cfg_string(lcfg, 0),
- cfg->cfg_instance);
+ clli->cfg_instance);
lustre_cfg_bufs_set_string(&bufs, 0, inst_name);
+ CDEBUG(D_CONFIG, "cmd %x, instance name: %s\n",
+ lcfg->lcfg_command, inst_name);
}
- if (cfg && lcfg->lcfg_command == LCFG_ATTACH) {
- lustre_cfg_bufs_set_string(&bufs, 2, cfg->cfg_uuid.uuid);
+ /* we override the llog's uuid for clients, to insure they
+ are unique */
+ if (clli && clli->cfg_instance &&
+ lcfg->lcfg_command == LCFG_ATTACH) {
+ lustre_cfg_bufs_set_string(&bufs, 2,
+ clli->cfg_uuid.uuid);
}
lcfg_new = lustre_cfg_new(lcfg->lcfg_command, &bufs);
OBD_FREE(inst_name, inst_len);
break;
}
- case PTL_CFG_REC: {
- CWARN("Ignoring obsolete portals config\n");
- break;
- }
default:
CERROR("Unknown llog record type %#x encountered\n",
rec->lrh_type);
break;
}
out:
+ if (rc) {
+ CERROR("Err %d on cfg command:\n", rc);
+ class_config_dump_handler(handle, rec, data);
+ }
RETURN(rc);
}
int class_config_parse_llog(struct llog_ctxt *ctxt, char *name,
struct config_llog_instance *cfg)
{
+ struct llog_process_cat_data cd = {0, 0};
struct llog_handle *llh;
int rc, rc2;
ENTRY;
if (rc)
GOTO(parse_out, rc);
- rc = llog_process(llh, class_config_llog_handler, cfg, NULL);
+ /* continue processing from where we last stopped to end-of-log */
+ if (cfg)
+ cd.first_idx = cfg->cfg_last_idx;
+ cd.last_idx = 0;
+
+ rc = llog_process(llh, class_config_llog_handler, cfg, &cd);
+
+ /* FIXME remove warning */
+ CDEBUG(D_CONFIG|D_WARNING, "Processed log %s gen %d-%d (rc=%d)\n", name,
+ cd.first_idx + 1, cd.last_idx, rc);
+ if (cfg)
+ cfg->cfg_last_idx = cd.last_idx;
+
parse_out:
rc2 = llog_close(llh);
if (rc == 0)
rc = rc2;
RETURN(rc);
-
}
int class_config_dump_handler(struct llog_handle * handle,
{
int cfg_len = rec->lrh_len;
char *cfg_buf = (char*) (rec + 1);
+ char *outstr, *ptr, *end;
int rc = 0;
ENTRY;
+
+ OBD_ALLOC(outstr, 256);
+ end = outstr + 256;
+ ptr = outstr;
+ if (!outstr) {
+ RETURN(-ENOMEM);
+ }
if (rec->lrh_type == OBD_CFG_REC) {
struct lustre_cfg *lcfg;
int i;
GOTO(out, rc);
lcfg = (struct lustre_cfg *)cfg_buf;
- CDEBUG(D_INFO, "lcfg command: %x\n", lcfg->lcfg_command);
- if (LUSTRE_CFG_BUFLEN(lcfg, 0) > 0)
- CDEBUG(D_INFO, " devname: %s\n",
- lustre_cfg_string(lcfg, 0));
- if (lcfg->lcfg_flags)
- CDEBUG(D_INFO, " flags: %x\n", lcfg->lcfg_flags);
- if (lcfg->lcfg_nid)
- CDEBUG(D_INFO, " nid: %s\n",
- libcfs_nid2str(lcfg->lcfg_nid));
- if (lcfg->lcfg_nal)
- CDEBUG(D_INFO, " nal: %x (obsolete)\n", lcfg->lcfg_nal);
- if (lcfg->lcfg_num)
- CDEBUG(D_INFO, " num: %x\n", lcfg->lcfg_num);
- for (i = 1; i < lcfg->lcfg_bufcount; i++)
- if (LUSTRE_CFG_BUFLEN(lcfg, i) > 0)
- CDEBUG(D_INFO, " inlbuf%d: %s\n", i,
- lustre_cfg_string(lcfg, i));
- } else if (rec->lrh_type == PTL_CFG_REC) {
- CDEBUG(D_INFO, "Obsolete pcfg command\n");
+ ptr += snprintf(ptr, end-ptr, "cmd=%05x ",
+ lcfg->lcfg_command);
+ if (lcfg->lcfg_flags) {
+ ptr += snprintf(ptr, end-ptr, "flags=%#08x ",
+ lcfg->lcfg_flags);
+ }
+ if (lcfg->lcfg_num) {
+ ptr += snprintf(ptr, end-ptr, "num=%#08x ",
+ lcfg->lcfg_num);
+ }
+ if (lcfg->lcfg_nid) {
+ ptr += snprintf(ptr, end-ptr, "nid=%s("LPX64")\n ",
+ libcfs_nid2str(lcfg->lcfg_nid),
+ lcfg->lcfg_nid);
+ }
+ if (lcfg->lcfg_command == LCFG_MARKER) {
+ struct cfg_marker *marker = lustre_cfg_buf(lcfg, 1);
+ ptr += snprintf(ptr, end-ptr, "marker=%d(%#x)%s '%s'",
+ marker->cm_step, marker->cm_flags,
+ marker->cm_svname, marker->cm_comment);
+ } else {
+ for (i = 0; i < lcfg->lcfg_bufcount; i++) {
+ ptr += snprintf(ptr, end-ptr, "%d:%s ", i,
+ lustre_cfg_string(lcfg, i));
+ }
+ }
+ LCONSOLE(D_WARNING, " %s\n", outstr);
} else {
- CERROR("unhandled lrh_type: %#x\n", rec->lrh_type);
+ LCONSOLE(D_WARNING, "unhandled lrh_type: %#x\n", rec->lrh_type);
rc = -EINVAL;
}
out:
+ OBD_FREE(outstr, 256);
RETURN(rc);
}
int rc, rc2;
ENTRY;
+ LCONSOLE_INFO("Dumping config log %s\n", name);
+
rc = llog_create(ctxt, &llh, NULL, name);
if (rc)
RETURN(rc);
if (rc == 0)
rc = rc2;
+ LCONSOLE_INFO("End config log %s\n", name);
RETURN(rc);
}
/* Cleanup and detach */
-void class_manual_cleanup(struct obd_device *obd)
+int class_manual_cleanup(struct obd_device *obd)
{
struct lustre_cfg *lcfg;
struct lustre_cfg_bufs bufs;
- int err;
+ int rc;
char flags[3]="";
ENTRY;
if (!obd) {
CERROR("empty cleanup\n");
- EXIT;
- return;
+ RETURN(-EALREADY);
}
if (obd->obd_force)
lustre_cfg_bufs_set_string(&bufs, 1, flags);
lcfg = lustre_cfg_new(LCFG_CLEANUP, &bufs);
- err = class_process_config(lcfg);
- if (err)
- CERROR("cleanup failed %d: %s\n", err, obd->obd_name);
+ rc = class_process_config(lcfg);
+ if (rc) {
+ CERROR("cleanup failed %d: %s\n", rc, obd->obd_name);
+ GOTO(out, rc);
+ }
/* the lcfg is almost the same for both ops */
lcfg->lcfg_command = LCFG_DETACH;
- err = class_process_config(lcfg);
+ rc = class_process_config(lcfg);
+ if (rc)
+ CERROR("detach failed %d: %s\n", rc, obd->obd_name);
+out:
lustre_cfg_free(lcfg);
- if (err)
- CERROR("detach failed %d: %s\n", err, obd->obd_name);
- EXIT;
+ RETURN(rc);
}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * lustre/obdclass/obd_mount.c
+ * Client/server mount routines
+ *
+ * Copyright (c) 2006 Cluster File Systems, Inc.
+ * Author: Nathan Rutman <nathan@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org/
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+
+#define DEBUG_SUBSYSTEM S_MGMT
+#define D_MOUNT D_SUPER|D_CONFIG /*|D_WARNING */
+#define PRINT_CMD LCONSOLE
+#define PRINT_MASK D_SUPER
+
+#include <obd.h>
+#include <lvfs.h>
+#include <lustre_fsfilt.h>
+#include <obd_class.h>
+#include <lustre/lustre_user.h>
+#include <linux/version.h>
+#include <lustre_log.h>
+#include <lustre_disk.h>
+#include <lustre_param.h>
+#include <lustre_ver.h>
+
+static int (*client_fill_super)(struct super_block *sb) = NULL;
+
+/*********** string parsing utils *********/
+
+/* returns 0 if we find this key in the buffer, else 1 */
+int class_find_param(char *buf, char *key, char **valp)
+{
+ char *ptr;
+
+ if (!buf)
+ return 1;
+
+ if ((ptr = strstr(buf, key)) == NULL)
+ return 1;
+
+ if (valp)
+ *valp = ptr + strlen(key);
+
+ return 0;
+}
+
+/* returns 0 if this is the first key in the buffer, else 1 */
+int class_match_param(char *buf, char *key, char **valp)
+{
+ if (!buf)
+ return 1;
+
+ if (memcmp(buf, key, strlen(key)) != 0)
+ return 1;
+
+ if (valp)
+ *valp = buf + strlen(key);
+
+ return 0;
+}
+
+/* 0 is good nid,
+ 1 not found
+ < 0 error
+ endh is set to next separator */
+int class_parse_nid(char *buf, lnet_nid_t *nid, char **endh)
+{
+ char tmp, *endp;
+
+ if (!buf)
+ return 1;
+ while (*buf == ',' || *buf == ':')
+ buf++;
+ if (*buf == ' ' || *buf == '/' || *buf == '\0')
+ return 1;
+
+ /* nid separators or end of nids */
+ endp = strpbrk(buf, ",: /");
+ if (endp == NULL)
+ endp = buf + strlen(buf);
+
+ tmp = *endp;
+ *endp = '\0';
+ *nid = libcfs_str2nid(buf);
+ if (*nid == LNET_NID_ANY) {
+ LCONSOLE_ERROR("Can't parse NID '%s'\n", buf);
+ *endp = tmp;
+ return -EINVAL;
+ }
+ *endp = tmp;
+
+ if (endh)
+ *endh = endp;
+ CDEBUG(D_MOUNT, "Nid %s\n", libcfs_nid2str(*nid));
+ return 0;
+}
+
+/*********** mount lookup *********/
+
+DECLARE_MUTEX(lustre_mount_info_lock);
+struct list_head server_mount_info_list = LIST_HEAD_INIT(server_mount_info_list);
+
+static struct lustre_mount_info *server_find_mount(char *name)
+{
+ struct list_head *tmp;
+ struct lustre_mount_info *lmi;
+ ENTRY;
+
+ list_for_each(tmp, &server_mount_info_list) {
+ lmi = list_entry(tmp, struct lustre_mount_info, lmi_list_chain);
+ if (strcmp(name, lmi->lmi_name) == 0)
+ RETURN(lmi);
+ }
+ RETURN(NULL);
+}
+
+/* we must register an obd for a mount before we call the setup routine.
+ *_setup will call lustre_get_mount to get the mnt struct
+ by obd_name, since we can't pass the pointer to setup. */
+static int server_register_mount(char *name, struct super_block *sb,
+ struct vfsmount *mnt)
+{
+ struct lustre_mount_info *lmi;
+ char *name_cp;
+ ENTRY;
+
+ LASSERT(mnt);
+ LASSERT(sb);
+
+ OBD_ALLOC(lmi, sizeof(*lmi));
+ if (!lmi)
+ RETURN(-ENOMEM);
+ OBD_ALLOC(name_cp, strlen(name) + 1);
+ if (!name_cp) {
+ OBD_FREE(lmi, sizeof(*lmi));
+ RETURN(-ENOMEM);
+ }
+ strcpy(name_cp, name);
+
+ down(&lustre_mount_info_lock);
+
+ if (server_find_mount(name)) {
+ up(&lustre_mount_info_lock);
+ OBD_FREE(lmi, sizeof(*lmi));
+ OBD_FREE(name_cp, strlen(name) + 1);
+ CERROR("Already registered %s\n", name);
+ RETURN(-EEXIST);
+ }
+ lmi->lmi_name = name_cp;
+ lmi->lmi_sb = sb;
+ lmi->lmi_mnt = mnt;
+ list_add(&lmi->lmi_list_chain, &server_mount_info_list);
+
+ up(&lustre_mount_info_lock);
+
+ CDEBUG(D_MOUNT, "reg_mnt %p from %s, vfscount=%d\n",
+ lmi->lmi_mnt, name, atomic_read(&lmi->lmi_mnt->mnt_count));
+
+ RETURN(0);
+}
+
+/* when an obd no longer needs a mount */
+static int server_deregister_mount(char *name)
+{
+ struct lustre_mount_info *lmi;
+ ENTRY;
+
+ down(&lustre_mount_info_lock);
+ lmi = server_find_mount(name);
+ if (!lmi) {
+ up(&lustre_mount_info_lock);
+ CERROR("%s not registered\n", name);
+ RETURN(-ENOENT);
+ }
+
+ CDEBUG(D_MOUNT, "dereg_mnt %p from %s, vfscount=%d\n",
+ lmi->lmi_mnt, name, atomic_read(&lmi->lmi_mnt->mnt_count));
+
+ OBD_FREE(lmi->lmi_name, strlen(lmi->lmi_name) + 1);
+ list_del(&lmi->lmi_list_chain);
+ OBD_FREE(lmi, sizeof(*lmi));
+ up(&lustre_mount_info_lock);
+
+ RETURN(0);
+}
+
+/* Deregister anyone referencing the mnt. Everyone should have
+ put_mount in *_cleanup, but this is a catch-all in case of err... */
+/* FIXME this should be removed from lustre_free_lsi, which may be called
+ from server_put_mount _before_ it gets to server_deregister_mount.
+ Leave it here for now for the error message it shows... */
+static void server_deregister_mount_all(struct vfsmount *mnt)
+{
+ struct list_head *tmp, *n;
+ struct lustre_mount_info *lmi;
+ ENTRY;
+
+ if (!mnt) {
+ EXIT;
+ return;
+ }
+
+ //down(&lustre_mount_info_lock);
+ list_for_each_safe(tmp, n, &server_mount_info_list) {
+ lmi = list_entry(tmp, struct lustre_mount_info, lmi_list_chain);
+ if (lmi->lmi_mnt == mnt) {
+ CERROR("Mount %p still referenced by %s\n", mnt,
+ lmi->lmi_name);
+ //OBD_FREE(lmi->lmi_name, strlen(lmi->lmi_name) + 1);
+ //list_del(&lmi->lmi_list_chain);
+ //OBD_FREE(lmi, sizeof(*lmi));
+ }
+ }
+ //up(&lustre_mount_info_lock);
+ EXIT;
+}
+
+/* obd's look up a registered mount using their name. This is just
+ for initial obd setup to find the mount struct. It should not be
+ called every time you want to mntget. */
+struct lustre_mount_info *server_get_mount(char *name)
+{
+ struct lustre_mount_info *lmi;
+ struct lustre_sb_info *lsi;
+ ENTRY;
+
+ down(&lustre_mount_info_lock);
+ lmi = server_find_mount(name);
+ up(&lustre_mount_info_lock);
+ if (!lmi) {
+ CERROR("Can't find mount for %s\n", name);
+ RETURN(NULL);
+ }
+ lsi = s2lsi(lmi->lmi_sb);
+ mntget(lmi->lmi_mnt);
+ atomic_inc(&lsi->lsi_mounts);
+
+ CDEBUG(D_MOUNT, "get_mnt %p from %s, refs=%d, vfscount=%d\n",
+ lmi->lmi_mnt, name, atomic_read(&lsi->lsi_mounts),
+ atomic_read(&lmi->lmi_mnt->mnt_count));
+
+ RETURN(lmi);
+}
+
+static void unlock_mntput(struct vfsmount *mnt)
+{
+ if (kernel_locked()) {
+ unlock_kernel();
+ mntput(mnt);
+ lock_kernel();
+ } else {
+ mntput(mnt);
+ }
+}
+
+static int lustre_put_lsi(struct super_block *sb);
+
+/* to be called from obd_cleanup methods */
+int server_put_mount(char *name, struct vfsmount *mnt)
+{
+ struct lustre_mount_info *lmi;
+ struct lustre_sb_info *lsi;
+ ENTRY;
+
+ down(&lustre_mount_info_lock);
+ lmi = server_find_mount(name);
+ up(&lustre_mount_info_lock);
+ if (!lmi) {
+ CERROR("Can't find mount for %s\n", name);
+ RETURN(-ENOENT);
+ }
+ lsi = s2lsi(lmi->lmi_sb);
+ LASSERT(lmi->lmi_mnt == mnt);
+ unlock_mntput(lmi->lmi_mnt);
+
+ CDEBUG(D_MOUNT, "put_mnt %p from %s, refs=%d, vfscount=%d\n",
+ lmi->lmi_mnt, name, atomic_read(&lsi->lsi_mounts),
+ atomic_read(&lmi->lmi_mnt->mnt_count));
+
+ if (lustre_put_lsi(lmi->lmi_sb)) {
+ CDEBUG(D_MOUNT, "Last put of mnt %p from %s, vfscount=%d\n",
+ lmi->lmi_mnt, name,
+ atomic_read(&lmi->lmi_mnt->mnt_count));
+ /* last mount is the One True Mount */
+ if (atomic_read(&lmi->lmi_mnt->mnt_count) > 1)
+ CERROR("%s: mount busy, vfscount=%d!\n", name,
+ atomic_read(&lmi->lmi_mnt->mnt_count));
+ }
+
+ /* this obd should never need the mount again */
+ server_deregister_mount(name);
+
+ RETURN(0);
+}
+
+
+/******* mount helper utilities *********/
+
+static void ldd_print(struct lustre_disk_data *ldd)
+{
+ PRINT_CMD(PRINT_MASK, " disk data:\n");
+ PRINT_CMD(PRINT_MASK, "config: %d\n", ldd->ldd_config_ver);
+ PRINT_CMD(PRINT_MASK, "fs: %s\n", ldd->ldd_fsname);
+ PRINT_CMD(PRINT_MASK, "server: %s\n", ldd->ldd_svname);
+ PRINT_CMD(PRINT_MASK, "index: %04x\n", ldd->ldd_svindex);
+ PRINT_CMD(PRINT_MASK, "flags: %#x\n", ldd->ldd_flags);
+ PRINT_CMD(PRINT_MASK, "diskfs: %s\n", MT_STR(ldd));
+ PRINT_CMD(PRINT_MASK, "options: %s\n", ldd->ldd_mount_opts);
+ PRINT_CMD(PRINT_MASK, "params: %s\n", ldd->ldd_params);
+}
+
+static int ldd_parse(struct lvfs_run_ctxt *mount_ctxt,
+ struct lustre_disk_data *ldd)
+{
+ struct lvfs_run_ctxt saved;
+ struct file *file;
+ loff_t off = 0;
+ unsigned long len;
+ int rc;
+ ENTRY;
+
+ push_ctxt(&saved, mount_ctxt, NULL);
+
+ file = filp_open(MOUNT_DATA_FILE, O_RDONLY, 0644);
+ if (IS_ERR(file)) {
+ rc = PTR_ERR(file);
+ CERROR("cannot open %s: rc = %d\n", MOUNT_DATA_FILE, rc);
+ GOTO(out, rc);
+ }
+
+ len = file->f_dentry->d_inode->i_size;
+ CDEBUG(D_MOUNT, "Have %s, size %lu\n", MOUNT_DATA_FILE, len);
+ if (len != sizeof(*ldd)) {
+ CERROR("disk data size does not match: see %lu expect %u\n",
+ len, sizeof(*ldd));
+ GOTO(out_close, rc = -EINVAL);
+ }
+
+ rc = lustre_fread(file, ldd, len, &off);
+ if (rc != len) {
+ CERROR("error reading %s: read %d of %lu\n",
+ MOUNT_DATA_FILE, rc, len);
+ GOTO(out_close, rc = -EINVAL);
+ }
+ rc = 0;
+
+ if (ldd->ldd_magic != LDD_MAGIC) {
+ /* FIXME add swabbing support */
+ CERROR("Bad magic in %s: %x!=%x\n", MOUNT_DATA_FILE,
+ ldd->ldd_magic, LDD_MAGIC);
+ GOTO(out_close, rc = -EINVAL);
+ }
+
+ if (ldd->ldd_feature_incompat & ~LDD_INCOMPAT_SUPP) {
+ CERROR("%s: unsupported incompat filesystem feature(s) %x\n",
+ ldd->ldd_svname,
+ ldd->ldd_feature_incompat & ~LDD_INCOMPAT_SUPP);
+ GOTO(out_close, rc = -EINVAL);
+ }
+ if (ldd->ldd_feature_rocompat & ~LDD_ROCOMPAT_SUPP) {
+ CERROR("%s: unsupported read-only filesystem feature(s) %x\n",
+ ldd->ldd_svname,
+ ldd->ldd_feature_rocompat & ~LDD_ROCOMPAT_SUPP);
+ /* Do something like remount filesystem read-only */
+ GOTO(out_close, rc = -EINVAL);
+ }
+
+ ldd_print(ldd);
+
+out_close:
+ filp_close(file, 0);
+out:
+ pop_ctxt(&saved, mount_ctxt, NULL);
+ RETURN(rc);
+}
+
+static int ldd_write(struct lvfs_run_ctxt *mount_ctxt,
+ struct lustre_disk_data *ldd)
+{
+ struct lvfs_run_ctxt saved;
+ struct file *file;
+ loff_t off = 0;
+ unsigned long len = sizeof(struct lustre_disk_data);
+ int rc = 0;
+ ENTRY;
+
+ LASSERT(ldd->ldd_magic == LDD_MAGIC);
+
+ ldd->ldd_config_ver++;
+
+ push_ctxt(&saved, mount_ctxt, NULL);
+
+ file = filp_open(MOUNT_DATA_FILE, O_RDWR, 0644);
+ if (IS_ERR(file)) {
+ rc = PTR_ERR(file);
+ CERROR("cannot open %s: rc = %d\n", MOUNT_DATA_FILE, rc);
+ GOTO(out, rc);
+ }
+
+ rc = lustre_fwrite(file, ldd, len, &off);
+ if (rc != len) {
+ CERROR("error writing %s: read %d of %lu\n",
+ MOUNT_DATA_FILE, rc, len);
+ GOTO(out_close, rc = -EINVAL);
+ }
+
+ rc = 0;
+ ldd_print(ldd);
+
+out_close:
+ filp_close(file, 0);
+out:
+ pop_ctxt(&saved, mount_ctxt, NULL);
+ RETURN(rc);
+}
+
+
+/**************** config llog ********************/
+
+/* Get a config log from the MGS and process it.
+ This func is called for both clients and servers.
+ Continue to process new statements appended to the logs
+ (whenever the config lock is revoked) until lustre_end_log
+ is called. */
+int lustre_process_log(struct super_block *sb, char *logname,
+ struct config_llog_instance *cfg)
+{
+ struct lustre_cfg *lcfg;
+ struct lustre_cfg_bufs bufs;
+ struct lustre_sb_info *lsi = s2lsi(sb);
+ struct obd_device *mgc = lsi->lsi_mgc;
+ int rc;
+ ENTRY;
+
+ LASSERT(mgc);
+ LASSERT(cfg);
+
+ /* mgc_process_config */
+ lustre_cfg_bufs_reset(&bufs, mgc->obd_name);
+ lustre_cfg_bufs_set_string(&bufs, 1, logname);
+ lustre_cfg_bufs_set(&bufs, 2, cfg, sizeof(*cfg));
+ lustre_cfg_bufs_set(&bufs, 3, &sb, sizeof(sb));
+ lcfg = lustre_cfg_new(LCFG_LOG_START, &bufs);
+ rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
+ lustre_cfg_free(lcfg);
+
+ if (rc)
+ LCONSOLE_ERROR("%s: The configuration '%s' could not be read "
+ "from the MGS (%d). This may be the result of "
+ "communication errors between this node and "
+ "the MGS, or the MGS may not be running.\n",
+ mgc->obd_name, logname, rc);
+
+ class_obd_list();
+ RETURN(rc);
+}
+
+/* Stop watching this config log for updates */
+int lustre_end_log(struct super_block *sb, char *logname,
+ struct config_llog_instance *cfg)
+{
+ struct lustre_cfg *lcfg;
+ struct lustre_cfg_bufs bufs;
+ struct lustre_sb_info *lsi = s2lsi(sb);
+ struct obd_device *mgc = lsi->lsi_mgc;
+ int rc;
+ ENTRY;
+
+ if (!mgc)
+ RETURN(-ENOENT);
+
+ /* mgc_process_config */
+ lustre_cfg_bufs_reset(&bufs, mgc->obd_name);
+ lustre_cfg_bufs_set_string(&bufs, 1, logname);
+ if (cfg)
+ lustre_cfg_bufs_set(&bufs, 2, cfg, sizeof(*cfg));
+ lcfg = lustre_cfg_new(LCFG_LOG_END, &bufs);
+ rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
+ lustre_cfg_free(lcfg);
+ RETURN(rc);
+}
+
+/**************** obd start *******************/
+
+static int do_lcfg(char *cfgname, lnet_nid_t nid, int cmd,
+ char *s1, char *s2, char *s3, char *s4)
+{
+ struct lustre_cfg_bufs bufs;
+ struct lustre_cfg * lcfg = NULL;
+ int rc;
+
+ CDEBUG(D_TRACE, "lcfg %s %#x %s %s %s %s\n", cfgname,
+ cmd, s1, s2, s3, s4);
+
+ lustre_cfg_bufs_reset(&bufs, cfgname);
+ if (s1)
+ lustre_cfg_bufs_set_string(&bufs, 1, s1);
+ if (s2)
+ lustre_cfg_bufs_set_string(&bufs, 2, s2);
+ if (s3)
+ lustre_cfg_bufs_set_string(&bufs, 3, s3);
+ if (s4)
+ lustre_cfg_bufs_set_string(&bufs, 4, s4);
+
+ lcfg = lustre_cfg_new(cmd, &bufs);
+ lcfg->lcfg_nid = nid;
+ rc = class_process_config(lcfg);
+ lustre_cfg_free(lcfg);
+ return(rc);
+}
+
+static int lustre_start_simple(char *obdname, char *type, char *uuid,
+ char *s1, char *s2)
+{
+ int rc;
+ CDEBUG(D_MOUNT, "Starting obd %s (typ=%s)\n", obdname, type);
+
+ rc = do_lcfg(obdname, 0, LCFG_ATTACH, type, uuid, 0, 0);
+ if (rc) {
+ CERROR("%s attach error %d\n", obdname, rc);
+ return(rc);
+ }
+ rc = do_lcfg(obdname, 0, LCFG_SETUP, s1, s2, 0, 0);
+ if (rc) {
+ CERROR("%s setup error %d\n", obdname, rc);
+ do_lcfg(obdname, 0, LCFG_DETACH, 0, 0, 0, 0);
+ }
+ return rc;
+}
+
+/* Set up a MGS to serve startup logs */
+static int server_start_mgs(struct super_block *sb)
+{
+ struct lustre_sb_info *lsi = s2lsi(sb);
+ struct vfsmount *mnt = lsi->lsi_srv_mnt;
+ struct lustre_mount_info *lmi;
+ int rc = 0;
+ ENTRY;
+ LASSERT(mnt);
+
+ /* It is impossible to have more than 1 MGS per node, since
+ MGC wouldn't know which to connect to */
+ lmi = server_find_mount(LUSTRE_MGS_OBDNAME);
+ if (lmi) {
+ lsi = s2lsi(lmi->lmi_sb);
+ LCONSOLE_ERROR("The MGS service was already started from "
+ "server %s\n", lsi->lsi_ldd->ldd_svname);
+ RETURN(-EALREADY);
+ }
+
+ CDEBUG(D_CONFIG, "Start MGS service %s\n", LUSTRE_MGS_OBDNAME);
+
+ rc = server_register_mount(LUSTRE_MGS_OBDNAME, sb, mnt);
+
+ if (!rc &&
+ ((rc = lustre_start_simple(LUSTRE_MGS_OBDNAME, LUSTRE_MGS_NAME,
+ LUSTRE_MGS_OBDNAME, 0, 0))))
+ server_deregister_mount(LUSTRE_MGS_OBDNAME);
+
+ if (rc)
+ LCONSOLE_ERROR("Failed to start MGS '%s' (%d). Is the 'mgs' "
+ "module loaded?\n", LUSTRE_MGS_OBDNAME, rc);
+
+ RETURN(rc);
+}
+
+static int server_stop_mgs(struct super_block *sb)
+{
+ struct obd_device *obd;
+ int rc;
+ ENTRY;
+
+ CDEBUG(D_MOUNT, "Stop MGS service %s\n", LUSTRE_MGS_OBDNAME);
+
+ /* There better be only one MGS */
+ obd = class_name2obd(LUSTRE_MGS_OBDNAME);
+ if (!obd) {
+ CDEBUG(D_CONFIG, "mgs %s not running\n", LUSTRE_MGS_OBDNAME);
+ RETURN(-EALREADY);
+ }
+
+ /* The MGS should always stop when we say so */
+ obd->obd_force = 1;
+ rc = class_manual_cleanup(obd);
+ RETURN(rc);
+}
+
+/* Set up a mgcobd to process startup logs */
+static int lustre_start_mgc(struct super_block *sb)
+{
+ struct lustre_handle mgc_conn = {0, };
+ struct obd_connect_data ocd = { 0 };
+ struct lustre_sb_info *lsi = s2lsi(sb);
+ struct obd_device *obd;
+ struct obd_export *exp;
+ struct obd_uuid *uuid;
+ class_uuid_t uuidc;
+ lnet_nid_t nid;
+ char niduuid[10];
+ char *ptr;
+ int recov_bk = 0;
+ int rc = 0, i = 0, j;
+ ENTRY;
+
+ LASSERT(lsi->lsi_lmd);
+
+ obd = class_name2obd(LUSTRE_MGC_OBDNAME);
+ if (obd) {
+ atomic_inc(&obd->u.cli.cl_mgc_refcount);
+ /* FIXME There's only one MGC, but users could give different
+ MGS nids on the mount line. So now do we add new MGS uuids
+ or not? If there's truly one MGS per site, the MGS uuids
+ _should_ all be the same. Maybe check here?
+ */
+
+ /* If we are restarting the MGS, don't try to keep the MGC's
+ old connection, or registration will fail. */
+ if ((lsi->lsi_flags & LSI_SERVER) && IS_MGS(lsi->lsi_ldd)) {
+ CDEBUG(D_MOUNT|D_ERROR, "New MGS with live MGC\n");
+ recov_bk = 1;
+ }
+
+ /* Try all connections, but only once (again).
+ We don't want to block another target from starting
+ (using its local copy of the log), but we do want to connect
+ if at all possible. */
+ recov_bk++;
+ CDEBUG(D_MOUNT, "Set MGS reconnect %d\n", recov_bk);
+ rc = obd_set_info_async(obd->obd_self_export,
+ strlen(KEY_INIT_RECOV_BACKUP),
+ KEY_INIT_RECOV_BACKUP,
+ sizeof(recov_bk), &recov_bk, NULL);
+ GOTO(out, rc = 0);
+ }
+
+ CDEBUG(D_MOUNT, "Start MGC '%s'\n", LUSTRE_MGC_OBDNAME);
+
+ /* Add the primary nids for the MGS */
+ if (lsi->lsi_flags & LSI_SERVER) {
+ ptr = lsi->lsi_ldd->ldd_params;
+ if (IS_MGS(lsi->lsi_ldd)) {
+ /* Use local nids (including LO) */
+ lnet_process_id_t id;
+ while ((rc = LNetGetId(i++, &id)) != -ENOENT) {
+ rc = do_lcfg(LUSTRE_MGC_OBDNAME, id.nid,
+ LCFG_ADD_UUID, "mgsnid0", 0,0,0);
+ }
+ } else {
+ /* Use mgsnode= nids */
+ if (class_find_param(ptr, PARAM_MGSNODE, &ptr) != 0) {
+ CERROR("No MGS nids given.\n");
+ RETURN(-EINVAL);
+ }
+ while (class_parse_nid(ptr, &nid, &ptr) == 0) {
+ rc = do_lcfg(LUSTRE_MGC_OBDNAME, nid,
+ LCFG_ADD_UUID, "mgsnid0", 0,0,0);
+ i++;
+ }
+ }
+ } else { /* client */
+ /* use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
+ ptr = lsi->lsi_lmd->lmd_dev;
+ while (class_parse_nid(ptr, &nid, &ptr) == 0) {
+ rc = do_lcfg(LUSTRE_MGC_OBDNAME, nid,
+ LCFG_ADD_UUID, "mgsnid0", 0,0,0);
+ i++;
+ if (*ptr == ':')
+ break;
+ }
+ }
+ if (i == 0) {
+ CERROR("No valid MGS nids found.\n");
+ RETURN(-EINVAL);
+ }
+ lsi->lsi_lmd->lmd_mgs_failnodes = 1;
+
+ /* Random uuid for MGC allows easier reconnects */
+ OBD_ALLOC_PTR(uuid);
+ class_generate_random_uuid(uuidc);
+ class_uuid_unparse(uuidc, uuid);
+
+ /* Start the MGC */
+ rc = lustre_start_simple(LUSTRE_MGC_OBDNAME, LUSTRE_MGC_NAME,
+ (char *)uuid->uuid, LUSTRE_MGS_OBDNAME,
+ "mgsnid0");
+ OBD_FREE_PTR(uuid);
+ if (rc)
+ RETURN(rc);
+
+ /* Add any failover MGS nids */
+ i = 1;
+ while ((*ptr == ':' ||
+ class_find_param(ptr, PARAM_MGSNODE, &ptr) == 0)) {
+ /* New failover node */
+ sprintf(niduuid, "mgsnid%d", i);
+ j = 0;
+ while (class_parse_nid(ptr, &nid, &ptr) == 0) {
+ j++;
+ rc = do_lcfg(LUSTRE_MGC_OBDNAME, nid,
+ LCFG_ADD_UUID, niduuid, 0,0,0);
+ if (*ptr == ':')
+ break;
+ }
+ if (j > 0) {
+ rc = do_lcfg(LUSTRE_MGC_OBDNAME, 0, LCFG_ADD_CONN,
+ niduuid, 0, 0, 0);
+ i++;
+ } else {
+ /* at ":/fsname" */
+ break;
+ }
+ }
+ lsi->lsi_lmd->lmd_mgs_failnodes = i;
+
+ obd = class_name2obd(LUSTRE_MGC_OBDNAME);
+ if (!obd) {
+ CERROR("Can't find mgcobd %s\n", LUSTRE_MGC_OBDNAME);
+ RETURN(-ENOTCONN);
+ }
+
+ /* Try all connections, but only once. */
+ recov_bk = 1;
+ rc = obd_set_info_async(obd->obd_self_export,
+ strlen(KEY_INIT_RECOV_BACKUP),
+ KEY_INIT_RECOV_BACKUP,
+ sizeof(recov_bk), &recov_bk, NULL);
+ if (rc)
+ /* nonfatal */
+ CERROR("can't set %s %d\n", KEY_INIT_RECOV_BACKUP, rc);
+
+ /* FIXME add ACL support? */
+ //ocd.ocd_connect_flags = OBD_CONNECT_ACL;
+
+ /* We connect to the MGS at setup, and don't disconnect until cleanup */
+ rc = obd_connect(&mgc_conn, obd, &(obd->obd_uuid), &ocd);
+ if (rc) {
+ CERROR("connect failed %d\n", rc);
+ GOTO(out, rc);
+ }
+
+ exp = class_conn2export(&mgc_conn);
+ obd->u.cli.cl_mgc_mgsexp = exp;
+
+ /* And keep a refcount of servers/clients who started with "mount",
+ so we know when we can get rid of the mgc. */
+ atomic_set(&obd->u.cli.cl_mgc_refcount, 1);
+
+out:
+ /* Keep the mgc info in the sb. Note that many lsi's can point
+ to the same mgc.*/
+ lsi->lsi_mgc = obd;
+ RETURN(rc);
+}
+
+static int lustre_stop_mgc(struct super_block *sb)
+{
+ struct lustre_sb_info *lsi = s2lsi(sb);
+ struct obd_device *obd;
+ char niduuid[10];
+ int i, rc;
+ ENTRY;
+
+ if (!lsi)
+ RETURN(-ENOENT);
+ obd = lsi->lsi_mgc;
+ if (!obd)
+ RETURN(-ENOENT);
+
+ lsi->lsi_mgc = NULL;
+ if (!atomic_dec_and_test(&obd->u.cli.cl_mgc_refcount)) {
+ /* This is not fatal, every client that stops
+ will call in here. */
+ CDEBUG(D_MOUNT, "mgc still has %d references.\n",
+ atomic_read(&obd->u.cli.cl_mgc_refcount));
+ RETURN(-EBUSY);
+ }
+
+ /* MGC must always stop */
+ obd->obd_force = 1;
+ /* client_disconnect_export uses the no_recov flag to decide whether it
+ should disconnect or just invalidate. (The MGC has no
+ recoverable data in any case.) */
+ obd->obd_no_recov = 1;
+
+ if (obd->u.cli.cl_mgc_mgsexp)
+ obd_disconnect(obd->u.cli.cl_mgc_mgsexp);
+
+ rc = class_manual_cleanup(obd);
+ if (rc)
+ RETURN(rc);
+
+ for (i = 0; i < lsi->lsi_lmd->lmd_mgs_failnodes; i++) {
+ sprintf(niduuid, "mgsnid%d", i);
+ rc = do_lcfg(obd->obd_name, 0, LCFG_DEL_UUID,
+ niduuid, 0, 0, 0);
+ if (rc)
+ CERROR("del MDC UUID %s failed: rc = %d\n",
+ niduuid, rc);
+ }
+ /* class_import_put will get rid of the additional connections */
+
+ RETURN(0);
+}
+
+/* Since there's only one mgc per node, we have to change it's fs to get
+ access to the right disk. */
+static int server_mgc_set_fs(struct obd_device *mgc, struct super_block *sb)
+{
+ struct lustre_sb_info *lsi = s2lsi(sb);
+ int rc;
+ ENTRY;
+
+ CDEBUG(D_MOUNT, "Set mgc disk for %s\n", lsi->lsi_lmd->lmd_dev);
+
+ /* cl_mgc_sem in mgc insures we sleep if the mgc_fs is busy */
+ rc = obd_set_info_async(mgc->obd_self_export,
+ strlen("set_fs"), "set_fs",
+ sizeof(*sb), sb, NULL);
+ if (rc) {
+ CERROR("can't set_fs %d\n", rc);
+ }
+
+ RETURN(rc);
+}
+
+static int server_mgc_clear_fs(struct obd_device *mgc)
+{
+ int rc;
+ ENTRY;
+
+ CDEBUG(D_MOUNT, "Unassign mgc disk\n");
+
+ rc = obd_set_info_async(mgc->obd_self_export,
+ strlen("clear_fs"), "clear_fs",
+ 0, NULL, NULL);
+ RETURN(rc);
+}
+
+/* Stop MDS/OSS if nobody is using them */
+static int server_stop_servers(int lddflags, int lsiflags)
+{
+ struct obd_device *obd = NULL;
+ struct obd_type *type = NULL;
+ int rc = 0;
+ ENTRY;
+
+ /* Either an MDT or an OST or neither */
+
+ /* if this was an MDT, and there are no more MDT's, clean up the MDS */
+ if ((lddflags & LDD_F_SV_TYPE_MDT) && (obd = class_name2obd("MDS"))) {
+ //FIXME pre-rename, should eventually be LUSTRE_MDT_NAME
+ type = class_search_type(LUSTRE_MDS_NAME);
+ }
+ /* if this was an OST, and there are no more OST's, clean up the OSS */
+ if ((lddflags & LDD_F_SV_TYPE_OST) && (obd = class_name2obd("OSS"))) {
+ type = class_search_type(LUSTRE_OST_NAME);
+ }
+
+ if (obd && (!type || !type->typ_refcnt)) {
+ int err;
+ obd->obd_force = 1;
+ /* obd_fail doesn't mean much on a server obd */
+ err = class_manual_cleanup(obd);
+ if (!rc)
+ rc = err;
+ }
+
+ RETURN(rc);
+}
+
+int server_mti_print(char *title, struct mgs_target_info *mti)
+{
+ PRINT_CMD(PRINT_MASK, "mti %s\n", title);
+ PRINT_CMD(PRINT_MASK, "server: %s\n", mti->mti_svname);
+ PRINT_CMD(PRINT_MASK, "fs: %s\n", mti->mti_fsname);
+ PRINT_CMD(PRINT_MASK, "uuid: %s\n", mti->mti_uuid);
+ PRINT_CMD(PRINT_MASK, "ver: %d flags: %#x\n",
+ mti->mti_config_ver, mti->mti_flags);
+ return(0);
+}
+
+static int server_sb2mti(struct super_block *sb, struct mgs_target_info *mti)
+{
+ struct lustre_sb_info *lsi = s2lsi(sb);
+ struct lustre_disk_data *ldd = lsi->lsi_ldd;
+ lnet_process_id_t id;
+ int i = 0;
+ ENTRY;
+
+ if (!(lsi->lsi_flags & LSI_SERVER))
+ RETURN(-EINVAL);
+
+ strncpy(mti->mti_fsname, ldd->ldd_fsname,
+ sizeof(mti->mti_fsname));
+ strncpy(mti->mti_svname, ldd->ldd_svname,
+ sizeof(mti->mti_svname));
+
+ mti->mti_nid_count = 0;
+ while (LNetGetId(i++, &id) != -ENOENT) {
+ if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND)
+ continue;
+ mti->mti_nids[mti->mti_nid_count] = id.nid;
+ mti->mti_nid_count++;
+ if (mti->mti_nid_count >= MTI_NIDS_MAX) {
+ CWARN("Only using first %d nids for %s\n",
+ mti->mti_nid_count, mti->mti_svname);
+ break;
+ }
+ }
+
+ mti->mti_config_ver = 0;
+ mti->mti_flags = ldd->ldd_flags;
+ mti->mti_stripe_index = ldd->ldd_svindex;
+ memcpy(mti->mti_uuid, ldd->ldd_uuid, sizeof(mti->mti_uuid));
+ if (strlen(ldd->ldd_params) > sizeof(mti->mti_params)) {
+ CERROR("params too big for mti\n");
+ RETURN(-ENOMEM);
+ /* FIXME we can't send a msg much bigger than 4k - use bulk? */
+ }
+ memcpy(mti->mti_params, ldd->ldd_params, sizeof(mti->mti_params));
+ RETURN(0);
+}
+
+/* Register an old or new target with the MGS. If needed MGS will construct
+ startup logs and assign index */
+int server_register_target(struct super_block *sb)
+{
+ struct lustre_sb_info *lsi = s2lsi(sb);
+ struct obd_device *mgc = lsi->lsi_mgc;
+ struct lustre_disk_data *ldd = lsi->lsi_ldd;
+ struct mgs_target_info *mti = NULL;
+ int rc;
+ ENTRY;
+
+ LASSERT(mgc);
+
+ if (!(lsi->lsi_flags & LSI_SERVER))
+ RETURN(-EINVAL);
+
+ OBD_ALLOC_PTR(mti);
+ if (!mti)
+ RETURN(-ENOMEM);
+ rc = server_sb2mti(sb, mti);
+ if (rc)
+ GOTO(out, rc);
+
+ CDEBUG(D_MOUNT, "Registration %s, fs=%s, %s, index=%04x, flags=%#x\n",
+ mti->mti_svname, mti->mti_fsname,
+ libcfs_nid2str(mti->mti_nids[0]), mti->mti_stripe_index,
+ mti->mti_flags);
+
+ /* Register the target */
+ /* FIXME use mdc_process_config instead */
+ rc = obd_set_info_async(mgc->u.cli.cl_mgc_mgsexp,
+ strlen("register_target"), "register_target",
+ sizeof(*mti), mti, NULL);
+ if (rc) {
+ CERROR("registration with the MGS failed (%d)\n", rc);
+ GOTO(out, rc);
+ }
+
+ /* Always update our flags */
+ ldd->ldd_flags = mti->mti_flags & ~LDD_F_REWRITE_LDD;
+
+ /* If this flag is set, it means the MGS wants us to change our
+ on-disk data. (So far this means just the index.) */
+ if (mti->mti_flags & LDD_F_REWRITE_LDD) {
+ char *label;
+ int err;
+ CDEBUG(D_MOUNT, "Changing on-disk index from %#x to %#x "
+ "for %s\n", ldd->ldd_svindex, mti->mti_stripe_index,
+ mti->mti_svname);
+ ldd->ldd_svindex = mti->mti_stripe_index;
+ strncpy(ldd->ldd_svname, mti->mti_svname,
+ sizeof(ldd->ldd_svname));
+ /* or ldd_make_sv_name(ldd); */
+ ldd_write(&mgc->obd_lvfs_ctxt, ldd);
+
+ err = fsfilt_set_label(mgc, lsi->lsi_srv_mnt->mnt_sb,
+ mti->mti_svname);
+ if (err)
+ CERROR("Label set error %d\n", err);
+ label = fsfilt_get_label(mgc, lsi->lsi_srv_mnt->mnt_sb);
+ if (label)
+ CDEBUG(D_MOUNT, "Disk label changed to %s\n", label);
+ }
+
+out:
+ if (mti)
+ OBD_FREE_PTR(mti);
+ RETURN(rc);
+}
+
+/* Start targets */
+static int server_start_targets(struct super_block *sb, struct vfsmount *mnt)
+{
+ struct obd_device *obd;
+ struct lustre_sb_info *lsi = s2lsi(sb);
+ struct config_llog_instance cfg;
+ int rc;
+ ENTRY;
+
+ CDEBUG(D_MOUNT, "starting target %s\n", lsi->lsi_ldd->ldd_svname);
+
+ /* If we're an MDT, make sure the global MDS is running */
+ if (lsi->lsi_ldd->ldd_flags & LDD_F_SV_TYPE_MDT) {
+ /* make sure (what will be called) the MDS is started */
+ obd = class_name2obd("MDS");
+ if (!obd) {
+ //FIXME pre-rename, should eventually be LUSTRE_MDS_NAME
+ rc = lustre_start_simple("MDS", LUSTRE_MDT_NAME,
+ "MDS_uuid", 0, 0);
+ if (rc) {
+ CERROR("failed to start MDS: %d\n", rc);
+ GOTO(out_servers, rc);
+ }
+ }
+ }
+
+ /* If we're an OST, make sure the global OSS is running */
+ if (lsi->lsi_ldd->ldd_flags & LDD_F_SV_TYPE_OST) {
+ /* make sure OSS is started */
+ obd = class_name2obd("OSS");
+ if (!obd) {
+ rc = lustre_start_simple("OSS", LUSTRE_OSS_NAME,
+ "OSS_uuid", 0, 0);
+ if (rc) {
+ CERROR("failed to start OSS: %d\n", rc);
+ GOTO(out_servers, rc);
+ }
+ }
+ }
+
+ /* Set the mgc fs to our server disk. This allows the MGC
+ to read and write configs locally. */
+ server_mgc_set_fs(lsi->lsi_mgc, sb);
+
+ /* Register with MGS */
+ rc = server_register_target(sb);
+ if (rc && (lsi->lsi_ldd->ldd_flags &
+ (LDD_F_NEED_INDEX | LDD_F_UPDATE | LDD_F_UPGRADE14))){
+ CERROR("Required registration failed for %s: %d\n",
+ lsi->lsi_ldd->ldd_svname, rc);
+ if (rc == -EIO) {
+ LCONSOLE_ERROR("Communication error with the MGS. Is "
+ "the MGS running?\n");
+ }
+ GOTO(out, rc);
+ }
+
+ /* Let the target look up the mount using the target's name
+ (we can't pass the sb or mnt through class_process_config.) */
+ rc = server_register_mount(lsi->lsi_ldd->ldd_svname, sb, mnt);
+ if (rc)
+ GOTO(out, rc);
+
+ /* Start targets using the llog named for the target */
+ memset(&cfg, 0, sizeof(cfg));
+ rc = lustre_process_log(sb, lsi->lsi_ldd->ldd_svname, &cfg);
+ if (rc) {
+ CERROR("failed to start server %s: %d\n",
+ lsi->lsi_ldd->ldd_svname, rc);
+ GOTO(out, rc);
+ }
+
+ if (!class_name2obd(lsi->lsi_ldd->ldd_svname)) {
+ CERROR("no server named %s was started\n",
+ lsi->lsi_ldd->ldd_svname);
+ rc = -ENXIO;
+ }
+
+out:
+ /* Release the mgc fs for others to use */
+ server_mgc_clear_fs(lsi->lsi_mgc);
+
+out_servers:
+ RETURN(rc);
+}
+
+/***************** lustre superblock **************/
+
+struct lustre_sb_info *lustre_init_lsi(struct super_block *sb)
+{
+ struct lustre_sb_info *lsi = NULL;
+ ENTRY;
+
+ OBD_ALLOC(lsi, sizeof(*lsi));
+ if (!lsi)
+ RETURN(NULL);
+ OBD_ALLOC(lsi->lsi_lmd, sizeof(*lsi->lsi_lmd));
+ if (!lsi->lsi_lmd) {
+ OBD_FREE(lsi, sizeof(*lsi));
+ RETURN(NULL);
+ }
+
+ lsi->lsi_lmd->lmd_exclude_count = 0;
+ s2lsi_nocast(sb) = lsi;
+ /* we take 1 extra ref for our setup */
+ atomic_set(&lsi->lsi_mounts, 1);
+
+ /* Default umount style */
+ lsi->lsi_flags = LSI_UMOUNT_FAILOVER;
+ RETURN(lsi);
+}
+
+static int lustre_free_lsi(struct super_block *sb)
+{
+ struct lustre_sb_info *lsi = s2lsi(sb);
+ ENTRY;
+
+ if (!lsi)
+ RETURN(0);
+
+ CDEBUG(D_MOUNT, "Freeing lsi\n");
+
+ /* someone didn't call server_put_mount. */
+ LASSERT(atomic_read(&lsi->lsi_mounts) == 0);
+
+ if (lsi->lsi_ldd != NULL)
+ OBD_FREE(lsi->lsi_ldd, sizeof(*lsi->lsi_ldd));
+
+ if (lsi->lsi_lmd != NULL) {
+ if (lsi->lsi_lmd->lmd_dev != NULL)
+ OBD_FREE(lsi->lsi_lmd->lmd_dev,
+ strlen(lsi->lsi_lmd->lmd_dev) + 1);
+ if (lsi->lsi_lmd->lmd_profile != NULL)
+ OBD_FREE(lsi->lsi_lmd->lmd_profile,
+ strlen(lsi->lsi_lmd->lmd_profile) + 1);
+ if (lsi->lsi_lmd->lmd_opts != NULL)
+ OBD_FREE(lsi->lsi_lmd->lmd_opts,
+ strlen(lsi->lsi_lmd->lmd_opts) + 1);
+ if (lsi->lsi_lmd->lmd_exclude_count)
+ OBD_FREE(lsi->lsi_lmd->lmd_exclude,
+ sizeof(lsi->lsi_lmd->lmd_exclude[0]) *
+ lsi->lsi_lmd->lmd_exclude_count);
+ OBD_FREE(lsi->lsi_lmd, sizeof(*lsi->lsi_lmd));
+ }
+
+ LASSERT(lsi->lsi_llsbi == NULL);
+
+ server_deregister_mount_all(lsi->lsi_srv_mnt);
+
+ OBD_FREE(lsi, sizeof(*lsi));
+ s2lsi_nocast(sb) = NULL;
+
+ RETURN(0);
+}
+
+static int lustre_put_lsi(struct super_block *sb)
+{
+ struct lustre_sb_info *lsi = s2lsi(sb);
+ ENTRY;
+
+ LASSERT(lsi);
+
+ CDEBUG(D_MOUNT, "put %p %d\n", sb, atomic_read(&lsi->lsi_mounts));
+
+ if (atomic_dec_and_test(&lsi->lsi_mounts)) {
+ lustre_free_lsi(sb);
+ RETURN(1);
+ }
+ RETURN(0);
+}
+
+/*************** server mount ******************/
+
+/* Kernel mount using mount options in MOUNT_DATA_FILE */
+static struct vfsmount *server_kernel_mount(struct super_block *sb)
+{
+ struct lvfs_run_ctxt mount_ctxt;
+ struct lustre_sb_info *lsi = s2lsi(sb);
+ struct lustre_disk_data *ldd;
+ struct lustre_mount_data *lmd = lsi->lsi_lmd;
+ struct vfsmount *mnt;
+ char *options = NULL;
+ unsigned long page, s_flags;
+ int rc;
+ ENTRY;
+
+ OBD_ALLOC(ldd, sizeof(*ldd));
+ if (!ldd)
+ RETURN(ERR_PTR(-ENOMEM));
+
+ /* In the past, we have always used flags = 0.
+ Note ext3/ldiskfs can't be mounted ro. */
+ s_flags = sb->s_flags;
+
+ /* Pre-mount ext3 to read the MOUNT_DATA_FILE */
+ CDEBUG(D_MOUNT, "Pre-mount ext3 %s\n", lmd->lmd_dev);
+ mnt = do_kern_mount("ext3", s_flags, lmd->lmd_dev, 0);
+ if (IS_ERR(mnt)) {
+ rc = PTR_ERR(mnt);
+ CERROR("premount ext3 failed (%d), trying ldiskfs\n", rc);
+ /* If ext3 fails (bec. of mballoc, extents), try ldiskfs */
+ mnt = do_kern_mount("ldiskfs", s_flags, lmd->lmd_dev, 0);
+ if (IS_ERR(mnt)) {
+ rc = PTR_ERR(mnt);
+ CERROR("premount ldiskfs failed: rc = %d\n", rc);
+ GOTO(out_free, rc);
+ }
+ }
+
+ OBD_SET_CTXT_MAGIC(&mount_ctxt);
+ mount_ctxt.pwdmnt = mnt;
+ mount_ctxt.pwd = mnt->mnt_root;
+ mount_ctxt.fs = get_ds();
+
+ rc = ldd_parse(&mount_ctxt, ldd);
+ unlock_mntput(mnt);
+
+ if (rc) {
+ CERROR("premount parse options failed: rc = %d\n", rc);
+ GOTO(out_free, rc);
+ }
+
+ /* Done with our pre-mount, now do the real mount. */
+
+ /* Glom up mount options */
+ page = __get_free_page(GFP_KERNEL);
+ if (!page)
+ GOTO(out_free, rc = -ENOMEM);
+
+ options = (char *)page;
+ memset(options, 0, PAGE_SIZE);
+ strncpy(options, ldd->ldd_mount_opts, PAGE_SIZE - 2);
+
+ /* Add in any mount-line options */
+ if (lmd->lmd_opts && (*(lmd->lmd_opts) != 0)) {
+ int len = PAGE_SIZE - strlen(options) - 2;
+ if (*options != 0)
+ strcat(options, ",");
+ strncat(options, lmd->lmd_opts, len);
+ }
+
+ /* Special permanent mount flags */
+ if (IS_OST(ldd))
+ s_flags |= MS_NOATIME | MS_NODIRATIME;
+
+ CDEBUG(D_MOUNT, "kern_mount: %s %s %s\n",
+ MT_STR(ldd), lmd->lmd_dev, options);
+ mnt = do_kern_mount(MT_STR(ldd), s_flags, lmd->lmd_dev,
+ (void *)options);
+ free_page(page);
+ if (IS_ERR(mnt)) {
+ rc = PTR_ERR(mnt);
+ CERROR("do_kern_mount failed: rc = %d\n", rc);
+ GOTO(out_free, rc);
+ }
+
+ lsi->lsi_ldd = ldd; /* freed at lsi cleanup */
+ CDEBUG(D_SUPER, "%s: mnt = %p\n", lmd->lmd_dev, mnt);
+ RETURN(mnt);
+
+out_free:
+ OBD_FREE(ldd, sizeof(*ldd));
+ lsi->lsi_ldd = NULL;
+ RETURN(ERR_PTR(rc));
+}
+
+static void server_wait_finished(struct vfsmount *mnt)
+{
+ wait_queue_head_t waitq;
+ struct l_wait_info lwi;
+ int retries = 10;
+
+ init_waitqueue_head(&waitq);
+
+ while ((atomic_read(&mnt->mnt_count) > 0) && retries--) {
+ CWARN("Mount still busy with %d refs\n",
+ atomic_read(&mnt->mnt_count));
+
+ /* Wait for a bit */
+ lwi = LWI_TIMEOUT(2 * HZ, NULL, NULL);
+ l_wait_event(waitq, 0, &lwi);
+ }
+ if (atomic_read(&mnt->mnt_count)) {
+ CERROR("Mount is still busy, giving up.\n");
+ }
+}
+
+static void server_put_super(struct super_block *sb)
+{
+ struct lustre_sb_info *lsi = s2lsi(sb);
+ struct obd_device *obd;
+ struct vfsmount *mnt = lsi->lsi_srv_mnt;
+ char *tmpname;
+ int tmpname_sz;
+ int lddflags = lsi->lsi_ldd->ldd_flags;
+ int lsiflags = lsi->lsi_flags;
+ int rc;
+ ENTRY;
+
+ LASSERT(lsiflags & LSI_SERVER);
+
+ tmpname_sz = strlen(lsi->lsi_ldd->ldd_svname) + 1;
+ OBD_ALLOC(tmpname, tmpname_sz);
+ memcpy(tmpname, lsi->lsi_ldd->ldd_svname, tmpname_sz);
+ CDEBUG(D_MOUNT, "server put_super %s\n", tmpname);
+
+ /* Stop the target */
+ if (IS_MDT(lsi->lsi_ldd) || IS_OST(lsi->lsi_ldd)) {
+
+ /* tell the mgc to drop the config log */
+ lustre_end_log(sb, lsi->lsi_ldd->ldd_svname, NULL);
+
+ obd = class_name2obd(lsi->lsi_ldd->ldd_svname);
+ if (obd) {
+ CDEBUG(D_MOUNT, "stopping %s\n", obd->obd_name);
+ if (lsi->lsi_flags & LSI_UMOUNT_FORCE)
+ obd->obd_force = 1;
+ if (lsi->lsi_flags & LSI_UMOUNT_FAILOVER)
+ obd->obd_fail = 1;
+ /* We can't seem to give an error return code
+ to .put_super, so we better make sure we clean up!
+ FIXME is there a way to get around this? */
+ obd->obd_force = 1;
+ class_manual_cleanup(obd);
+ } else {
+ CERROR("no obd %s\n", lsi->lsi_ldd->ldd_svname);
+ server_deregister_mount(lsi->lsi_ldd->ldd_svname);
+ }
+ }
+
+ /* If they wanted the mgs to stop separately from the mdt, they
+ should have put it on a different device. */
+ if (IS_MGS(lsi->lsi_ldd)) {
+ /* stop the mgc before the mgs so the connection gets cleaned
+ up */
+ lustre_stop_mgc(sb);
+ server_stop_mgs(sb);
+ }
+
+ /* clean the mgc and sb */
+ rc = lustre_common_put_super(sb);
+ // FIXME how do I return a failure?
+
+ /* drop the One True Mount */
+ unlock_mntput(mnt);
+
+ /* Wait for the targets to really clean up - can't exit (and let the
+ sb get destroyed) while the mount is still in use */
+ server_wait_finished(mnt);
+
+ /* Stop the servers (MDS, OSS) if no longer needed. We must wait
+ until the target is really gone so that our type refcount check
+ is right. */
+ server_stop_servers(lddflags, lsiflags);
+
+ LCONSOLE_WARN("server umount %s complete\n", tmpname);
+ OBD_FREE(tmpname, tmpname_sz);
+ EXIT;
+}
+
+static void server_umount_begin(struct super_block *sb)
+{
+ struct lustre_sb_info *lsi = s2lsi(sb);
+ ENTRY;
+
+ CDEBUG(D_MOUNT, "umount -f\n");
+ /* umount = failover
+ umount -f = force
+ no third way to do non-force, non-failover */
+ lsi->lsi_flags &= ~LSI_UMOUNT_FAILOVER;
+ lsi->lsi_flags |= LSI_UMOUNT_FORCE;
+ EXIT;
+}
+
+static int server_statfs (struct super_block *sb, struct kstatfs *buf)
+{
+ struct vfsmount *mnt = s2lsi(sb)->lsi_srv_mnt;
+ ENTRY;
+
+ if (mnt && mnt->mnt_sb && mnt->mnt_sb->s_op->statfs) {
+ int rc = mnt->mnt_sb->s_op->statfs(mnt->mnt_sb, buf);
+ if (!rc) {
+ buf->f_type = sb->s_magic;
+ RETURN(0);
+ }
+ }
+
+ /* just return 0 */
+ buf->f_type = sb->s_magic;
+ buf->f_bsize = sb->s_blocksize;
+ buf->f_blocks = 1;
+ buf->f_bfree = 0;
+ buf->f_bavail = 0;
+ buf->f_files = 1;
+ buf->f_ffree = 0;
+ buf->f_namelen = NAME_MAX;
+ RETURN(0);
+}
+
+static struct super_operations server_ops =
+{
+ .put_super = server_put_super,
+ .umount_begin = server_umount_begin, /* umount -f */
+ .statfs = server_statfs,
+};
+
+#define log2(n) ffz(~(n))
+#define LUSTRE_SUPER_MAGIC 0x0BD00BD1
+
+static int server_fill_super_common(struct super_block *sb)
+{
+ struct inode *root = 0;
+ ENTRY;
+
+ CDEBUG(D_MOUNT, "Server sb, dev=%d\n", (int)sb->s_dev);
+
+ sb->s_blocksize = 4096;
+ sb->s_blocksize_bits = log2(sb->s_blocksize);
+ sb->s_magic = LUSTRE_SUPER_MAGIC;
+ sb->s_maxbytes = 0; //PAGE_CACHE_MAXBYTES;
+ sb->s_flags |= MS_RDONLY;
+ sb->s_op = &server_ops;
+
+ root = new_inode(sb);
+ if (!root) {
+ CERROR("Can't make root inode\n");
+ RETURN(-EIO);
+ }
+
+ /* returns -EIO for every operation */
+ /* make_bad_inode(root); -- badness - can't umount */
+ /* apparently we need to be a directory for the mount to finish */
+ root->i_mode = S_IFDIR;
+
+ sb->s_root = d_alloc_root(root);
+ if (!sb->s_root) {
+ CERROR("Can't make root dentry\n");
+ iput(root);
+ RETURN(-EIO);
+ }
+
+ RETURN(0);
+}
+
+static int server_fill_super(struct super_block *sb)
+{
+ struct lustre_sb_info *lsi = s2lsi(sb);
+ struct vfsmount *mnt;
+ int rc;
+ ENTRY;
+
+ /* the One True Mount */
+ mnt = server_kernel_mount(sb);
+ if (IS_ERR(mnt)) {
+ rc = PTR_ERR(mnt);
+ CERROR("Unable to mount device %s: %d\n",
+ lsi->lsi_lmd->lmd_dev, rc);
+ GOTO(out, rc);
+ }
+ lsi->lsi_srv_mnt = mnt;
+
+ LASSERT(lsi->lsi_ldd);
+ CDEBUG(D_MOUNT, "Found service %s for fs '%s' on device %s\n",
+ lsi->lsi_ldd->ldd_svname, lsi->lsi_ldd->ldd_fsname,
+ lsi->lsi_lmd->lmd_dev);
+
+ if (class_name2obd(lsi->lsi_ldd->ldd_svname)) {
+ LCONSOLE_ERROR("The target named %s is already running. "
+ "Double-mount may have compromised the disk "
+ "journal.\n", lsi->lsi_ldd->ldd_svname);
+ unlock_mntput(mnt);
+ lustre_put_lsi(sb);
+ GOTO(out, rc = -EALREADY);
+ }
+
+ /* start MGS before MGC */
+ if (IS_MGS(lsi->lsi_ldd)) {
+ rc = server_start_mgs(sb);
+ if (rc) {
+ CERROR("ignoring Failed MGS start!!\n");
+ //GOTO(out_mnt, rc);
+ }
+ }
+
+ rc = lustre_start_mgc(sb);
+ if (rc)
+ GOTO(out_mnt, rc);
+
+ /* Set up all obd devices for service */
+ if (!(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOSVC) &&
+ (IS_OST(lsi->lsi_ldd) || IS_MDT(lsi->lsi_ldd))) {
+ rc = server_start_targets(sb, mnt);
+ if (rc < 0) {
+ CERROR("Unable to start targets: %d\n", rc);
+ GOTO(out_mnt, rc);
+ }
+ /* FIXME overmount client here,
+ or can we just start a client log and client_fill_super on this sb?
+ We need to make sure server_put_super gets called too - ll_put_super
+ calls lustre_common_put_super; check there for LSI_SERVER flag,
+ call s_p_s if so.
+ Probably should start client from new thread so we can return.
+ Client will not finish until all servers are connected.
+ Note - MGMT-only server does NOT get a client, since there is no
+ lustre fs associated - the MGMT is for all lustre fs's */
+ }
+
+ rc = server_fill_super_common(sb);
+ if (rc)
+ GOTO(out_mnt, rc);
+
+ RETURN(0);
+
+out_mnt:
+ server_put_super(sb);
+out:
+ RETURN(rc);
+}
+
+/* Get the index from the obd name.
+ rc = server type, or
+ rc < 0 on error
+ if endptr isn't NULL it is set to end of name */
+int server_name2index(char *svname, __u32 *idx, char **endptr)
+{
+ unsigned long index;
+ int rc;
+ char *dash = strchr(svname, '-');
+ if (!dash) {
+ CERROR("Can't understand server name %s\n", svname);
+ return(-EINVAL);
+ }
+
+ if (strncmp(dash + 1, "MDT", 3) == 0)
+ rc = LDD_F_SV_TYPE_MDT;
+ else if (strncmp(dash + 1, "OST", 3) == 0)
+ rc = LDD_F_SV_TYPE_OST;
+ else
+ return(-EINVAL);
+
+ index = simple_strtoul(dash + 4, endptr, 16);
+ *idx = index;
+ return rc;
+}
+
+/*************** mount common betweeen server and client ***************/
+
+/* Common umount */
+int lustre_common_put_super(struct super_block *sb)
+{
+ int rc;
+ ENTRY;
+
+ CDEBUG(D_MOUNT, "dropping sb %p\n", sb);
+
+ rc = lustre_stop_mgc(sb);
+ if (rc && (rc != -ENOENT)) {
+ if (rc != -EBUSY) {
+ CERROR("Can't stop MGC: %d\n", rc);
+ RETURN(rc);
+ }
+ /* BUSY just means that there's some other obd that
+ needs the mgc. Let him clean it up. */
+ CDEBUG(D_MOUNT, "MGC still in use\n");
+ }
+ lustre_put_lsi(sb);
+ RETURN(rc);
+}
+
+static void lmd_print(struct lustre_mount_data *lmd)
+{
+ int i;
+
+ PRINT_CMD(PRINT_MASK, " mount data:\n");
+ if (lmd_is_client(lmd))
+ PRINT_CMD(PRINT_MASK, "profile: %s\n", lmd->lmd_profile);
+ PRINT_CMD(PRINT_MASK, "device: %s\n", lmd->lmd_dev);
+ PRINT_CMD(PRINT_MASK, "flags: %x\n", lmd->lmd_flags);
+ if (lmd->lmd_opts)
+ PRINT_CMD(PRINT_MASK, "options: %s\n", lmd->lmd_opts);
+ for (i = 0; i < lmd->lmd_exclude_count; i++) {
+ PRINT_CMD(PRINT_MASK, "exclude %d: OST%04x\n", i,
+ lmd->lmd_exclude[i]);
+ }
+}
+
+/* Is this server on the exclusion list */
+int lustre_check_exclusion(struct super_block *sb, char *svname)
+{
+ struct lustre_sb_info *lsi = s2lsi(sb);
+ struct lustre_mount_data *lmd = lsi->lsi_lmd;
+ __u32 index;
+ int i, rc;
+ ENTRY;
+
+ rc = server_name2index(svname, &index, NULL);
+ if (rc != LDD_F_SV_TYPE_OST)
+ RETURN(0);
+
+ CDEBUG(D_MOUNT, "Check exclusion %s (%d) in %d of %s\n", svname,
+ index, lmd->lmd_exclude_count, lmd->lmd_dev);
+
+ for(i = 0; i < lmd->lmd_exclude_count; i++) {
+ if (index == lmd->lmd_exclude[i]) {
+ CWARN("Excluding %s (on exclusion list)\n", svname);
+ RETURN(1);
+ }
+ }
+ RETURN(0);
+}
+
+/* mount -v -o exclude=lustre-OST0001:lustre-OST0002 -t lustre ... */
+static int lmd_make_exclusion(struct lustre_mount_data *lmd, char *ptr)
+{
+ char *s1 = ptr, *s2;
+ __u32 index, *exclude_list;
+ int rc = 0;
+ ENTRY;
+
+ /* temp storage until we figure out how many we have */
+ OBD_ALLOC(exclude_list, sizeof(index) * MAX_OBD_DEVICES);
+ if (!exclude_list)
+ RETURN(-ENOMEM);
+
+ /* we enter this fn pointing at the '=' */
+ while (*s1 && *s1 != ' ' && *s1 != ',') {
+ s1++;
+ rc = server_name2index(s1, &index, &s2);
+ if (rc < 0) {
+ CERROR("Can't parse %s\n", s1);
+ break;
+ }
+ if (rc == LDD_F_SV_TYPE_OST)
+ exclude_list[lmd->lmd_exclude_count++] = index;
+ else
+ CDEBUG(D_MOUNT, "ignoring exclude %.7s\n", s1);
+ s1 = s2;
+ /* now we are pointing at ':' (next exclude)
+ or ',' (end of excludes) */
+
+ if (lmd->lmd_exclude_count >= MAX_OBD_DEVICES)
+ break;
+ }
+ if (rc >= 0) /* non-err */
+ rc = 0;
+
+ if (lmd->lmd_exclude_count) {
+ /* permanent, freed in lustre_free_lsi */
+ OBD_ALLOC(lmd->lmd_exclude, sizeof(index) *
+ lmd->lmd_exclude_count);
+ if (lmd->lmd_exclude) {
+ memcpy(lmd->lmd_exclude, exclude_list,
+ sizeof(index) * lmd->lmd_exclude_count);
+ } else {
+ rc = -ENOMEM;
+ lmd->lmd_exclude_count = 0;
+ }
+ }
+ OBD_FREE(exclude_list, sizeof(index) * MAX_OBD_DEVICES);
+ RETURN(rc);
+}
+
+/* mount -v -t lustre uml1:uml2:/lustre-client /mnt/lustre */
+static int lmd_parse(char *options, struct lustre_mount_data *lmd)
+{
+ char *s1, *s2, *devname = NULL;
+ struct lustre_mount_data *raw = (struct lustre_mount_data *)options;
+ int rc = 0;
+ ENTRY;
+
+ LASSERT(lmd);
+ if (!options) {
+ LCONSOLE_ERROR("Missing mount data: check that "
+ "/sbin/mount.lustre is installed.\n");
+ RETURN(-EINVAL);
+ }
+
+ /* Options should be a string - try to detect old lmd data */
+ if ((raw->lmd_magic & 0xffffff00) == (LMD_MAGIC & 0xffffff00)) {
+ LCONSOLE_ERROR("You're using an old version of "
+ "/sbin/mount.lustre. Please install version "
+ "%s\n", LUSTRE_VERSION_STRING);
+ RETURN(-EINVAL);
+ }
+ lmd->lmd_magic = LMD_MAGIC;
+
+ /* Default flags */
+ lmd->lmd_flags |= LMD_FLG_RECOVER;
+
+ s1 = options;
+ while (*s1) {
+ /* Skip whitespace and extra commas */
+ while (*s1 == ' ' || *s1 == ',')
+ s1++;
+
+ /* Client options are parsed in ll_options: eg. flock,
+ user_xattr, acl */
+
+ if (strncmp(s1, "recov", 5) == 0)
+ /* FIXME do something with the RECOVER flag - see lconf */
+ lmd->lmd_flags |= LMD_FLG_RECOVER;
+ else if (strncmp(s1, "norecov", 7) == 0)
+ lmd->lmd_flags &= ~LMD_FLG_RECOVER;
+ else if (strncmp(s1, "nosvc", 5) == 0)
+ lmd->lmd_flags |= LMD_FLG_NOSVC;
+
+ /* ost exclusion list */
+ else if (strncmp(s1, "exclude=", 8) == 0) {
+ rc = lmd_make_exclusion(lmd, s1 + 7);
+ if (rc)
+ goto invalid;
+ }
+
+ /* Linux 2.4 doesn't pass the device, so we stuck it at the
+ end of the options. */
+ else if (strncmp(s1, "device=", 7) == 0) {
+ devname = s1 + 7;
+ /* terminate options right before device. device
+ must be the last one. */
+ *s1 = 0;
+ break;
+ }
+
+ /* Find next opt */
+ s2 = strchr(s1, ',');
+ if (s2 == NULL)
+ break;
+ s1 = s2 + 1;
+ }
+
+ if (!devname) {
+ LCONSOLE_ERROR("Can't find the device name "
+ "(need mount option 'device=...')\n");
+ goto invalid;
+ }
+
+ s1 = strrchr(devname, ':');
+ if (s1) {
+ lmd->lmd_flags = LMD_FLG_CLIENT;
+ /* Remove leading /s from fsname */
+ while (*++s1 == '/') ;
+ /* Freed in lustre_free_lsi */
+ OBD_ALLOC(lmd->lmd_profile, strlen(s1) + 8);
+ if (!lmd->lmd_profile)
+ RETURN(-ENOMEM);
+ sprintf(lmd->lmd_profile, "%s-client", s1);
+ }
+
+ /* Freed in lustre_free_lsi */
+ OBD_ALLOC(lmd->lmd_dev, strlen(devname) + 1);
+ if (!lmd->lmd_dev)
+ RETURN(-ENOMEM);
+ strcpy(lmd->lmd_dev, devname);
+
+ /* Save mount options */
+ s1 = options + strlen(options) - 1;
+ while (s1 >= options && (*s1 == ',' || *s1 == ' '))
+ *s1-- = 0;
+ if (*options != 0) {
+ /* Freed in lustre_free_lsi */
+ OBD_ALLOC(lmd->lmd_opts, strlen(options) + 1);
+ if (!lmd->lmd_opts)
+ RETURN(-ENOMEM);
+ strcpy(lmd->lmd_opts, options);
+ }
+
+ lmd->lmd_magic = LMD_MAGIC;
+
+ lmd_print(lmd);
+ RETURN(rc);
+
+invalid:
+ CERROR("Bad mount options %s\n", options);
+ RETURN(-EINVAL);
+}
+
+
+/* Common mount */
+int lustre_fill_super(struct super_block *sb, void *data, int silent)
+{
+ struct lustre_mount_data *lmd;
+ struct lustre_sb_info *lsi;
+ int rc;
+ ENTRY;
+
+ CDEBUG(D_MOUNT|D_VFSTRACE, "VFS Op: sb %p\n", sb);
+
+ lsi = lustre_init_lsi(sb);
+ if (!lsi)
+ RETURN(-ENOMEM);
+ lmd = lsi->lsi_lmd;
+
+ /* Figure out the lmd from the mount options */
+ if (lmd_parse((char *)data, lmd)) {
+ lustre_put_lsi(sb);
+ RETURN(-EINVAL);
+ }
+
+ if (lmd_is_client(lmd)) {
+ CDEBUG(D_MOUNT, "Mounting client %s\n", lmd->lmd_profile);
+ if (!client_fill_super) {
+ LCONSOLE_ERROR("Nothing registered for client mount!"
+ " Is llite module loaded?\n");
+ rc = -ENODEV;
+ } else {
+ rc = lustre_start_mgc(sb);
+ if (rc)
+ goto out;
+ /* Connect and start */
+ /* (should always be ll_fill_super) */
+ rc = (*client_fill_super)(sb);
+ /* c_f_s will call lustre_common_put_super on failure */
+
+ }
+ } else {
+ CDEBUG(D_MOUNT, "Mounting server from %s\n", lmd->lmd_dev);
+ lsi->lsi_flags |= LSI_SERVER;
+ rc = server_fill_super(sb);
+ /* s_f_s calls lustre_start_mgc after the mount because we need
+ the MGS nids which are stored on disk. Plus, we may
+ need to start the MGS first. */
+ /* s_f_s will call server_put_super on failure */
+ }
+
+out:
+ if (rc){
+ CERROR("Unable to mount %s\n",
+ s2lsi(sb) ? lmd->lmd_dev : "");
+ } else {
+ LCONSOLE_WARN("mount %s complete\n", lmd->lmd_dev);
+ }
+ RETURN(rc);
+}
+
+
+/* We can't call ll_fill_super by name because it lives in a module that
+ must be loaded after this one. */
+void lustre_register_client_fill_super(int (*cfs)(struct super_block *sb))
+{
+ client_fill_super = cfs;
+}
+
+/***************** FS registration ******************/
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+/* 2.5 and later */
+struct super_block * lustre_get_sb(struct file_system_type *fs_type,
+ int flags, const char *devname, void * data)
+{
+ /* calls back in fill super */
+ /* we could append devname= onto options (*data) here,
+ but 2.4 doesn't get devname. So we do it in mount_lustre.c */
+ return get_sb_nodev(fs_type, flags, data, lustre_fill_super);
+}
+
+struct file_system_type lustre_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "lustre",
+ .get_sb = lustre_get_sb,
+ .kill_sb = kill_anon_super,
+ .fs_flags = FS_BINARY_MOUNTDATA,
+};
+
+#else
+/* 2.4 */
+static struct super_block *lustre_read_super(struct super_block *sb,
+ void *data, int silent)
+{
+ int rc;
+ ENTRY;
+
+ rc = lustre_fill_super(sb, data, silent);
+ if (rc)
+ RETURN(NULL);
+ RETURN(sb);
+}
+
+static struct file_system_type lustre_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "lustre",
+ .fs_flags = FS_NFSEXP_FSID,
+ .read_super = lustre_read_super,
+};
+#endif
+
+int lustre_register_fs(void)
+{
+ return register_filesystem(&lustre_fs_type);
+}
+
+int lustre_unregister_fs(void)
+{
+ return unregister_filesystem(&lustre_fs_type);
+}
+
+EXPORT_SYMBOL(lustre_register_client_fill_super);
+EXPORT_SYMBOL(lustre_common_put_super);
+EXPORT_SYMBOL(lustre_process_log);
+EXPORT_SYMBOL(lustre_end_log);
+EXPORT_SYMBOL(server_get_mount);
+EXPORT_SYMBOL(server_put_mount);
+EXPORT_SYMBOL(server_register_target);
+EXPORT_SYMBOL(server_name2index);
+EXPORT_SYMBOL(server_mti_print);
+EXPORT_SYMBOL(class_find_param);
+EXPORT_SYMBOL(class_match_param);
+EXPORT_SYMBOL(class_parse_nid);
+
+
#include <obd_class.h>
struct uuid {
- __u32 time_low;
- __u16 time_mid;
- __u16 time_hi_and_version;
- __u16 clock_seq;
- __u8 node[6];
+ __u32 time_low;
+ __u16 time_mid;
+ __u16 time_hi_and_version;
+ __u16 clock_seq;
+ __u8 node[6];
};
static void uuid_unpack(class_uuid_t in, struct uuid *uu)
{
- __u8 *ptr = in;
- __u32 tmp;
+ __u8 *ptr = in;
+ __u32 tmp;
- tmp = *ptr++;
- tmp = (tmp << 8) | *ptr++;
- tmp = (tmp << 8) | *ptr++;
- tmp = (tmp << 8) | *ptr++;
- uu->time_low = tmp;
+ tmp = *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ uu->time_low = tmp;
- tmp = *ptr++;
- tmp = (tmp << 8) | *ptr++;
- uu->time_mid = tmp;
+ tmp = *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ uu->time_mid = tmp;
- tmp = *ptr++;
- tmp = (tmp << 8) | *ptr++;
- uu->time_hi_and_version = tmp;
+ tmp = *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ uu->time_hi_and_version = tmp;
- tmp = *ptr++;
- tmp = (tmp << 8) | *ptr++;
- uu->clock_seq = tmp;
+ tmp = *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ uu->clock_seq = tmp;
- memcpy(uu->node, ptr, 6);
+ memcpy(uu->node, ptr, 6);
}
#if 0
static void uuid_pack(struct uuid *uu, class_uuid_t ptr)
{
- __u32 tmp;
- unsigned char *out = ptr;
-
- tmp = uu->time_low;
- out[3] = (unsigned char) tmp;
- tmp >>= 8;
- out[2] = (unsigned char) tmp;
- tmp >>= 8;
- out[1] = (unsigned char) tmp;
- tmp >>= 8;
- out[0] = (unsigned char) tmp;
-
- tmp = uu->time_mid;
- out[5] = (unsigned char) tmp;
- tmp >>= 8;
- out[4] = (unsigned char) tmp;
-
- tmp = uu->time_hi_and_version;
- out[7] = (unsigned char) tmp;
- tmp >>= 8;
- out[6] = (unsigned char) tmp;
-
- tmp = uu->clock_seq;
- out[9] = (unsigned char) tmp;
- tmp >>= 8;
- out[8] = (unsigned char) tmp;
-
- memcpy(out+10, uu->node, 6);
+ __u32 tmp;
+ unsigned char *out = ptr;
+
+ tmp = uu->time_low;
+ out[3] = (unsigned char) tmp;
+ tmp >>= 8;
+ out[2] = (unsigned char) tmp;
+ tmp >>= 8;
+ out[1] = (unsigned char) tmp;
+ tmp >>= 8;
+ out[0] = (unsigned char) tmp;
+
+ tmp = uu->time_mid;
+ out[5] = (unsigned char) tmp;
+ tmp >>= 8;
+ out[4] = (unsigned char) tmp;
+
+ tmp = uu->time_hi_and_version;
+ out[7] = (unsigned char) tmp;
+ tmp >>= 8;
+ out[6] = (unsigned char) tmp;
+
+ tmp = uu->clock_seq;
+ out[9] = (unsigned char) tmp;
+ tmp >>= 8;
+ out[8] = (unsigned char) tmp;
+
+ memcpy(out+10, uu->node, 6);
}
int class_uuid_parse(struct obd_uuid in, class_uuid_t uu)
{
- struct uuid uuid;
- int i;
- char *cp, buf[3];
-
- if (strlen(in) != 36)
- return -1;
- for (i=0, cp = in; i <= 36; i++,cp++) {
- if ((i == 8) || (i == 13) || (i == 18) ||
- (i == 23))
- if (*cp == '-')
- continue;
- if (i== 36)
- if (*cp == 0)
- continue;
- if (!isxdigit(*cp))
- return -1;
- }
- uuid.time_low = simple_strtoul(in, NULL, 16);
- uuid.time_mid = simple_strtoul(in+9, NULL, 16);
- uuid.time_hi_and_version = simple_strtoul(in+14, NULL, 16);
- uuid.clock_seq = simple_strtoul(in+19, NULL, 16);
- cp = in+24;
- buf[2] = 0;
- for (i=0; i < 6; i++) {
- buf[0] = *cp++;
- buf[1] = *cp++;
- uuid.node[i] = simple_strtoul(buf, NULL, 16);
- }
-
- uuid_pack(&uuid, uu);
- return 0;
+ struct uuid uuid;
+ int i;
+ char *cp, buf[3];
+
+ if (strlen(in) != 36)
+ return -1;
+ for (i=0, cp = in; i <= 36; i++,cp++) {
+ if ((i == 8) || (i == 13) || (i == 18) ||
+ (i == 23))
+ if (*cp == '-')
+ continue;
+ if (i== 36)
+ if (*cp == 0)
+ continue;
+ if (!isxdigit(*cp))
+ return -1;
+ }
+ uuid.time_low = simple_strtoul(in, NULL, 16);
+ uuid.time_mid = simple_strtoul(in+9, NULL, 16);
+ uuid.time_hi_and_version = simple_strtoul(in+14, NULL, 16);
+ uuid.clock_seq = simple_strtoul(in+19, NULL, 16);
+ cp = in+24;
+ buf[2] = 0;
+ for (i=0; i < 6; i++) {
+ buf[0] = *cp++;
+ buf[1] = *cp++;
+ uuid.node[i] = simple_strtoul(buf, NULL, 16);
+ }
+
+ uuid_pack(&uuid, uu);
+ return 0;
}
#endif
+
+void generate_random_uuid(unsigned char uuid_out[16]);
+
+/* We need to have some extra twiddling here because some systems have
+ * no random state when they start up. */
+void class_generate_random_uuid(class_uuid_t uuid)
+{
+ struct timeval t;
+ int *i, j, k;
+
+ LASSERT(sizeof(class_uuid_t) % sizeof(*i) == 0);
+
+ j = jiffies;
+ do_gettimeofday(&t);
+ k = t.tv_usec;
+
+ generate_random_uuid(uuid);
+
+ for (i = (int *)uuid; (char *)i < (char *)uuid + sizeof(class_uuid_t); i++) {
+ *i ^= j ^ k;
+ j = ((j << 8) & 0xffffff00) | ((j >> 24) & 0x000000ff);
+ k = ((k >> 8) & 0x00ffffff) | ((k << 24) & 0xff000000);
+ }
+}
+
void class_uuid_unparse(class_uuid_t uu, struct obd_uuid *out)
{
- struct uuid uuid;
-
- uuid_unpack(uu, &uuid);
- sprintf(out->uuid,
- "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x",
- uuid.time_low, uuid.time_mid, uuid.time_hi_and_version,
- uuid.clock_seq >> 8, uuid.clock_seq & 0xFF,
- uuid.node[0], uuid.node[1], uuid.node[2],
- uuid.node[3], uuid.node[4], uuid.node[5]);
+ struct uuid uuid;
+
+ uuid_unpack(uu, &uuid);
+ sprintf(out->uuid,
+ "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x",
+ uuid.time_low, uuid.time_mid, uuid.time_hi_and_version,
+ uuid.clock_seq >> 8, uuid.clock_seq & 0xFF,
+ uuid.node[0], uuid.node[1], uuid.node[2],
+ uuid.node[3], uuid.node[4], uuid.node[5]);
}
#include <lustre_log.h>
#include <lustre_commit_confd.h>
#include <libcfs/list.h>
+#include <lustre_disk.h>
#include <lustre_quota.h>
#include <lustre_ver.h>
/* we don't allocate new transnos for replayed requests */
if (oti->oti_transno == 0) {
spin_lock(&filter->fo_translock);
- last_rcvd = le64_to_cpu(filter->fo_fsd->fsd_last_transno) + 1;
- filter->fo_fsd->fsd_last_transno = cpu_to_le64(last_rcvd);
+ last_rcvd = le64_to_cpu(filter->fo_fsd->lsd_last_transno) + 1;
+ filter->fo_fsd->lsd_last_transno = cpu_to_le64(last_rcvd);
spin_unlock(&filter->fo_translock);
oti->oti_transno = last_rcvd;
} else {
spin_lock(&filter->fo_translock);
last_rcvd = oti->oti_transno;
- if (last_rcvd > le64_to_cpu(filter->fo_fsd->fsd_last_transno))
- filter->fo_fsd->fsd_last_transno =
+ if (last_rcvd > le64_to_cpu(filter->fo_fsd->lsd_last_transno))
+ filter->fo_fsd->lsd_last_transno =
cpu_to_le64(last_rcvd);
spin_unlock(&filter->fo_translock);
}
}
fed->fed_lr_idx = cl_idx;
- fed->fed_lr_off = le32_to_cpu(filter->fo_fsd->fsd_client_start) +
- cl_idx * le16_to_cpu(filter->fo_fsd->fsd_client_size);
+ fed->fed_lr_off = le32_to_cpu(filter->fo_fsd->lsd_client_start) +
+ cl_idx * le16_to_cpu(filter->fo_fsd->lsd_client_size);
LASSERTF(fed->fed_lr_off > 0, "fed_lr_off = %llu\n", fed->fed_lr_off);
CDEBUG(D_INFO, "client at index %d (%llu) with UUID '%s' added\n",
/* assumes caller is already in kernel ctxt */
int filter_update_server_data(struct obd_device *obd, struct file *filp,
- struct filter_server_data *fsd, int force_sync)
+ struct lr_server_data *fsd, int force_sync)
{
loff_t off = 0;
int rc;
ENTRY;
- CDEBUG(D_INODE, "server uuid : %s\n", fsd->fsd_uuid);
+ CDEBUG(D_INODE, "server uuid : %s\n", fsd->lsd_uuid);
CDEBUG(D_INODE, "server last_rcvd : "LPU64"\n",
- le64_to_cpu(fsd->fsd_last_transno));
+ le64_to_cpu(fsd->lsd_last_transno));
CDEBUG(D_INODE, "server last_mount: "LPU64"\n",
- le64_to_cpu(fsd->fsd_mount_count));
+ le64_to_cpu(fsd->lsd_mount_count));
rc = fsfilt_write_record(obd, filp, fsd, sizeof(*fsd), &off,force_sync);
if (rc)
- CERROR("error writing filter_server_data: rc = %d\n", rc);
+ CERROR("error writing lr_server_data: rc = %d\n", rc);
RETURN(rc);
}
static int filter_init_server_data(struct obd_device *obd, struct file * filp)
{
struct filter_obd *filter = &obd->u.filter;
- struct filter_server_data *fsd;
+ struct lr_server_data *fsd;
struct filter_client_data *fcd = NULL;
struct inode *inode = filp->f_dentry->d_inode;
unsigned long last_rcvd_size = inode->i_size;
int rc;
/* ensure padding in the struct is the correct size */
- CLASSERT(offsetof(struct filter_server_data, fsd_padding) +
- sizeof(fsd->fsd_padding) == LR_SERVER_SIZE);
- CLASSERT(offsetof(struct filter_client_data, fcd_padding) +
+ CLASSERT (offsetof(struct lr_server_data, lsd_padding) +
+ sizeof(fsd->lsd_padding) == LR_SERVER_SIZE);
+ CLASSERT (offsetof(struct filter_client_data, fcd_padding) +
sizeof(fcd->fcd_padding) == LR_CLIENT_SIZE);
OBD_ALLOC(fsd, sizeof(*fsd));
}
if (last_rcvd_size == 0) {
- CWARN("%s: initializing new %s\n", obd->obd_name, LAST_RCVD);
-
- memcpy(fsd->fsd_uuid, obd->obd_uuid.uuid,sizeof(fsd->fsd_uuid));
- fsd->fsd_last_transno = 0;
- mount_count = fsd->fsd_mount_count = 0;
- fsd->fsd_server_size = cpu_to_le32(LR_SERVER_SIZE);
- fsd->fsd_client_start = cpu_to_le32(LR_CLIENT_START);
- fsd->fsd_client_size = cpu_to_le16(LR_CLIENT_SIZE);
- fsd->fsd_subdir_count = cpu_to_le16(FILTER_SUBDIR_COUNT);
+ LCONSOLE_WARN("%s: new disk, initializing\n", obd->obd_name);
+
+ memcpy(fsd->lsd_uuid, obd->obd_uuid.uuid,sizeof(fsd->lsd_uuid));
+ fsd->lsd_last_transno = 0;
+ mount_count = fsd->lsd_mount_count = 0;
+ fsd->lsd_server_size = cpu_to_le32(LR_SERVER_SIZE);
+ fsd->lsd_client_start = cpu_to_le32(LR_CLIENT_START);
+ fsd->lsd_client_size = cpu_to_le16(LR_CLIENT_SIZE);
+ fsd->lsd_subdir_count = cpu_to_le16(FILTER_SUBDIR_COUNT);
filter->fo_subdir_count = FILTER_SUBDIR_COUNT;
+ fsd->lsd_feature_incompat = cpu_to_le32(OBD_INCOMPAT_OST);
} else {
rc = fsfilt_read_record(obd, filp, fsd, sizeof(*fsd), &off);
if (rc) {
LAST_RCVD, rc);
GOTO(err_fsd, rc);
}
- if (strcmp(fsd->fsd_uuid, obd->obd_uuid.uuid) != 0) {
+ if (strcmp(fsd->lsd_uuid, obd->obd_uuid.uuid) != 0) {
LCONSOLE_ERROR("Trying to start OBD %s using the wrong"
" disk %s. Were the /dev/ assignments "
"rearranged?\n",
- obd->obd_uuid.uuid, fsd->fsd_uuid);
+ obd->obd_uuid.uuid, fsd->lsd_uuid);
GOTO(err_fsd, rc = -EINVAL);
}
- mount_count = le64_to_cpu(fsd->fsd_mount_count);
- filter->fo_subdir_count = le16_to_cpu(fsd->fsd_subdir_count);
+ mount_count = le64_to_cpu(fsd->lsd_mount_count);
+ filter->fo_subdir_count = le16_to_cpu(fsd->lsd_subdir_count);
}
- if (fsd->fsd_feature_incompat & ~cpu_to_le32(FILTER_INCOMPAT_SUPP)) {
+ if (fsd->lsd_feature_incompat & ~cpu_to_le32(FILTER_INCOMPAT_SUPP)) {
CERROR("%s: unsupported incompat filesystem feature(s) %x\n",
- obd->obd_name, le32_to_cpu(fsd->fsd_feature_incompat) &
+ obd->obd_name, le32_to_cpu(fsd->lsd_feature_incompat) &
~FILTER_INCOMPAT_SUPP);
GOTO(err_fsd, rc = -EINVAL);
}
- if (fsd->fsd_feature_rocompat & ~cpu_to_le32(FILTER_ROCOMPAT_SUPP)) {
+ if (fsd->lsd_feature_rocompat & ~cpu_to_le32(FILTER_ROCOMPAT_SUPP)) {
CERROR("%s: unsupported read-only filesystem feature(s) %x\n",
- obd->obd_name, le32_to_cpu(fsd->fsd_feature_rocompat) &
+ obd->obd_name, le32_to_cpu(fsd->lsd_feature_rocompat) &
~FILTER_ROCOMPAT_SUPP);
/* Do something like remount filesystem read-only */
GOTO(err_fsd, rc = -EINVAL);
}
CDEBUG(D_INODE, "%s: server last_rcvd : "LPU64"\n",
- obd->obd_name, le64_to_cpu(fsd->fsd_last_transno));
+ obd->obd_name, le64_to_cpu(fsd->lsd_last_transno));
CDEBUG(D_INODE, "%s: server mount_count: "LPU64"\n",
obd->obd_name, mount_count + 1);
CDEBUG(D_INODE, "%s: server data size: %u\n",
- obd->obd_name, le32_to_cpu(fsd->fsd_server_size));
+ obd->obd_name, le32_to_cpu(fsd->lsd_server_size));
CDEBUG(D_INODE, "%s: per-client data start: %u\n",
- obd->obd_name, le32_to_cpu(fsd->fsd_client_start));
+ obd->obd_name, le32_to_cpu(fsd->lsd_client_start));
CDEBUG(D_INODE, "%s: per-client data size: %u\n",
- obd->obd_name, le32_to_cpu(fsd->fsd_client_size));
+ obd->obd_name, le32_to_cpu(fsd->lsd_client_size));
CDEBUG(D_INODE, "%s: server subdir_count: %u\n",
- obd->obd_name, le16_to_cpu(fsd->fsd_subdir_count));
+ obd->obd_name, le16_to_cpu(fsd->lsd_subdir_count));
CDEBUG(D_INODE, "%s: last_rcvd clients: %lu\n", obd->obd_name,
- last_rcvd_size <= le32_to_cpu(fsd->fsd_client_start) ? 0 :
- (last_rcvd_size - le32_to_cpu(fsd->fsd_client_start)) /
- le16_to_cpu(fsd->fsd_client_size));
+ last_rcvd_size <= le32_to_cpu(fsd->lsd_client_start) ? 0 :
+ (last_rcvd_size - le32_to_cpu(fsd->lsd_client_start)) /
+ le16_to_cpu(fsd->lsd_client_size));
if (!obd->obd_replayable) {
CWARN("%s: recovery support OFF\n", obd->obd_name);
GOTO(out, rc = 0);
}
- for (cl_idx = 0, off = le32_to_cpu(fsd->fsd_client_start);
+ for (cl_idx = 0, off = le32_to_cpu(fsd->lsd_client_start);
off < last_rcvd_size; cl_idx++) {
__u64 last_rcvd;
struct obd_export *exp;
/* Don't assume off is incremented properly by
* fsfilt_read_record(), in case sizeof(*fcd)
- * isn't the same as fsd->fsd_client_size. */
- off = le32_to_cpu(fsd->fsd_client_start) +
- cl_idx * le16_to_cpu(fsd->fsd_client_size);
+ * isn't the same as fsd->lsd_client_size. */
+ off = le32_to_cpu(fsd->lsd_client_start) +
+ cl_idx * le16_to_cpu(fsd->lsd_client_size);
rc = fsfilt_read_record(obd, filp, fcd, sizeof(*fcd), &off);
if (rc) {
CERROR("error reading FILT %s idx %d off %llu: rc %d\n",
exp = class_new_export(obd, (struct obd_uuid *)fcd->fcd_uuid);
CDEBUG(D_HA, "RCVRNG CLIENT uuid: %s idx: %d lr: "LPU64
" srv lr: "LPU64"\n", fcd->fcd_uuid, cl_idx,
- last_rcvd, le64_to_cpu(fsd->fsd_last_transno));
+ last_rcvd, le64_to_cpu(fsd->lsd_last_transno));
if (IS_ERR(exp))
GOTO(err_client, rc = PTR_ERR(exp));
CDEBUG(D_OTHER, "client at idx %d has last_rcvd = "LPU64"\n",
cl_idx, last_rcvd);
- if (last_rcvd > le64_to_cpu(fsd->fsd_last_transno))
- fsd->fsd_last_transno = cpu_to_le64(last_rcvd);
+ if (last_rcvd > le64_to_cpu(fsd->lsd_last_transno))
+ fsd->lsd_last_transno = cpu_to_le64(last_rcvd);
}
if (fcd)
OBD_FREE(fcd, sizeof(*fcd));
- obd->obd_last_committed = le64_to_cpu(fsd->fsd_last_transno);
+ obd->obd_last_committed = le64_to_cpu(fsd->lsd_last_transno);
if (obd->obd_recoverable_clients) {
CWARN("RECOVERY: service %s, %d recoverable clients, "
"last_rcvd "LPU64"\n", obd->obd_name,
obd->obd_recoverable_clients,
- le64_to_cpu(fsd->fsd_last_transno));
+ le64_to_cpu(fsd->lsd_last_transno));
obd->obd_next_recovery_transno = obd->obd_last_committed + 1;
obd->obd_recovering = 1;
obd->obd_recovery_start = CURRENT_SECONDS;
out:
filter->fo_mount_count = mount_count + 1;
- fsd->fsd_mount_count = cpu_to_le64(filter->fo_mount_count);
+ fsd->lsd_mount_count = cpu_to_le64(filter->fo_mount_count);
/* save it, so mount count and last_transno is current */
rc = filter_update_server_data(obd, filp, filter->fo_fsd, 1);
CERROR("error renaming O/R to O/0: rc %d\n", rc);
GOTO(cleanup_O0, rc);
}
- filter->fo_fsd->fsd_feature_incompat |=
+ filter->fo_fsd->lsd_feature_incompat |=
cpu_to_le32(OBD_INCOMPAT_GROUPS);
rc = filter_update_server_data(obd, filter->fo_rcvd_filp,
filter->fo_fsd, 1);
struct lustre_cfg* lcfg = buf;
struct filter_obd *filter = &obd->u.filter;
struct vfsmount *mnt;
+ struct lustre_mount_info *lmi;
struct obd_uuid uuid;
__u8 *uuid_ptr;
char *str, *label;
LUSTRE_CFG_BUFLEN(lcfg, 2) < 1)
RETURN(-EINVAL);
- obd->obd_fsops = fsfilt_get_ops(lustre_cfg_string(lcfg, 2));
+ lmi = server_get_mount(obd->obd_name);
+ if (lmi) {
+ /* We already mounted in lustre_fill_super.
+ lcfg bufs 1, 2, 4 (device, fstype, mount opts) are ignored.*/
+ struct lustre_sb_info *lsi = s2lsi(lmi->lmi_sb);
+ mnt = lmi->lmi_mnt;
+ obd->obd_fsops = fsfilt_get_ops(MT_STR(lsi->lsi_ldd));
+ } else {
+ /* old path - used by lctl */
+ CERROR("Using old MDS mount method\n");
+ mnt = do_kern_mount(lustre_cfg_string(lcfg, 2),
+ MS_NOATIME|MS_NODIRATIME,
+ lustre_cfg_string(lcfg, 1), option);
+ if (IS_ERR(mnt)) {
+ rc = PTR_ERR(mnt);
+ LCONSOLE_ERROR("Can't mount disk %s (%d)\n",
+ lustre_cfg_string(lcfg, 1), rc);
+ RETURN(rc);
+ }
+
+ obd->obd_fsops = fsfilt_get_ops(lustre_cfg_string(lcfg, 2));
+ }
if (IS_ERR(obd->obd_fsops))
- RETURN(PTR_ERR(obd->obd_fsops));
+ GOTO(err_mntput, rc = PTR_ERR(obd->obd_fsops));
rc = filter_iobuf_pool_init(filter);
if (rc != 0)
GOTO(err_ops, rc);
- mnt = do_kern_mount(lustre_cfg_string(lcfg, 2),MS_NOATIME|MS_NODIRATIME,
- lustre_cfg_string(lcfg, 1), option);
- if (IS_ERR(mnt)) {
- rc = PTR_ERR(mnt);
- LCONSOLE_ERROR("Can't mount disk %s (%d)\n",
- lustre_cfg_string(lcfg, 1), rc);
- GOTO(err_ops, rc);
- }
-
LASSERT(!lvfs_check_rdonly(lvfs_sbdev(mnt->mnt_sb)));
+ /* failover is the default */
obd->obd_replayable = 1;
if (lcfg->lcfg_bufcount > 3 && LUSTRE_CFG_BUFLEN(lcfg, 3) > 0) {
rc = filter_prep(obd);
if (rc)
- GOTO(err_mntput, rc);
+ GOTO(err_ops, rc);
filter->fo_destroy_in_progress = 0;
sema_init(&filter->fo_create_lock, 1);
} else {
str = "no UUID";
}
- label = fsfilt_label(obd, obd->u.obt.obt_sb);
+
+ label = fsfilt_get_label(obd, obd->u.obt.obt_sb);
if (obd->obd_recovering) {
- LCONSOLE_WARN("OST %s now serving %s (%s%s%s), but will be in"
+ LCONSOLE_WARN("OST %s now serving %s (%s%s%s), but will be in "
"recovery until %d %s reconnect, or if no clients"
" reconnect for %d:%.02d; during that time new "
"clients will not be allowed to connect. "
err_post:
filter_post(obd);
-err_mntput:
- unlock_kernel();
- mntput(mnt);
- obd->u.obt.obt_sb = 0;
- lock_kernel();
err_ops:
fsfilt_put_ops(obd->obd_fsops);
filter_iobuf_pool_done(filter);
+err_mntput:
+ if (lmi) {
+ server_put_mount(obd->obd_name, mnt);
+ } else {
+ /* old method */
+ unlock_kernel();
+ mntput(mnt);
+ lock_kernel();
+ }
+ obd->u.obt.obt_sb = 0;
return rc;
}
{
struct filter_obd *filter = &obd->u.filter;
lvfs_sbdev_type save_dev;
- int must_relock = 0;
+ int must_relock = 0, must_put = 0;
ENTRY;
if (obd->obd_fail)
- CERROR("%s: shutting down for failover; client state will"
- " be preserved.\n", obd->obd_name);
+ LCONSOLE_WARN("%s: shutting down for failover; client state "
+ "will be preserved.\n", obd->obd_name);
if (!list_empty(&obd->obd_exports)) {
CERROR("%s: still has clients!\n", obd->obd_name);
LL_DQUOT_OFF(obd->u.obt.obt_sb);
- if (atomic_read(&filter->fo_vfsmnt->mnt_count) > 1)
- CERROR("%s: mount point %p busy, mnt_count: %d\n",
- obd->obd_name, filter->fo_vfsmnt,
- atomic_read(&filter->fo_vfsmnt->mnt_count));
+ must_put = server_put_mount(obd->obd_name, filter->fo_vfsmnt);
+ /* must_put is for old method (l_p_m returns non-0 on err) */
/* We can only unlock kernel if we are in the context of sys_ioctl,
otherwise we never called lock_kernel */
unlock_kernel();
must_relock++;
}
-
- mntput(filter->fo_vfsmnt);
- //destroy_buffers(obd->u.obt.obt_sb->s_dev);
+
+ if (must_put)
+ /* In case we didn't mount with lustre_get_mount -- old method*/
+ mntput(filter->fo_vfsmnt);
obd->u.obt.obt_sb = NULL;
lvfs_clear_rdonly(save_dev);
if (data->ocd_connect_flags & OBD_CONNECT_INDEX) {
struct filter_obd *filter = &exp->exp_obd->u.filter;
- struct filter_server_data *fsd = filter->fo_fsd;
- int index = le32_to_cpu(fsd->fsd_ost_index);
-
- if (!(fsd->fsd_feature_compat &
+ struct lr_server_data *lsd = filter->fo_fsd;
+ int index = le32_to_cpu(lsd->lsd_ost_index);
+
+ if (!(lsd->lsd_feature_compat &
cpu_to_le32(OBD_COMPAT_OST))) {
/* this will only happen on the first connect */
- fsd->fsd_ost_index = cpu_to_le32(data->ocd_index);
- fsd->fsd_feature_compat |= cpu_to_le32(OBD_COMPAT_OST);
+ lsd->lsd_ost_index = cpu_to_le32(data->ocd_index);
+ lsd->lsd_feature_compat |= cpu_to_le32(OBD_COMPAT_OST);
filter_update_server_data(exp->exp_obd,
- filter->fo_rcvd_filp, fsd, 1);
+ filter->fo_rcvd_filp, lsd, 1);
} else if (index != data->ocd_index) {
LCONSOLE_ERROR("Connection from %s to index "
"%u doesn't match actual OST "
RETURN(-EINVAL);
}
- if (keylen < strlen("mds_conn") ||
- memcmp(key, "mds_conn", keylen) != 0)
+ if (keylen < strlen(KEY_MDS_CONN) ||
+ memcmp(key, KEY_MDS_CONN, keylen) != 0)
RETURN(-EINVAL);
- CWARN("%s: received MDS connection from %s\n", obd->obd_name,
- obd_export_nid2str(exp));
+ LCONSOLE_WARN("%s: received MDS connection from %s\n", obd->obd_name,
+ obd_export_nid2str(exp));
obd->u.filter.fo_mdc_conn.cookie = exp->exp_handle.h_cookie;
/* setup llog imports */
init_obd_quota_ops(quota_interface, &filter_sanobd_ops);
rc = class_register_type(&filter_obd_ops, lvars.module_vars,
- OBD_FILTER_DEVICENAME);
+ LUSTRE_OST_NAME);
if (rc)
GOTO(out, rc);
rc = class_register_type(&filter_sanobd_ops, lvars.module_vars,
- OBD_FILTER_SAN_DEVICENAME);
+ LUSTRE_OSTSAN_NAME);
if (rc) {
- class_unregister_type(OBD_FILTER_DEVICENAME);
+ class_unregister_type(LUSTRE_OST_NAME);
out:
if (quota_interface)
PORTAL_SYMBOL_PUT(filter_quota_interface);
if (quota_interface)
PORTAL_SYMBOL_PUT(filter_quota_interface);
- class_unregister_type(OBD_FILTER_SAN_DEVICENAME);
- class_unregister_type(OBD_FILTER_DEVICENAME);
+ class_unregister_type(LUSTRE_OSTSAN_NAME);
+ class_unregister_type(LUSTRE_OST_NAME);
+
OBD_FREE(obdfilter_created_scratchpad,
OBDFILTER_CREATED_SCRATCHPAD_ENTRIES *
sizeof(*obdfilter_created_scratchpad));
#ifdef __KERNEL__
# include <linux/spinlock.h>
#endif
-#include <linux/lustre_disk.h>
+#include <lustre_disk.h>
#include <lustre_handles.h>
#include <lustre_debug.h>
#include <obd.h>
#define FILTER_LAYOUT_VERSION "2"
-#ifndef OBD_FILTER_DEVICENAME
-# define OBD_FILTER_DEVICENAME "obdfilter"
-#endif
-
-#ifndef OBD_FILTER_SAN_DEVICENAME
-# define OBD_FILTER_SAN_DEVICENAME "sanobdfilter"
-#endif
-
-#define HEALTH_CHECK "health_check"
#define FILTER_INIT_OBJID 0
#define FILTER_SUBDIR_COUNT 32 /* set to zero for no subdirs */
#define FILTER_RECOVERY_TIMEOUT (obd_timeout * 5 * HZ / 2) /* *waves hands* */
-#define FILTER_INCOMPAT_SUPP (OBD_INCOMPAT_GROUPS)
+#define FILTER_INCOMPAT_SUPP (OBD_INCOMPAT_GROUPS | OBD_INCOMPAT_OST | \
+ OBD_INCOMPAT_COMMON_LR)
#define FILTER_GRANT_CHUNK (2ULL * PTLRPC_MAX_BRW_SIZE)
#define GRANT_FOR_LLOG(obd) 16
-/* Data stored per server at the head of the last_rcvd file. In le32 order.
- * Try to keep this the same as mds_server_data so we might one day merge. */
-struct filter_server_data {
-/* 00*/ __u8 fsd_uuid[40]; /* server UUID */
-/* 28*/ __u64 fsd_last_transno_new;/* future last completed transaction ID */
-/* 30*/ __u64 fsd_last_transno; /* last completed transaction ID */
- __u64 fsd_mount_count; /* FILTER incarnation number */
-/* 40*/ __u32 fsd_feature_compat; /* compatible feature flags */
- __u32 fsd_feature_rocompat;/* read-only compatible feature flags */
- __u32 fsd_feature_incompat;/* incompatible feature flags */
- __u32 fsd_server_size; /* size of server data area */
-/* 50*/ __u32 fsd_client_start; /* start of per-client data area */
- __u16 fsd_client_size; /* size of per-client data area */
- __u16 fsd_subdir_count; /* number of subdirectories for objects */
- __u64 fsd_catalog_oid; /* recovery catalog object id */
-/* 60*/ __u32 fsd_catalog_ogen; /* recovery catalog inode generation */
- __u8 fsd_peeruuid[40]; /* UUID of MDS associated with this OST */
-/* 8c*/ __u32 fsd_ost_index; /* index number of OST in LOV */
- __u32 fsd_mds_index; /* index number of MDS in LMV */
-/* 94*/ __u8 fsd_padding[LR_SERVER_SIZE - 148];
-};
-
/* Data stored per client in the last_rcvd file. In le32 order. */
struct filter_client_data {
__u8 fcd_uuid[40]; /* client UUID */
int filter_update_fidea(struct obd_export *exp, struct inode *inode,
void *handle, struct obdo *oa);
int filter_update_server_data(struct obd_device *, struct file *,
- struct filter_server_data *, int force_sync);
+ struct lr_server_data *, int force_sync);
int filter_update_last_objid(struct obd_device *, obd_gr, int force_sync);
int filter_common_setup(struct obd_device *, obd_count len, void *buf,
void *option);
struct obd_ioobj *, int niocount, struct niobuf_local *,
struct obd_trans_info *, int rc);
int filter_brw(int cmd, struct obd_export *, struct obdo *,
- struct lov_stripe_md *, obd_count oa_bufs, struct brw_page *,
- struct obd_trans_info *);
+ struct lov_stripe_md *, obd_count oa_bufs, struct brw_page *,
+ struct obd_trans_info *);
void flip_into_page_cache(struct inode *inode, struct page *new_page);
/* filter_io_*.c */
OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_SHUTDOWN, 10);
- if (KEY_IS("next_id")) {
+ if (KEY_IS(KEY_NEXT_ID)) {
if (vallen != sizeof(obd_id))
RETURN(-EINVAL);
obd->u.cli.cl_oscc.oscc_next_id = *((obd_id*)val) + 1;
RETURN(0);
}
- if (KEY_IS("initial_recov")) {
+ if (KEY_IS(KEY_INIT_RECOV)) {
if (vallen != sizeof(int))
RETURN(-EINVAL);
imp->imp_initial_recov = *(int *)val;
- CDEBUG(D_HA, "%s: set imp_no_init_recov = %d\n",
+ CDEBUG(D_HA, "%s: set imp_initial_recov = %d\n",
exp->exp_obd->obd_name,
imp->imp_initial_recov);
RETURN(0);
break;
}
case IMP_EVENT_INACTIVE: {
- rc = obd_notify_observer(obd, obd, OBD_NOTIFY_INACTIVE);
+ rc = obd_notify_observer(obd, obd, OBD_NOTIFY_INACTIVE, NULL);
break;
}
case IMP_EVENT_INVALIDATE: {
oscc->oscc_flags &= ~OSCC_FLAG_NOSPC;
spin_unlock(&oscc->oscc_lock);
}
- rc = obd_notify_observer(obd, obd, OBD_NOTIFY_ACTIVE);
+ rc = obd_notify_observer(obd, obd, OBD_NOTIFY_ACTIVE, NULL);
break;
}
case IMP_EVENT_OCD: {
if (ocd->ocd_connect_flags & OBD_CONNECT_REQPORTAL)
imp->imp_client->cli_request_portal =OST_REQUEST_PORTAL;
- rc = obd_notify_observer(obd, obd, OBD_NOTIFY_OCD);
+ rc = obd_notify_observer(obd, obd, OBD_NOTIFY_OCD, NULL);
break;
}
default:
EXIT;
}
+#if 0
+/* see ldlm_blocking_ast */
/* cut-n-paste of mds_blocking_ast() */
static int ost_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
void *data, int flag)
}
RETURN(0);
}
-
+#endif
+
static int ost_brw_lock_get(int mode, struct obd_export *exp,
struct obd_ioobj *obj, struct niobuf_remote *nb,
struct lustre_handle *lh)
ptlrpc_init_svc(OST_NBUFS, OST_BUFSIZE, OST_MAXREQSIZE,
OST_MAXREPSIZE, OST_REQUEST_PORTAL,
OSC_REPLY_PORTAL,
- obd_timeout * 1000, ost_handle, LUSTRE_OST_NAME,
+ obd_timeout * 1000, ost_handle, LUSTRE_OSS_NAME,
obd->obd_proc_entry, ost_print_req,
ost_num_threads);
if (ost->ost_service == NULL) {
int rc;
ENTRY;
- lprocfs_init_vars(ost,&lvars);
+ lprocfs_init_vars(ost, &lvars);
rc = class_register_type(&ost_obd_ops, lvars.module_vars,
- LUSTRE_OST_NAME);
+ LUSTRE_OSS_NAME);
RETURN(rc);
}
static void /*__exit*/ ost_exit(void)
{
- class_unregister_type(LUSTRE_OST_NAME);
+ class_unregister_type(LUSTRE_OSS_NAME);
}
MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
return 0;
}
+EXPORT_SYMBOL(ptlrpc_init_import);
#define UUID_STR "_UUID"
static void deuuidify(char *uuid, const char *prefix, char **uuid_start, int *uuid_len)
obd_import_event(imp->imp_obd, imp, IMP_EVENT_INVALIDATE);
}
+/* unset imp_invalid */
void ptlrpc_activate_import(struct obd_import *imp)
{
struct obd_device *obd = imp->imp_obd;
/* Don't retry if connect fails */
rc = 0;
obd_set_info_async(obd->obd_self_export,
- strlen("initial_recov"), "initial_recov",
+ strlen(KEY_INIT_RECOV), KEY_INIT_RECOV,
sizeof(rc), &rc, NULL);
}
RETURN(rc);
}
+EXPORT_SYMBOL(ptlrpc_connect_import);
static void ptlrpc_maybe_ping_import_soon(struct obd_import *imp)
{
switch (imp->imp_connect_op) {
case OST_CONNECT: rq_opc = OST_DISCONNECT; break;
case MDS_CONNECT: rq_opc = MDS_DISCONNECT; break;
+ case MGS_CONNECT: rq_opc = MGS_DISCONNECT; break;
default:
CERROR("don't know how to disconnect from %s (connect_op %d)\n",
obd2cli_tgt(imp->imp_obd), imp->imp_connect_op);
static int llog_client_close(struct llog_handle *handle)
{
- int rc = 0;
-
- ENTRY;
- RETURN(rc);
+ /* this doesn't call LLOG_ORIGIN_HANDLE_CLOSE because
+ the servers all close the file at the end of every
+ other LLOG_ RPC. */
+ return(0);
}
bufcount = m->bufcount;
if (n >= bufcount) {
- CDEBUG(D_INFO, "msg %p buffer[%d] not present (count %d)\n",
+ CERROR("msg %p buffer[%d] not present (count %d)\n",
m, n, bufcount);
return NULL;
}
__swab32s (&b->padding_4);
}
+void lustre_swab_mgs_target_info(struct mgs_target_info *mti)
+{
+ int i;
+ LASSERT(sizeof(lnet_nid_t) == sizeof(__u64));
+ for (i = 0; i < MTI_NIDS_MAX; i++) {
+ __swab64s(&mti->mti_nids[i]);
+ __swab64s(&mti->mti_failnids[i]);
+ }
+ for (i = 0; i < 8; i++) {
+ __swab16s(&mti->mti_failnodes[i]);
+ }
+ __swab32s(&mti->mti_stripe_index);
+ __swab32s(&mti->mti_nid_count);
+ __swab32s(&mti->mti_failnid_count);
+ __swab32s(&mti->mti_config_ver);
+ __swab32s(&mti->mti_flags);
+}
+
static void lustre_swab_obd_dqinfo (struct obd_dqinfo *i)
{
__swab64s (&i->dqi_bgrace);
(long long)MDS_STATUS_CONN);
LASSERTF(MDS_STATUS_LOV == 2, " found %lld\n",
(long long)MDS_STATUS_LOV);
+ LASSERTF(MGS_CONNECT == 250, " found %lld\n",
+ (long long)MGS_CONNECT);
+ LASSERTF(MGS_DISCONNECT == 251, " found %lld\n",
+ (long long)MGS_DISCONNECT);
+ LASSERTF(MGS_EXCEPTION == 252, " found %lld\n",
+ (long long)MGS_EXCEPTION);
+ LASSERTF(MGS_TARGET_REG == 253, " found %lld\n",
+ (long long)MGS_TARGET_REG);
+ LASSERTF(MGS_TARGET_DEL == 254, " found %lld\n",
+ (long long)MGS_TARGET_DEL);
LASSERTF(LDLM_ENQUEUE == 101, " found %lld\n",
(long long)LDLM_ENQUEUE);
LASSERTF(LDLM_CONVERT == 102, " found %lld\n",
time_t expire_time;
ENTRY;
- ptlrpc_daemonize("ping_evictor");
+ ptlrpc_daemonize("ll_evictor");
CDEBUG(D_HA, "Starting Ping Evictor\n");
pet_exp = NULL;
init_waitqueue_head(&pet_waitq);
- rc = kernel_thread(ping_evictor_main, NULL, CLONE_VM | CLONE_FS);
+ rc = cfs_kernel_thread(ping_evictor_main, NULL, CLONE_VM | CLONE_FILES);
if (rc < 0) {
pet_refcount--;
CERROR("Cannot start ping evictor thread: %d\n", rc);
EXPORT_SYMBOL(lustre_swab_ldlm_request);
EXPORT_SYMBOL(lustre_swab_ldlm_reply);
EXPORT_SYMBOL(lustre_swab_qdata);
+EXPORT_SYMBOL(lustre_swab_mgs_target_info);
/* recover.c */
EXPORT_SYMBOL(ptlrpc_run_recovery_over_upcall);
}
/*
+ * Administratively active/deactive a client.
* This should only be called by the ioctl interface, currently
- * with the lctl deactivate and activate commands.
+ * with the lctl deactivate and activate commands, and
+ * client umount -f (ll_umount_begin)
*/
int ptlrpc_set_import_active(struct obd_import *imp, int active)
{
RETURN(rc);
}
+/* Attempt to reconnect an import */
int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid)
{
int rc;
ENTRY;
spin_lock_irqsave(&imp->imp_lock, flags);
+ /* Check if reconnect is already in progress */
if (imp->imp_state != LUSTRE_IMP_DISCON) {
in_recovery = 1;
}
if (avail <= low_water)
ptlrpc_grow_req_bufs(svc);
- lprocfs_counter_add(svc->srv_stats, PTLRPC_REQBUF_AVAIL_CNTR, avail);
+ if (svc->srv_stats)
+ lprocfs_counter_add(svc->srv_stats, PTLRPC_REQBUF_AVAIL_CNTR,
+ avail);
}
static int
rc = -EINTR;
qchk->obd_uuid = cli->cl_target_uuid;
+ /* FIXME change strncmp to strcmp and save the strlen op */
if (strncmp(exp->exp_obd->obd_type->typ_name, LUSTRE_OSC_NAME,
strlen(LUSTRE_OSC_NAME)))
- memcpy(qchk->obd_type, LUSTRE_FILTER_NAME,
- strlen(LUSTRE_FILTER_NAME));
+ memcpy(qchk->obd_type, LUSTRE_OST_NAME,
+ strlen(LUSTRE_OST_NAME));
else if (strncmp(exp->exp_obd->obd_type->typ_name, LUSTRE_MDC_NAME,
strlen(LUSTRE_MDC_NAME)))
memcpy(qchk->obd_type, LUSTRE_MDS_NAME,
pkgexample_scripts += local.sh echo.sh uml.sh lov.sh
noinst_DATA =
noinst_SCRIPTS = leak_finder.pl llecho.sh llmount.sh llmountcleanup.sh
-noinst_SCRIPTS += llrmount.sh runfailure-mds runvmstat runfailure-net
+noinst_SCRIPTS += runfailure-mds runvmstat runfailure-net
noinst_SCRIPTS += runfailure-ost runiozone runregression-net.sh runtests
noinst_SCRIPTS += sanity.sh rundbench
PATH=`dirname $0`/../utils:$PATH
-[ "$CONFIGS" ] || CONFIGS="local lov"
+[ "$CONFIGS" ] || CONFIGS="local" #"local lov"
[ "$MAX_THREADS" ] || MAX_THREADS=10
if [ -z "$THREADS" ]; then
KB=`awk '/MemTotal:/ { print $2 }' /proc/meminfo`
[ "$MOUNT2" ] || MOUNT2=${MOUNT}2
[ "$TMP" ] || TMP=/tmp
[ "$COUNT" ] || COUNT=1000
-#[ "$DEBUG_LVL" ] || DEBUG_LVL=0x370200
[ "$DEBUG_LVL" ] || DEBUG_LVL=0
[ "$DEBUG_OFF" ] || DEBUG_OFF="sysctl -w lnet.debug=$DEBUG_LVL"
-[ "$DEBUG_ON" ] || DEBUG_ON="sysctl -w lnet.debug=0x33f0480"
+[ "$DEBUG_ON" ] || DEBUG_ON="sysctl -w lnet.debug=0x33f0484"
LIBLUSTRE=${LIBLUSTRE:-../liblustre}
LIBLUSTRETESTS=${LIBLUSTRETESTS:-$LIBLUSTRE/tests}
+LUSTRE=${LUSTRE:-`dirname $0`/..}
+. $LUSTRE/tests/test-framework.sh
+init_test_env $@
+. mountconf.sh
+
+SETUP=${SETUP:-mcsetup}
+FORMAT=${FORMAT:-mcformat}
+CLEANUP=${CLEANUP:-mcstopall}
+
for NAME in $CONFIGS; do
export NAME MOUNT START CLEAN
- [ -e $NAME.sh ] && sh $NAME.sh
- [ ! -e $NAME.xml ] && [ -z "$LDAPURL" ] && \
- echo "no config '$NAME.xml'" 1>&2 && exit 1
+ . $LUSTRE/tests/cfg/$NAME.sh
+
+ assert_env mds_HOST MDS_MKFS_OPTS MDSDEV
+ assert_env ost_HOST ost2_HOST OST_MKFS_OPTS OSTDEV
+ assert_env FSNAME
if [ "$RUNTESTS" != "no" ]; then
sh runtests
fi
if [ "$DBENCH" != "no" ]; then
- mount | grep $MOUNT || sh llmount.sh
+ mount_client $MOUNT
SPACE=`df -P $MOUNT | tail -n 1 | awk '{ print $4 }'`
DB_THREADS=`expr $SPACE / 50000`
[ $THREADS -lt $DB_THREADS ] && DB_THREADS=$THREADS
$DEBUG_OFF
sh rundbench 1
$DEBUG_ON
- sh llmountcleanup.sh
- sh llrmount.sh
+ $CLEANUP
+ $SETUP
if [ $DB_THREADS -gt 1 ]; then
$DEBUG_OFF
sh rundbench $DB_THREADS
$DEBUG_ON
- sh llmountcleanup.sh
- sh llrmount.sh
+ $CLEANUP
+ $SETUP
fi
rm -f /mnt/lustre/`hostname`/client.txt
fi
+
chown $UID $MOUNT && chmod 700 $MOUNT
if [ "$BONNIE" != "no" ]; then
- mount | grep $MOUNT || sh llmount.sh
+ mount_client $MOUNT
$DEBUG_OFF
bonnie++ -f -r 0 -s $(($SIZE / 1024)) -n 10 -u $UID -d $MOUNT
$DEBUG_ON
- sh llmountcleanup.sh
- sh llrmount.sh
+ $CLEANUP
+ $SETUP
fi
IOZONE_OPTS="-i 0 -i 1 -i 2 -e -+d -r $RSIZE -s $SIZE"
IOZFILE="-f $MOUNT/iozone"
if [ "$IOZONE" != "no" ]; then
- mount | grep $MOUNT || sh llmount.sh
+ mount_client $MOUNT
$DEBUG_OFF
iozone $IOZONE_OPTS $IOZFILE
$DEBUG_ON
- sh llmountcleanup.sh
- sh llrmount.sh
+ $CLEANUP
+ $SETUP
if [ "$O_DIRECT" != "no" -a "$IOZONE_DIR" != "no" ]; then
$DEBUG_OFF
iozone -I $IOZONE_OPTS $IOZFILE.odir
$DEBUG_ON
- sh llmountcleanup.sh
- sh llrmount.sh
+ $CLEANUP
+ $SETUP
fi
SPACE=`df -P $MOUNT | tail -n 1 | awk '{ print $4 }'`
done
iozone $IOZONE_OPTS -t $IOZ_THREADS $IOZFILE
$DEBUG_ON
- sh llmountcleanup.sh
- sh llrmount.sh
+ $CLEANUP
+ $SETUP
elif [ $IOZVER -lt 3145 ]; then
VER=`iozone -v | awk '/Revision:/ { print $3 }'`
echo "iozone $VER too old for multi-thread test"
fi
fi
+
if [ "$FSX" != "no" ]; then
- mount | grep $MOUNT || sh llmount.sh
+ mount | grep $MOUNT || $SETUP
$DEBUG_OFF
./fsx -c 50 -p 1000 -P $TMP -l $SIZE \
-N $(($COUNT * 100)) $MOUNT/fsxfile
$DEBUG_ON
- sh llmountcleanup.sh
- sh llrmount.sh
+ $CLEANUP
+ $SETUP
fi
mkdir -p $MOUNT2
esac
if [ "$SANITYN" != "no" ]; then
- mount | grep $MOUNT || sh llmount.sh
+ mount_client $MOUNT
$DEBUG_OFF
if [ "$MDSNODE" -a "$MDSNAME" -a "$CLIENT" ]; then
- llmount $MDSNODE:/$MDSNAME/$CLIENT $MOUNT2
+ mount_client $MOUNT2
SANITYLOG=$TMP/sanity.log START=: CLEAN=: sh sanityN.sh
umount $MOUNT2
else
fi
$DEBUG_ON
- sh llmountcleanup.sh
- sh llrmount.sh
+ $CLEANUP
+ $SETUP
fi
if [ "$LIBLUSTRE" != "no" ]; then
- mount | grep $MOUNT || sh llmount.sh
+ mount_client $MOUNT
export LIBLUSTRE_MOUNT_POINT=$MOUNT2
export LIBLUSTRE_MOUNT_TARGET=$MDSNODE:/$MDSNAME/$CLIENT
export LIBLUSTRE_TIMEOUT=`cat /proc/sys/lustre/timeout`
if [ -x $LIBLUSTRETESTS/sanity ]; then
$LIBLUSTRETESTS/sanity --target=$LIBLUSTRE_MOUNT_TARGET
fi
- sh llmountcleanup.sh
- #sh llrmount.sh
+ $CLEANUP
+ #$SETUP
fi
- mount | grep $MOUNT && sh llmountcleanup.sh
+ $CLEANUP
done
if [ "$REPLAY_SINGLE" != "no" ]; then
+FSNAME=lustre
mds_HOST=${mds_HOST:-`hostname`}
+mgs_HOST=${mgs_HOST:-$mds_HOST}
mdsfailover_HOST=${mdsfailover_HOST:-""}
ost1_HOST=${ost1_HOST:-"`hostname`"}
ost2_HOST=${ost2_HOST:-"`hostname`"}
EXTRA_OSTS=${EXTRA_OSTS:-"`hostname`"}
-client_HOST=${client_HOST:-"'*'"}
LIVE_CLIENT=${LIVE_CLIENT:-"`hostname`"}
# This should always be a list, not a regexp
FAIL_CLIENTS=${FAIL_CLIENTS:-""}
+MDSDEV=${MDSDEV:-$TMP/${FSNAME}-mdt}
+MDSSIZE=${MDSSIZE:-10000} #50000000
+OSTDEV=${OSTDEV:-"$TMP/${FSNAME}-ost%d"}
+OSTSIZE=${OSTSIZE:=10000} #50000000
+
NETTYPE=${NETTYPE:-tcp}
+MGSNID=`h2$NETTYPE $mgs_HOST`
+FSTYPE=${FSTYPE:-ext3}
+STRIPE_BYTES=${STRIPE_BYTES:-1048576}
+STRIPES_PER_OBJ=${STRIPES_PER_OBJ:-0}
TIMEOUT=${TIMEOUT:-30}
-PTLDEBUG=${PTLDEBUG:-0x3f0400}
+PTLDEBUG=${PTLDEBUG:-0x33f0404}
SUBSYSTEM=${SUBSYSTEM:- 0xffb7e3ff}
-MOUNT=${MOUNT:-"/mnt/lustre"}
-#CLIENT_UPCALL=${CLIENT_UPCALL:-`pwd`/client-upcall-mdev.sh}
-#UPCALL=${CLIENT_UPCALL:-`pwd`/replay-single-upcall.sh}
-MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`}
-MDSSIZE=${MDSSIZE:-10000} #50000000
-MDSJOURNALSIZE=${MDSJOURNALSIZE:-0}
+MKFSOPT=""
+MOUNTOPT=""
+[ "x$MDSJOURNALSIZE" != "x" ] &&
+ MKFSOPT=$MKFSOPT" -J size=$MDSJOURNALSIZE"
+[ "x$MDSISIZE" != "x" ] &&
+ MKFSOPT=$MKFSOPT" -i $MDSISIZE"
+[ "x$MKFSOPT" != "x" ] &&
+ MKFSOPT="--mkfsoptions=\"$MKFSOPT\""
+[ "x$mdsfailover_HOST" != "x" ] &&
+ MOUNTOPT=$MOUNTOPT" --failnode=`h2$NETTYPE $mdsfailover_HOST`"
+[ "x$STRIPE_BYTES" != "x" ] &&
+ MOUNTOPT=$MOUNTOPT" --param default_stripe_size=$STRIPE_BYTES"
+[ "x$STRIPES_PER_OBJ" != "x" ] &&
+ MOUNTOPT=$MOUNTOPT" --param default_stripe_count=$STRIPES_PER_OBJ"
+MDS_MKFS_OPTS="--mgs --mdt --device-size=$MDSSIZE $MKFSOPT $MOUNTOPT $MDSOPT"
-OSTDEV=${OSTDEV:-"$TMP/ost%d-`hostname`"}
-OSTSIZE=${OSTSIZE:=10000} #50000000
-OSTJOURNALSIZE=${OSTJOURNALSIZE:-0}
+MKFSOPT=""
+MOUNTOPT=""
+[ "x$OSTJOURNALSIZE" != "x" ] &&
+ MKFSOPT=$MKFSOPT" -J size=$OSTJOURNALSIZE"
+[ "x$MKFSOPT" != "x" ] &&
+ MKFSOPT="--mkfsoptions=\"$MKFSOPT\""
+[ "x$ostfailover_HOST" != "x" ] &&
+ MOUNTOPT=$MOUNTOPT" --failnode=`h2$NETTYPE $ostfailover_HOST`"
+OST_MKFS_OPTS="--ost --device-size=$OSTSIZE --mgsnode=$MGSNID $MKFSOPT $MOUNTOPT $OSTOPT"
-FSTYPE=${FSTYPE:-ext3}
-STRIPE_BYTES=${STRIPE_BYTES:-65536} #1048576
-STRIPES_PER_OBJ=${STRIPES_PER_OBJ:-0}
+MDS_MOUNT_OPTS="-o loop"
+OST_MOUNT_OPTS="-o loop"
+MOUNT=${MOUNT:-"/mnt/lustre"}
+PDSH=${PDSH:-no_dsh}
FAILURE_MODE=${FAILURE_MODE:-SOFT} # or HARD
POWER_DOWN=${POWER_DOWN:-"powerman --off"}
POWER_UP=${POWER_UP:-"powerman --on"}
OSTJOURNALSIZE=${OSTJOURNALSIZE:-0}
FSTYPE=${FSTYPE:-ext3}
-#STRIPE_BYTES=${STRIPE_BYTES:-65536}
STRIPE_BYTES=${STRIPE_BYTES:-1048576}
STRIPES_PER_OBJ=${STRIPES_PER_OBJ:-0}
OSTNODE=${OSTNODE:-`hostname`}
CLIENT=${CLIENT:-client}
+FSNAME=lustre
mds_HOST=${mds_HOST:-$MDSNODE}
mdsfailover_HOST=${mdsfailover_HOST}
+mgs_HOST=${mgs_HOST:-$mds_HOST}
ost_HOST=${ost_HOST:-$OSTNODE}
+ostfailover_HOST=${ostfailover_HOST}
ost2_HOST=${ost2_HOST:-$ost_HOST}
-client_HOST=${client_HOST:-$CLIENT}
-NETTYPE=${NETTYPE:-tcp}
-
-MOUNT=${MOUNT:-"/mnt/lustre"}
-MOUNT1=${MOUNT1:-$MOUNT}
-MOUNT2=${MOUNT2:-${MOUNT}2}
-DIR=${DIR:-$MOUNT}
-DIR2=${DIR2:-$MOUNT1}
-PTLDEBUG=${PTLDEBUG:-0x3f0400}
-SUBSYSTEM=${SUBSYSTEM:- 0xffb7e3ff}
-PDSH=${PDSH:-no_dsh}
TMP=${TMP:-/tmp}
-MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`}
+MDSDEV=${MDSDEV:-$TMP/${FSNAME}-mdt}
MDSSIZE=${MDSSIZE:-100000}
-OSTDEV=${OSTDEV:-$TMP/ost1-`hostname`}
+MDSOPT=${MDSOPT:-"--mountfsoptions=acl"}
+OSTDEV=${OSTDEV:-$TMP/${FSNAME}-ost0}
OSTSIZE=${OSTSIZE:-200000}
-FSTYPE=${FSTYPE:-ext3}
+OSTDEV2=${OSTDEV2:-$TMP/${FSNAME}-ost1}
+
+NETTYPE=${NETTYPE:-tcp}
+MGSNID=`h2$NETTYPE $mgs_HOST`
+FSTYPE=${FSTYPE:-ldiskfs}
+STRIPE_BYTES=${STRIPE_BYTES:-1048576}
+STRIPES_PER_OBJ=${STRIPES_PER_OBJ:-0}
TIMEOUT=${TIMEOUT:-20}
UPCALL=${UPCALL:-DEFAULT}
+PTLDEBUG=${PTLDEBUG:-0x33f0404}
+SUBSYSTEM=${SUBSYSTEM:- 0xffb7e3ff}
-MDSOPT=${MDSOPT:-"user_xattr,acl"}
-CLIENTOPT=${CLIENTOPT:-"user_xattr,acl"}
-MOUNTOPT=${MOUNTOPT:-"user_xattr,acl"}
+MKFSOPT=""
+MOUNTOPT=""
+[ "x$MDSJOURNALSIZE" != "x" ] &&
+ MKFSOPT=$MKFSOPT" -J size=$MDSJOURNALSIZE"
+[ "x$MDSISIZE" != "x" ] &&
+ MKFSOPT=$MKFSOPT" -i $MDSISIZE"
+[ "x$MKFSOPT" != "x" ] &&
+ MKFSOPT="--mkfsoptions=\"$MKFSOPT\""
+[ "x$mdsfailover_HOST" != "x" ] &&
+ MOUNTOPT=$MOUNTOPT" --failnode=`h2$NETTYPE $mdsfailover_HOST`"
+[ "x$STRIPE_BYTES" != "x" ] &&
+ MOUNTOPT=$MOUNTOPT" --param default_stripe_size=$STRIPE_BYTES"
+[ "x$STRIPES_PER_OBJ" != "x" ] &&
+ MOUNTOPT=$MOUNTOPT" --param default_stripe_count=$STRIPES_PER_OBJ"
+MDS_MKFS_OPTS="--mgs --mdt --device-size=$MDSSIZE --param obd_timeout=$TIMEOUT $MKFSOPT $MOUNTOPT $MDSOPT"
-STRIPE_BYTES=${STRIPE_BYTES:-1048576}
-STRIPES_PER_OBJ=${STRIPES_PER_OBJ:-0}
+MKFSOPT=""
+MOUNTOPT=""
+[ "x$OSTJOURNALSIZE" != "x" ] &&
+ MKFSOPT=$MKFSOPT" -J size=$OSTJOURNALSIZE"
+[ "x$MKFSOPT" != "x" ] &&
+ MKFSOPT="--mkfsoptions=\"$MKFSOPT\""
+[ "x$ostfailover_HOST" != "x" ] &&
+ MOUNTOPT=$MOUNTOPT" --failnode=`h2$NETTYPE $ostfailover_HOST`"
+OST_MKFS_OPTS="--ost --device-size=$OSTSIZE --mgsnode=$MGSNID --param obd_timeout=$TIMEOUT $MKFSOPT $MOUNTOPT $OSTOPT"
+OST2_MKFS_OPTS=${OST2_MKFS_OPTS:-${OST_MKFS_OPTS}}
+
+MDS_MOUNT_OPTS="-o loop"
+OST_MOUNT_OPTS="-o loop"
+OST2_MOUNT_OPTS="-o loop"
+MOUNT=${MOUNT:-/mnt/${FSNAME}}
+MOUNT1=${MOUNT1:-$MOUNT}
+MOUNT2=${MOUNT2:-${MOUNT}2}
+DIR=${DIR:-$MOUNT}
+DIR1=${DIR:-$MOUNT1}
+DIR2=${DIR2:-$MOUNT2}
+MOUNTOPT=${MOUNTOPT:-"user_xattr,acl"}
+
+PDSH=${PDSH:-no_dsh}
FAILURE_MODE=${FAILURE_MODE:-SOFT} # or HARD
POWER_DOWN=${POWER_DOWN:-"powerman --off"}
POWER_UP=${POWER_UP:-"powerman --on"}
TIMEOUT=${TIMEOUT:-10}
#UPCALL=${UPCALL:-$PWD/replay-single-upcall.sh}
-STRIPE_BYTES=${STRIPE_BYTES:-65536}
+STRIPE_BYTES=${STRIPE_BYTES:-1048576}
STRIPES_PER_OBJ=${STRIPES_PER_OBJ:-0}
FAILURE_MODE=${FAILURE_MODE:-SOFT} # or HARD
set -e
ONLY=${ONLY:-"$*"}
-# bug number for skipped test:
-ALWAYS_EXCEPT=" $CONF_SANITY_EXCEPT"
+# bug number for skipped test: mc mc mc mc mc mc mc mc mc
+ALWAYS_EXCEPT=" $CONF_SANITY_EXCEPT 9 10 11 12 13 13b 14 15 18"
# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
SRCDIR=`dirname $0`
LUSTRE=${LUSTRE:-`dirname $0`/..}
RLUSTRE=${RLUSTRE:-$LUSTRE}
MOUNTLUSTRE=${MOUNTLUSTRE:-/sbin/mount.lustre}
+MKFSLUSTRE=${MKFSLUSTRE:-/usr/sbin/mkfs.lustre}
HOSTNAME=`hostname`
. $LUSTRE/tests/test-framework.sh
. ${CONFIG:=$LUSTRE/tests/cfg/local.sh}
-gen_config() {
- rm -f $XMLCONFIG
-
- add_mds mds --dev $MDSDEV --size $MDSSIZE
- add_lov lov1 mds --stripe_sz $STRIPE_BYTES\
- --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0
- add_ost ost --lov lov1 --dev $OSTDEV --size $OSTSIZE
- add_client client mds --lov lov1 --path $MOUNT
+reformat() {
+ grep " $MOUNT " /proc/mounts && zconf_umount `hostname` $MOUNT
+ stop ost -f
+ stop ost2 -f
+ stop mds -f
+ echo Formatting mds, ost, ost2
+ add mds $MDS_MKFS_OPTS --reformat $MDSDEV > /dev/null
+ add ost $OST_MKFS_OPTS --reformat $OSTDEV > /dev/null
+ add ost2 $OST2_MKFS_OPTS --reformat $OSTDEV2 > /dev/null
}
-gen_second_config() {
- rm -f $XMLCONFIG
-
- add_mds mds2 --dev $MDSDEV --size $MDSSIZE
- add_lov lov2 mds2 --stripe_sz $STRIPE_BYTES\
- --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0
- add_ost ost2 --lov lov2 --dev $OSTDEV --size $OSTSIZE
- add_client client mds2 --lov lov2 --path $MOUNT2
+gen_config() {
+ reformat
+ # The MGS must be started before the OSTs for a new fs, so start
+ # and stop to generate the startup logs.
+ start_mds
+ start_ost
+ sleep 5
+ stop_ost
+ stop_mds
}
start_mds() {
echo "start mds service on `facet_active_host mds`"
- start mds --reformat $MDSLCONFARGS || return 94
+ start mds $MDSDEV $MDS_MOUNT_OPTS || return 94
}
stop_mds() {
echo "stop mds service on `facet_active_host mds`"
- stop mds $@ || return 97
+ # These tests all use non-failover stop
+ stop mds -f || return 97
}
start_ost() {
echo "start ost service on `facet_active_host ost`"
- start ost --reformat $OSTLCONFARGS || return 95
+ start ost $OSTDEV $OST_MOUNT_OPTS || return 95
}
stop_ost() {
echo "stop ost service on `facet_active_host ost`"
- stop ost $@ || return 98
+ # These tests all use non-failover stop
+ stop ost -f || return 98
+}
+
+start_ost2() {
+ echo "start ost2 service on `facet_active_host ost2`"
+ start ost2 $OSTDEV2 $OST2_MOUNT_OPTS || return 92
+}
+
+stop_ost2() {
+ echo "stop ost2 service on `facet_active_host ost2`"
+ # These tests all use non-failover stop
+ stop ost2 -f || return 93
}
mount_client() {
local MOUNTPATH=$1
echo "mount lustre on ${MOUNTPATH}....."
- zconf_mount `hostname` $MOUNTPATH || return 96
+ zconf_mount `hostname` $MOUNTPATH || return 96
}
umount_client() {
local MOUNTPATH=$1
echo "umount lustre on ${MOUNTPATH}....."
- zconf_umount `hostname` $MOUNTPATH || return 97
+ zconf_umount `hostname` $MOUNTPATH || return 97
}
manual_umount_client(){
- echo "manual umount lustre on ${MOUNTPATH}...."
- do_facet client "umount $MOUNT"
+ echo "manual umount lustre on ${MOUNT}...."
+ do_facet client "umount -d $MOUNT"
}
setup() {
mount_client $MOUNT
}
+cleanup_nocli() {
+ stop_mds || return 201
+ stop_ost || return 202
+ unload_modules || return 203
+}
+
cleanup() {
- umount_client $MOUNT $FORCE || return 200
- stop_mds $FORCE || return 201
- stop_ost $FORCE || return 202
- # catch case where these return just fine, but modules are still not unloaded
- /sbin/lsmod | egrep -q "lnet|libcfs"
- if [ 1 -ne $? ]; then
- echo "modules still loaded..."
- /sbin/lsmod
- return 203
- fi
+ umount_client $MOUNT || return 200
+ cleanup_nocli || return $?
}
check_mount() {
- do_facet client "touch $DIR/a" || return 71
- do_facet client "rm $DIR/a" || return 72
+ do_facet client "cp /etc/passwd $DIR/a" || return 71
+ do_facet client "rm $DIR/a" || return 72
+ # make sure lustre is actually mounted (touch will block,
+ # but grep won't, so do it after)
+ do_facet client "grep $MOUNT' ' /proc/mounts > /dev/null" || return 73
echo "setup single mount lustre success"
}
test_0() {
- start_ost
- start_mds
- mount_client $MOUNT
+ setup
check_mount || return 41
cleanup || return $?
}
test_1() {
start_ost
echo "start ost second time..."
- start ost --reformat $OSTLCONFARGS
- start_mds
- mount_client $MOUNT
+ setup
check_mount || return 42
cleanup || return $?
}
start_ost
start_mds
echo "start mds second time.."
- start mds --reformat $MDSLCONFARGS
-
+ start_mds
mount_client $MOUNT
check_mount || return 43
cleanup || return $?
test_3() {
setup
- mount_client $MOUNT
-
+ #mount.lustre returns an error if already in mtab
+ mount_client $MOUNT && return $?
check_mount || return 44
-
- umount_client $MOUNT
- cleanup || return $?
+ cleanup || return $?
}
run_test 3 "mount client twice"
test_4() {
setup
touch $DIR/$tfile || return 85
- stop_ost --force
+ stop_ost -f
cleanup
eno=$?
# ok for ost to fail shutdown
test_5() {
setup
touch $DIR/$tfile || return 1
- stop_mds --force || return 2
+ stop_mds -f || return 2
# cleanup may return an error from the failed
# disconnects; for now I'll consider this successful
# if all the modules have unloaded.
- umount $MOUNT &
+ umount -d $MOUNT &
UMOUNT_PID=$!
sleep 6
echo "killing umount"
grep " $MOUNT " /etc/mtab && echo "test 5: mtab after second umount" && return 11
fi
- # cleanup client modules
- $LCONF --cleanup --nosetup --node client_facet $XMLCONFIG > /dev/null
-
+ manual_umount_client
# stop_mds is a no-op here, and should not fail
- stop_mds || return 4
- stop_ost || return 5
-
- lsmod | grep -q lnet && return 6
- return 0
+ cleanup_nocli || return $?
+ # df may have lingering entry
+ manual_umount_client
+ # mtab may have lingering entry
+ grep -v $MOUNT" " /etc/mtab > $TMP/mtabtemp
+ mv $TMP/mtabtemp /etc/mtab
}
run_test 5 "force cleanup mds, then cleanup"
test_5b() {
start_ost
-
[ -d $MOUNT ] || mkdir -p $MOUNT
- grep " $MOUNT " /etc/mtab && echo "test 5b: mtab before lconf" && return 9
- $LCONF --nosetup --node client_facet $XMLCONFIG > /dev/null
grep " $MOUNT " /etc/mtab && echo "test 5b: mtab before mount" && return 10
- llmount -o nettype=$NETTYPE,$MOUNTOPT $mds_HOST:/mds_svc/client_facet $MOUNT && return 1
+ mount_client $MOUNT && return 1
grep " $MOUNT " /etc/mtab && echo "test 5b: mtab after failed mount" && return 11
-
- # cleanup client modules
- $LCONF --cleanup --nosetup --node client_facet $XMLCONFIG > /dev/null
-
+ umount_client $MOUNT
# stop_mds is a no-op here, and should not fail
- stop_mds || return 2
- stop_ost || return 3
-
- lsmod | grep -q lnet && return 4
+ cleanup_nocli || return $?
return 0
-
}
run_test 5b "mds down, cleanup after failed mount (bug 2712)"
test_5c() {
start_ost
start_mds
-
[ -d $MOUNT ] || mkdir -p $MOUNT
- grep " $MOUNT " /etc/mtab && echo "test 5c: mtab before lconf" && return 9
- $LCONF --nosetup --node client_facet $XMLCONFIG > /dev/null
grep " $MOUNT " /etc/mtab && echo "test 5c: mtab before mount" && return 10
- llmount -vv -o nettype=$NETTYPE,$MOUNTOPT $mds_HOST:/wrong_mds_svc/client_facet $MOUNT && return 1
+ mount -t lustre `facet_nid mgs`:/wrong.$FSNAME $MOUNT || :
grep " $MOUNT " /etc/mtab && echo "test 5c: mtab after failed mount" && return 11
-
- # cleanup client modules
- $LCONF --cleanup --nosetup --node client_facet $XMLCONFIG > /dev/null
-
- stop_mds || return 2
- stop_ost || return 3
-
- lsmod | grep -q lnet && return 4
- return 0
-
+ umount_client $MOUNT
+ cleanup_nocli || return $?
}
run_test 5c "cleanup after failed mount (bug 2712)"
test_5d() {
start_ost
start_mds
- stop_ost --force
-
- [ -d $MOUNT ] || mkdir -p $MOUNT
- grep " $MOUNT " /etc/mtab && echo "test 5d: mtab before lconf" && return 9
- $LCONF --nosetup --node client_facet $XMLCONFIG > /dev/null
+ stop_ost -f
grep " $MOUNT " /etc/mtab && echo "test 5d: mtab before mount" && return 10
- llmount -vv -o nettype=$NETTYPE,$MOUNTOPT `facet_nid mds`:/mds_svc/client_facet $MOUNT || return 1
-
- umount_client $MOUNT || return 2
+ mount_client $MOUNT || return 1
+ cleanup || return $?
grep " $MOUNT " /etc/mtab && echo "test 5d: mtab after unmount" && return 11
-
- stop_mds || return 3
-
- lsmod | grep -q lnet && return 4
return 0
-
}
-run_test 5d "ost down, don't crash during mount attempt"
+run_test 5d "mount with ost down"
test_5e() {
start_ost
start_mds
- sleep 5 # give MDS a chance to connect to OSTs before delaying requests
+ # give MDS a chance to connect to OSTs (bz 10476)
+ sleep 5
#define OBD_FAIL_PTLRPC_DELAY_SEND 0x506
do_facet client "sysctl -w lustre.fail_loc=0x80000506"
grep " $MOUNT " /etc/mtab && echo "test 5e: mtab before mount" && return 10
mount_client $MOUNT || echo "mount failed (not fatal)"
- umount_client $MOUNT || return 2
+ cleanup || return $?
grep " $MOUNT " /etc/mtab && echo "test 5e: mtab after unmount" && return 11
-
- stop_mds || return 3
- stop_ost || return 3
-
- lsmod | grep -q lnet && return 4
return 0
}
run_test 5e "delayed connect, don't crash (bug 10268)"
test_7() {
setup
manual_umount_client
- cleanup || return $?
+ cleanup_nocli || return $?
}
run_test 7 "manual umount, then cleanup"
test_8() {
- start_ost
- start_mds
-
- mount_client $MOUNT
+ setup
mount_client $MOUNT2
-
check_mount2 || return 45
- umount $MOUNT
umount_client $MOUNT2
-
- stop_mds
- stop_ost
+ cleanup || return $?
}
run_test 8 "double mount setup"
echo "mount lustre on $MOUNT with $MOUNTLUSTRE: success"
[ -d /r ] && $LCTL modules > /r/tmp/ogdb-`hostname`
check_mount || return 41
- do_node `hostname` umount $MOUNT
+ do_node `hostname` umount -d $MOUNT
[ -f "$MOUNTLUSTRE" ] && rm -f $MOUNTLUSTRE
echo "mount lustre on ${MOUNT} without $MOUNTLUSTRE....."
if [ ! -f "$MDSDEV" ]; then
echo "no $MDSDEV existing, so mount Lustre to create one"
- start_ost
- start_mds
- mount_client $MOUNT
+ setup
check_mount || return 41
cleanup || return $?
fi
do_facet mds "[ -d $TMPMTPT ] || mkdir -p $TMPMTPT;
mount -o loop -t ext3 $MDSDEV $TMPMTPT || return \$?;
chmod 555 $TMPMTPT/{OBJECTS,LOGS,PENDING} || return \$?;
- umount $TMPMTPT || return \$?" || return $?
+ umount -d $TMPMTPT || return \$?" || return $?
echo "mount Lustre to change the mode of OBJECTS/LOGS/PENDING, then umount Lustre"
- start_ost
- start_mds
- mount_client $MOUNT
+ setup
check_mount || return 41
cleanup || return $?
run_test 16 "verify that lustre will correct the mode of OBJECTS/LOGS/PENDING"
test_17() {
- TMPMTPT="${MOUNT%/*}/conf17"
-
if [ ! -f "$MDSDEV" ]; then
echo "no $MDSDEV existing, so mount Lustre to create one"
- start_ost
- start_mds
- mount_client $MOUNT
+ setup
check_mount || return 41
cleanup || return $?
fi
echo "Remove mds config log"
- do_facet mds "debugfs -w -R 'unlink LOGS/mds_svc' $MDSDEV || return \$?" || return $?
+ do_facet mds "debugfs -w -R 'unlink CONFIGS/$FSNAME-MDT0000' $MDSDEV || return \$?" || return $?
start_ost
- start mds $MDSLCONFARGS && return 42
- cleanup || return $?
+ start_mds && return 42
+ gen_config
}
run_test 17 "Verify failed mds_postsetup won't fail assertion (2936)"
echo "mount mds with large journal..."
OLDMDSSIZE=$MDSSIZE
MDSSIZE=2000000
+ #FIXME have to change MDS_MKFS_OPTS
gen_config
echo "mount lustre system..."
- start_ost
- start_mds
- mount_client $MOUNT
+ setup
check_mount || return 41
echo "check journal size..."
}
run_test 18 "check lconf creates large journals"
-test_19() {
- # first format the ost/mdt
+test_19a() {
+ start_mds || return 1
+ stop_mds -f || return 2
+}
+run_test 19a "start/stop MDS without OSTs"
+
+test_19b() {
+ start_ost || return 1
+ stop_ost -f || return 2
+}
+run_test 19b "start/stop OSTs without MDS"
+
+test_20a() {
+ start_mds
+ start_ost
+ stop_ost
+ stop_mds
+}
+run_test 20a "start mds before ost, stop ost first"
+
+test_20b() {
start_ost
start_mds
stop_mds
stop_ost
- start mds $MDSLCONFARGS || return 1
- stop mds --force || return 2
}
-run_test 19 "start/stop MDS without OSTs"
+run_test 20b "start ost before mds, stop mds first"
+
+test_20c() {
+ start_ost
+ start_mds
+ start_ost2
+ stop_ost
+ stop_ost2
+ stop_mds
+}
+run_test 20c "start mds between two osts, stop mds last"
+
+test_21() {
+ reformat
+ start_mds
+ echo Client mount before any osts are in the logs
+ mount_client $MOUNT
+ check_mount && return 41
+ pass
+
+ echo Client mount with ost in logs, but none running
+ start_ost
+ stop_ost
+ mount_client $MOUNT
+ # check_mount will block trying to contact ost
+ umount_client $MOUNT
+ pass
+
+ echo Client mount with a running ost
+ start_ost
+ mount_client $MOUNT
+ sleep 5 #bz10476
+ check_mount || return 41
+ pass
+
+ cleanup
+}
+run_test 21 "start a client before osts"
+
+
+umount_client $MOUNT
+cleanup_nocli
equals_msg "Done"
build_test_filter
-assert_env mds_HOST ost1_HOST ost2_HOST client_HOST LIVE_CLIENT
+assert_env mds_HOST MDS_MKFS_OPTS MDSDEV
+assert_env ost1_HOST ost2_HOST OST_MKFS_OPTS OSTDEV
+assert_env LIVE_CLIENT FSNAME
####
# Initialize all the ostN_HOST
DOWN_NUM=0
}
-gen_config() {
- rm -f $XMLCONFIG
- add_mds mds --dev $MDSDEV --size $MDSSIZE --journal-size $MDSJOURNALSIZE
-
- if [ ! -z "$mdsfailover_HOST" ]; then
- add_mdsfailover mds --dev $MDSDEV --size $MDSSIZE
- fi
-
- add_lov lov1 mds --stripe_sz $STRIPE_BYTES\
- --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0
- for i in `seq $NUMOST`; do
- dev=`printf $OSTDEV $i`
- add_ost ost$i --lov lov1 --dev $dev --size $OSTSIZE \
- --journal-size $OSTJOURNALSIZE
- done
-
-
- add_client client mds --lov lov1 --path $MOUNT
+start_ost() {
+ local dev=`printf $OSTDEV $1`
+ start ost$1 $dev $OST_MOUNT_OPTS
}
setup() {
- gen_config
-
+ cleanup
rm -rf logs/*
+ wait_for mds
+ add mds $MDS_MKFS_OPTS --reformat $MDSDEV >> /dev/null
+ start mds $MDSDEV $MDS_MOUNT_OPTS
for i in `seq $NUMOST`; do
+ local dev=`printf $OSTDEV $i`
+ local index=$((i - 1))
wait_for ost$i
- start ost$i ${REFORMAT} $OSTLCONFARGS
+ echo Adding ost$i at index $index dev $dev
+ add ost$i $OST_MKFS_OPTS --reformat --index=$index $dev >> /dev/null
+ start ost$i $dev $OST_MOUNT_OPTS
done
[ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE
- wait_for mds
- start mds $MDSLCONFARGS ${REFORMAT}
+
while ! do_node $CLIENTS "ls -d $LUSTRE" > /dev/null; do sleep 5; done
grep " $MOUNT " /proc/mounts || zconf_mount $CLIENTS $MOUNT
-
}
cleanup() {
zconf_umount $CLIENTS $MOUNT
-
- stop mds ${FORCE} $MDSLCONFARGS || :
+ stop mds -f
for i in `seq $NUMOST`; do
- stop ost$i ${FORCE} $OSTLCONFARGS || :
+ stop ost$i -f
done
}
done
echo "No ost found for node; $node"
return 1
-
}
-
if [ "$ONLY" == "cleanup" ]; then
$CLEANUP
exit
echo "Starting Test 17 at `date`"
test_0() {
- echo "Failover MDS"
facet_failover mds
echo "Waiting for df pid: $DFPID"
wait $DFPID || { echo "df returned $?" && return 1; }
- echo "Failing OST1"
facet_failover ost1
echo "Waiting for df pid: $DFPID"
wait $DFPID || { echo "df returned $?" && return 2; }
- echo "Failing OST2"
facet_failover ost2
echo "Waiting for df pid: $DFPID"
wait $DFPID || { echo "df returned $?" && return 3; }
echo "Verify Lustre filesystem is up and running"
client_df
- echo "Failing MDS"
shutdown_facet mds
reboot_facet mds
DFPID=$!
sleep 5
- echo "Failing OST"
shutdown_facet ost1
echo "Reintegrating OST"
reboot_facet ost1
wait_for ost1
- start ost1
+ start_ost 1
- echo "Failover MDS"
wait_for mds
- start mds
+ start mds $MDSDEV $MDS_MOUNT_OPTS
#Check FS
wait $DFPID
echo "Fourth Failure Mode: OST/MDS `date`"
#OST Portion
- echo "Failing OST ost1"
shutdown_facet ost1
#Check FS
sleep 5
#MDS Portion
- echo "Failing MDS"
shutdown_facet mds
reboot_facet mds
echo "Reintegrating OST"
reboot_facet ost1
wait_for ost1
- start ost1
+ start_ost 1
- echo "Failover MDS"
wait_for mds
- start mds
+ start mds $MDSDEV $MDS_MOUNT_OPTS
#Check FS
wait $DFPIDA
client_df
#OST Portion
- echo "Failing OST"
shutdown_facet ost1
reboot_facet ost1
sleep 5
#OST Portion
- echo "Failing OST"
shutdown_facet ost2
reboot_facet ost2
#Reintegration
echo "Reintegrating OSTs"
wait_for ost1
- start ost1
+ start_ost 1
wait_for ost2
- start ost2
+ start_ost 2
clients_recover_osts ost1
clients_recover_osts ost2
client_touch testfile || return 2
#OST Portion
- echo "Failing OST"
shutdown_facet ost1
reboot_facet ost1
#Reintegration
echo "Reintegrating OST/CLIENTs"
wait_for ost1
- start ost1
+ start_ost 1
reintegrate_clients
sleep 5
client_rm testfile
#MDS Portion
- echo "Failing MDS"
facet_failover mds
#Check FS
#OST Portion
- echo "Failing OST"
shutdown_facet ost1
reboot_facet ost1
echo "Reintegrating CLIENTs/OST"
reintegrate_clients
wait_for ost1
- start ost1
+ start_ost 1
wait $DFPID
client_df || return 1
client_touch testfile2 || return 2
run_test 10 "Running Availability for 6 hours..."
equals_msg "Done, cleaning up"
-# we need to force cleanup for the stale MDS conns until bug 5921 is fixed
-FORCE=--force $CLEANUP
+$CLEANUP
[ "$DEBUG" ] && debug_opt="--ptldebug=$DEBUG"
[ "$PTLDEBUG" ] && debug_opt="--ptldebug=$PTLDEBUG"
-${LCONF} $NOMOD $portals_opt $lustre_opt $debug_opt $node_opt ${REFORMAT:---reformat} $@ \
- $conf_opt || {
+echo llmount: FIXME replace llmount.sh with ". mountconf.sh" and "$SETUP"
+
+exit 1
+
+#${LCONF} $NOMOD $portals_opt $lustre_opt $debug_opt $node_opt ${REFORMAT:---reformat} $@ $conf_opt || {
# maybe acceptor error, dump tcp port usage
- netstat -tpn
- exit 2
-}
+# netstat -tpn
+# exit 2
+#}
-if [ "$MOUNT2" ]; then
- $LLMOUNT -v -o user_xattr,acl `hostname`:/mds1/client $MOUNT2 || exit 3
-fi
+#if [ "$MOUNT2" ]; then
+# $LLMOUNT -v -o user_xattr,acl `hostname`:/mds1/client $MOUNT2 || exit 3
+#fi
[ "$MOUNT2" ] && umount $MOUNT2
-${LCONF} $NOMOD $portals_opt $lustre_opt $node_opt --cleanup $@ \
+#${LCONF} $NOMOD $portals_opt $lustre_opt $node_opt --cleanup $@ \
--dump $TMP/debug $conf_opt
+
+echo FIXME llmountcleanup should be replaced with $CLEANUP
+exit 1
+
rc=$?
echo "lconf DONE"
BUSY=`dmesg | grep -i destruct`
+++ /dev/null
-#!/bin/sh
-# vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4:
-
-export PATH=`dirname $0`/../utils:$PATH
-
-LCONF=${LCONF:-lconf}
-NAME=${NAME:-local}
-LLMOUNT=${LLMOUNT:-llmount}
-
-config=$NAME.xml
-mkconfig=$NAME.sh
-
-if [ "$PORTALS" ]; then
- portals_opt="--portals=$PORTALS"
-fi
-
-if [ "$LUSTRE" ]; then
- lustre_opt="--lustre=$LUSTRE"
-fi
-
-if [ "$LDAPURL" ]; then
- conf_opt="--ldapurl $LDAPURL --config $NAME"
-else
- if [ ! -f $config -o $mkconfig -nt $config ]; then
- sh $mkconfig $config || exit 1
- fi
- conf_opt="$config"
-fi
-
-[ "$NODE" ] && node_opt="--node $NODE"
-[ "$DEBUG" ] && portals_opt="$portals_opt --ptldebug=$DEBUG"
-[ "$PTLDEBUG" ] && portals_opt="$portals_opt --ptldebug=$PTLDEBUG"
-
-${LCONF} $NOMOD $portals_opt $lustre_opt $node_opt $@ $conf_opt || {
- # maybe acceptor error, dump tcp port usage
- netstat -tpn
- exit 2
-}
-
-
-if [ "$MOUNT2" ]; then
- $LLMOUNT -v -o user_xattr,acl `hostname`:/mds1/client $MOUNT2 || exit 3
-fi
config=${1:-`basename $0 .sh`.xml}
-LMC="${LMC:-lmc} -m $config"
+LMC=echo
TMP=${TMP:-/tmp}
+FSNAME=lustre
HOSTNAME=`hostname`
-MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`}
+MDSDEV=${MDSDEV:-$TMP/mdt-${FSNAME}}
MDSSIZE=${MDSSIZE:-400000}
-FSTYPE=${FSTYPE:-ext3}
-MOUNT=${MOUNT:-/mnt/lustre}
+MOUNT=${MOUNT:-/mnt/${FSNAME}}
MOUNT2=${MOUNT2:-${MOUNT}2}
NETTYPE=${NETTYPE:-tcp}
[ "$ACCEPTOR_PORT" ] && PORT_OPT="--port $ACCEPTOR_PORT"
-OSTDEV=${OSTDEV:-$TMP/ost1-`hostname`}
+OSTDEV=${OSTDEV:-$TMP/ost0-${FSNAME}}
OSTSIZE=${OSTSIZE:-400000}
+OSTDEV2=${OSTDEV2:-$TMP/ost1-${FSNAME}}
MDS_MOUNT_OPTS="user_xattr,acl,${MDS_MOUNT_OPTS:-""}"
CLIENTOPT="user_xattr,acl,${CLIENTOPT:-""}"
# specific journal size for the ost, in MB
JSIZE=${JSIZE:-0}
-[ "$JSIZE" -gt 0 ] && JARG="--journal_size $JSIZE"
+[ "$JSIZE" -gt 0 ] && OST_MKFS_OPTS=$OST_MKFS_OPTS" -J size=$JSIZE"
MDSISIZE=${MDSISIZE:-0}
-[ "$MDSISIZE" -gt 0 ] && IARG="--inode_size $MDSISIZE"
+[ "$MDSISIZE" -gt 0 ] && MDS_MKFS_OPTS=$MDS_MKFS_OPTS" -i $MDSISIZE"
STRIPE_BYTES=${STRIPE_BYTES:-1048576}
STRIPES_PER_OBJ=1 # 0 means stripe over all OSTs
esac
}
-# create nodes
-${LMC} --add node --node $HOSTNAME || exit 10
-${LMC} --add net --node $HOSTNAME --nid `h2$NETTYPE $HOSTNAME` \
- --nettype $NETTYPE $PORT_OPT || exit 11
-${LMC} --add net --node client --nid '*' --nettype $NETTYPE $PORT_OPT|| exit 12
+MGSNID=`h2$NETTYPE $HOSTNAME`
# configure mds server
[ "x$MDS_MOUNT_OPTS" != "x" ] &&
- MDS_MOUNT_OPTS="--mountfsoptions $MDS_MOUNT_OPTS"
-
+ MDS_MOUNT_OPTS="--mountfsoptions=$MDS_MOUNT_OPTS"
+[ "x$MDS_MKFS_OPTS" != "x" ] &&
+ MDS_MOUNT_OPTS="--mkfsoptions=\"$MDS_MOUNT_OPTS\""
[ "x$QUOTA_OPTS" != "x" ] &&
QUOTA_OPTS="--quota $QUOTA_OPTS"
-
-# configure mds server
-${LMC} --add mds --node $HOSTNAME --mds mds1 --fstype $FSTYPE \
- --dev $MDSDEV $MDS_MOUNT_OPTS $QUOTA_OPTS\
- --size $MDSSIZE $JARG $IARG $MDSOPT || exit 20
+[ ! -z "$mdsfailover_HOST" ] && MDS_FAIL_OPT="--failnode=$mdsfailover_HOST"
+
+MDS_OPTS="--mgs $MDS_FAIL_OPT --device-size=$MDSSIZE $MDS_MOUNT_OPTS $MDS_MKFS_OPTS"
+echo mkfs.lustre --mdt $MDS_OPTS --reformat $MDSDEV
[ "x$OST_MOUNT_OPTS" != "x" ] &&
- OST_MOUNT_OPTS="--mountfsoptions $OST_MOUNT_OPTS"
-
-# configure ost
-${LMC} --add lov --lov lov1 --mds mds1 --stripe_sz $STRIPE_BYTES \
- --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0 $LOVOPT || exit 20
-
-${LMC} --add ost --node $HOSTNAME --lov lov1 --fstype $FSTYPE \
- --dev $OSTDEV $QUOTA_OPTS\
- $OST_MOUNT_OPTS --size $OSTSIZE $JARG $OSTOPT || exit 30
-
-# create client config
-[ "x$CLIENTOPT" != "x" ] && CLIENTOPT="--clientoptions $CLIENTOPT"
-${LMC} --add mtpt --node $HOSTNAME --path $MOUNT \
- --mds mds1 --lov lov1 $CLIENTOPT || exit 40
-${LMC} --add mtpt --node client --path $MOUNT2 \
- --mds mds1 --lov lov1 $CLIENTOPT || exit 41
+ OST_MOUNT_OPTS="--mountfsoptions=$OST_MOUNT_OPTS"
+[ "x$OST_MKFS_OPTS" != "x" ] &&
+ OST_MOUNT_OPTS="--mkfsoptions=\"$OST_MOUNT_OPTS\""
+
+OST_OPTS="--mgsnode=`h2$NETTYPE $HOSTNAME` $OST_FAIL_OPT --device-size=$OSTSIZE $OST_MOUNT_OPTS $OST_MKFS_OPTS"
+echo mkfs.lustre --ost $OST_OPTS --reformat $OSTDEV
+
+OST2_OPTS="--mgsnode=`h2$NETTYPE $HOSTNAME` $OST_FAIL_OPT --device-size=$OSTSIZE $OST_MOUNT_OPTS $OST_MKFS_OPTS"
+echo mkfs.lustre --ost $OST2_OPTS --reformat $OSTDEV2
+
[ "$ACCEPTOR_PORT" ] && PORT_OPT="--port $ACCEPTOR_PORT"
OSTCOUNT=${OSTCOUNT:-5}
-# OSTDEVN will still override the device for OST N
+# OSTDEVn will still override the device for OST n
OSTSIZE=${OSTSIZE:-150000}
# 1 to config an echo client instead of llite
return rc;
}
-/* cocurrent mmap operations on two nodes */
+/* concurrent mmap operations on two nodes */
static int mmap_tst3(char *mnt)
{
char *ptr, mmap_file[256];
}
if (prefix)
- sprintf(cmd, "ls /proc/fs/lustre/ldlm/namespaces/%s_*/lru_size", prefix);
+ sprintf(cmd, "ls /proc/fs/lustre/ldlm/namespaces/*-%s-*/lru_size", prefix);
else
sprintf(cmd, "ls /proc/fs/lustre/ldlm/namespaces/*/lru_size");
memset(ptr, 'a', region);
/* cancel unused locks */
- cancel_lru_locks("OSC");
+ cancel_lru_locks("osc");
if (rc)
goto out_unmap;
goto out;
}
- cancel_lru_locks("OSC");
+ cancel_lru_locks("osc");
if (rc)
goto out;
struct test_case tests[] = {
{ 1, "mmap test1: basic mmap operation", mmap_tst1, 1 },
{ 2, "mmap test2: MAP_PRIVATE not write back", mmap_tst2, 1 },
- { 3, "mmap test3: cocurrent mmap ops on two nodes", mmap_tst3, 2 },
- { 4, "mmap test4: c1 write to f1 from mmaped f2, "
- "c2 write to f1 from mmaped f1", mmap_tst4, 2 },
+ { 3, "mmap test3: concurrent mmap ops on two nodes", mmap_tst3, 2 },
+ { 4, "mmap test4: c1 write to f1 from mmapped f2, "
+ "c2 write to f1 from mmapped f1", mmap_tst4, 2 },
{ 5, "mmap test5: read/write file to/from the buffer "
- "which mmaped to just this file", mmap_tst5, 1 },
+ "which mmapped to just this file", mmap_tst5, 1 },
{ 6, "mmap test6: check mmap write/read content on two nodes",
mmap_tst6, 2 },
{ 0, NULL, 0, 0 }
--- /dev/null
+#!/bin/sh
+
+#set -vx
+
+# mountconf setup of MDS and two OSTs
+
+#export PATH=`dirname $0`/../utils:$PATH
+#LUSTRE=${LUSTRE:-`dirname $0`/..}
+#. $LUSTRE/tests/test-framework.sh
+#init_test_env $@
+
+mcstopall() {
+ # make sure we are using the primary server, so test-framework will
+ # be able to clean up properly.
+ activemds=`facet_active mds`
+ if [ $activemds != "mds" ]; then
+ fail mds
+ fi
+
+ grep " $MOUNT " /proc/mounts && zconf_umount `hostname` $MOUNT $*
+ stop ost -f
+ stop ost2 -f
+ stop mds -f
+ return 0
+}
+
+mccleanup() {
+ echo "mountconf cleanup $*"
+ mcstopall $*
+ unload_modules
+}
+
+mcformat() {
+ mcstopall
+ echo Formatting mds, ost, ost2
+ add mds $MDS_MKFS_OPTS --reformat $MDSDEV > /dev/null || exit 10
+ add ost $OST_MKFS_OPTS --reformat $OSTDEV > /dev/null || exit 10
+ add ost2 $OST2_MKFS_OPTS --reformat $OSTDEV2 > /dev/null || exit 10
+}
+export MCFORMAT=${MCFORMAT:-"mcformat"}
+
+mount_client() {
+ grep " $1 " /proc/mounts || zconf_mount `hostname` $*
+}
+
+mcsetup() {
+ echo Setup mds, ost, ost2
+ start mds $MDSDEV $MDS_MOUNT_OPTS
+ start ost $OSTDEV $OST_MOUNT_OPTS
+ start ost2 $OSTDEV2 $OST2_MOUNT_OPTS
+ [ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE
+
+ mount_client $MOUNT
+ sleep 5
+}
+
+export MCSETUP=${MCSETUP:-"mcsetup"}
+export MCCLEANUP=${MCCLEANUP:-"mccleanup"}
+
#!/bin/bash
set -e
-set -vx
+#set -vx
export PATH=`dirname $0`/../utils:$PATH
LFS=${LFS:-lfs}
# flush cache to OST(s) so avail numbers are correct
sync; sleep 1 ; sync
-for OSC in /proc/fs/lustre/osc/OSC*MNT*; do
+for OSC in /proc/fs/lustre/osc/*-osc-*; do
AVAIL=`cat $OSC/kbytesavail`
GRANT=`cat $OSC/cur_grant_bytes`
[ $(($AVAIL - $GRANT / 1024)) -lt 400 ] && OSCFULL=full
if [ -z "$OSCFULL" ]; then
echo "no OSTs are close to full"
- grep "[0-9]" /proc/fs/lustre/osc/OSC*MNT*/{kbytesavail,cur*}
+ grep "[0-9]" /proc/fs/lustre/osc/*-osc-*/{kbytesavail,cur*}
SUCCESS=0
fi
# flush cache to OST(s) so avail numbers are correct
sync; sleep 1 ; sync
-for OSC in /proc/fs/lustre/osc/OSC*MNT*; do
+for OSC in /proc/fs/lustre/osc/*-osc-*; do
AVAIL=`cat $OSC/kbytesavail`
GRANT=`cat $OSC/cur_grant_bytes`
[ $(($AVAIL - $GRANT / 1024)) -lt 400 ] && OSCFULL=full
done
if [ -z "$OSCFULL" ]; then
echo "no OSTs are close to full"
- grep "[0-9]" /proc/fs/lustre/osc/OSC*MNT*/{kbytesavail,cur*}|tee -a $LOG
+ grep "[0-9]" /proc/fs/lustre/osc/*-osc-*/{kbytesavail,cur*}|tee -a $LOG
SUCCESS=0
fi
# bug 2986 5494 7288
ALWAYS_EXCEPT="20b 24 27 $RECOVERY_SMALL_EXCEPT"
-LUSTRE=${LUSTRE:-`dirname $0`/..}
+# Tests that always fail with mountconf -- FIXME
+# 16 fails with 1, not evicted
+EXCEPT="$EXCEPT 16"
-. $LUSTRE/tests/test-framework.sh
+LUSTRE=${LUSTRE:-`dirname $0`/..}
+. $LUSTRE/tests/test-framework.sh
init_test_env $@
-
. ${CONFIG:=$LUSTRE/tests/cfg/local.sh}
build_test_filter
-
# Allow us to override the setup if we already have a mounted system by
# setting SETUP=" " and CLEANUP=" "
SETUP=${SETUP:-"setup"}
CLEANUP=${CLEANUP:-"cleanup"}
-FORCE=${FORCE:-"--force"}
-make_config() {
- rm -f $XMLCONFIG
- add_mds mds --dev $MDSDEV --size $MDSSIZE
- add_lov lov1 mds --stripe_sz $STRIPE_BYTES \
- --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0
- add_ost ost --lov lov1 --dev $OSTDEV --size $OSTSIZE
- add_ost ost2 --lov lov1 --dev ${OSTDEV}-2 --size $OSTSIZE
- add_client client mds --lov lov1 --path $MOUNT
-}
+# for MCSETUP and MCCLEANUP
+. mountconf.sh
setup() {
- make_config
- start ost --reformat $OSTLCONFARGS
- start ost2 --reformat $OSTLCONFARGS
- [ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE
- start mds $MDSLCONFARGS --reformat
- grep " $MOUNT " /proc/mounts || zconf_mount `hostname` $MOUNT
+ $MCFORMAT
+ $MCSETUP
}
cleanup() {
- zconf_umount `hostname` $MOUNT
- stop mds ${FORCE} $MDSLCONFARGS
- stop ost2 ${FORCE}
- stop ost ${FORCE} --dump $TMP/recovery-small-`hostname`.log
-}
-
-replay() {
- do_mds "sync"
- do_mds 'echo -e "device \$mds1\\nprobe\\nnotransno\\nreadonly" | lctl'
- do_client "$1" &
- shutdown_mds -f
- start_mds
- wait
- do_client "df -h $MOUNT" # trigger failover, if we haven't already
+ $MCCLEANUP > /dev/null || { echo "FAILed to clean up"; exit 20; }
}
if [ ! -z "$EVAL" ]; then
if [ "$ONLY" == "cleanup" ]; then
sysctl -w lnet.debug=0 || true
- FORCE=--force cleanup
+ cleanup
exit
fi
-REFORMAT=--reformat $SETUP
-unset REFORMAT
+$SETUP
[ "$ONLY" == "setup" ] && exit
run_test 3 "stat: drop req, drop rep"
test_4() {
- do_facet client "cp /etc/resolv.conf $MOUNT/resolv.conf" || return 1
- drop_request "cat $MOUNT/resolv.conf > /dev/null" || return 2
- drop_reply "cat $MOUNT/resolv.conf > /dev/null" || return 3
+ do_facet client "cp /etc/passwd $MOUNT/passwd" || return 1
+ drop_request "cat $MOUNT/passwd > /dev/null" || return 2
+ drop_reply "cat $MOUNT/passwd > /dev/null" || return 3
}
run_test 4 "open: drop req, drop rep"
test_5() {
- drop_request "mv $MOUNT/resolv.conf $MOUNT/renamed" || return 1
+ drop_request "mv $MOUNT/passwd $MOUNT/renamed" || return 1
drop_reint_reply "mv $MOUNT/renamed $MOUNT/renamed-again" || return 2
do_facet client "checkstat -v $MOUNT/renamed-again" || return 3
}
do_facet client multiop $MOUNT/$tfile Ow || return 1
do_facet client multiop $MOUNT/$tfile or || return 2
- cancel_lru_locks OSC
+ cancel_lru_locks osc
do_facet client multiop $MOUNT/$tfile or || return 3
drop_bl_callback multiop $MOUNT/$tfile Ow || echo "evicted as expected"
}
run_test 15 "failed open (-ENOMEM)"
-READ_AHEAD=`cat /proc/fs/lustre/llite/*/max_read_ahead_mb | head -n 1`
+READ_AHEAD=`cat $LPROC/llite/*/max_read_ahead_mb | head -n 1`
stop_read_ahead() {
- for f in /proc/fs/lustre/llite/*/max_read_ahead_mb; do
+ for f in $LPROC/llite/*/max_read_ahead_mb; do
echo 0 > $f
done
}
start_read_ahead() {
- for f in /proc/fs/lustre/llite/*/max_read_ahead_mb; do
+ for f in $LPROC/llite/*/max_read_ahead_mb; do
echo $READ_AHEAD > $f
done
}
#define OBD_FAIL_PTLRPC_BULK_PUT_NET 0x504 | OBD_FAIL_ONCE
do_facet ost sysctl -w lustre.fail_loc=0x80000504
- cancel_lru_locks OSC
+ cancel_lru_locks osc
# will get evicted here
do_facet client "cmp /etc/termcap $MOUNT/termcap" && return 1
sysctl -w lustre.fail_loc=0
do_facet client mkdir -p $MOUNT/$tdir
f=$MOUNT/$tdir/$tfile
- cancel_lru_locks OSC
+ cancel_lru_locks osc
pgcache_empty || return 1
# 1 stripe on ost2
do_facet client cp /etc/termcap $f
sync
- local osc2_dev=`$LCTL device_list | \
- awk '(/ost2.*client_facet/){print $4}' `
- $LCTL --device %$osc2_dev deactivate
+ local osc2dev=`grep ${ost2_svc}-osc- $LPROC/devices | awk '{print $1}'`
+ $LCTL --device $osc2dev deactivate || return 3
# my understanding is that there should be nothing in the page
# cache after the client reconnects?
rc=0
pgcache_empty || rc=2
- $LCTL --device %$osc2_dev activate
+ $LCTL --device $osc2dev activate
rm -f $f
return $rc
}
f=$MOUNT/$tdir/$tfile
f2=$MOUNT/$tdir/${tfile}-2
- cancel_lru_locks OSC
+ cancel_lru_locks osc
pgcache_empty || return 1
# shouldn't have to set stripe size of count==1
do_facet client multiop $f Ow || return 1
do_facet client multiop $f or || return 2
- cancel_lru_locks OSC
+ cancel_lru_locks osc
do_facet client multiop $f or || return 3
drop_ldlm_cancel multiop $f Ow || echo "client evicted, as expected"
multiop $DIR/$tdir/${tfile} O_wc &
MULTI_PID=$!
sleep 1
- cancel_lru_locks OSC
+ cancel_lru_locks osc
#define OBD_FAIL_LDLM_ENQUEUE_EXTENT_ERR 0x308
do_facet ost sysctl -w lustre.fail_loc=0x80000308
kill -USR1 $MULTI_PID
test_20b() { # bug 2986 - ldlm_handle_enqueue error during open
mkdir -p $DIR/$tdir
touch $DIR/$tdir/${tfile}
- cancel_lru_locks OSC
+ cancel_lru_locks osc
#define OBD_FAIL_LDLM_ENQUEUE_EXTENT_ERR 0x308
do_facet ost sysctl -w lustre.fail_loc=0x80000308
dd if=/etc/hosts of=$DIR/$tdir/$tfile && \
test_24() { # bug 2248 - eviction fails writeback but app doesn't see it
mkdir -p $DIR/$tdir
- cancel_lru_locks OSC
+ cancel_lru_locks osc
multiop $DIR/$tdir/$tfile Owy_wyc &
MULTI_PID=$!
usleep 500
echo "skipping test 26 (local OST)" && return
[ "`lsmod | grep mds`" ] && \
echo "skipping test 26 (local MDS)" && return
- OST_FILE=/proc/fs/lustre/obdfilter/ost_svc/num_exports
+ OST_FILE=$LPROC/obdfilter/ost_svc/num_exports
OST_EXP="`do_facet ost cat $OST_FILE`"
OST_NEXP1=`echo $OST_EXP | cut -d' ' -f2`
echo starting with $OST_NEXP1 OST exports
test_26b() { # bug 10140 - evict dead exports by pinger
zconf_mount `hostname` $MOUNT2
- MDS_FILE=/proc/fs/lustre/mds/mds_svc/num_exports
+ MDS_FILE=$LPROC/mds/${mds_svc}/num_exports
MDS_NEXP1="`do_facet mds cat $MDS_FILE | cut -d' ' -f2`"
- OST_FILE=/proc/fs/lustre/obdfilter/ost_svc/num_exports
+ OST_FILE=$LPROC/obdfilter/${ost_svc}/num_exports
OST_NEXP1="`do_facet ost cat $OST_FILE | cut -d' ' -f2`"
echo starting with $OST_NEXP1 OST and $MDS_NEXP1 MDS exports
zconf_umount `hostname` $MOUNT2 -f
}
run_test 52 "failover OST under load"
-
-FORCE=--force $CLEANUP
+$CLEANUP
SETUP=${SETUP:-"setup"}
CLEANUP=${CLEANUP:-"cleanup"}
-FORCE=${FORCE:-"--force"}
-
-gen_config() {
- rm -f $XMLCONFIG
- add_mds mds --dev $MDSDEV --size $MDSSIZE
- if [ ! -z "$mdsfailover_HOST" ]; then
- add_mdsfailover mds --dev $MDSDEV --size $MDSSIZE
- fi
-
- add_lov lov1 mds --stripe_sz $STRIPE_BYTES \
- --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0
- add_ost ost --lov lov1 --dev $OSTDEV --size $OSTSIZE --failover
- add_ost ost2 --lov lov1 --dev ${OSTDEV}-2 --size $OSTSIZE --failover
- add_client client mds --lov lov1 --path $MOUNT
-}
-
-
build_test_filter
cleanup() {
- # make sure we are using the primary MDS, so the config log will
+ # make sure we are using the primary server, so test-framework will
# be able to clean up properly.
activemds=`facet_active mds`
if [ $activemds != "mds" ]; then
fail mds
fi
- umount $MOUNT2 || true
- umount $MOUNT || true
- rmmod llite || true
- stop mds ${FORCE}
- stop ost2 ${FORCE}
- stop ost ${FORCE} --dump $TMP/replay-dual-`hostname`.log
+ grep " $MOUNT " /proc/mounts && zconf_umount `hostname` $MOUNT
+ grep " $MOUNT2 " /proc/mounts && zconf_umount `hostname` $MOUNT2
+ stop mds -f
+ stop ost2 -f
+ stop ost -f
}
if [ "$ONLY" == "cleanup" ]; then
sysctl -w lnet.debug=0
- FORCE=--force cleanup
+ cleanup
exit
fi
setup() {
- gen_config
- start ost --reformat $OSTLCONFARGS
- start ost2 --reformat $OSTLCONFARGS
- start mds $MDSLCONFARGS --reformat
- grep " $MOUNT " /proc/mounts || zconf_mount `hostname` $MOUNT
- grep " $MOUNT2 " /proc/mounts || zconf_mount `hostname` $MOUNT2
-
-# echo $TIMEOUT > /proc/sys/lustre/timeout
+ cleanup
+ add mds $MDS_MKFS_OPTS --reformat $MDSDEV
+ add ost $OST_MKFS_OPTS --reformat $OSTDEV
+ add ost2 $OST2_MKFS_OPTS --reformat $OSTDEV2
+ start mds $MDSDEV $MDS_MOUNT_OPTS
+ start ost $OSTDEV $OST_MOUNT_OPTS
+ start ost2 $OSTDEV2 $OST2_MOUNT_OPTS
+ # client actions will get EIO until MDT contacts OSTs, so give it a sec
+ sleep 5
+ zconf_mount `hostname` $MOUNT
+ zconf_mount `hostname` $MOUNT2
}
$SETUP
sleep 1
#define OBD_FAIL_LDLM_BL_CALLBACK 0x305
do_facet client sysctl -w lustre.fail_loc=0x80000305 # drop cb, evict
- cancel_lru_locks MDC
+ cancel_lru_locks mdc
usleep 500 # wait to ensure first client is one that will be evicted
openfile -f O_RDONLY $MOUNT2/$tdir/f0
wait $OPENPID
equals_msg test complete, cleaning up
SLEEP=$((`date +%s` - $NOW))
[ $SLEEP -lt $TIMEOUT ] && sleep $SLEEP
- FORCE=--force $CLEANUP
+ $CLEANUP
fi
. ${CONFIG:=$LUSTRE/tests/cfg/local.sh}
ostfailover_HOST=${ostfailover_HOST:-$ost_HOST}
+#failover= must be defined in OST_MKFS_OPTIONS if ostfailover_HOST != ost_HOST
# Skip these tests
# BUG NUMBER: 2766?
ALWAYS_EXCEPT="5 $REPLAY_OST_SINGLE_EXCEPT"
gen_config() {
- rm -f $XMLCONFIG
- add_mds mds --dev $MDSDEV --size $MDSSIZE
- add_lov lov1 mds --stripe_sz $STRIPE_BYTES \
- --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0
- add_ost ost --lov lov1 --dev $OSTDEV --size $OSTSIZE --failover
- if [ ! -z "$ostfailover_HOST" ]; then
- add_ostfailover ost --dev $OSTDEV --size $OSTSIZE
- fi
- add_client client mds --lov lov1 --path $MOUNT
+ grep " $MOUNT " /proc/mounts && zconf_umount `hostname` $MOUNT
+ stop ost -f
+ stop ost2 -f
+ stop mds -f
+ echo Formatting mds, ost
+ add mds $MDS_MKFS_OPTS --reformat $MDSDEV
+ add ost $OST_MKFS_OPTS --reformat $OSTDEV
}
cleanup() {
- # make sure we are using the primary MDS, so the config log will
+ # make sure we are using the primary server, so test-framework will
# be able to clean up properly.
activeost=`facet_active ost`
if [ $activeost != "ost" ]; then
fail ost
fi
+
zconf_umount `hostname` $MOUNT
- stop mds ${FORCE} $MDSLCONFARGS
- stop ost ${FORCE} --dump $TMP/replay-ost-single-`hostname`.log
+ stop mds
+ stop ost
+ unload_modules
}
if [ "$ONLY" == "cleanup" ]; then
sysctl -w lnet.debug=0
- FORCE=--force cleanup
+ cleanup
exit
fi
setup() {
gen_config
-
- start ost --reformat $OSTLCONFARGS
+ start mds $MDSDEV $MDS_MOUNT_OPTS
+ start ost $OSTDEV $OST_MOUNT_OPTS
[ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE
- start mds --reformat $MDSLCONFARGS
if [ -z "`grep " $MOUNT " /proc/mounts`" ]; then
# test "-1" needed during initial client->OST connection
log "== test 00: target handle mismatch (bug 5317) === `date +%H:%M:%S`"
-
#define OBD_FAIL_OST_ALL_REPLY_NET 0x211
do_facet ost "sysctl -w lustre.fail_loc=0x80000211"
-
zconf_mount `hostname` $MOUNT && df $MOUNT && pass || error "mount fail"
fi
}
verify=$ROOT/tmp/verify-$$
dd if=/dev/urandom bs=4096 count=1280 | tee $verify > $DIR/$tfile
# invalidate cache, so that we're reading over the wire
- for i in /proc/fs/lustre/ldlm/namespaces/OSC_*MNT*; do
+ for i in /proc/fs/lustre/ldlm/namespaces/*-osc-*; do
echo -n clear > $i/lru_size
done
cmp $verify $DIR/$tfile &
run_test 5 "Fail OST during iozone"
kbytesfree() {
- awk '{total+=$1} END {print total}' /proc/fs/lustre/osc/OSC_*MNT*/kbytesfree
+ awk '{total+=$1} END {print total}' /proc/fs/lustre/osc/*-osc-*/kbytesfree
}
test_6() {
run_test 7 "Fail OST before obd_destroy"
equals_msg test complete, cleaning up
-FORCE=--force $CLEANUP
+$CLEANUP
#!/bin/sh
set -e
+#set -v
#
# This test needs to be run on the client
init_test_env $@
. ${CONFIG:=$LUSTRE/tests/cfg/local.sh}
+. mountconf.sh
# Skip these tests
-# bug number: 2766 9930
+# bug number: 2766
ALWAYS_EXCEPT="0b $REPLAY_SINGLE_EXCEPT"
-gen_config() {
- rm -f $XMLCONFIG
- add_mds mds --dev $MDSDEV --size $MDSSIZE
- if [ ! -z "$mdsfailover_HOST" ]; then
- add_mdsfailover mds --dev $MDSDEV --size $MDSSIZE
- fi
-
- add_lov lov1 mds --stripe_sz $STRIPE_BYTES \
- --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0
- add_ost ost --lov lov1 --dev $OSTDEV --size $OSTSIZE
- add_ost ost2 --lov lov1 --dev ${OSTDEV}-2 --size $OSTSIZE
- add_client client mds --lov lov1 --path $MOUNT
-}
-
build_test_filter
-cleanup() {
- # make sure we are using the primary MDS, so the config log will
- # be able to clean up properly.
- activemds=`facet_active mds`
- if [ $activemds != "mds" ]; then
- fail mds
- fi
- zconf_umount `hostname` $MOUNT
- stop mds ${FORCE} $MDSLCONFARGS
- stop ost2 ${FORCE}
- stop ost ${FORCE} --dump $TMP/replay-single-`hostname`.log
-}
+SETUP=${SETUP:-"setup"}
+CLEANUP=${CLEANUP:-"mcstopall"}
if [ "$ONLY" == "cleanup" ]; then
sysctl -w lnet.debug=0 || true
- FORCE=--force cleanup
- exit
+ $CLEANUP
+ exit 0
fi
-SETUP=${SETUP:-"setup"}
-CLEANUP=${CLEANUP:-"cleanup"}
-
setup() {
- gen_config
-
- start ost --reformat $OSTLCONFARGS
- start ost2 --reformat $OSTLCONFARGS
- [ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE
- start mds $MDSLCONFARGS --reformat
- grep " $MOUNT " /proc/mounts || zconf_mount `hostname` $MOUNT
+ mcformat
+ mcsetup
}
$SETUP
do_facet ost "sysctl -w lustre.fail_loc=0"
rm -fr $DIR/$tfile
- local old_last_id=`cat /proc/fs/lustre/obdfilter/*/last_id`
+ local old_last_id=`cat $LPROC/obdfilter/*/last_id`
touch -o $DIR/$tfile 1
sync
- local new_last_id=`cat /proc/fs/lustre/obdfilter/*/last_id`
+ local new_last_id=`cat $LPROC/obdfilter/*/last_id`
test "$old_last_id" = "$new_last_id" || {
echo "OST object create is caused by MDS"
return 1
}
- old_last_id=`cat /proc/fs/lustre/obdfilter/*/last_id`
+ old_last_id=`cat $LPROC/obdfilter/*/last_id`
echo "data" > $DIR/$tfile
sync
- new_last_id=`cat /proc/fs/lustre/obdfilter/*/last_id`
+ new_last_id=`cat $LPROC/obdfilter/*/last_id`
test "$old_last_id" = "$new_last_id "&& {
echo "CROW does not work on write"
return 1
do_facet ost "sysctl -w lustre.fail_loc=0x80000801"
rm -fr $DIR/1a1
- old_last_id=`cat /proc/fs/lustre/obdfilter/*/last_id`
+ old_last_id=`cat $LPROC/obdfilter/*/last_id`
echo "data" > $DIR/1a1
sync
- new_last_id=`cat /proc/fs/lustre/obdfilter/*/last_id`
+ new_last_id=`cat $LPROC/obdfilter/*/last_id`
test "$old_last_id" = "$new_last_id" || {
echo "CROW does work with fail_loc=0x80000801"
return 1
touch $DIR/$tfile
checkstat $DIR/$tfile
facet_failover mds
- cancel_lru_locks MDC
+ cancel_lru_locks mdc
if dmesg | grep "unknown lock cookie"; then
echo "cancel after replay failed"
return 1
run_test 39 "test recovery from unlink llog (test llog_gen_rec) "
count_ost_writes() {
- cat /proc/fs/lustre/osc/*/stats |
- awk -vwrites=0 '/ost_write/ { writes += $2 } END { print writes; }'
+ awk -vwrites=0 '/ost_write/ { writes += $2 } END { print writes; }' $LPROC/osc/*/stats
}
#b=2477,2532
# make sure the start of the file is ost1
lfs setstripe $f $((128 * 1024)) 0 0
do_facet client dd if=/dev/zero of=$f bs=4k count=1 || return 3
- cancel_lru_locks OSC
+ cancel_lru_locks osc
# fail ost2 and read from ost1
- local osc2_dev=`$LCTL device_list | \
- awk '(/ost2.*client_facet/){print $4}' `
- $LCTL --device %$osc2_dev deactivate
+ local osc2dev=`grep ${ost2_svc}-osc- $LPROC/devices | awk '{print $1}'`
+ [ "$osc2dev" ] || return 4
+ $LCTL --device $osc2dev deactivate || return 1
do_facet client dd if=$f of=/dev/null bs=4k count=1 || return 3
- $LCTL --device %$osc2_dev activate
+ $LCTL --device $osc2dev activate || return 2
return 0
}
run_test 41 "read from a valid osc while other oscs are invalid"
run_test 43 "mds osc import failure during recovery; don't LBUG"
test_44() {
- mdcdev=`awk '/mds_svc_MNT/ {print $1}' < /proc/fs/lustre/devices`
+ mdcdev=`awk '/-mdc-/ {print $1}' $LPROC/devices`
+ [ "$mdcdev" ] || exit 2
for i in `seq 1 10`; do
+ echo iteration $i
#define OBD_FAIL_TGT_CONN_RACE 0x701
do_facet mds "sysctl -w lustre.fail_loc=0x80000701"
$LCTL --device $mdcdev recover
run_test 44 "race in target handle connect"
test_44b() {
- mdcdev=`awk '/mds_svc_MNT/ {print $1}' < /proc/fs/lustre/devices`
+ mdcdev=`awk '/-mdc-/ {print $1}' $LPROC/devices`
+ [ "$mdcdev" ] || exit 2
for i in `seq 1 10`; do
+ echo iteration $i
#define OBD_FAIL_TGT_DELAY_RECONNECT 0x704
do_facet mds "sysctl -w lustre.fail_loc=0x80000704"
$LCTL --device $mdcdev recover
# Handle failed close
test_45() {
- mdcdev=`awk '/mds_svc_MNT/ {print $1}' < /proc/fs/lustre/devices`
+ mdcdev=`awk '/-mdc-/ {print $1}' $LPROC/devices`
+ [ "$mdcdev" ] || exit 2
$LCTL --device $mdcdev recover
multiop $DIR/$tfile O_c &
# This will cause the CLOSE to fail before even
# allocating a reply buffer
- $LCTL --device $mdcdev deactivate
+ $LCTL --device $mdcdev deactivate || return 4
# try the close
kill -USR1 $pid
wait $pid || return 1
- $LCTL --device $mdcdev activate
+ $LCTL --device $mdcdev activate || return 5
sleep 1
$CHECKSTAT -t file $DIR/$tfile || return 2
run_test 48 "MDS->OSC failure during precreate cleanup (2824)"
test_50() {
- local osc_dev=`$LCTL device_list | \
- awk '(/ost_svc_mds_svc/){print $4}' `
- $LCTL --device %$osc_dev recover && $LCTL --device %$osc_dev recover
+ local oscdev=`grep ${ost_svc}-osc- $LPROC/devices | awk '{print $1}'`
+ [ "$oscdev" ] || return 1
+ $LCTL --device $oscdev recover && $LCTL --device $oscdev recover
# give the mds_lov_sync threads a chance to run
sleep 5
}
# b3764 timed out lock replay
test_52() {
touch $DIR/$tfile
- cancel_lru_locks MDC
+ cancel_lru_locks mdc
multiop $DIR/$tfile s
replay_barrier mds
run_test 58 "test recovery from llog for setattr op (test llog_gen_rec)"
equals_msg test complete, cleaning up
-FORCE=--force $CLEANUP
+$CLEANUP
# Probably a good idea to run this before doing any checkins.
# In the future this can become more fancy, but it's OK for now.
+LUSTRE=${LUSTRE:-`dirname $0`/..}
SRCDIR="`dirname $0`"
+export PATH=/sbin:/usr/sbin:$SRCDIR:$SRCDIR/../utils:$PATH
+
+. $LUSTRE/tests/test-framework.sh
+init_test_env $@
+. ${CONFIG:=$LUSTRE/tests/cfg/local.sh}
+. mountconf.sh
+
+SETUP=${SETUP:-mcsetup}
+FORMAT=${FORMAT:-mcformat}
+CLEANUP=${CLEANUP:-mcstopall}
+
fail() {
echo "ERROR: $1" 1>&2
[ $2 ] && RC=$2 || RC=1
lctl mark "$*"
}
-export PATH=/sbin:/usr/sbin:$SRCDIR:$SRCDIR/../utils:$PATH
ERROR=
SRC=/etc
[ "$COUNT" ] || COUNT=1000
-[ "$LCONF" ] || LCONF=lconf
-
[ "$MCREATE" ] || MCREATE=mcreate
[ "$MKDIRMANY" ] || MKDIRMANY="createmany -d"
shift
done
-EXISTING_MOUNT="`mount | awk '/ lustre(_lite)? / { print $3 }' | tail -n 1`"
+EXISTING_MOUNT=`awk '($3 ~ "lustre" && $1 ~ ":") { print $2 }' /proc/mounts`
if [ -z "$EXISTING_MOUNT" ]; then
- sh llmount.sh $OPTS
- EXISTING_MOUNT="`mount | awk '/ lustre(_lite)? / { print $3 }' | tail -n 1`"
+ $FORMAT
+ $SETUP
+ EXISTING_MOUNT=`awk '($3 ~ "lustre" && $1 ~ ":") { print $2 }' /proc/mounts`
[ -z "$EXISTING_MOUNT" ] && fail "no lustre filesystem mounted" 1
I_MOUNTED="yes"
fi
[ "$ERROR" ] && fail "old and new files are different" $ERROR
log "finished at `date` ($(($(date +%s) - START)))"
-sh llmountcleanup.sh || exit 19
-sh llrmount.sh $OPTS || exit 20
+$CLEANUP || exit 19
+$SETUP || exit 20
log "comparing previously copied files"
for f in $FILES; do
[ "$ERROR" ] && fail "old and new files are different on second diff" $ERROR
-sh llmountcleanup.sh || exit 19
-sh llrmount.sh $OPTS || exit 20
+$CLEANUP || exit 19
+$SETUP || exit 20
log "removing $DST"
rm -r $V $DST || fail "can't remove $DST" 37
if [ "$I_MOUNTED" = "yes" ]; then
sync && sleep 2 && sync # wait for delete thread
- sh llmountcleanup.sh || exit 29
+ $CLEANUP
fi
}
mounted_lustre_filesystems() {
- awk '($3 ~ "lustre") { print $2 }' /proc/mounts
+ awk '($3 ~ "lustre" && $1 ~ ":") { print $2 }' /proc/mounts
}
MOUNT="`mounted_lustre_filesystems`"
if [ -z "$MOUNT" ]; then
echo 0 > /proc/sys/lustre/fail_loc
echo " Trigger recovery..."
- OSC0_UUID="`$LCTL dl | awk '/.* OSC_[^ ]+_OST.* / { print $1 }'`"
- [ -z "$OSC0_UUID" ] && OSC0_UUID="`$LCTL dl | awk '/.* OSC_[^ ]+_ost1.* / { print $1 }'`"
+ OSC0_UUID="`$LCTL dl | awk '/.* *-osc-* / { print $1 }'`"
for i in $OSC0_UUID; do
$LCTL --device $i activate > /dev/null 2>&1 || error "activate osc failed!"
done
# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
[ "$SLOW" = "no" ] && EXCEPT="$EXCEPT 24o 27m 51b 51c 63 64b 71 77 101"
+# Tests that fail on uml
+[ "$UML" = "true" ] && EXCEPT="$EXCEPT 31d"
+
+# Tests that always fail with mountconf -- FIXME
+# 48a moving the working dir succeeds
+EXCEPT="$EXCEPT 48a"
case `uname -r` in
2.4*) FSTYPE=${FSTYPE:-ext3}; ALWAYS_EXCEPT="$ALWAYS_EXCEPT 76" ;;
fi
fi
+SANITYLOG=${SANITYLOG:-/tmp/sanity.log}
+
export NAME=${NAME:-local}
SAVE_PWD=$PWD
-clean() {
+# for MCSETUP and MCCLEANUP
+LUSTRE=${LUSTRE:-`dirname $0`/..}
+. $LUSTRE/tests/test-framework.sh
+init_test_env $@
+. ${CONFIG:=$LUSTRE/tests/cfg/local.sh}
+. mountconf.sh
+
+cleanup() {
echo -n "cln.."
- sh llmountcleanup.sh ${FORCE} > /dev/null || { echo "FAILed to clean up"; exit 20; }
+ $MCCLEANUP ${FORCE} $* || { echo "FAILed to clean up"; exit 20; }
}
-CLEAN=${CLEAN:-:}
+CLEANUP=${CLEANUP:-:}
-start() {
+setup() {
echo -n "mnt.."
- sh llrmount.sh > /dev/null || exit 10
+ $MCSETUP || exit 10
echo "done"
}
-START=${START:-:}
+SETUP=${SETUP:-:}
log() {
echo "$*"
}
TRACE=${TRACE:-""}
-LPROC=/proc/fs/lustre
check_kernel_version() {
VERSION_FILE=$LPROC/kernel_version
WANT_VER=$1
}
run_one() {
- if ! mount | grep -q $DIR; then
- $START
+ if ! grep -q $DIR /proc/mounts; then
+ $SETUP
fi
testnum=$1
message=$2
unset TESTNAME
pass "($((`date +%s` - $BEFORE))s)"
cd $SAVE_PWD
- $CLEAN
+ $CLEANUP
}
build_test_filter() {
}
mounted_lustre_filesystems() {
- awk '($3 ~ "lustre") { print $2 }' /proc/mounts
+ awk '($3 ~ "lustre" && $1 ~ ":") { print $2 }' /proc/mounts
}
-MOUNT="`mounted_lustre_filesystems`"
-if [ -z "$MOUNT" ]; then
- sh llmount.sh
- MOUNT="`mounted_lustre_filesystems`"
- [ -z "$MOUNT" ] && error "NAME=$NAME not mounted"
+
+MOUNTED="`mounted_lustre_filesystems`"
+if [ -z "$MOUNTED" ]; then
+ $MCFORMAT
+ $MCSETUP
+ MOUNTED="`mounted_lustre_filesystems`"
+ [ -z "$MOUNTED" ] && error "NAME=$NAME not mounted"
I_MOUNTED=yes
fi
$CHECKSTAT ${f}.rename
$CHECKSTAT -a ${f}
}
-run_test 24n "Statting the old file after renameing (Posix rename 2)"
+run_test 24n "Statting the old file after renaming (Posix rename 2)"
test_24o() {
check_kernel_version 37 || return 0
exhaust_precreations() {
OSTIDX=$1
- OST=$(head -n $((OSTIDX + 1)) $LPROC/lov/${LOVNAME}/target_obd |\
- tail -n 1 | awk '{print $2}' | sed -e 's/_UUID$//')
-
- last_id=$(cat $LPROC/osc/OSC_*_${OST}_${MDS}/prealloc_last_id)
- next_id=$(cat $LPROC/osc/OSC_*_${OST}_${MDS}/prealloc_next_id)
+ OST=$(grep ${OSTIDX}": " $LPROC/lov/${LOVNAME}/target_obd | \
+ awk '{print $2}' | sed -e 's/_UUID$//')
+ # on the mdt's osc
+ last_id=$(cat $LPROC/osc/${OST}-osc/prealloc_last_id)
+ next_id=$(cat $LPROC/osc/${OST}-osc/prealloc_next_id)
mkdir -p $DIR/d27/${OST}
$LSTRIPE $DIR/d27/${OST} 0 $OSTIDX 1
sysctl -w lustre.fail_loc=0x215
echo "Creating to objid $last_id on ost $OST..."
createmany -o $DIR/d27/${OST}/f $next_id $((last_id - next_id + 2))
- grep '[0-9]' $LPROC/osc/OSC_*_${OST}_${MDS}/prealloc*
+ grep '[0-9]' $LPROC/osc/${OST}-osc/prealloc*
reset_enospc $2
}
run_test 28 "create/mknod/mkdir with bad file types ============"
cancel_lru_locks() {
- for d in $LPROC/ldlm/namespaces/$1*; do
+ for d in $LPROC/ldlm/namespaces/*-$1-*; do
echo clear > $d/lru_size
done
- grep "[0-9]" $LPROC/ldlm/namespaces/$1*/lock_unused_count /dev/null
+ grep "[0-9]" $LPROC/ldlm/namespaces/*-$1-*/lock_unused_count /dev/null
}
test_29() {
- cancel_lru_locks MDC
+ cancel_lru_locks mdc
mkdir $DIR/d29
touch $DIR/d29/foo
log 'first d29'
ls -l $DIR/d29
- MDCDIR=${MDCDIR:-$LPROC/ldlm/namespaces/MDC_*}
+ MDCDIR=${MDCDIR:-$LPROC/ldlm/namespaces/*-mdc-*}
LOCKCOUNTORIG=`cat $MDCDIR/lock_count`
LOCKUNUSEDCOUNTORIG=`cat $MDCDIR/lock_unused_count`
log 'second d29'
# file truncation, and file removal.
test_42a() {
setup_test42
- cancel_lru_locks OSC
+ cancel_lru_locks osc
stop_writeback
sync; sleep 1; sync # just to be safe
BEFOREWRITES=`count_ost_writes`
- grep "[0-9]" $LPROC/osc/OSC*MNT*/cur_grant_bytes
+ grep "[0-9]" $LPROC/osc/*-osc-*/cur_grant_bytes
dd if=/dev/zero of=$DIR/f42a bs=1024 count=100
AFTERWRITES=`count_ost_writes`
[ $BEFOREWRITES -eq $AFTERWRITES ] || \
test_42b() {
setup_test42
- cancel_lru_locks OSC
+ cancel_lru_locks osc
stop_writeback
sync
dd if=/dev/zero of=$DIR/f42b bs=1024 count=100
test=$1
file=$DIR/$test
offset=$2
- cancel_lru_locks OSC
+ cancel_lru_locks osc
stop_writeback
# prime the file with 0,EOF PW to match
touch $file
dd if=/dev/zero of=$file bs=1024 count=100
BEFOREWRITES=`count_ost_writes`
$TRUNCATE $file $offset
- cancel_lru_locks OSC
+ cancel_lru_locks osc
AFTERWRITES=`count_ost_writes`
start_writeback
}
[ $before -gt $after ] || error "writeback didn't lower dirty count"
do_dirty_record "echo blah > $f"
[ $before -eq $after ] && error "write wasn't cached"
- do_dirty_record "cancel_lru_locks OSC"
+ do_dirty_record "cancel_lru_locks osc"
[ $before -gt $after ] || error "lock cancellation didn't lower dirty count"
start_writeback
}
run_test 52b "immutable flag test (should return errors) ======="
test_53() {
- for i in `ls -d $LPROC/osc/OSC*mds1 2> /dev/null` ; do
- ostname=`echo $i | cut -d _ -f 3-4 | sed -e s/_mds1//`
+ for i in `ls -d $LPROC/osc/*-osc 2> /dev/null` ; do
+ ostname=`basename $i | cut -d - -f 1-2`
ost_last=`cat $LPROC/obdfilter/$ostname/last_id`
mds_last=`cat $i/prealloc_last_id`
echo "$ostname.last_id=$ost_last ; MDS.last_id=$mds_last"
test_61() {
f="$DIR/f61"
dd if=/dev/zero of=$f bs=`page_size` count=1
- cancel_lru_locks OSC
+ cancel_lru_locks osc
multiop $f OSMWUc || error
sync
}
test_62() {
f="$DIR/f62"
echo foo > $f
- cancel_lru_locks OSC
+ cancel_lru_locks osc
sysctl -w lustre.fail_loc=0x405
cat $f && error "cat succeeded, expect -EIO"
sysctl -w lustre.fail_loc=0
test_64a () {
df $DIR
- grep "[0-9]" $LPROC/osc/OSC*MNT*/cur*
+ grep "[0-9]" $LPROC/osc/*-osc-*/cur*
}
run_test 64a "verify filter grant calculations (in kernel) ====="
test_65j() { # bug6367
# if we aren't already remounting for each test, do so for this test
- if [ "$CLEAN" = ":" ]; then
- clean || error "failed to unmount"
- start || error "failed to remount"
+ if [ "$CLEANUP" = ":" ]; then
+ cleanup -f || error "failed to unmount"
+ setup || error "failed to remount"
fi
$LSTRIPE -d $MOUNT || true
}
sysctl -w lustre.fail_loc=0
$DIRECTIO write $f 0 2 || error "write error"
- cancel_lru_locks OSC
+ cancel_lru_locks osc
$DIRECTIO read $f 0 1 || error "read error"
sysctl -w lustre.fail_loc=0x217
# See if we are still setuid/sgid
test -u $DIR/f72 -o -g $DIR/f72 && error "S/gid is not dropped on write"
# Now test that MDS is updated too
- cancel_lru_locks MDC
+ cancel_lru_locks mdc
test -u $DIR/f72 -o -g $DIR/f72 && error "S/gid is not dropped on MDS"
true
}
local nreads=10000
local cache_limit=32
- for s in $LPROC/osc/OSC_*/rpc_stats; do
+ for s in $LPROC/osc/*-osc*/rpc_stats; do
echo 0 > $s
done
trap cleanup_101 EXIT
cleanup_101
if [ $(($discard * 10)) -gt $nreads ] ;then
- cat $LPROC/osc/OSC_*/rpc_stats
+ cat $LPROC/osc/*-osc*/rpc_stats
cat $LPROC/llite/*/read_ahead_stats
error "too many ($discard) discarded pages"
fi
touch $testfile
[ "$UID" != 0 ] && echo "skipping $TESTNAME (must run as root)" && return
- [ -z "`grep xattr $LPROC/mdc/MDC*MNT*/connect_flags`" ] && echo "skipping $TESTNAME (must have user_xattr)" && return
+ [ -z "`grep xattr $LPROC/mdc/*-mdc-*/connect_flags`" ] && echo "skipping $TESTNAME (must have user_xattr)" && return
echo "set/get xattr..."
setfattr -n trusted.name1 -v value1 $testfile || error
[ "`getfattr -n trusted.name1 $testfile 2> /dev/null | \
[ "$UID" != 0 ] && echo "skipping $TESTNAME (must run as root)" && return
[ -z "`mount | grep " $DIR .*\<acl\>"`" ] && echo "skipping $TESTNAME (must have acl)" && return
- [ -z "`grep acl $LPROC/mdc/MDC*MNT*/connect_flags`" ] && echo "skipping $TESTNAME (must have acl)" && return
- which setfacl 2>/dev/null || (echo "skipping $TESTNAME (could not find setfacl)" && return)
+ [ -z "`grep acl $LPROC/mdc/*-mdc-*/connect_flags`" ] && echo "skipping $TESTNAME (must have acl)" && return
+ $(which setfacl 2>/dev/null) || echo "skipping $TESTNAME (could not find setfacl)" && return
echo "performing cp ..."
run_acl_subtest cp || error
lfs df $DIR/$tfile || error "lfs df $DIR/$tfile failed"
lfs df -ih $DIR/$tfile || error "lfs df -ih $DIR/$tfile failed"
- OSC=`lctl dl | awk '/OSC.*MNT/ {print $4}' | head -n 1`
+ OSC=`awk '/-osc-/ {print $4}' $LPROC/devices | head -n 1`
lctl --device %$OSC deactivate
lfs df || error "lfs df with deactivated OSC failed"
lctl --device %$OSC recover
log "cleanup: ======================================================"
if [ "`mount | grep ^$NAME`" ]; then
- rm -rf $DIR/[Rdfs][1-9]*
- if [ "$I_MOUNTED" = "yes" ]; then
- sh llmountcleanup.sh || error "llmountcleanup failed"
- fi
+ rm -rf $DIR/[Rdfs][1-9]*
fi
+if [ "$I_MOUNTED" = "yes" ]; then
+ $MCCLEANUP -f || error "cleanup failed"
+fi
+
echo '=========================== finished ==============================='
[ -f "$SANITYLOG" ] && cat $SANITYLOG && exit 1 || true
ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"14b 14c"}
# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
+# Tests that fail on uml
+[ "$UML" = "true" ] && EXCEPT="$EXCEPT 7"
+
SRCDIR=`dirname $0`
PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH
SAVE_PWD=$PWD
-clean() {
+# for MCSETUP and MCCLEANUP
+LUSTRE=${LUSTRE:-`dirname $0`/..}
+. $LUSTRE/tests/test-framework.sh
+init_test_env $@
+. ${CONFIG:=$LUSTRE/tests/cfg/local.sh}
+. mountconf.sh
+
+cleanup() {
echo -n "cln.."
- sh llmountcleanup.sh ${FORCE} > /dev/null || exit 20
+ grep " $MOUNT2 " /proc/mounts && zconf_umount `hostname` $MOUNT2 ${FORCE}
+ $MCCLEANUP ${FORCE} > /dev/null || { echo "FAILed to clean up"; exit 20; }
}
-CLEAN=${CLEAN:-}
+CLEANUP=${CLEANUP:-:}
-start() {
+setup() {
echo -n "mnt.."
- sh llrmount.sh > /dev/null || exit 10
+ $MCSETUP || exit 10
echo "done"
}
-START=${START:-}
+SETUP=${SETUP:-:}
log() {
echo "$*"
TRACE=${TRACE:-""}
run_one() {
- if ! mount | grep -q $DIR1; then
- $START
+ if ! grep -q $DIR /proc/mounts; then
+ $SETUP
fi
testnum=$1
message=$2
unset TESTNAME
pass "($((`date +%s` - $BEFORE))s)"
cd $SAVE_PWD
- $CLEAN
+ $CLEANUP
+}
+
+build_test_filter() {
+ [ "$ALWAYS_EXCEPT$EXCEPT$SANITYN_EXCEPT" ] && \
+ echo "Skipping tests: `echo $ALWAYS_EXCEPT $EXCEPT $SANITYN_EXCEPT`"
+
+ for O in $ONLY; do
+ eval ONLY_${O}=true
+ done
+ for E in $EXCEPT $ALWAYS_EXCEPT $SANITY_EXCEPT; do
+ eval EXCEPT_${E}=true
+ done
+}
+
+_basetest() {
+ echo $*
+}
+
+basetest() {
+ IFS=abcdefghijklmnopqrstuvwxyz _basetest $1
}
build_test_filter() {
echo PASS $@
}
-export MOUNT1=`mount| awk '/ lustre/ { print $3 }'| head -n 1`
-export MOUNT2=`mount| awk '/ lustre/ { print $3 }'| tail -n 1`
+mounted_lustre_filesystems() {
+ awk '($3 ~ "lustre" && $1 ~ ":") { print $2 }' /proc/mounts
+}
+MOUNTED="`mounted_lustre_filesystems`"
+if [ -z "$MOUNTED" ]; then
+ $MCFORMAT
+ $MCSETUP
+ mount_client $MOUNT2
+ MOUNTED="`mounted_lustre_filesystems`"
+ [ -z "$MOUNTED" ] && error "NAME=$NAME not mounted"
+ I_MOUNTED=yes
+fi
+export MOUNT1=`mounted_lustre_filesystems | head -n 1`
[ -z "$MOUNT1" ] && error "NAME=$NAME not mounted once"
+export MOUNT2=`mounted_lustre_filesystems | tail -n 1`
[ "$MOUNT1" = "$MOUNT2" ] && error "NAME=$NAME not mounted twice"
-[ `mount| awk '/ lustre/ { print $3 }'| wc -l` -ne 2 ] && \
+[ `mounted_lustre_filesystems | wc -l` -ne 2 ] && \
error "NAME=$NAME mounted more than twice"
export DIR1=${DIR1:-$MOUNT1}
run_test 16 "2500 iterations of dual-mount fsx ================="
cancel_lru_locks() {
- for d in /proc/fs/lustre/ldlm/namespaces/$1*; do
+ for d in /proc/fs/lustre/ldlm/namespaces/*-$1-*; do
echo clear > $d/lru_size
done
- grep "[0-9]" /proc/fs/lustre/ldlm/namespaces/$1*/lock_unused_count /dev/null
+ grep "[0-9]" /proc/fs/lustre/ldlm/namespaces/*-$1-*/lock_unused_count /dev/null
}
test_17() { # bug 3513, 3667
[ ! -d /proc/fs/lustre/ost ] && echo "skipping OST-only test" && return
cp /etc/termcap $DIR1/f17
- cancel_lru_locks OSC > /dev/null
+ cancel_lru_locks osc > /dev/null
#define OBD_FAIL_ONCE|OBD_FAIL_LDLM_CREATE_RESOURCE 0x30a
echo 0x8000030a > /proc/sys/lustre/fail_loc
ls -ls $DIR1/f17 | awk '{ print $1,$6 }' > $DIR1/f17-1 & \
[ -d /proc/fs/lustre/obdfilter ] || return 0
MAX=`cat /proc/fs/lustre/obdfilter/*/readcache_max_filesize | head -n 1`
- for O in /proc/fs/lustre/obdfilter/OST*; do
+ for O in /proc/fs/lustre/obdfilter/*OST*; do
echo 4096 > $O/readcache_max_filesize
done
dd if=/dev/urandom of=$TMP/f19b bs=512k count=32
cp $TMP/f19b $DIR1/f19b
for i in `seq 1 20`; do
[ $((i % 5)) -eq 0 ] && log "test_18 loop $i"
- cancel_lru_locks OSC > /dev/null
+ cancel_lru_locks osc > /dev/null
cksum $DIR1/f19b | cut -d" " -f 1,2 > $TMP/sum1 & \
cksum $DIR2/f19b | cut -d" " -f 1,2 > $TMP/sum2
wait
[ "`cat $TMP/sum2`" = "$SUM" ] || \
error "$DIR2/f19b `cat $TMP/sum2` != $SUM"
done
- for O in /proc/fs/lustre/obdfilter/OST*; do
+ for O in /proc/fs/lustre/obdfilter/*OST*; do
echo $MAX > $O/readcache_max_filesize
done
rm $DIR1/f19b
test_20() {
mkdir $DIR1/d20
- cancel_lru_locks OSC
+ cancel_lru_locks osc
CNT=$((`cat /proc/fs/lustre/llite/fs0/dump_page_cache | wc -l`))
multiop $DIR1/f20 Ow8190c
multiop $DIR2/f20 Oz8194w8190c
multiop $DIR1/f20 Oz0r8190c
- cancel_lru_locks OSC
+ cancel_lru_locks osc
CNTD=$((`cat /proc/fs/lustre/llite/fs0/dump_page_cache | wc -l` - $CNT))
[ $CNTD -gt 0 ] && \
error $CNTD" page left in cache after lock cancel" || true
echo "others should see updated atime while another read" > $DIR1/f23
# clear the lock(mode: LCK_PW) gotten from creating operation
- cancel_lru_locks OSC
+ cancel_lru_locks osc
time1=`date +%s`
sleep 2
log "cleanup: ======================================================"
rm -rf $DIR1/[df][0-9]* $DIR1/lnk || true
+if [ "$I_MOUNTED" = "yes" ]; then
+ cleanup
+fi
echo '=========================== finished ==============================='
[ -f "$SANITYLOG" ] && cat $SANITYLOG && exit 1 || true
+
# vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4:
set -e
+#set -vx
export REFORMAT=""
export VERBOSE=false
export TMP=${TMP:-$ROOT/tmp}
export PATH=:$PATH:$LUSTRE/utils:$LUSTRE/tests
- export LLMOUNT=${LLMOUNT:-"llmount"}
- export LCONF=${LCONF:-"lconf"}
- export LMC=${LMC:-"lmc"}
export LCTL=${LCTL:-"$LUSTRE/utils/lctl"}
+ export MKFS=${MKFS:-"$LUSTRE/utils/mkfs.lustre"}
export CHECKSTAT="${CHECKSTAT:-checkstat} "
export FSYTPE=${FSTYPE:-"ext3"}
+ export LPROC=/proc/fs/lustre
if [ "$ACCEPTOR_PORT" ]; then
export PORT_OPT="--port $ACCEPTOR_PORT"
# echo "CONFIG=`canonical_path $CONFIG`" > $LUSTRE/tests/CONFIG
}
+unload_modules() {
+ lsmod | grep lnet > /dev/null && $LCTL dk $TMP/debug
+ local MODULES=`$LCTL modules | awk '{ print $2 }'`
+ rmmod $MODULES >/dev/null 2>&1
+ # do it again, in case we tried to unload ksocklnd too early
+ lsmod | grep lnet > /dev/null && rmmod $MODULES >/dev/null 2>&1
+ lsmod | grep lnet && echo "modules still loaded" && return 1
+
+ LEAK_LUSTRE=`dmesg | tail -n 30 | grep "obd mem.*leaked"`
+ LEAK_PORTALS=`dmesg | tail -n 20 | grep "Portals memory leaked"`
+ if [ "$LEAK_LUSTRE" -o "$LEAK_PORTALS" ]; then
+ echo "$LEAK_LUSTRE" 1>&2
+ echo "$LEAK_PORTALS" 1>&2
+ mv $TMP/debug $TMP/debug-leak.`date +%s`
+ echo "Memory leaks detected"
+ return 254
+ fi
+}
+
# Facet functions
+# start facet device options
start() {
facet=$1
shift
- active=`facet_active $facet`
- do_facet $facet $LCONF --select ${facet}_svc=${active}_facet \
- --node ${active}_facet --ptldebug $PTLDEBUG --subsystem $SUBSYSTEM \
- $@ $XMLCONFIG
+ device=$1
+ shift
+ echo "Starting ${facet}: $@ ${device} /mnt/${facet}"
+ do_facet ${facet} mkdir -p /mnt/${facet}
+ do_facet ${facet} mount -t lustre $@ ${device} /mnt/${facet}
+ #do_facet $facet $LCONF --select ${facet}_svc=${active}_facet \
+ # --node ${active}_facet --ptldebug $PTLDEBUG --subsystem $SUBSYSTEM \
+ # $@ $XMLCONFIG
RC=${PIPESTATUS[0]}
if [ $RC -ne 0 ]; then
- # maybe acceptor error, dump tcp port usage
- netstat -tpn
+ echo mount -t lustre $@ ${device} /mnt/${facet}
+ echo Start of ${device} on ${facet} failed ${RC}
+ else
+ do_facet ${facet} sync
+ # need the awk in case running with -v
+ label=`do_facet ${facet} "e2label ${device}" | awk '{print $(NF)}'`
+ eval export ${facet}_svc=${label}
+ eval export ${facet}_dev=${device}
+ eval export ${facet}_opt=\"$@\"
+ echo Started ${label}
fi
return $RC
}
stop() {
facet=$1
- active=`facet_active $facet`
shift
- do_facet $facet $LCONF --select ${facet}_svc=${active}_facet \
- --node ${active}_facet --ptldebug $PTLDEBUG --subsystem $SUBSYSTEM \
- $@ --cleanup $XMLCONFIG
+ # the following line fails with VERBOSE set
+ local running=`do_facet ${facet} "grep -c /mnt/${facet}' ' /proc/mounts" | awk '{print $(NF)}'`
+ if [ $running -ne 0 ]; then
+ echo "Stopping /mnt/${facet} (opts:$@)"
+ do_facet ${facet} umount -d $@ /mnt/${facet}
+ fi
+ #do_facet ${facet} umount -d $@ /mnt/${facet} >> /dev/null 2>&1 || :
+ [ -e /proc/fs/lustre ] && grep "ST " /proc/fs/lustre/devices && echo "service didn't stop" && exit 1
+ return 0
}
zconf_mount() {
local OPTIONS
- client=$1
- mnt=$2
-
- do_node $client mkdir $mnt 2> /dev/null || :
-
+ local client=$1
+ local mnt=$2
# Only supply -o to mount if we have options
if [ -n "$MOUNTOPT" ]; then
OPTIONS="-o $MOUNTOPT"
fi
-
- if [ -x /sbin/mount.lustre ] ; then
- do_node $client mount -t lustre $OPTIONS \
- `facet_nid mds`:/mds_svc/client_facet $mnt || return 1
- do_node $client "sysctl -w lnet.debug=$PTLDEBUG; sysctl -w lnet.subsystem_debug=${SUBSYSTEM# }"
- else
- # this is so cheating
- do_node $client $LCONF --nosetup --node client_facet $XMLCONFIG > \
- /dev/null || return 2
- do_node $client $LLMOUNT $OPTIONS \
- `facet_nid mds`:/mds_svc/client_facet $mnt || return 4
+ local device=`facet_nid mgs`:/$FSNAME
+ if [ -z "$mnt" -o -z "$FSNAME" ]; then
+ echo Bad zconf mount command: opt=$OPTIONS dev=$device mnt=$mnt
+ exit 1
fi
+ echo "Starting client: $OPTIONS $device $mnt"
+ do_node $client mkdir -p $mnt
+ do_node $client mount -t lustre $OPTIONS $device $mnt || return 1
+
+ do_node $client "sysctl -w lnet.debug=$PTLDEBUG; sysctl -w lnet.subsystem_debug=${SUBSYSTEM# }"
[ -d /r ] && $LCTL modules > /r/tmp/ogdb-`hostname`
return 0
}
client=$1
mnt=$2
[ "$3" ] && force=-f
- do_node $client umount $force $mnt || :
- do_node $client $LCONF --cleanup --nosetup --node client_facet $XMLCONFIG > /dev/null || :
+ local running=`do_node $client "grep -c $mnt' ' /proc/mounts" | awk '{print $(NF)}'`
+ if [ $running -ne 0 ]; then
+ echo "Stopping client $mnt (opts:$force)"
+ do_node $client umount $force $mnt
+ fi
}
shutdown_facet() {
$POWER_DOWN `facet_active_host $facet`
sleep 2
elif [ "$FAILURE_MODE" = SOFT ]; then
- stop $facet --force --failover --nomod
+ stop $facet
fi
}
facet_failover() {
facet=$1
- echo "Failing $facet node `facet_active_host $facet`"
+ echo "Failing $facet on node `facet_active_host $facet`"
shutdown_facet $facet
reboot_facet $facet
client_df &
TO=`facet_active_host $facet`
echo "Failover $facet to $TO"
wait_for $facet
- start $facet
+ local dev=${facet}_dev
+ local opt=${facet}_opt
+ start $facet ${!dev} ${!opt}
+}
+
+obd_name() {
+ local facet=$1
}
replay_barrier() {
local facet=$1
do_facet $facet sync
df $MOUNT
- do_facet $facet $LCTL --device %${facet}_svc readonly
- do_facet $facet $LCTL --device %${facet}_svc notransno
- do_facet $facet $LCTL mark "$facet REPLAY BARRIER"
- $LCTL mark "local REPLAY BARRIER"
+ local svc=${facet}_svc
+ do_facet $facet $LCTL --device %${!svc} readonly
+ do_facet $facet $LCTL --device %${!svc} notransno
+ do_facet $facet $LCTL mark "$facet REPLAY BARRIER on ${!svc}"
+ $LCTL mark "local REPLAY BARRIER on ${!svc}"
}
replay_barrier_nodf() {
local facet=$1
do_facet $facet sync
- do_facet $facet $LCTL --device %${facet}_svc readonly
- do_facet $facet $LCTL --device %${facet}_svc notransno
- do_facet $facet $LCTL mark "$facet REPLAY BARRIER"
- $LCTL mark "local REPLAY BARRIER"
+ local svc=${facet}_svc
+ echo Replay barrier on ${!svc}
+ do_facet $facet $LCTL --device %${!svc} readonly
+ do_facet $facet $LCTL --device %${!svc} notransno
+ do_facet $facet $LCTL mark "$facet REPLAY BARRIER on ${!svc}"
+ $LCTL mark "local REPLAY BARRIER on ${!svc}"
}
mds_evict_client() {
UUID=`cat /proc/fs/lustre/mdc/*_MNT_*/uuid`
- do_facet mds "echo $UUID > /proc/fs/lustre/mds/mds_svc/evict_client"
+ do_facet mds "echo $UUID > /proc/fs/lustre/mds/${mds_svc}/evict_client"
}
fail() {
- local facet=$1
- facet_failover $facet
+ facet_failover $*
df $MOUNT || error "post-failover df: $?"
}
fail_abort() {
local facet=$1
- stop $facet --force --failover --nomod
+ stop $facet
change_active $facet
- start $facet
- do_facet $facet lctl --device %${facet}_svc abort_recovery
+ local svc=${facet}_svc
+ local dev=${facet}_dev
+ local opt=${facet}_opt
+ start $facet ${!dev} ${!opt}
+ do_facet $facet lctl --device %${!svc} abort_recovery
df $MOUNT || echo "first df failed: $?"
sleep 1
df $MOUNT || error "post-failover df: $?"
}
do_lmc() {
- $LMC -m ${XMLCONFIG} $@
+ echo There is no lmc. This is mountconf, baby.
+ exit 1
}
h2gm () {
do_node $HOST $@
}
-add_facet() {
+add() {
local facet=$1
shift
- echo "add facet $facet: `facet_host $facet`"
- do_lmc --add node --node ${facet}_facet $@ --timeout $TIMEOUT \
- --lustre_upcall $UPCALL --ptldebug $PTLDEBUG --subsystem $SUBSYSTEM
- do_lmc --add net --node ${facet}_facet --nid `facet_nid $facet` \
- --nettype lnet $PORT_OPT
-}
-
-add_mds() {
- local MOUNT_OPTS
- local facet=$1
- shift
- rm -f ${facet}active
- add_facet $facet
- [ "x$MDSOPT" != "x" ] && MOUNT_OPTS="--mountfsoptions $MDSOPT"
- do_lmc --add mds --node ${facet}_facet --mds ${facet}_svc \
- --fstype $FSTYPE $* $MOUNT_OPTS
-}
-
-add_mdsfailover() {
- local MOUNT_OPTS
- local facet=$1
- shift
- add_facet ${facet}failover --lustre_upcall $UPCALL
- [ "x$MDSOPT" != "x" ] && MOUNT_OPTS="--mountfsoptions $MDSOPT"
- do_lmc --add mds --node ${facet}failover_facet --mds ${facet}_svc \
- --fstype $FSTYPE $* $MOUNT_OPTS
-}
-
-add_ost() {
- facet=$1
- shift
+ # failsafe
+ stop ${facet} -f
rm -f ${facet}active
- add_facet $facet
- do_lmc --add ost --node ${facet}_facet --ost ${facet}_svc \
- --fstype $FSTYPE $* $OSTOPT
-}
-
-add_ostfailover() {
- facet=$1
- shift
- add_facet ${facet}failover
- do_lmc --add ost --failover --node ${facet}failover_facet \
- --ost ${facet}_svc --fstype $FSTYPE $* $OSTOPT
-}
-
-add_lov() {
- lov=$1
- mds_facet=$2
- shift; shift
- do_lmc --add lov --mds ${mds_facet}_svc --lov $lov $* $LOVOPT
-}
-
-add_client() {
- local MOUNT_OPTS
- local facet=$1
- mds=$2
- shift; shift
- [ "x$CLIENTOPT" != "x" ] && MOUNT_OPTS="--clientoptions $CLIENTOPT"
- add_facet $facet --lustre_upcall $UPCALL
- do_lmc --add mtpt --node ${facet}_facet --mds ${mds}_svc $* $MOUNT_OPTS
+ $MKFS $*
}
log() {
echo "$*"
+ lsmod | grep lnet > /dev/null || modprobe lnet
$LCTL mark "$*" 2> /dev/null || true
}
lload
wirecheck
lfs
+mkfs.lustre
+mkfs_lustre
+mount.lustre
+mount_lustre
+tunefs.lustre
+tunefs_lustre
+llog_reader
llmount
l_getgroups
-mount.lustre
wiretest
llog_reader
.*.cmd
LIBPTLCTL := $(top_builddir)/lnet/utils/libptlctl.a
-sbin_scripts = lconf lmc llanalyze llstat.pl llobdstat.pl lactive \
- load_ldap.sh lrun
+sbin_scripts = llanalyze llstat.pl llobdstat.pl lactive lrun
bin_scripts = lfind lstripe
if UTILS
-rootsbin_SCRIPTS = mount.lustre
-sbin_PROGRAMS = lctl obdio obdbarrier lload wirecheck wiretest llmount \
- l_getgroups
+# mount only finds helpers in /sbin
+rootsbin_PROGRAMS = mount.lustre
+sbin_PROGRAMS = lctl obdio obdbarrier lload wirecheck wiretest \
+ mount_lustre mkfs_lustre mkfs.lustre \
+ tunefs_lustre tunefs.lustre l_getgroups
bin_PROGRAMS = lfs llog_reader
-lib_LIBRARIES = liblustreapi.a
+lib_LIBRARIES = liblustreapi.a
sbin_SCRIPTS = $(sbin_scripts)
bin_SCRIPTS = $(bin_scripts)
endif # UTILS
+lctl_SOURCES = parser.c obd.c lustre_cfg.c lctl.c parser.h obdctl.h platform.h
lctl_LDADD := $(LIBREADLINE) $(LIBPTLCTL)
lctl_DEPENDENCIES := $(LIBPTLCTL)
+lfs_SOURCES = lfs.c parser.c obd.c
lfs_LDADD := $(LIBREADLINE) liblustreapi.a $(LIBPTLCTL)
lfs_DEPENDENCIES := $(LIBPTLCTL) liblustreapi.a
+lload_SOURCES = lload.c
lload_LDADD := $(LIBREADLINE) $(LIBPTLCTL)
lload_DEPENDENCIES := $(LIBPTLCTL)
wirecheck_SOURCES = wirecheck.c
wirecheck_CPPFLAGS = -DCC="\"$(CC)\""
+
wiretest_SOURCES = wiretest.c
-lctl_SOURCES = parser.c obd.c lustre_cfg.c lctl.c parser.h obdctl.h platform.h
-lload_SOURCES = lload.c
obdio_SOURCES = obdio.c obdiolib.c obdiolib.h
obdbarrier_SOURCES = obdbarrier.c obdiolib.c obdiolib.h
-lfs_SOURCES = lfs.c parser.c obd.c
-llog_reader_LDADD := $(LIBREADLINE) $(LIBPTLCTL)
-llog_reader_DEPENDENCIES := $(LIBPTLCTL)
llog_reader_SOURCES = llog_reader.c
+llog_reader_LDADD := $(LIBPTLCTL)
+llog_reader_DEPENDENCIES := $(LIBPTLCTL)
+
+mount_lustre_SOURCES = mount_lustre.c
+mount_lustre_LDADD := $(LIBPTLCTL)
+mount_lustre_DEPENDENCIES := $(LIBPTLCTL)
-llmount_SOURCES = llmount.c
-llmount_CFLAGS = $(LLMOUNT_GM_CFLAGS)
-llmount_LDADD = $(LIBREADLINE) $(LIBPTLCTL) $(LLMOUNT_GM_LDADD)
-llmount_DEPENDENCIES := $(LIBPTLCTL)
+mkfs_lustre_SOURCES = mkfs_lustre.c
+mkfs_lustre_CPPFLAGS = -UTUNEFS $(AM_CPPFLAGS)
+mkfs_lustre_LDADD := $(LIBPTLCTL)
+mkfs_lustre_DEPENDENCIES := $(LIBPTLCTL)
+
+tunefs_lustre_SOURCES = $(mkfs_lustre_SOURCES)
+tunefs_lustre_CPPFLAGS = -DTUNEFS $(AM_CPPFLAGS)
+tunefs_lustre_LDADD := $(mkfs_lustre_LDADD)
+tunefs_lustre_DEPENDENCIES := $(mkfs_lustre_DEPENDENCIES)
EXTRA_DIST = $(bin_scripts) $(sbin_scripts)
cp wirehdr.c wiretest.c
./wirecheck >> wiretest.c
-mount.lustre$(EXEEXT): llmount
+# Apparently I can't use .'s in automake names
+mount.lustre$(EXEEXT): mount_lustre
+ cp $< $@
+
+mkfs.lustre$(EXEEXT): mkfs_lustre
+ cp $< $@
+
+tunefs.lustre$(EXEEXT): tunefs_lustre
cp $< $@
--- /dev/null
+# combo mdt/mgs
+uml1,options lnet networks=tcp,/r/tmp/mdt,mdt|mgs,,,,--device-size=10240
+# ost0
+uml1,options lnet networks=tcp,/r/tmp/ost0,ost,,"uml1@tcp0",,--device-size=10240
+
--- /dev/null
+#!/bin/bash
+#
+# cluster_config.sh - configure multiple lustre servers from a csv file
+#
+# This script is used to parse each line of a spreadsheet (csv file) and
+# execute remote pdsh commands to format (mkfs.lustre) every Lustre target
+# that will be part of the Lustre cluster.
+#
+# In addition, it can also verify the network connectivity and hostnames in
+# the cluster and produce High-Availability software configurations for
+# Heartbeat or CluManager
+#
+################################################################################
+
+# Usage
+usage() {
+ cat >&2 <<EOF
+
+Usage: `basename $0` [-t HAtype] [-n] [-f] [-h] [-v] <csv file>
+
+ -t HAtype produce High-Availability software configurations
+
+ The argument following -t is used to indicate the High-
+ Availability software type. The HA software types which
+ are currently supported are: hbv1 (Heartbeat v1), hbv2
+ (Heartbeat v2) and clumanager (CluManager).
+ -n don't verify network connectivity and hostnames in the
+ cluster
+ -f force-format the Lustre targets using --reformat option
+ -h help and examples
+ -v verbose mode
+ csv file a spreadsheet that contains configuration parameters
+ (separated by commas) for each target in a Lustre cl-
+ uster
+EOF
+ exit 1
+}
+
+# Samples
+sample() {
+ cat >&2 <<EOF
+
+Each line in the csv file represents one Lustre target.
+The format is:
+hostname,module_opts,device name,device type,fsname,mgs nids,index,
+format options,mkfs options,mount options,failover nids,heartbeat channels,
+service address,heartbeat options
+
+Items left blank will be set to defaults.
+
+Sample 1 for csv file (Simple one without HA software configuration options):
+-------------------------------------------------------------------------------
+# combo mdt/mgs
+lustre-mgs,options lnet networks=tcp,/r/tmp/mgs,mdt|mgs,,,,--device-size=10240
+
+# ost0
+lustre-ost,options lnet networks=tcp,/r/tmp/ost0,ost,,lustre-mgs@tcp0,,
+--device-size=10240
+-------------------------------------------------------------------------------
+
+Sample 2 for csv file (Complex one without HA software configuration options):
+-------------------------------------------------------------------------------
+# mgs
+lustre-mgs1,options lnet 'networks="tcp,elan"',/r/tmp/mgs,mgs,,,,
+--device-size=10240,-J size=4,,"lustre-mgs2,2@elan"
+
+# mdt
+lustre-mdt1,options lnet 'networks="tcp,elan"',/r/tmp/mdt,mdt,,
+"lustre-mgs1,1@elan:lustre-mgs2,2@elan",,--device-size=10240,
+-J size=4,,lustre-mdt2
+
+# ost
+lustre-ost1,options lnet 'networks="tcp,elan"',/r/tmp/ost,ost,,
+"lustre-mgs1,1@elan:lustre-mgs2,2@elan",,--device-size=10240,
+-J size=4,"extents,mballoc",lustre-ost2
+-------------------------------------------------------------------------------
+
+Sample 3 for csv file (with Heartbeat version 1 configuration options):
+-------------------------------------------------------------------------------
+# mgs
+lustre-mgs1,options lnet networks=tcp,/r/tmp/mgs,mgs,,,,--device-size=10240,,,
+lustre-mgs2,serial /dev/ttyS0:bcast eth1,192.168.1.170,auto_failback off:
+ping 192.168.1.169:respawn hacluster /usr/lib/heartbeat/ipfail
+
+# mdt
+lustre-mdt1,options lnet networks=tcp,/r/tmp/mdt,mdt,,"lustre-mgs1:lustre-mgs2",
+,--device-size=10240,,,lustre-mdt2,bcast eth1,192.168.1.173,auto_failback off
+
+# ost
+lustre-ost1,options lnet networks=tcp,/r/tmp/ost,ost,,"lustre-mgs1:lustre-mgs2",
+,--device-size=10240,,,lustre-ost2,bcast eth1,192.168.1.171,auto_failback on
+-------------------------------------------------------------------------------
+
+Sample 4 for csv file (with Heartbeat version 2 configuration options):
+-------------------------------------------------------------------------------
+# combo mdt/mgs
+lustre-mgs1,options lnet networks=tcp,/r/tmp/mgs,mgs|mdt,,,,--device-size=10240,
+,,"lustre-mgs2:lustre-mgs3",bcast eth1,192.168.1.170,auto_failback off
+
+# ost
+lustre-ost1,options lnet networks=tcp,/r/tmp/ost,ost,,"lustre-mgs1:lustre-mgs2:
+lustre-mgs3",,--device-size=10240,,,lustre-ost2,bcast eth1,192.168.1.171,
+auto_failback on:crm yes
+-------------------------------------------------------------------------------
+
+Sample 5 for csv file (with Red Hat's Cluster Manager configuration options):
+-------------------------------------------------------------------------------
+# mgs
+lustre-mgs1,options lnet networks=tcp,/r/tmp/mgs,mgs,,,,--device-size=10240,,,
+lustre-mgs2,broadcast,192.168.1.170,--clumembd--interval=1000000 --tko_count=20
+
+# mdt
+lustre-mdt1,options lnet networks=tcp,/r/tmp/mdt,mdt,,"lustre-mgs1:lustre-mgs2",
+,--device-size=10240,,,lustre-mdt2,multicast225.0.0.12,192.168.1.173,
+
+# ost
+lustre-ost1,options lnet networks=tcp,/r/tmp/ost,ost,,"lustre-mgs1:lustre-mgs2",
+,--device-size=10240,,,lustre-ost2,,192.168.1.171:192.168.1.172,
+-------------------------------------------------------------------------------
+
+EOF
+ exit 1
+}
+
+# Global variables
+PDSH=${PDSH:-"pdsh -R ssh"}
+export PDSH
+
+CMD_PATH=${CMD_PATH:-"/sbin/"}
+
+# Some scripts to be called
+SCRIPTS_PATH=${CLUSTER_SCRIPTS_PATH:-"./"}
+MODULE_CONFIG=${SCRIPTS_PATH}$"module_config.sh"
+VERIFY_CLUSTER_NET=${SCRIPTS_PATH}$"verify_cluster_net.sh"
+GEN_HB_CONFIG=${SCRIPTS_PATH}$"gen_hb_config.sh"
+GEN_CLUMGR_CONFIG=${SCRIPTS_PATH}$"gen_clumanager_config.sh"
+
+HATYPE_HBV1=$"hbv1" # Heartbeat version 1
+HATYPE_HBV2=$"hbv2" # Heartbeat version 2
+HATYPE_CLUMGR=$"clumanager" # Cluster Manager
+
+HB_TMP_DIR=$"/tmp/heartbeat/" # Temporary directory
+CLUMGR_TMP_DIR=$"/tmp/clumanager/"
+TMP_DIRS=$"${HB_TMP_DIR} ${CLUMGR_TMP_DIR}"
+
+declare -a CONFIG_ITEM # fields in each line of the csv file
+declare -a NODE_NAMES # node names in the failover group
+
+# Get and check the positional parameters
+while getopts "t:nfhv" OPTION; do
+ case $OPTION in
+ t)
+ HATYPE_OPT=$OPTARG
+ if [ "${HATYPE_OPT}" != "${HATYPE_HBV1}" ] \
+ && [ "${HATYPE_OPT}" != "${HATYPE_HBV2}" ] \
+ && [ "${HATYPE_OPT}" != "${HATYPE_CLUMGR}" ]; then
+ echo >&2 $"`basename $0`: Invalid HA software type" \
+ "- ${HATYPE_OPT}!"
+ usage
+ fi
+ ;;
+ n)
+ VERIFY_CONNECT=$"no"
+ ;;
+ f)
+ REFORMAT_OPTION=$"--reformat "
+ ;;
+ h)
+ sample
+ ;;
+ v)
+ VERBOSE_OPT=$" -v"
+ ;;
+ ?)
+ usage
+ esac
+done
+
+# Toss out the parameters we've already processed
+shift `expr $OPTIND - 1`
+
+# Here we expect the csv file
+if [ $# -eq 0 ]; then
+ echo >&2 $"`basename $0`: Missing csv file!"
+ usage
+fi
+
+# Output verbose informations
+verbose_output() {
+ if [ -n "${VERBOSE_OPT}" ]; then
+ echo "`basename $0`: $*"
+ fi
+ return 0
+}
+
+# Check the csv file
+check_file() {
+ # Check argument
+ if [ $# -eq 0 ]; then
+ echo >&2 $"`basename $0`: check_file() error: Lack argument"\
+ "for function check_file()!"
+ return 1
+ fi
+
+ CSV_FILE=$1
+ if [ ! -s ${CSV_FILE} ]; then
+ echo >&2 $"`basename $0`: check_file() error: ${CSV_FILE}"\
+ "does not exist or is empty!"
+ return 1
+ fi
+
+ return 0
+}
+
+# Parse a line in the csv file
+parse_line() {
+ # Check argument
+ if [ $# -eq 0 ]; then
+ echo >&2 $"`basename $0`: parse_line() error: Lack argument"\
+ "for function parse_line()!"
+ return 1
+ fi
+
+ declare -i i=0
+ declare -i length=0
+ declare -i idx=0
+ declare -i s_quote_flag=0
+ declare -i d_quote_flag=0
+ local TMP_LETTER LINE
+
+ LINE=$*
+
+ # Initialize the CONFIG_ITEM array
+ for ((i = 0; i < ${#CONFIG_ITEM[@]}; i++)); do
+ CONFIG_ITEM[i]=$""
+ done
+
+ # Get the length of the line
+ length=${#LINE}
+
+ i=0
+ while [ ${idx} -lt ${length} ]; do
+ # Get a letter from the line
+ TMP_LETTER=${LINE:${idx}:1}
+
+ case "${TMP_LETTER}" in
+ ",")
+ if [ ${s_quote_flag} -eq 1 ] || [ ${d_quote_flag} -eq 1 ]; then
+ CONFIG_ITEM[i]=${CONFIG_ITEM[i]}${TMP_LETTER}
+ else
+ i=$i+1
+ fi
+ idx=${idx}+1
+ continue
+ ;;
+ "'")
+ if [ ${s_quote_flag} -eq 0 ]; then
+ s_quote_flag=1
+ else
+ s_quote_flag=0
+ fi
+ ;;
+ "\"")
+ if [ ${d_quote_flag} -eq 0 ]; then
+ d_quote_flag=1
+ else
+ d_quote_flag=0
+ fi
+
+ if [ ${i} -eq 1 ]; then
+ CONFIG_ITEM[i]=${CONFIG_ITEM[i]}$"\\"${TMP_LETTER}
+ idx=${idx}+1
+ continue
+ fi
+ ;;
+ "\r")
+ idx=${idx}+1
+ continue
+ ;;
+ *)
+ ;;
+ esac
+ CONFIG_ITEM[i]=${CONFIG_ITEM[i]}${TMP_LETTER}
+ idx=${idx}+1
+ done
+ return 0
+}
+
+# Check the elements required for OSTs, MDTs and MGS
+#
+# When formatting an OST, the following elements: hostname, module_opts,
+# device name, device type and mgs nids, cannot have null value.
+#
+# When formatting an MDT or MGS, the following elements: hostname,
+# module_opts, device name and device type, cannot have null value.
+check_element() {
+ # Check hostname, module_opts, device name and device type
+ if [ -z "${HOST_NAME}" ]||[ -z "${MODULE_OPTS}" ]||[ -z "${DEVICE_NAME}" ]\
+ ||[ -z "${DEVICE_TYPE}" ]; then
+ echo >&2 $"`basename $0`: check_element() error: Some required"\
+ "element has null value! Check hostname, module_opts,"\
+ "device name and device type!"
+ return 1
+ fi
+
+ # Check mgs nids
+ if [ "${DEVICE_TYPE}" = "ost" ]&&[ -z "${MGS_NIDS}" ]; then
+ echo >&2 $"`basename $0`: check_element() error: OST's mgs nids"\
+ "element has null value!"
+ return 1
+ fi
+
+ return 0
+}
+
+# Check the elements required for HA configuration
+check_ha_element() {
+ if [ -z "${HATYPE_OPT}" ]; then
+ return 0
+ fi
+
+ # Check service IP element
+ if [ -z "${SRV_IPADDRS}" ]; then
+ echo >&2 $"`basename $0`: check_ha_element() error: Service IP"\
+ "element has null value!"
+ return 1
+ fi
+
+ # Check heartbeat channel element
+ if [ "${HATYPE_OPT}" != "${HATYPE_CLUMGR}" -a -z "${HB_CHANNELS}" ]
+ then
+ echo >&2 $"`basename $0`: check_ha_element() error: Heartbeat"\
+ "channel element has null value!"
+ return 1
+ fi
+
+ return 0
+}
+
+# Check the number of MGS.
+# There should be no more than one MGS specified in the entire csv file.
+check_mgs() {
+ # Check the number of explicit MGS
+ if [ "${DEVICE_TYPE#*mgs*}" != "${DEVICE_TYPE}" ]; then
+ if [ "${EXP_MGS}" = "${HOST_NAME}" ]; then
+ echo >&2 $"`basename $0`: check_mgs() error: More than"\
+ "one explicit MGS in the csv file!"
+ return 1
+ fi
+
+ if [ -z "${EXP_MGS}" ]; then
+ EXP_MGS=${HOST_NAME}
+ fi
+
+ if [ "${EXP_MGS}" != "${HOST_NAME}" ]; then
+ if [ "${FAILOVERS#*$EXP_MGS*}" = "${FAILOVERS}" ]; then
+ echo >&2 $"`basename $0`: check_mgs() error:"\
+ "More than one explicit MGS in the"\
+ "csv file!"
+ else
+ echo >&2 $"`basename $0`: check_mgs() error:"\
+ "There should not be two entries for"\
+ "a server and its failover partner"\
+ "in the csv file!"
+ fi
+ return 1
+ fi
+ fi
+
+ # Check the number of implicit MGS
+ if [ "${DEVICE_TYPE}" = "mdt" ]&&[ -z "${MGS_NIDS}" ]; then
+ if [ "${IMP_MGS}" = "${HOST_NAME}" ]; then
+ echo >&2 $"`basename $0`: check_mgs() error: More than"\
+ "one implicit MGS in the csv file!"
+ return 1
+ fi
+
+ if [ -z "${IMP_MGS}" ]; then
+ IMP_MGS=${HOST_NAME}
+ fi
+
+ if [ "${IMP_MGS}" != "${HOST_NAME}" ]; then
+ if [ "${FAILOVERS#*$IMP_MGS*}" = "${FAILOVERS}" ]; then
+ echo >&2 $"`basename $0`: check_mgs() error:"\
+ "More than one implicit MGS in the"\
+ "csv file!"
+ else
+ echo >&2 $"`basename $0`: check_mgs() error:"\
+ "There should not be two entries for"\
+ "a server and its failover partner"\
+ "in the csv file!"
+ fi
+ return 1
+ fi
+ fi
+
+ if [ -n "${EXP_MGS}" -a -n "${IMP_MGS}" ]; then
+ echo >&2 $"`basename $0`: check_mgs() error: More than one"\
+ "MGS in the csv file!"
+ return 1
+ fi
+
+ return 0
+}
+
+# Construct the command line of mkfs.lustre
+construct_mkfs_cmdline() {
+ MKFS_CMD=${CMD_PATH}$"mkfs.lustre "
+ MKFS_CMD=${MKFS_CMD}${REFORMAT_OPTION}
+
+ case "${DEVICE_TYPE}" in
+ "ost")
+ MKFS_CMD=${MKFS_CMD}$"--ost "
+ ;;
+ "mdt")
+ MKFS_CMD=${MKFS_CMD}$"--mdt "
+ ;;
+ "mgs")
+ MKFS_CMD=${MKFS_CMD}$"--mgs "
+ ;;
+ "mdt|mgs")
+ MKFS_CMD=${MKFS_CMD}$"--mdt --mgs "
+ ;;
+ "mgs|mdt")
+ MKFS_CMD=${MKFS_CMD}$"--mdt --mgs "
+ ;;
+ *)
+ echo >&2 $"`basename $0`: construct_mkfs_cmdline() error:"\
+ "Invalid device type - \"${DEVICE_TYPE}\""
+ return 1
+ ;;
+ esac
+
+ if [ -n "${FS_NAME}" ]; then
+ MKFS_CMD=${MKFS_CMD}$"--fsname="${FS_NAME}$" "
+ fi
+
+ if [ -n "${MGS_NIDS}" ]; then
+ MGS_NIDS=`echo "${MGS_NIDS}" | sed 's/^"//' | sed 's/"$//'`
+ MKFS_CMD=${MKFS_CMD}$"--mgsnode="${MGS_NIDS}$" "
+ fi
+
+ if [ -n "${INDEX}" ]; then
+ MKFS_CMD=${MKFS_CMD}$"--index="${INDEX}$" "
+ fi
+
+ if [ -n "${FORMAT_OPTIONS}" ]; then
+ FORMAT_OPTIONS=`echo "${FORMAT_OPTIONS}" | sed 's/^"//' | sed 's/"$//'`
+ MKFS_CMD=${MKFS_CMD}${FORMAT_OPTIONS}$" "
+ fi
+
+ if [ -n "${MKFS_OPTIONS}" ]; then
+ MKFS_OPTIONS=`echo "${MKFS_OPTIONS}" | sed 's/^"//' | sed 's/"$//'`
+ MKFS_CMD=${MKFS_CMD}$"--mkfsoptions="$"\""${MKFS_OPTIONS}$"\""$" "
+ fi
+
+ if [ -n "${MOUNT_OPTIONS}" ]; then
+ MOUNT_OPTIONS=`echo "${MOUNT_OPTIONS}" | sed 's/^"//' | sed 's/"$//'`
+ MKFS_CMD=${MKFS_CMD}$"--mountfsoptions="$"\""${MOUNT_OPTIONS}$"\""$" "
+ fi
+
+ if [ -n "${FAILOVERS}" ]; then
+ FAILOVERS=`echo "${FAILOVERS}" | sed 's/^"//' | sed 's/"$//'`
+ MKFS_CMD=${MKFS_CMD}$"--failnode="${FAILOVERS}$" "
+ fi
+
+ MKFS_CMD=${MKFS_CMD}${DEVICE_NAME}
+ return 0
+}
+
+# Get all the node names in this failover group
+get_nodenames() {
+ declare -i idx
+ local failover_nids failover_nid first_nid
+
+ NODE_NAMES[0]=${HOST_NAME}
+
+ failover_nids=`echo ${FAILOVERS}|awk '{split($FAILOVERS, a, ":")}\
+ END {for (i in a) print a[i]}'`
+
+ # XXX: Suppose the first nid of one failover node contains the node name
+ idx=1
+ for failover_nid in ${failover_nids}
+ do
+ first_nid=`echo ${failover_nid} | awk -F, '{print $1}'`
+ NODE_NAMES[idx]=${first_nid%@*}
+ idx=$idx+1
+ done
+
+ return 0
+}
+
+# Produce HA software's configuration files
+gen_ha_config() {
+ local cmd_line
+ declare -i idx
+
+ if [ -z "${HATYPE_OPT}" ]; then
+ return 0
+ fi
+
+ # Prepare parameters
+ # Hostnames option
+ HOSTNAME_OPT=${HOST_NAME}
+
+ if ! get_nodenames; then
+ return 1
+ fi
+
+ for ((idx = 1; idx < ${#NODE_NAMES[@]}; idx++)); do
+ HOSTNAME_OPT=${HOSTNAME_OPT}$":"${NODE_NAMES[idx]}
+ done
+
+ # Target device option
+ TARGET_TYPE=${DEVICE_TYPE}
+ if [ "${TARGET_TYPE}" = "mdt|mgs" -o "${TARGET_TYPE}" = "mgs|mdt" ]
+ then
+ TARGET_TYPE=$"mgs_mdt"
+ fi
+ TARGET_OPT=${DEVICE_NAME}:${TARGET_TYPE}
+
+ # Service IP address option
+ SRVADDR_OPT=${SRV_IPADDRS}
+
+ # Heartbeat channels option
+ HBCHANNEL_OPT=$"\""${HB_CHANNELS}$"\""
+
+ # Heartbeat options option
+ HBOPT_OPT=$"\""${HB_OPTIONS}$"\""
+
+ # Construct the generation script command line
+ case "${HATYPE_OPT}" in
+ "${HATYPE_HBV1}"|"${HATYPE_HBV2}") # Heartbeat
+ cmd_line=${GEN_HB_CONFIG}$" -r ${HATYPE_OPT} -n ${HOSTNAME_OPT}"
+ cmd_line=${cmd_line}$" -d ${TARGET_OPT} -c ${HBCHANNEL_OPT}"
+ cmd_line=${cmd_line}$" -s ${SRVADDR_OPT}"${VERBOSE_OPT}
+
+ if [ -n "${HB_OPTIONS}" ]; then
+ cmd_line=${cmd_line}$" -o ${HBOPT_OPT}"
+ fi
+ ;;
+ "${HATYPE_CLUMGR}") # CluManager
+ cmd_line=${GEN_CLUMGR_CONFIG}$" -n ${HOSTNAME_OPT}"
+ cmd_line=${cmd_line}$" -d ${TARGET_OPT} -s ${SRVADDR_OPT}"
+ cmd_line=${cmd_line}${VERBOSE_OPT}
+
+ if [ -n "${HBCHANNEL_OPT}" ]; then
+ cmd_line=${cmd_line}$" -c ${HBCHANNEL_OPT}"
+ fi
+
+ if [ -n "${HB_OPTIONS}" ]; then
+ cmd_line=${cmd_line}$" -o ${HBOPT_OPT}"
+ fi
+ ;;
+ esac
+
+ # Execute script to generate HA software's configuration files
+ verbose_output "${cmd_line}"
+ eval $(echo "${cmd_line}")
+ if [ $? -ne 0 ]; then
+ return 1
+ fi
+
+ return 0
+}
+
+# Execute pdsh commands to add lnet options lines to remote nodes'
+# modprobe.conf/modules.conf and format(mkfs.lustre) Lustre targets
+#
+# If -t option exists, then also to produce the HA software's
+# configuration files
+mass_config() {
+ # Check argument
+ if [ $# -eq 0 ]; then
+ echo >&2 $"`basename $0`: mass_config() error: Lack argument"\
+ "for function mass_config()!"
+ return 1
+ fi
+
+ CSV_FILE=$1
+ local LINE COMMAND
+ declare -a PDSH_PID
+ declare -a PDSH_CMD
+ declare -i line_num=1
+ declare -i pid_num=0
+
+ while read -r LINE; do
+ # Get rid of the empty line
+ if [ -z "`echo ${LINE} | awk '/[[:alnum:]]/{print $0}'`" ]; then
+ line_num=${line_num}+1
+ continue
+ fi
+
+ # Get rid of the comment line
+ if [ -z "`echo \"${LINE}\" | egrep -v \"([[:space:]]|^)#\"`" ]; then
+ line_num=${line_num}+1
+ continue
+ fi
+
+ # Parse the config line into CONFIG_ITEM
+ if ! parse_line $LINE; then
+ return 1
+ fi
+
+ HOST_NAME=${CONFIG_ITEM[0]}
+ MODULE_OPTS=${CONFIG_ITEM[1]}
+ DEVICE_NAME=${CONFIG_ITEM[2]}
+ DEVICE_TYPE=${CONFIG_ITEM[3]}
+ FS_NAME=${CONFIG_ITEM[4]}
+ MGS_NIDS=${CONFIG_ITEM[5]}
+ INDEX=${CONFIG_ITEM[6]}
+ FORMAT_OPTIONS=${CONFIG_ITEM[7]}
+ MKFS_OPTIONS=${CONFIG_ITEM[8]}
+ MOUNT_OPTIONS=${CONFIG_ITEM[9]}
+ FAILOVERS=${CONFIG_ITEM[10]}
+
+ HB_CHANNELS=${CONFIG_ITEM[11]}
+ SRV_IPADDRS=${CONFIG_ITEM[12]}
+ HB_OPTIONS=${CONFIG_ITEM[13]}
+
+ # Check some required elements for formatting target
+ if ! check_element; then
+ echo >&2 $"`basename $0`: check_element() error:"\
+ "Occurred on line ${line_num} in ${CSV_FILE}"
+ return 1
+ fi
+
+ # Check the number of MGS
+ if ! check_mgs; then
+ echo >&2 $"`basename $0`: check_mgs() error:"\
+ "Occurred on line ${line_num} in ${CSV_FILE}"
+ return 1
+ fi
+
+ # Construct the command line of mkfs.lustre
+ if ! construct_mkfs_cmdline; then
+ echo >&2 $"`basename $0`: construct_mkfs_cmdline() error:"\
+ "Occurred on line ${line_num} in ${CSV_FILE}"
+ return 1
+ fi
+
+ # Produce HA software's configuration files
+ if ! gen_ha_config; then
+ return 1
+ fi
+
+ # Execute pdsh command to add lnet options lines to
+ # modprobe.conf/modules.conf
+ COMMAND=$"echo \"${MODULE_OPTS}\"|${MODULE_CONFIG}"
+ verbose_output "Adding module options to ${HOST_NAME}"
+ verbose_output ${COMMAND}
+ ${PDSH} -w ${HOST_NAME} ${COMMAND} >&2 &
+ PDSH_PID[${pid_num}]=$!
+ PDSH_CMD[${pid_num}]="${PDSH} -w ${HOST_NAME} ${COMMAND}"
+ pid_num=${pid_num}+1
+
+ # Execute pdsh command to format Lustre target
+ verbose_output "Formatting Lustre target on ${HOST_NAME}..."
+ verbose_output "Format command line is: ${MKFS_CMD}"
+ ${PDSH} -w ${HOST_NAME} ${MKFS_CMD} >&2 &
+ PDSH_PID[${pid_num}]=$!
+ PDSH_CMD[${pid_num}]="${PDSH} -w ${HOST_NAME} ${MKFS_CMD}"
+ pid_num=${pid_num}+1
+
+ line_num=${line_num}+1
+ done < ${CSV_FILE}
+
+ # Wait for the exit status of the background pdsh command
+ verbose_output "Waiting for the return of the pdsh command..."
+ for ((pid_num = 0; pid_num < ${#PDSH_PID[@]}; pid_num++)); do
+ wait ${PDSH_PID[${pid_num}]}
+ if [ $? -ne 0 ]; then
+ echo >&2 "`basename $0`: mass_config() error:"\
+ "Fail to execute \"${PDSH_CMD[${pid_num}]}\"!"
+ fi
+ done
+
+ rm -rf ${TMP_DIRS}
+ return 0
+}
+
+# Main flow
+# Check the csv file
+if ! check_file $1; then
+ exit 1
+fi
+
+if [ "${VERIFY_CONNECT}" != "no" ]; then
+# Check the network connectivity and hostnames
+ verbose_output "Checking the network connectivity and hostnames..."
+ if ! ${VERIFY_CLUSTER_NET} ${VERBOSE_OPT} ${CSV_FILE}; then
+ exit 1
+ fi
+ verbose_output "Check the network connectivity and hostnames OK!"
+fi
+
+# Configure the Lustre cluster
+verbose_output "******** Lustre cluster configuration START ********"
+if ! mass_config ${CSV_FILE}; then
+ rm -rf ${TMP_DIRS}
+ exit 1
+fi
+verbose_output "******** Lustre cluster configuration END **********"
+
+exit 0
--- /dev/null
+#!/bin/bash
+#
+# gen_clumanager_config.sh - script for generating the Red Hat's Cluster Manager
+# HA software's configuration files
+#
+################################################################################
+
+# Usage
+usage() {
+ cat >&2 <<EOF
+
+Usage: `basename $0` <-n hostnames> <-d target device> <-s service addresses>
+ [-c heartbeat channels] [-o heartbeat options] [-v]
+
+ -n hostnames the nodenames of the primary node and its fail-
+ overs
+ Multiple nodenames are separated by colon (:)
+ delimeter. The first one is the nodename of the
+ primary node, the others are failover nodenames.
+ -d target device the target device name and type
+ The name and type are separated by colon (:)
+ delimeter. The type values are: mgs, mdt, ost or
+ mgs_mdt.
+ -s service addresses the IP addresses to failover
+ Multiple addresses are separated by colon (:)
+ delimeter.
+ -c heartbeat channels the methods to send/rcv heartbeats on
+ The default method is multicast, and multicast_
+ ipaddress is "225.0.0.11".
+ -o heartbeat options a "catchall" for other heartbeat configuration
+ options
+ -v verbose mode
+
+EOF
+ exit 1
+}
+
+# Global variables
+SCRIPTS_PATH=${CLUSTER_SCRIPTS_PATH:-"./"}
+SCRIPT_VERIFY_SRVIP=${SCRIPTS_PATH}$"verify_serviceIP.sh"
+
+LUSTRE_SRV_SCRIPT=$"/etc/rc.d/init.d/lustre" # service script for lustre
+
+TMP_DIR=$"/tmp/clumanager/" # temporary directory
+CLUMGR_DIR=$"/etc/" # CluManager configuration directory
+
+CONFIG_CMD=$"redhat-config-cluster-cmd"
+
+declare -a NODE_NAMES # node names in the failover group
+declare -a SRV_IPADDRS # service IP addresses
+
+# Get and check the positional parameters
+while getopts "n:d:s:c:o:v" OPTION; do
+ case $OPTION in
+ n)
+ HOSTNAME_OPT=$OPTARG
+ HOSTNAME_NUM=`echo ${HOSTNAME_OPT} | awk -F":" '{print NF}'`
+ if [ ${HOSTNAME_NUM} -lt 2 ]; then
+ echo >&2 $"`basename $0`: Lack failover nodenames!"
+ usage
+ fi
+ ;;
+ d)
+ DEVICE_OPT=$OPTARG
+ TARGET_DEV=`echo ${DEVICE_OPT} | awk -F":" '{print $1}'`
+ TARGET_TYPE=`echo ${DEVICE_OPT} | awk -F":" '{print $2}'`
+ if [ -z "${TARGET_TYPE}" ]; then
+ echo >&2 $"`basename $0`: Lack target device type!"
+ usage
+ fi
+ if [ "${TARGET_TYPE}" != "mgs" ]&&[ "${TARGET_TYPE}" != "mdt" ]\
+ &&[ "${TARGET_TYPE}" != "ost" ]&&[ "${TARGET_TYPE}" != "mgs_mdt" ]
+ then
+ echo >&2 $"`basename $0`: Invalid target device type" \
+ "- ${TARGET_TYPE}!"
+ usage
+ fi
+ ;;
+ s)
+ SRVADDR_OPT=$OPTARG
+ ;;
+ c)
+ HBCHANNEL_OPT=$OPTARG
+ HBCHANNEL_OPT=`echo "${HBCHANNEL_OPT}" | sed 's/^"//' \
+ | sed 's/"$//'`
+ if [ -n "${HBCHANNEL_OPT}" ] \
+ && [ "${HBCHANNEL_OPT}" = "${HBCHANNEL_OPT#*broadcast*}" ] \
+ && [ "${HBCHANNEL_OPT}" = "${HBCHANNEL_OPT#*multicast*}" ]; then
+ echo >&2 $"`basename $0`: Invalid Heartbeat channel" \
+ "- ${HBCHANNEL_OPT}!"
+ usage
+ fi
+ ;;
+ o)
+ HBOPT_OPT=$OPTARG
+ HBOPT_OPT=`echo "${HBOPT_OPT}" | sed 's/^"//' | sed 's/"$//'`
+ ;;
+ v)
+ VERBOSE_OPT=$"yes"
+ ;;
+ ?)
+ usage
+ esac
+done
+
+# Check the required parameters
+if [ -z "${HOSTNAME_OPT}" ]; then
+ echo >&2 $"`basename $0`: Lack -n option!"
+ usage
+fi
+
+if [ -z "${DEVICE_OPT}" ]; then
+ echo >&2 $"`basename $0`: Lack -d option!"
+ usage
+fi
+
+if [ -z "${SRVADDR_OPT}" ]; then
+ echo >&2 $"`basename $0`: Lack -s option!"
+ usage
+fi
+
+# Output verbose informations
+verbose_output() {
+ if [ "${VERBOSE_OPT}" = "yes" ]; then
+ echo "`basename $0`: $*"
+ fi
+ return 0
+}
+
+# get_nodenames
+#
+# Get all the node names in this failover group
+get_nodenames() {
+ PRIM_NODENAME=`echo ${HOSTNAME_OPT} | awk -F":" '{print $1}'`
+
+ declare -i idx
+ local nodename_str nodename
+
+ nodename_str=`echo ${HOSTNAME_OPT}|awk '{split($HOSTNAME_OPT, a, ":")}\
+ END {for (i in a) print a[i]}'`
+ idx=0
+ for nodename in ${nodename_str}
+ do
+ NODE_NAMES[idx]=${nodename}
+ idx=$idx+1
+ done
+
+ return 0
+}
+
+# get_check_srvIPaddrs
+#
+# Get and check all the service IP addresses in this failover group
+get_check_srvIPaddrs() {
+ declare -i idx
+ declare -i i
+ local srvIPaddr_str srvIPaddr
+
+ srvIPaddr_str=`echo ${SRVADDR_OPT}|awk '{split($SRVADDR_OPT, a, ":")}\
+ END {for (i in a) print a[i]}'`
+ idx=0
+ for srvIPaddr in ${srvIPaddr_str}
+ do
+ SRV_IPADDRS[idx]=${srvIPaddr}
+ idx=$idx+1
+ done
+
+ for ((idx = 0; idx < ${#SRV_IPADDRS[@]}; idx++)); do
+ for ((i = 0; i < ${#NODE_NAMES[@]}; i++)); do
+ # Check service IP address
+ verbose_output "Verifying service IP ${SRV_IPADDRS[idx]} and" \
+ "real IP of host ${NODE_NAMES[i]} are in the" \
+ "same subnet..."
+ if ! ${SCRIPT_VERIFY_SRVIP} ${SRV_IPADDRS[idx]} ${NODE_NAMES[i]}
+ then
+ return 1
+ fi
+ verbose_output "OK"
+ done
+ done
+
+ return 0
+}
+
+# stop_clumanager
+#
+# Run pdsh command to stop each node's clumanager service
+stop_clumanager() {
+ declare -i idx
+ local nodename_str=${PRIM_NODENAME}
+
+ for ((idx = 1; idx < ${#NODE_NAMES[@]}; idx++)); do
+ nodename_str=${nodename_str}$","${NODE_NAMES[idx]}
+ done
+
+ ${PDSH} -w ${nodename_str} /sbin/service clumanager stop
+ if [ $? -ne 0 ]; then
+ echo >&2 "`basename $0`: stop_clumanager() error:"\
+ "Fail to execute pdsh command!"
+ return 1
+ fi
+
+ return 0
+}
+
+# check_retval retval
+#
+# Check the return value of redhat-config-cluster-cmd
+check_retval() {
+ if [ $1 -ne 0 ]; then
+ echo >&2 "`basename $0`: Fail to run ${CONFIG_CMD}!"
+ return 1
+ fi
+
+ return 0
+}
+
+# gen_cluster_xml
+#
+# Run redhat-config-cluster-cmd to create the cluster.xml file
+gen_cluster_xml() {
+ declare -i idx
+ local mcast_IPaddr
+ local hbopt_str hbopt
+
+ # Run redhat-config-cluster-cmd to generate cluster.xml
+ # Add clumembd tag
+ if [ "${HBCHANNEL_OPT}" != "${HBCHANNEL_OPT#*broadcast*}" ]; then
+ ${CONFIG_CMD} --clumembd --broadcast=yes
+ if ! check_retval $?; then
+ return 1
+ fi
+ elif [ "${HBCHANNEL_OPT}" != "${HBCHANNEL_OPT#*multicast*}" ]; then
+ mcast_IPaddr=`echo ${HBCHANNEL_OPT} | awk '{print $2}'`
+ if [ -n "${mcast_IPaddr}" ]; then
+ ${CONFIG_CMD} --clumembd --multicast=yes\
+ --multicast_ipaddress=${mcast_IPaddr}
+ if ! check_retval $?; then
+ return 1
+ fi
+ fi
+ fi
+
+ # Add cluster tag
+ ${CONFIG_CMD} --cluster --name='${TARGET_TYPE} failover group'
+ if ! check_retval $?; then
+ return 1
+ fi
+
+ # Add member tag
+ for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
+ ${CONFIG_CMD} --add_member --name=${NODE_NAMES[idx]}
+ if ! check_retval $?; then
+ return 1
+ fi
+ done
+
+ # Add failoverdomain tag
+ ${CONFIG_CMD} --add_failoverdomain --name=${TARGET_TYPE}-domain
+ if ! check_retval $?; then
+ return 1
+ fi
+
+ for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
+ ${CONFIG_CMD} --failoverdomain=${TARGET_TYPE}-domain\
+ --add_failoverdomainnode --name=${NODE_NAMES[idx]}
+ if ! check_retval $?; then
+ return 1
+ fi
+ done
+
+ # Add service tag
+ ${CONFIG_CMD} --add_service --name=${TARGET_TYPE}-service
+ if ! check_retval $?; then
+ return 1
+ fi
+
+ ${CONFIG_CMD} --service=${TARGET_TYPE}-service \
+ --userscript=${LUSTRE_SRV_SCRIPT}
+ if ! check_retval $?; then
+ return 1
+ fi
+
+ ${CONFIG_CMD} --service=${TARGET_TYPE}-service \
+ --failoverdomain=${TARGET_TYPE}-domain
+ if ! check_retval $?; then
+ return 1
+ fi
+
+ for ((idx = 0; idx < ${#SRV_IPADDRS[@]}; idx++)); do
+ ${CONFIG_CMD} --service=mgs-service \
+ --add_service_ipaddress --ipaddress=${SRV_IPADDRS[idx]}
+ if ! check_retval $?; then
+ return 1
+ fi
+ done
+
+ # Add other tags
+ if [ -n "${HBOPT_OPT}"]; then
+ hbopt_str=`echo ${HBOPT_OPT}|awk '{split($HBOPT_OPT, a, ":")}\
+ END {for (i in a) print a[i]}'`
+ idx=0
+ for hbopt in ${hbopt_str}
+ do
+ ${CONFIG_CMD} ${hbopt}
+ if ! check_retval $?; then
+ return 1
+ fi
+ idx=$idx+1
+ done
+ fi
+
+ return 0
+}
+
+# create_config
+#
+# Create the cluster.xml file and scp it to the each node's /etc/
+create_config() {
+ CONFIG_PRIMNODE=${TMP_DIR}$"cluster.xml."${PRIM_NODENAME}
+ declare -i idx
+
+ if [ -e ${CONFIG_PRIMNODE} ]; then
+ verbose_output "${CONFIG_PRIMNODE} already exists."
+ return 0
+ fi
+
+ # Run redhat-config-cluster-cmd to generate cluster.xml
+ verbose_output "Creating cluster.xml file for" \
+ "${PRIM_NODENAME} failover group hosts..."
+ if ! gen_cluster_xml; then
+ return 1
+ fi
+ verbose_output "OK"
+
+ /bin/cp -f ${CLUMGR_DIR}cluster.xml ${CONFIG_PRIMNODE}
+
+ # scp the cluster.xml file to all the nodes
+ verbose_output "Remote copying cluster.xml file to" \
+ "${PRIM_NODENAME} failover group hosts..."
+ for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
+ touch ${TMP_DIR}$"cluster.xml."${NODE_NAMES[idx]}
+ scp ${CONFIG_PRIMNODE} ${NODE_NAMES[idx]}:${CLUMGR_DIR}cluster.xml
+ if [ $? -ne 0 ]; then
+ echo >&2 "`basename $0`: Fail to scp cluster.xml file"\
+ "to node ${NODE_NAMES[idx]}!"
+ return 1
+ fi
+ done
+ verbose_output "OK"
+
+ return 0
+}
+
+# Main flow
+# Get all the node names
+if ! get_nodenames; then
+ exit 1
+fi
+
+# Get and check all the service IP addresses
+if ! get_check_srvIPaddrs; then
+ exit 1
+fi
+
+# Stop clumanager services
+verbose_output "Stopping clumanager service in the ${PRIM_NODENAME}"\
+ "failover group hosts..."
+if ! stop_clumanager; then
+ exit 1
+fi
+verbose_output "OK"
+
+# Generate configuration files
+if ! create_config; then
+ exit 1
+fi
+
+exit 0
--- /dev/null
+#!/bin/bash
+#
+# gen_hb_config.sh - script for generating the Heartbeat HA software's
+# configuration files
+#
+###############################################################################
+
+# Usage
+usage() {
+ cat >&2 <<EOF
+
+Usage: `basename $0` <-r HBver> <-n hostnames> <-d target device>
+ <-c heartbeat channels> <-s service address>
+ [-o heartbeat options] [-v]
+
+ -r HBver the version of Heartbeat software
+ The Heartbeat software versions which are curr-
+ ently supported are: hbv1 (Heartbeat version 1)
+ and hbv2 (Heartbeat version 2).
+ -n hostnames the nodenames of the primary node and its fail-
+ overs
+ Multiple nodenames are separated by colon (:)
+ delimeter. The first one is the nodename of the
+ primary node, the others are failover nodenames.
+ -d target device the target device name and type
+ The name and type are separated by colon (:)
+ delimeter. The type values are: mgs, mdt, ost or
+ mgs_mdt.
+ -c heartbeat channels the methods and devices to send/rcv heartbeats on
+ -s service address the IP address to failover
+ -o heartbeat options a "catchall" for other heartbeat configuration
+ options
+ -v verbose mode
+
+EOF
+ exit 1
+}
+
+# Global variables
+SCRIPTS_PATH=${CLUSTER_SCRIPTS_PATH:-"./"}
+SCRIPT_VERIFY_SRVIP=${SCRIPTS_PATH}$"verify_serviceIP.sh"
+
+LUSTRE_SRV_SCRIPT=$"lustre" # service script for lustre
+MON_SRV_SCRIPT=$"mon" # service script for mon
+LUSTRE_MON_SCRIPT=$"simple.health_check.monitor"
+LUSTRE_ALERT_SCRIPT=$"fail_lustre.alert"
+CIB_GEN_SCRIPT=$"/usr/lib/heartbeat/cts/haresources2cib.py"
+
+TMP_DIR=$"/tmp/heartbeat/" # temporary directory
+HACF_TEMP=${TMP_DIR}$"ha.cf.temp"
+AUTHKEYS_TEMP=${TMP_DIR}$"authkeys.temp"
+MONCF_TEMP=${TMP_DIR}$"mon.cf.temp"
+
+HA_DIR=$"/etc/ha.d/" # Heartbeat configuration directory
+MON_DIR=$"/etc/mon/" # mon configuration directory
+CIB_DIR=$"/var/lib/heartbeat/crm/" # cib.xml directory
+
+HBVER_HBV1=$"hbv1" # Heartbeat version 1
+HBVER_HBV2=$"hbv2" # Heartbeat version 2
+
+declare -a NODE_NAMES # node names in the failover group
+
+# Get and check the positional parameters
+while getopts "r:n:d:c:s:o:v" OPTION; do
+ case $OPTION in
+ r)
+ HBVER_OPT=$OPTARG
+ if [ "${HBVER_OPT}" != "${HBVER_HBV1}" ] \
+ && [ "${HBVER_OPT}" != "${HBVER_HBV2}" ]; then
+ echo >&2 $"`basename $0`: Invalid Heartbeat software" \
+ "version - ${HBVER_OPT}!"
+ usage
+ fi
+ ;;
+ n)
+ HOSTNAME_OPT=$OPTARG
+ HOSTNAME_NUM=`echo ${HOSTNAME_OPT} | awk -F":" '{print NF}'`
+ if [ ${HOSTNAME_NUM} -lt 2 ]; then
+ echo >&2 $"`basename $0`: Lack failover nodenames!"
+ usage
+ fi
+ ;;
+ d)
+ DEVICE_OPT=$OPTARG
+ TARGET_DEV=`echo ${DEVICE_OPT} | awk -F":" '{print $1}'`
+ TARGET_TYPE=`echo ${DEVICE_OPT} | awk -F":" '{print $2}'`
+ if [ -z "${TARGET_TYPE}" ]; then
+ echo >&2 $"`basename $0`: Lack target device type!"
+ usage
+ fi
+ if [ "${TARGET_TYPE}" != "mgs" ]&&[ "${TARGET_TYPE}" != "mdt" ]\
+ &&[ "${TARGET_TYPE}" != "ost" ]&&[ "${TARGET_TYPE}" != "mgs_mdt" ]
+ then
+ echo >&2 $"`basename $0`: Invalid target device type" \
+ "- ${TARGET_TYPE}!"
+ usage
+ fi
+ ;;
+ c)
+ HBCHANNEL_OPT=$OPTARG
+ HBCHANNEL_OPT=`echo "${HBCHANNEL_OPT}" | sed 's/^"//' \
+ | sed 's/"$//'`
+ if [ "${HBCHANNEL_OPT}" = "${HBCHANNEL_OPT#*serial*}" ] \
+ && [ "${HBCHANNEL_OPT}" = "${HBCHANNEL_OPT#*bcast*}" ] \
+ && [ "${HBCHANNEL_OPT}" = "${HBCHANNEL_OPT#*ucast*}" ] \
+ && [ "${HBCHANNEL_OPT}" = "${HBCHANNEL_OPT#*mcast*}" ]; then
+ echo >&2 $"`basename $0`: Invalid Heartbeat channel" \
+ "- ${HBCHANNEL_OPT}!"
+ usage
+ fi
+ ;;
+ s)
+ SRVADDR_OPT=$OPTARG
+ ;;
+ o)
+ HBOPT_OPT=$OPTARG
+ HBOPT_OPT=`echo "${HBOPT_OPT}" | sed 's/^"//' | sed 's/"$//'`
+ ;;
+ v)
+ VERBOSE_OPT=$"yes"
+ ;;
+ ?)
+ usage
+ esac
+done
+
+# Check the required parameters
+if [ -z "${HBVER_OPT}" ]; then
+ echo >&2 $"`basename $0`: Lack -r option!"
+ usage
+fi
+
+if [ -z "${HOSTNAME_OPT}" ]; then
+ echo >&2 $"`basename $0`: Lack -n option!"
+ usage
+fi
+
+if [ -z "${DEVICE_OPT}" ]; then
+ echo >&2 $"`basename $0`: Lack -d option!"
+ usage
+fi
+
+if [ -z "${HBCHANNEL_OPT}" ]; then
+ echo >&2 $"`basename $0`: Lack -c option!"
+ usage
+fi
+
+if [ -z "${SRVADDR_OPT}" ]; then
+ echo >&2 $"`basename $0`: Lack -s option!"
+ usage
+fi
+
+if [ "${HBVER_OPT}" = "${HBVER_HBV1}" -a ${HOSTNAME_NUM} -gt 2 ]; then
+ echo >&2 $"`basename $0`: Heartbeat version 1 can only support 2 nodes!"
+ usage
+fi
+
+# Output verbose informations
+verbose_output() {
+ if [ "${VERBOSE_OPT}" = "yes" ]; then
+ echo "`basename $0`: $*"
+ fi
+ return 0
+}
+
+# get_nodenames
+#
+# Get all the node names in this failover group
+get_nodenames() {
+ PRIM_NODENAME=`echo ${HOSTNAME_OPT} | awk -F":" '{print $1}'`
+
+ declare -i idx
+ local nodename_str nodename
+
+ nodename_str=`echo ${HOSTNAME_OPT}|awk '{split($HOSTNAME_OPT, a, ":")}\
+ END {for (i in a) print a[i]}'`
+ idx=0
+ for nodename in ${nodename_str}
+ do
+ NODE_NAMES[idx]=${nodename}
+ idx=$idx+1
+ done
+
+ return 0
+}
+
+# check_srvIPaddr
+#
+# Check service IP address in this failover group
+check_srvIPaddr() {
+ declare -i idx
+
+ for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
+ # Check service IP address
+ verbose_output "Verifying service IP ${SRVADDR_OPT} and" \
+ "real IP of host ${NODE_NAMES[idx]} are in the" \
+ "same subnet..."
+ if ! ${SCRIPT_VERIFY_SRVIP} ${SRVADDR_OPT} ${NODE_NAMES[idx]}
+ then
+ return 1
+ fi
+ verbose_output "OK"
+ done
+
+ return 0
+}
+
+# stop_heartbeat
+#
+# Run pdsh command to stop each node's heartbeat service
+stop_heartbeat() {
+ declare -i idx
+ local nodename_str=${PRIM_NODENAME}
+
+ for ((idx = 1; idx < ${#NODE_NAMES[@]}; idx++)); do
+ nodename_str=${nodename_str}$","${NODE_NAMES[idx]}
+ done
+
+ ${PDSH} -w ${nodename_str} /sbin/service heartbeat stop
+ if [ $? -ne 0 ]; then
+ echo >&2 "`basename $0`: stop_heartbeat() error:"\
+ "Fail to execute pdsh command!"
+ return 1
+ fi
+
+ return 0
+}
+
+# create_template
+#
+# Create the templates for ha.cf, authkeys and mon.cf files
+create_template() {
+ /bin/mkdir -p ${TMP_DIR}
+
+ # Create the template for ha.cf
+ if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then
+ cat >${HACF_TEMP} <<EOF
+debugfile /var/log/ha-debug
+logfile /var/log/ha-log
+logfacility local0
+keepalive 2
+deadtime 15
+warntime 10
+initdead 120
+
+EOF
+ elif [ "${HBVER_OPT}" = "${HBVER_HBV2}" ]; then
+ cat >${HACF_TEMP} <<EOF
+logfacility daemon
+use_logd yes
+keepalive 2
+deadtime 15
+warntime 10
+initdead 120
+
+EOF
+ fi
+
+ # Create the template for authkeys
+ if [ ! -s ${AUTHKEYS_TEMP} ]; then
+ cat >${AUTHKEYS_TEMP} <<EOF
+auth 1
+1 sha1 HelloLustre!
+EOF
+ fi
+
+ # Create the template for mon.cf
+ if [ ! -s ${MONCF_TEMP} ]; then
+ cat >${MONCF_TEMP} <<EOF
+cfbasedir = /etc/mon
+alertdir = /usr/lib/mon/alert.d
+mondir = /usr/lib/mon/mon.d
+statedir = /usr/lib/mon/state.d
+logdir = /usr/lib/mon/log.d
+dtlogfile = /usr/lib/mon/log.d/downtime.log
+maxprocs = 20
+histlength = 100
+randstart = 60s
+
+authtype = getpwnam
+
+EOF
+ fi
+
+ return 0
+}
+
+# gen_udpport
+#
+# Generate the UDP port number for Heartbeat bcast/ucast communication
+# The default value for udpport option in ha.cf is 694. If there are multiple
+# bcast failover groups on the same subnet, this value should be different for
+# each of the failover groups.
+gen_udpport() {
+ local port_file
+ declare -i default_port=694
+ declare -i dynamic_port=49152
+ declare -i port=0
+ declare -i tmp_port
+ declare -i idx
+
+ UDPPORT_PRIMNODE=${TMP_DIR}$"udpport."${PRIM_NODENAME}
+
+ if [ -s ${UDPPORT_PRIMNODE} ]; then
+ cat ${UDPPORT_PRIMNODE}
+ return 0
+ fi
+
+ # Get the current maximum UDP port number in the cluster
+ for port_file in `ls ${TMP_DIR}udpport.*`
+ do
+ if [ $? -ne 0 ]; then
+ break
+ fi
+ tmp_port=$(cat ${port_file})
+ if [ $? -ne 0 ]; then
+ break
+ fi
+
+ if [ ${tmp_port} -gt ${port} ]; then
+ port=${tmp_port}
+ fi
+ done
+
+ # Generate and check a new UDP port number
+ if [ ${port} -eq 0 ]; then
+ port=${default_port}
+ elif [ ${port} -eq ${default_port} ]; then
+ port=${dynamic_port}
+ else
+ port=${port}+1
+ if [ ${port} -gt 65535 ]; then
+ echo >&2 $"`basename $0`: Invalid UDP port" \
+ "- ${port}!"
+ return 1
+ fi
+ fi
+
+ # Add the UDP port number into each failover node's udpport file
+ for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
+ UDPPORT_NODE=${TMP_DIR}$"udpport."${NODE_NAMES[idx]}
+ echo ${port} > ${UDPPORT_NODE}
+ done
+
+ echo ${port}
+ return 0
+}
+
+# create_hacf
+#
+# Create the ha.cf file and scp it to each node's /etc/ha.d/
+create_hacf() {
+ HACF_PRIMNODE=${TMP_DIR}$"ha.cf."${PRIM_NODENAME}
+
+ declare -i idx
+
+ if [ -s ${HACF_PRIMNODE} ]; then
+ # The ha.cf file for the primary node has already existed.
+ verbose_output "${HACF_PRIMNODE} already exists."
+ return 0
+ fi
+
+ /bin/cp -f ${HACF_TEMP} ${HACF_PRIMNODE}
+
+ if [ "${HBCHANNEL_OPT}" != "${HBCHANNEL_OPT#*bcast*}" ] \
+ || [ "${HBCHANNEL_OPT}" != "${HBCHANNEL_OPT#*ucast*}" ]; then
+ UDPPORT_OPT=$(gen_udpport)
+ if [ $? -ne 0 ]; then
+ return 1
+ fi
+ echo "udpport ${UDPPORT_OPT}" >> ${HACF_PRIMNODE}
+ fi
+
+ if [ "${HBCHANNEL_OPT}" != "${HBCHANNEL_OPT#*serial*}" ]; then
+ echo "baud 19200" >> ${HACF_PRIMNODE}
+ fi
+
+ echo ${HBCHANNEL_OPT} | awk '{split($HBCHANNEL_OPT, a, ":")} \
+ END {for (i in a) print a[i]}' >> ${HACF_PRIMNODE}
+
+ for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
+ echo "node ${NODE_NAMES[idx]}" >> ${HACF_PRIMNODE}
+ done
+
+ echo ${HBOPT_OPT} | awk '{split($HBOPT_OPT, a, ":")} \
+ END {for (i in a) print a[i]}' >> ${HACF_PRIMNODE}
+
+ # scp ha.cf file to all the nodes
+ for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
+ touch ${TMP_DIR}$"ha.cf."${NODE_NAMES[idx]}
+ scp ${HACF_PRIMNODE} ${NODE_NAMES[idx]}:${HA_DIR}ha.cf
+ if [ $? -ne 0 ]; then
+ echo >&2 "`basename $0`: Fail to scp ha.cf file"\
+ "to node ${NODE_NAMES[idx]}!"
+ return 1
+ fi
+ done
+
+ return 0
+}
+
+# create_haresources
+#
+# Create the haresources file and scp it to the each node's /etc/ha.d/
+create_haresources() {
+ HARES_PRIMNODE=${TMP_DIR}$"haresources."${PRIM_NODENAME}
+ declare -i idx
+
+ if [ -s ${HARES_PRIMNODE} ]; then
+ # The haresources file for the primary node has already existed
+ verbose_output "${HARES_PRIMNODE} already exists."
+ return 0
+ fi
+
+ # Add the resource group line into the haresources file
+ echo "${PRIM_NODENAME} ${SRVADDR_OPT} "\
+ "${LUSTRE_SRV_SCRIPT}::${TARGET_TYPE}::${TARGET_DEV} "\
+ "${MON_SRV_SCRIPT}" > ${HARES_PRIMNODE}
+
+ # Generate the cib.xml file
+ if [ "${HBVER_OPT}" = "${HBVER_HBV2}" ]; then
+ CIB_PRIMNODE=${TMP_DIR}$"cib.xml."${PRIM_NODENAME}
+ python ${CIB_GEN_SCRIPT} ${HARES_PRIMNODE} > ${CIB_PRIMNODE}
+ if [ $? -ne 0 ]; then
+ echo >&2 "`basename $0`: Fail to generate cib.xml file"\
+ "for node ${PRIM_NODENAME}!"
+ return 1
+ fi
+ fi
+
+ # scp the haresources file or cib.xml file
+ for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
+ touch ${TMP_DIR}$"haresources."${NODE_NAMES[idx]}
+ if [ "${HBVER_OPT}" = "${HBVER_HBV2}" ]; then
+ scp ${CIB_PRIMNODE} ${NODE_NAMES[idx]}:${CIB_DIR}cib.xml
+ else
+ scp ${HARES_PRIMNODE} ${NODE_NAMES[idx]}:${HA_DIR}haresources
+ fi
+
+ if [ $? -ne 0 ]; then
+ echo >&2 "`basename $0`: Fail to scp haresources file"\
+ "to node ${NODE_NAMES[idx]}!"
+ return 1
+ fi
+ done
+
+ return 0
+}
+
+# create_authkeys
+#
+# Create the authkeys file and scp it to the each node's /etc/ha.d/
+create_authkeys() {
+ AUTHKEYS_PRIMNODE=${TMP_DIR}$"authkeys."${PRIM_NODENAME}
+ declare -i idx
+
+ if [ -e ${AUTHKEYS_PRIMNODE} ]; then
+ verbose_output "${AUTHKEYS_PRIMNODE} already exists."
+ return 0
+ fi
+
+ # scp the authkeys file to all the nodes
+ for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
+ touch ${TMP_DIR}$"authkeys."${NODE_NAMES[idx]}
+ scp ${AUTHKEYS_TEMP} ${NODE_NAMES[idx]}:${HA_DIR}authkeys
+ if [ $? -ne 0 ]; then
+ echo >&2 "`basename $0`: Fail to scp authkeys file"\
+ "to node ${NODE_NAMES[idx]}!"
+ return 1
+ fi
+ done
+
+ return 0
+}
+
+# create_moncf
+#
+# Create the mon.cf file and scp it to the each node's /etc/mon/
+create_moncf() {
+ MONCF_PRIMNODE=${TMP_DIR}$"mon.cf."${PRIM_NODENAME}
+ declare -i idx
+ local hostgroup_str=$"hostgroup ${TARGET_TYPE}-group"
+
+ if [ -e ${MONCF_PRIMNODE} ]; then
+ verbose_output "${MONCF_PRIMNODE} already exists."
+ return 0
+ fi
+
+ /bin/cp -f ${MONCF_TEMP} ${MONCF_PRIMNODE}
+
+ for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
+ hostgroup_str=${hostgroup_str}$" "${NODE_NAMES[idx]}
+ done
+
+ echo ${hostgroup_str} >> ${MONCF_PRIMNODE}
+
+ cat >>${MONCF_PRIMNODE} <<EOF
+
+watch ${TARGET_TYPE}-group
+ service ${LUSTRE_SRV_SCRIPT}
+ description Lustre health check
+ interval 1m
+ monitor ${LUSTRE_MON_SCRIPT} -o ${TARGET_TYPE}
+ period wd {Sat-Sun}
+ alert ${LUSTRE_ALERT_SCRIPT}
+
+EOF
+ # scp the mon.cf file to all the nodes
+ for ((idx = 0; idx < ${#NODE_NAMES[@]}; idx++)); do
+ touch ${TMP_DIR}$"mon.cf."${NODE_NAMES[idx]}
+ scp ${MONCF_PRIMNODE} ${NODE_NAMES[idx]}:${MON_DIR}mon.cf
+ if [ $? -ne 0 ]; then
+ echo >&2 "`basename $0`: Fail to scp mon.cf file"\
+ "to node ${NODE_NAMES[idx]}!"
+ return 1
+ fi
+ done
+
+ return 0
+}
+
+# generate_config
+#
+# Generate the configuration files for Heartbeat and scp them to all the nodes
+generate_config() {
+ if ! create_template; then
+ return 1
+ fi
+
+ verbose_output "Creating and remote copying ha.cf file to"\
+ "${PRIM_NODENAME} failover group hosts..."
+ if ! create_hacf; then
+ return 1
+ fi
+ verbose_output "OK"
+
+ if [ "${HBVER_OPT}" = "${HBVER_HBV1}" ]; then
+ verbose_output "Creating and remote copying haresources file"\
+ "to ${PRIM_NODENAME} failover group hosts..."
+ else
+ verbose_output "Creating and remote copying cib.xml file"\
+ "to ${PRIM_NODENAME} failover group hosts..."
+ fi
+
+ if ! create_haresources; then
+ return 1
+ fi
+ verbose_output "OK"
+
+ verbose_output "Creating and remote copying authkeys file to" \
+ "${PRIM_NODENAME} failover group hosts..."
+ if ! create_authkeys; then
+ return 1
+ fi
+ verbose_output "OK"
+
+ verbose_output "Creating and remote copying mon.cf file to" \
+ "${PRIM_NODENAME} failover group hosts..."
+ if ! create_moncf; then
+ return 1
+ fi
+ verbose_output "OK"
+
+ return 0
+}
+
+# Main flow
+# Get all the node names
+if ! get_nodenames; then
+ exit 1
+fi
+
+# Check service IP address
+if ! check_srvIPaddr; then
+ exit 1
+fi
+
+# Stop heartbeat services
+verbose_output "Stopping heartbeat service in the ${PRIM_NODENAME}"\
+ "failover group hosts..."
+if ! stop_heartbeat; then
+ exit 1
+fi
+verbose_output "OK"
+
+# Generate configuration files
+if ! generate_config; then
+ exit 1
+fi
+
+exit 0
--- /dev/null
+#!/bin/bash
+#
+# module_config.sh - add lustre options lines into modprobe.conf or
+# modules.conf
+#
+#################################################################################
+
+# Check the kernel version
+KERNEL_VERSION=`uname -r`
+KERNEL_VERSION=${KERNEL_VERSION:0:3}
+
+if [ "${KERNEL_VERSION}" = "2.4" ]; then
+ MODULE_CONF=/etc/modules.conf
+else
+ MODULE_CONF=/etc/modprobe.conf
+fi
+
+read -r NETWORKS
+MODLINES_FILE=/tmp/modlines$$.txt
+START_MARKER=$"# start lustre config"
+END_MARKER=$"# end lustre config"
+
+# Generate a temp file contains lnet options lines
+generate_lnet_lines() {
+ local LNET_LINE TMP_LINE
+
+ TMP_LINE="${NETWORKS}"
+
+ echo ${START_MARKER} > ${MODLINES_FILE}
+ while true; do
+ LNET_LINE=${TMP_LINE%%\\n*}
+ echo ${LNET_LINE} >> ${MODLINES_FILE}
+
+ TMP_LINE=${TMP_LINE#*\\n}
+
+ if [ "${TMP_LINE}" == "${LNET_LINE}" ]; then
+ break
+ fi
+ done
+ echo ${END_MARKER} >> ${MODLINES_FILE}
+
+ #echo "--------------${MODLINES_FILE}--------------"
+ #cat ${MODLINES_FILE}
+ #echo -e "------------------------------------------\n"
+
+ return 0
+}
+
+if ! generate_lnet_lines; then
+ exit 1
+fi
+
+# Add lnet options lines to the module configuration file
+if [ -e ${MODULE_CONF} ]; then
+ # Delete the old options
+ sed -i "/${START_MARKER}/,/${END_MARKER}/d" ${MODULE_CONF}
+fi
+
+cat ${MODLINES_FILE} >> ${MODULE_CONF}
+rm -f ${MODLINES_FILE}
+exit 0
--- /dev/null
+#!/bin/bash
+#
+# verify_cluster_net.sh - script for Lustre cluster network verification
+#
+###############################################################################
+
+# Usage
+usage() {
+ cat >&2 <<EOF
+
+Usage: `basename $0` [-v] <csv file>
+
+ -v verbose mode
+ csv file a spreadsheet that contains configuration parameters
+ (separated by commas) for each target in a Lustre cl-
+ uster, the first field of each line is the host name
+ of the cluster node
+
+EOF
+ exit 1
+}
+
+# Get and check the positional parameters
+while getopts "v" OPTION; do
+ case $OPTION in
+ v)
+ VERBOSE_OPT=$"yes"
+ ;;
+ ?)
+ usage
+ esac
+done
+
+# Toss out the parameters we've already processed
+shift `expr $OPTIND - 1`
+
+# Here we expect the csv file
+if [ $# -eq 0 ]; then
+ echo >&2 $"`basename $0`: Lack csv file!"
+ usage
+fi
+
+# Global variables
+CSV_FILE=$1
+declare -a HOST_NAMES
+declare -a HOST_IPADDRS
+
+# Output verbose informations
+verbose_output() {
+ if [ "${VERBOSE_OPT}" = "yes" ]; then
+ echo "`basename $0`: $*"
+ fi
+ return 0
+}
+
+# Check the csv file
+check_file() {
+ if [ ! -s ${CSV_FILE} ]; then
+ echo >&2 $"`basename $0`: check_file() error: ${CSV_FILE}" \
+ "does not exist or is empty!"
+ return 1
+ fi
+
+ return 0
+}
+
+# Get the host names from the csv file
+get_hostnames() {
+ local NAME CHECK_STR
+ declare -i i
+
+ # Initialize the HOST_NAMES array
+ for ((i = 0; i < ${#HOST_NAMES[@]}; i++)); do
+ HOST_NAMES[i]=$""
+ done
+
+ CHECK_STR=`egrep -v "([[:space:]]|^)#" ${CSV_FILE} | awk -F, \
+ '/[[:alnum:]]/{if ($1 !~/[[:alnum:]]/) print $0}'`
+ if [ -n "${CHECK_STR}" ]; then
+ echo >&2 $"`basename $0`: get_hostnames() error: Lack hostname"\
+ "field in the line - ${CHECK_STR}"
+ return 1
+ fi
+
+ i=0
+ for NAME in `egrep -v "([[:space:]]|^)#" ${CSV_FILE}\
+ | awk -F, '/[[:alnum:]]/{print $1}'`
+ do
+ HOST_NAMES[i]=${NAME}
+ i=$i+1
+ done
+
+ return 0
+}
+
+# Check whether the host name matches the name in the local /etc/hosts table
+# and whether the IP address according to the host name is correct
+local_check() {
+ # Check argument
+ if [ $# -ne 2 ]; then
+ echo >&2 $"`basename $0`: local_check() error: Lack argument" \
+ "for function local_check()!"
+ return 1
+ fi
+
+ local RET_STR REAL_NAME
+
+ # Get the IP address according to the host name from /etc/hosts table
+ # of the current host
+ HOST_IPADDRS[$2]=`egrep "[[:space:]]$1([[:space:]]|$)" /etc/hosts \
+ | awk '{print $1}'`
+ if [ -z "${HOST_IPADDRS[$2]}" ]; then
+ echo >&2 "`basename $0`: local_check() error: $1 does not" \
+ "exist in the local /etc/hosts table!"
+ return 1
+ fi
+
+ if [ ${#HOST_IPADDRS[$2]} -gt 15 ]; then
+ echo >&2 "`basename $0`: local_check() error: More than one" \
+ "IP address line according to $1 in the local" \
+ "/etc/hosts table!"
+ return 1
+ fi
+
+ # Execute pdsh command to get the real host name
+ RET_STR=`${PDSH} -w ${HOST_IPADDRS[$2]} hostname 2>&1`
+ if [ $? -ne 0 ] || [ "${RET_STR}" != "${RET_STR#*connect:*}" ]; then
+ echo >&2 "`basename $0`: local_check() error: pdsh error:" \
+ "${RET_STR}"
+ return 1
+ fi
+
+ if [ -z "${RET_STR}" ]; then
+ echo >&2 "`basename $0`: local_check() error: pdsh error:" \
+ "No results from pdsh! Check the network connectivity"\
+ "between the local host and ${HOST_IPADDRS[$2]}" \
+ "or check the two hosts' rcmd module!"
+ return 1
+ fi
+
+ REAL_NAME=`echo ${RET_STR} | awk '{print $2}'`
+ if [ "$1" != "${REAL_NAME}" ]; then
+ echo >&2 "`basename $0`: local_check() error: The real hostname"\
+ "according to ${HOST_IPADDRS[$2]} is ${REAL_NAME}," \
+ "not $1! Check the local /etc/hosts table!"
+ return 1
+ fi
+
+ return 0
+}
+
+# Check whether the correct host name and IP address pair matches
+# the one in the remote /etc/hosts tables
+remote_check() {
+ # Check argument
+ if [ $# -ne 2 ]; then
+ echo >&2 $"`basename $0`: remote_check() error: Lack argument"\
+ "for function remote_check()!"
+ return 1
+ fi
+
+ declare -i i
+ local RET_STR COMMAND IP_ADDR
+
+ COMMAND=$"egrep \"[[:space:]]$1([[:space:]]|$)\" /etc/hosts"
+
+ # Execute pdsh command to check remote /etc/hosts tables
+ for ((i = 0; i < ${#HOST_NAMES[@]}; i++)); do
+ RET_STR=`${PDSH} -w ${HOST_NAMES[i]} ${COMMAND} 2>&1`
+ if [ $? -ne 0 ] || [ "${RET_STR}" != "${RET_STR#*connect:*}" ]
+ then
+ echo >&2 "`basename $0`: remote_check() error:" \
+ "pdsh error: ${RET_STR}"
+ return 1
+ fi
+
+ IP_ADDR=`echo ${RET_STR} | awk '{print $2}'`
+ if [ -z "${IP_ADDR}" ]; then
+ echo >&2 "`basename $0`: remote_check() error:" \
+ "$1 does not exist in the ${HOST_NAMES[i]}'s"\
+ "/etc/hosts table!"
+ return 1
+ fi
+
+ if [ "${IP_ADDR}" != "${HOST_IPADDRS[$2]}" ]; then
+ echo >&2 "`basename $0`: remote_check() error:" \
+ "IP address ${IP_ADDR} of $1 in the" \
+ "${HOST_NAMES[i]}'s /etc/hosts is incorrect!"
+ return 1
+ fi
+
+ done
+
+ return 0
+}
+
+# Verify forward and reverse network connectivity of the Lustre cluster
+network_check () {
+ # Check argument
+ if [ $# -eq 0 ]; then
+ echo >&2 $"`basename $0`: network_check() error: Lack argument" \
+ "for function network_check()!"
+ return 1
+ fi
+
+ declare -i i
+ local RET_STR COMMAND REAL_NAME
+
+ # Execute pdsh command to check network connectivity
+ for ((i = 0; i < ${#HOST_NAMES[@]}; i++)); do
+ COMMAND=$"${PDSH} -w ${HOST_NAMES[i]} hostname"
+ RET_STR=`${PDSH} -w $1 ${COMMAND} 2>&1`
+ if [ $? -ne 0 ] || [ "${RET_STR}" != "${RET_STR#*connect:*}" ]
+ then
+ echo >&2 "`basename $0`: network_check() error:" \
+ "pdsh error: ${RET_STR}"
+ return 1
+ fi
+
+ if [ -z "${RET_STR}" ]; then
+ echo >&2 "`basename $0`: network_check() error:" \
+ "pdsh error: Nothing get from pdsh! Check" \
+ "the network connectivity between $1 and" \
+ "${HOST_NAMES[i]} or the two hosts' rcmd module!"
+ return 1
+ fi
+
+ REAL_NAME=`echo ${RET_STR} | awk '{print $3}'`
+ if [ "${HOST_NAMES[i]}" != "${REAL_NAME}" ]; then
+ echo >&2 "`basename $0`: network_check() error:" \
+ "${RET_STR}"
+ return 1
+ fi
+ done
+
+ return 0
+}
+
+# Verify forward and reverse network connectivity of the Lustre cluster,
+# and that hostnames match the names in the /etc/hosts tables.
+network_verify() {
+ declare -i i
+
+ # Initialize the HOST_IPADDRS array
+ for ((i = 0; i < ${#HOST_IPADDRS[@]}; i++)); do
+ HOST_IPADDRS[i]=$""
+ done
+
+ # Get all the host names from the csv file
+ if ! get_hostnames; then
+ return 1
+ fi
+
+ # Check whether all the host names match the names in
+ # all the /etc/hosts tables of the Lustre cluster
+ for ((i = 0; i < ${#HOST_NAMES[@]}; i++)); do
+ verbose_output "Verifying IP address of host" \
+ "${HOST_NAMES[i]} in the local /etc/hosts..."
+ if ! local_check ${HOST_NAMES[i]} $i; then
+ return 1
+ fi
+ verbose_output "OK"
+ done
+
+ for ((i = 0; i < ${#HOST_NAMES[@]}; i++)); do
+ verbose_output "Verifying IP address of host" \
+ "${HOST_NAMES[i]} in the remote /etc/hosts..."
+ if ! remote_check ${HOST_NAMES[i]} $i; then
+ return 1
+ fi
+ verbose_output "OK"
+ done
+
+ # Verify network connectivity of the Lustre cluster
+ for ((i = 0; i < ${#HOST_NAMES[@]}; i++)); do
+ verbose_output "Verifying network connectivity of host" \
+ "${HOST_NAMES[i]} to other hosts..."
+ if ! network_check ${HOST_NAMES[i]}; then
+ return 1
+ fi
+ verbose_output "OK"
+ done
+
+ return 0
+}
+
+# Main flow
+if ! check_file; then
+ exit 1
+fi
+
+if ! network_verify; then
+ exit 1
+fi
+
+exit 0
--- /dev/null
+#!/bin/bash
+#
+# verify_serviceIP.sh - script for verifying the service IP and the real
+# interface IP in a remote host are in the same subnet
+#
+###############################################################################
+
+# Usage
+usage() {
+ cat >&2 <<EOF
+
+Usage: `basename $0` <service IPaddr> <hostname>
+
+ service IPaddr the IP address to failover
+ hostname the hostname of the remote node
+
+EOF
+ exit 1
+}
+
+# Check arguments
+if [ $# -lt 2 ]; then
+ usage
+fi
+
+#
+# inSameIPsubnet serviceIPaddr interfaceIPaddr mask
+#
+# Given two IP addresses and a subnet mask determine if these IP
+# addresses are in the same subnet. If they are, return 0, else return 1.
+#
+inSameIPsubnet() {
+ declare -i n
+ declare -ia mask
+ declare -ia ip1 ip2 # IP addresses given
+ declare -i quad1 quad2 # calculated quad words
+
+ #
+ # Remove '.' characters from dotted decimal notation and save
+ # in arrays. i.e.
+ #
+ # 192.168.1.163 -> array[0] = 192
+ # array[1] = 168
+ # array[2] = 1
+ # array[3] = 163
+ #
+ let n=0
+ for quad in $(echo $1 | awk -F. '{print $1 " " $2 " " $3 " " $4}')
+ do
+ ip1[n]=$quad
+ let n=n+1
+ done
+
+ let n=0
+ for quad in $(echo $2 | awk -F. '{print $1 " " $2 " " $3 " " $4}')
+ do
+ ip2[n]=$quad
+ let n=n+1
+ done
+
+ let n=0
+ for quad in $(echo $3 | awk -F. '{print $1 " " $2 " " $3 " " $4}')
+ do
+ mask[n]=$quad
+ let n=n+1
+ done
+
+ #
+ # For each quad word, logically AND the IP address with the subnet
+ # mask to get the network/subnet quad word. If the resulting
+ # quad words for both IP addresses are the same they are in the
+ # same IP subnet.
+ #
+ for n in 0 1 2 3
+ do
+ let $((quad1=${ip1[n]} & ${mask[n]}))
+ let $((quad2=${ip2[n]} & ${mask[n]}))
+
+ if [ $quad1 != $quad2 ]; then
+ echo >&2 $"`basename $0`: Service IP address $1 and"\
+ "real interface IP address $2 are in"\
+ "different subnets!"
+ return 1 # in different subnets
+ fi
+ done
+
+ return 0 # in the same subnet, all quad words matched
+}
+
+#
+# findInterface IPaddr hostname
+#
+# Given a target IP address and a hostname, find the interface in which
+# this address is configured. If found return 0, if not return 1. The
+# interface name is returned to stdout.
+#
+findInterface() {
+ declare host
+ declare line
+ declare intf
+ declare addr
+ declare state
+
+ declare target=$1
+ declare hostname=$2
+
+ {
+ while read host intf line
+ do
+ while read host line
+ do
+ if [ "$line" = "" ]; then # go to next interface
+ continue 2
+ fi
+
+ set - $line
+ addr=
+ while [ $# -gt 0 ]; do
+ case $1 in
+ addr:*)
+ addr=${1##addr:}
+ if [ -n "$addr" -a "$addr" = "$target" ]
+ then
+ echo $intf
+ return 0
+ fi
+ ;;
+ esac
+ shift
+ done
+ done
+ done
+ } < <(${PDSH} -w $hostname /sbin/ifconfig)
+
+ echo >&2 "`basename $0`: Cannot find the interface in which" \
+ "$target is configured in the host $hostname!"
+ return 1
+}
+
+#
+# findNetmask interface hostname
+#
+# Given an interface find the netmask addresses associated with it.
+# Return 0 when found, else return 1. The netmask is returned to stdout.
+#
+findNetmask() {
+ declare line
+ declare addr
+ declare target=$1
+ declare hostname=$2
+
+ while read line
+ do
+ set - $line
+
+ while [ $# -gt 0 ]; do
+ case $1 in
+ Mask:*)
+ echo ${1##*:} # return netmask addr
+ return 0
+ ;;
+ esac
+ shift
+ done
+ done < <(${PDSH} -w $hostname /sbin/ifconfig $target)
+
+ echo >&2 "`basename $0`: Cannot find the netmask associated with" \
+ "the interface $target in the host $hostname!"
+ return 1
+}
+
+#
+# check_srvIPaddr serviceIPaddr hostname
+#
+# Given a service IP address and hostname, check whether the service IP address
+# and the real interface IP address of hostname are in the same subnet.
+# If they are, return 0, else return 1.
+#
+check_srvIPaddr() {
+ declare real_IPaddr
+ declare real_intf
+ declare netmask
+ declare srv_IPaddr=$1
+ declare hostname=$2
+
+ # Get the IP address from /etc/hosts table according to the hostname
+ real_IPaddr=`egrep "[[:space:]]$hostname([[:space:]]|$)" /etc/hosts \
+ | awk '{print $1}'`
+ if [ -z "$real_IPaddr" ]; then
+ echo >&2 "`basename $0`: $hostname does not exist in" \
+ "the local /etc/hosts table!"
+ return 1
+ fi
+
+ if [ ${#real_IPaddr} -gt 15 ]; then
+ echo >&2 "`basename $0`: More than one IP address line" \
+ "according to $hostname in the local /etc/hosts table!"
+ return 1
+ fi
+
+ # Get the interface in which the real IP address is configured
+ real_intf=$(findInterface $real_IPaddr $hostname)
+ if [ $? -ne 0 ]; then
+ return 1
+ fi
+ real_intf=${real_intf%%:*}
+
+ # Get the netmask address associated with the real interface
+ netmask=$(findNetmask $real_intf $hostname)
+ if [ $? -ne 0 ]; then
+ return 1
+ fi
+
+ # Determine if the service IP address and the real IP address
+ # are in the same subnet
+ inSameIPsubnet $srv_IPaddr $real_IPaddr $netmask
+ if [ $? -ne 0 ]; then
+ return 1
+ fi
+
+ return 0
+}
+
+# Check service IP address
+if ! check_srvIPaddr $1 $2; then
+ exit 1
+fi
+exit 0
+++ /dev/null
-#!/bin/sh
-
-echo primary `date` >> /tmp/halog
-
-
+++ /dev/null
-#!/bin/bash
-set -vx
-date
-echo "ha assist checking for problems"
-sleep 3
-if [ ! -e /tmp/halog ]; then
- echo "no problems, exiting"
- exit
-fi
-
-echo "removing /tmp/halog"
-rm /tmp/halog
-
-echo secondary start `date`
-echo "- please supply a new mds"
-
-# invoke ldap client here
-
-
-/usr/src/portals/linux/utils/ptlctl <<EOF3
-setup tcp
-close_uuid mds
-del_uuid mds
-connect dev5 988
-add_uuid mds
-quit
-EOF3
-
-echo "connected to new MDS!"
-
-/usr/src/obd/utils/obdctl <<EOF2
-name2dev RPCDEV
-newconn
-quit
-EOF2
log(self.module_name, "create mtime LOGS cmdfile failed: ", self.name)
else:
mtimecmdfile = string.split(mktemp[0])[0]
- #mtimecmdfile="/tmp/lustre-cmd.XXXXXXXX"
fd = os.open(mtimecmdfile, os.O_RDWR | os.O_CREAT)
os.write(fd, "\n\n\n\n\n%s\n\n" %mtime)
os.close(fd)
extra_error = ""
panic("Error creating " + target_symlink + ": " + e[1] + extra_error)
-# Check mtime of LLOG
+# Check mtime of config logs
def doCheckMtime(lustreDB, hosts):
for h in hosts:
node_db = lustreDB.lookup_name(h, 'node')
"setup for elan/myrinet networks.\n"
"usage: mynid [<nid>]"},
{"add_uuid", jt_lcfg_add_uuid, 0, "associate a UUID with a nid\n"
- "usage: add_uuid <uuid> <nid> <net_type>"},
+ "usage: add_uuid <uuid> <nid>"},
{"close_uuid", jt_obd_close_uuid, 0, "disconnect a UUID\n"
"usage: close_uuid <uuid> <net_type>"},
{"del_uuid", jt_lcfg_del_uuid, 0, "delete a UUID association\n"
/* Device configuration commands */
{"==== device config =====", jt_noop, 0, "device config"},
{"attach", jt_lcfg_attach, 0,
- "set the type of the current device (with <name> and <uuid>)\n"
- "usage: attach type [name [uuid]]"},
+ "set the type, name, and uuid of the current device\n"
+ "usage: attach type name uuid"},
{"setup", jt_lcfg_setup, 0,
"type specific device configuration information\n"
"usage: setup <args...>"},
"usage: dump_log config-uuid-name"},
{"clear_log", jt_cfg_clear_log, 0, "delete current config log of recorded commands\n"
"usage: clear_log config-name"},
+ {"conf_param", jt_lcfg_mgsparam, 0, "set a permanent config param\n"
+ "usage: conf_param <keyword=val> ...\n"},
+
/* Device operations */
{"=== device operations ==", jt_noop, 0, "device operations"},
{"del_mount_option", jt_lcfg_del_mount_option, 0,
"usage: del_mount_option profile\n"},
{"set_timeout", jt_lcfg_set_timeout, 0,
- "usage: set_timeout <secs>\n"},
+ "usage: conf_param obd_timeout=<secs>\n"},
{"set_lustre_upcall", jt_lcfg_set_lustre_upcall, 0,
"usage: set_lustre_upcall </full/path/to/upcall> \n"},
{"add_conn ", jt_lcfg_add_conn, 0,
"usage: add_conn <conn_uuid> [priority]\n"},
{"del_conn ", jt_lcfg_del_conn, 0,
"usage: del_conn <conn_uuid> \n"},
-
+ {"local_param", jt_lcfg_param, 0, "set a temporary, local param\n"
+ "usage: local_param <keyword=val> ...\n"},
+
/* Llog operations */
{"llog_catlist", jt_llog_catlist, 0,
"list all catalog logs on current device.\n"
} else {
mnt = getmntent(fp);
while (feof(fp) == 0 && ferror(fp) ==0) {
- if (llapi_is_lustre_mnttype(mnt->mnt_type)) {
+ if (llapi_is_lustre_mnttype(mnt)) {
rc = llapi_find(mnt->mnt_dir, obduuid, 0, 0, 0);
if (rc)
fprintf(stderr,
len = 0;
mnt = getmntent(fp);
while (feof(fp) == 0 && ferror(fp) == 0) {
- if (llapi_is_lustre_mnttype(mnt->mnt_type)) {
+ if (llapi_is_lustre_mnttype(mnt)) {
len = strlen(mnt->mnt_dir);
if (len > out_len &&
!strncmp(rpath, mnt->mnt_dir, len)) {
} else {
mnt = getmntent(fp);
while (feof(fp) == 0 && ferror(fp) == 0) {
- if (llapi_is_lustre_mnttype(mnt->mnt_type)) {
+ if (llapi_is_lustre_mnttype(mnt)) {
rc = mntdf(mnt->mnt_dir, ishow, cooked);
if (rc)
break;
} else {
mnt = getmntent(fp);
while (feof(fp) == 0 && ferror(fp) ==0) {
- if (llapi_is_lustre_mnttype(mnt->mnt_type))
+ if (llapi_is_lustre_mnttype(mnt))
break;
mnt = getmntent(fp);
}
} else {
mnt = getmntent(fp);
while (feof(fp) == 0 && ferror(fp) == 0) {
- if (llapi_is_lustre_mnttype(mnt->mnt_type))
+ if (llapi_is_lustre_mnttype(mnt))
break;
mnt = getmntent(fp);
}
return rc;
}
-int llapi_is_lustre_mnttype(char *type)
+/* Is this a lustre client fs? */
+int llapi_is_lustre_mnttype(struct mntent *mnt)
{
- return (strcmp(type,"lustre") == 0 || strcmp(type,"lustre_lite") == 0);
+ char *type = mnt->mnt_type;
+ return ((strcmp(type, "lustre") == 0 || strcmp(type,"lustre_lite") == 0)
+ && (strstr(mnt->mnt_fsname, ":/") != NULL));
}
int llapi_quotacheck(char *mnt, int check_type)
return;
}
+
static void print_setup_cfg(struct lustre_cfg *lcfg)
{
struct lov_desc *desc;
return;
}
-void print_lustre_cfg(struct lustre_cfg *lcfg)
+void print_lustre_cfg(struct lustre_cfg *lcfg, int *skip)
{
enum lcfg_command_type cmd = le32_to_cpu(lcfg->lcfg_command);
+ if (*skip > 0)
+ printf("SKIP ");
+
switch(cmd){
case(LCFG_ATTACH):{
printf("attach ");
break;
}
case(LCFG_MARKER):{
- printf("marker ");
- print_1_cfg(lcfg);
+ struct cfg_marker *marker = lustre_cfg_buf(lcfg, 1);
+
+ if (marker->cm_flags & CM_SKIP) {
+ if (marker->cm_flags & CM_START)
+ (*skip)++;
+ if (marker->cm_flags & CM_END)
+ (*skip)--;
+ }
+ printf("marker %d (flags=%#x) %.16s '%s' %s:%s", marker->cm_step,
+ marker->cm_flags, marker->cm_svname,
+ marker->cm_comment, ctime(&marker->cm_createtime),
+ marker->cm_canceltime ?
+ ctime(&marker->cm_canceltime) : "");
break;
}
default:
void print_records(struct llog_rec_hdr** recs,int rec_number)
{
__u32 lopt;
- int i;
+ int i, skip = 0;
- for(i=0;i<rec_number;i++){
+ for(i = 0; i < rec_number; i++){
printf("#%.2d ", le32_to_cpu(recs[i]->lrh_index));
printf("L ");
lcfg = (struct lustre_cfg *)
((char*)(recs[i]) + sizeof(struct llog_rec_hdr));
- print_lustre_cfg(lcfg);
+ print_lustre_cfg(lcfg, &skip);
}
if (lopt == PTL_CFG_REC){
+++ /dev/null
-#!/bin/bash
-#
-# Load a lustre config xml into an openldap database.
-# See https://projects.clusterfs.com/lustre/LustreLDAP
-# for more details.
-#
-# Usage: load_ldap.sh <xml_file>
-set -e
-
-LDAP_BASE=${LDAP_BASE:-fs=lustre}
-LDAP_ROOTDN=${LDAP_ROOTDN:-cn=Manager,fs=lustre}
-LDAP_PW=${LDAP_PW:-secret}
-LDAP_AUTH="-x -D $LDAP_ROOTDN -w $LDAP_PW"
-LUSTRE=${LUSTRE:-`dirname $0`/..}
-
-if [ -f $LUSTRE/autoMakefile.am ]; then
- CONFDIR=$LUSTRE/conf
-else
- CONFDIR=/usr/lib/lustre
-fi
-
-TOP=$CONFDIR/top.ldif
-XSL=$CONFDIR/lustre2ldif.xsl
-
-[ ! -z $LDAPURL ] && LDAP_AUTH="$LDAP_AUTH -H $LDAPURL"
-
-XML=${XML:-$1}
-
-if [ -z "$XML" ] || [ ! -r $XML ]; then
- echo "usage: $0 xmlfile"
- exit 1
-fi
-
-NAME=`basename $XML .xml`
-LDIF=/tmp/$NAME.ldif
-
-# add the top level record, if needed
-ldapsearch $LDAP_AUTH -b $LDAP_BASE > /dev/null 2>&1 ||
- ldapadd $LDAP_AUTH -f $TOP
-
-# If this config already exists, then delete it
-ldapsearch $LDAP_AUTH -b config=$NAME,$LDAP_BASE > /dev/null 2>&1 &&
- ldapdelete $LDAP_AUTH -r config=$NAME,$LDAP_BASE
-
-4xslt -D config=$NAME $XML $XSL > $LDIF
-
-echo "Loading config to 'config=$NAME,$LDAP_BASE' ..."
-ldapadd $LDAP_AUTH -f $LDIF
-
-rm -f $LDIF
struct lustre_cfg *lcfg;
int rc;
- if (argc != 2 && argc != 3 && argc != 4)
+ if (argc != 4)
return CMD_HELP;
lustre_cfg_bufs_reset(&bufs, NULL);
lustre_cfg_bufs_set_string(&bufs, 1, argv[1]);
- if (argc >= 3) {
- lustre_cfg_bufs_set_string(&bufs, 0, argv[2]);
- } else {
- fprintf(stderr, "error: %s: LCFG_ATTACH requires a name\n",
- jt_cmdname(argv[0]));
- return -EINVAL;
- }
-
- if (argc == 4) {
- lustre_cfg_bufs_set_string(&bufs, 2, argv[3]);
- }
+ lustre_cfg_bufs_set_string(&bufs, 0, argv[2]);
+ lustre_cfg_bufs_set_string(&bufs, 2, argv[3]);
lcfg = lustre_cfg_new(LCFG_ATTACH, &bufs);
rc = lcfg_ioctl(argv[0], OBD_DEV_ID, lcfg);
struct lustre_cfg_bufs bufs;
struct lustre_cfg *lcfg;
+ fprintf(stderr, "%s has been deprecated. Use conf_param instead.\n"
+ "e.g. conf_param lustre-MDT0000 obd_timeout=50\n",
+ jt_cmdname(argv[0]));
+ return CMD_HELP;
+
+
if (argc != 2)
return CMD_HELP;
lcfg->lcfg_num = atoi(argv[1]);
rc = lcfg_ioctl(argv[0], OBD_DEV_ID, lcfg);
+ //rc = lcfg_mgs_ioctl(argv[0], OBD_DEV_ID, lcfg);
+
lustre_cfg_free(lcfg);
if (rc < 0) {
fprintf(stderr, "error: %s: %s\n", jt_cmdname(argv[0]),
return rc;
}
+
+/* Param set locally, directly on target */
+int jt_lcfg_param(int argc, char **argv)
+{
+ int i, rc;
+ struct lustre_cfg_bufs bufs;
+ struct lustre_cfg *lcfg;
+
+ if (argc >= LUSTRE_CFG_MAX_BUFCOUNT)
+ return CMD_HELP;
+
+ lustre_cfg_bufs_reset(&bufs, lcfg_devname);
+
+ for (i = 1; i < argc; i++) {
+ lustre_cfg_bufs_set_string(&bufs, i, argv[i]);
+ }
+
+ lcfg = lustre_cfg_new(LCFG_PARAM, &bufs);
+
+ rc = lcfg_ioctl(argv[0], OBD_DEV_ID, lcfg);
+ lustre_cfg_free(lcfg);
+ if (rc < 0) {
+ fprintf(stderr, "error: %s: %s\n", jt_cmdname(argv[0]),
+ strerror(rc = errno));
+ }
+ return rc;
+}
+
+/* Param set in config log on MGS */
+/* conf_param <cfg_device> key1=value1 [key2=value2...] */
+int jt_lcfg_mgsparam(int argc, char **argv)
+{
+ int i, rc, index_offset = 0;
+ struct lustre_cfg_bufs bufs;
+ struct lustre_cfg *lcfg;
+
+ if ((argc >= LUSTRE_CFG_MAX_BUFCOUNT) || (argc <= 1))
+ return CMD_HELP;
+
+ if (!strchr(argv[1], '=')) {
+ /* Not key=val, assume <cfg_device> */
+ rc = jt_lcfg_device(2, argv);
+ if (rc)
+ return rc;
+ index_offset = 1;
+ }
+
+ if (lcfg_devname == NULL) {
+ fprintf(stderr, "%s: please use 'cfg_device name' to set the "
+ "device name for config commands.\n",
+ jt_cmdname(argv[0]));
+ return -EINVAL;
+ }
+
+ lustre_cfg_bufs_reset(&bufs, lcfg_devname);
+
+ for (i = 1; i < (argc - index_offset); i++) {
+ lustre_cfg_bufs_set_string(&bufs, i, argv[i + index_offset]);
+ }
+
+ /* We could put other opcodes here. */
+ lcfg = lustre_cfg_new(LCFG_PARAM, &bufs);
+
+ rc = lcfg_mgs_ioctl(argv[0], OBD_DEV_ID, lcfg);
+ lustre_cfg_free(lcfg);
+ if (rc < 0) {
+ fprintf(stderr, "error: %s: %s\n", jt_cmdname(argv[0]),
+ strerror(rc = errno));
+ }
+
+ return rc;
+}
+
+
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Author: Lin Song Tao <lincent@clusterfs.com>
+ * Author: Nathan Rutman <nathan@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <mntent.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+
+#include <string.h>
+#include <getopt.h>
+
+#include <linux/types.h>
+//#define HAVE_SYS_VFS_H 1
+#include <linux/fs.h> // for BLKGETSIZE64
+#include <lustre_disk.h>
+#include <lustre_param.h>
+#include <lnet/lnetctl.h>
+#include <lustre_ver.h>
+
+
+#define MAX_LOOP_DEVICES 16
+#define L_BLOCK_SIZE 4096
+#define INDEX_UNASSIGNED 0xFFFF
+
+static char *progname;
+static int verbose = 1;
+static int print_only = 0;
+
+
+void usage(FILE *out)
+{
+ fprintf(out, "%s v"LUSTRE_VERSION_STRING"\n", progname);
+ fprintf(out, "usage: %s <target types> [options] <device>\n", progname);
+ fprintf(out,
+ "\t<device>:block device or file (e.g /dev/sda or /tmp/ost1)\n"
+ "\ttarget types:\n"
+ "\t\t--ost: object storage, mutually exclusive with mdt\n"
+ "\t\t--mdt: metadata storage, mutually exclusive with ost\n"
+ "\t\t--mgs: configuration management service - one per site\n"
+ "\toptions (in order of popularity):\n"
+ "\t\t--mgsnode=<nid>[,<...>] : NID(s) of a remote mgs node\n"
+ "\t\t\trequired for all targets other than the mgs node\n"
+ "\t\t--fsname=<filesystem_name> : default is 'lustre'\n"
+ "\t\t--failnode=<nid>[,<...>] : NID(s) of a failover partner\n"
+ "\t\t--param <key>=<value> : set a permanent parameter\n"
+ "\t\t--index=#N : target index\n"
+ /* FIXME implement 1.6.x
+ "\t\t--configdev=<altdevice|file>: store configuration info\n"
+ "\t\t\tfor this device on an alternate device\n"
+ */
+ "\t\t--mountfsoptions=<opts> : permanent mount options\n"
+ "\t\t--backfstype=<fstype> : backing fs type (ext3, ldiskfs)\n"
+ "\t\t--device-size=#N(KB) : device size for loop devices\n"
+#ifndef TUNEFS
+ "\t\t--mkfsoptions=<opts> : format options\n"
+ "\t\t--reformat: overwrite an existing disk\n"
+ "\t\t--stripe-count-hint=#N : used for optimizing MDT inode size\n"
+#else
+ "\t\t--erase-params : erase all old parameter settings\n"
+ "\t\t--nomgs: turn off MGS service on this MDT\n"
+ "\t\t--writeconf: erase all config logs for this fs.\n"
+#endif
+ "\t\t--print: just report what we would do; don't write to "
+ "disk\n"
+ "\t\t--verbose\n"
+ "\t\t--quiet\n");
+ return;
+}
+
+#define vprint if (verbose > 0) printf
+
+static void fatal(void)
+{
+ verbose = 0;
+ fprintf(stderr, "\n%s FATAL: ", progname);
+}
+
+/*================ utility functions =====================*/
+
+inline unsigned int
+dev_major (unsigned long long int __dev)
+{
+ return ((__dev >> 8) & 0xfff) | ((unsigned int) (__dev >> 32) & ~0xfff);
+}
+
+inline unsigned int
+dev_minor (unsigned long long int __dev)
+{
+ return (__dev & 0xff) | ((unsigned int) (__dev >> 12) & ~0xff);
+}
+
+int get_os_version()
+{
+ static int version = 0;
+
+ if (!version) {
+ int fd;
+ char release[4] = "";
+
+ fd = open("/proc/sys/kernel/osrelease", O_RDONLY);
+ if (fd < 0)
+ fprintf(stderr, "%s: Warning: Can't resolve kernel "
+ "version, assuming 2.6\n", progname);
+ else {
+ read(fd, release, 4);
+ close(fd);
+ }
+ if (strncmp(release, "2.4.", 4) == 0)
+ version = 24;
+ else
+ version = 26;
+ }
+ return version;
+}
+
+int run_command(char *cmd)
+{
+ char log[] = "/tmp/mkfs_logXXXXXX";
+ int fd, rc;
+
+ if (verbose > 1)
+ printf("cmd: %s\n", cmd);
+
+ if ((fd = mkstemp(log)) >= 0) {
+ close(fd);
+ strcat(cmd, " >");
+ strcat(cmd, log);
+ }
+ strcat(cmd, " 2>&1");
+
+ /* Can't use popen because we need the rv of the command */
+ rc = system(cmd);
+ if (rc && fd >= 0) {
+ char buf[128];
+ FILE *fp;
+ fp = fopen(log, "r");
+ if (fp) {
+ while (fgets(buf, sizeof(buf), fp) != NULL) {
+ if (rc || verbose > 2)
+ printf(" %s", buf);
+ }
+ fclose(fp);
+ }
+ }
+ if (fd >= 0)
+ remove(log);
+ return rc;
+}
+
+static int check_mtab_entry(char *spec, char *type)
+{
+ FILE *fp;
+ struct mntent *mnt;
+
+ fp = setmntent(MOUNTED, "r");
+ if (fp == NULL)
+ return(0);
+
+ while ((mnt = getmntent(fp)) != NULL) {
+ if (strcmp(mnt->mnt_fsname, spec) == 0 &&
+ strcmp(mnt->mnt_type, type) == 0) {
+ endmntent(fp);
+ fprintf(stderr, "%s: according to %s %s is "
+ "already mounted on %s\n",
+ progname, MOUNTED, spec, mnt->mnt_dir);
+ return(EEXIST);
+ }
+ }
+ endmntent(fp);
+
+ return(0);
+}
+
+/*============ disk dev functions ===================*/
+
+/* Setup a file in the first unused loop_device */
+int loop_setup(struct mkfs_opts *mop)
+{
+ char loop_base[20];
+ char l_device[64];
+ int i,ret = 0;
+
+ /* Figure out the loop device names */
+ if (!access("/dev/loop0", F_OK | R_OK))
+ strcpy(loop_base, "/dev/loop\0");
+ else if (!access("/dev/loop/0", F_OK | R_OK))
+ strcpy(loop_base, "/dev/loop/\0");
+ else {
+ fprintf(stderr, "%s: can't access loop devices\n", progname);
+ return 1;
+ }
+
+ /* Find unused loop device */
+ for (i = 0; i < MAX_LOOP_DEVICES; i++) {
+ char cmd[128];
+ sprintf(l_device, "%s%d", loop_base, i);
+ if (access(l_device, F_OK | R_OK))
+ break;
+ sprintf(cmd, "losetup %s > /dev/null 2>&1", l_device);
+ ret = system(cmd);
+ /* losetup gets 1 (ret=256) for non-set-up device */
+ if (ret) {
+ /* Set up a loopback device to our file */
+ sprintf(cmd, "losetup %s %s", l_device, mop->mo_device);
+ ret = run_command(cmd);
+ if (ret) {
+ fprintf(stderr, "%s: error %d on losetup: %s\n",
+ progname, ret, strerror(ret));
+ return ret;
+ }
+ strcpy(mop->mo_loopdev, l_device);
+ return ret;
+ }
+ }
+
+ fprintf(stderr, "%s: out of loop devices!\n", progname);
+ return EMFILE;
+}
+
+int loop_cleanup(struct mkfs_opts *mop)
+{
+ char cmd[128];
+ int ret = 1;
+ if ((mop->mo_flags & MO_IS_LOOP) && *mop->mo_loopdev) {
+ sprintf(cmd, "losetup -d %s", mop->mo_loopdev);
+ ret = run_command(cmd);
+ }
+ return ret;
+}
+
+/* Determine if a device is a block device (as opposed to a file) */
+int is_block(char* devname)
+{
+ struct stat st;
+ int ret = 0;
+
+ ret = access(devname, F_OK);
+ if (ret != 0)
+ return 0;
+ ret = stat(devname, &st);
+ if (ret != 0) {
+ fprintf(stderr, "%s: cannot stat %s\n", progname, devname);
+ return -1;
+ }
+ return S_ISBLK(st.st_mode);
+}
+
+__u64 get_device_size(char* device)
+{
+ int ret, fd;
+ __u64 size = 0;
+
+ fd = open(device, O_RDONLY);
+ if (fd < 0) {
+ fprintf(stderr, "%s: cannot open %s: %s\n",
+ progname, device, strerror(errno));
+ return 0;
+ }
+
+ /* size in bytes. bz5831 */
+ ret = ioctl(fd, BLKGETSIZE64, (void*)&size);
+ close(fd);
+ if (ret < 0) {
+ fprintf(stderr, "%s: size ioctl failed: %s\n",
+ progname, strerror(errno));
+ return 0;
+ }
+
+ vprint("device size = "LPU64"MB\n", size >> 20);
+ /* return value in KB */
+ return size >> 10;
+}
+
+int loop_format(struct mkfs_opts *mop)
+{
+ int ret = 0;
+
+ if (mop->mo_device_sz == 0) {
+ fatal();
+ fprintf(stderr, "loop device requires a --device-size= "
+ "param\n");
+ return EINVAL;
+ }
+
+ ret = creat(mop->mo_device, S_IRUSR|S_IWUSR);
+ ret = truncate(mop->mo_device, mop->mo_device_sz * 1024);
+ if (ret != 0) {
+ ret = errno;
+ fprintf(stderr, "%s: Unable to create backing store: %d\n",
+ progname, ret);
+ }
+
+ return ret;
+}
+
+/* Check whether the file exists in the device */
+static int file_in_dev(char *file_name, char *dev_name)
+{
+ FILE *fp;
+ char debugfs_cmd[256];
+ unsigned int inode_num;
+ int i;
+
+ /* Construct debugfs command line. */
+ memset(debugfs_cmd, 0, sizeof(debugfs_cmd));
+ sprintf(debugfs_cmd,
+ "debugfs -c -R 'stat %s' %s 2>&1 | egrep '(Inode|unsupported)'",
+ file_name, dev_name);
+
+ fp = popen(debugfs_cmd, "r");
+ if (!fp) {
+ fprintf(stderr, "%s: %s\n", progname, strerror(errno));
+ return 0;
+ }
+
+ if (fscanf(fp, "Inode: %u", &inode_num) == 1) { /* exist */
+ pclose(fp);
+ return 1;
+ }
+ i = fread(debugfs_cmd, 1, sizeof(debugfs_cmd), fp);
+ if (i) {
+ /* Filesystem has unsupported feature */
+ vprint("%.*s", i, debugfs_cmd);
+ /* in all likelihood, the "unsupported feature" is
+ 'extents', which older debugfs does not understand.
+ Use e2fsprogs-1.38-cfs1 or later, available from
+ ftp://ftp.lustre.org/pub/lustre/other/e2fsprogs/ */
+ return -1;
+ }
+ pclose(fp);
+ return 0;
+}
+
+/* Check whether the device has already been used with lustre */
+static int is_lustre_target(struct mkfs_opts *mop)
+{
+ int rc;
+ vprint("checking for existing Lustre data\n");
+
+ if ((rc = file_in_dev(MOUNT_DATA_FILE, mop->mo_device))
+ || (rc = file_in_dev(LAST_RCVD, mop->mo_device))) {
+ vprint("found Lustre data\n");
+ /* in the -1 case, 'extents' means this really IS a lustre
+ target */
+ return rc;
+ }
+
+ return 0; /* The device is not a lustre target. */
+}
+
+/* Build fs according to type */
+int make_lustre_backfs(struct mkfs_opts *mop)
+{
+ char mkfs_cmd[512];
+ char buf[40];
+ char *dev;
+ int ret = 0;
+ int block_count = 0;
+
+ if (mop->mo_device_sz != 0) {
+ if (mop->mo_device_sz < 8096){
+ fprintf(stderr, "%s: size of filesystem must be larger "
+ "than 8MB, but is set to %lldKB\n",
+ progname, mop->mo_device_sz);
+ return EINVAL;
+ }
+ block_count = mop->mo_device_sz / (L_BLOCK_SIZE >> 10);
+ }
+
+ if ((mop->mo_ldd.ldd_mount_type == LDD_MT_EXT3) ||
+ (mop->mo_ldd.ldd_mount_type == LDD_MT_LDISKFS)) {
+ __u64 device_sz = mop->mo_device_sz;
+
+ /* we really need the size */
+ if (device_sz == 0) {
+ device_sz = get_device_size(mop->mo_device);
+ if (device_sz == 0)
+ return ENODEV;
+ }
+
+ /* Journal size in MB */
+ if (strstr(mop->mo_mkfsopts, "-J") == NULL) {
+ /* Choose our own default journal size */
+ long journal_sz = 0, max_sz;
+ if (device_sz > 1024 * 1024) /* 1GB */
+ journal_sz = (device_sz / 102400) * 4;
+ /* man mkfs.ext3 */
+ max_sz = (102400 * L_BLOCK_SIZE) >> 20; /* 400MB */
+ if (journal_sz > max_sz)
+ journal_sz = max_sz;
+ if (journal_sz) {
+ sprintf(buf, " -J size=%ld", journal_sz);
+ strcat(mop->mo_mkfsopts, buf);
+ }
+ }
+
+ /* Default bytes_per_inode is block size */
+ if (strstr(mop->mo_mkfsopts, "-i") == NULL) {
+ long bytes_per_inode = 0;
+
+ if (IS_MDT(&mop->mo_ldd))
+ bytes_per_inode = 4096;
+
+ /* Allocate fewer inodes on large OST devices. Most
+ filesystems can be much more aggressive than even
+ this. */
+ if ((IS_OST(&mop->mo_ldd) && (device_sz > 1000000)))
+ bytes_per_inode = 16384;
+
+ if (bytes_per_inode > 0) {
+ sprintf(buf, " -i %ld", bytes_per_inode);
+ strcat(mop->mo_mkfsopts, buf);
+ }
+ }
+
+ /* This is an undocumented mke2fs option. Default is 128. */
+ if (strstr(mop->mo_mkfsopts, "-I") == NULL) {
+ long inode_size = 0;
+ if (IS_MDT(&mop->mo_ldd)) {
+ if (mop->mo_stripe_count > 77)
+ inode_size = 512; /* bz 7241 */
+ /* cray stripes across all osts (>60) */
+ else if (mop->mo_stripe_count > 34)
+ inode_size = 2048;
+ else if (mop->mo_stripe_count > 13)
+ inode_size = 1024;
+ else
+ inode_size = 512;
+ } else if (IS_OST(&mop->mo_ldd)) {
+ /* now as we store fids in EA on OST we need
+ to make inode bigger */
+ inode_size = 256;
+ }
+
+ if (inode_size > 0) {
+ sprintf(buf, " -I %ld", inode_size);
+ strcat(mop->mo_mkfsopts, buf);
+ }
+
+ }
+
+ if (verbose < 2) {
+ strcat(mop->mo_mkfsopts, " -q");
+ }
+
+ /* Enable hashed b-tree directory lookup in large dirs bz6224 */
+ if (strstr(mop->mo_mkfsopts, "-O") == NULL) {
+ strcat(mop->mo_mkfsopts, " -O dir_index");
+ }
+
+ /* Allow reformat of full devices (as opposed to
+ partitions.) We already checked for mounted dev. */
+ strcat(mop->mo_mkfsopts, " -F");
+
+ sprintf(mkfs_cmd, "mkfs.ext2 -j -b %d -L %s ", L_BLOCK_SIZE,
+ mop->mo_ldd.ldd_svname);
+
+ } else if (mop->mo_ldd.ldd_mount_type == LDD_MT_REISERFS) {
+ long journal_sz = 0; /* FIXME default journal size */
+ if (journal_sz > 0) {
+ sprintf(buf, " --journal_size %ld", journal_sz);
+ strcat(mop->mo_mkfsopts, buf);
+ }
+ sprintf(mkfs_cmd, "mkreiserfs -ff ");
+
+ } else {
+ fprintf(stderr,"%s: unsupported fs type: %d (%s)\n",
+ progname, mop->mo_ldd.ldd_mount_type,
+ MT_STR(&mop->mo_ldd));
+ return EINVAL;
+ }
+
+ /* For loop device format the dev, not the filename */
+ dev = mop->mo_device;
+ if (mop->mo_flags & MO_IS_LOOP)
+ dev = mop->mo_loopdev;
+
+ vprint("formatting backing filesystem %s on %s\n",
+ MT_STR(&mop->mo_ldd), dev);
+ vprint("\ttarget name %s\n", mop->mo_ldd.ldd_svname);
+ vprint("\t4k blocks %d\n", block_count);
+ vprint("\toptions %s\n", mop->mo_mkfsopts);
+
+ /* mkfs_cmd's trailing space is important! */
+ strcat(mkfs_cmd, mop->mo_mkfsopts);
+ strcat(mkfs_cmd, " ");
+ strcat(mkfs_cmd, dev);
+ if (block_count != 0) {
+ sprintf(buf, " %d", block_count);
+ strcat(mkfs_cmd, buf);
+ }
+
+ vprint("mkfs_cmd = %s\n", mkfs_cmd);
+ ret = run_command(mkfs_cmd);
+ if (ret) {
+ fatal();
+ fprintf(stderr, "Unable to build fs %s (%d)\n", dev, ret);
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+/* ==================== Lustre config functions =============*/
+
+void print_ldd(char *str, struct lustre_disk_data *ldd)
+{
+ printf("\n %s:\n", str);
+ printf("Target: %s\n", ldd->ldd_svname);
+ if (ldd->ldd_svindex == INDEX_UNASSIGNED)
+ printf("Index: unassigned\n");
+ else
+ printf("Index: %d\n", ldd->ldd_svindex);
+ printf("UUID: %s\n", (char *)ldd->ldd_uuid);
+ printf("Lustre FS: %s\n", ldd->ldd_fsname);
+ printf("Mount type: %s\n", MT_STR(ldd));
+ printf("Flags: %#x\n", ldd->ldd_flags);
+ printf(" (%s%s%s%s%s%s%s%s)\n",
+ IS_MDT(ldd) ? "MDT ":"",
+ IS_OST(ldd) ? "OST ":"",
+ IS_MGS(ldd) ? "MGS ":"",
+ ldd->ldd_flags & LDD_F_NEED_INDEX ? "needs_index ":"",
+ ldd->ldd_flags & LDD_F_VIRGIN ? "first_time ":"",
+ ldd->ldd_flags & LDD_F_UPDATE ? "update ":"",
+ ldd->ldd_flags & LDD_F_WRITECONF ? "writeconf ":"",
+ ldd->ldd_flags & LDD_F_UPGRADE14 ? "upgrade1.4 ":"");
+ printf("Persistent mount opts: %s\n", ldd->ldd_mount_opts);
+ printf("Parameters:%s\n", ldd->ldd_params);
+ printf("\n");
+}
+
+/* Write the server config files */
+int write_local_files(struct mkfs_opts *mop)
+{
+ char mntpt[] = "/tmp/mntXXXXXX";
+ char filepnm[128];
+ char *dev;
+ FILE *filep;
+ int ret = 0;
+
+ /* Mount this device temporarily in order to write these files */
+ if (!mkdtemp(mntpt)) {
+ fprintf(stderr, "%s: Can't create temp mount point %s: %s\n",
+ progname, mntpt, strerror(errno));
+ return errno;
+ }
+
+ dev = mop->mo_device;
+ if (mop->mo_flags & MO_IS_LOOP)
+ dev = mop->mo_loopdev;
+
+ ret = mount(dev, mntpt, MT_STR(&mop->mo_ldd), 0, NULL);
+ if (ret) {
+ fprintf(stderr, "%s: Unable to mount %s: %s\n",
+ progname, dev, strerror(errno));
+ if (errno == ENODEV) {
+ fprintf(stderr, "Is the %s module available?\n",
+ MT_STR(&mop->mo_ldd));
+ }
+ goto out_rmdir;
+ }
+
+ /* Set up initial directories */
+ sprintf(filepnm, "%s/%s", mntpt, MOUNT_CONFIGS_DIR);
+ ret = mkdir(filepnm, 0777);
+ if ((ret != 0) && (errno != EEXIST)) {
+ fprintf(stderr, "%s: Can't make configs dir %s (%d)\n",
+ progname, filepnm, ret);
+ goto out_umnt;
+ } else if (errno == EEXIST) {
+ ret = 0;
+ }
+
+ /* Save the persistent mount data into a file. Lustre must pre-read
+ this file to get the real mount options. */
+ vprint("Writing %s\n", MOUNT_DATA_FILE);
+ sprintf(filepnm, "%s/%s", mntpt, MOUNT_DATA_FILE);
+ filep = fopen(filepnm, "w");
+ if (!filep) {
+ fprintf(stderr, "%s: Unable to create %s file\n",
+ progname, filepnm);
+ goto out_umnt;
+ }
+ fwrite(&mop->mo_ldd, sizeof(mop->mo_ldd), 1, filep);
+ fclose(filep);
+
+ /* COMPAT_146 */
+#ifdef TUNEFS
+ /* Check for upgrade */
+ if ((mop->mo_ldd.ldd_flags & (LDD_F_UPGRADE14 | LDD_F_SV_TYPE_MGS))
+ == (LDD_F_UPGRADE14 | LDD_F_SV_TYPE_MGS)) {
+ char cmd[128];
+ char *term;
+ vprint("Copying old logs\n");
+#if 0
+ /* Generate new client log as servers upgrade. Starting a new client
+ may end up with short lov's, so will be degraded until all servers
+ upgrade */
+ /* Copy the old client log to fsname-client */
+ sprintf(filepnm, "%s/%s/%s-client",
+ mntpt, MOUNT_CONFIGS_DIR, mop->mo_ldd.ldd_fsname);
+ sprintf(cmd, "cp %s/%s/client %s", mntpt, MDT_LOGS_DIR,
+ filepnm);
+ if (verbose > 1)
+ printf("cmd: %s\n", cmd);
+ ret = run_command(cmd);
+ if (ret) {
+ fprintf(stderr, "%s: Can't copy 1.4 config %s/client "
+ "(%d)\n", progname, MDT_LOGS_DIR, ret);
+ fprintf(stderr, "mount -t ext3 %s somewhere, "
+ "find the client log for fs %s and "
+ "copy it manually into %s/%s-client, "
+ "then umount.\n",
+ mop->mo_device,
+ mop->mo_ldd.ldd_fsname, MOUNT_CONFIGS_DIR,
+ mop->mo_ldd.ldd_fsname);
+ goto out_umnt;
+ }
+ #endif
+ /* We need to use the old mdt log because otherwise mdt won't
+ have complete lov if old clients connect before all
+ servers upgrade. */
+ /* Copy the old mdt log to fsname-MDT0000 (get old
+ name from mdt_UUID) */
+ ret = 1;
+ strcpy(filepnm, mop->mo_ldd.ldd_uuid);
+ term = strstr(filepnm, "_UUID");
+ if (term) {
+ *term = '\0';
+ sprintf(cmd, "cp %s/%s/%s %s/%s/%s",
+ mntpt, MDT_LOGS_DIR, filepnm,
+ mntpt, MOUNT_CONFIGS_DIR,
+ mop->mo_ldd.ldd_svname);
+ if (verbose > 1)
+ printf("cmd: %s\n", cmd);
+ ret = run_command(cmd);
+ }
+ if (ret) {
+ fprintf(stderr, "%s: Can't copy 1.4 config %s/%s "
+ "(%d)\n", progname, MDT_LOGS_DIR, filepnm, ret);
+ fprintf(stderr, "mount -t ext3 %s somewhere, "
+ "find the MDT log for fs %s and "
+ "copy it manually into %s/%s, "
+ "then umount.\n",
+ mop->mo_device,
+ mop->mo_ldd.ldd_fsname, MOUNT_CONFIGS_DIR,
+ mop->mo_ldd.ldd_svname);
+ goto out_umnt;
+ }
+ }
+#endif
+ /* end COMPAT_146 */
+
+
+out_umnt:
+ umount(mntpt);
+out_rmdir:
+ rmdir(mntpt);
+ return ret;
+}
+
+int read_local_files(struct mkfs_opts *mop)
+{
+ char mntpt[] = "/tmp/mntXXXXXX";
+ char filepnm[128];
+ char *dev;
+ FILE *filep;
+ int ret = 0;
+
+ /* Mount this device temporarily in order to read these files */
+ if (!mkdtemp(mntpt)) {
+ fprintf(stderr, "%s: Can't create temp mount point %s: %s\n",
+ progname, mntpt, strerror(errno));
+ return errno;
+ }
+
+ dev = mop->mo_device;
+ if (mop->mo_flags & MO_IS_LOOP)
+ dev = mop->mo_loopdev;
+
+ ret = mount(dev, mntpt, MT_STR(&mop->mo_ldd), 0, NULL);
+ if (ret) {
+ fprintf(stderr, "%s: Unable to mount %s: %s\n",
+ progname, dev, strerror(errno));
+ goto out_rmdir;
+ }
+
+ sprintf(filepnm, "%s/%s", mntpt, MOUNT_DATA_FILE);
+ filep = fopen(filepnm, "r");
+ if (filep) {
+ vprint("Reading %s\n", MOUNT_DATA_FILE);
+ fread(&mop->mo_ldd, sizeof(mop->mo_ldd), 1, filep);
+ } else {
+ /* COMPAT_146 */
+ /* Try to read pre-1.6 config from last_rcvd */
+ struct lr_server_data lsd;
+ vprint("%s: Unable to read %s, trying last_rcvd\n",
+ progname, MOUNT_DATA_FILE);
+ sprintf(filepnm, "%s/%s", mntpt, LAST_RCVD);
+ filep = fopen(filepnm, "r");
+ if (!filep) {
+ fprintf(stderr, "%s: Unable to read old data\n",
+ progname);
+ ret = -errno;
+ goto out_umnt;
+ }
+ vprint("Reading %s\n", LAST_RCVD);
+ ret = fread(&lsd, 1, sizeof(lsd), filep);
+ if (ret < sizeof(lsd)) {
+ fprintf(stderr, "%s: Short read (%d of %d)\n",
+ progname, ret, sizeof(lsd));
+ ret = -ferror(filep);
+ if (ret)
+ goto out_close;
+ }
+ ret = 0;
+ if (lsd.lsd_feature_compat & OBD_COMPAT_OST) {
+ mop->mo_ldd.ldd_flags = LDD_F_SV_TYPE_OST;
+ mop->mo_ldd.ldd_svindex = lsd.lsd_ost_index;
+ } else if (lsd.lsd_feature_compat & OBD_COMPAT_MDT) {
+ /* We must co-locate so mgs can see old logs.
+ If user doesn't want this, they can copy the old
+ logs manually and re-tunefs. */
+ mop->mo_ldd.ldd_flags =
+ LDD_F_SV_TYPE_MDT | LDD_F_SV_TYPE_MGS;
+ mop->mo_ldd.ldd_svindex = lsd.lsd_mdt_index;
+ } else {
+ /* If neither is set, we're pre-1.4.6, make a guess. */
+ sprintf(filepnm, "%s/%s", mntpt, MDT_LOGS_DIR);
+ if (lsd.lsd_ost_index > 0) {
+ mop->mo_ldd.ldd_flags = LDD_F_SV_TYPE_OST;
+ mop->mo_ldd.ldd_svindex = lsd.lsd_ost_index;
+ } else {
+ /* If there's a LOGS dir, it's an MDT */
+ if ((ret = access(filepnm, F_OK)) == 0) {
+ mop->mo_ldd.ldd_flags =
+ LDD_F_SV_TYPE_MDT |
+ LDD_F_SV_TYPE_MGS;
+ /* Old MDT's are always index 0
+ (pre CMD) */
+ mop->mo_ldd.ldd_svindex = 0;
+ } else {
+ /* The index won't be correct */
+ mop->mo_ldd.ldd_flags =
+ LDD_F_SV_TYPE_OST | LDD_F_NEED_INDEX;
+ }
+ }
+ }
+
+ memcpy(mop->mo_ldd.ldd_uuid, lsd.lsd_uuid,
+ sizeof(mop->mo_ldd.ldd_uuid));
+ mop->mo_ldd.ldd_flags |= LDD_F_UPGRADE14;
+ }
+ /* end COMPAT_146 */
+out_close:
+ fclose(filep);
+
+out_umnt:
+ umount(mntpt);
+out_rmdir:
+ rmdir(mntpt);
+ return ret;
+}
+
+
+void set_defaults(struct mkfs_opts *mop)
+{
+ mop->mo_ldd.ldd_magic = LDD_MAGIC;
+ mop->mo_ldd.ldd_config_ver = 1;
+ mop->mo_ldd.ldd_flags = LDD_F_NEED_INDEX | LDD_F_UPDATE | LDD_F_VIRGIN;
+ mop->mo_mgs_failnodes = 0;
+ strcpy(mop->mo_ldd.ldd_fsname, "lustre");
+ if (get_os_version() == 24)
+ mop->mo_ldd.ldd_mount_type = LDD_MT_EXT3;
+ else
+ mop->mo_ldd.ldd_mount_type = LDD_MT_LDISKFS;
+
+ mop->mo_ldd.ldd_svindex = INDEX_UNASSIGNED;
+ mop->mo_stripe_count = 1;
+}
+
+static inline void badopt(const char *opt, char *type)
+{
+ fprintf(stderr, "%s: '--%s' only valid for %s\n",
+ progname, opt, type);
+ usage(stderr);
+}
+
+static int add_param(char *buf, char *key, char *val)
+{
+ int end = sizeof(((struct lustre_disk_data *)0)->ldd_params);
+ int start = strlen(buf);
+ int keylen = 0;
+
+ if (key)
+ keylen = strlen(key);
+ if (start + 1 + keylen + strlen(val) >= end) {
+ fprintf(stderr, "%s: params are too long-\n%s %s%s\n",
+ progname, buf, key ? key : "", val);
+ return 1;
+ }
+
+ sprintf(buf + start, " %s%s", key ? key : "", val);
+ return 0;
+}
+
+/* from mount_lustre */
+/* Get rid of symbolic hostnames for tcp, since kernel can't do lookups */
+#define MAXNIDSTR 1024
+static char *convert_hostnames(char *s1)
+{
+ char *converted, *s2 = 0, *c;
+ int left = MAXNIDSTR;
+ lnet_nid_t nid;
+
+ converted = malloc(left);
+ c = converted;
+ while ((left > 0) && ((s2 = strsep(&s1, ",: \0")))) {
+ nid = libcfs_str2nid(s2);
+ if (nid == LNET_NID_ANY) {
+ if (*s2 == '/')
+ /* end of nids */
+ break;
+ fprintf(stderr, "%s: Can't parse NID '%s'\n",
+ progname, s2);
+ free(converted);
+ return NULL;
+ }
+ if (LNET_NETTYP(LNET_NIDNET(nid)) == SOCKLND) {
+ __u32 addr = LNET_NIDADDR(nid);
+ c += snprintf(c, left, "%u.%u.%u.%u@%s%u,",
+ (addr >> 24) & 0xff, (addr >> 16) & 0xff,
+ (addr >> 8) & 0xff, addr & 0xff,
+ libcfs_lnd2str(SOCKLND),
+ LNET_NETNUM(LNET_NIDNET(nid)));
+ } else {
+ c += snprintf(c, left, "%s,", s2);
+ }
+ left = converted + MAXNIDSTR - c;
+ }
+ *(c - 1) = '\0';
+ return converted;
+}
+
+int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop,
+ char **mountopts)
+{
+ static struct option long_opt[] = {
+ {"backfstype", 1, 0, 'b'},
+ {"stripe-count-hint", 1, 0, 'c'},
+ {"configdev", 1, 0, 'C'},
+ {"device-size", 1, 0, 'd'},
+ {"erase-params", 0, 0, 'e'},
+ {"failnode", 1, 0, 'f'},
+ {"failover", 1, 0, 'f'},
+ {"mgs", 0, 0, 'G'},
+ {"help", 0, 0, 'h'},
+ {"index", 1, 0, 'i'},
+ {"mkfsoptions", 1, 0, 'k'},
+ {"mgsnode", 1, 0, 'm'},
+ {"mgsnid", 1, 0, 'm'},
+ {"mdt", 0, 0, 'M'},
+ {"fsname",1, 0, 'n'},
+ {"nomgs", 0, 0, 'N'},
+ {"mountfsoptions", 1, 0, 'o'},
+ {"ost", 0, 0, 'O'},
+ {"param", 1, 0, 'p'},
+ {"print", 0, 0, 'P'},
+ {"quiet", 0, 0, 'q'},
+ {"reformat", 0, 0, 'r'},
+ {"verbose", 0, 0, 'v'},
+ {"writeconf", 0, 0, 'w'},
+ {0, 0, 0, 0}
+ };
+ char *optstring = "b:c:C:d:ef:Ghi:k:m:Mn:No:Op:Pqrvw";
+ char opt;
+ int rc, longidx;
+
+ while ((opt = getopt_long(argc, argv, optstring, long_opt, &longidx)) !=
+ EOF) {
+ switch (opt) {
+ case 'b': {
+ int i = 0;
+ while (i < LDD_MT_LAST) {
+ if (strcmp(optarg, mt_str(i)) == 0) {
+ mop->mo_ldd.ldd_mount_type = i;
+ break;
+ }
+ i++;
+ }
+ break;
+ }
+ case 'c':
+ if (IS_MDT(&mop->mo_ldd)) {
+ int stripe_count = atol(optarg);
+ if (stripe_count <= 0) {
+ fprintf(stderr, "%s: bad stripe count "
+ "%d\n", progname, stripe_count);
+ return 1;
+ }
+ mop->mo_stripe_count = stripe_count;
+ } else {
+ badopt(long_opt[longidx].name, "MDT");
+ return 1;
+ }
+ break;
+ case 'C': /* Configdev */
+ //FIXME
+ printf("Configdev not implemented\n");
+ return 1;
+ case 'd':
+ mop->mo_device_sz = atol(optarg);
+ break;
+ case 'e':
+ mop->mo_ldd.ldd_params[0] = '\0';
+ break;
+ case 'f': {
+ char *nids = convert_hostnames(optarg);
+ if (!nids)
+ return 1;
+ rc = add_param(mop->mo_ldd.ldd_params, PARAM_FAILNODE,
+ nids);
+ free(nids);
+ if (rc)
+ return rc;
+ break;
+ }
+ case 'G':
+ mop->mo_ldd.ldd_flags |= LDD_F_SV_TYPE_MGS;
+ break;
+ case 'h':
+ usage(stdout);
+ return 1;
+ case 'i':
+ if (IS_MDT(&mop->mo_ldd) || IS_OST(&mop->mo_ldd)) {
+ mop->mo_ldd.ldd_svindex = atol(optarg);
+ mop->mo_ldd.ldd_flags &= ~LDD_F_NEED_INDEX;
+ } else {
+ badopt(long_opt[longidx].name, "MDT,OST");
+ return 1;
+ }
+ break;
+ case 'k':
+ strncpy(mop->mo_mkfsopts, optarg,
+ sizeof(mop->mo_mkfsopts) - 1);
+ break;
+ case 'm': {
+ char *nids = convert_hostnames(optarg);
+ if (!nids)
+ return 1;
+ rc = add_param(mop->mo_ldd.ldd_params, PARAM_MGSNODE,
+ nids);
+ free(nids);
+ if (rc)
+ return rc;
+ mop->mo_mgs_failnodes++;
+ break;
+ }
+ case 'M':
+ mop->mo_ldd.ldd_flags |= LDD_F_SV_TYPE_MDT;
+ break;
+ case 'n':
+ if (!(IS_MDT(&mop->mo_ldd) || IS_OST(&mop->mo_ldd))) {
+ badopt(long_opt[longidx].name, "MDT,OST");
+ return 1;
+ }
+ if (strlen(optarg) > 8) {
+ fprintf(stderr, "%s: filesystem name must be "
+ "<= 8 chars\n", progname);
+ return 1;
+ }
+ if (optarg[0] != 0)
+ strncpy(mop->mo_ldd.ldd_fsname, optarg,
+ sizeof(mop->mo_ldd.ldd_fsname) - 1);
+ break;
+ case 'N':
+ mop->mo_ldd.ldd_flags &= ~LDD_F_SV_TYPE_MGS;
+ break;
+ case 'o':
+ *mountopts = optarg;
+ break;
+ case 'O':
+ mop->mo_ldd.ldd_flags |= LDD_F_SV_TYPE_OST;
+ break;
+ case 'p':
+ rc = add_param(mop->mo_ldd.ldd_params, NULL, optarg);
+ if (rc)
+ return rc;
+ break;
+ case 'P':
+ print_only++;
+ break;
+ case 'q':
+ verbose--;
+ break;
+ case 'r':
+ mop->mo_flags |= MO_FORCEFORMAT;
+ break;
+ case 'v':
+ verbose++;
+ break;
+ case 'w':
+ mop->mo_ldd.ldd_flags |= LDD_F_WRITECONF;
+ break;
+ default:
+ if (opt != '?') {
+ fatal();
+ fprintf(stderr, "Unknown option '%c'\n", opt);
+ }
+ usage(stderr);
+ return 1;
+ }
+ }//while
+ if (optind >= argc) {
+ fatal();
+ fprintf(stderr, "Bad arguments\n");
+ usage(stderr);
+ return 1;
+ }
+
+ return 0;
+}
+
+int main(int argc, char *const argv[])
+{
+ struct mkfs_opts mop;
+ struct lustre_disk_data *ldd;
+ char *mountopts = NULL;
+ char always_mountopts[512] = "";
+ char default_mountopts[512] = "";
+ int ret = 0;
+
+ //printf("pad %d\n", offsetof(struct lustre_disk_data, ldd_padding));
+ assert(offsetof(struct lustre_disk_data, ldd_padding) == 200);
+
+ if ((progname = strrchr(argv[0], '/')) != NULL)
+ progname++;
+ else
+ progname = argv[0];
+
+ if (argc < 2) {
+ usage(stderr);
+ ret = 1;
+ goto out;
+ }
+
+ memset(&mop, 0, sizeof(mop));
+ set_defaults(&mop);
+
+ /* device is last arg */
+ strcpy(mop.mo_device, argv[argc - 1]);
+
+ if (check_mtab_entry(mop.mo_device, "lustre"))
+ return(EEXIST);
+
+ /* Are we using a loop device? */
+ ret = is_block(mop.mo_device);
+ if (ret < 0)
+ goto out;
+ if (ret == 0)
+ mop.mo_flags |= MO_IS_LOOP;
+
+#ifdef TUNEFS
+ /* For tunefs, we must read in the old values before parsing any
+ new ones. */
+ /* Create the loopback file */
+ if (mop.mo_flags & MO_IS_LOOP) {
+ ret = access(mop.mo_device, F_OK);
+ if (ret == 0)
+ ret = loop_setup(&mop);
+ if (ret) {
+ fatal();
+ fprintf(stderr, "Loop device setup for %s failed: %s\n",
+ mop.mo_device, strerror(ret));
+ goto out;
+ }
+ }
+
+ /* Check whether the disk has already been formatted by mkfs.lustre */
+ ret = is_lustre_target(&mop);
+ if (ret == 0) {
+ fatal();
+ fprintf(stderr, "Device %s has not been formatted with "
+ "mkfs.lustre\n", mop.mo_device);
+ goto out;
+ }
+
+ ret = read_local_files(&mop);
+ if (ret) {
+ fatal();
+ fprintf(stderr, "Failed to read previous Lustre data from %s\n",
+ mop.mo_device);
+ goto out;
+ }
+
+ if (verbose > 0)
+ print_ldd("Read previous values", &(mop.mo_ldd));
+#endif
+
+ ret = parse_opts(argc, argv, &mop, &mountopts);
+ if (ret)
+ goto out;
+
+ ldd = &mop.mo_ldd;
+ if (!(IS_MDT(ldd) || IS_OST(ldd) || IS_MGS(ldd))) {
+ fatal();
+ fprintf(stderr, "must set target type :{mdt,ost,mgs}\n");
+ usage(stderr);
+ ret = 1;
+ goto out;
+ }
+
+ if (IS_MDT(ldd) && !IS_MGS(ldd) && (mop.mo_mgs_failnodes == 0)) {
+ vprint("No management node specified, adding MGS to this "
+ "MDT\n");
+ ldd->ldd_flags |= LDD_F_SV_TYPE_MGS;
+ }
+
+ if (!IS_MGS(ldd) && (mop.mo_mgs_failnodes == 0)) {
+ fatal();
+ fprintf(stderr, "Must specify either --mgs or --mgsnode\n");
+ usage(stderr);
+ goto out;
+ }
+
+ /* These are the permanent mount options (always included) */
+ switch (ldd->ldd_mount_type) {
+ case LDD_MT_EXT3:
+ case LDD_MT_LDISKFS: {
+ sprintf(always_mountopts, "errors=remount-ro");
+ if (IS_MDT(ldd) || IS_MGS(ldd))
+ strcat(always_mountopts,
+ ",iopen_nopriv,user_xattr");
+ if ((get_os_version() == 24) && IS_OST(ldd))
+ strcat(always_mountopts, ",asyncdel");
+#if 0
+ /* Files created while extents are enabled cannot be read if
+ mounted with a kernel that doesn't include the CFS patches.*/
+ if (IS_OST(ldd) &&
+ ldd->ldd_mount_type == LDD_MT_LDISKFS) {
+ strcat(default_mountopts, ",extents,mballoc");
+ }
+#endif
+ break;
+ }
+ case LDD_MT_SMFS: {
+ mop.mo_flags |= MO_IS_LOOP;
+ sprintf(always_mountopts, "type=ext3,dev=%s",
+ mop.mo_device);
+ break;
+ }
+ default: {
+ fatal();
+ fprintf(stderr, "unknown fs type %d '%s'\n",
+ ldd->ldd_mount_type,
+ MT_STR(ldd));
+ ret = EINVAL;
+ goto out;
+ }
+ }
+
+ if (mountopts) {
+ /* If user specifies mount opts, don't use defaults,
+ but always use always_mountopts */
+ sprintf(ldd->ldd_mount_opts, "%s,%s",
+ always_mountopts, mountopts);
+ } else {
+#ifdef TUNEFS
+ if (ldd->ldd_mount_opts[0] == 0)
+ /* use the defaults unless old opts exist */
+#endif
+ {
+ if (default_mountopts[0])
+ sprintf(ldd->ldd_mount_opts, "%s,%s",
+ always_mountopts, default_mountopts);
+ else
+ strcpy(ldd->ldd_mount_opts,
+ always_mountopts);
+ }
+ }
+
+ server_make_name(ldd->ldd_flags, ldd->ldd_svindex,
+ ldd->ldd_fsname, ldd->ldd_svname);
+
+ if (verbose > 0)
+ print_ldd("Permanent disk data", ldd);
+
+ if (print_only) {
+ printf("exiting before disk write.\n");
+ goto out;
+ }
+
+#ifndef TUNEFS /* mkfs.lustre */
+ /* Create the loopback file of the correct size */
+ if (mop.mo_flags & MO_IS_LOOP) {
+ ret = access(mop.mo_device, F_OK);
+ /* Don't destroy the loopback file if no FORCEFORMAT */
+ if (ret || (mop.mo_flags & MO_FORCEFORMAT))
+ ret = loop_format(&mop);
+ if (ret == 0)
+ ret = loop_setup(&mop);
+ if (ret) {
+ fatal();
+ fprintf(stderr, "Loop device setup failed: %s\n",
+ strerror(ret));
+ goto out;
+ }
+ }
+
+ /* Check whether the disk has already been formatted by mkfs.lustre */
+ if (!(mop.mo_flags & MO_FORCEFORMAT)) {
+ ret = is_lustre_target(&mop);
+ if (ret) {
+ fatal();
+ fprintf(stderr, "Device %s was previously formatted "
+ "for lustre. Use --reformat to reformat it, "
+ "or tunefs.lustre to modify.\n",
+ mop.mo_device);
+ goto out;
+ }
+ }
+
+ /* Format the backing filesystem */
+ ret = make_lustre_backfs(&mop);
+ if (ret != 0) {
+ fatal();
+ fprintf(stderr, "mkfs failed %d\n", ret);
+ goto out;
+ }
+#endif
+
+ ret = write_local_files(&mop);
+ if (ret != 0) {
+ fatal();
+ fprintf(stderr, "failed to write local files\n");
+ goto out;
+ }
+
+out:
+ loop_cleanup(&mop);
+ return ret;
+}
#!/bin/sh
MDIR=/lib/modules/`uname -r`/lustre
+mkdir -p $MDIR
KVER=24
EXT=o
echo "Copying modules from local build dir to "$MDIR
-mkdir -p $MDIR
-
-cp ../../lnet/libcfs/libcfs.$EXT $MDIR
-cp ../../lnet/lnet/lnet.$EXT $MDIR
-cp ../../lnet/klnds/socklnd/ksocklnd.$EXT $MDIR
-cp ../lvfs/lvfs.$EXT $MDIR
-cp ../obdclass/obdclass.$EXT $MDIR
-cp ../ptlrpc/ptlrpc.$EXT $MDIR
-cp ../mdc/mdc.$EXT $MDIR
-cp ../osc/osc.$EXT $MDIR
-cp ../lov/lov.$EXT $MDIR
-cp ../mds/mds.$EXT $MDIR
-cp ../lvfs/$FSFLT.$EXT $MDIR
-[ $KVER == "26" ] && cp ../ldiskfs/ldiskfs.$EXT $MDIR
-cp ../ost/ost.$EXT $MDIR
-cp ../obdfilter/obdfilter.$EXT $MDIR
-cp ../llite/llite.$EXT $MDIR
-
+cp -u ../../lnet/libcfs/libcfs.$EXT $MDIR
+cp -u ../../lnet/lnet/lnet.$EXT $MDIR
+cp -u ../../lnet/klnds/socklnd/ksocklnd.$EXT $MDIR
+cp -u ../lvfs/lvfs.$EXT $MDIR
+cp -u ../obdclass/obdclass.$EXT $MDIR
+cp -u ../ptlrpc/ptlrpc.$EXT $MDIR
+cp -u ../mdc/mdc.$EXT $MDIR
+cp -u ../osc/osc.$EXT $MDIR
+cp -u ../lov/lov.$EXT $MDIR
+cp -u ../mds/mds.$EXT $MDIR
+cp -u ../lvfs/$FSFLT.$EXT $MDIR
+[ $KVER == "26" ] && cp -u ../ldiskfs/ldiskfs.$EXT $MDIR
+cp -u ../ost/ost.$EXT $MDIR
+cp -u ../obdfilter/obdfilter.$EXT $MDIR
+cp -u ../llite/llite.$EXT $MDIR
+cp -u ../mgc/mgc.$EXT $MDIR
+cp -u ../mgs/mgs.$EXT $MDIR
+
+# prevent warnings on my uml
+rm -f /lib/modules/`uname -r`/modules.*
echo "Depmod"
depmod -a -e
echo "Copying mount from local build dir to "$MDIR
-cp ../utils/mount.lustre /sbin/.
+cp -u ../utils/mount.lustre /sbin/.
MP="/sbin/modprobe"
MPI="$MP --ignore-install"
echo "alias lustre llite" >> $MODFILE
echo "# end Lustre modules" >> $MODFILE
fi
+
+# To generate gdb debug file:
+# modprobe lustre; modprobe mds; modprobe obdfilter; modprobe mgs; modprobe mgc
+# rm -f /r/tmp/ogdb-`hostname`
+# ./lctl modules > /r/tmp/ogdb-`hostname`
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Author: Robert Read <rread@clusterfs.com>
+ * Author: Nathan Rutman <nathan@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/mount.h>
+#include <mntent.h>
+#include <getopt.h>
+#include <sys/utsname.h>
+#include "obdctl.h"
+#include <lustre_ver.h>
+
+int verbose = 0;
+int nomtab = 0;
+int fake = 0;
+int force = 0;
+static char *progname = NULL;
+
+void usage(FILE *out)
+{
+ fprintf(out, "%s v"LUSTRE_VERSION_STRING"\n", progname);
+ fprintf(out, "usage: %s [-fhnv] [-o <mntopt>] <device> <mountpt>\n",
+ progname);
+ fprintf(out,
+ "\t<device>: the disk device, or for a client:\n"
+ "\t\t<mgmtnid>[:<altmgtnid>...]:/<filesystem>-client\n"
+ "\t<filesystem>: name of the Lustre filesystem (e.g. lustre1)\n"
+ "\t<mountpt>: filesystem mountpoint (e.g. /mnt/lustre)\n"
+ "\t-f|--fake: fake mount (updates /etc/mtab)\n"
+ "\t--force: force mount even if already in /etc/mtab\n"
+ "\t-h|--help: print this usage message\n"
+ "\t-n|--nomtab: do not update /etc/mtab after mount\n"
+ "\t-v|--verbose: print verbose config settings\n"
+ "\t<mntopt>: one or more comma separated of:\n"
+ "\t\t(no)flock,(no)user_xattr,(no)acl\n"
+ "\t\tnosvc: only start MGC/MGS obds\n"
+ "\t\texclude=<ostname>[:<ostname>] : colon-separated list of "
+ "inactive OSTs (e.g. lustre-OST0001)\n"
+ );
+ exit((out != stdout) ? EINVAL : 0);
+}
+
+static int check_mtab_entry(char *spec, char *mtpt, char *type)
+{
+ FILE *fp;
+ struct mntent *mnt;
+
+ if (force)
+ return (0);
+
+ fp = setmntent(MOUNTED, "r");
+ if (fp == NULL)
+ return(0);
+
+ while ((mnt = getmntent(fp)) != NULL) {
+ if (strcmp(mnt->mnt_fsname, spec) == 0 &&
+ strcmp(mnt->mnt_dir, mtpt) == 0 &&
+ strcmp(mnt->mnt_type, type) == 0) {
+ endmntent(fp);
+ fprintf(stderr, "%s: according to %s %s is "
+ "already mounted on %s\n",
+ progname, MOUNTED, spec, mtpt);
+ return(EEXIST);
+ }
+ }
+ endmntent(fp);
+
+ return(0);
+}
+
+static int
+update_mtab_entry(char *spec, char *mtpt, char *type, char *opts,
+ int flags, int freq, int pass)
+{
+ FILE *fp;
+ struct mntent mnt;
+ int rc = 0;
+
+ mnt.mnt_fsname = spec;
+ mnt.mnt_dir = mtpt;
+ mnt.mnt_type = type;
+ mnt.mnt_opts = opts ? opts : "";
+ mnt.mnt_freq = freq;
+ mnt.mnt_passno = pass;
+
+ fp = setmntent(MOUNTED, "a+");
+ if (fp == NULL) {
+ fprintf(stderr, "%s: setmntent(%s): %s:",
+ progname, MOUNTED, strerror (errno));
+ rc = 16;
+ } else {
+ if ((addmntent(fp, &mnt)) == 1) {
+ fprintf(stderr, "%s: addmntent: %s:",
+ progname, strerror (errno));
+ rc = 16;
+ }
+ endmntent(fp);
+ }
+
+ return rc;
+}
+
+/* Get rid of symbolic hostnames for tcp, since kernel can't do lookups */
+#define MAXNIDSTR 1024
+static char *convert_hostnames(char *s1)
+{
+ char *converted, *s2 = 0, *c;
+ char sep;
+ int left = MAXNIDSTR;
+ lnet_nid_t nid;
+
+ converted = malloc(left);
+ c = converted;
+ while ((left > 0) && (*s1 != '/')) {
+ s2 = strpbrk(s1, ",:");
+ if (!s2)
+ goto out_free;
+ sep = *s2;
+ *s2 = '\0';
+ nid = libcfs_str2nid(s1);
+ if (nid == LNET_NID_ANY)
+ goto out_free;
+ if (LNET_NETTYP(LNET_NIDNET(nid)) == SOCKLND) {
+ __u32 addr = LNET_NIDADDR(nid);
+ c += snprintf(c, left, "%u.%u.%u.%u@%s%u%c",
+ (addr >> 24) & 0xff, (addr >> 16) & 0xff,
+ (addr >> 8) & 0xff, addr & 0xff,
+ libcfs_lnd2str(SOCKLND),
+ LNET_NETNUM(LNET_NIDNET(nid)), sep);
+ } else {
+ c += snprintf(c, left, "%s%c", s1, sep);
+ }
+ left = converted + MAXNIDSTR - c;
+ s1 = s2 + 1;
+ }
+ snprintf(c, left, "%s", s1);
+ return converted;
+out_free:
+ fprintf(stderr, "%s: Can't parse NID '%s'\n", progname, s1);
+ free(converted);
+ return NULL;
+}
+
+/*****************************************************************************
+ *
+ * This part was cribbed from util-linux/mount/mount.c. There was no clear
+ * license information, but many other files in the package are identified as
+ * GNU GPL, so it's a pretty safe bet that was their intent.
+ *
+ ****************************************************************************/
+struct opt_map {
+ const char *opt; /* option name */
+ int skip; /* skip in mtab option string */
+ int inv; /* true if flag value should be inverted */
+ int mask; /* flag mask value */
+};
+
+static const struct opt_map opt_map[] = {
+ /* These flags are parsed by mount, not lustre */
+ { "defaults", 0, 0, 0 }, /* default options */
+ { "rw", 1, 1, MS_RDONLY }, /* read-write */
+ { "ro", 0, 0, MS_RDONLY }, /* read-only */
+ { "exec", 0, 1, MS_NOEXEC }, /* permit execution of binaries */
+ { "noexec", 0, 0, MS_NOEXEC }, /* don't execute binaries */
+ { "suid", 0, 1, MS_NOSUID }, /* honor suid executables */
+ { "nosuid", 0, 0, MS_NOSUID }, /* don't honor suid executables */
+ { "dev", 0, 1, MS_NODEV }, /* interpret device files */
+ { "nodev", 0, 0, MS_NODEV }, /* don't interpret devices */
+ { "async", 0, 1, MS_SYNCHRONOUS}, /* asynchronous I/O */
+ { "auto", 0, 0, 0 }, /* Can be mounted using -a */
+ { "noauto", 0, 0, 0 }, /* Can only be mounted explicitly */
+ { "nousers", 0, 1, 0 }, /* Forbid ordinary user to mount */
+ { "nouser", 0, 1, 0 }, /* Forbid ordinary user to mount */
+ { "noowner", 0, 1, 0 }, /* Device owner has no special privs */
+ { "_netdev", 0, 0, 0 }, /* Device accessible only via network */
+ /* These strings are passed through and parsed in lustre ll_options */
+ { "flock", 0, 0, 0 }, /* Enable flock support */
+ { "noflock", 1, 1, 0 }, /* Disable flock support */
+ { "user_xattr", 0, 0, 0 }, /* Enable get/set user xattr */
+ { "nouser_xattr", 1, 1, 0 }, /* Disable user xattr */
+ { "acl", 0, 0, 0 }, /* Enable ACL support */
+ { "noacl", 1, 1, 0 }, /* Disable ACL support */
+ { "nosvc", 0, 0, 0 }, /* Only start MGS/MGC, nothing else */
+ { "exclude", 0, 0, 0 }, /* OST exclusion list */
+ { NULL, 0, 0, 0 }
+};
+/****************************************************************************/
+
+/* 1 = found, flag set
+ 0 = found, no flag set
+ -1 = not found in above list */
+static int parse_one_option(const char *check, int *flagp)
+{
+ const struct opt_map *opt;
+
+ for (opt = &opt_map[0]; opt->opt != NULL; opt++) {
+ if (strncmp(check, opt->opt, strlen(opt->opt)) == 0) {
+ if (!opt->mask)
+ return 0;
+ if (opt->inv)
+ *flagp &= ~(opt->mask);
+ else
+ *flagp |= opt->mask;
+ return 1;
+ }
+ }
+ fprintf(stderr, "%s: ignoring unknown option '%s'\n", progname,
+ check);
+ return -1;
+}
+
+int parse_options(char *orig_options, int *flagp)
+{
+ char *options, *opt, *nextopt;
+
+ options = calloc(strlen(orig_options) + 1, 1);
+ *flagp = 0;
+ nextopt = orig_options;
+ while ((opt = strsep(&nextopt, ","))) {
+ if (!*opt)
+ /* empty option */
+ continue;
+ if (parse_one_option(opt, flagp) == 0) {
+ /* no mount flags set, so pass this on as an option */
+ if (*options)
+ strcat(options, ",");
+ strcat(options, opt);
+ }
+ }
+ /* options will always be <= orig_options */
+ strcpy(orig_options, options);
+ free(options);
+ return 0;
+}
+
+
+int main(int argc, char *const argv[])
+{
+ char default_options[] = "";
+ char *source, *target, *options = default_options, *optcopy;
+ int i, nargs = 3, opt, rc, flags, optlen;
+ static struct option long_opt[] = {
+ {"fake", 0, 0, 'f'},
+ {"force", 0, 0, 1},
+ {"help", 0, 0, 'h'},
+ {"nomtab", 0, 0, 'n'},
+ {"options", 1, 0, 'o'},
+ {"verbose", 0, 0, 'v'},
+ {0, 0, 0, 0}
+ };
+
+ progname = strrchr(argv[0], '/');
+ progname = progname ? progname + 1 : argv[0];
+
+ while ((opt = getopt_long(argc, argv, "fhno:v",
+ long_opt, NULL)) != EOF){
+ switch (opt) {
+ case 1:
+ ++force;
+ printf("force: %d\n", force);
+ nargs++;
+ break;
+ case 'f':
+ ++fake;
+ printf("fake: %d\n", fake);
+ nargs++;
+ break;
+ case 'h':
+ usage(stdout);
+ break;
+ case 'n':
+ ++nomtab;
+ printf("nomtab: %d\n", nomtab);
+ nargs++;
+ break;
+ case 'o':
+ options = optarg;
+ nargs++;
+ break;
+ case 'v':
+ ++verbose;
+ printf("verbose: %d\n", verbose);
+ nargs++;
+ break;
+ default:
+ fprintf(stderr, "%s: unknown option '%c'\n",
+ progname, opt);
+ usage(stderr);
+ break;
+ }
+ }
+
+ if (optind + 2 > argc) {
+ fprintf(stderr, "%s: too few arguments\n", progname);
+ usage(stderr);
+ }
+
+ source = convert_hostnames(argv[optind]);
+ target = argv[optind + 1];
+
+ if (!source) {
+ usage(stderr);
+ }
+
+ if (verbose > 1) {
+ for (i = 0; i < argc; i++)
+ printf("arg[%d] = %s\n", i, argv[i]);
+ printf("source = %s, target = %s\n", source, target);
+ }
+
+ if (!force && check_mtab_entry(source, target, "lustre"))
+ return(EEXIST);
+
+ rc = parse_options(options, &flags);
+ if (rc) {
+ fprintf(stderr, "%s: can't parse options: %s\n",
+ progname, options);
+ return(EINVAL);
+ }
+
+ rc = access(target, F_OK);
+ if (rc) {
+ rc = errno;
+ fprintf(stderr, "%s: %s inaccessible: %s\n", progname, target,
+ strerror(errno));
+ return rc;
+ }
+
+ /* In Linux 2.4, the target device doesn't get passed to any of our
+ functions. So we'll stick it on the end of the options. */
+ optlen = strlen(options) + strlen(",device=") + strlen(source) + 1;
+ optcopy = malloc(optlen);
+ strcpy(optcopy, options);
+ if (*optcopy)
+ strcat(optcopy, ",");
+ strcat(optcopy, "device=");
+ strcat(optcopy, source);
+
+ if (verbose)
+ printf("mounting device %s at %s, flags=%#x options=%s\n",
+ source, target, flags, optcopy);
+
+ if (!fake)
+ /* flags and target get to lustre_get_sb, but not
+ lustre_fill_super. Lustre ignores the flags, but mount
+ does not. */
+ rc = mount(source, target, "lustre", flags, (void *)optcopy);
+
+ if (rc) {
+ fprintf(stderr, "%s: mount(%s, %s) failed: %s\n", progname,
+ source, target, strerror(errno));
+ if (errno == ENODEV)
+ fprintf(stderr, "Are the lustre modules loaded?\n"
+ "Check /etc/modules.conf and /proc/filesystems\n");
+ if (errno == ENOTBLK)
+ fprintf(stderr,"Does this filesystem have any OSTs?\n");
+ if (errno == ENOENT)
+ fprintf(stderr,"Is the MGS specification correct? "
+ "(%s)\n", source);
+ if (errno == EALREADY)
+ fprintf(stderr,"The target service is already running. "
+ "(%s)\n", source);
+ if (errno == ENXIO)
+ fprintf(stderr,"The target service failed to start "
+ "(bad config log?) (%s)\n", source);
+ if (errno == EIO)
+ fprintf(stderr,"Is the MGS running? (%s)\n", source);
+ if (errno == EADDRINUSE)
+ fprintf(stderr,"The target service's index is already "
+ "in use. (%s)\n", source);
+ rc = errno;
+ } else if (!nomtab) {
+ rc = update_mtab_entry(source, target, "lustre", options,0,0,0);
+ }
+
+ free(optcopy);
+ free(source);
+ return rc;
+}
} \
} while (0)
-int obd_record(enum cfg_record_type type, int len, void *ptr)
-{
- struct obd_ioctl_data data;
-
- IOC_INIT(data);
- data.ioc_type = type;
- data.ioc_plen1 = len;
- data.ioc_pbuf1 = ptr;
- IOC_PACK("obd_record", data);
-
- return l_ioctl(OBD_DEV_ID, OBD_IOC_DORECORD, &data);
-}
-
int lcfg_ioctl(char * func, int dev_id, struct lustre_cfg *lcfg)
{
int opc;
return rc;
}
+int lcfg_mgs_ioctl(char *func, int dev_id, struct lustre_cfg *lcfg)
+{
+ struct obd_ioctl_data data;
+ static int mgs_device = -1;
+ int rc;
+
+ /* Always operates on MGS dev */
+ if (mgs_device == -1) {
+ static int do_device(char *func, char *devname);
+ do_disconnect(NULL, 1);
+ rc = do_device("mgsioc", "MGS");
+ if (rc) {
+ errno = ENODEV;
+ return -1;
+ }
+ mgs_device = cur_device;
+ }
+
+ IOC_INIT(data);
+ data.ioc_dev = mgs_device;
+ data.ioc_type = LUSTRE_CFG_TYPE;
+ data.ioc_plen1 = lustre_cfg_len(lcfg->lcfg_bufcount,
+ lcfg->lcfg_buflens);
+ data.ioc_pbuf1 = (void *)lcfg;
+ IOC_PACK(func, data);
+
+ rc = l_ioctl(dev_id, OBD_IOC_PARAM, buf);
+
+ if (rc == ENODEV)
+ fprintf(stderr, "Is the MGS running on this node?\n");
+ if (rc == ENOSYS)
+ fprintf(stderr, "Make sure cfg_device is set first.\n");
+ if (rc == EINVAL)
+ fprintf(stderr, "cfg_device should be of the form "
+ "'lustre-MDT0000'\n");
+
+ return rc;
+}
+
char *obdo_print(struct obdo *obd)
{
char buf[1024];
int jt_llog_check(int argc, char **argv);
int lcfg_ioctl(char * func, int dev_id, struct lustre_cfg *lcfg);
+int lcfg_mgs_ioctl(char *func, int dev_id, struct lustre_cfg *lcfg);
int parse_devname(char *func, char *name);
char *jt_cmdname(char *func);
int jt_lcfg_set_lustre_upcall(int argc, char **argv);
int jt_lcfg_add_conn(int argc, char **argv);
int jt_lcfg_del_conn(int argc, char **argv);
+int jt_lcfg_param(int argc, char **argv);
+int jt_lcfg_mgsparam(int argc, char **argv);
int obd_add_uuid(char *uuid, lnet_nid_t nid);
SRCDIR=`dirname $0`
PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH
-lctl modules | awk '{ print $2 }' | xargs rmmod >/dev/null 2>&1
+lctl modules | awk '{ print $2 }' | xargs rmmod >/dev/null 2>&1
# do it again, in case we tried to unload ksocklnd too early
-lctl modules | awk '{ print $2 }' | xargs rmmod
+lsmod | grep lnet > /dev/null && lctl modules | awk '{ print $2 }' | xargs rmmod
CHECK_VALUE(REINT_OPEN);
CHECK_VALUE(REINT_MAX);
+ CHECK_VALUE(MGS_CONNECT);
+ CHECK_VALUE(MGS_DISCONNECT);
+ CHECK_VALUE(MGS_EXCEPTION);
+ CHECK_VALUE(MGS_TARGET_REG);
+ CHECK_VALUE(MGS_TARGET_DEL);
+
CHECK_VALUE(DISP_IT_EXECD);
CHECK_VALUE(DISP_LOOKUP_EXECD);
CHECK_VALUE(DISP_LOOKUP_NEG);
int main()
{
- lustre_assert_wire_constants();
+ lustre_assert_wire_constants();
- if (ret == 0)
- printf("wire constants OK\n");
+ if (ret == 0)
+ printf("wire constants OK\n");
- return ret;
+ return ret;
}
void lustre_assert_wire_constants(void)
(long long)MDS_STATUS_CONN);
LASSERTF(MDS_STATUS_LOV == 2, " found %lld\n",
(long long)MDS_STATUS_LOV);
+ LASSERTF(MGS_CONNECT == 250, " found %lld\n",
+ (long long)MGS_CONNECT);
+ LASSERTF(MGS_DISCONNECT == 251, " found %lld\n",
+ (long long)MGS_DISCONNECT);
+ LASSERTF(MGS_EXCEPTION == 252, " found %lld\n",
+ (long long)MGS_EXCEPTION);
+ LASSERTF(MGS_TARGET_REG == 253, " found %lld\n",
+ (long long)MGS_TARGET_REG);
+ LASSERTF(MGS_TARGET_DEL == 254, " found %lld\n",
+ (long long)MGS_TARGET_DEL);
LASSERTF(LDLM_ENQUEUE == 101, " found %lld\n",
(long long)LDLM_ENQUEUE);
LASSERTF(LDLM_CONVERT == 102, " found %lld\n",