unsigned long count, void *data);
extern int lprocfs_wr_ping(struct file *file, const char *buffer,
unsigned long count, void *data);
+extern int lprocfs_wr_import(struct file *file, const char *buffer,
+ unsigned long count, void *data);
/* Statfs helpers */
extern int lprocfs_rd_blksize(char *page, char **start, off_t off,
int lprocfs_obd_rd_hash(char *page, char **start, off_t off,
int count, int *eof, void *data);
+/* lprocfs_status.c: IR factor */
+int lprocfs_obd_rd_ir_factor(char *page, char **start, off_t off,
+ int count, int *eof, void *data);
+int lprocfs_obd_wr_ir_factor(struct file *file, const char *buffer,
+ unsigned long count, void *data);
+
extern int lprocfs_seq_release(cfs_inode_t *, struct file *);
/* You must use these macros when you want to refer to
int count, int *eof, void *data);
int lprocfs_obd_wr_max_pages_per_rpc(struct file *file, const char *buffer,
unsigned long count, void *data);
+int lprocfs_target_rd_instance(char *page, char **start, off_t off,
+ int count, int *eof, void *data);
+
/* all quota proc functions */
extern int lprocfs_quota_rd_bunit(char *page, char **start,
off_t off, int count,
static inline int lprocfs_wr_ping(struct file *file, const char *buffer,
unsigned long count, void *data)
{ return 0; }
-
+/* No-op variant of lprocfs_wr_import(): ignores the written data and
+ * returns 0.
+ * NOTE(review): presumably the stub used when lprocfs is compiled out -
+ * confirm against the enclosing #ifdef. */
+static inline int lprocfs_wr_import(struct file *file, const char *buffer,
+ unsigned long count, void *data)
+{ return 0; }
/* Statfs helpers */
static inline
#define SEQ_METADATA_PORTAL 30
#define SEQ_DATA_PORTAL 31
#define SEQ_CONTROLLER_PORTAL 32
+#define MGS_BULK_PORTAL 33
/* Portal 63 is reserved for the Cray Inc DVS - nic@cray.com, roe@cray.com, n8851@cray.com */
#define OBD_CONNECT_64BITHASH 0x4000000000ULL /* client supports 64-bits
* directory hash */
#define OBD_CONNECT_MAXBYTES 0x8000000000ULL /* max stripe size */
+#define OBD_CONNECT_IMP_RECOV 0x10000000000ULL /* imp recovery support */
+
+/* Evaluates to 1 if the OBD_CONNECT_<flg> bit is set in
+ * (ocd)->ocd_connect_flags and to 0 otherwise. Pass the flag name without
+ * its OBD_CONNECT_ prefix, e.g. OCD_HAS_FLAG(ocd, IMP_RECOV). */
+#define OCD_HAS_FLAG(ocd, flg) \
+ (!!((ocd)->ocd_connect_flags & OBD_CONNECT_##flg))
+
/* also update obd_connect_names[] for lprocfs_rd_connect_flags()
* and lustre/utils/wirecheck.c */
OBD_CONNECT_64BITHASH | OBD_CONNECT_MAXBYTES)
#define ECHO_CONNECT_SUPPORTED (0)
#define MGS_CONNECT_SUPPORTED (OBD_CONNECT_VERSION | OBD_CONNECT_AT | \
- OBD_CONNECT_FULL20)
+ OBD_CONNECT_FULL20 | OBD_CONNECT_IMP_RECOV)
/* Features required for this version of the client to work with server */
#define CLIENT_CONNECT_MDT_REQD (OBD_CONNECT_IBITS | OBD_CONNECT_FID | \
__u32 ocd_group; /* MDS group on OST */
__u32 ocd_cksum_types; /* supported checksum algorithms */
__u32 ocd_max_easize; /* How big LOV EA can be on MDS */
- __u32 padding; /* also fix lustre_swab_connect */
+ __u32 ocd_instance; /* also fix lustre_swab_connect */
__u64 ocd_maxbytes; /* Maximum stripe size in bytes */
};
__u32 ocd_group; /* MDS group on OST */
__u32 ocd_cksum_types; /* supported checksum algorithms */
__u32 ocd_max_easize; /* How big LOV EA can be on MDS */
- __u32 padding; /* also fix lustre_swab_connect */
+ __u32 ocd_instance; /* instance # of this target */
__u64 ocd_maxbytes; /* Maximum stripe size in bytes */
/* Fields after ocd_maxbytes are only accessible by the receiver
* if the corresponding flag in ocd_connect_flags is set. Accessing
MGS_TARGET_REG, /* whenever target starts up */
MGS_TARGET_DEL,
MGS_SET_INFO,
+ MGS_CONFIG_READ,
MGS_LAST_OPC
} mgs_cmd_t;
#define MGS_FIRST_OPC MGS_CONNECT
};
/* We pass this info to the MGS so it can write config logs */
-#define MTI_NAME_MAXLEN 64
+#define MTI_NAME_MAXLEN 64
#define MTI_PARAM_MAXLEN 4096
-#define MTI_NIDS_MAX 32
+#define MTI_NIDS_MAX 32
struct mgs_target_info {
__u32 mti_lustre_ver;
__u32 mti_stripe_index;
__u32 mti_config_ver;
__u32 mti_flags;
__u32 mti_nid_count;
- __u32 padding; /* 64 bit align */
+ __u32 mti_instance; /* Running instance of target */
char mti_fsname[MTI_NAME_MAXLEN];
char mti_svname[MTI_NAME_MAXLEN];
char mti_uuid[sizeof(struct obd_uuid)];
__u64 mti_nids[MTI_NIDS_MAX]; /* host nids (lnet_nid_t)*/
char mti_params[MTI_PARAM_MAXLEN];
};
-
extern void lustre_swab_mgs_target_info(struct mgs_target_info *oinfo);
+/*
+ * One entry of the target NID table.
+ * An entry is this fixed header followed by mne_nid_count NIDs of
+ * mne_nid_size bytes each; mne_length is the size of the whole entry in
+ * bytes and is what a reader uses to step to the next entry.
+ */
+struct mgs_nidtbl_entry {
+ __u64 mne_version; /* table version of this entry */
+ __u32 mne_instance; /* target instance # */
+ __u32 mne_index; /* target index */
+ __u32 mne_length; /* length of this entry - by bytes */
+ __u8 mne_type; /* target type LDD_F_SV_TYPE_OST/MDT */
+ __u8 mne_nid_type; /* type of nid(mbz). for ipv6. */
+ __u8 mne_nid_size; /* size of each NID, by bytes */
+ __u8 mne_nid_count; /* # of NIDs in buffer */
+ union {
+ lnet_nid_t nids[0]; /* variable size buffer for NIDs. */
+ } u;
+};
+extern void lustre_swab_mgs_nidtbl_entry(struct mgs_nidtbl_entry *oinfo);
+
+/*
+ * Request body of an MGS_CONFIG_READ RPC (message field
+ * RMF_MGS_CONFIG_BODY): names the log to read, where to resume reading and
+ * how much bulk buffer space the requester provides.
+ */
+struct mgs_config_body {
+ char mcb_name[MTI_NAME_MAXLEN]; /* logname */
+ __u64 mcb_offset; /* next index of config log to request */
+ __u16 mcb_type; /* type of log: CONFIG_T_[CONFIG|RECOVER] */
+ __u8 mcb_reserved;
+ __u8 mcb_bits; /* bits unit size of config log */
+ __u32 mcb_units; /* # of units for bulk transfer */
+};
+extern void lustre_swab_mgs_config_body(struct mgs_config_body *body);
+
+/*
+ * Reply body of an MGS_CONFIG_READ RPC (message field RMF_MGS_CONFIG_RES).
+ * The reader treats mcr_offset == mcr_size as "everything has been read"
+ * and mcr_size < mcr_offset as an invalid reply.
+ */
+struct mgs_config_res {
+ __u64 mcr_offset; /* index of last config log */
+ __u64 mcr_size; /* size of the log */
+};
+extern void lustre_swab_mgs_config_res(struct mgs_config_res *body);
+
/* Config marker flags (in config log) */
#define CM_START 0x01
#define CM_END 0x02
#define HEALTH_CHECK "health_check"
#define CAPA_KEYS "capa_keys"
#define CHANGELOG_USERS "changelog_users"
+#define MGS_NIDTBL_DIR "NIDTBL_VERSIONS"
/****************** persistent mount data *********************/
#define LDD_F_SV_TYPE_MDT 0x0001
#define LDD_F_SV_TYPE_OST 0x0002
#define LDD_F_SV_TYPE_MGS 0x0004
+#define LDD_F_SV_TYPE_MASK (LDD_F_SV_TYPE_MDT | \
+ LDD_F_SV_TYPE_OST | \
+ LDD_F_SV_TYPE_MGS)
#define LDD_F_SV_ALL 0x0008
/** need an index assignment */
#define LDD_F_NEED_INDEX 0x0010
#define LDD_F_IAM_DIR 0x0800
/** all nodes are specified as service nodes */
#define LDD_F_NO_PRIMNODE 0x1000
+/** IR enable flag */
+#define LDD_F_IR_CAPABLE 0x2000
+/** the MGS refused to register the target. */
+#define LDD_F_ERROR 0x4000
+
+/* opc for target register */
+#define LDD_F_OPC_REG 0x10000000
+#define LDD_F_OPC_UNREG 0x20000000
+#define LDD_F_OPC_READY 0x40000000
+#define LDD_F_OPC_MASK 0xf0000000
+
+#define LDD_F_ONDISK_MASK (LDD_F_SV_TYPE_MASK | LDD_F_IAM_DIR)
enum ldd_mount_type {
LDD_MT_EXT3 = 0,
#define LMD_FLG_NOMGS 0x0020 /* Only start target for servers, reusing
existing MGS services */
#define LMD_FLG_WRITECONF 0x0040 /* Rewrite config log */
+#define LMD_FLG_NOIR 0x0080 /* NO imperative recovery */
#define lmd_is_client(x) ((x)->lmd_flags & LMD_FLG_CLIENT)
struct ll_sb_info *lsi_llsbi; /* add'l client sbi info */
struct vfsmount *lsi_srv_mnt; /* the one server mount */
cfs_atomic_t lsi_mounts; /* references to the srv_mnt */
- struct backing_dev_info lsi_bdi; /* each client mountpoint needs own backing_dev_info */
+ struct backing_dev_info lsi_bdi; /* each client mountpoint needs
+ own backing_dev_info */
};
#define LSI_SERVER 0x00000001
#define LSI_UMOUNT_FORCE 0x00000010
#define LSI_UMOUNT_FAILOVER 0x00000020
#define LSI_BDI_INITIALIZED 0x00000040
+#define LSI_IR_CAPABLE 0x00000080
#define s2lsi(sb) ((struct lustre_sb_info *)((sb)->s_fs_info))
#define s2lsi_nocast(sb) ((sb)->s_fs_info)
int server_register_target(struct super_block *sb);
struct mgs_target_info;
int server_mti_print(char *title, struct mgs_target_info *mti);
+void server_calc_timeout(struct lustre_sb_info *lsi, struct obd_device *obd);
/* mgc_request.c */
int mgc_fsname2resid(char *fsname, struct ldlm_res_id *res_id, int type);
/* MGS req_format */
extern struct req_format RQF_MGS_TARGET_REG;
extern struct req_format RQF_MGS_SET_INFO;
+extern struct req_format RQF_MGS_CONFIG_READ;
/* fid/fld req_format */
extern struct req_format RQF_SEQ_QUERY;
extern struct req_format RQF_FLD_QUERY;
extern struct req_msg_field RMF_FIEMAP_KEY;
extern struct req_msg_field RMF_FIEMAP_VAL;
+/* MGS config read message format */
+extern struct req_msg_field RMF_MGS_CONFIG_BODY;
+extern struct req_msg_field RMF_MGS_CONFIG_RES;
+
+/* generic uint32 */
+extern struct req_msg_field RMF_U32;
+
/** @} req_layout */
#endif /* _LUSTRE_REQ_LAYOUT_H__ */
/* hold common fields for "target" device */
struct obd_device_target {
__u32 obt_magic;
+ __u32 obt_instance;
struct super_block *obt_sb;
/** last_rcvd file */
struct file *obt_rcvd_filp;
cfs_list_t mgs_fs_db_list;
cfs_semaphore_t mgs_sem;
cfs_proc_dir_entry_t *mgs_proc_live;
+ cfs_time_t mgs_start_time;
};
struct mds_obd {
obd_no_conn:1, /* deny new connections */
obd_inactive:1, /* device active/inactive
* (for /proc/status only!!) */
+ obd_no_ir:1, /* no imperative recovery. */
obd_process_conf:1; /* device is processing mgs config */
/* use separate field as it is set in interrupt to don't mess with
* protection of other bits using _bh lock */
cfs_timer_t obd_recovery_timer;
time_t obd_recovery_start; /* seconds */
time_t obd_recovery_end; /* seconds, for lprocfs_status */
- time_t obd_recovery_time_hard;
+ int obd_recovery_time_hard;
int obd_recovery_timeout;
+ int obd_recovery_ir_factor;
/* new recovery stuff from CMD2 */
struct target_recovery_data obd_recovery_data;
#define KEY_CONNECT_FLAG "connect_flags"
#define KEY_SYNC_LOCK_CANCEL "sync_lock_cancel"
-
struct lu_context;
/* /!\ must be coherent with include/linux/namei.h on patched kernel */
enum {
CONFIG_T_CONFIG = 0,
CONFIG_T_SPTLRPC = 1,
- CONFIG_T_MAX = 2
+ CONFIG_T_RECOVER = 2,
+ CONFIG_T_MAX = 3
};
/* list of active configuration logs */
cfs_list_t cld_list_chain;
cfs_atomic_t cld_refcount;
struct config_llog_data *cld_sptlrpc;/* depended sptlrpc log */
+ struct config_llog_data *cld_recover; /* imperative recover log */
struct obd_export *cld_mgcexp;
cfs_mutex_t cld_lock;
int cld_type;
/* The max delay between connects is SWITCH_MAX + SWITCH_INC + INITIAL */
#define RECONNECT_DELAY_MAX (CONNECTION_SWITCH_MAX + CONNECTION_SWITCH_INC + \
INITIAL_CONNECT_TIMEOUT)
+/* The min time a target should wait for clients to reconnect in recovery */
+#define OBD_RECOVERY_TIME_MIN (2*RECONNECT_DELAY_MAX)
+#define OBD_IR_FACTOR_MIN 1
+#define OBD_IR_FACTOR_MAX 10
+#define OBD_IR_FACTOR_DEFAULT (OBD_IR_FACTOR_MAX/2)
+/* default timeout for the MGS to become IR_FULL */
+#define OBD_IR_MGS_TIMEOUT (4*obd_timeout)
#define LONG_UNLINK 300 /* Unlink should happen before now */
/**
}
if (rc)
GOTO(out, rc);
+
+ LASSERT(target->u.obt.obt_magic == OBT_MAGIC);
+ data->ocd_instance = target->u.obt.obt_instance;
+
/* Return only the parts of obd_connect_data that we understand, so the
* client knows that we don't understand the rest. */
if (data) {
{ "max_rpcs_in_flight", mdc_rd_max_rpcs_in_flight,
mdc_wr_max_rpcs_in_flight, 0 },
{ "timeouts", lprocfs_rd_timeouts, 0, 0 },
- { "import", lprocfs_rd_import, 0, 0 },
+ { "import", lprocfs_rd_import, lprocfs_wr_import, 0 },
{ "state", lprocfs_rd_state, 0, 0 },
{ "hsm_nl", 0, mdc_wr_kuc, 0, 0, 0222 },
{ 0 }
CERROR("CMD Operation not allowed in IOP mode\n");
GOTO(err_lmi, rc = -EINVAL);
}
- /* Read recovery timeouts */
- if (lsi->lsi_lmd && lsi->lsi_lmd->lmd_recovery_time_soft)
- obd->obd_recovery_timeout =
- lsi->lsi_lmd->lmd_recovery_time_soft;
-
- if (lsi->lsi_lmd && lsi->lsi_lmd->lmd_recovery_time_hard)
- obd->obd_recovery_time_hard =
- lsi->lsi_lmd->lmd_recovery_time_hard;
+
+ obd->u.obt.obt_magic = OBT_MAGIC;
}
cfs_rwlock_init(&m->mdt_sptlrpc_lock);
{ "som", lprocfs_rd_mdt_som,
lprocfs_wr_mdt_som, 0 },
{ "mdccomm", 0, lprocfs_mdt_wr_mdc, 0 },
+ { "instance", lprocfs_target_rd_instance, 0 },
+ { "ir_factor", lprocfs_obd_rd_ir_factor,
+ lprocfs_obd_wr_ir_factor, 0 },
{ 0 }
};
cfs_spin_unlock(&mdt->mdt_lut.lut_translock);
obd->u.obt.obt_mount_count = mount_count + 1;
+ obd->u.obt.obt_instance = (__u32)obd->u.obt.obt_mount_count;
lsd->lsd_mount_count = obd->u.obt.obt_mount_count;
/* save it, so mount count and last_transno is current */
return cld->cld_type == CONFIG_T_SPTLRPC;
}
+/* Non-zero iff \a cld describes an imperative recovery (CONFIG_T_RECOVER)
+ * log. */
+static inline int cld_is_recover(struct config_llog_data *cld)
+{
+        int recover = (cld->cld_type == CONFIG_T_RECOVER);
+
+        return recover;
+}
+
#endif /* _MGC_INTERNAL_H */
case CONFIG_T_SPTLRPC:
resname = 0;
break;
+ case CONFIG_T_RECOVER:
+ resname = type;
+ break;
default:
LBUG();
}
CDEBUG(D_MGC, "dropping config log %s\n", cld->cld_logname);
+ if (cld->cld_recover)
+ config_log_put(cld->cld_recover);
if (cld->cld_sptlrpc)
config_log_put(cld->cld_sptlrpc);
if (cld_is_sptlrpc(cld))
RETURN(cld);
}
+/**
+ * Add the imperative recovery log of file system \a fsname to the logs
+ * watched by \a obd.
+ *
+ * The log is named "<fsname>-mdtir" when \a sb belongs to a server (MDT) and
+ * "<fsname>-cliir" otherwise. A private copy of \a cfg is used so that the
+ * caller's instance is never modified; on an MDT the copy's cfg_instance is
+ * set to \a sb.
+ *
+ * \retval NULL if \a sb belongs to an OST server (no recover log is added)
+ * \retval otherwise whatever do_config_log_add() returns
+ */
+static struct config_llog_data *config_recover_log_add(struct obd_device *obd,
+                                                       char *fsname,
+                                                       struct config_llog_instance *cfg,
+                                                       struct super_block *sb)
+{
+        struct config_llog_instance  lcfg = *cfg;
+        struct lustre_sb_info       *lsi = s2lsi(sb);
+        const char                  *suffix;
+        char                         logname[32];
+
+        if ((lsi->lsi_flags & LSI_SERVER) && IS_OST(lsi->lsi_ldd))
+                return NULL;
+
+        /* we have to use different llog for clients and mdts for cmd
+         * where only clients are notified if one of cmd server restarts */
+        LASSERT(strlen(fsname) < sizeof(logname) / 2);
+        strcpy(logname, fsname);
+
+        if (!(lsi->lsi_flags & LSI_SERVER)) {
+                /* client */
+                LASSERT(lcfg.cfg_instance != NULL);
+                suffix = "-cliir";
+        } else {
+                /* mdt */
+                LASSERT(lcfg.cfg_instance == NULL);
+                lcfg.cfg_instance = sb;
+                suffix = "-mdtir";
+        }
+        strcat(logname, suffix);
+
+        return do_config_log_add(obd, logname, CONFIG_T_RECOVER, &lcfg, sb);
+}
+
+
/** Add this log to the list of active logs watched by an MGC.
* Active means we're watching for updates.
* We have one active log per "mount" - client instance or servername.
struct config_llog_instance *cfg,
struct super_block *sb)
{
+ struct lustre_sb_info *lsi = s2lsi(sb);
struct config_llog_data *cld;
struct config_llog_data *sptlrpc_cld;
char seclogname[32];
cld->cld_sptlrpc = sptlrpc_cld;
+ LASSERT(lsi->lsi_lmd);
+ if (!(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOIR)) {
+ struct config_llog_data *recover_cld;
+ *strrchr(seclogname, '-') = 0;
+ recover_cld = config_recover_log_add(obd, seclogname, cfg, sb);
+ if (IS_ERR(recover_cld)) {
+ config_log_put(cld);
+ RETURN(PTR_ERR(recover_cld));
+ }
+ cld->cld_recover = recover_cld;
+ }
+
RETURN(0);
}
*/
static int config_log_end(char *logname, struct config_llog_instance *cfg)
{
- struct config_llog_data *cld, *cld_sptlrpc = NULL;
+ struct config_llog_data *cld;
+ struct config_llog_data *cld_sptlrpc = NULL;
+ struct config_llog_data *cld_recover = NULL;
int rc = 0;
ENTRY;
}
cld->cld_stopping = 1;
+
+ cld_recover = cld->cld_recover;
+ cld->cld_recover = NULL;
cfs_mutex_unlock(&cld->cld_lock);
+ if (cld_recover) {
+ cfs_mutex_lock(&cld_recover->cld_lock);
+ cld_recover->cld_stopping = 1;
+ cfs_mutex_unlock(&cld_recover->cld_lock);
+ config_log_put(cld_recover);
+ }
+
cfs_spin_lock(&config_list_lock);
cld_sptlrpc = cld->cld_sptlrpc;
cld->cld_sptlrpc = NULL;
RETURN(rc);
}
+/**
+ * get_info handler of the MGC.
+ *
+ * Only KEY_CONN_DATA is served: when \a *vallen equals
+ * sizeof(struct obd_connect_data) the import's connect data is copied into
+ * \a val.
+ *
+ * \retval 0       connect data copied to \a val
+ * \retval -EINVAL unknown key, or \a *vallen has the wrong size
+ */
+static int mgc_get_info(struct obd_export *exp, __u32 keylen, void *key,
+                        __u32 *vallen, void *val, struct lov_stripe_md *unused)
+{
+        struct obd_connect_data *ocd = val;
+        struct obd_import       *imp;
+
+        /* KEY_IS() implicitly inspects the keylen/key parameters */
+        if (!(KEY_IS(KEY_CONN_DATA)))
+                return -EINVAL;
+
+        imp = class_exp2cliimp(exp);
+        if (*vallen != sizeof(*ocd))
+                return -EINVAL;
+
+        *ocd = imp->imp_connect_data;
+        return 0;
+}
+
static int mgc_import_event(struct obd_device *obd,
struct obd_import *imp,
enum obd_import_event event)
RETURN(rc);
}
+/*
+ * Number of pages used as bulk buffer when reading the recover log:
+ * CONFIG_READ_NRPAGES_INIT (as many pages as make up 1 << 20 bytes) for the
+ * first read, when the whole log is fetched, and CONFIG_READ_NRPAGES for
+ * the later incremental reads.
+ */
+enum {
+ CONFIG_READ_NRPAGES_INIT = 1 << (20 - CFS_PAGE_SHIFT),
+ CONFIG_READ_NRPAGES = 4
+};
+
+/**
+ * Apply one buffer of NID table entries (struct mgs_nidtbl_entry) to the
+ * local osc/mdc devices.
+ *
+ * For every entry the name of the matching client device is built from the
+ * log name, the target type/index and the local instance, the connection
+ * UUID is looked up by the first NID of the entry, and an
+ * "<osc|mdc>.import=connection=<uuid>::<instance>" parameter is handed to
+ * class_process_config().
+ *
+ * \param obd         MGC device (not used by this function)
+ * \param cld         recover log the entries belong to
+ * \param max_version highest entry version the reply may contain
+ * \param data        buffer holding the entries
+ * \param datalen     number of valid bytes in \a data
+ *
+ * \retval 0       entries applied; a target whose client device does not
+ *                 exist (yet) is skipped and is not an error
+ * \retval -ENOMEM allocation failure
+ * \retval -EINVAL malformed, oversized, unsorted or too-new entry
+ * \retval other   error of the connection lookup, or of
+ *                 class_process_config() for the last entry processed
+ */
+static int mgc_apply_recover_logs(struct obd_device *obd,
+                                  struct config_llog_data *cld,
+                                  __u64 max_version,
+                                  void *data, int datalen)
+{
+        struct config_llog_instance *cfg = &cld->cld_cfg;
+        struct lustre_sb_info       *lsi = s2lsi(cfg->cfg_sb);
+        struct mgs_nidtbl_entry     *entry;
+        struct lustre_cfg           *lcfg;
+        struct lustre_cfg_bufs       bufs;
+        u64   prev_version = 0;
+        char *inst;
+        char *buf;
+        int   bufsz = CFS_PAGE_SIZE;
+        int   pos;
+        int   rc = 0;
+        int   off = 0;
+
+        OBD_ALLOC(buf, CFS_PAGE_SIZE);
+        if (buf == NULL)
+                return -ENOMEM;
+
+        LASSERT(cfg->cfg_instance != NULL);
+        LASSERT(cfg->cfg_sb == cfg->cfg_instance);
+
+        /* The instance string lives at the head of the page; the rest of
+         * the page (buf/bufsz) is scratch space for names and parameters.
+         * inst keeps the original address for OBD_FREE(). */
+        inst = buf;
+        if (!(lsi->lsi_flags & LSI_SERVER)) {
+                pos = sprintf(inst, "%p", cfg->cfg_instance);
+        } else {
+                LASSERT(IS_MDT(lsi->lsi_ldd));
+                pos = sprintf(inst, "MDT%04x", lsi->lsi_ldd->ldd_svindex);
+        }
+        buf += pos + 1;
+        bufsz -= pos + 1;
+
+        while (datalen > 0) {
+                int   entry_len = sizeof(*entry);
+                int   is_ost;
+                struct obd_device *cli_obd;
+                char *obdname;
+                char *cname;
+                char *params;
+                char *uuid;
+
+                /* every "break" below reports a malformed buffer unless rc
+                 * is set explicitly first */
+                rc = -EINVAL;
+                if (datalen < sizeof(*entry))
+                        break;
+
+                entry = (typeof(entry))(data + off);
+
+                /* sanity check */
+                if (entry->mne_nid_type != 0) /* only support type 0 for ipv4 */
+                        break;
+                if (entry->mne_nid_count == 0) /* at least one nid entry */
+                        break;
+                if (entry->mne_nid_size != sizeof(lnet_nid_t))
+                        break;
+
+                entry_len += entry->mne_nid_count * entry->mne_nid_size;
+                if (datalen < entry_len) /* must have entry_len at least */
+                        break;
+
+                lustre_swab_mgs_nidtbl_entry(entry);
+                /* mne_length comes off the wire: reject a bad value instead
+                 * of asserting on it */
+                if (entry->mne_length > CFS_PAGE_SIZE) {
+                        CERROR("mgc: nidtbl entry too large (%u)\n",
+                               entry->mne_length);
+                        break;
+                }
+                if (entry->mne_length < entry_len)
+                        break;
+
+                off += entry->mne_length;
+                datalen -= entry->mne_length;
+                if (datalen < 0)
+                        break;
+
+                if (entry->mne_version > max_version) {
+                        CERROR("entry index(%lld) is over max_index(%lld)\n",
+                               entry->mne_version, max_version);
+                        break;
+                }
+
+                if (prev_version >= entry->mne_version) {
+                        CERROR("index unsorted, prev %lld, now %lld\n",
+                               prev_version, entry->mne_version);
+                        break;
+                }
+                prev_version = entry->mne_version;
+
+                /*
+                 * Write a string with format "nid::instance" to
+                 * lustre/<osc|mdc>/<target>-<osc|mdc>-<instance>/import.
+                 */
+
+                is_ost = entry->mne_type == LDD_F_SV_TYPE_OST;
+                memset(buf, 0, bufsz);
+                obdname = buf;
+                pos = 0;
+
+                /* lustre-OST0001-osc-<instance #> */
+                strcpy(obdname, cld->cld_logname);
+                cname = strrchr(obdname, '-');
+                if (cname == NULL) {
+                        CERROR("mgc: invalid logname %s\n", obdname);
+                        break;
+                }
+
+                pos = cname - obdname;
+                obdname[pos] = 0;
+                pos += sprintf(obdname + pos, "-%s%04x",
+                               is_ost ? "OST" : "MDT", entry->mne_index);
+
+                cname = is_ost ? "osc" : "mdc";
+                pos += sprintf(obdname + pos, "-%s-%s", cname, inst);
+                lustre_cfg_bufs_reset(&bufs, obdname);
+
+                /* find the obd by obdname */
+                cli_obd = class_name2obd(obdname);
+                if (cli_obd == NULL) {
+                        CDEBUG(D_INFO, "mgc: cannot find obdname %s\n",
+                               obdname);
+
+                        /* this is a safe race, when the ost is starting
+                         * up...; it must not be reported as an error even
+                         * when this is the last entry of the buffer */
+                        rc = 0;
+                        continue;
+                }
+
+                /* osc.import = "connection=<Conn UUID>::<target instance>" */
+                ++pos;
+                params = buf + pos;
+                pos += sprintf(params, "%s.import=%s", cname, "connection=");
+                uuid = buf + pos;
+
+                /* TODO: iterate all nids to find one */
+                /* find uuid by nid */
+                rc = client_import_find_conn(cli_obd->u.cli.cl_import,
+                                             entry->u.nids[0],
+                                             (struct obd_uuid *)uuid);
+                if (rc < 0) {
+                        CERROR("mgc: cannot find uuid by nid %s\n",
+                               libcfs_nid2str(entry->u.nids[0]));
+                        break;
+                }
+
+                CDEBUG(D_INFO, "Find uuid %s by nid %s\n",
+                       uuid, libcfs_nid2str(entry->u.nids[0]));
+
+                pos += strlen(uuid);
+                pos += sprintf(buf + pos, "::%u", entry->mne_instance);
+                LASSERT(pos < bufsz);
+
+                lustre_cfg_bufs_set_string(&bufs, 1, params);
+
+                rc = -ENOMEM;
+                lcfg = lustre_cfg_new(LCFG_PARAM, &bufs);
+                if (lcfg == NULL) {
+                        CERROR("mgc: cannot allocate memory\n");
+                        break;
+                }
+
+                CDEBUG(D_INFO, "ir apply logs "LPD64"/"LPD64" for %s -> %s\n",
+                       prev_version, max_version, obdname, params);
+
+                rc = class_process_config(lcfg);
+                lustre_cfg_free(lcfg);
+                if (rc)
+                        CDEBUG(D_INFO, "process config for %s error %d\n",
+                               obdname, rc);
+
+                /* continue, even one with error */
+        }
+
+        OBD_FREE(inst, CFS_PAGE_SIZE);
+        return rc;
+}
+
+/**
+ * Fetch the recover log \a cld from the MGS and apply it.
+ *
+ * Called after the MGS has notified this node that a target restarted.
+ * A CONFIG_READ RPC with a bulk buffer is sent to fetch the recover log,
+ * starting after the last index already seen (cfg_last_idx). Each received
+ * page is passed to mgc_apply_recover_logs(). The RPC is repeated until the
+ * MGS reports that the end of the log was reached.
+ *
+ * The caller must hold cld->cld_lock.
+ *
+ * \retval 0       log read up to its end; errors while applying individual
+ *                 pages are only logged
+ * \retval -ENOMEM allocation failure
+ * \retval -EPROTO the reply carries no mgs_config_res
+ * \retval -EINVAL inconsistent reply from the MGS
+ * \retval other   RPC or bulk unwrap error
+ */
+static int mgc_process_recover_log(struct obd_device *obd,
+                                   struct config_llog_data *cld)
+{
+        struct ptlrpc_request *req = NULL;
+        struct config_llog_instance *cfg = &cld->cld_cfg;
+        struct mgs_config_body *body;
+        struct mgs_config_res *res;
+        struct ptlrpc_bulk_desc *desc;
+        cfs_page_t **pages;
+        int nrpages;
+        bool eof = true;
+        int i;
+        int ealen;
+        int rc;
+        ENTRY;
+
+        /* allocate buffer for bulk transfer.
+         * if this is the first time for this mgs to read logs,
+         * CONFIG_READ_NRPAGES_INIT will be used since it will read all logs
+         * once; otherwise, it only reads increment of logs, this should be
+         * small and CONFIG_READ_NRPAGES will be used.
+         */
+        nrpages = CONFIG_READ_NRPAGES;
+        if (cfg->cfg_last_idx == 0) /* the first time */
+                nrpages = CONFIG_READ_NRPAGES_INIT;
+
+        OBD_ALLOC(pages, sizeof(*pages) * nrpages);
+        if (pages == NULL)
+                GOTO(out, rc = -ENOMEM);
+
+        for (i = 0; i < nrpages; i++) {
+                pages[i] = cfs_alloc_page(CFS_ALLOC_STD);
+                if (pages[i] == NULL)
+                        GOTO(out, rc = -ENOMEM);
+        }
+
+again:
+        LASSERT(cld_is_recover(cld));
+        LASSERT(cfs_mutex_is_locked(&cld->cld_lock));
+        req = ptlrpc_request_alloc(class_exp2cliimp(cld->cld_mgcexp),
+                                   &RQF_MGS_CONFIG_READ);
+        if (req == NULL)
+                GOTO(out, rc = -ENOMEM);
+
+        rc = ptlrpc_request_pack(req, LUSTRE_MGS_VERSION, MGS_CONFIG_READ);
+        if (rc)
+                GOTO(out, rc);
+
+        /* pack request */
+        body = req_capsule_client_get(&req->rq_pill, &RMF_MGS_CONFIG_BODY);
+        LASSERT(body != NULL);
+        /* the assertion guarantees that strncpy() leaves room for the
+         * terminating NUL */
+        LASSERT(sizeof(body->mcb_name) > strlen(cld->cld_logname));
+        strncpy(body->mcb_name, cld->cld_logname, sizeof(body->mcb_name));
+        body->mcb_offset = cfg->cfg_last_idx + 1;
+        body->mcb_type   = cld->cld_type;
+        body->mcb_bits   = CFS_PAGE_SHIFT;
+        body->mcb_units  = nrpages;
+
+        /* allocate bulk transfer descriptor */
+        desc = ptlrpc_prep_bulk_imp(req, nrpages, BULK_PUT_SINK,
+                                    MGS_BULK_PORTAL);
+        if (desc == NULL)
+                GOTO(out, rc = -ENOMEM);
+
+        for (i = 0; i < nrpages; i++)
+                ptlrpc_prep_bulk_page(desc, pages[i], 0, CFS_PAGE_SIZE);
+
+        ptlrpc_request_set_replen(req);
+        rc = ptlrpc_queue_wait(req);
+        if (rc)
+                GOTO(out, rc);
+
+        res = req_capsule_server_get(&req->rq_pill, &RMF_MGS_CONFIG_RES);
+        /* do not dereference a reply field that is not there */
+        if (res == NULL)
+                GOTO(out, rc = -EPROTO);
+        if (res->mcr_size < res->mcr_offset)
+                GOTO(out, rc = -EINVAL);
+
+        /* always update the index even though it might have errors with
+         * handling the recover logs */
+        cfg->cfg_last_idx = res->mcr_offset;
+        eof = res->mcr_offset == res->mcr_size;
+
+        CDEBUG(D_INFO, "Latest version "LPD64", more %d.\n",
+               res->mcr_offset, eof == false);
+
+        ealen = sptlrpc_cli_unwrap_bulk_read(req, req->rq_bulk, 0);
+        if (ealen < 0)
+                GOTO(out, rc = ealen);
+
+        if (ealen > nrpages << CFS_PAGE_SHIFT)
+                GOTO(out, rc = -EINVAL);
+
+        if (ealen == 0) { /* no logs transferred */
+                if (!eof)
+                        rc = -EINVAL;
+                GOTO(out, rc);
+        }
+
+        /* apply page by page; a failure stops this batch but is not
+         * propagated to the caller */
+        for (i = 0; i < nrpages && ealen > 0; i++) {
+                int   rc2;
+                void *ptr;
+
+                ptr = cfs_kmap(pages[i]);
+                rc2 = mgc_apply_recover_logs(obd, cld, res->mcr_offset, ptr,
+                                             min_t(int, ealen, CFS_PAGE_SIZE));
+                cfs_kunmap(pages[i]);
+                if (rc2 < 0) {
+                        CWARN("Process recover log %s error %d\n",
+                              cld->cld_logname, rc2);
+                        break;
+                }
+
+                ealen -= CFS_PAGE_SIZE;
+        }
+
+out:
+        if (req)
+                ptlrpc_req_finished(req);
+
+        /* more of the log is left on the MGS: fetch the next chunk with the
+         * same pages */
+        if (rc == 0 && !eof)
+                goto again;
+
+        if (pages) {
+                /* OBD_ALLOC() zeroes the array, so the first NULL slot marks
+                 * where page allocation stopped */
+                for (i = 0; i < nrpages; i++) {
+                        if (pages[i] == NULL)
+                                break;
+                        cfs_free_page(pages[i]);
+                }
+                OBD_FREE(pages, sizeof(*pages) * nrpages);
+        }
+        RETURN(rc);
+}
+
/* identical to mgs_log_is_empty */
static int mgc_llog_is_empty(struct obd_device *obd, struct llog_ctxt *ctxt,
char *name)
config_log_get(cld);
}
- rc = mgc_process_cfg_log(mgc, cld, rcl != 0);
+
+ if (cld_is_recover(cld)) {
+ rc = 0; /* this is not a fatal error for recover log */
+ if (rcl == 0)
+ rc = mgc_process_recover_log(mgc, cld);
+ } else {
+ rc = mgc_process_cfg_log(mgc, cld, rcl != 0);
+ }
CDEBUG(D_MGC, "%s: configuration from log '%s' %sed (%d).\n",
mgc->obd_name, cld->cld_logname, rc ? "fail" : "succeed", rc);
cld->cld_cfg.cfg_flags |= CFG_F_COMPAT146;
rc = mgc_process_log(obd, cld);
+ if (rc == 0 && cld->cld_recover) {
+ rc = mgc_process_log(obd, cld->cld_recover);
+ if (rc)
+ CERROR("Cannot process recover llog %d\n", rc);
+ }
config_log_put(cld);
break;
.o_cancel = mgc_cancel,
//.o_iocontrol = mgc_iocontrol,
.o_set_info_async = mgc_set_info_async,
+ .o_get_info = mgc_get_info,
.o_import_event = mgc_import_event,
.o_llog_init = mgc_llog_init,
.o_llog_finish = mgc_llog_finish,
MODULES := mgs
-mgs-objs := mgs_handler.o mgs_fs.o mgs_llog.o lproc_mgs.o
+mgs-objs := mgs_handler.o mgs_fs.o mgs_llog.o lproc_mgs.o mgs_nids.o
EXTRA_DIST := $(mgs-objs:%.o=%.c) mgs_internal.h
}
seq_show_srpc_rules(seq, fsdb->fsdb_name, &fsdb->fsdb_srpc_gen);
+ seq_printf(seq, "\nImperative Recovery Status:\n");
+
+ lprocfs_rd_ir_state(seq, fsdb);
+
cfs_up(&fsdb->fsdb_sem);
return 0;
}
-LPROC_SEQ_FOPS_RO(mgs_live);
+/*
+ * Write handler of the per-filesystem "live" proc file: forwards the user
+ * buffer to lprocfs_wr_ir_state() for the fs_db stored in the seq_file.
+ * Returns \a len when the handler succeeds, its negative error otherwise.
+ */
+static ssize_t mgs_live_seq_write(struct file *file, const char *buf,
+                                  size_t len, loff_t *off)
+{
+        struct seq_file *sf = file->private_data;
+        struct fs_db    *db = sf->private;
+        ssize_t          ret = lprocfs_wr_ir_state(file, buf, len, db);
+
+        if (ret < 0)
+                return ret;
+
+        /* the whole buffer counts as consumed */
+        return len;
+}
+LPROC_SEQ_FOPS(mgs_live);
int lproc_mgs_add_live(struct obd_device *obd, struct fs_db *fsdb)
{
}
mgs->mgs_configs_dir = dentry;
+        /* create directory to store nid table versions */
+        dentry = simple_mkdir(cfs_fs_pwd(current->fs), mnt, MGS_NIDTBL_DIR,
+                              0777, 1);
+        if (IS_ERR(dentry)) {
+                rc = PTR_ERR(dentry);
+                /* name the directory that actually failed to be created */
+                CERROR("cannot create %s directory: rc = %d\n",
+                       MGS_NIDTBL_DIR, rc);
+                GOTO(err_pop, rc);
+        } else {
+                /* only the existence of the directory matters here, the
+                 * dentry reference is not kept */
+                dput(dentry);
+        }
+
err_pop:
pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
return rc;
GOTO(err_ops, rc = -EROFS);
}
+ obd->u.obt.obt_magic = OBT_MAGIC;
+ obd->u.obt.obt_instance = 0;
+
/* namespace for mgs llog */
obd->obd_namespace = ldlm_namespace_new(obd ,"MGS",
LDLM_NAMESPACE_SERVER,
/* Internal mgs setup */
mgs_init_fsdb_list(obd);
cfs_sema_init(&mgs->mgs_sem, 1);
+ mgs->mgs_start_time = cfs_time_current_sec();
/* Setup proc */
lprocfs_mgs_init_vars(&lvars);
}
/* similar to filter_prepare_destroy */
-static int mgs_get_cfg_lock(struct obd_device *obd, char *fsname,
- struct lustre_handle *lockh)
+int mgs_get_lock(struct obd_device *obd, struct ldlm_res_id *res,
+ struct lustre_handle *lockh)
{
- struct ldlm_res_id res_id;
int rc, flags = 0;
ENTRY;
- rc = mgc_fsname2resid(fsname, &res_id, CONFIG_T_CONFIG);
- if (!rc)
- rc = ldlm_cli_enqueue_local(obd->obd_namespace, &res_id,
- LDLM_PLAIN, NULL, LCK_EX,
- &flags, ldlm_blocking_ast,
- ldlm_completion_ast, NULL,
- fsname, 0, NULL, lockh);
+ rc = ldlm_cli_enqueue_local(obd->obd_namespace, res,
+ LDLM_PLAIN, NULL, LCK_EX,
+ &flags, ldlm_blocking_ast,
+ ldlm_completion_ast, NULL,
+ NULL, 0, NULL, lockh);
if (rc)
- CERROR("can't take cfg lock for %s (%d)\n", fsname, rc);
+ CERROR("can't take cfg lock for "LPX64"/"LPX64"(%d)\n",
+ le64_to_cpu(res->name[0]), le64_to_cpu(res->name[1]),
+ rc);
RETURN(rc);
}
-static int mgs_put_cfg_lock(struct lustre_handle *lockh)
+int mgs_put_lock(struct lustre_handle *lockh)
{
ENTRY;
- ldlm_lock_decref(lockh, LCK_EX);
+ ldlm_lock_decref_and_cancel(lockh, LCK_EX);
RETURN(0);
}
void mgs_revoke_lock(struct obd_device *obd, struct fs_db *fsdb)
{
struct lustre_handle lockh;
+ struct ldlm_res_id res_id;
int lockrc;
+ int bit;
+ int rc;
LASSERT(fsdb->fsdb_name[0] != '\0');
+ rc = mgc_fsname2resid(fsdb->fsdb_name, &res_id, CONFIG_T_CONFIG);
+ LASSERT(rc == 0);
- if (cfs_test_and_set_bit(FSDB_REVOKING_LOCK, &fsdb->fsdb_flags) == 0) {
- lockrc = mgs_get_cfg_lock(obd, fsdb->fsdb_name, &lockh);
+ bit = FSDB_REVOKING_LOCK;
+ if (!rc && cfs_test_and_set_bit(bit, &fsdb->fsdb_flags) == 0) {
+ lockrc = mgs_get_lock(obd, &res_id, &lockh);
/* clear the bit before lock put */
- cfs_clear_bit(FSDB_REVOKING_LOCK, &fsdb->fsdb_flags);
+ cfs_clear_bit(bit, &fsdb->fsdb_flags);
if (lockrc != ELDLM_OK)
CERROR("lock error %d for fs %s\n",
lockrc, fsdb->fsdb_name);
else
- mgs_put_cfg_lock(&lockh);
+ mgs_put_lock(&lockh);
}
}
struct obd_device *obd = req->rq_export->exp_obd;
struct mgs_target_info *mti, *rep_mti;
struct fs_db *fsdb;
+ int opc;
int rc = 0;
ENTRY;
mti = req_capsule_client_get(&req->rq_pill, &RMF_MGS_TARGET_INFO);
+ opc = mti->mti_flags & LDD_F_OPC_MASK;
+ if (opc == LDD_F_OPC_READY) {
+ CDEBUG(D_MGS, "fs: %s index: %d is ready to reconnect.\n",
+ mti->mti_fsname, mti->mti_stripe_index);
+ rc = mgs_ir_update(obd, mti);
+ if (rc) {
+ LASSERT(!(mti->mti_flags & LDD_F_IR_CAPABLE));
+ CERROR("Update IR return with %d(ignore and IR "
+ "disabled)\n", rc);
+ }
+ GOTO(out_nolock, rc);
+ }
+
+ /* Do not support unregistering right now. */
+ if (opc != LDD_F_OPC_REG)
+ GOTO(out_nolock, rc = -EINVAL);
+
+ CDEBUG(D_MGS, "fs: %s index: %d is registered to MGS.\n",
+ mti->mti_fsname, mti->mti_stripe_index);
+
if (mti->mti_flags & LDD_F_NEED_INDEX)
mti->mti_flags |= LDD_F_WRITECONF;
CDEBUG(D_MGS, "replying with %s, index=%d, rc=%d\n", mti->mti_svname,
mti->mti_stripe_index, rc);
req->rq_status = rc;
+ if (rc)
+ /* we need an error flag to tell the target what's going on,
+ * instead of just doing it by error code only. */
+ mti->mti_flags |= LDD_F_ERROR;
+
rc = req_capsule_server_pack(&req->rq_pill);
if (rc)
RETURN(rc);
RETURN(rc);
}
+/*
+ * Handle an MGS_CONFIG_READ request by dispatching on the requested log
+ * type: CONFIG_T_RECOVER logs are served by mgs_get_ir_logs(),
+ * CONFIG_T_CONFIG is answered with -ENOTSUPP, and a missing request body or
+ * any other type yields -EINVAL.
+ */
+static int mgs_config_read(struct ptlrpc_request *req)
+{
+        struct mgs_config_body *mcb;
+        int rc;
+        ENTRY;
+
+        mcb = req_capsule_client_get(&req->rq_pill, &RMF_MGS_CONFIG_BODY);
+        if (mcb == NULL)
+                RETURN(-EINVAL);
+
+        if (mcb->mcb_type == CONFIG_T_RECOVER)
+                rc = mgs_get_ir_logs(req);
+        else if (mcb->mcb_type == CONFIG_T_CONFIG)
+                rc = -ENOTSUPP;
+        else
+                rc = -EINVAL;
+
+        RETURN(rc);
+}
+
/*
* similar as in ost_connect_check_sptlrpc()
*/
req_capsule_set(&req->rq_pill, &RQF_MGS_SET_INFO);
rc = mgs_set_info_rpc(req);
break;
-
+ case MGS_CONFIG_READ:
+ DEBUG_REQ(D_MGS, req, "read config");
+ req_capsule_set(&req->rq_pill, &RQF_MGS_CONFIG_READ);
+ rc = mgs_config_read(req);
+ break;
case LDLM_ENQUEUE:
DEBUG_REQ(D_MGS, req, "enqueue");
req_capsule_set(&req->rq_pill, &RQF_LDLM_ENQUEUE);
#include <lustre_log.h>
#include <lustre_export.h>
-/* mgs_llog.c */
-int class_dentry_readdir(struct obd_device *obd, struct dentry *dir,
- struct vfsmount *inmnt,
- cfs_list_t *dentry_list);
-
#define MGSSELF_NAME "_mgs"
+/* -- imperative recovery control data structures -- */
+/**
+ * One target recorded in a file system's NID table (struct mgs_nidtbl).
+ * NOTE(review): the original comment only read "restarting targets." -
+ * presumably one of these is kept per target that registered/restarted;
+ * confirm against mgs_nids.c.
+ */
+struct mgs_nidtbl;
+struct mgs_nidtbl_target {
+ cfs_list_t mnt_list; /* list linkage; presumably on mn_targets - confirm */
+ struct mgs_nidtbl *mnt_fs; /* NID table this target is recorded in */
+ u64 mnt_version; /* presumably table version of this target's entry - confirm */
+ int mnt_type; /* OST or MDT */
+ cfs_time_t mnt_last_active;
+ struct mgs_target_info mnt_mti; /* embedded copy of the target info */
+};
+
+/* Imperative recovery (IR) states.
+ * NOTE(review): presumably the values stored in fs_db::fsdb_ir_state -
+ * confirm against mgs_nids.c. */
+enum {
+ IR_FULL = 0,
+ IR_STARTUP,
+ IR_DISABLED,
+ IR_PARTIAL
+};
+
+/* Array initializer with the printable name of each IR state, listed in
+ * the same order as the enum above so it can be indexed by state. */
+#define IR_STRINGS { "full", "startup", "disabled", "partial" }
+
+/**
+ * Target NID table of one file system; embedded in struct fs_db as
+ * fsdb_nidtbl.
+ */
+struct fs_db;
+
+struct mgs_nidtbl {
+ struct fs_db *mn_fsdb; /* file system database this table is part of */
+ struct file *mn_version_file; /* presumably the file under MGS_NIDTBL_DIR persisting mn_version - confirm */
+ cfs_mutex_t mn_lock; /* presumably protects the members below - confirm */
+ u64 mn_version; /* table version */
+ int mn_nr_targets; /* presumably the number of items on mn_targets - confirm */
+ cfs_list_t mn_targets; /* list head; presumably of struct mgs_nidtbl_target - confirm */
+};
+
struct mgs_tgt_srpc_conf {
struct mgs_tgt_srpc_conf *mtsc_next;
char *mtsc_tgt;
#define FSDB_OSCNAME18 (4) /* old 1.8 style OSC naming */
#define FSDB_UDESC (5) /* sptlrpc user desc, will be obsolete */
-
struct fs_db {
char fsdb_name[9];
cfs_list_t fsdb_list; /* list of databases */
unsigned long fsdb_flags;
__u32 fsdb_gen;
- /* in-memory copy of the srpc rules, guarded by fsdb_sem */
+ /* in-memory copy of the srpc rules, guarded by fsdb_lock */
struct sptlrpc_rule_set fsdb_srpc_gen;
struct mgs_tgt_srpc_conf *fsdb_srpc_tgt;
+
+ int fsdb_ir_state;
+
+ /* Target NIDs Table */
+ struct mgs_nidtbl fsdb_nidtbl;
+
+ /* async thread to notify clients */
+ struct obd_device *fsdb_obd;
+ cfs_waitq_t fsdb_notify_waitq;
+ cfs_completion_t fsdb_notify_comp;
+ cfs_atomic_t fsdb_notify_phase;
+ volatile int fsdb_notify_async:1,
+ fsdb_notify_stop:1;
+ /* statistic data */
+ unsigned int fsdb_notify_total;
+ unsigned int fsdb_notify_max;
+ unsigned int fsdb_notify_count;
};
+/* mgs_llog.c */
+int class_dentry_readdir(struct obd_device *obd, struct dentry *dir,
+ struct vfsmount *inmnt,
+ cfs_list_t *dentry_list);
+
int mgs_init_fsdb_list(struct obd_device *obd);
int mgs_cleanup_fsdb_list(struct obd_device *obd);
-int mgs_find_or_make_fsdb(struct obd_device *obd, char *name,
+int mgs_find_or_make_fsdb(struct obd_device *obd, char *name,
struct fs_db **dbh);
+struct fs_db *mgs_find_fsdb(struct obd_device *obd, char *fsname);
int mgs_get_fsdb_srpc_from_llog(struct obd_device *obd, struct fs_db *fsdb);
int mgs_check_index(struct obd_device *obd, struct mgs_target_info *mti);
int mgs_check_failnid(struct obd_device *obd, struct mgs_target_info *mti);
/* mgs_handler.c */
void mgs_revoke_lock(struct obd_device *obd, struct fs_db *fsdb);
+int mgs_get_lock(struct obd_device *obd, struct ldlm_res_id *res,
+ struct lustre_handle *lockh);
+int mgs_put_lock(struct lustre_handle *lockh);
+
+/* mgs_nids.c */
+int mgs_ir_update(struct obd_device *obd, struct mgs_target_info *mti);
+int mgs_ir_init_fs(struct obd_device *obd, struct fs_db *fsdb);
+void mgs_ir_fini_fs(struct obd_device *obd, struct fs_db *fsdb);
+int mgs_get_ir_logs(struct ptlrpc_request *req);
+int lprocfs_wr_ir_state(struct file *file, const char *buffer,
+ unsigned long count, void *data);
+int lprocfs_rd_ir_state(struct seq_file *seq, void *data);
+int lprocfs_wr_ir_timeout(struct file *file, const char *buffer,
+ unsigned long count, void *data);
+int lprocfs_rd_ir_timeout(char *page, char **start, off_t off, int count,
+ int *eof, void *data);
/* mgs_fs.c */
int mgs_export_stats_init(struct obd_device *obd, struct obd_export *exp,
sptlrpc_rule_set_free(&fsdb->fsdb_srpc_gen);
}
-static struct fs_db *mgs_find_fsdb(struct obd_device *obd, char *fsname)
+struct fs_db *mgs_find_fsdb(struct obd_device *obd, char *fsname)
{
struct mgs_obd *mgs = &obd->u.mgs;
struct fs_db *fsdb;
if (rc)
GOTO(err, rc);
+ /* initialise data for NID table */
+ mgs_ir_init_fs(obd, fsdb);
+
lproc_mgs_add_live(obd, fsdb);
}
cfs_down(&fsdb->fsdb_sem);
lproc_mgs_del_live(obd, fsdb);
cfs_list_del(&fsdb->fsdb_list);
+
+ /* tear down the imperative recovery (IR) state of this fsdb */
+ mgs_ir_fini_fs(obd, fsdb);
+
if (fsdb->fsdb_ost_index_map)
OBD_FREE(fsdb->fsdb_ost_index_map, INDEX_MAP_SIZE);
if (fsdb->fsdb_mdt_index_map)
int mgs_erase_logs(struct obd_device *obd, char *fsname)
{
struct mgs_obd *mgs = &obd->u.mgs;
- static struct fs_db *fsdb;
+ struct fs_db *fsdb;
cfs_list_t dentry_list;
struct l_linux_dirent *dirent, *n;
int rc, len = strlen(fsname);
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/mgs/mgs_nids.c
+ *
+ * NID table management for lustre.
+ *
+ * Author: Jinshan Xiong <jinshan.xiong@whamcloud.com>
+ */
+
+#ifndef EXPORT_SYMTAB
+#define EXPORT_SYMTAB
+#endif
+#define DEBUG_SUBSYSTEM S_MGS
+#define D_MGS D_CONFIG
+
+#ifdef __KERNEL__
+#include <linux/module.h>
+#include <linux/pagemap.h>
+#include <linux/fs.h>
+#endif
+
+#include <obd.h>
+#include <obd_lov.h>
+#include <obd_class.h>
+#include <lustre_log.h>
+#include <obd_ost.h>
+#include <libcfs/list.h>
+#include <linux/lvfs.h>
+#include <lustre_fsfilt.h>
+#include <lustre_disk.h>
+#include <lustre_param.h>
+#include "mgs_internal.h"
+
+static unsigned int ir_timeout;
+
+/**
+ * Sanity check of a NID table: all targets carrying a valid (non-zero)
+ * version must be queued in strictly increasing version order.
+ *
+ * The caller must hold tbl->mn_lock.
+ *
+ * \retval 1 the table is consistent
+ * \retval 0 a target is out of order
+ */
+static int nidtbl_is_sane(struct mgs_nidtbl *tbl)
+{
+        struct mgs_nidtbl_target *tgt;
+        /* same width as mnt_version, so that large versions are not
+         * truncated before being compared */
+        u64 version = 0;
+
+        LASSERT(cfs_mutex_is_locked(&tbl->mn_lock));
+        cfs_list_for_each_entry(tgt, &tbl->mn_targets, mnt_list) {
+                /* version 0 marks an invalid entry, skip it */
+                if (!tgt->mnt_version)
+                        continue;
+
+                if (version >= tgt->mnt_version)
+                        return 0;
+
+                version = tgt->mnt_version;
+        }
+        return 1;
+}
+
+/**
+ * Fetch the nidtbl entries whose version is not less than the version
+ * requested in \a res->mcr_offset.
+ *
+ * Entries are packed into freshly allocated pages, stored in \a pages, in
+ * units of \a unit_size bytes; an entry never crosses a unit boundary. When
+ * an entry does not fit into the rest of a unit, the leftover bytes are
+ * added to the length of the last entry of that unit.
+ *
+ * \param unused       not used
+ * \param tbl          NID table to read
+ * \param res          in: mcr_offset is the first version wanted;
+ *                     out: mcr_size is the table version, mcr_offset the
+ *                     version to ask for next
+ * \param pages        array of \a nrpages slots filled with the allocated
+ *                     pages; the caller frees them
+ * \param nrpages      number of slots in \a pages
+ * \param units_total  number of units the reply may hold
+ * \param unit_size    size of a unit in bytes, must be a power of two
+ *
+ * \retval >= 0 number of bytes packed
+ * \retval -EOVERFLOW an entry is larger than a unit
+ * \retval -ENOMEM a page could not be allocated
+ */
+static int mgs_nidtbl_read(struct obd_device *unused, struct mgs_nidtbl *tbl,
+                           struct mgs_config_res *res, cfs_page_t **pages,
+                           int nrpages, int units_total, int unit_size)
+{
+        struct mgs_nidtbl_target *tgt;
+        struct mgs_nidtbl_entry *entry;
+        struct mgs_nidtbl_entry *last_in_unit = NULL;
+        struct mgs_target_info *mti;
+        __u64 version = res->mcr_offset;
+        bool nobuf = false;
+        void *buf = NULL;
+        int bytes_in_unit = 0;
+        int units_in_page = 0;
+        int index = 0;
+        int rc = 0;
+        ENTRY;
+
+        /* make sure unit_size is power 2 */
+        LASSERT((unit_size & (unit_size - 1)) == 0);
+        LASSERT(nrpages << CFS_PAGE_SHIFT >= units_total * unit_size);
+
+        cfs_mutex_lock(&tbl->mn_lock);
+        LASSERT(nidtbl_is_sane(tbl));
+
+        /* no more entries ? */
+        if (version > tbl->mn_version) {
+                version = tbl->mn_version;
+                goto out;
+        }
+
+        /* Walk the targets, which are queued in version order, and pack
+         * every one at or beyond the requested version.
+         * NOTE(review): no filtering by target type is done here. */
+        cfs_list_for_each_entry(tgt, &tbl->mn_targets, mnt_list) {
+                int entry_len = sizeof(*entry);
+
+                if (tgt->mnt_version < version)
+                        continue;
+
+                /* write target recover information */
+                mti = &tgt->mnt_mti;
+                LASSERT(mti->mti_nid_count < MTI_NIDS_MAX);
+                entry_len += mti->mti_nid_count * sizeof(lnet_nid_t);
+
+                if (entry_len > unit_size) {
+                        CWARN("nidtbl: too large entry: entry length %d,"
+                              "unit size: %d\n", entry_len, unit_size);
+                        /* leave through the bottom of the loop, not via
+                         * the out label, so that the page currently
+                         * mapped gets unmapped */
+                        rc = -EOVERFLOW;
+                        break;
+                }
+
+                if (bytes_in_unit < entry_len) {
+                        if (units_total == 0) {
+                                /* reply buffer is full, the reader has to
+                                 * come back for the rest */
+                                nobuf = true;
+                                break;
+                        }
+
+                        /* check if we need to consume remaining bytes. */
+                        if (last_in_unit != NULL && bytes_in_unit) {
+                                /* entry has been swapped. */
+                                __swab32s(&last_in_unit->mne_length);
+                                last_in_unit->mne_length += bytes_in_unit;
+                                __swab32s(&last_in_unit->mne_length);
+                                rc += bytes_in_unit;
+                                buf += bytes_in_unit;
+                                last_in_unit = NULL;
+                        }
+                        LASSERT((rc & (unit_size - 1)) == 0);
+
+                        if (units_in_page == 0) {
+                                /* allocate a new page */
+                                pages[index] = cfs_alloc_page(CFS_ALLOC_STD);
+                                if (pages[index] == NULL) {
+                                        rc = -ENOMEM;
+                                        break;
+                                }
+
+                                /* destroy previous map */
+                                if (index > 0)
+                                        cfs_kunmap(pages[index - 1]);
+
+                                /* reassign buffer */
+                                buf = cfs_kmap(pages[index]);
+                                ++index;
+
+                                units_in_page = CFS_PAGE_SIZE / unit_size;
+                                LASSERT(units_in_page > 0);
+                        }
+
+                        /* allocate an unit */
+                        LASSERT(((long)buf & (unit_size - 1)) == 0);
+                        bytes_in_unit = unit_size;
+                        --units_in_page;
+                        --units_total;
+                }
+
+                /* fill in entry. */
+                entry = (struct mgs_nidtbl_entry *)buf;
+                entry->mne_version = tgt->mnt_version;
+                entry->mne_instance = mti->mti_instance;
+                entry->mne_index = mti->mti_stripe_index;
+                entry->mne_length = entry_len;
+                entry->mne_type = tgt->mnt_type;
+                entry->mne_nid_type = 0;
+                entry->mne_nid_size = sizeof(lnet_nid_t);
+                entry->mne_nid_count = mti->mti_nid_count;
+                memcpy(entry->u.nids, mti->mti_nids,
+                       mti->mti_nid_count * sizeof(lnet_nid_t));
+                lustre_swab_mgs_nidtbl_entry(entry);
+
+                version = tgt->mnt_version;
+                rc += entry_len;
+                buf += entry_len;
+
+                bytes_in_unit -= entry_len;
+                last_in_unit = entry;
+
+                CDEBUG(D_MGS, "fsname %s, entry size %d, pages %d/%d/%d/%d.\n",
+                       tbl->mn_fsdb->fsdb_name, entry_len,
+                       bytes_in_unit, index, nrpages, units_total);
+        }
+        /* only the most recently allocated page is still mapped */
+        if (index > 0)
+                cfs_kunmap(pages[index - 1]);
+out:
+        LASSERT(version <= tbl->mn_version);
+        res->mcr_size = tbl->mn_version;
+        /* when the buffer ran out, resume from the last version packed */
+        res->mcr_offset = nobuf ? version : tbl->mn_version;
+        cfs_mutex_unlock(&tbl->mn_lock);
+        LASSERT(ergo(version == 1, rc == 0)); /* get the log first time */
+
+        CDEBUG(D_MGS, "Read IR logs %s return with %d, version %llu\n",
+               tbl->mn_fsdb->fsdb_name, rc, version);
+        RETURN(rc);
+}
+
+/**
+ * Store the current version of \a tbl, in little-endian byte order, in the
+ * file MGS_NIDTBL_DIR/<fsname>, creating the file if necessary.
+ *
+ * The caller must hold tbl->mn_lock.
+ *
+ * \retval 0 the version was written
+ * \retval negative errno; a short write is reported as -EIO
+ */
+static int nidtbl_update_version(struct obd_device *obd, struct mgs_nidtbl *tbl)
+{
+        struct lvfs_run_ctxt saved;
+        struct file *file = NULL;
+        /* directory, '/', file system name of up to 8 characters, NUL */
+        char filename[sizeof(MGS_NIDTBL_DIR) + 9];
+        u64 version;
+        loff_t off = 0;
+        int rc;
+        ENTRY;
+
+        LASSERT(cfs_mutex_is_locked(&tbl->mn_lock));
+        LASSERT(sizeof(filename) < 32);
+
+        snprintf(filename, sizeof(filename), "%s/%s",
+                 MGS_NIDTBL_DIR, tbl->mn_fsdb->fsdb_name);
+
+        push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+
+        file = l_filp_open(filename, O_RDWR|O_CREAT, 0660);
+        if (!IS_ERR(file)) {
+                version = cpu_to_le64(tbl->mn_version);
+                rc = lustre_fwrite(file, &version, sizeof(version), &off);
+                if (rc == sizeof(version))
+                        rc = 0;
+                else if (rc >= 0)
+                        /* short write: never hand a positive byte count
+                         * back to the caller as an error code */
+                        rc = -EIO;
+                filp_close(file, 0);
+                fsfilt_sync(obd, obd->u.mgs.mgs_sb);
+        } else {
+                rc = PTR_ERR(file);
+        }
+
+        pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+        RETURN(rc);
+}
+
+#define MGS_NIDTBL_VERSION_INIT 2
+
+/**
+ * Read the version of \a tbl back from the file MGS_NIDTBL_DIR/<fsname>.
+ *
+ * The caller must hold tbl->mn_lock.
+ *
+ * \retval the stored version; MGS_NIDTBL_VERSION_INIT if the file does not
+ *         exist or is empty
+ * \retval negative errno on failure; a short read is reported as -EIO
+ *
+ * NOTE(review): the version is a u64 on disk and in the table but is
+ * returned through an int here, so a value beyond INT_MAX is truncated.
+ */
+static int nidtbl_read_version(struct obd_device *obd, struct mgs_nidtbl *tbl)
+{
+        struct lvfs_run_ctxt saved;
+        struct file *file = NULL;
+        /* directory, '/', file system name of up to 8 characters, NUL */
+        char filename[sizeof(MGS_NIDTBL_DIR) + 9];
+        u64 version;
+        loff_t off = 0;
+        int rc;
+        ENTRY;
+
+        LASSERT(cfs_mutex_is_locked(&tbl->mn_lock));
+        LASSERT(sizeof(filename) < 32);
+
+        snprintf(filename, sizeof(filename), "%s/%s",
+                 MGS_NIDTBL_DIR, tbl->mn_fsdb->fsdb_name);
+
+        push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+
+        file = l_filp_open(filename, O_RDONLY, 0);
+        if (!IS_ERR(file)) {
+                rc = lustre_fread(file, &version, sizeof(version), &off);
+                if (rc == sizeof(version)) {
+                        /* the file holds a little-endian value, see
+                         * nidtbl_update_version() */
+                        rc = le64_to_cpu(version);
+                } else if (rc == 0) {
+                        rc = MGS_NIDTBL_VERSION_INIT;
+                } else {
+                        CERROR("read version file %s error %d\n", filename, rc);
+                        /* a short read must not be mistaken for a
+                         * version number by the caller */
+                        if (rc > 0)
+                                rc = -EIO;
+                }
+                filp_close(file, 0);
+        } else {
+                rc = PTR_ERR(file);
+                if (rc == -ENOENT)
+                        rc = MGS_NIDTBL_VERSION_INIT;
+        }
+
+        pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+        RETURN(rc);
+}
+
+/**
+ * Record the information of a target in the NID table of \a fsdb.
+ *
+ * The target is looked up by type and stripe index and created if it is
+ * not in the table yet. It is then stamped with a new table version, moved
+ * to the tail of the target list, and the new version is written to disk.
+ *
+ * \retval 0 success
+ * \retval -ENOMEM a new target could not be allocated
+ * \retval other value returned by nidtbl_update_version()
+ */
+static int mgs_nidtbl_write(struct fs_db *fsdb, struct mgs_target_info *mti)
+{
+ struct mgs_nidtbl *tbl;
+ struct mgs_nidtbl_target *tgt;
+ bool found = false;
+ int type = mti->mti_flags & LDD_F_SV_TYPE_MASK;
+ int rc = 0;
+ ENTRY;
+
+ /* drop the MGS bit; what is left must name a target type */
+ type &= ~LDD_F_SV_TYPE_MGS;
+ LASSERT(type != 0);
+
+ tbl = &fsdb->fsdb_nidtbl;
+ cfs_mutex_lock(&tbl->mn_lock);
+ cfs_list_for_each_entry(tgt, &tbl->mn_targets, mnt_list) {
+ struct mgs_target_info *info = &tgt->mnt_mti;
+ if (type == tgt->mnt_type &&
+ mti->mti_stripe_index == info->mti_stripe_index) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ OBD_ALLOC_PTR(tgt);
+ if (tgt == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ /* self-linked, so that the cfs_list_move_tail() below works for a
+  * new target as well */
+ CFS_INIT_LIST_HEAD(&tgt->mnt_list);
+ tgt->mnt_fs = tbl;
+ tgt->mnt_version = 0; /* 0 means invalid */
+ tgt->mnt_type = type;
+
+ ++tbl->mn_nr_targets;
+ }
+
+ tgt->mnt_version = ++tbl->mn_version;
+ tgt->mnt_mti = *mti;
+
+ /* the latest updated target goes last: the list stays sorted by
+  * version, which nidtbl_is_sane() asserts */
+ cfs_list_move_tail(&tgt->mnt_list, &tbl->mn_targets);
+
+ /* NOTE(review): the in-memory update above is not rolled back when
+  * writing the version file fails */
+ rc = nidtbl_update_version(fsdb->fsdb_obd, tbl);
+ EXIT;
+
+out:
+ cfs_mutex_unlock(&tbl->mn_lock);
+ if (rc)
+ CERROR("Write NID table version for file system %s error %d\n",
+ fsdb->fsdb_name, rc);
+ return rc;
+}
+
+/**
+ * Empty the NID table of \a fsdb and free all of its targets.
+ *
+ * The targets are detached from the table under mn_lock and released
+ * afterwards, outside of the lock.
+ */
+static void mgs_nidtbl_fini_fs(struct fs_db *fsdb)
+{
+        struct mgs_nidtbl *tbl = &fsdb->fsdb_nidtbl;
+        struct mgs_nidtbl_target *victim;
+        CFS_LIST_HEAD(doomed);
+
+        cfs_mutex_lock(&tbl->mn_lock);
+        tbl->mn_nr_targets = 0;
+        cfs_list_splice_init(&tbl->mn_targets, &doomed);
+        cfs_mutex_unlock(&tbl->mn_lock);
+
+        for (;;) {
+                if (cfs_list_empty(&doomed))
+                        break;
+
+                victim = list_entry(doomed.next, struct mgs_nidtbl_target,
+                                    mnt_list);
+                cfs_list_del(&victim->mnt_list);
+                OBD_FREE_PTR(victim);
+        }
+}
+
+/**
+ * Set up the NID table of \a fsdb: an empty target list and the version
+ * last stored on disk.
+ *
+ * fsdb->fsdb_obd must have been set by the caller.
+ *
+ * \retval 0 success
+ * \retval negative errno if the version file could not be read; the table
+ *         is still usable and starts at MGS_NIDTBL_VERSION_INIT
+ */
+static int mgs_nidtbl_init_fs(struct fs_db *fsdb)
+{
+        struct mgs_nidtbl *tbl = &fsdb->fsdb_nidtbl;
+        int rc;
+
+        CFS_INIT_LIST_HEAD(&tbl->mn_targets);
+        cfs_mutex_init(&tbl->mn_lock);
+        tbl->mn_nr_targets = 0;
+        tbl->mn_fsdb = fsdb;
+
+        cfs_mutex_lock(&tbl->mn_lock);
+        rc = nidtbl_read_version(fsdb->fsdb_obd, tbl);
+        /* a negative value is an errno: storing it in the unsigned 64-bit
+         * mn_version would yield a bogus, huge version */
+        tbl->mn_version = rc < 0 ? MGS_NIDTBL_VERSION_INIT : rc;
+        cfs_mutex_unlock(&tbl->mn_lock);
+
+        if (rc < 0) {
+                CERROR("IR: failed to read NID table version of %s: %d\n",
+                       fsdb->fsdb_name, rc);
+                return rc;
+        }
+        CDEBUG(D_MGS, "IR: current version is %llu\n", tbl->mn_version);
+
+        return 0;
+}
+
+/* --------- Imperative Recovery relies on nidtbl stuff ------- */
+/**
+ * Body of the per file system notify thread started by mgs_ir_init_fs().
+ *
+ * The thread sleeps until fsdb_notify_phase becomes non-zero or
+ * fsdb_notify_stop is set. For each wakeup it takes and drops the
+ * CONFIG_T_RECOVER lock of the file system through mgs_get_lock() and
+ * mgs_put_lock(), and updates the notify statistics of \a arg.
+ *
+ * \param arg the struct fs_db to serve
+ */
+static int mgs_ir_notify(void *arg)
+{
+ struct fs_db *fsdb = arg;
+ struct ldlm_res_id resid;
+
+ char name[sizeof(fsdb->fsdb_name) + 20];
+
+ LASSERTF(sizeof(name) < 32, "name is too large to be in stack.\n");
+ sprintf(name, "mgs_%s_notify", fsdb->fsdb_name);
+ cfs_daemonize(name);
+
+ /* tell mgs_ir_init_fs() that the thread is up */
+ cfs_complete(&fsdb->fsdb_notify_comp);
+
+ mgc_fsname2resid(fsdb->fsdb_name, &resid, CONFIG_T_RECOVER);
+ while (1) {
+ struct l_wait_info lwi = { 0 };
+ struct lustre_handle lockh;
+ cfs_time_t curtime;
+ int lockrc;
+ int delta;
+
+ l_wait_event(fsdb->fsdb_notify_waitq,
+ fsdb->fsdb_notify_stop ||
+ cfs_atomic_read(&fsdb->fsdb_notify_phase),
+ &lwi);
+ if (fsdb->fsdb_notify_stop)
+ break;
+
+ CDEBUG(D_MGS, "%s woken up, phase is %d\n",
+ name, cfs_atomic_read(&fsdb->fsdb_notify_phase));
+
+ curtime = cfs_time_current();
+ lockrc = mgs_get_lock(fsdb->fsdb_obd, &resid, &lockh);
+ if (lockrc == ELDLM_OK) {
+ /* all requests queued so far are served by this round */
+ cfs_atomic_set(&fsdb->fsdb_notify_phase, 0);
+ mgs_put_lock(&lockh);
+
+ /* do statistic */
+ fsdb->fsdb_notify_count++;
+ /* NOTE(review): the message below reports microseconds, which
+  * assumes cfs_time_current() counts nanoseconds - confirm the
+  * unit of cfs_time_t */
+ delta = (cfs_time_current() - curtime) / NSEC_PER_USEC;
+ fsdb->fsdb_notify_total += delta;
+ if (delta > fsdb->fsdb_notify_max)
+ fsdb->fsdb_notify_max = delta;
+ CDEBUG(D_MGS, "Revoke recover lock of %s spent %dus\n",
+ fsdb->fsdb_name, delta);
+ } else {
+ /* NOTE(review): fsdb_notify_phase stays set on this path, so the
+  * wait above returns at once and the lock is tried again */
+ CERROR("Fatal error %d for fs %s\n",
+ lockrc, fsdb->fsdb_name);
+ }
+ }
+
+ /* tell mgs_ir_fini_fs() that the thread is gone */
+ cfs_complete(&fsdb->fsdb_notify_comp);
+ return 0;
+}
+
+/**
+ * Set up the imperative recovery state of \a fsdb: pick the initial IR
+ * state, start the notify thread and initialise the NID table.
+ *
+ * \retval 0 always
+ */
+int mgs_ir_init_fs(struct obd_device *obd, struct fs_db *fsdb)
+{
+ struct mgs_obd *mgs = &obd->u.mgs;
+ int rc;
+
+ /* ir_timeout is shared by all file systems; it gets its default
+  * value on first use */
+ if (!ir_timeout)
+ ir_timeout = OBD_IR_MGS_TIMEOUT;
+
+ /* within ir_timeout seconds of the MGS start the state is IR_STARTUP */
+ fsdb->fsdb_ir_state = IR_FULL;
+ if (cfs_time_before(cfs_time_current_sec(),
+ mgs->mgs_start_time + ir_timeout))
+ fsdb->fsdb_ir_state = IR_STARTUP;
+
+ /* start notify thread */
+ fsdb->fsdb_obd = obd;
+ cfs_atomic_set(&fsdb->fsdb_notify_phase, 0);
+ cfs_waitq_init(&fsdb->fsdb_notify_waitq);
+ cfs_init_completion(&fsdb->fsdb_notify_comp);
+ rc = cfs_create_thread(mgs_ir_notify, fsdb, CFS_DAEMON_FLAGS);
+ if (rc > 0)
+ /* wait until the thread reports that it is running */
+ cfs_wait_for_completion(&fsdb->fsdb_notify_comp);
+ else
+ /* NOTE(review): the failure is only logged; mgs_ir_fini_fs()
+  * still waits for the completion of a thread that never ran */
+ CERROR("Start notify thread error %d\n", rc);
+
+ /* NOTE(review): the return value is ignored */
+ mgs_nidtbl_init_fs(fsdb);
+ return 0;
+}
+
+/**
+ * Release the imperative recovery state of \a fsdb: free the NID table,
+ * then ask the notify thread to stop and wait until it has exited.
+ * Nothing is done for the fsdb flagged FSDB_MGS_SELF.
+ */
+void mgs_ir_fini_fs(struct obd_device *obd, struct fs_db *fsdb)
+{
+        if (!cfs_test_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags)) {
+                mgs_nidtbl_fini_fs(fsdb);
+
+                /* raise the stop flag before waking the thread up */
+                fsdb->fsdb_notify_stop = 1;
+                cfs_waitq_signal(&fsdb->fsdb_notify_waitq);
+                cfs_wait_for_completion(&fsdb->fsdb_notify_comp);
+        }
+}
+
+/* Move \a fsdb from IR_STARTUP to IR_FULL once more than ir_timeout seconds
+ * have passed since the MGS started; the caller must hold fsdb_sem. */
+static inline void ir_state_graduate(struct fs_db *fsdb)
+{
+        struct mgs_obd *mgs;
+
+        if (fsdb->fsdb_ir_state != IR_STARTUP)
+                return;
+
+        mgs = &fsdb->fsdb_obd->u.mgs;
+        if (cfs_time_before(mgs->mgs_start_time + ir_timeout,
+                            cfs_time_current_sec()))
+                fsdb->fsdb_ir_state = IR_FULL;
+}
+
+/**
+ * Record the target described by \a mti in the NID table of its file
+ * system and, unless IR is disabled for that file system, wake up the
+ * notify thread.
+ *
+ * LDD_F_IR_CAPABLE is set in \a mti->mti_flags when the file system is in
+ * the IR_FULL state.
+ *
+ * \retval 0 success
+ * \retval -EINVAL \a mti carries no instance number
+ * \retval other error from mgs_find_or_make_fsdb() or mgs_nidtbl_write()
+ */
+int mgs_ir_update(struct obd_device *obd, struct mgs_target_info *mti)
+{
+ struct fs_db *fsdb;
+ bool notify = true;
+ int rc;
+
+ if (mti->mti_instance == 0)
+ return -EINVAL;
+
+ rc = mgs_find_or_make_fsdb(obd, mti->mti_fsname, &fsdb);
+ if (rc)
+ return rc;
+
+ rc = mgs_nidtbl_write(fsdb, mti);
+ if (rc)
+ return rc;
+
+ /* check ir state */
+ cfs_down(&fsdb->fsdb_sem);
+ ir_state_graduate(fsdb);
+ switch (fsdb->fsdb_ir_state) {
+ case IR_FULL:
+ mti->mti_flags |= LDD_F_IR_CAPABLE;
+ break;
+ case IR_DISABLED:
+ notify = false;
+ /* fallthrough */
+ case IR_STARTUP:
+ case IR_PARTIAL:
+ break;
+ default:
+ LBUG();
+ }
+ cfs_up(&fsdb->fsdb_sem);
+
+ LASSERT(ergo(mti->mti_flags & LDD_F_IR_CAPABLE, notify));
+ if (notify) {
+ CDEBUG(D_MGS, "Try to revoke recover lock of %s\n",
+ fsdb->fsdb_name);
+ /* a non-zero phase is what mgs_ir_notify() waits for */
+ cfs_atomic_inc(&fsdb->fsdb_notify_phase);
+ cfs_waitq_signal(&fsdb->fsdb_notify_waitq);
+ }
+ return 0;
+}
+
+/* NID table can be cached by two entities: Clients and MDTs.
+ * delogname() derives the value from the suffix of the log name:
+ * "-cliir" gives IR_CLIENT, "-mdtir" gives IR_MDT. */
+enum {
+ IR_CLIENT = 1,
+ IR_MDT = 2
+};
+
+/**
+ * Split the name of an IR log into file system name and reader type.
+ *
+ * The log name has the form "fsname-suffix", where the suffix is "cliir"
+ * for clients or "mdtir" for MDTs.
+ *
+ * \param logname  NUL-terminated log name to parse
+ * \param fsname   out: file system name; the buffer must hold at least
+ *                 sizeof(fsdb_name) bytes
+ * \param typ      out, may be NULL: IR_CLIENT or IR_MDT
+ *
+ * \retval 0 success
+ * \retval -EINVAL \a logname is malformed or the file system name is
+ *         empty or too long
+ */
+static int delogname(char *logname, char *fsname, int *typ)
+{
+        char *ptr;
+        int type;
+        int len;
+
+        ptr = strrchr(logname, '-');
+        if (ptr == NULL)
+                return -EINVAL;
+
+        if (strncmp(ptr, "-mdtir", 6) == 0)
+                type = IR_MDT;
+        else if (strncmp(ptr, "-cliir", 6) == 0)
+                type = IR_CLIENT;
+        else
+                return -EINVAL;
+
+        len = ptr - logname;
+        if (len == 0)
+                return -EINVAL;
+
+        /* The log name is taken from a client request while \a fsname is a
+         * small buffer on the caller's stack. A name that does not fit
+         * into fs_db::fsdb_name cannot be a valid file system name. */
+        if (len >= (int)sizeof(((struct fs_db *)NULL)->fsdb_name))
+                return -EINVAL;
+
+        memcpy(fsname, logname, len);
+        fsname[len] = 0;
+        if (typ)
+                *typ = type;
+        return 0;
+}
+
+/**
+ * Handle a request for the IR log (the NID table) of a file system.
+ *
+ * The file system is derived from the log name in the request body. The
+ * table entries at or beyond the requested version are packed into pages
+ * by mgs_nidtbl_read() and sent back to the client by bulk transfer.
+ *
+ * \retval 0 success
+ * \retval -EINVAL malformed request
+ * \retval -ENOMEM out of memory
+ * \retval other error from the helpers called or from the bulk transfer
+ */
+int mgs_get_ir_logs(struct ptlrpc_request *req)
+{
+        struct obd_device *obd = req->rq_export->exp_obd;
+        struct fs_db *fsdb;
+        struct mgs_config_body *body;
+        struct mgs_config_res *res;
+        struct ptlrpc_bulk_desc *desc;
+        struct l_wait_info lwi;
+        char fsname[16];
+        long bufsize;
+        int unit_size;
+        int type;
+        int rc = 0;
+        int i;
+        int bytes;
+        int page_count;
+        int nrpages;
+        cfs_page_t **pages = NULL;
+        ENTRY;
+
+        body = req_capsule_client_get(&req->rq_pill, &RMF_MGS_CONFIG_BODY);
+        if (body == NULL)
+                RETURN(-EINVAL);
+
+        if (body->mcb_type != CONFIG_T_RECOVER)
+                RETURN(-EINVAL);
+
+        /* mcb_bits comes from the client and is used as a shift count
+         * below: shifting an int by its width or more is undefined */
+        if (body->mcb_bits >= sizeof(int) * 8 - 1)
+                RETURN(-EINVAL);
+
+        rc = delogname(body->mcb_name, fsname, &type);
+        if (rc)
+                RETURN(rc);
+
+        rc = mgs_find_or_make_fsdb(obd, fsname, &fsdb);
+        if (rc)
+                GOTO(out, rc);
+
+        /* the client offers mcb_units units of (1 << mcb_bits) bytes */
+        bufsize = body->mcb_units << body->mcb_bits;
+        nrpages = (bufsize + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT;
+        if (nrpages > PTLRPC_MAX_BRW_PAGES)
+                RETURN(-EINVAL);
+
+        CDEBUG(D_MGS, "Reading IR log %s bufsize %ld.\n",
+               body->mcb_name, bufsize);
+
+        OBD_ALLOC(pages, sizeof(*pages) * nrpages);
+        if (pages == NULL)
+                RETURN(-ENOMEM);
+
+        rc = req_capsule_server_pack(&req->rq_pill);
+        if (rc)
+                GOTO(out, rc);
+
+        res = req_capsule_server_get(&req->rq_pill, &RMF_MGS_CONFIG_RES);
+        if (res == NULL)
+                GOTO(out, rc = -EINVAL);
+
+        res->mcr_offset = body->mcb_offset;
+        /* a unit never spans more than one page */
+        unit_size = min_t(int, 1 << body->mcb_bits, CFS_PAGE_SIZE);
+        bytes = mgs_nidtbl_read(obd, &fsdb->fsdb_nidtbl, res, pages, nrpages,
+                                bufsize / unit_size, unit_size);
+        if (bytes < 0)
+                GOTO(out, rc = bytes);
+
+        /* start bulk transfer */
+        page_count = (bytes + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT;
+        LASSERT(page_count <= nrpages);
+        desc = ptlrpc_prep_bulk_exp(req, page_count,
+                                    BULK_PUT_SOURCE, MGS_BULK_PORTAL);
+        if (desc == NULL)
+                GOTO(out, rc = -ENOMEM);
+
+        for (i = 0; i < page_count && bytes > 0; i++) {
+                ptlrpc_prep_bulk_page(desc, pages[i], 0,
+                                      min_t(int, bytes, CFS_PAGE_SIZE));
+                bytes -= CFS_PAGE_SIZE;
+        }
+
+        rc = target_bulk_io(req->rq_export, desc, &lwi);
+        ptlrpc_free_bulk(desc);
+
+out:
+        if (pages) {
+                /* mgs_nidtbl_read() fills the array from the front, the
+                 * first empty slot ends the allocated pages */
+                for (i = 0; i < nrpages; i++) {
+                        if (pages[i] == NULL)
+                                break;
+                        cfs_free_page(pages[i]);
+                }
+                OBD_FREE(pages, sizeof(*pages) * nrpages);
+        }
+        RETURN(rc);
+}
+
+/**
+ * Handler of the "state=" command: set the IR state of \a fsdb to the
+ * state named by \a buf, which must be one of IR_STRINGS.
+ *
+ * \retval 0 success
+ * \retval -EINVAL \a buf is not a known state name
+ */
+static int lprocfs_ir_set_state(struct fs_db *fsdb, const char *buf)
+{
+        const char *names[] = IR_STRINGS;
+        int idx = 0;
+
+        /* the position of a name in IR_STRINGS is the value of the state */
+        while (idx < ARRAY_SIZE(names) && strcmp(names[idx], buf) != 0)
+                idx++;
+        if (idx == ARRAY_SIZE(names))
+                return -EINVAL;
+
+        CDEBUG(D_MGS, "change fsr state of %s from %s to %s\n",
+               fsdb->fsdb_name, names[fsdb->fsdb_ir_state], names[idx]);
+        cfs_down(&fsdb->fsdb_sem);
+        fsdb->fsdb_ir_state = idx;
+        cfs_up(&fsdb->fsdb_sem);
+
+        return 0;
+}
+
+/* Handler of the "timeout=" command: not implemented, every value is
+ * rejected with -EINVAL. */
+static int lprocfs_ir_set_timeout(struct fs_db *fsdb, const char *buf)
+{
+ return -EINVAL;
+}
+
+/**
+ * Handler of the "0" command: reset the notify statistics of \a fsdb.
+ *
+ * \retval 0 success
+ * \retval -EINVAL something follows the command
+ */
+static int lprocfs_ir_clear_stats(struct fs_db *fsdb, const char *buf)
+{
+        if (buf[0] != '\0')
+                return -EINVAL;
+
+        fsdb->fsdb_notify_count = 0;
+        fsdb->fsdb_notify_max = 0;
+        fsdb->fsdb_notify_total = 0;
+        return 0;
+}
+
+/* Commands understood by lprocfs_wr_ir_state(). A command is recognised by
+ * comparing the first namelen bytes of the input with name; the handler is
+ * called with the rest of the input. */
+static struct lproc_ir_cmd {
+ char *name;
+ int namelen;
+ int (*handler)(struct fs_db *, const char *);
+} ir_cmds[] = {
+ { "state=", 6, lprocfs_ir_set_state },
+ { "timeout=", 8, lprocfs_ir_set_timeout },
+ { "0", 1, lprocfs_ir_clear_stats }
+};
+
+/**
+ * Write handler of the IR proc entry of a file system.
+ *
+ * The input is a list of commands separated by ';', see ir_cmds[]. The
+ * commands are executed in order and processing stops at the first one
+ * that fails. One trailing newline is ignored.
+ *
+ * \param data the struct fs_db the entry belongs to
+ *
+ * \retval count all commands succeeded
+ * \retval -EINVAL the input is too long or a command is unknown
+ * \retval -ENOMEM out of memory
+ * \retval -EFAULT the user buffer could not be read
+ * \retval other error returned by a command handler
+ */
+int lprocfs_wr_ir_state(struct file *file, const char *buffer,
+                        unsigned long count, void *data)
+{
+        struct fs_db *fsdb = data;
+        char *kbuf;
+        char *ptr;
+        int rc = 0;
+
+        if (count > CFS_PAGE_SIZE)
+                return -EINVAL;
+
+        OBD_ALLOC(kbuf, count + 1);
+        if (kbuf == NULL)
+                return -ENOMEM;
+
+        if (copy_from_user(kbuf, buffer, count)) {
+                /* free exactly what has been allocated */
+                OBD_FREE(kbuf, count + 1);
+                return -EFAULT;
+        }
+
+        kbuf[count] = 0; /* buffer is supposed to end with 0 */
+        /* count may be 0, do not look in front of the buffer then */
+        if (count > 0 && kbuf[count - 1] == '\n')
+                kbuf[count - 1] = 0;
+        ptr = kbuf;
+
+        /* commands are separated by ';' and executed in order */
+        while (ptr != NULL) {
+                char *tmpptr;
+                int i;
+
+                tmpptr = strchr(ptr, ';');
+                if (tmpptr)
+                        *tmpptr++ = 0;
+
+                /* an unknown command is an error */
+                rc = -EINVAL;
+                for (i = 0; i < ARRAY_SIZE(ir_cmds); i++) {
+                        struct lproc_ir_cmd *cmd;
+                        int cmdlen;
+
+                        cmd = &ir_cmds[i];
+                        cmdlen = cmd->namelen;
+                        if (strncmp(cmd->name, ptr, cmdlen) == 0) {
+                                /* the handler gets what follows the name */
+                                ptr += cmdlen;
+                                rc = cmd->handler(fsdb, ptr);
+                                break;
+                        }
+                }
+                if (rc)
+                        break;
+
+                ptr = tmpptr;
+        }
+        if (rc)
+                CERROR("Unable to process command: %s(%d)\n", ptr, rc);
+        OBD_FREE(kbuf, count + 1);
+        return rc ?: count;
+}
+
+/**
+ * Print the IR state, the NID table version and the notify statistics of
+ * a file system into \a seq.
+ *
+ * \param data the struct fs_db to report on
+ *
+ * \retval 0 always
+ */
+int lprocfs_rd_ir_state(struct seq_file *seq, void *data)
+{
+        struct fs_db *fsdb = data;
+        struct mgs_nidtbl *tbl = &fsdb->fsdb_nidtbl;
+        const char *ir_strings[] = IR_STRINGS;
+
+        /* mgs_live_seq_show() already holds fsdb_sem. */
+        ir_state_graduate(fsdb);
+
+        /* mn_version is unsigned, hence %llu */
+        seq_printf(seq,
+                   "\tstate: %s, nidtbl version: %llu\n",
+                   ir_strings[fsdb->fsdb_ir_state], tbl->mn_version);
+        seq_printf(seq, "\tnotify total/max/count: %u/%u/%u\n",
+                   fsdb->fsdb_notify_total, fsdb->fsdb_notify_max,
+                   fsdb->fsdb_notify_count);
+        return 0;
+}
+
+/* Proc read handler: print ir_timeout, which is shared by all file systems. */
+int lprocfs_rd_ir_timeout(char *page, char **start, off_t off, int count,
+                          int *eof, void *data)
+{
+        *eof = 1;
+        /* ir_timeout is an unsigned int, hence %u */
+        return snprintf(page, count, "%u\n", ir_timeout);
+}
+
+/* Proc write handler: hand the user input to lprocfs_wr_uint() with
+ * ir_timeout, which is shared by all file systems, as the target. */
+int lprocfs_wr_ir_timeout(struct file *file, const char *buffer,
+ unsigned long count, void *data)
+{
+ return lprocfs_wr_uint(file, buffer, count, &ir_timeout);
+}
+
"layout_lock",
"64bithash",
"object_max_bytes",
+ "imp_recov",
NULL
};
" name: %s\n"
" target: %s\n"
" state: %s\n"
+ " instance: %u\n"
" connect_flags: [",
obd->obd_name,
obd2cli_tgt(obd),
- ptlrpc_import_state_name(imp->imp_state));
+ ptlrpc_import_state_name(imp->imp_state),
+ imp->imp_connect_data.ocd_instance);
i += obd_connect_flags2str(page + i, count - i,
imp->imp_connect_data.ocd_connect_flags,
", ");
if (lprocfs_obd_snprintf(&page, size, &len, "VBR: %s\n",
obd->obd_version_recov ? "ON" : "OFF")<=0)
goto out;
+ if (lprocfs_obd_snprintf(&page, size, &len, "IR: %s\n",
+ obd->obd_no_ir ? "OFF" : "ON") <= 0)
+ goto out;
goto fclose;
}
}
EXPORT_SYMBOL(lprocfs_obd_rd_recovery_status);
+/* Proc read handler: print obd_recovery_ir_factor of the obd device
+ * passed in \a data. */
+int lprocfs_obd_rd_ir_factor(char *page, char **start, off_t off,
+                             int count, int *eof, void *data)
+{
+        struct obd_device *dev = data;
+
+        LASSERT(dev != NULL);
+        return snprintf(page, count, "%d\n", dev->obd_recovery_ir_factor);
+}
+EXPORT_SYMBOL(lprocfs_obd_rd_ir_factor);
+
+/**
+ * Proc write handler: set obd_recovery_ir_factor of the obd device passed
+ * in \a data.
+ *
+ * \retval count the factor was stored
+ * \retval -EINVAL the value is outside of
+ *         [OBD_IR_FACTOR_MIN, OBD_IR_FACTOR_MAX]
+ * \retval other error returned by lprocfs_write_helper()
+ */
+int lprocfs_obd_wr_ir_factor(struct file *file, const char *buffer,
+                             unsigned long count, void *data)
+{
+        struct obd_device *dev = data;
+        int factor;
+        int err;
+
+        LASSERT(dev != NULL);
+
+        err = lprocfs_write_helper(buffer, count, &factor);
+        if (err != 0)
+                return err;
+
+        if (!(factor >= OBD_IR_FACTOR_MIN && factor <= OBD_IR_FACTOR_MAX))
+                return -EINVAL;
+
+        dev->obd_recovery_ir_factor = factor;
+        return count;
+}
+EXPORT_SYMBOL(lprocfs_obd_wr_ir_factor);
+
int lprocfs_obd_rd_recovery_time_soft(char *page, char **start, off_t off,
int count, int *eof, void *data)
{
struct obd_device *obd = data;
LASSERT(obd != NULL);
- return snprintf(page, count, "%lu\n", obd->obd_recovery_time_hard);
+ return snprintf(page, count, "%u\n", obd->obd_recovery_time_hard);
}
EXPORT_SYMBOL(lprocfs_obd_rd_recovery_time_hard);
}
EXPORT_SYMBOL(lprocfs_obd_wr_max_pages_per_rpc);
+/* Proc read handler: print obt_instance of the target obd device passed
+ * in \a data. */
+int lprocfs_target_rd_instance(char *page, char **start, off_t off,
+                               int count, int *eof, void *data)
+{
+        struct obd_device *dev = data;
+        struct obd_device_target *obt;
+
+        LASSERT(dev != NULL);
+        obt = &dev->u.obt;
+        LASSERT(obt->obt_magic == OBT_MAGIC);
+
+        *eof = 1;
+        return snprintf(page, count, "%u\n", obt->obt_instance);
+}
+EXPORT_SYMBOL(lprocfs_target_rd_instance);
+
EXPORT_SYMBOL(lprocfs_register);
EXPORT_SYMBOL(lprocfs_srch);
EXPORT_SYMBOL(lprocfs_remove);
struct obd_uuid *uuid;
class_uuid_t uuidc;
lnet_nid_t nid;
- char *mgcname, *niduuid, *mgssec;
+ char *mgcname = NULL, *niduuid = NULL, *mgssec = NULL;
char *ptr;
int recov_bk;
int rc = 0, i = 0, j, len;
RETURN(-EINVAL);
}
+ cfs_mutex_down(&mgc_start_lock);
+
len = strlen(LUSTRE_MGC_OBDNAME) + strlen(libcfs_nid2str(nid)) + 1;
OBD_ALLOC(mgcname, len);
OBD_ALLOC(niduuid, len + 2);
mgssec = lsi->lsi_lmd->lmd_mgssec ? lsi->lsi_lmd->lmd_mgssec : "";
- cfs_mutex_down(&mgc_start_lock);
+ OBD_ALLOC_PTR(data);
+ if (data == NULL)
+ GOTO(out_free, rc = -ENOMEM);
obd = class_name2obd(mgcname);
if (obd && !obd->obd_stopping) {
/* Re-using an existing MGC */
cfs_atomic_inc(&obd->u.cli.cl_mgc_refcount);
+ /* IR compatibility check, only for clients */
+ if (lmd_is_client(lsi->lsi_lmd)) {
+ int has_ir;
+ int vallen = sizeof(*data);
+ __u32 *flags = &lsi->lsi_lmd->lmd_flags;
+
+ rc = obd_get_info(obd->obd_self_export,
+ strlen(KEY_CONN_DATA), KEY_CONN_DATA,
+ &vallen, data, NULL);
+ LASSERT(rc == 0);
+ has_ir = OCD_HAS_FLAG(data, IMP_RECOV);
+ if (has_ir ^ !(*flags & LMD_FLG_NOIR)) {
+ /* LMD_FLG_NOIR is for test purpose only */
+ LCONSOLE_WARN(
+ "Trying to mount a client with IR setting "
+ "not compatible with current mgc. "
+ "Force to use current mgc setting that is "
+ "IR %s.\n",
+ has_ir ? "enabled" : "disabled");
+ if (has_ir)
+ *flags &= ~LMD_FLG_NOIR;
+ else
+ *flags |= LMD_FLG_NOIR;
+ }
+ }
+
recov_bk = 0;
/* If we are restarting the MGS, don't try to keep the MGC's
old connection, or registration will fail. */
/* nonfatal */
CWARN("can't set %s %d\n", KEY_INIT_RECOV_BACKUP, rc);
/* We connect to the MGS at setup, and don't disconnect until cleanup */
- OBD_ALLOC_PTR(data);
- if (data == NULL)
- GOTO(out, rc = -ENOMEM);
data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_FID |
- OBD_CONNECT_AT | OBD_CONNECT_FULL20;
+ OBD_CONNECT_AT | OBD_CONNECT_FULL20 |
+ OBD_CONNECT_IMP_RECOV;
+ if (lmd_is_client(lsi->lsi_lmd) &&
+ lsi->lsi_lmd->lmd_flags & LMD_FLG_NOIR)
+ data->ocd_connect_flags &= ~OBD_CONNECT_IMP_RECOV;
data->ocd_version = LUSTRE_VERSION_CODE;
rc = obd_connect(NULL, &exp, obd, &(obd->obd_uuid), data, NULL);
- OBD_FREE_PTR(data);
if (rc) {
CERROR("connect failed %d\n", rc);
GOTO(out, rc);
out_free:
cfs_mutex_up(&mgc_start_lock);
+ if (data)
+ OBD_FREE_PTR(data);
if (mgcname)
OBD_FREE(mgcname, len);
if (niduuid)
struct obd_device *mgc = lsi->lsi_mgc;
struct lustre_disk_data *ldd = lsi->lsi_ldd;
struct mgs_target_info *mti = NULL;
+ bool writeconf;
int rc;
ENTRY;
libcfs_nid2str(mti->mti_nids[0]), mti->mti_stripe_index,
mti->mti_flags);
+ /* if write_conf is true, the registration must succeed */
+ writeconf = !!(ldd->ldd_flags & (LDD_F_NEED_INDEX | LDD_F_UPDATE));
+ mti->mti_flags |= LDD_F_OPC_REG;
+
/* Register the target */
/* FIXME use mgc_process_config instead */
rc = obd_set_info_async(mgc->u.cli.cl_mgc_mgsexp,
sizeof(KEY_REGISTER_TARGET), KEY_REGISTER_TARGET,
sizeof(*mti), mti, NULL);
- if (rc)
+ if (rc) {
+ if (mti->mti_flags & LDD_F_ERROR) {
+ LCONSOLE_ERROR_MSG(0x160,
+ "The MGS is refusing to allow this "
+ "server (%s) to start. Please see messages"
+ " on the MGS node.\n", ldd->ldd_svname);
+ } else if (writeconf) {
+ LCONSOLE_ERROR_MSG(0x15f,
+ "Communication to the MGS return error %d. "
+ "Is the MGS running?\n", rc);
+ } else {
+ CERROR("Cannot talk to the MGS: %d, not fatal\n", rc);
+ /* reset the error code for non-fatal error. */
+ rc = 0;
+ }
GOTO(out, rc);
+ }
/* Always update our flags */
- ldd->ldd_flags = mti->mti_flags & ~LDD_F_REWRITE_LDD;
+ ldd->ldd_flags = mti->mti_flags & LDD_F_ONDISK_MASK;
/* If this flag is set, it means the MGS wants us to change our
on-disk data. (So far this means just the index.) */
RETURN(rc);
}
+/**
+ * Tell the MGS that the target \a obd is ready.
+ *
+ * The target info is sent under KEY_REGISTER_TARGET with LDD_F_OPC_READY
+ * set and carries the instance number of the target. If the MGS replies
+ * without error and with LDD_F_IR_CAPABLE set, LSI_IR_CAPABLE is set in
+ * the lsi_flags of \a sb.
+ *
+ * \retval 0 success
+ * \retval -EINVAL \a sb does not belong to a server
+ * \retval -ENOMEM out of memory
+ * \retval other error from server_sb2mti() or obd_set_info_async()
+ */
+static int server_notify_target(struct super_block *sb, struct obd_device *obd)
+{
+        struct lustre_sb_info *lsi = s2lsi(sb);
+        struct obd_device *mgc = lsi->lsi_mgc;
+        struct mgs_target_info *mti = NULL;
+        int rc;
+        ENTRY;
+
+        LASSERT(mgc);
+
+        if ((lsi->lsi_flags & LSI_SERVER) == 0)
+                RETURN(-EINVAL);
+
+        OBD_ALLOC_PTR(mti);
+        if (mti == NULL)
+                RETURN(-ENOMEM);
+
+        rc = server_sb2mti(sb, mti);
+        if (rc)
+                GOTO(out, rc);
+
+        mti->mti_flags |= LDD_F_OPC_READY;
+        mti->mti_instance = obd->u.obt.obt_instance;
+
+        /* FIXME use mgc_process_config instead */
+        rc = obd_set_info_async(mgc->u.cli.cl_mgc_mgsexp,
+                                sizeof(KEY_REGISTER_TARGET),
+                                KEY_REGISTER_TARGET,
+                                sizeof(*mti), mti, NULL);
+
+        /* Imperative recovery: does the MGS tell us to use it? */
+        if (rc == 0 &&
+            (mti->mti_flags & LDD_F_ERROR) == 0 &&
+            (mti->mti_flags & LDD_F_IR_CAPABLE) != 0)
+                lsi->lsi_flags |= LSI_IR_CAPABLE;
+
+out:
+        /* mti is never NULL here */
+        OBD_FREE_PTR(mti);
+        RETURN(rc);
+}
+
/** Start server targets: MDTs and OSTs
*/
static int server_start_targets(struct super_block *sb, struct vfsmount *mnt)
#endif
/* If we're an OST, make sure the global OSS is running */
- if (lsi->lsi_ldd->ldd_flags & LDD_F_SV_TYPE_OST) {
+ if (IS_OST(lsi->lsi_ldd)) {
/* make sure OSS is started */
cfs_mutex_down(&server_start_lock);
obd = class_name2obd(LUSTRE_OSS_OBDNAME);
/* Register with MGS */
rc = server_register_target(sb);
- if (rc && (lsi->lsi_ldd->ldd_flags &
- (LDD_F_NEED_INDEX | LDD_F_UPDATE | LDD_F_UPGRADE14))){
- CERROR("Required registration failed for %s: %d\n",
- lsi->lsi_ldd->ldd_svname, rc);
- if (rc == -EIO) {
- LCONSOLE_ERROR_MSG(0x15f, "Communication error with "
- "the MGS. Is the MGS running?\n");
- }
- GOTO(out_mgc, rc);
- }
- if (rc == -EINVAL) {
- LCONSOLE_ERROR_MSG(0x160, "The MGS is refusing to allow this "
- "server (%s) to start. Please see messages"
- " on the MGS node.\n",
- lsi->lsi_ldd->ldd_svname);
- GOTO(out_mgc, rc);
- }
- /* non-fatal error of registeration with MGS */
if (rc)
- CDEBUG(D_MOUNT, "Cannot register with MGS: %d\n", rc);
+ GOTO(out_mgc, rc);
/* Let the target look up the mount using the target's name
(we can't pass the sb or mnt through class_process_config.) */
obd->obd_self_export, 0, NULL, NULL);
}
+ server_notify_target(sb, obd);
+
/* log has been fully processed */
obd_notify(obd, NULL, OBD_NOTIFY_CONFIG, (void *)CONFIG_LOG);
+
+ /* calculate recovery timeout, do it after lustre_process_log */
+ server_calc_timeout(lsi, obd);
}
RETURN(rc);
return rc;
}
+/*
+ * Calculate the recovery timeouts of a target.
+ *
+ * The soft and hard limits are taken from the mount data when given there,
+ * otherwise from OBD_RECOVERY_TIME_SOFT and OBD_RECOVERY_TIME_HARD. When
+ * imperative recovery is in use, both are scaled by
+ * factor / OBD_IR_FACTOR_MAX; the soft limit is kept at
+ * OBD_RECOVERY_TIME_MIN or more and the hard limit at the soft limit or
+ * more. The results go to obd_recovery_timeout, obd_recovery_time_hard and
+ * obd_recovery_ir_factor of \a obd.
+ */
+void server_calc_timeout(struct lustre_sb_info *lsi, struct obd_device *obd)
+{
+        struct lustre_mount_data *lmd = lsi->lsi_lmd;
+        bool use_ir = !!(lsi->lsi_flags & LSI_IR_CAPABLE);
+        int floor = OBD_RECOVERY_TIME_MIN;
+        int soft = 0;
+        int hard = 0;
+        int factor;
+
+        LASSERT(lsi->lsi_flags & LSI_SERVER);
+
+        if (lmd) {
+                soft = lmd->lmd_recovery_time_soft;
+                hard = lmd->lmd_recovery_time_hard;
+                /* the mount option can switch IR off */
+                use_ir = use_ir && !(lmd->lmd_flags & LMD_FLG_NOIR);
+                obd->obd_no_ir = !use_ir;
+        }
+
+        /* 0 stands for "not set" */
+        if (soft == 0)
+                soft = OBD_RECOVERY_TIME_SOFT;
+        if (hard == 0)
+                hard = OBD_RECOVERY_TIME_HARD;
+
+        /* target may have ir_factor configured. */
+        if (obd->obd_recovery_ir_factor)
+                factor = obd->obd_recovery_ir_factor;
+        else
+                factor = OBD_IR_FACTOR_DEFAULT;
+
+        if (use_ir) {
+                /* adjust timeout value by imperative recovery */
+                int ir_soft = (soft * factor) / OBD_IR_FACTOR_MAX;
+                int ir_hard = (hard * factor) / OBD_IR_FACTOR_MAX;
+
+                /* make sure the timeout is not too short */
+                ir_soft = max(floor, ir_soft);
+                ir_hard = max(ir_soft, ir_hard);
+
+                LCONSOLE_INFO("%s: Imperative Recovery enabled, recovery "
+                              "window shrunk from %d-%d down to %d-%d\n",
+                              obd->obd_name, soft, hard, ir_soft, ir_hard);
+
+                soft = ir_soft;
+                hard = ir_hard;
+        }
+
+        /* we're done */
+        obd->obd_recovery_timeout = soft;
+        obd->obd_recovery_time_hard = hard;
+        obd->obd_recovery_ir_factor = factor;
+}
+EXPORT_SYMBOL(server_calc_timeout);
+
/*************** mount common betweeen server and client ***************/
/* Common umount */
s1 = options;
while (*s1) {
int clear = 0;
- int time_min = 2 * (CONNECTION_SWITCH_MAX +
- 2 * INITIAL_CONNECT_TIMEOUT);
+ int time_min = OBD_RECOVERY_TIME_MIN;
/* Skip whitespace and extra commas */
while (*s1 == ' ' || *s1 == ',')
lmd->lmd_recovery_time_hard = max_t(int,
simple_strtoul(s1 + 19, NULL, 10), time_min);
clear++;
+ } else if (strncmp(s1, "noir", 4) == 0) {
+ lmd->lmd_flags |= LMD_FLG_NOIR; /* test purpose only. */
+ clear++;
} else if (strncmp(s1, "nosvc", 5) == 0) {
lmd->lmd_flags |= LMD_FLG_NOSVC;
clear++;
s1 = strstr(devname, ":/");
if (s1) {
++s1;
- lmd->lmd_flags = LMD_FLG_CLIENT;
+ lmd->lmd_flags |= LMD_FLG_CLIENT;
/* Remove leading /s from fsname */
while (*++s1 == '/') ;
/* Freed in lustre_free_lsi */
obd->obd_last_committed = le64_to_cpu(lsd->lsd_last_transno);
out:
obd->u.obt.obt_mount_count = mount_count + 1;
+ obd->u.obt.obt_instance = (__u32)obd->u.obt.obt_mount_count;
lsd->lsd_mount_count = cpu_to_le64(obd->u.obt.obt_mount_count);
/* save it, so mount count and last_transno is current */
struct lustre_sb_info *lsi = s2lsi(lmi->lmi_sb);
mnt = lmi->lmi_mnt;
obd->obd_fsops = fsfilt_get_ops(MT_STR(lsi->lsi_ldd));
-
- /* gets recovery timeouts from mount data */
- if (lsi->lsi_lmd && lsi->lsi_lmd->lmd_recovery_time_soft)
- obd->obd_recovery_timeout =
- lsi->lsi_lmd->lmd_recovery_time_soft;
- if (lsi->lsi_lmd && lsi->lsi_lmd->lmd_recovery_time_hard)
- obd->obd_recovery_time_hard =
- lsi->lsi_lmd->lmd_recovery_time_hard;
} else {
/* old path - used by lctl */
CERROR("Using old MDS mount method\n");
}
}
+ obd->u.obt.obt_magic = OBT_MAGIC;
obd->u.obt.obt_vfsmnt = mnt;
obd->u.obt.obt_sb = mnt->mnt_sb;
- obd->u.obt.obt_magic = OBT_MAGIC;
filter->fo_fstype = mnt->mnt_sb->s_type->name;
CDEBUG(D_SUPER, "%s: mnt = %p\n", filter->fo_fstype, mnt);
lprocfs_filter_wr_syncjournal, 0 },
{ "sync_on_lock_cancel", lprocfs_filter_rd_sync_lock_cancel,
lprocfs_filter_wr_sync_lock_cancel, 0 },
+ { "instance", lprocfs_target_rd_instance, 0 },
+ { "ir_factor", lprocfs_obd_rd_ir_factor,
+ lprocfs_obd_wr_ir_factor, 0},
{ 0 }
};
osc_wr_contention_seconds, 0 },
{ "lockless_truncate", osc_rd_lockless_truncate,
osc_wr_lockless_truncate, 0 },
- { "import", lprocfs_rd_import, 0, 0 },
+ { "import", lprocfs_rd_import, lprocfs_wr_import, 0 },
{ "state", lprocfs_rd_state, 0, 0 },
{ 0 }
};
}
imp->imp_connect_data = *ocd;
+ CDEBUG(D_HA, "obd %s to target with inst %u\n",
+ imp->imp_obd->obd_name, ocd->ocd_instance);
exp = class_conn2export(&imp->imp_dlm_handle);
cfs_spin_unlock(&imp->imp_lock);
&RMF_MGS_SEND_PARAM
};
+/* Request-side field list used by RQF_MGS_CONFIG_READ: the ptlrpc body
+ * followed by RMF_MGS_CONFIG_BODY. */
+static const struct req_msg_field *mgs_config_read_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MGS_CONFIG_BODY
+};
+
+/* Reply-side field list used by RQF_MGS_CONFIG_READ: the ptlrpc body
+ * followed by RMF_MGS_CONFIG_RES. */
+static const struct req_msg_field *mgs_config_read_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MGS_CONFIG_RES
+};
+
static const struct req_msg_field *log_cancel_client[] = {
&RMF_PTLRPC_BODY,
&RMF_LOGCOOKIES
&RQF_SEC_CTX,
&RQF_MGS_TARGET_REG,
&RQF_MGS_SET_INFO,
+ &RQF_MGS_CONFIG_READ,
&RQF_SEQ_QUERY,
&RQF_FLD_QUERY,
&RQF_MDS_CONNECT,
NULL, NULL);
EXPORT_SYMBOL(RMF_MGS_SEND_PARAM);
+/* Message field for the MGS_CONFIG_READ request payload: sized as one
+ * struct mgs_config_body, with lustre_swab_mgs_config_body passed as the
+ * byte-swap routine and NULL for the last DEFINE_MSGF argument. */
+struct req_msg_field RMF_MGS_CONFIG_BODY =
+ DEFINE_MSGF("mgs_config_read request", 0,
+ sizeof(struct mgs_config_body),
+ lustre_swab_mgs_config_body, NULL);
+EXPORT_SYMBOL(RMF_MGS_CONFIG_BODY);
+
+/* Message field for the MGS_CONFIG_READ reply payload: sized as one
+ * struct mgs_config_res, with lustre_swab_mgs_config_res passed as the
+ * byte-swap routine and NULL for the last DEFINE_MSGF argument. */
+struct req_msg_field RMF_MGS_CONFIG_RES =
+ DEFINE_MSGF("mgs_config_read reply ", 0,
+ sizeof(struct mgs_config_res),
+ lustre_swab_mgs_config_res, NULL);
+EXPORT_SYMBOL(RMF_MGS_CONFIG_RES);
+
+/* Generic message field sized as a single __u32, byte-swapped with
+ * lustre_swab_generic_32s.
+ * NOTE(review): no request format visible in this part of the file
+ * references it - confirm where it is consumed. */
+struct req_msg_field RMF_U32 =
+ DEFINE_MSGF("generic u32", 0,
+ sizeof(__u32), lustre_swab_generic_32s, NULL);
+EXPORT_SYMBOL(RMF_U32);
+
struct req_msg_field RMF_SETINFO_VAL =
DEFINE_MSGF("setinfo_val", 0, -1, NULL, NULL);
EXPORT_SYMBOL(RMF_SETINFO_VAL);
mgs_set_info);
EXPORT_SYMBOL(RQF_MGS_SET_INFO);
+/* Request format for MGS_CONFIG_READ, pairing the client (request) field
+ * list mgs_config_read_client with the server (reply) field list
+ * mgs_config_read_server. */
+struct req_format RQF_MGS_CONFIG_READ =
+ DEFINE_REQ_FMT0("MGS_CONFIG_READ", mgs_config_read_client,
+ mgs_config_read_server);
+EXPORT_SYMBOL(RQF_MGS_CONFIG_READ);
+
struct req_format RQF_SEQ_QUERY =
DEFINE_REQ_FMT0("SEQ_QUERY", seq_query_client, seq_query_server);
EXPORT_SYMBOL(RQF_SEQ_QUERY);
{ MGS_TARGET_REG, "mgs_target_reg" },
{ MGS_TARGET_DEL, "mgs_target_del" },
{ MGS_SET_INFO, "mgs_set_info" },
+ { MGS_CONFIG_READ, "mgs_config_read" },
{ OBD_PING, "obd_ping" },
{ OBD_LOG_CANCEL, "llog_origin_handle_cancel" },
{ OBD_QC_CALLBACK, "obd_quota_callback" },
}
EXPORT_SYMBOL(lprocfs_wr_ping);
+/*
+ * Write handler for the "import" proc entry: ask the client import to
+ * (re)connect to a given connection.
+ *
+ * Write the connection UUID to this file to attempt to connect to that node.
+ * The connection UUID is a node's primary NID. For example,
+ * "echo connection=192.168.0.1@tcp0::instance > .../import".
+ *
+ * Only the "connection=<uuid>[::<instance>]" form is understood:
+ *  - without "::<instance>" recovery of the import is always started;
+ *  - with an instance number, recovery is started only when that number
+ *    differs from ocd_instance in the import's connect data;
+ *  - an instance that does not parse completely as a decimal number is
+ *    logged and otherwise ignored (the write still succeeds).
+ *
+ * \param file    not used
+ * \param buffer  user-space buffer holding the command
+ * \param count   number of bytes in \a buffer
+ * \param data    the obd_device whose client import is addressed
+ *
+ * \retval count    the command was consumed
+ * \retval -EINVAL  length out of range or prefix is not "connection="
+ * \retval -ENOMEM  the kernel copy of the command could not be allocated
+ * \retval -EFAULT  \a buffer could not be read
+ */
+int lprocfs_wr_import(struct file *file, const char *buffer,
+                      unsigned long count, void *data)
+{
+        struct obd_device *obd = data;
+        struct obd_import *imp = obd->u.cli.cl_import;
+        char *kbuf = NULL;
+        char *uuid;
+        char *ptr;
+        int do_reconn = 1;
+        int rc;
+        const char prefix[] = "connection=";
+        const int prefix_len = sizeof(prefix) - 1;
+
+        if (count > CFS_PAGE_SIZE - 1 || count <= prefix_len)
+                return -EINVAL;
+
+        OBD_ALLOC(kbuf, count + 1);
+        if (kbuf == NULL)
+                return -ENOMEM;
+
+        /* count was bounded by CFS_PAGE_SIZE above, so it fits in an int.
+         * Errors are reported through rc only: count must stay untouched
+         * because it is the size handed back to OBD_FREE() below. */
+        rc = count;
+
+        if (cfs_copy_from_user(kbuf, buffer, count))
+                GOTO(out, rc = -EFAULT);
+
+        kbuf[count] = 0;
+
+        /* only support connection=uuid::instance now */
+        if (strncmp(prefix, kbuf, prefix_len) != 0)
+                GOTO(out, rc = -EINVAL);
+
+        uuid = kbuf + prefix_len;
+        ptr = strstr(uuid, "::");
+        if (ptr) {
+                __u32 inst;
+                char *endptr;
+
+                /* cut the uuid off at "::" and parse what follows */
+                *ptr = 0;
+                do_reconn = 0;
+                ptr += strlen("::");
+                inst = simple_strtol(ptr, &endptr, 10);
+                if (*endptr) {
+                        CERROR("config: wrong instance # %s\n", ptr);
+                } else if (inst != imp->imp_connect_data.ocd_instance) {
+                        CDEBUG(D_INFO, "IR: %s is connecting to an obsoleted "
+                               "target(%u/%u), reconnecting...\n",
+                               imp->imp_obd->obd_name,
+                               imp->imp_connect_data.ocd_instance, inst);
+                        do_reconn = 1;
+                } else {
+                        CDEBUG(D_INFO, "IR: %s has already been connecting to "
+                               "new target(%u)\n",
+                               imp->imp_obd->obd_name, inst);
+                }
+        }
+
+        if (do_reconn)
+                ptlrpc_recover_import(imp, uuid, 1);
+
+out:
+        /* free with exactly the size that was allocated */
+        OBD_FREE(kbuf, count + 1);
+        return rc;
+}
+EXPORT_SYMBOL(lprocfs_wr_import);
+
#endif /* LPROCFS */
__swab64s(&ocd->ocd_transno);
__swab32s(&ocd->ocd_group);
__swab32s(&ocd->ocd_cksum_types);
+ __swab32s(&ocd->ocd_instance);
/* Fields after ocd_cksum_types are only accessible by the receiver
* if the corresponding flag in ocd_connect_flags is set. Accessing
* any field after ocd_maxbytes on the receiver without a valid flag
* may result in out-of-bound memory access and kernel oops. */
if (ocd->ocd_connect_flags & OBD_CONNECT_MAX_EASIZE)
__swab32s(&ocd->ocd_max_easize);
- CLASSERT(offsetof(typeof(*ocd), padding) != 0);
if (ocd->ocd_connect_flags & OBD_CONNECT_MAXBYTES)
__swab64s(&ocd->ocd_maxbytes);
CLASSERT(offsetof(typeof(*ocd), padding1) != 0);
__swab32s(&mti->mti_stripe_index);
__swab32s(&mti->mti_config_ver);
__swab32s(&mti->mti_flags);
+ __swab32s(&mti->mti_instance);
__swab32s(&mti->mti_nid_count);
CLASSERT(sizeof(lnet_nid_t) == sizeof(__u64));
for (i = 0; i < MTI_NIDS_MAX; i++)
__swab64s(&mti->mti_nids[i]);
}
+/*
+ * Byte-swap one mgs_nidtbl_entry in place: the fixed fields mne_version,
+ * mne_instance, mne_index and mne_length, then the first mne_nid_count
+ * elements of u.nids.  mne_nid_count and mne_nid_type are read without
+ * swapping, which is why they are required to be one byte wide.
+ */
+void lustre_swab_mgs_nidtbl_entry(struct mgs_nidtbl_entry *entry)
+{
+        int idx = 0;
+
+        /* compile-time layout requirements */
+        /* mne_nid_(count|type) must be one byte size because we're gonna
+         * access it w/o swapping. */
+        CLASSERT(sizeof(entry->mne_nid_count) == sizeof(__u8));
+        CLASSERT(sizeof(entry->mne_nid_type) == sizeof(__u8));
+        CLASSERT(sizeof(lnet_nid_t) == sizeof(__u64));
+
+        __swab64s(&entry->mne_version);
+        __swab32s(&entry->mne_instance);
+        __swab32s(&entry->mne_index);
+        __swab32s(&entry->mne_length);
+
+        /* remove this assertion if ipv6 is supported. */
+        LASSERT(entry->mne_nid_type == 0);
+
+        while (idx < entry->mne_nid_count) {
+                __swab64s(&entry->u.nids[idx]);
+                idx++;
+        }
+}
+EXPORT_SYMBOL(lustre_swab_mgs_nidtbl_entry);
+
+/*
+ * Byte-swap a struct mgs_config_body in place: mcb_offset (64-bit),
+ * mcb_units (32-bit) and mcb_type (16-bit).  No other member is touched.
+ * Passed to DEFINE_MSGF as the swab routine of RMF_MGS_CONFIG_BODY.
+ */
+void lustre_swab_mgs_config_body(struct mgs_config_body *body)
+{
+ __swab64s(&body->mcb_offset);
+ __swab32s(&body->mcb_units);
+ __swab16s(&body->mcb_type);
+}
+EXPORT_SYMBOL(lustre_swab_mgs_config_body);
+
+/*
+ * Byte-swap a struct mgs_config_res in place: the 64-bit members
+ * mcr_offset and mcr_size.
+ * Passed to DEFINE_MSGF as the swab routine of RMF_MGS_CONFIG_RES.
+ */
+void lustre_swab_mgs_config_res(struct mgs_config_res *body)
+{
+ __swab64s(&body->mcr_offset);
+ __swab64s(&body->mcr_size);
+}
+EXPORT_SYMBOL(lustre_swab_mgs_config_res);
+
static void lustre_swab_obd_dqinfo (struct obd_dqinfo *i)
{
__swab64s (&i->dqi_bgrace);
switch (opcode) {
case OST_READ:
case MDS_READPAGE:
+ case MGS_CONFIG_READ:
req->rq_bulk_read = 1;
break;
case OST_WRITE:
break;
case MDS_READPAGE:
case OST_READ:
+ case MGS_CONFIG_READ:
req->rq_bulk_read = 1;
break;
}
{
/* Wire protocol assertions generated by 'wirecheck'
* (make -C lustre/utils newwiretest)
- * running on Linux centos5.localhost 2.6.18-prep #3 SMP Mon Mar 22 08:28:01 EDT 2010 x86_64
- * with gcc version 4.1.2 20071124 (Red Hat 4.1.2-42) */
+ * running on Linux venus 2.6.32-131.6.1.el6_lustre.gad4c1d5.x86_64 #1 SMP Thu Jul 28 23:13:5
+ * with gcc version 4.4.4 20100726 (Red Hat 4.4.4-13) (GCC) */
/* Constants... */
(long long)MGS_TARGET_DEL);
LASSERTF(MGS_SET_INFO == 255, " found %lld\n",
(long long)MGS_SET_INFO);
+ LASSERTF(LDF_EMPTY == 1, " found %lld\n",
+ (long long)LDF_EMPTY);
+ LASSERTF(LDF_COLLIDE == 2, " found %lld\n",
+ (long long)LDF_COLLIDE);
+ LASSERTF(LU_PAGE_SIZE == 4096, " found %lld\n",
+ (long long)LU_PAGE_SIZE);
/* Sizes and Offsets */
/* Checks for struct obd_uuid */
(long long)(int)offsetof(struct obd_connect_data, ocd_max_easize));
LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_max_easize) == 4, " found %lld\n",
(long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_max_easize));
- LASSERTF((int)offsetof(struct obd_connect_data, padding) == 60, " found %lld\n",
- (long long)(int)offsetof(struct obd_connect_data, padding));
- LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding) == 4, " found %lld\n",
- (long long)(int)sizeof(((struct obd_connect_data *)0)->padding));
+ LASSERTF((int)offsetof(struct obd_connect_data, ocd_instance) == 60, " found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, ocd_instance));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_instance) == 4, " found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_instance));
LASSERTF((int)offsetof(struct obd_connect_data, ocd_maxbytes) == 64, " found %lld\n",
(long long)(int)offsetof(struct obd_connect_data, ocd_maxbytes));
LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_maxbytes) == 8, " found %lld\n",
(long long)(int)offsetof(struct obd_connect_data, padding2));
LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding2) == 8, " found %lld\n",
(long long)(int)sizeof(((struct obd_connect_data *)0)->padding2));
- CLASSERT(OBD_CONNECT_RDONLY == 0x1ULL);
- CLASSERT(OBD_CONNECT_INDEX == 0x2ULL);
- CLASSERT(OBD_CONNECT_MDS == 0x4ULL);
- CLASSERT(OBD_CONNECT_GRANT == 0x8ULL);
- CLASSERT(OBD_CONNECT_SRVLOCK == 0x10ULL);
- CLASSERT(OBD_CONNECT_VERSION == 0x20ULL);
- CLASSERT(OBD_CONNECT_REQPORTAL == 0x40ULL);
- CLASSERT(OBD_CONNECT_ACL == 0x80ULL);
- CLASSERT(OBD_CONNECT_XATTR == 0x100ULL);
- CLASSERT(OBD_CONNECT_CROW == 0x200ULL);
- CLASSERT(OBD_CONNECT_TRUNCLOCK == 0x400ULL);
- CLASSERT(OBD_CONNECT_TRANSNO == 0x800ULL);
- CLASSERT(OBD_CONNECT_IBITS == 0x1000ULL);
- CLASSERT(OBD_CONNECT_JOIN == 0x2000ULL);
- CLASSERT(OBD_CONNECT_ATTRFID == 0x4000ULL);
- CLASSERT(OBD_CONNECT_NODEVOH == 0x8000ULL);
- CLASSERT(OBD_CONNECT_RMT_CLIENT == 0x10000ULL);
- CLASSERT(OBD_CONNECT_RMT_CLIENT_FORCE == 0x20000ULL);
- CLASSERT(OBD_CONNECT_BRW_SIZE == 0x40000ULL);
- CLASSERT(OBD_CONNECT_QUOTA64 == 0x80000ULL);
- CLASSERT(OBD_CONNECT_MDS_CAPA == 0x100000ULL);
- CLASSERT(OBD_CONNECT_OSS_CAPA == 0x200000ULL);
- CLASSERT(OBD_CONNECT_CANCELSET == 0x400000ULL);
- CLASSERT(OBD_CONNECT_SOM == 0x800000ULL);
- CLASSERT(OBD_CONNECT_AT == 0x1000000ULL);
- CLASSERT(OBD_CONNECT_LRU_RESIZE == 0x2000000ULL);
- CLASSERT(OBD_CONNECT_MDS_MDS == 0x4000000ULL);
- CLASSERT(OBD_CONNECT_REAL == 0x8000000ULL);
- CLASSERT(OBD_CONNECT_CHANGE_QS == 0x10000000ULL);
- CLASSERT(OBD_CONNECT_CKSUM == 0x20000000ULL);
- CLASSERT(OBD_CONNECT_FID == 0x40000000ULL);
- CLASSERT(OBD_CONNECT_VBR == 0x80000000ULL);
- CLASSERT(OBD_CONNECT_LOV_V3 == 0x100000000ULL);
- CLASSERT(OBD_CONNECT_GRANT_SHRINK == 0x200000000ULL);
- CLASSERT(OBD_CONNECT_SKIP_ORPHAN == 0x400000000ULL);
- CLASSERT(OBD_CONNECT_MAX_EASIZE == 0x800000000ULL);
- CLASSERT(OBD_CONNECT_FULL20 == 0x1000000000ULL);
- CLASSERT(OBD_CONNECT_LAYOUTLOCK == 0x2000000000ULL);
- CLASSERT(OBD_CONNECT_64BITHASH == 0x4000000000ULL);
- CLASSERT(OBD_CONNECT_MAXBYTES == 0x8000000000ULL);
+ CLASSERT(OBD_CONNECT_RDONLY == 0x1ULL);
+ CLASSERT(OBD_CONNECT_INDEX == 0x2ULL);
+ CLASSERT(OBD_CONNECT_MDS == 0x4ULL);
+ CLASSERT(OBD_CONNECT_GRANT == 0x8ULL);
+ CLASSERT(OBD_CONNECT_SRVLOCK == 0x10ULL);
+ CLASSERT(OBD_CONNECT_VERSION == 0x20ULL);
+ CLASSERT(OBD_CONNECT_REQPORTAL == 0x40ULL);
+ CLASSERT(OBD_CONNECT_ACL == 0x80ULL);
+ CLASSERT(OBD_CONNECT_XATTR == 0x100ULL);
+ CLASSERT(OBD_CONNECT_CROW == 0x200ULL);
+ CLASSERT(OBD_CONNECT_TRUNCLOCK == 0x400ULL);
+ CLASSERT(OBD_CONNECT_TRANSNO == 0x800ULL);
+ CLASSERT(OBD_CONNECT_IBITS == 0x1000ULL);
+ CLASSERT(OBD_CONNECT_JOIN == 0x2000ULL);
+ CLASSERT(OBD_CONNECT_ATTRFID == 0x4000ULL);
+ CLASSERT(OBD_CONNECT_NODEVOH == 0x8000ULL);
+ CLASSERT(OBD_CONNECT_RMT_CLIENT == 0x10000ULL);
+ CLASSERT(OBD_CONNECT_RMT_CLIENT_FORCE == 0x20000ULL);
+ CLASSERT(OBD_CONNECT_BRW_SIZE == 0x40000ULL);
+ CLASSERT(OBD_CONNECT_QUOTA64 == 0x80000ULL);
+ CLASSERT(OBD_CONNECT_MDS_CAPA == 0x100000ULL);
+ CLASSERT(OBD_CONNECT_OSS_CAPA == 0x200000ULL);
+ CLASSERT(OBD_CONNECT_CANCELSET == 0x400000ULL);
+ CLASSERT(OBD_CONNECT_SOM == 0x800000ULL);
+ CLASSERT(OBD_CONNECT_AT == 0x1000000ULL);
+ CLASSERT(OBD_CONNECT_LRU_RESIZE == 0x2000000ULL);
+ CLASSERT(OBD_CONNECT_MDS_MDS == 0x4000000ULL);
+ CLASSERT(OBD_CONNECT_REAL == 0x8000000ULL);
+ CLASSERT(OBD_CONNECT_CHANGE_QS == 0x10000000ULL);
+ CLASSERT(OBD_CONNECT_CKSUM == 0x20000000ULL);
+ CLASSERT(OBD_CONNECT_FID == 0x40000000ULL);
+ CLASSERT(OBD_CONNECT_VBR == 0x80000000ULL);
+ CLASSERT(OBD_CONNECT_LOV_V3 == 0x100000000ULL);
+ CLASSERT(OBD_CONNECT_GRANT_SHRINK == 0x200000000ULL);
+ CLASSERT(OBD_CONNECT_SKIP_ORPHAN == 0x400000000ULL);
+ CLASSERT(OBD_CONNECT_MAX_EASIZE == 0x800000000ULL);
+ CLASSERT(OBD_CONNECT_FULL20 == 0x1000000000ULL);
+ CLASSERT(OBD_CONNECT_LAYOUTLOCK == 0x2000000000ULL);
+ CLASSERT(OBD_CONNECT_64BITHASH == 0x4000000000ULL);
+ CLASSERT(OBD_CONNECT_MAXBYTES == 0x8000000000ULL);
+ CLASSERT(OBD_CONNECT_IMP_RECOV == 0x10000000000ULL);
/* Checks for struct obdo */
LASSERTF((int)sizeof(struct obdo) == 208, " found %lld\n",
CLASSERT(OBD_FL_CKSUM_ADLER == 8192);
CLASSERT(OBD_FL_CKSUM_CRC32C == 16384);
CLASSERT(OBD_FL_SHRINK_GRANT == 131072);
- CLASSERT(OBD_FL_MMAP == (0x00040000));
- CLASSERT(OBD_FL_RECOV_RESEND == (0x00080000));
+ CLASSERT(OBD_FL_MMAP == 262144);
+ CLASSERT(OBD_FL_RECOV_RESEND == 524288);
CLASSERT(OBD_CKSUM_CRC32 == 1);
CLASSERT(OBD_CKSUM_ADLER == 2);
CLASSERT(OBD_CKSUM_CRC32C == 4);
(long long)(int)offsetof(struct link_ea_entry, lee_name));
LASSERTF((int)sizeof(((struct link_ea_entry *)0)->lee_name) == 0, " found %lld\n",
(long long)(int)sizeof(((struct link_ea_entry *)0)->lee_name));
+
+ /* Checks for struct hsm_user_item */
+ LASSERTF((int)sizeof(struct hsm_user_item) == 32, " found %lld\n",
+ (long long)(int)sizeof(struct hsm_user_item));
+ LASSERTF((int)offsetof(struct hsm_user_item, hui_fid) == 0, " found %lld\n",
+ (long long)(int)offsetof(struct hsm_user_item, hui_fid));
+ LASSERTF((int)sizeof(((struct hsm_user_item *)0)->hui_fid) == 16, " found %lld\n",
+ (long long)(int)sizeof(((struct hsm_user_item *)0)->hui_fid));
+ LASSERTF((int)offsetof(struct hsm_user_item, hui_extent) == 16, " found %lld\n",
+ (long long)(int)offsetof(struct hsm_user_item, hui_extent));
+ LASSERTF((int)sizeof(((struct hsm_user_item *)0)->hui_extent) == 16, " found %lld\n",
+ (long long)(int)sizeof(((struct hsm_user_item *)0)->hui_extent));
+
+ /* Checks for struct hsm_user_request */
+ LASSERTF((int)sizeof(struct hsm_user_request) == 16, " found %lld\n",
+ (long long)(int)sizeof(struct hsm_user_request));
+ LASSERTF((int)offsetof(struct hsm_user_request, hur_action) == 0, " found %lld\n",
+ (long long)(int)offsetof(struct hsm_user_request, hur_action));
+ LASSERTF((int)sizeof(((struct hsm_user_request *)0)->hur_action) == 4, " found %lld\n",
+ (long long)(int)sizeof(((struct hsm_user_request *)0)->hur_action));
+ LASSERTF((int)offsetof(struct hsm_user_request, hur_archive_num) == 4, " found %lld\n",
+ (long long)(int)offsetof(struct hsm_user_request, hur_archive_num));
+ LASSERTF((int)sizeof(((struct hsm_user_request *)0)->hur_archive_num) == 4, " found %lld\n",
+ (long long)(int)sizeof(((struct hsm_user_request *)0)->hur_archive_num));
+ LASSERTF((int)offsetof(struct hsm_user_request, hur_itemcount) == 8, " found %lld\n",
+ (long long)(int)offsetof(struct hsm_user_request, hur_itemcount));
+ LASSERTF((int)sizeof(((struct hsm_user_request *)0)->hur_itemcount) == 4, " found %lld\n",
+ (long long)(int)sizeof(((struct hsm_user_request *)0)->hur_itemcount));
+ LASSERTF((int)offsetof(struct hsm_user_request, hur_data_len) == 12, " found %lld\n",
+ (long long)(int)offsetof(struct hsm_user_request, hur_data_len));
+ LASSERTF((int)sizeof(((struct hsm_user_request *)0)->hur_data_len) == 4, " found %lld\n",
+ (long long)(int)sizeof(((struct hsm_user_request *)0)->hur_data_len));
+
+ /* Checks for struct hsm_user_state */
+ LASSERTF((int)sizeof(struct hsm_user_state) == 32, " found %lld\n",
+ (long long)(int)sizeof(struct hsm_user_state));
+ LASSERTF((int)offsetof(struct hsm_user_state, hus_states) == 0, " found %lld\n",
+ (long long)(int)offsetof(struct hsm_user_state, hus_states));
+ LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_states) == 4, " found %lld\n",
+ (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_states));
+ LASSERTF((int)offsetof(struct hsm_user_state, hus_archive_num) == 4, " found %lld\n",
+ (long long)(int)offsetof(struct hsm_user_state, hus_archive_num));
+ LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_archive_num) == 4, " found %lld\n",
+ (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_archive_num));
+ LASSERTF((int)offsetof(struct hsm_user_state, hus_in_progress_state) == 8, " found %lld\n",
+ (long long)(int)offsetof(struct hsm_user_state, hus_in_progress_state));
+ LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_state) == 4, " found %lld\n",
+ (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_state));
+ LASSERTF((int)offsetof(struct hsm_user_state, hus_in_progress_action) == 12, " found %lld\n",
+ (long long)(int)offsetof(struct hsm_user_state, hus_in_progress_action));
+ LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_action) == 4, " found %lld\n",
+ (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_action));
+ LASSERTF((int)offsetof(struct hsm_user_state, hus_in_progress_location) == 16, " found %lld\n",
+ (long long)(int)offsetof(struct hsm_user_state, hus_in_progress_location));
+ LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_location) == 16, " found %lld\n",
+ (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_location));
}
+
#define lustre_swab_fiemap NULL
#define lustre_swab_qdata NULL
#define lustre_swab_ost_lvb NULL
+#define lustre_swab_mgs_config_body NULL
+#define lustre_swab_mgs_config_res NULL
#define dump_rniobuf NULL
#define dump_ioo NULL
#define dump_obdo NULL
CHECK_MEMBER(obd_connect_data, ocd_group);
CHECK_MEMBER(obd_connect_data, ocd_cksum_types);
CHECK_MEMBER(obd_connect_data, ocd_max_easize);
- CHECK_MEMBER(obd_connect_data, padding);
+ CHECK_MEMBER(obd_connect_data, ocd_instance);
CHECK_MEMBER(obd_connect_data, ocd_maxbytes);
CHECK_MEMBER(obd_connect_data, padding1);
CHECK_MEMBER(obd_connect_data, padding2);
CHECK_CDEFINE(OBD_CONNECT_LAYOUTLOCK);
CHECK_CDEFINE(OBD_CONNECT_64BITHASH);
CHECK_CDEFINE(OBD_CONNECT_MAXBYTES);
+ CHECK_CDEFINE(OBD_CONNECT_IMP_RECOV);
}
static void
{
/* Wire protocol assertions generated by 'wirecheck'
* (make -C lustre/utils newwiretest)
- * running on Linux centos5.localhost 2.6.18-prep #3 SMP Mon Mar 22 08:28:01 EDT 2010 x86_64
- * with gcc version 4.1.2 20071124 (Red Hat 4.1.2-42) */
+ * running on Linux venus 2.6.32-131.6.1.el6_lustre.gad4c1d5.x86_64 #1 SMP Thu Jul 28 23:13:5
+ * with gcc version 4.4.4 20100726 (Red Hat 4.4.4-13) (GCC) */
/* Constants... */
(long long)MGS_TARGET_DEL);
LASSERTF(MGS_SET_INFO == 255, " found %lld\n",
(long long)MGS_SET_INFO);
+ LASSERTF(LDF_EMPTY == 1, " found %lld\n",
+ (long long)LDF_EMPTY);
+ LASSERTF(LDF_COLLIDE == 2, " found %lld\n",
+ (long long)LDF_COLLIDE);
+ LASSERTF(LU_PAGE_SIZE == 4096, " found %lld\n",
+ (long long)LU_PAGE_SIZE);
/* Sizes and Offsets */
/* Checks for struct obd_uuid */
(long long)(int)offsetof(struct obd_connect_data, ocd_max_easize));
LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_max_easize) == 4, " found %lld\n",
(long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_max_easize));
- LASSERTF((int)offsetof(struct obd_connect_data, padding) == 60, " found %lld\n",
- (long long)(int)offsetof(struct obd_connect_data, padding));
- LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding) == 4, " found %lld\n",
- (long long)(int)sizeof(((struct obd_connect_data *)0)->padding));
+ LASSERTF((int)offsetof(struct obd_connect_data, ocd_instance) == 60, " found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, ocd_instance));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_instance) == 4, " found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_instance));
LASSERTF((int)offsetof(struct obd_connect_data, ocd_maxbytes) == 64, " found %lld\n",
(long long)(int)offsetof(struct obd_connect_data, ocd_maxbytes));
LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_maxbytes) == 8, " found %lld\n",
(long long)(int)offsetof(struct obd_connect_data, padding2));
LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding2) == 8, " found %lld\n",
(long long)(int)sizeof(((struct obd_connect_data *)0)->padding2));
- CLASSERT(OBD_CONNECT_RDONLY == 0x1ULL);
- CLASSERT(OBD_CONNECT_INDEX == 0x2ULL);
- CLASSERT(OBD_CONNECT_MDS == 0x4ULL);
- CLASSERT(OBD_CONNECT_GRANT == 0x8ULL);
- CLASSERT(OBD_CONNECT_SRVLOCK == 0x10ULL);
- CLASSERT(OBD_CONNECT_VERSION == 0x20ULL);
- CLASSERT(OBD_CONNECT_REQPORTAL == 0x40ULL);
- CLASSERT(OBD_CONNECT_ACL == 0x80ULL);
- CLASSERT(OBD_CONNECT_XATTR == 0x100ULL);
- CLASSERT(OBD_CONNECT_CROW == 0x200ULL);
- CLASSERT(OBD_CONNECT_TRUNCLOCK == 0x400ULL);
- CLASSERT(OBD_CONNECT_TRANSNO == 0x800ULL);
- CLASSERT(OBD_CONNECT_IBITS == 0x1000ULL);
- CLASSERT(OBD_CONNECT_JOIN == 0x2000ULL);
- CLASSERT(OBD_CONNECT_ATTRFID == 0x4000ULL);
- CLASSERT(OBD_CONNECT_NODEVOH == 0x8000ULL);
- CLASSERT(OBD_CONNECT_RMT_CLIENT == 0x10000ULL);
- CLASSERT(OBD_CONNECT_RMT_CLIENT_FORCE == 0x20000ULL);
- CLASSERT(OBD_CONNECT_BRW_SIZE == 0x40000ULL);
- CLASSERT(OBD_CONNECT_QUOTA64 == 0x80000ULL);
- CLASSERT(OBD_CONNECT_MDS_CAPA == 0x100000ULL);
- CLASSERT(OBD_CONNECT_OSS_CAPA == 0x200000ULL);
- CLASSERT(OBD_CONNECT_CANCELSET == 0x400000ULL);
- CLASSERT(OBD_CONNECT_SOM == 0x800000ULL);
- CLASSERT(OBD_CONNECT_AT == 0x1000000ULL);
- CLASSERT(OBD_CONNECT_LRU_RESIZE == 0x2000000ULL);
- CLASSERT(OBD_CONNECT_MDS_MDS == 0x4000000ULL);
- CLASSERT(OBD_CONNECT_REAL == 0x8000000ULL);
- CLASSERT(OBD_CONNECT_CHANGE_QS == 0x10000000ULL);
- CLASSERT(OBD_CONNECT_CKSUM == 0x20000000ULL);
- CLASSERT(OBD_CONNECT_FID == 0x40000000ULL);
- CLASSERT(OBD_CONNECT_VBR == 0x80000000ULL);
- CLASSERT(OBD_CONNECT_LOV_V3 == 0x100000000ULL);
- CLASSERT(OBD_CONNECT_GRANT_SHRINK == 0x200000000ULL);
- CLASSERT(OBD_CONNECT_SKIP_ORPHAN == 0x400000000ULL);
- CLASSERT(OBD_CONNECT_MAX_EASIZE == 0x800000000ULL);
- CLASSERT(OBD_CONNECT_FULL20 == 0x1000000000ULL);
- CLASSERT(OBD_CONNECT_LAYOUTLOCK == 0x2000000000ULL);
- CLASSERT(OBD_CONNECT_64BITHASH == 0x4000000000ULL);
- CLASSERT(OBD_CONNECT_MAXBYTES == 0x8000000000ULL);
+ CLASSERT(OBD_CONNECT_RDONLY == 0x1ULL);
+ CLASSERT(OBD_CONNECT_INDEX == 0x2ULL);
+ CLASSERT(OBD_CONNECT_MDS == 0x4ULL);
+ CLASSERT(OBD_CONNECT_GRANT == 0x8ULL);
+ CLASSERT(OBD_CONNECT_SRVLOCK == 0x10ULL);
+ CLASSERT(OBD_CONNECT_VERSION == 0x20ULL);
+ CLASSERT(OBD_CONNECT_REQPORTAL == 0x40ULL);
+ CLASSERT(OBD_CONNECT_ACL == 0x80ULL);
+ CLASSERT(OBD_CONNECT_XATTR == 0x100ULL);
+ CLASSERT(OBD_CONNECT_CROW == 0x200ULL);
+ CLASSERT(OBD_CONNECT_TRUNCLOCK == 0x400ULL);
+ CLASSERT(OBD_CONNECT_TRANSNO == 0x800ULL);
+ CLASSERT(OBD_CONNECT_IBITS == 0x1000ULL);
+ CLASSERT(OBD_CONNECT_JOIN == 0x2000ULL);
+ CLASSERT(OBD_CONNECT_ATTRFID == 0x4000ULL);
+ CLASSERT(OBD_CONNECT_NODEVOH == 0x8000ULL);
+ CLASSERT(OBD_CONNECT_RMT_CLIENT == 0x10000ULL);
+ CLASSERT(OBD_CONNECT_RMT_CLIENT_FORCE == 0x20000ULL);
+ CLASSERT(OBD_CONNECT_BRW_SIZE == 0x40000ULL);
+ CLASSERT(OBD_CONNECT_QUOTA64 == 0x80000ULL);
+ CLASSERT(OBD_CONNECT_MDS_CAPA == 0x100000ULL);
+ CLASSERT(OBD_CONNECT_OSS_CAPA == 0x200000ULL);
+ CLASSERT(OBD_CONNECT_CANCELSET == 0x400000ULL);
+ CLASSERT(OBD_CONNECT_SOM == 0x800000ULL);
+ CLASSERT(OBD_CONNECT_AT == 0x1000000ULL);
+ CLASSERT(OBD_CONNECT_LRU_RESIZE == 0x2000000ULL);
+ CLASSERT(OBD_CONNECT_MDS_MDS == 0x4000000ULL);
+ CLASSERT(OBD_CONNECT_REAL == 0x8000000ULL);
+ CLASSERT(OBD_CONNECT_CHANGE_QS == 0x10000000ULL);
+ CLASSERT(OBD_CONNECT_CKSUM == 0x20000000ULL);
+ CLASSERT(OBD_CONNECT_FID == 0x40000000ULL);
+ CLASSERT(OBD_CONNECT_VBR == 0x80000000ULL);
+ CLASSERT(OBD_CONNECT_LOV_V3 == 0x100000000ULL);
+ CLASSERT(OBD_CONNECT_GRANT_SHRINK == 0x200000000ULL);
+ CLASSERT(OBD_CONNECT_SKIP_ORPHAN == 0x400000000ULL);
+ CLASSERT(OBD_CONNECT_MAX_EASIZE == 0x800000000ULL);
+ CLASSERT(OBD_CONNECT_FULL20 == 0x1000000000ULL);
+ CLASSERT(OBD_CONNECT_LAYOUTLOCK == 0x2000000000ULL);
+ CLASSERT(OBD_CONNECT_64BITHASH == 0x4000000000ULL);
+ CLASSERT(OBD_CONNECT_MAXBYTES == 0x8000000000ULL);
+ CLASSERT(OBD_CONNECT_IMP_RECOV == 0x10000000000ULL);
/* Checks for struct obdo */
LASSERTF((int)sizeof(struct obdo) == 208, " found %lld\n",
CLASSERT(OBD_FL_CKSUM_ADLER == 8192);
CLASSERT(OBD_FL_CKSUM_CRC32C == 16384);
CLASSERT(OBD_FL_SHRINK_GRANT == 131072);
- CLASSERT(OBD_FL_MMAP == (0x00040000));
- CLASSERT(OBD_FL_RECOV_RESEND == (0x00080000));
+ CLASSERT(OBD_FL_MMAP == 262144);
+ CLASSERT(OBD_FL_RECOV_RESEND == 524288);
CLASSERT(OBD_CKSUM_CRC32 == 1);
CLASSERT(OBD_CKSUM_ADLER == 2);
CLASSERT(OBD_CKSUM_CRC32C == 4);
(long long)(int)offsetof(struct link_ea_entry, lee_name));
LASSERTF((int)sizeof(((struct link_ea_entry *)0)->lee_name) == 0, " found %lld\n",
(long long)(int)sizeof(((struct link_ea_entry *)0)->lee_name));
+
+ /* Checks for struct hsm_user_item */
+ LASSERTF((int)sizeof(struct hsm_user_item) == 32, " found %lld\n",
+ (long long)(int)sizeof(struct hsm_user_item));
+ LASSERTF((int)offsetof(struct hsm_user_item, hui_fid) == 0, " found %lld\n",
+ (long long)(int)offsetof(struct hsm_user_item, hui_fid));
+ LASSERTF((int)sizeof(((struct hsm_user_item *)0)->hui_fid) == 16, " found %lld\n",
+ (long long)(int)sizeof(((struct hsm_user_item *)0)->hui_fid));
+ LASSERTF((int)offsetof(struct hsm_user_item, hui_extent) == 16, " found %lld\n",
+ (long long)(int)offsetof(struct hsm_user_item, hui_extent));
+ LASSERTF((int)sizeof(((struct hsm_user_item *)0)->hui_extent) == 16, " found %lld\n",
+ (long long)(int)sizeof(((struct hsm_user_item *)0)->hui_extent));
+
+ /* Checks for struct hsm_user_request */
+ LASSERTF((int)sizeof(struct hsm_user_request) == 16, " found %lld\n",
+ (long long)(int)sizeof(struct hsm_user_request));
+ LASSERTF((int)offsetof(struct hsm_user_request, hur_action) == 0, " found %lld\n",
+ (long long)(int)offsetof(struct hsm_user_request, hur_action));
+ LASSERTF((int)sizeof(((struct hsm_user_request *)0)->hur_action) == 4, " found %lld\n",
+ (long long)(int)sizeof(((struct hsm_user_request *)0)->hur_action));
+ LASSERTF((int)offsetof(struct hsm_user_request, hur_archive_num) == 4, " found %lld\n",
+ (long long)(int)offsetof(struct hsm_user_request, hur_archive_num));
+ LASSERTF((int)sizeof(((struct hsm_user_request *)0)->hur_archive_num) == 4, " found %lld\n",
+ (long long)(int)sizeof(((struct hsm_user_request *)0)->hur_archive_num));
+ LASSERTF((int)offsetof(struct hsm_user_request, hur_itemcount) == 8, " found %lld\n",
+ (long long)(int)offsetof(struct hsm_user_request, hur_itemcount));
+ LASSERTF((int)sizeof(((struct hsm_user_request *)0)->hur_itemcount) == 4, " found %lld\n",
+ (long long)(int)sizeof(((struct hsm_user_request *)0)->hur_itemcount));
+ LASSERTF((int)offsetof(struct hsm_user_request, hur_data_len) == 12, " found %lld\n",
+ (long long)(int)offsetof(struct hsm_user_request, hur_data_len));
+ LASSERTF((int)sizeof(((struct hsm_user_request *)0)->hur_data_len) == 4, " found %lld\n",
+ (long long)(int)sizeof(((struct hsm_user_request *)0)->hur_data_len));
+
+ /* Checks for struct hsm_user_state */
+ LASSERTF((int)sizeof(struct hsm_user_state) == 32, " found %lld\n",
+ (long long)(int)sizeof(struct hsm_user_state));
+ LASSERTF((int)offsetof(struct hsm_user_state, hus_states) == 0, " found %lld\n",
+ (long long)(int)offsetof(struct hsm_user_state, hus_states));
+ LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_states) == 4, " found %lld\n",
+ (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_states));
+ LASSERTF((int)offsetof(struct hsm_user_state, hus_archive_num) == 4, " found %lld\n",
+ (long long)(int)offsetof(struct hsm_user_state, hus_archive_num));
+ LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_archive_num) == 4, " found %lld\n",
+ (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_archive_num));
+ LASSERTF((int)offsetof(struct hsm_user_state, hus_in_progress_state) == 8, " found %lld\n",
+ (long long)(int)offsetof(struct hsm_user_state, hus_in_progress_state));
+ LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_state) == 4, " found %lld\n",
+ (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_state));
+ LASSERTF((int)offsetof(struct hsm_user_state, hus_in_progress_action) == 12, " found %lld\n",
+ (long long)(int)offsetof(struct hsm_user_state, hus_in_progress_action));
+ LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_action) == 4, " found %lld\n",
+ (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_action));
+ LASSERTF((int)offsetof(struct hsm_user_state, hus_in_progress_location) == 16, " found %lld\n",
+ (long long)(int)offsetof(struct hsm_user_state, hus_in_progress_location));
+ LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_location) == 16, " found %lld\n",
+ (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_location));
}
+