return mount_type_string[mt];
}
-#define MAX_FAILOVER_NIDS 10
+#ifndef MTI_NIDS_MAX /* match lustre_idl.h */
+#define MTI_NIDS_MAX 10
+#endif
struct lustre_disk_data {
__u32 ldd_magic;
- __u32 ldd_config_ver; /* we have integrated all llog steps
- through this llog ver. */
+ __u32 ldd_config_ver; /* not used? (mkfs set_defaults stores 1;
+ TODO confirm whether anything reads it) */
__u32 ldd_flags; /* LDD_SV_TYPE */
char ldd_fsname[64]; /* filesystem this server is part of */
- char ldd_svname[64]; /* this server's name (lustre-mdt0001) */
- __u16 ldd_svindex; /* server index (0001), must match svname */
- __u16 ldd_mgsnid_count; /* how many failover nids we have for the MGS */
- lnet_nid_t ldd_mgsnid[MAX_FAILOVER_NIDS]; /* mgmt nid list; lmd can override */
+ char ldd_svname[64]; /* this server's name (lustre-mdt0001) */
+ __u16 ldd_svindex; /* server index (0001), must match
+ svname */
+ __u16 ldd_mgsnid_count; /* number of MGS nids held in ldd_mgsnid */
+ lnet_nid_t ldd_mgsnid[MTI_NIDS_MAX]; /* mgmt nid list; lmd can
+ override */
+ __u16 ldd_failnid_count; /* server failover nid count */
+ lnet_nid_t ldd_failnid[MTI_NIDS_MAX]; /* server failover nids */
enum ldd_mount_type ldd_mount_type; /* target fs type LDD_MT_* */
char ldd_mount_opts[1024]; /* target fs mount opts */
- char ldd_pad[1024];
+
+ /* Below here is required for writing mdt, ost, or client logs,
+ and is ignored after that. */
+ int ldd_stripe_sz; /* default stripe size; mkfs -s stores value * 1024 */
+ int ldd_stripe_count; /* default stripe count */
+ int ldd_stripe_pattern; /* default stripe pattern */
+ int ldd_stripe_offset; /* default stripe offset */
+ int ldd_timeout; /* obd timeout */
};
#define IS_MDT(data) ((data)->ldd_flags & LDD_F_SV_TYPE_MDT)
struct lustre_mount_data {
__u32 lmd_magic;
__u32 lmd_flags; /* lustre mount flags */
- __u16 lmd_mgsnid_count; /* how many failover nids we have for the MGS */
- lnet_nid_t lmd_mgsnid[MAX_FAILOVER_NIDS]; /* who to contact at startup */
+ __u16 lmd_mgsnid_count; /* number of MGS nids (failover included)
+ held in lmd_mgsnid */
+ lnet_nid_t lmd_mgsnid[MTI_NIDS_MAX];/* who to contact at startup */
char *lmd_dev; /* device or file system name */
char *lmd_opts; /* lustre mount options (as opposed to
_device_ mount options) */
char mo_loopdev[128]; /* in case a loop dev is needed */
__u64 mo_device_sz;
int mo_flags;
-
- /* Below here is required for writing mdt,ost,or client logs */
- int mo_stripe_sz;
- int mo_stripe_count;
- int mo_stripe_pattern;
- int mo_timeout; /* obd timeout */
};
/****************** last_rcvd file *********************/
#define OBD_CONNECT_ACL 0x80ULL /* client using access control lists */
#define OBD_CONNECT_XATTR 0x100ULL /* client using extended attributes*/
#define OBD_CONNECT_CROW 0x200ULL /* MDS is expecting create-on-write */
+#define OBD_CONNECT_EMPTY 0x80000000ULL /* fake: these are empty connect flags*/
/*
* set by servers supporting taking extent locks during obd_punch(). Currently
* is requested by liblustre clients only. See bug 9528.
#define MTI_NAME_MAXLEN 64
#define MTI_UUID_MAXLEN MTI_NAME_MAXLEN + 5
+#define MTI_NIDS_MAX 10 /* match lustre_disk.h */
struct mgmt_target_info {
char mti_fsname[MTI_NAME_MAXLEN];
char mti_svname[MTI_NAME_MAXLEN];
- char mti_nodename[MTI_NAME_MAXLEN];
- char mti_uuid[MTI_UUID_MAXLEN];
- __u64 mti_nid; /* lnet_nid_t */ //nid list?
- __u32 mti_config_ver;
- __u32 mti_flags;
- __u32 mti_stripe_index;
- __u32 mti_stripe_pattern; /* PATTERN_RAID0, PATTERN_RAID1 */
+ __u64 mti_nids[MTI_NIDS_MAX]; /* lnet_nid_t host nids */
+ __u64 mti_failnids[MTI_NIDS_MAX]; /* partner nids */
__u64 mti_stripe_size;
__u64 mti_stripe_offset;
+ __u32 mti_stripe_count; /* how many objects are used */
+ __u32 mti_stripe_pattern; /* PATTERN_RAID0, PATTERN_RAID1*/
+ __u32 mti_stripe_index; /* target index; the MGS picks one when
+ LDD_F_NEED_INDEX is set in mti_flags */
+ __u32 mti_nid_count; /* entries in use in mti_nids */
+ __u32 mti_failnid_count; /* entries in use in mti_failnids */
+ __u32 mti_config_ver;
+ __u32 mti_flags; /* LDD_F_* flags, copied from ldd_flags */
};
-extern void lustre_swab_mgmt_target_info(struct mgmt_target_info *oinfo);
+extern void lustre_swab_mgs_target_info(struct mgmt_target_info *oinfo);
#define CM_START 0x01
#define CM_END 0x02
struct semaphore mds_lov_sem;
obd_id *mds_lov_objids;
int mds_lov_objids_size;
- __u32 mds_lov_objids_red;
+ __u32 mds_lov_objids_in_file;
+ unsigned int mds_lov_objids_dirty:1;
int mds_lov_nextid_set;
struct file *mds_lov_objid_filp;
struct file *mds_health_check_filp;
struct semaphore lov_lock;
atomic_t refcount;
struct lov_desc desc;
+ struct obd_connect_data ocd;
int bufsize;
int connects;
int death_row; /* Do we have tgts scheduled to be deleted?
int i, rc = 0;
ENTRY;
- if (count != lov->desc.ld_tgt_count )
+ if (count != lov->desc.ld_tgt_count)
CERROR("Origin connect mds cnt %d != lov cnt %d\n", count,
lov->desc.ld_tgt_count);
+ /* count must match if we're doing all */
+ LASSERT(uuid || (count == lov->desc.ld_tgt_count));
+
for (i = 0, tgt = lov->tgts; i < count; i++, tgt++) {
struct obd_device *child;
struct llog_ctxt *cctxt;
int rc, rc2, i;
ENTRY;
+ lov->ocd.ocd_connect_flags = OBD_CONNECT_EMPTY;
+ if (data)
+ lov->ocd = *data;
+
rc = class_connect(conn, obd, cluuid);
if (rc)
RETURN(rc);
{
struct lov_obd *lov = &obd->u.lov;
struct lov_tgt_desc *tgt;
+ struct obd_connect_data *ocd = NULL;
__u32 bufsize, idx;
int rc;
ENTRY;
osc_obd->obd_no_recov = 0;
}
- /* NULL may need to change when we use flags for osc's */
- rc = lov_connect_obd(obd, tgt, 1, NULL);
+ if (lov->ocd.ocd_connect_flags != OBD_CONNECT_EMPTY) {
+ /* Keep the original connect flags pristine */
+ OBD_ALLOC(ocd, sizeof(*ocd));
+ if (!ocd)
+ RETURN(-ENOMEM);
+ *ocd = lov->ocd;
+ }
+ rc = lov_connect_obd(obd, tgt, 1, ocd);
+ if (ocd)
+ OBD_FREE(ocd, sizeof(*ocd));
if (rc)
GOTO(out, rc);
if (rc) {
CERROR("add failed (%d), deleting %s\n", rc,
(char *)tgt->uuid.uuid);
- //lov_disconnect_obd(obd, tgt);
lov_del_obd(obd, &tgt->uuid, index, 0);
}
RETURN(rc);
lock_kernel();
for (i = 0; i < mds->mds_lov_desc.ld_tgt_count; i++)
- if (ids[i] > (mds->mds_lov_objids)[i])
+ if (ids[i] > (mds->mds_lov_objids)[i]) {
(mds->mds_lov_objids)[i] = ids[i];
+ mds->mds_lov_objids_dirty = 1;
+ }
unlock_kernel();
EXIT;
}
ENTRY;
LASSERT(!mds->mds_lov_objids_size);
+ LASSERT(!mds->mds_lov_objids_dirty);
/* Read everything in the file, even if our current lov desc
has fewer targets. Old targets not in the lov descriptor
rc = fsfilt_read_record(obd, mds->mds_lov_objid_filp, ids, size, &off);
if (rc < 0) {
CERROR("Error reading objids %d\n", rc);
- } else {
- mds->mds_lov_objids_red = size / sizeof(*ids);
- rc = 0;
+ RETURN(rc);
}
-
- for (i = 0; i < mds->mds_lov_objids_red; i++)
- //FIXME D_ERROR
- CDEBUG(D_INFO|D_ERROR, "read last object "LPU64" for idx %d\n",
+
+ mds->mds_lov_objids_in_file = size / sizeof(*ids);
+
+ for (i = 0; i < mds->mds_lov_objids_in_file; i++) {
+ CDEBUG(D_INFO, "read last object "LPU64" for idx %d\n",
mds->mds_lov_objids[i], i);
-
- RETURN(rc);
+ }
+ RETURN(0);
}
int mds_lov_write_objids(struct obd_device *obd)
int i, rc, tgts;
ENTRY;
- tgts = max(mds->mds_lov_desc.ld_tgt_count, mds->mds_lov_objids_red);
+ if (!mds->mds_lov_objids_dirty)
+ RETURN(0);
+
+ tgts = max(mds->mds_lov_desc.ld_tgt_count, mds->mds_lov_objids_in_file);
if (!tgts)
RETURN(0);
for (i = 0; i < tgts; i++)
- //FIXME D_ERROR
- CDEBUG(D_INFO|D_ERROR, "writing last object "LPU64" for idx %d\n",
+ CDEBUG(D_INFO, "writing last object "LPU64" for idx %d\n",
mds->mds_lov_objids[i], i);
rc = fsfilt_write_record(obd, mds->mds_lov_objid_filp,
mds->mds_lov_objids, tgts * sizeof(obd_id),
&off, 0);
+ if (rc >= 0) {
+ mds->mds_lov_objids_dirty = 0;
+ rc = 0;
+ }
+
RETURN(rc);
}
(size > mds->mds_lov_objids_size)) {
obd_id *ids;
- /* add room for a bunch at a time */
- size = (ld->ld_tgt_count + 8) * sizeof(obd_id);
+ /* add room by powers of 2 */
+ size = 1;
+ while (size < ld->ld_tgt_count)
+ size = size << 1;
+ CERROR("Next size=%d\n", size);
+ size = size * sizeof(obd_id);
OBD_ALLOC(ids, size);
if (ids == NULL)
GOTO(out, rc = -ENOMEM);
memset(ids, 0, size);
if (mds->mds_lov_objids_size) {
+ obd_id *old_ids = mds->mds_lov_objids;
memcpy(ids, mds->mds_lov_objids,
mds->mds_lov_objids_size);
- OBD_FREE(mds->mds_lov_objids, mds->mds_lov_objids_size);
+ mds->mds_lov_objids = ids;
+ OBD_FREE(old_ids, mds->mds_lov_objids_size);
}
mds->mds_lov_objids = ids;
mds->mds_lov_objids_size = size;
int rc = 0;
ENTRY;
- //FIXME remove D_ERROR
- CDEBUG(D_CONFIG|D_ERROR, "Updating mds lov for OST idx %d\n", idx);
+ //FIXME remove D_WARNING
+ CDEBUG(D_CONFIG|D_WARNING, "Updating mds lov for OST idx %d\n", idx);
old_count = mds->mds_lov_desc.ld_tgt_count;
rc = mds_lov_update_desc(obd, mds->mds_osc_exp);
RETURN(-EINVAL);
}
- if (idx >= mds->mds_lov_objids_red) {
+ if (idx >= mds->mds_lov_objids_in_file) {
/* We never read this lastid; ask the osc */
obd_id lastid;
__u32 size = sizeof(lastid);
mds->mds_lov_objids[idx] = lastid;
CWARN("got last object "LPU64" from OST %d\n",
mds->mds_lov_objids[idx], idx);
+ mds->mds_lov_objids_dirty = 1;
mds_lov_write_objids(obd);
} else {
/* We did read this lastid; tell the osc */
/* If we're mounting this code for the first time on an existing FS,
* we need to populate the objids array from the real OST values */
- if (mds->mds_lov_desc.ld_tgt_count > mds->mds_lov_objids_red) {
+ if (mds->mds_lov_desc.ld_tgt_count > mds->mds_lov_objids_in_file) {
int size = sizeof(obd_id) * mds->mds_lov_desc.ld_tgt_count;
rc = obd_get_info(mds->mds_osc_exp, strlen("last_id"),
"last_id", &size, mds->mds_lov_objids);
for (i = 0; i < mds->mds_lov_desc.ld_tgt_count; i++)
CWARN("got last object "LPU64" from OST %d\n",
mds->mds_lov_objids[i], i);
+ mds->mds_lov_objids_dirty = 1;
rc = mds_lov_write_objids(obd);
if (rc)
CERROR("got last objids from OSTs, but error "
struct mds_obd *mds;
struct obd_uuid *uuid = NULL;
__u32 idx;
- int rc = 0;
+ int rc = 0, have_sem = 0;
ENTRY;
obd = mlsi->mlsi_obd;
LASSERT(obd != NULL);
- /* Hold this throughout a synchronize, and wherever we
- reference the contents of mds_lov_desc */
- down(&mds->mds_lov_sem);
+ /* We can't change the target count in one of these sync
+ threads while another sync thread is doing the clearorphans on
+ all the targets. */
+ if (!watched || (idx != MLSI_NO_INDEX)) {
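+ /* NOTE(review): this branch takes mds_lov_sem, yet the
+ original note here read "if we're syncing a particular
+ target, or we're not changing the target_count, then we
+ don't need the sem" -- that seems to describe the case
+ where this branch is skipped; confirm. */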
+ down(&mds->mds_lov_sem);
+ have_sem++;
+ }
rc = obd_set_info(mds->mds_osc_exp, strlen(KEY_MDS_CONN),
KEY_MDS_CONN, 0, uuid);
}
out:
- up(&mds->mds_lov_sem);
+ if (have_sem)
+ up(&mds->mds_lov_sem);
class_decref(obd);
RETURN(rc);
}
rc = ptlrpc_queue_wait(req);
if (!rc) {
rep_mti = lustre_swab_repbuf(req, 0, sizeof(*rep_mti),
- lustre_swab_mgmt_target_info);
+ lustre_swab_mgs_target_info);
memcpy(mti, rep_mti, sizeof(*rep_mti));
CDEBUG(D_MGC, "target_add %s got index = %d\n",
mti->mti_svname, mti->mti_stripe_index);
if (!rc) {
int index;
rep_mti = lustre_swab_repbuf(req, 0, sizeof(*rep_mti),
- lustre_swab_mgmt_target_info);
+ lustre_swab_mgs_target_info);
index = rep_mti->mti_stripe_index;
if (index != mti->mti_stripe_index) {
CERROR ("OST DEL failed. rc=%d\n", index);
ENTRY;
mti = lustre_swab_reqbuf(req, 0, sizeof(*mti),
- lustre_swab_mgmt_target_info);
+ lustre_swab_mgs_target_info);
CDEBUG(D_MGS, "adding %s, index=%d\n", mti->mti_svname,
mti->mti_stripe_index);
/* set the new target index if needed */
- if (mti->mti_flags & LDD_F_NEED_INDEX) {
- rc = mgs_set_next_index(obd, mti);
- if (rc) {
- CERROR("Can't get index (%d)\n", rc);
- GOTO(out, rc);
- }
+ rc = mgs_set_index(obd, mti);
+ if (rc) {
+ CERROR("Can't get index (%d)\n", rc);
+ GOTO(out, rc);
}
/* revoke the config lock so everyone will update */
int mgs_init_db_list(struct obd_device *obd);
int mgs_cleanup_db_list(struct obd_device *obd);
-int mgs_set_next_index(struct obd_device *obd, struct mgmt_target_info *mti);
+int mgs_set_index(struct obd_device *obd, struct mgmt_target_info *mti);
int mgs_write_log_target(struct obd_device *obd, struct mgmt_target_info *mti);
#endif
set_bit(i, index_map);
return i;
}
- CERROR("max index exceeded.\n");
+ CERROR("max index %d exceeded.\n", i);
return -1;
}
return 0;
}
-int mgs_set_next_index(struct obd_device *obd, struct mgmt_target_info *mti)
+int mgs_set_index(struct obd_device *obd, struct mgmt_target_info *mti)
{
struct fs_db *db;
int rc = 0;
return rc;
}
- /* FIXME use mti->mti_stripe_index if given, report error if already
- in use */
+ if (!(mti->mti_flags & LDD_F_NEED_INDEX)) {
+ /* The server asked for a specific index: range-check it,
+ then claim its bit in the index map unless already set.
+ Returns -ERANGE, -EADDRINUSE, or 0 on success. */
+ if (mti->mti_stripe_index >= INDEX_MAP_SIZE * 8) {
+ /* largest valid index is one below the map's bit count */
+ LCONSOLE_ERROR("Server %s requested index %d, but the "
+ "max index is %d.\n",
+ mti->mti_svname, mti->mti_stripe_index,
+ INDEX_MAP_SIZE * 8 - 1);
+ return -ERANGE;
+ }
+ if (test_bit(mti->mti_stripe_index, db->fd_index_map)) {
+ LCONSOLE_ERROR("Server %s requested index %d, but that "
+ "index is already in use.\n",
+ mti->mti_svname, mti->mti_stripe_index);
+ return -EADDRINUSE;
+ } else {
+ set_bit(mti->mti_stripe_index, db->fd_index_map);
+ return 0;
+ }
+ }
+
if (mti->mti_flags & LDD_F_SV_TYPE_OST) {
rc = next_ost_index(db->fd_index_map, INDEX_MAP_SIZE);
if (rc == -1)
uint64_t nid, char *uuid)
{
return record_base(obd,llh,NULL,nid,LCFG_ADD_UUID,uuid,0,0,0);
+
+}
+
+/* Hand an LCFG_ADD_CONN command naming uuid to record_base for llh.
+ * Same call shape as record_add_uuid, but with 0 where that helper
+ * passes its nid. */
+static inline int record_add_conn(struct obd_device *obd,
+ struct llog_handle *llh, char *uuid)
+{
+ int rc;
+
+ rc = record_base(obd, llh, NULL, 0, LCFG_ADD_CONN, uuid, 0, 0, 0);
+ return rc;
}
static inline int record_attach(struct obd_device *obd, struct llog_handle *llh,
/* lov is the first thing in the mdt and client logs */
static int mgs_write_log_lov(struct obd_device *obd, struct fs_db *db,
+ struct mgmt_target_info *mti,
char *logname, char *lovname)
{
struct llog_handle *llh = NULL;
/* Use defaults here, will fix them later with LCFG_PARAM */
lovdesc->ld_magic = LOV_DESC_MAGIC;
lovdesc->ld_tgt_count = 0;
- lovdesc->ld_pattern = 0;
- lovdesc->ld_default_stripe_count = 1;
- lovdesc->ld_default_stripe_size = 1024*1024;
- lovdesc->ld_default_stripe_offset = 0;
+ lovdesc->ld_default_stripe_count = mti->mti_stripe_count;
+ lovdesc->ld_pattern = mti->mti_stripe_pattern;
+ lovdesc->ld_default_stripe_size = mti->mti_stripe_size;
+ lovdesc->ld_default_stripe_offset = mti->mti_stripe_offset;
sprintf((char*)lovdesc->ld_uuid.uuid, "%s_UUID", lovname);
/* can these be the same? */
uuid = (char *)lovdesc->ld_uuid.uuid;
{
struct llog_handle *llh = NULL;
char *cliname, *mdcname, *lovname, *nodeuuid, *mdsuuid, *mdcuuid;
- int rc, first_log = 0;
+ lnet_nid_t nid;
+ int rc, i, first_log = 0;
CDEBUG(D_MGS, "writing new mdt %s\n", mti->mti_svname);
/* This is the first time for all logs for this fs,
since any ost should have already started the mdt log. */
first_log++;
- rc = mgs_write_log_lov(obd, db, mti->mti_svname,
+ rc = mgs_write_log_lov(obd, db, mti, mti->mti_svname,
lovname);
}
name_create(mti->mti_fsname, "-clilov", &lovname);
if (first_log) {
/* Start client log */
- rc = mgs_write_log_lov(obd, db, cliname, lovname);
+ rc = mgs_write_log_lov(obd, db, mti, cliname, lovname);
}
- /* Add the mdt info to the client */
- name_create(libcfs_nid2str(mti->mti_nid), "_UUID", &nodeuuid);
+ /* Add the mdt info to the client log */
+ name_create(libcfs_nid2str(mti->mti_nids[0]), /*"_UUID"*/"", &nodeuuid);
name_create(mti->mti_svname, "-mdc", &mdcname);
name_create(mdcname, "_UUID", &mdcuuid);
/*
#14 L mount_option 0: 1:client 2:lov1 3:MDC_uml1_mdsA_MNT_client
*/
rc = record_start_log(obd, &llh, cliname);
- /* FIXME can we just use the nid as the node uuid, or do we really
- need the hostname? */
rc = record_marker(obd, llh, db, CM_START, "add mdc");
- rc = record_add_uuid(obd, llh, mti->mti_nid, nodeuuid);
+ for (i = 0; i < mti->mti_nid_count; i++) {
+ CERROR("add nid %s\n", libcfs_nid2str(mti->mti_nids[i]));
+ rc = record_add_uuid(obd, llh, mti->mti_nids[i], nodeuuid);
+ }
rc = record_attach(obd, llh, mdcname, LUSTRE_MDC_NAME, mdcuuid);
rc = record_setup(obd,llh,mdcname,mdsuuid,nodeuuid,0,0);
- /* FIXME add uuid, add_conn for failover mdt's */
+ for (i = 0; i < mti->mti_failnid_count; i++) {
+ nid = mti->mti_failnids[i];
+ CERROR("add failover nid %s\n", libcfs_nid2str(nid));
+ rc = record_add_uuid(obd, llh, nid, libcfs_nid2str(nid));
+ rc = record_add_conn(obd, llh, libcfs_nid2str(nid));
+ }
rc = record_mount_opt(obd, llh, cliname, lovname, mdcname);
rc = record_marker(obd, llh, db, CM_END, "add mdc");
rc = record_end_log(obd, &llh);
struct llog_handle *llh = NULL;
char *nodeuuid, *oscname, *oscuuid, *lovuuid;
char index[5];
- int rc;
+ lnet_nid_t nid;
+ int i, rc;
if (mgs_log_is_empty(obd, logname)) {
/* The first time an osc is added, setup the lov */
- rc = mgs_write_log_lov(obd, db, logname, lovname);
+ rc = mgs_write_log_lov(obd, db, mti, logname, lovname);
}
CDEBUG(D_MGS, "adding osc for %s to log %s\n",
mti->mti_svname, logname);
- name_create(libcfs_nid2str(mti->mti_nid), "_UUID", &nodeuuid);
+ name_create(libcfs_nid2str(mti->mti_nids[0]), /*"_UUID"*/"", &nodeuuid);
name_create(mti->mti_svname, "-osc", &oscname);
name_create(oscname, "_UUID", &oscuuid);
name_create(lovname, "_UUID", &lovuuid);
*/
rc = record_start_log(obd, &llh, logname);
rc = record_marker(obd, llh, db, CM_START, "add osc");
- rc = record_add_uuid(obd, llh, mti->mti_nid, nodeuuid);
+ for (i = 0; i < mti->mti_nid_count; i++) {
+ CERROR("add nid %s\n", libcfs_nid2str(mti->mti_nids[i]));
+ rc = record_add_uuid(obd, llh, mti->mti_nids[i], nodeuuid);
+ }
rc = record_attach(obd, llh, oscname, LUSTRE_OSC_NAME, lovuuid);
rc = record_setup(obd, llh, oscname, ostuuid, nodeuuid, 0, 0);
- /* FIXME add uuid, add_conn for failover ost's */
+ for (i = 0; i < mti->mti_failnid_count; i++) {
+ nid = mti->mti_failnids[i];
+ CERROR("add failover nid %s\n", libcfs_nid2str(nid));
+ rc = record_add_uuid(obd, llh, nid, libcfs_nid2str(nid));
+ rc = record_add_conn(obd, llh, libcfs_nid2str(nid));
+ }
snprintf(index, sizeof(index), "%d", mti->mti_stripe_index);
rc = record_lov_add(obd,llh, lovname, ostuuid, index,"1"/*generation*/);
rc = record_marker(obd, llh, db, CM_END, "add osc");
Heck, what do we do about the client and mds logs? We better
abort. */
if (!mgs_log_is_empty(obd, mti->mti_svname)) {
- CERROR("The config log for %s already exists, not adding.\n",
- mti->mti_svname);
+ LCONSOLE_ERROR("The config log for %s already exists, yet the "
+ "server claims it never registered. It may have"
+ " been reformatted, or the index changed. This "
+ "must be resolved before this server can be "
+ "added.\n", mti->mti_svname);
return -EALREADY;
}
/*
ENTRY;
LASSERT(ldd->ldd_magic == LDD_MAGIC);
+
+ ldd->ldd_config_ver++;
push_ctxt(&saved, mount_ctxt, NULL);
sizeof(mti->mti_fsname));
strncpy(mti->mti_svname, ldd->ldd_svname,
sizeof(mti->mti_svname));
- // char mti_nodename[NAME_MAXLEN];
- // char mti_uuid[UUID_MAXLEN];
- /* FIXME nid 0 is lo generally, need to send all non-lo nids */
+
+ mti->mti_nid_count = 0;
while ((rc = LNetGetId(i++, &id)) != -ENOENT) {
if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND)
continue;
- /* FIXME use all non-lo nids, not just first */
- break;
+ mti->mti_nids[mti->mti_nid_count] = id.nid;
+ mti->mti_nid_count++;
+ if (mti->mti_nid_count >= MTI_NIDS_MAX) {
+ CWARN("Only using first %d nids for %s\n",
+ mti->mti_nid_count, mti->mti_svname);
+ break;
+ }
}
- mti->mti_nid = id.nid;
+
+ memcpy(mti->mti_failnids, ldd->ldd_failnid, sizeof(mti->mti_failnids));
+ mti->mti_failnid_count = ldd->ldd_failnid_count;
mti->mti_config_ver = 0;
mti->mti_flags = ldd->ldd_flags;
mti->mti_stripe_index = ldd->ldd_svindex;
- mti->mti_stripe_pattern = 0; //FIXME
- mti->mti_stripe_size = 1024*1024; //FIXME
- mti->mti_stripe_offset = 0; //FIXME
+ mti->mti_stripe_count = ldd->ldd_stripe_count;
+ mti->mti_stripe_pattern = ldd->ldd_stripe_pattern;
+ mti->mti_stripe_size = ldd->ldd_stripe_sz;
+ mti->mti_stripe_offset = ldd->ldd_stripe_offset;
CDEBUG(D_MOUNT, "Initial registration %s, fs=%s, %s, index=%04x\n",
mti->mti_svname, mti->mti_fsname,
- libcfs_nid2str(mti->mti_nid), mti->mti_stripe_index);
+ libcfs_nid2str(mti->mti_nids[0]), mti->mti_stripe_index);
/* Register the target */
/* FIXME use mdc_process_config instead */
" %s\n",
ldd->ldd_svindex, mti->mti_stripe_index,
mti->mti_svname);
- ldd->ldd_flags &= ~(LDD_F_NEED_INDEX | LDD_F_NEED_REGISTER);
- /* This server has never been started, so has no config */
- ldd->ldd_config_ver = 0;
ldd->ldd_svindex = mti->mti_stripe_index;
strncpy(ldd->ldd_svname, mti->mti_svname,
sizeof(ldd->ldd_svname));
/* or ldd_make_sv_name(ldd); */
- ldd_write(&mgc->obd_lvfs_ctxt, ldd);
/* FIXME write last_rcvd?, disk label? */
}
+ /* Always write out the new flags */
+ ldd->ldd_flags &= ~(LDD_F_NEED_INDEX | LDD_F_NEED_REGISTER);
+ ldd_write(&mgc->obd_lvfs_ctxt, ldd);
+
out:
if (mti)
OBD_FREE(mti, sizeof(*mti));
to read and write configs locally. */
server_mgc_set_fs(lsi->lsi_mgc, sb);
- /* Get a new index if needed */
+ /* Register if needed */
if (lsi->lsi_ldd->ldd_flags &
(LDD_F_NEED_INDEX | LDD_F_NEED_REGISTER)) {
CDEBUG(D_MOUNT, "Need new target index from MGS\n");
/* append ldd nids to lmd nids */
for (i = 0; (i < lsi->lsi_ldd->ldd_mgsnid_count) &&
- (lsi->lsi_lmd->lmd_mgsnid_count < MAX_FAILOVER_NIDS); i++) {
+ (lsi->lsi_lmd->lmd_mgsnid_count < MTI_NIDS_MAX); i++) {
lsi->lsi_lmd->lmd_mgsnid[lsi->lsi_lmd->lmd_mgsnid_count++] =
lsi->lsi_ldd->ldd_mgsnid[i];
}
LCONSOLE_ERROR("Can't parse NID '%s'\n", s1);
goto invalid;
}
- if (lmd->lmd_mgsnid_count >= MAX_FAILOVER_NIDS) {
+ if (lmd->lmd_mgsnid_count >= MTI_NIDS_MAX) {
LCONSOLE_ERROR("Too many NIDs: '%s'\n", s1);
goto invalid;
}
CLASSERT(offsetof(typeof(*b), padding_4) != 0);
}
-void lustre_swab_mgmt_target_info(struct mgmt_target_info *mti)
+void lustre_swab_mgs_target_info(struct mgmt_target_info *mti) /* swab the
+ integer fields of *mti; the name strings are left untouched */
{
- __swab64s(&mti->mti_nid);
- __swab32s(&mti->mti_config_ver);
- __swab32s(&mti->mti_flags);
- __swab32s(&mti->mti_stripe_index);
- __swab32s(&mti->mti_stripe_pattern);
+ int i;
+ for (i = 0; i < MTI_NIDS_MAX; i++) { /* every slot is swabbed,
+ independent of mti_nid_count / mti_failnid_count */
+ __swab64s(&mti->mti_nids[i]);
+ __swab64s(&mti->mti_failnids[i]);
+ }
__swab64s(&mti->mti_stripe_size);
__swab64s(&mti->mti_stripe_offset);
+ __swab32s(&mti->mti_stripe_count); /* 32-bit fields, in the order
+ the struct declares them */
+ __swab32s(&mti->mti_stripe_pattern);
+ __swab32s(&mti->mti_stripe_index);
+ __swab32s(&mti->mti_nid_count);
+ __swab32s(&mti->mti_failnid_count);
+ __swab32s(&mti->mti_config_ver);
+ __swab32s(&mti->mti_flags);
}
static void lustre_swab_obd_dqinfo (struct obd_dqinfo *i)
EXPORT_SYMBOL(lustre_swab_ldlm_request);
EXPORT_SYMBOL(lustre_swab_ldlm_reply);
EXPORT_SYMBOL(lustre_swab_qdata);
-EXPORT_SYMBOL(lustre_swab_mgmt_target_info);
+EXPORT_SYMBOL(lustre_swab_mgs_target_info);
/* recover.c */
EXPORT_SYMBOL(ptlrpc_run_recovery_over_upcall);
return version;
}
+/* FIXME use popen */
int run_command(char *cmd)
{
int i = 0,ret = 0;
if (strstr(mop->mo_mkfsopts, "-I") == NULL) {
long inode_size = 0;
if (IS_MDT(&mop->mo_ldd)) {
- if (mop->mo_stripe_count > 77)
+ if (mop->mo_ldd.ldd_stripe_count > 77)
inode_size = 512; /* bz 7241 */
- else if (mop->mo_stripe_count > 34)
+ else if (mop->mo_ldd.ldd_stripe_count > 34)
inode_size = 2048;
- else if (mop->mo_stripe_count > 13)
+ else if (mop->mo_ldd.ldd_stripe_count > 13)
inode_size = 1024;
else
inode_size = 512;
void set_defaults(struct mkfs_opts *mop)
{
mop->mo_ldd.ldd_magic = LDD_MAGIC;
- mop->mo_ldd.ldd_config_ver = 0;
+ mop->mo_ldd.ldd_config_ver = 1;
mop->mo_ldd.ldd_flags = LDD_F_NEED_INDEX | LDD_F_NEED_REGISTER;
mop->mo_ldd.ldd_mgsnid_count = 0;
strcpy(mop->mo_ldd.ldd_fsname, "lustre");
mop->mo_ldd.ldd_mount_type = LDD_MT_LDISKFS;
mop->mo_ldd.ldd_svindex = -1;
- mop->mo_stripe_count = 1;
+ mop->mo_ldd.ldd_stripe_count = 1; /* striping defaults are stored in
+ mo_ldd, not directly in mkfs_opts */
+ mop->mo_ldd.ldd_stripe_sz = 1024 * 1024; /* the -s option replaces this
+ with its argument * 1024 */
+ mop->mo_ldd.ldd_stripe_pattern = 0;
}
static inline void badopt(char opt, char *type)
case 'c':
if (IS_MDT(&mop.mo_ldd)) {
int stripe_count = atol(optarg);
- mop.mo_stripe_count = stripe_count;
+ if (stripe_count <= 0) {
+ fprintf(stderr, "%s: bad stripe count "
+ "%d\n", progname, stripe_count);
+ exit(1);
+ }
+ mop.mo_ldd.ldd_stripe_count = stripe_count;
} else {
badopt(opt, "MDT");
}
while ((s2 = strsep(&s1, ","))) {
mop.mo_ldd.ldd_mgsnid[i++] =
libcfs_str2nid(s2);
- if (i >= MAX_FAILOVER_NIDS) {
- fprintf(stderr, "%s: too many MGS nids, "
- "ignoring %s\n", progname, s1);
+ if (i >= MTI_NIDS_MAX) {
+ fprintf(stderr, "%s: too many MGS nids,"
+ " ignoring %s\n", progname, s1);
break;
}
}
break;
case 's':
if (IS_MDT(&mop.mo_ldd))
- mop.mo_stripe_sz = atol(optarg) * 1024;
+ mop.mo_ldd.ldd_stripe_sz = atol(optarg) * 1024;
else
badopt(opt, "MDT");
break;
case 't':
- mop.mo_timeout = atol(optarg);
+ mop.mo_ldd.ldd_timeout = atol(optarg);
break;
case 'v':
verbose++;
"(is the lnet module loaded?)\n", progname);
} else {
if (i > 0) {
- if (i > MAX_FAILOVER_NIDS)
- i = MAX_FAILOVER_NIDS;
+ if (i > MTI_NIDS_MAX)
+ i = MTI_NIDS_MAX;
vprint("Adding %d local nids for MGS\n", i);
memcpy(mop.mo_ldd.ldd_mgsnid, nids,
sizeof(mop.mo_ldd.ldd_mgsnid));
goto out;
}
- if (IS_MDT(&mop.mo_ldd) && (mop.mo_stripe_sz == 0))
- mop.mo_stripe_sz = 1024 * 1024;
-
strcpy(mop.mo_device, argv[optind]);
/* These are the permanent mount options (always included) */