__u8 lsd_uuid[40]; /* server UUID */
__u64 lsd_unused; /* was lsd_last_objid - don't use for now */
__u64 lsd_last_transno; /* last completed transaction ID */
- __u64 lsd_mount_count; /* FILTER incarnation number */
+ __u64 lsd_mount_count; /* incarnation number */
__u32 lsd_feature_compat; /* compatible feature flags */
__u32 lsd_feature_rocompat;/* read-only compatible feature flags */
__u32 lsd_feature_incompat;/* incompatible feature flags */
MGMT_LAST_OPC
} mgs_cmd_t;
-#define NAME_MAXLEN 64
-#define UUID_MAXLEN NAME_MAXLEN + 5
+/* Buffer sizes for the name/uuid strings carried in struct mgmt_target_info. */
+#define MTI_NAME_MAXLEN 64
+/* Parenthesized so the macro expands as a single value inside larger
+ * expressions (e.g. MTI_UUID_MAXLEN * 2). */
+#define MTI_UUID_MAXLEN (MTI_NAME_MAXLEN + 5)
struct mgmt_target_info {
- char mti_fsname[NAME_MAXLEN];
- char mti_svname[NAME_MAXLEN];
- char mti_nodename[NAME_MAXLEN];
- char mti_uuid[UUID_MAXLEN];
+ char mti_fsname[MTI_NAME_MAXLEN];
+ char mti_svname[MTI_NAME_MAXLEN];
+ char mti_nodename[MTI_NAME_MAXLEN];
+ char mti_uuid[MTI_UUID_MAXLEN];
__u64 mti_nid; /* lnet_nid_t */ //nid list?
__u32 mti_config_ver;
__u32 mti_flags;
extern void lustre_swab_mgmt_target_info(struct mgmt_target_info *oinfo);
+/* Bits for cfg_marker.cm_flags. */
+/* CM_START: opening marker of a group of config records; record_marker()
+ * advances the per-fs step counter when this bit is set. */
+#define CM_START 0x01
+/* CM_END: closing marker of the group; written with the same step value. */
+#define CM_END 0x02
+/* CM_SKIP: NOTE(review): no user visible in this change - presumably marks a
+ * group to be ignored when the log is replayed; confirm. */
+#define CM_SKIP 0x04
+
+/* Payload of an LCFG_MARKER config record (carried in lustre_cfg buffer 1). */
+struct cfg_marker {
+ __u32 cm_step; /* aka config version */
+ /* CM_* bits above */
+ __u32 cm_flags;
+ /* NOTE(review): no visible writer assigns a time to this field yet */
+ __u32 cm_timestamp;
+ /* short description, printed with %s by the marker consumers */
+ char cm_comment[40];
+};
/*
* Opcodes for multiple servers.
#define MDS_CLIENT_SLOTS 17
-
-/* Data stored per server at the head of the last_rcvd file. In le32 order.
- * Try to keep this the same as fsd_server_data so we might one day merge. */
-struct mds_server_data {
- __u8 msd_uuid[40]; /* server UUID */
- __u64 msd_last_transno; /* last completed transaction ID */
- __u64 msd_mount_count; /* MDS incarnation number */
- __u64 msd_unused;
- __u32 msd_feature_compat; /* compatible feature flags */
- __u32 msd_feature_rocompat;/* read-only compatible feature flags */
- __u32 msd_feature_incompat;/* incompatible feature flags */
- __u32 msd_server_size; /* size of server data area */
- __u32 msd_client_start; /* start of per-client data area */
- __u16 msd_client_size; /* size of per-client data area */
- __u16 msd_subdir_count; /* number of subdirectories for objects */
- __u64 msd_catalog_oid; /* recovery catalog object id */
- __u32 msd_catalog_ogen; /* recovery catalog inode generation */
- __u8 msd_peeruuid[40]; /* UUID of LOV/OSC associated with MDS */
- __u8 msd_padding[MDS_LR_SERVER_SIZE - 140];
-};
-
/* Data stored per client in the last_rcvd file. In le32 order. */
struct mds_client_data {
__u8 mcd_uuid[40]; /* client UUID */
#include <linux/lustre_log.h>
#include <linux/lustre_export.h>
-#define MGS_ROCOMPAT_SUPP 0x00000001
-#define MGS_INCOMPAT_SUPP (0)
-
-typedef enum {
- MCID = 1,
- OTID = 2,
-} llogid_t;
-
-struct mgc_op_data {
- llogid_t obj_id;
- __u64 obj_version;
-};
-
-
-struct system_db {
- char fsname[64];
- struct list_head db_list;
- void* index_map;
- struct list_head ost_infos;
- int sdb_flags;
+/* Per-filesystem state kept by the MGS; entries are linked on
+ * mgs_obd.mgs_fs_db_list and looked up by name in mgs_find_db(). */
+struct fs_db {
+ /* filesystem name, compared with strcmp() against the requested name.
+ * NOTE(review): only 8 bytes, while mti_fsname is MTI_NAME_MAXLEN -
+ * confirm that fs names are limited accordingly. */
+ char fd_name[8];
+ /* linkage on mgs_obd.mgs_fs_db_list */
+ struct list_head fd_list;
+ /* bitmap (INDEX_MAP_SIZE bytes) of OST indices already handed out */
+ void* fd_index_map;
+ /* NOTE(review): no reader or writer visible in this change */
+ __u32 fd_flags;
+ /* highest marker step seen in, or written to, the config log */
+ __u32 fd_last_step;
 //FIXME add a semaphore for locking the fs_db (and logs)
};
int mgs_fs_setup(struct obd_device *obd, struct vfsmount *mnt);
int mgs_fs_cleanup(struct obd_device *obddev);
+int mgs_iocontrol(unsigned int cmd, struct obd_export *exp,
+ int len, void *karg, void *uarg);
-extern int mgs_iocontrol(unsigned int cmd, struct obd_export *exp,
- int len, void *karg, void *uarg);
-
-extern int mgs_mds_register(struct ptlrpc_request *req);
#endif
atomic_t fo_quotachecking;
};
-struct mds_server_data;
-
#define OSC_MAX_RIF_DEFAULT 8
#define OSC_MAX_RIF_MAX 64
#define OSC_MAX_DIRTY_DEFAULT 32
struct dentry *mgs_configs_dir;
struct dentry *mgs_fid_de;
struct llog_handle *mgs_cfg_llh;
- spinlock_t mgs_system_db_lock;
- struct list_head mgs_system_db_list;
- struct lustre_handle mgs_pw_lock; /* config update lock */
+ spinlock_t mgs_fs_db_lock;
+ struct list_head mgs_fs_db_list;
};
struct mds_obd {
int rc = 0;
ENTRY;
- //FIXME remove
- LDLM_ERROR(lock, "ldlm completion ast");
-
if (flags == LDLM_FL_WAIT_NOREPROC) {
LDLM_DEBUG(lock, "client-side enqueue waiting on pending lock");
goto noreproc;
{
struct mds_obd *mds = &obd->u.mds;
struct lr_server_data *lsd = mds->mds_server_data;
+ struct lr_server_data *lsd_copy = NULL;
struct file *filp = mds->mds_rcvd_filp;
struct lvfs_run_ctxt saved;
loff_t off = 0;
int rc;
ENTRY;
- push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
- lsd->lsd_last_transno = cpu_to_le64(mds->mds_last_transno);
-
CDEBUG(D_SUPER, "MDS mount_count is "LPU64", last_transno is "LPU64"\n",
mds->mds_mount_count, mds->mds_last_transno);
+
+ lsd->lsd_last_transno = cpu_to_le64(mds->mds_last_transno);
+
+ if (!(lsd->lsd_feature_compat & cpu_to_le32(LR_COMPAT_COMMON_LR))) {
+ /* Swap to the old mds_server_data format, in case
+ someone wants to revert to a pre-1.6 lustre */
+ CDEBUG(D_INFO, "writing old last_rcvd format\n");
+ /* malloc new struct instead of swap in-place because
+ we don't have a lock on the last_transno or mount count -
+ someone may modify it while we're here, and we don't want
+ them to inc the wrong thing. */
+ OBD_ALLOC(lsd_copy, sizeof(*lsd_copy));
+ if (!lsd_copy)
+ RETURN(-ENOMEM);
+ *lsd_copy = *lsd;
+ lsd_copy->lsd_unused = lsd->lsd_last_transno;
+ lsd_copy->lsd_last_transno = lsd->lsd_mount_count;
+ lsd = lsd_copy;
+ }
+
+ push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
rc = fsfilt_write_record(obd, filp, lsd, sizeof(*lsd), &off,force_sync);
+ pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
if (rc)
CERROR("error writing MDS server data: rc = %d\n", rc);
- pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+
+ if (lsd_copy)
+ OBD_FREE(lsd_copy, sizeof(*lsd_copy));
RETURN(rc);
}
lsd->lsd_client_start = cpu_to_le32(MDS_LR_CLIENT_START);
lsd->lsd_client_size = cpu_to_le16(MDS_LR_CLIENT_SIZE);
lsd->lsd_feature_rocompat = cpu_to_le32(MDS_ROCOMPAT_LOVOBJID);
+ lsd->lsd_feature_compat = cpu_to_le32(LR_COMPAT_COMMON_LR);
} else {
rc = fsfilt_read_record(obd, file, lsd, sizeof(*lsd), &off);
if (rc) {
GOTO(err_msd, rc = -EINVAL);
}
- if (lsd->lsd_feature_compat & ~cpu_to_le32(LR_COMPAT_COMMON_LR)) {
- CERROR("old last_rcvd format, updating\n");
+ if (!(lsd->lsd_feature_compat & cpu_to_le32(LR_COMPAT_COMMON_LR))) {
+ CDEBUG(D_WARNING, "old last_rcvd format\n");
lsd->lsd_mount_count = lsd->lsd_last_transno; //msd->msd_mount_count
lsd->lsd_last_transno = lsd->lsd_unused; //msd->msd_last_transno;
+ /* If we update the last_rcvd, we can never go back to
+ an old install. Leave this in the old format for now.
lsd->lsd_feature_compat |= cpu_to_le32(LR_COMPAT_COMMON_LR);
- GOTO(err_msd, rc = -EINVAL);
+ */
}
mds->mds_last_transno = le64_to_cpu(lsd->lsd_last_transno);
RETURN(rc);
}
-/* see ll_mdc_blocking_ast */
+/* based on ll_mdc_blocking_ast */
static int mgc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
void *data, int flag)
{
trigger a new enqueue for the same lock (in a separate
thread likely, which won't match the just-being-cancelled
lock due to CBPENDING flag) + config llog processing */
+ /* FIXME make sure not to re-enqueue when the mgc is stopping
+ (we get called from client_disconnect_export) */
+
+ CERROR("Lock res "LPU64"\n", lock->l_resource->lr_name.name[0]);
+ /* FIXME should pass logname,sb as part of lock->l_ast_data,
+ lustre_get_process_log that. Or based on resource.
+ Either way, must have one lock per llog. */
+ //update_llog();
+
break;
}
default:
RETURN(0);
}
-/* see ll_get_dir_page */
-static int mgc_get_cfg_lock(struct obd_export *exp, char *fsname)
+/* based on ll_get_dir_page and osc_enqueue. */
+static int mgc_enqueue(struct obd_export *exp, struct lov_stripe_md *lsm,
+ __u32 type, ldlm_policy_data_t *policy, __u32 mode,
+ int *flags, void *bl_cb, void *cp_cb, void *gl_cb,
+ void *data, __u32 lvb_len, void *lvb_swabber,
+ struct lustre_handle *lockh)
{
- struct lustre_handle lockh;
struct obd_device *obd = class_exp2obd(exp);
/* FIXME use fsname, vers and separate locks? see mgs_get_cfg_lock */
struct ldlm_res_id res_id = { .name = { 12321 } };
- int rc = 0, flags = 0;
+ int rc;
ENTRY;
+ /* We're only called from obd_mount */
+ //LASSERT(mode == LCK_CR);
+ LASSERT(type == LDLM_PLAIN);
+
+ CDEBUG(D_MGC, "Enqueue for %s\n", (char *)data);
+
/* Search for already existing locks.*/
- rc = ldlm_lock_match(obd->obd_namespace, 0, &res_id, LDLM_PLAIN,
- NULL, LCK_CR, &lockh);
+ rc = ldlm_lock_match(obd->obd_namespace, 0, &res_id, type,
+ NULL, mode, lockh);
if (rc == 1)
RETURN(ELDLM_OK);
- CDEBUG(D_MGC, "Taking a cfg reader lock\n");
- /* see filter_prepare_destroy
- rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, res_id,
- LDLM_EXTENT, &policy, LCK_PW,
- &flags, ldlm_blocking_ast, ldlm_completion_ast,
- NULL, NULL, NULL, 0, NULL, &lockh);
- */
-
rc = ldlm_cli_enqueue(exp, NULL, obd->obd_namespace, res_id,
- LDLM_PLAIN, NULL, LCK_CR, &flags,
+ type, NULL, mode, flags,
mgc_blocking_ast, ldlm_completion_ast, NULL,
- NULL/*cb_data*/, NULL, 0, NULL, &lockh);
-
- /* now drop the lock so MGS can revoke it */
- ldlm_lock_decref(&lockh, LCK_PR);
+ data, NULL, 0, NULL, lockh);
RETURN(rc);
}
+/* o_cancel method for the MGC: drop one reference of the given mode on the
+ * lock identified by lockh. exp and md are not used. Always returns 0. */
+static int mgc_cancel(struct obd_export *exp, struct lov_stripe_md *md,
+ __u32 mode, struct lustre_handle *lockh)
+{
+ ENTRY;
+
+ /* only the reference is released here; nothing else is done to the lock */
+ ldlm_lock_decref(lockh, mode);
+
+ RETURN(0);
+}
+
static int mgc_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
void *karg, void *uarg)
{
.o_del_conn = client_import_del_conn,
.o_connect = client_connect_import,
.o_disconnect = client_disconnect_export,
+ .o_enqueue = mgc_enqueue,
+ .o_cancel = mgc_cancel,
.o_iocontrol = mgc_iocontrol,
.o_set_info = mgc_set_info,
.o_import_event = mgc_import_event,
struct inode *inode;
struct dentry *result;
- CDEBUG(D_DENTRY|D_ERROR, "--> mgs_fid2dentry: ino/gen %lu/%u, sb %p\n",
+ CDEBUG(D_DENTRY, "--> mgs_fid2dentry: ino/gen %lu/%u, sb %p\n",
ino, generation, mgs->mgs_sb);
if (ino == 0)
static int mgs_setup(struct obd_device *obd, obd_count len, void *buf)
{
struct lprocfs_static_vars lvars;
- char *ns_name = "MGS";
struct mgs_obd *mgs = &obd->u.mgs;
struct lustre_mount_info *lmi;
struct lustre_sb_info *lsi;
GOTO(err_put, rc = PTR_ERR(obd->obd_fsops));
/* namespace for mgs llog */
- obd->obd_namespace = ldlm_namespace_new(ns_name, LDLM_NAMESPACE_SERVER);
+ obd->obd_namespace = ldlm_namespace_new("MGS", LDLM_NAMESPACE_SERVER);
if (obd->obd_namespace == NULL) {
mgs_cleanup(obd);
GOTO(err_ops, rc = -ENOMEM);
}
+ /* ldlm setup */
+ ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL,
+ "mgs_ldlm_client", &obd->obd_ldlm_client);
+
LASSERT(!lvfs_check_rdonly(lvfs_sbdev(mnt->mnt_sb)));
rc = mgs_fs_setup(obd, mnt);
RETURN(0);
}
+/* similar to filter_prepare_destroy */
static int mgs_get_cfg_lock(struct obd_device *obd, char *fsname,
struct lustre_handle *lockh)
{
struct ldlm_res_id res_id = {.name = {12321}};
int rc, flags = 0;
+ CERROR("mgs_lock %s\n", fsname);
+
rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, res_id,
- LDLM_PLAIN, NULL, LCK_PW, &flags,
- NULL, ldlm_completion_ast, NULL, NULL,
- NULL, 0, NULL, lockh);
+ LDLM_PLAIN, NULL, LCK_EX, &flags,
+ ldlm_blocking_ast, ldlm_completion_ast,
+ NULL, NULL, NULL, 0, NULL, lockh);
if (rc) {
CERROR("can't take cfg lock %d\n", rc);
}
+
return rc;
}
+/* Release the config lock taken by mgs_get_cfg_lock(). The mode passed to
+ * the decref (LCK_EX) matches the mode used for the enqueue there.
+ * Always returns 0. */
+static int mgs_put_cfg_lock(struct lustre_handle *lockh)
+{
+ /* NOTE(review): trace message emitted at error level - looks like
+ * development debugging; confirm before release. */
+ CERROR("mgs_unlock\n");
+
+ ldlm_lock_decref(lockh, LCK_EX);
+ return 0;
+}
+
static int mgs_handle_target_add(struct ptlrpc_request *req)
{
struct obd_device *obd = req->rq_export->exp_obd;
/* revoke the config lock so everyone will update */
lockrc = mgs_get_cfg_lock(obd, mti->mti_fsname, &lockh);
- if (lockrc) {
+ if (lockrc != ELDLM_OK) {
LCONSOLE_ERROR("Can't signal other nodes to update their "
"configuration (%d). Updating local logs "
"anyhow; you might have to manually restart "
/* create the log for the new target
and update the client/mdt logs */
rc = mgs_write_log_target(obd, mti);
+
+ /* done with log update */
+ if (lockrc == ELDLM_OK)
+ mgs_put_cfg_lock(&lockh);
+
if (rc) {
CERROR("Failed to write %s log (%d)\n",
mti->mti_svname, rc);
GOTO(out, rc);
}
- /* done with log update */
- if (!lockrc)
- ldlm_lock_decref(&lockh, LCK_PW);
-
out:
CDEBUG(D_MGS, "replying with %s, index=%d, rc=%d\n", mti->mti_svname,
mti->mti_stripe_index, rc);
case LDLM_BL_CALLBACK:
case LDLM_CP_CALLBACK:
DEBUG_REQ(D_MGS, req, "callback");
- CERROR("callbacks should not happen on MDS\n");
+ CERROR("callbacks should not happen on MGS\n");
LBUG();
OBD_FAIL_RETURN(OBD_FAIL_LDLM_BL_CALLBACK, 0);
break;
/******************** DB functions *********************/
+/* from the (client) config log, figure out:
+ 1. which ost's are active (by index)
+ 2. what the last config step is
+*/
static int db_handler(struct llog_handle *llh, struct llog_rec_hdr *rec,
void *data)
{
- struct system_db *db = (struct system_db *)data;
+ struct fs_db *db = (struct fs_db *)data;
int cfg_len = rec->lrh_len;
char *cfg_buf = (char*) (rec + 1);
int rc = 0;
ENTRY;
- CDEBUG(D_MGS, "db_handler\n");
-
if (rec->lrh_type == OBD_CFG_REC) {
struct lustre_cfg *lcfg;
int index;
lcfg = (struct lustre_cfg *)cfg_buf;
- if (lcfg->lcfg_command == LCFG_LOV_ADD_OBD) {
+ if (lcfg->lcfg_command == LCFG_LOV_ADD_OBD ||
+ lcfg->lcfg_command == LCFG_LOV_DEL_OBD) {
index = simple_strtol(lustre_cfg_string(lcfg, 2),
NULL, 0);
- set_bit(index, db->index_map);
+ set_bit(index, db->fd_index_map);
}
- if (lcfg->lcfg_command == LCFG_LOV_DEL_OBD) {
- index = simple_strtol(lustre_cfg_string(lcfg, 2),
- NULL, 0);
- clear_bit(index, db->index_map);
+ /* Never clear_bit: once assigned, we can never reassign the
+ same index again */
+
+ if (lcfg->lcfg_command == LCFG_MARKER) {
+ struct cfg_marker *marker;
+ marker = lustre_cfg_buf(lcfg, 1);
+ db->fd_last_step =
+ max(db->fd_last_step, marker->cm_step);
+ CDEBUG(D_MGS, "marker %d %s\n", marker->cm_step,
+ marker->cm_comment);
}
+
} else {
CERROR("unhandled lrh_type: %#x\n", rec->lrh_type);
rc = -EINVAL;
}
static int get_db_from_llog(struct obd_device *obd, char *logname,
- struct system_db *db)
+ struct fs_db *db)
{
struct llog_handle *loghandle;
struct lvfs_run_ctxt saved;
{
int i;
for (i = 0; i < map_len * 8; i++)
- if (!test_bit(i, index_map))
- return i;
+ if (!test_bit(i, index_map)) {
+ set_bit(i, index_map);
+ return i;
+ }
CERROR("max index exceeded.\n");
- return -ERANGE;
+ return -1;
}
static int count_osts(void *index_map, int map_len)
return num;
}
-static struct system_db *mgs_find_db(struct obd_device *obd, char *fsname)
+static struct fs_db *mgs_find_db(struct obd_device *obd, char *fsname)
{
struct mgs_obd *mgs = &obd->u.mgs;
- struct system_db *db;
+ struct fs_db *db;
struct list_head *tmp;
- list_for_each(tmp, &mgs->mgs_system_db_list) {
- db = list_entry(tmp, struct system_db, db_list);
- if (strcmp(db->fsname, fsname) == 0)
+ list_for_each(tmp, &mgs->mgs_fs_db_list) {
+ db = list_entry(tmp, struct fs_db, fd_list);
+ if (strcmp(db->fd_name, fsname) == 0)
return db;
}
return NULL;
#define INDEX_MAP_SIZE 4096
-static struct system_db *mgs_new_db(struct obd_device *obd, char *fsname)
+/* Allocate an fs_db for fsname together with its index bitmap and add it to
+ * the MGS fs_db list (under mgs_fs_db_lock).
+ * Returns the new db, or NULL if either allocation fails. */
+static struct fs_db *mgs_new_db(struct obd_device *obd, char *fsname)
{
struct mgs_obd *mgs = &obd->u.mgs;
- struct system_db *db;
+ struct fs_db *db;
OBD_ALLOC(db, sizeof(*db));
if (!db) {
- CERROR("No memory for system_db.\n");
+ CERROR("No memory for fs_db.\n");
return NULL;
}
- OBD_ALLOC(db->index_map, INDEX_MAP_SIZE);
- if (!db->index_map) {
+ OBD_ALLOC(db->fd_index_map, INDEX_MAP_SIZE);
+ if (!db->fd_index_map) {
CERROR("No memory for index_map.\n");
OBD_FREE(db, sizeof(*db));
return NULL;
}
- strncpy(db->fsname, fsname, sizeof(db->fsname));
+ /* strncpy() leaves the destination unterminated when the source fills
+ * it; fd_name is later read with strcmp(), so always terminate. */
+ strncpy(db->fd_name, fsname, sizeof(db->fd_name) - 1);
+ db->fd_name[sizeof(db->fd_name) - 1] = '\0';
//INIT_LIST_HEAD(&db->ost_infos);
- spin_lock(&mgs->mgs_system_db_lock);
- list_add(&db->db_list, &mgs->mgs_system_db_list);
- spin_unlock(&mgs->mgs_system_db_lock);
+ spin_lock(&mgs->mgs_fs_db_lock);
+ list_add(&db->fd_list, &mgs->mgs_fs_db_list);
+ spin_unlock(&mgs->mgs_fs_db_lock);
return db;
}
-static void mgs_free_db(struct system_db *db)
+static void mgs_free_db(struct fs_db *db)
{
- list_del(&db->db_list);
- OBD_FREE(db->index_map, INDEX_MAP_SIZE);
+ list_del(&db->fd_list);
+ OBD_FREE(db->fd_index_map, INDEX_MAP_SIZE);
OBD_FREE(db, sizeof(*db));
}
int mgs_init_db_list(struct obd_device *obd)
{
struct mgs_obd *mgs = &obd->u.mgs;
- spin_lock_init(&mgs->mgs_system_db_lock);
- INIT_LIST_HEAD(&mgs->mgs_system_db_list);
+ spin_lock_init(&mgs->mgs_fs_db_lock);
+ INIT_LIST_HEAD(&mgs->mgs_fs_db_list);
return 0;
}
int mgs_cleanup_db_list(struct obd_device *obd)
{
struct mgs_obd *mgs = &obd->u.mgs;
- struct system_db *db;
+ struct fs_db *db;
struct list_head *tmp, *tmp2;
- spin_lock(&mgs->mgs_system_db_lock);
- list_for_each_safe(tmp, tmp2, &mgs->mgs_system_db_list) {
- db = list_entry(tmp, struct system_db, db_list);
+ spin_lock(&mgs->mgs_fs_db_lock);
+ list_for_each_safe(tmp, tmp2, &mgs->mgs_fs_db_list) {
+ db = list_entry(tmp, struct fs_db, fd_list);
mgs_free_db(db);
}
- spin_unlock(&mgs->mgs_system_db_lock);
+ spin_unlock(&mgs->mgs_fs_db_lock);
return 0;
}
static int mgs_find_or_make_db(struct obd_device *obd, char *name,
- struct system_db **dbh)
+ struct fs_db **dbh)
{
- struct system_db *db;
+ struct fs_db *db;
char *cliname;
int rc = 0;
if (!db)
return -ENOMEM;
- /* extract the db from the client llog */
+ /* populate the db from the client llog */
name_create(name, "-client", &cliname);
rc = get_db_from_llog(obd, cliname, db);
name_destroy(cliname);
int mgs_set_next_index(struct obd_device *obd, struct mgmt_target_info *mti)
{
- struct system_db *db;
+ struct fs_db *db;
int rc = 0;
rc = mgs_find_or_make_db(obd, mti->mti_fsname, &db);
return rc;
}
- if (mti->mti_flags & LDD_F_SV_TYPE_OST)
- mti->mti_stripe_index =
- next_ost_index(db->index_map, INDEX_MAP_SIZE);
- else
+ /* FIXME use mti->mti_stripe_index if given, report error if already
+ in use */
+ if (mti->mti_flags & LDD_F_SV_TYPE_OST) {
+ rc = next_ost_index(db->fd_index_map, INDEX_MAP_SIZE);
+ if (rc == -1)
+ return -ERANGE;
+ mti->mti_stripe_index = rc;
+ } else {
mti->mti_stripe_index = 1; /*FIXME*/
+ }
make_sv_name(mti->mti_flags, mti->mti_stripe_index,
mti->mti_fsname, mti->mti_svname);
CDEBUG(D_MGS, "Set new index for %s to %d\n", mti->mti_svname,
mti->mti_stripe_index);
- return rc;
+ return 0;
}
/******************** config log recording functions *********************/
struct lustre_cfg *lcfg;
int rc;
- CDEBUG(D_MGS, "lcfg %s lov_setup\n", device_name);
-
lustre_cfg_bufs_reset(&bufs, device_name);
lustre_cfg_bufs_set(&bufs, 1, desc, sizeof(*desc));
lcfg = lustre_cfg_new(LCFG_SETUP, &bufs);
{
return record_base(obd,llh,NULL,0,LCFG_MOUNTOPT,
profile,lov_name,mdc_name,0);
-}
+}
+
+/* Append an LCFG_MARKER record to the config log llh.
+ *
+ * flags is a mask of CM_* bits; when CM_START is set the per-fs step counter
+ * (db->fd_last_step) is advanced first, so a START/END pair carries the same
+ * step. comment is a short label, truncated to fit cm_comment.
+ * Returns the result of mgs_do_record(). */
+static int record_marker(struct obd_device *obd, struct llog_handle *llh,
+                         struct fs_db *db, __u32 flags, char *comment)
+{
+        struct cfg_marker marker;
+        struct lustre_cfg_bufs bufs;
+        struct lustre_cfg *lcfg;
+        int rc;
+
+        CDEBUG(D_MGS, "lcfg marker\n");
+
+        /* The whole struct is copied into the log record: clear it so that
+         * cm_timestamp (not assigned here) and the tail of cm_comment never
+         * carry stale stack contents. */
+        memset(&marker, 0, sizeof(marker));
+
+        if (flags & CM_START)
+                db->fd_last_step++;
+        marker.cm_step = db->fd_last_step;
+        marker.cm_flags = flags;
+        /* copy at most size - 1 bytes; the memset above supplies the NUL,
+         * which the %s consumers of cm_comment rely on */
+        strncpy(marker.cm_comment, comment, sizeof(marker.cm_comment) - 1);
+
+        lustre_cfg_bufs_reset(&bufs, NULL);
+        lustre_cfg_bufs_set(&bufs, 1, &marker, sizeof(marker));
+        lcfg = lustre_cfg_new(LCFG_MARKER, &bufs);
+
+        rc = mgs_do_record(obd, llh, lcfg);
+
+        lustre_cfg_free(lcfg);
+        return rc;
+}
static int record_start_log(struct obd_device *obd,
struct llog_handle **llh, char *name)
/******************** config "macros" *********************/
/* lov is the first thing in the mdt and client logs */
-static int mgs_write_log_lov(struct obd_device *obd, char *fsname,
+static int mgs_write_log_lov(struct obd_device *obd, struct fs_db *db,
char *logname, char *lovname)
{
struct llog_handle *llh = NULL;
int rc = 0;
ENTRY;
+ CDEBUG(D_MGS, "Writing log %s\n", logname);
+
/*
#01 L attach 0:lov_mdsA 1:lov 2:71ccb_lov_mdsA_19f961a9e1
#02 L lov_setup 0:lov_mdsA 1:(struct lov_desc)
/* This should always be the first entry in a log.
rc = mgs_clear_log(obd, logname); */
rc = record_start_log(obd, &llh, logname);
+ rc = record_marker(obd, llh, db, CM_START, "lov setup");
rc = record_attach(obd, llh, lovname, "lov", uuid);
rc = record_lov_setup(obd, llh, lovname, lovdesc);
+ rc = record_marker(obd, llh, db, CM_END, "lov setup");
rc = record_end_log(obd, &llh);
RETURN(rc);
}
-static int mgs_write_log_mdt(struct obd_device *obd,
+static int mgs_write_log_mdt(struct obd_device *obd, struct fs_db *db,
struct mgmt_target_info *mti)
{
struct llog_handle *llh = NULL;
/* This is the first time for all logs for this fs,
since any ost should have already started the mdt log. */
first_log++;
- rc = mgs_write_log_lov(obd, mti->mti_fsname, mti->mti_svname,
+ rc = mgs_write_log_lov(obd, db, mti->mti_svname,
lovname);
}
setup /dev/loop2 ldiskfs mdsA errors=remount-ro,user_xattr
*/
rc = record_start_log(obd, &llh, mti->mti_svname);
+ rc = record_marker(obd, llh, db, CM_START, "add mdt");
rc = record_mount_opt(obd, llh, mti->mti_svname, lovname, 0);
rc = record_attach(obd, llh, mti->mti_svname, LUSTRE_MDS_NAME, mdsuuid);
rc = record_setup(obd,llh,mti->mti_svname,
"dev"/*ignored*/,"type"/*ignored*/,
mti->mti_svname, 0/*options*/);
+ rc = record_marker(obd, llh, db, CM_END, "add mdt");
rc = record_end_log(obd, &llh);
/* Append the mdt info to the client log */
name_create(mti->mti_fsname, "-clilov", &lovname);
if (first_log) {
/* Start client log */
- rc = mgs_write_log_lov(obd, mti->mti_fsname, cliname, lovname);
+ rc = mgs_write_log_lov(obd, db, cliname, lovname);
}
/* Add the mdt info to the client */
name_create(libcfs_nid2str(mti->mti_nid), "_UUID", &nodeuuid);
name_create(mti->mti_svname, "-mdc", &mdcname);
name_create(mdcname, "_UUID", &mdcuuid);
-
/*
#09 L add_uuid nid=uml1@tcp(0x20000c0a80201) 0: 1:uml1_UUID
#10 L attach 0:MDC_uml1_mdsA_MNT_client 1:mdc 2:1d834_MNT_client_03f
rc = record_start_log(obd, &llh, cliname);
/* FIXME can we just use the nid as the node uuid, or do we really
need the hostname? */
+ rc = record_marker(obd, llh, db, CM_START, "add mdc");
rc = record_add_uuid(obd, llh, mti->mti_nid, nodeuuid);
rc = record_attach(obd, llh, mdcname, LUSTRE_MDC_NAME, mdcuuid);
rc = record_setup(obd,llh,mdcname,mdsuuid,nodeuuid,0,0);
/* FIXME add uuid, add_conn for failover mdt's */
rc = record_mount_opt(obd, llh, cliname, lovname, mdcname);
+ rc = record_marker(obd, llh, db, CM_END, "add mdc");
rc = record_end_log(obd, &llh);
name_destroy(mdcuuid);
}
/* Add the ost info to the client/mdt lov */
-static int mgs_write_log_osc(struct obd_device *obd,
+static int mgs_write_log_osc(struct obd_device *obd, struct fs_db *db,
struct mgmt_target_info *mti,
char *logname, char *lovname, char *ostuuid)
{
if (mgs_log_is_empty(obd, logname)) {
/* The first time an osc is added, setup the lov */
- CDEBUG(D_MGS, "First log, creating %s\n", logname);
- rc = mgs_write_log_lov(obd, mti->mti_fsname, logname, lovname);
+ rc = mgs_write_log_lov(obd, db, logname, lovname);
}
+
+ CDEBUG(D_MGS, "adding osc for %s to log %s\n",
+ mti->mti_svname, logname);
name_create(libcfs_nid2str(mti->mti_nid), "_UUID", &nodeuuid);
name_create(mti->mti_svname, "-osc", &oscname);
#08 L lov_modify_tgts add 0:lov1 1:ost1_UUID 2:0 3:1
*/
rc = record_start_log(obd, &llh, logname);
+ rc = record_marker(obd, llh, db, CM_START, "add osc");
rc = record_add_uuid(obd, llh, mti->mti_nid, nodeuuid);
rc = record_attach(obd, llh, oscname, LUSTRE_OSC_NAME, lovuuid);
rc = record_setup(obd, llh, oscname, ostuuid, nodeuuid, 0, 0);
/* FIXME add uuid, add_conn for failover ost's */
snprintf(index, sizeof(index), "%d", mti->mti_stripe_index);
rc = record_lov_add(obd,llh, lovname, ostuuid, index,"1"/*generation*/);
+ rc = record_marker(obd, llh, db, CM_END, "add osc");
rc = record_end_log(obd, &llh);
name_destroy(lovuuid);
return rc;
}
-static int mgs_write_log_ost(struct obd_device *obd,
+static int mgs_write_log_ost(struct obd_device *obd, struct fs_db *db,
struct mgmt_target_info *mti)
{
struct llog_handle *llh = NULL;
setup /dev/loop2 ldiskfs f|n errors=remount-ro,user_xattr
*/
rc = record_start_log(obd, &llh, mti->mti_svname);
+ rc = record_marker(obd, llh, db, CM_START, "add ost");
name_create(mti->mti_svname, "_UUID", &ostuuid);
rc = record_attach(obd, llh, mti->mti_svname,
"obdfilter"/*LUSTRE_OST_NAME*/, ostuuid);
rc = record_setup(obd,llh,mti->mti_svname,
"dev"/*ignored*/,"type"/*ignored*/,
"f", 0/*options*/);
+ rc = record_marker(obd, llh, db, CM_END, "add ost");
rc = record_end_log(obd, &llh);
/* We also have to update the other logs where this osc is part of
// FIXME need real mdt name
name_create(mti->mti_fsname, "-MDT0001", &logname);
name_create(mti->mti_fsname, "-mdtlov", &lovname);
- mgs_write_log_osc(obd, mti, logname, lovname, ostuuid);
+ mgs_write_log_osc(obd, db, mti, logname, lovname, ostuuid);
name_destroy(lovname);
name_destroy(logname);
/* Append ost info to the client log */
name_create(mti->mti_fsname, "-client", &logname);
name_create(mti->mti_fsname, "-clilov", &lovname);
- mgs_write_log_osc(obd, mti, logname, lovname, ostuuid);
+ mgs_write_log_osc(obd, db, mti, logname, lovname, ostuuid);
name_destroy(lovname);
name_destroy(logname);
int mgs_write_log_target(struct obd_device *obd,
struct mgmt_target_info *mti)
{
+ struct fs_db *db;
int rc = -EINVAL;
+
+ rc = mgs_find_or_make_db(obd, mti->mti_fsname, &db);
+ if (rc) {
+ CERROR("Can't get db for %s\n", mti->mti_fsname);
+ return rc;
+ }
+
if (mti->mti_flags & LDD_F_SV_TYPE_MDT) {
- rc = mgs_write_log_mdt(obd, mti);
+ rc = mgs_write_log_mdt(obd, db, mti);
} else if (mti->mti_flags & LDD_F_SV_TYPE_OST) {
- rc = mgs_write_log_ost(obd, mti);
+ rc = mgs_write_log_ost(obd, db, mti);
} else {
CERROR("Unknown target type %#x, can't create log for %s\n",
mti->mti_flags, mti->mti_svname);
/* first assign flags to use llog_client_ops */
llh->llh_flags = flags;
rc = llog_read_header(handle);
- CDEBUG(D_ERROR, "read header rc=%d fl=%d\n", rc, flags);
if (rc == 0) {
flags = llh->llh_flags;
if (uuid)
finish:
if (oa)
obdo_free(oa);
- // FIXME remove
- CDEBUG(D_ERROR, "opened %s fp=%p\n", name?name:"by id", handle->lgh_file);
RETURN(rc);
cleanup:
switch (cleanup_phase) {
int rc;
ENTRY;
- // FIXME remove
- CDEBUG(D_ERROR, "Closing file=%p\n", handle->lgh_file);
rc = filp_close(handle->lgh_file, 0);
if (rc)
CERROR("error closing log: rc %d\n", rc);
GOTO(out, err = 0);
}
case LCFG_MARKER: {
+ struct cfg_marker *marker;
LCONSOLE_WARN("LCFG_MARKER not yet implemented.\n");
+ marker = lustre_cfg_buf(lcfg, 1);
+ CDEBUG(D_WARNING, "%d (%x) %s\n", marker->cm_step,
+ marker->cm_flags, marker->cm_comment);
GOTO(out, err = 0);
}
}
}
#endif
+/**************** config llog ********************/
+
+/* Return the MGC's client export to the MGS, i.e. the first entry on the
+ * device's export list. */
+static struct obd_export *get_mgs_export(struct obd_device *mgc)
+{
+        struct obd_export *first;
+
+        /* FIXME is this a Bad Idea? Should the export be stored somewhere
+           in the u.cli instead? Slightly annoying because of layering */
+
+        /* The mgc is expected to hold exactly two exports: the mgs export
+           followed by the mgc self-export. */
+        LASSERT(!list_empty(&mgc->obd_exports));
+        LASSERT(mgc->obd_num_exports == 2);
+
+        /* Head of the list is the mgs export; it must not be the
+           self-export. */
+        first = list_entry(mgc->obd_exports.next, struct obd_export,
+                           exp_obd_chain);
+        LASSERT(first != mgc->obd_self_export);
+
+        return first;
+}
+
/* Get a config log from the MGS and process it.
This func is called for both clients and servers. */
+/* FIXME maybe it makes more sense for this to be a mgc func, not
+ a mount func. We could make this mgc_process_config */
int lustre_get_process_log(struct super_block *sb, char *logname,
- struct config_llog_instance *cfg)
+ struct config_llog_instance *cfg)
{
struct lustre_sb_info *lsi = s2lsi(sb);
struct obd_device *mgc = lsi->lsi_mgc;
struct llog_ctxt *rctxt, *lctxt;
- int rc;
+ struct lustre_handle lockh;
+ int rc, rcl, flags = 0;
LASSERT(mgc);
CDEBUG(D_MOUNT, "parsing config log %s\n", logname);
return(-EINVAL);
}
+ /* Get the cfg lock */
+ rcl = obd_enqueue(get_mgs_export(mgc), NULL, LDLM_PLAIN, NULL,
+ LCK_CR, &flags, NULL, NULL, NULL,
+ logname, 0, NULL, &lockh);
+ if (rcl) {
+ CERROR("Can't get cfg lock: %d\n", rcl);
+ return (rcl);
+ }
+
//FIXME Copy the mgs remote log to the local disk
#if 0
class_config_dump_llog(rctxt, logname, cfg);
#endif
rc = class_config_parse_llog(rctxt, logname, cfg);
-
- if (rc && lmd_is_client(lsi->lsi_lmd)) {
+
+ /* Now drop the lock so MGS can revoke it */
+ rcl = obd_cancel(get_mgs_export(mgc), NULL, LCK_CR, &lockh);
+ if (rcl) {
+ CERROR("Can't drop cfg lock: %d\n", rcl);
+ }
+
+ if (rc && !lmd_is_client(lsi->lsi_lmd)) {
int rc2;
LCONSOLE_INFO("%s: The configuration '%s' could not be read "
"from the MGS (%d). Trying local log.\n",
return (rc);
}
+/**************** obd start *******************/
+
static int do_lcfg(char *cfgname, lnet_nid_t nid, int cmd,
char *s1, char *s2, char *s3, char *s4)
{
return rc;
}
-static struct obd_export *get_mgs_export(struct obd_device *mgc)
-{
- struct obd_export *exp, *n;
-
- /* FIXME is this a Bad Idea? Should I just store this export
- somewhere in the u.cli? */
-
- /* There should be exactly 2 exports in the mgc, the mgs export and
- the mgc self-export, in that order. So just return the list head. */
- LASSERT(!list_empty(&mgc->obd_exports));
- LASSERT(mgc->obd_num_exports == 2);
- list_for_each_entry_safe(exp, n, &mgc->obd_exports, exp_obd_chain) {
- LASSERT(exp != mgc->obd_self_export);
- break;
- }
- /*FIXME there's clearly a better way, but I'm too confused to sort it
- out now...
- exp = &list_entry(&mgc->obd_exports->head, export_obd, exp_obd_chain);
- */
- return exp;
-}
-
/* Set up a mgcobd to process startup logs */
static int lustre_start_mgc(struct super_block *sb)
{
struct lustre_disk_data *ldd = lsi->lsi_ldd;
struct mgmt_target_info *mti = NULL;
lnet_process_id_t id;
+ int i = 0;
int rc;
LASSERT(mgc);
// char mti_nodename[NAME_MAXLEN];
// char mti_uuid[UUID_MAXLEN];
/* FIXME nid 0 is lo generally, need to send all non-lo nids */
- rc = LNetGetId(1, &id);
+ while ((rc = LNetGetId(i++, &id)) != -ENOENT) {
+ if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND)
+ continue;
+ /* FIXME use all non-lo nids, not just first */
+ break;
+ }
mti->mti_nid = id.nid;
mti->mti_config_ver = 0;
mti->mti_flags = ldd->ldd_flags;
}
}
- /* Set the mgc fs to our server disk */
+ /* Set the mgc fs to our server disk. This allows the MGC
+ to read and write configs locally. */
server_mgc_set_fs(lsi->lsi_mgc, sb);
/* Get a new index if needed */
}
}
-
- /* Register the mount for the target */
+ /* Let the target look up the mount using the target's name. */
rc = server_register_mount(lsi->lsi_ldd->ldd_svname, sb, mnt);
if (rc)
goto out;
- /* The MGC starts targets using the svname llog */
+ /* The MGC starts targets using the llog named with the target name */
cfg.cfg_instance = NULL;
rc = lustre_get_process_log(sb, lsi->lsi_ldd->ldd_svname, &cfg);
if (rc) {
fsd->lsd_client_size = cpu_to_le16(FILTER_LR_CLIENT_SIZE);
fsd->lsd_subdir_count = cpu_to_le16(FILTER_SUBDIR_COUNT);
filter->fo_subdir_count = FILTER_SUBDIR_COUNT;
+ fsd->lsd_feature_compat = cpu_to_le32(LR_COMPAT_COMMON_LR);
} else {
rc = fsfilt_read_record(obd, filp, fsd, sizeof(*fsd), &off);
if (rc) {
EXIT;
}
+#if 0
+/* see ldlm_blocking_ast */
/* cut-n-paste of mds_blocking_ast() */
static int ost_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
void *data, int flag)
}
RETURN(0);
}
-
+#endif
+
static int ost_brw_lock_get(int mode, struct obd_export *exp,
struct obd_ioobj *obj, struct niobuf_remote *nb,
struct lustre_handle *lh)
/* Write the server config files */
int write_local_files(struct mkfs_opts *mop)
{
- struct lr_server_data lsd;
char mntpt[] = "/tmp/mntXXXXXX";
char filepnm[128];
char *dev;
fclose(filep);
#if 0
+ struct lr_server_data lsd;
/* servers create their own last_rcvd if nonexistent - why do it here?*/
/* Create the inital last_rcvd file */
vprint("Writing %s\n", LAST_RCVD);
fprintf(stderr, "failed to write local files\n");
goto out;
}
-
- /* We will not write startup logs here. That is the domain of the
- mgc/mgs, and should probably be done at first mount.
- mgc might have to pass info from the mount_data_file to mgs. */
-#if 0
- ret = write_llog_files(&mop);
- if (ret != 0) {
- fatal();
- fprintf(stderr, "failed to write setup logs\n");
- goto out:
- }
-#endif
-
+
out:
loop_cleanup(&mop);
lnet_stop();
strcat(optcopy, source);
if (verbose)
- printf("mounting devce %s at %s, flags=%#x options=%s\n",
+ printf("mounting device %s at %s, flags=%#x options=%s\n",
source, target, flags, optcopy);
if (!fake)