From 95315389fc75393e54b45abc9a06449f64965607 Mon Sep 17 00:00:00 2001 From: nathan Date: Thu, 15 Dec 2005 00:08:38 +0000 Subject: [PATCH] Branch b1_4_mountconf b=8007 keep mgc-mgs connection open forever for locks. --- lustre/include/linux/lustre_idl.h | 1 - lustre/include/linux/obd.h | 3 +- lustre/ldlm/ldlm_request.c | 3 + lustre/mgc/mgc_llog.c | 24 +--- lustre/mgc/mgc_lock.c | 44 -------- lustre/mgc/mgc_request.c | 229 +++++++++++++++++++++++++------------- lustre/mgs/mgs_fs.c | 68 +++++++++++ lustre/mgs/mgs_handler.c | 112 ++++++++----------- lustre/obdclass/obd_mount.c | 108 +++++++++++------- 9 files changed, 338 insertions(+), 254 deletions(-) diff --git a/lustre/include/linux/lustre_idl.h b/lustre/include/linux/lustre_idl.h index 30e8a3c..55b0cf0 100644 --- a/lustre/include/linux/lustre_idl.h +++ b/lustre/include/linux/lustre_idl.h @@ -848,7 +848,6 @@ typedef enum { LDLM_EXTENT = 11, LDLM_FLOCK = 12, // LDLM_IBITS = 13, - LDLM_LLOG = 14, LDLM_MAX_TYPE } ldlm_type_t; diff --git a/lustre/include/linux/obd.h b/lustre/include/linux/obd.h index d5cbdd4..0208ea9 100644 --- a/lustre/include/linux/obd.h +++ b/lustre/include/linux/obd.h @@ -323,9 +323,7 @@ struct client_obd { /* mgc datastruct */ struct semaphore cl_mgc_sem; struct vfsmount *cl_mgc_vfsmnt; - struct super_block *cl_mgc_sb; struct dentry *cl_mgc_configs_dir; - struct list_head cl_mgc_open_llogs; atomic_t cl_mgc_refcount; /* Flags section */ @@ -349,6 +347,7 @@ struct mgs_obd { struct llog_handle *mgs_cfg_llh; spinlock_t mgs_system_db_lock; struct list_head mgs_system_db_list; + struct lustre_handle mgs_pw_lock; /* config update lock */ }; struct mds_obd { diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c index 90eed82..0a580a5 100644 --- a/lustre/ldlm/ldlm_request.c +++ b/lustre/ldlm/ldlm_request.c @@ -89,6 +89,9 @@ int ldlm_completion_ast(struct ldlm_lock *lock, int flags, void *data) int rc = 0; ENTRY; + //FIXME remove + LDLM_ERROR(lock, "ldlm completion ast"); + if (flags == LDLM_FL_WAIT_NOREPROC) { LDLM_DEBUG(lock, "client-side enqueue waiting on pending lock"); goto noreproc; diff --git a/lustre/mgc/mgc_llog.c b/lustre/mgc/mgc_llog.c index e8005c5..16cac74 100644 --- a/lustre/mgc/mgc_llog.c +++ b/lustre/mgc/mgc_llog.c @@ -46,26 +46,4 @@ #include "mgc_internal.h" -int mgc_get_process_llog(struct obd_device *obd, char *llog_name, - struct config_llog_instance *cfg) -{ - struct llog_ctxt *ctxt; - - ctxt = llog_get_context(obd, LLOG_CONFIG_REPL_CTXT); - - rc = class_config_parse_llog(ctxt, llog_name, cfg); - - if (!rc) { - if (rc == -EINVAL) - LCONSOLE_ERROR("%s: The configuration '%s' could not " - "be read from the MGS. Make sure this " - "client and the MGS are running " - "compatible versions of Lustre.\n", - obd->obd_name, llog_name); - else - CERROR("class_config_parse_llog failed: rc = %d\n", rc); - } - return 0; -} - -EXPORT_SYMBOL(mgc_get_process_llog) + //REMOVE ME diff --git a/lustre/mgc/mgc_lock.c b/lustre/mgc/mgc_lock.c index 262582c..69e7ceb 100644 --- a/lustre/mgc/mgc_lock.c +++ b/lustre/mgc/mgc_lock.c @@ -46,47 +46,3 @@ #include "mgc_internal.h" -int mgc_enqueue(struct obd_export *exp, int lock_mode, - struct mgc_op_data *data, struct lustre_handle *lockh, - ldlm_completion_callback cb_completion, - ldlm_blocking_callback cb_blocking, - void *cb_data) -{ - struct ptlrpc_request *req; - struct obd_device *obddev = class_exp2obd(exp); - struct ldlm_res_id res_id = - { .name = {data->obj_id, - data->obj_version} - }; - int rc = 0, flags = 0; - struct ldlm_reply *dlm_rep; - struct ldlm_request *lockreq; - unsigned long irqflags; - int reply_buffers = 0; - ENTRY; - - /* Search for already existing locks.*/ - rc = ldlm_lock_match(obd->obd_namespace, 0, &res_id, LDLM_LLOG, - NULL, mode, lockh); - if (rc == 1) - RETURN(ELDLM_OK); - - rc = ldlm_cli_enqueue(exp, req, obd->obd_namespace, res_id, LDLM_LLOG, - NULL, mode, flags, bl_cb, cp_cb, gl_cb, data, - NULL, 0, NULL, lockh); - - if (req != NULL) { - if (rc == ELDLM_LOCK_ABORTED) { - /* swabbed by ldlm_cli_enqueue() */ - LASSERT_REPSWABBED(req, 0); - rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*rep)); - LASSERT(rep != NULL); - if (rep->lock_policy_res1) - rc = rep->lock_policy_res1; - } - ptlrpc_req_finished(req); - } - - RETURN(rc); -} -EXPORT_SYMBOL(mgc_enqueue) diff --git a/lustre/mgc/mgc_request.c b/lustre/mgc/mgc_request.c index d578331..0a1ea92 100644 --- a/lustre/mgc/mgc_request.c +++ b/lustre/mgc/mgc_request.c @@ -50,77 +50,6 @@ #include "mgc_internal.h" -/* Get index and add to config llog, depending on flags */ -int mgc_target_add(struct obd_export *exp, struct mgmt_target_info *mti) -{ - struct ptlrpc_request *req; - struct mgmt_target_info *req_mti, *rep_mti; - int size = sizeof(*req_mti); - int rep_size = sizeof(*mti); - int rc; - ENTRY; - - req = ptlrpc_prep_req(class_exp2cliimp(exp), MGMT_TARGET_ADD, - 1, &size, NULL); - if (!req) - RETURN(rc = -ENOMEM); - - req_mti = lustre_msg_buf(req->rq_reqmsg, 0, sizeof(*req_mti)); - memcpy(req_mti, mti, sizeof(*req_mti)); - - req->rq_replen = lustre_msg_size(1, &rep_size); - - CDEBUG(D_MGC, "requesting add for %s\n", mti->mti_svname); - - rc = ptlrpc_queue_wait(req); - if (!rc) { - rep_mti = lustre_swab_repbuf(req, 0, sizeof(*rep_mti), - lustre_swab_mgmt_target_info); - memcpy(mti, rep_mti, sizeof(*rep_mti)); - CDEBUG(D_MGC, "target_add %s got index = %d\n", - mti->mti_svname, mti->mti_stripe_index); - } else { - CERROR("target_add failed. rc=%d\n", rc); - } - ptlrpc_req_finished(req); - - RETURN(rc); -} - -/* Remove from config llog */ -int mgc_target_del(struct obd_export *exp, struct mgmt_target_info *mti) -{ - struct ptlrpc_request *req; - struct mgmt_target_info *req_mti, *rep_mti; - int size = sizeof(*req_mti); - int rc; - ENTRY; - - req = ptlrpc_prep_req(class_exp2cliimp(exp), MGMT_TARGET_DEL, - 1, &size, NULL); - if (!req) - RETURN(rc = -ENOMEM); - - req_mti = lustre_msg_buf(req->rq_reqmsg, 0, sizeof(*req_mti)); - memcpy(req_mti, mti, sizeof(*req_mti)); - - rc = ptlrpc_queue_wait(req); - if (!rc) { - int index; - rep_mti = lustre_swab_repbuf(req, 0, sizeof(*rep_mti), - lustre_swab_mgmt_target_info); - index = rep_mti->mti_stripe_index; - if (index != mti->mti_stripe_index) { - CERROR ("OST DEL failed. rc=%d\n", index); - GOTO (out, rc = -EINVAL); - } - CERROR("OST DEL OK.(old index = %d)\n", index); - } -out: - ptlrpc_req_finished(req); - - RETURN(rc); -} static int mgc_fs_setup(struct obd_device *obd, struct super_block *sb, struct vfsmount *mnt) @@ -146,10 +75,9 @@ static int mgc_fs_setup(struct obd_device *obd, struct super_block *sb, } cli->cl_mgc_vfsmnt = mnt; - cli->cl_mgc_sb = mnt->mnt_root->d_inode->i_sb; // FIXME which is the right SB? - filter_common_setup also CERROR("SB's: fill=%p mnt=%p root=%p\n", sb, mnt->mnt_sb, mnt->mnt_root->d_inode->i_sb); - fsfilt_setup(obd, cli->cl_mgc_sb); + fsfilt_setup(obd, mnt->mnt_root->d_inode->i_sb); OBD_SET_CTXT_MAGIC(&obd->obd_lvfs_ctxt); obd->obd_lvfs_ctxt.pwdmnt = mnt; @@ -168,12 +96,14 @@ static int mgc_fs_setup(struct obd_device *obd, struct super_block *sb, goto err_ops; } cli->cl_mgc_configs_dir = dentry; + + /* We keep the cl_mgc_sem until mgc_fs_cleanup */ return (0); err_ops: fsfilt_put_ops(obd->obd_fsops); obd->obd_fsops = NULL; - cli->cl_mgc_sb = NULL; + cli->cl_mgc_vfsmnt = NULL; up(&cli->cl_mgc_sem); return(err); } @@ -184,7 +114,6 @@ static int mgc_fs_cleanup(struct obd_device *obd) int rc = 0; LASSERT(cli->cl_mgc_vfsmnt != NULL); - LASSERT(cli->cl_mgc_sb != NULL); if (cli->cl_mgc_configs_dir != NULL) { struct lvfs_run_ctxt saved; @@ -201,7 +130,6 @@ static int mgc_fs_cleanup(struct obd_device *obd) */ cli->cl_mgc_vfsmnt = NULL; - cli->cl_mgc_sb = NULL; if (obd->obd_fsops) fsfilt_put_ops(obd->obd_fsops); @@ -219,7 +147,7 @@ static int mgc_cleanup(struct obd_device *obd) /* FIXME calls to mgc_fs_setup must take an obd ref to insure there's no fs by the time we get here. */ LASSERT(cli->cl_mgc_vfsmnt == NULL); - + rc = obd_llog_finish(obd, 0); if (rc != 0) CERROR("failed to cleanup llogging subsystems\n"); @@ -255,6 +183,81 @@ err_decref: RETURN(rc); } +/* see ll_mdc_blocking_ast */ +static int mgc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, + void *data, int flag) +{ + int rc; + struct lustre_handle lockh; + ENTRY; + + switch (flag) { + case LDLM_CB_BLOCKING: + /* mgs wants the lock, give it up... */ + LDLM_ERROR(lock, "MGC blocking CB"); + + ldlm_lock2handle(lock, &lockh); + rc = ldlm_cli_cancel(&lockh); + if (rc < 0) { + CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc); + RETURN(rc); + } + break; + case LDLM_CB_CANCELING: { + /* We've given up the lock, prepare ourselves to update. + FIXME */ + LDLM_ERROR(lock, "MGC cancel CB"); + + //struct inode *inode = ll_inode_from_lock(lock); + /* in the MGC case I suspect this callback will + trigger a new enqueue for the same lock (in a separate + thread likely, which won't match the just-being-cancelled + lock due to CBPENDING flag) + config llog processing */ + break; + } + default: + LBUG(); + } + + RETURN(0); +} + +/* see ll_get_dir_page */ +static int mgc_get_cfg_lock(struct obd_export *exp, char *fsname) +{ + struct lustre_handle lockh; + struct obd_device *obd = class_exp2obd(exp); + /* FIXME use fsname, vers and separate locks? see mgs_get_cfg_lock */ + struct ldlm_res_id res_id = { .name = { 12321 } }; + int rc = 0, flags = 0; + ENTRY; + + /* Search for already existing locks.*/ + rc = ldlm_lock_match(obd->obd_namespace, 0, &res_id, LDLM_PLAIN, + NULL, LCK_CR, &lockh); + if (rc == 1) + RETURN(ELDLM_OK); + + CDEBUG(D_MGC, "Taking a cfg reader lock\n"); + + /* see filter_prepare_destroy + rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, res_id, + LDLM_EXTENT, &policy, LCK_PW, + &flags, ldlm_blocking_ast, ldlm_completion_ast, + NULL, NULL, NULL, 0, NULL, &lockh); + */ + + rc = ldlm_cli_enqueue(exp, NULL, obd->obd_namespace, res_id, + LDLM_PLAIN, NULL, LCK_CR, &flags, + mgc_blocking_ast, ldlm_completion_ast, NULL, + NULL/*cb_data*/, NULL, 0, NULL, &lockh); + + /* now drop the lock so MGS can revoke it */ + ldlm_lock_decref(&lockh, LCK_PR); + + RETURN(rc); +} + static int mgc_iocontrol(unsigned int cmd, struct obd_export *exp, int len, void *karg, void *uarg) { @@ -329,6 +332,78 @@ out: return rc; } +/* Get index and add to config llog, depending on flags */ +int mgc_target_add(struct obd_export *exp, struct mgmt_target_info *mti) +{ + struct ptlrpc_request *req; + struct mgmt_target_info *req_mti, *rep_mti; + int size = sizeof(*req_mti); + int rep_size = sizeof(*mti); + int rc; + ENTRY; + + req = ptlrpc_prep_req(class_exp2cliimp(exp), MGMT_TARGET_ADD, + 1, &size, NULL); + if (!req) + RETURN(rc = -ENOMEM); + + req_mti = lustre_msg_buf(req->rq_reqmsg, 0, sizeof(*req_mti)); + memcpy(req_mti, mti, sizeof(*req_mti)); + + req->rq_replen = lustre_msg_size(1, &rep_size); + + CDEBUG(D_MGC, "requesting add for %s\n", mti->mti_svname); + + rc = ptlrpc_queue_wait(req); + if (!rc) { + rep_mti = lustre_swab_repbuf(req, 0, sizeof(*rep_mti), + lustre_swab_mgmt_target_info); + memcpy(mti, rep_mti, sizeof(*rep_mti)); + CDEBUG(D_MGC, "target_add %s got index = %d\n", + mti->mti_svname, mti->mti_stripe_index); + } else { + CERROR("target_add failed. rc=%d\n", rc); + } + ptlrpc_req_finished(req); + + RETURN(rc); +} + +/* Remove from config llog */ +int mgc_target_del(struct obd_export *exp, struct mgmt_target_info *mti) +{ + struct ptlrpc_request *req; + struct mgmt_target_info *req_mti, *rep_mti; + int size = sizeof(*req_mti); + int rc; + ENTRY; + + req = ptlrpc_prep_req(class_exp2cliimp(exp), MGMT_TARGET_DEL, + 1, &size, NULL); + if (!req) + RETURN(rc = -ENOMEM); + + req_mti = lustre_msg_buf(req->rq_reqmsg, 0, sizeof(*req_mti)); + memcpy(req_mti, mti, sizeof(*req_mti)); + + rc = ptlrpc_queue_wait(req); + if (!rc) { + int index; + rep_mti = lustre_swab_repbuf(req, 0, sizeof(*rep_mti), + lustre_swab_mgmt_target_info); + index = rep_mti->mti_stripe_index; + if (index != mti->mti_stripe_index) { + CERROR ("OST DEL failed. rc=%d\n", index); + GOTO (out, rc = -EINVAL); + } + CERROR("OST DEL OK.(old index = %d)\n", index); + } +out: + ptlrpc_req_finished(req); + + RETURN(rc); +} + #define INIT_RECOV_BACKUP "init_recov_bk" int mgc_set_info(struct obd_export *exp, obd_count keylen, void *key, obd_count vallen, void *val) diff --git a/lustre/mgs/mgs_fs.c b/lustre/mgs/mgs_fs.c index 95bd148..e9e210f 100644 --- a/lustre/mgs/mgs_fs.c +++ b/lustre/mgs/mgs_fs.c @@ -46,6 +46,74 @@ #include #include "mgs_internal.h" +/* Same as mds_fid2dentry */ +/* Look up an entry by inode number. */ +/* this function ONLY returns valid dget'd dentries with an initialized inode + or errors */ +static struct dentry *mgs_fid2dentry(struct mgs_obd *mgs, struct ll_fid *fid) +{ + char fid_name[32]; + unsigned long ino = fid->id; + __u32 generation = fid->generation; + struct inode *inode; + struct dentry *result; + + CDEBUG(D_DENTRY|D_ERROR, "--> mgs_fid2dentry: ino/gen %lu/%u, sb %p\n", + ino, generation, mgs->mgs_sb); + + if (ino == 0) + RETURN(ERR_PTR(-ESTALE)); + + snprintf(fid_name, sizeof(fid_name), "0x%lx", ino); + + /* under ext3 this is neither supposed to return bad inodes + nor NULL inodes. */ + result = ll_lookup_one_len(fid_name, mgs->mgs_fid_de, strlen(fid_name)); + if (IS_ERR(result)) + RETURN(result); + + inode = result->d_inode; + if (!inode) + RETURN(ERR_PTR(-ENOENT)); + + if (inode->i_generation == 0 || inode->i_nlink == 0) { + LCONSOLE_WARN("Found inode with zero generation or link -- this" + " may indicate disk corruption (inode: %lu, link:" + " %lu, count: %d)\n", inode->i_ino, + (unsigned long)inode->i_nlink, + atomic_read(&inode->i_count)); + l_dput(result); + RETURN(ERR_PTR(-ENOENT)); + } + + if (generation && inode->i_generation != generation) { + /* we didn't find the right inode.. */ + CDEBUG(D_INODE, "found wrong generation: inode %lu, link: %lu, " + "count: %d, generation %u/%u\n", inode->i_ino, + (unsigned long)inode->i_nlink, + atomic_read(&inode->i_count), inode->i_generation, + generation); + l_dput(result); + RETURN(ERR_PTR(-ENOENT)); + } + + RETURN(result); +} + +static struct dentry *mgs_lvfs_fid2dentry(__u64 id, __u32 gen, __u64 gr, + void *data) +{ + struct obd_device *obd = data; + struct ll_fid fid; + fid.id = id; + fid.generation = gen; + return mgs_fid2dentry(&obd->u.mgs, &fid); +} + +struct lvfs_callback_ops mgs_lvfs_ops = { + l_fid2dentry: mgs_lvfs_fid2dentry, +}; + int mgs_fs_setup(struct obd_device *obd, struct vfsmount *mnt) { struct mgs_obd *mgs = &obd->u.mgs; diff --git a/lustre/mgs/mgs_handler.c b/lustre/mgs/mgs_handler.c index 99f29a0..83f8e5a 100644 --- a/lustre/mgs/mgs_handler.c +++ b/lustre/mgs/mgs_handler.c @@ -28,7 +28,7 @@ # define EXPORT_SYMTAB #endif #define DEBUG_SUBSYSTEM S_MGS -#define D_MGS D_INFO|D_ERROR +#define D_MGS D_CONFIG|D_ERROR #ifdef __KERNEL__ # include @@ -112,7 +112,7 @@ static int mgs_disconnect(struct obd_export *exp) RETURN(rc); } -int mgs_handle(struct ptlrpc_request *req); +static int mgs_handle(struct ptlrpc_request *req); /* Start the MGS obd */ static int mgs_setup(struct obd_device *obd, obd_count len, void *buf) @@ -260,12 +260,31 @@ static int mgs_cleanup(struct obd_device *obd) RETURN(0); } +static int mgs_get_cfg_lock(struct obd_device *obd, char *fsname, + struct lustre_handle *lockh) +{ + /* FIXME resource should be based on fsname, + one lock per fs. One lock per config log? */ + struct ldlm_res_id res_id = {.name = {12321}}; + int rc, flags = 0; + + rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, res_id, + LDLM_PLAIN, NULL, LCK_PW, &flags, + NULL, ldlm_completion_ast, NULL, NULL, + NULL, 0, NULL, lockh); + if (rc) { + CERROR("can't take cfg lock %d\n", rc); + } + return rc; +} + static int mgs_handle_target_add(struct ptlrpc_request *req) { struct obd_device *obd = req->rq_export->exp_obd; + struct lustre_handle lockh; struct mgmt_target_info *mti, *rep_mti; int rep_size = sizeof(*mti); - int rc; + int rc, lockrc; ENTRY; mti = lustre_swab_reqbuf(req, 0, sizeof(*mti), @@ -283,6 +302,16 @@ static int mgs_handle_target_add(struct ptlrpc_request *req) } } + /* revoke the config lock so everyone will update */ + lockrc = mgs_get_cfg_lock(obd, mti->mti_fsname, &lockh); + if (lockrc) { + LCONSOLE_ERROR("Can't signal other nodes to update their " + "configuration (%d). Updating local logs " + "anyhow; you might have to manually restart " + "other servers to get the latest configuration." + "\n", lockrc); + } + /* create the log for the new target and update the client/mdt logs */ rc = mgs_write_log_target(obd, mti); @@ -292,6 +321,10 @@ static int mgs_handle_target_add(struct ptlrpc_request *req) GOTO(out, rc); } + /* done with log update */ + if (!lockrc) + ldlm_lock_decref(&lockh, LCK_PW); + out: CDEBUG(D_MGS, "replying with %s, index=%d, rc=%d\n", mti->mti_svname, mti->mti_stripe_index, rc); @@ -358,7 +391,7 @@ int mgs_handle(struct ptlrpc_request *req) break; case OBD_PING: - DEBUG_REQ(D_MGS, req, "ping"); + DEBUG_REQ(D_INFO, req, "ping"); rc = target_handle_ping(req); break; @@ -401,80 +434,23 @@ int mgs_handle(struct ptlrpc_request *req) LASSERT(current->journal_info == NULL); - CDEBUG(D_MGS, "MGS handle cmd=%d rc=%d\n", req->rq_reqmsg->opc, rc); + CDEBUG(D_CONFIG | (rc?D_ERROR:0), "MGS handle cmd=%d rc=%d\n", + req->rq_reqmsg->opc, rc); out: target_send_reply(req, rc, fail); RETURN(0); } -/* Same as mds_fid2dentry */ -/* Look up an entry by inode number. */ -/* this function ONLY returns valid dget'd dentries with an initialized inode - or errors */ -struct dentry *mgs_fid2dentry(struct mgs_obd *mgs, struct ll_fid *fid) +static inline int mgs_destroy_export(struct obd_export *exp) { - char fid_name[32]; - unsigned long ino = fid->id; - __u32 generation = fid->generation; - struct inode *inode; - struct dentry *result; - - CDEBUG(D_DENTRY|D_ERROR, "--> mgs_fid2dentry: ino/gen %lu/%u, sb %p\n", - ino, generation, mgs->mgs_sb); - - if (ino == 0) - RETURN(ERR_PTR(-ESTALE)); - - snprintf(fid_name, sizeof(fid_name), "0x%lx", ino); - - /* under ext3 this is neither supposed to return bad inodes - nor NULL inodes. */ - result = ll_lookup_one_len(fid_name, mgs->mgs_fid_de, strlen(fid_name)); - if (IS_ERR(result)) - RETURN(result); - - inode = result->d_inode; - if (!inode) - RETURN(ERR_PTR(-ENOENT)); - - if (inode->i_generation == 0 || inode->i_nlink == 0) { - LCONSOLE_WARN("Found inode with zero generation or link -- this" - " may indicate disk corruption (inode: %lu, link:" - " %lu, count: %d)\n", inode->i_ino, - (unsigned long)inode->i_nlink, - atomic_read(&inode->i_count)); - l_dput(result); - RETURN(ERR_PTR(-ENOENT)); - } - - if (generation && inode->i_generation != generation) { - /* we didn't find the right inode.. */ - CDEBUG(D_INODE, "found wrong generation: inode %lu, link: %lu, " - "count: %d, generation %u/%u\n", inode->i_ino, - (unsigned long)inode->i_nlink, - atomic_read(&inode->i_count), inode->i_generation, - generation); - l_dput(result); - RETURN(ERR_PTR(-ENOENT)); - } + ENTRY; - RETURN(result); -} + target_destroy_export(exp); -static struct dentry *mgs_lvfs_fid2dentry(__u64 id, __u32 gen, __u64 gr, - void *data) -{ - struct obd_device *obd = data; - struct ll_fid fid; - fid.id = id; - fid.generation = gen; - return mgs_fid2dentry(&obd->u.mgs, &fid); + RETURN(0); } -struct lvfs_callback_ops mgs_lvfs_ops = { - l_fid2dentry: mgs_lvfs_fid2dentry, -}; /* use obd ops to offer management infrastructure */ static struct obd_ops mgs_obd_ops = { @@ -484,7 +460,7 @@ static struct obd_ops mgs_obd_ops = { .o_setup = mgs_setup, .o_precleanup = mgs_precleanup, .o_cleanup = mgs_cleanup, - .o_destroy_export = target_destroy_export, + .o_destroy_export = mgs_destroy_export, .o_iocontrol = mgs_iocontrol, }; diff --git a/lustre/obdclass/obd_mount.c b/lustre/obdclass/obd_mount.c index cc74b67..145dc49 100644 --- a/lustre/obdclass/obd_mount.c +++ b/lustre/obdclass/obd_mount.c @@ -381,8 +381,6 @@ int lustre_get_process_log(struct super_block *sb, char *logname, { struct lustre_sb_info *lsi = s2lsi(sb); struct obd_device *mgc = lsi->lsi_mgc; - struct lustre_handle mgc_conn = {0, }; - struct obd_export *exp = NULL; struct llog_ctxt *rctxt, *lctxt; int rc; LASSERT(mgc); @@ -396,14 +394,6 @@ int lustre_get_process_log(struct super_block *sb, char *logname, return(-EINVAL); } - rc = obd_connect(&mgc_conn, mgc, &(mgc->obd_uuid), NULL); - if (rc) { - CERROR("connect failed %d\n", rc); - goto out; - } - exp = class_conn2export(&mgc_conn); - LASSERT(exp->exp_obd == mgc); - //FIXME Copy the mgs remote log to the local disk #if 0 @@ -411,7 +401,6 @@ int lustre_get_process_log(struct super_block *sb, char *logname, class_config_dump_llog(rctxt, logname, cfg); #endif rc = class_config_parse_llog(rctxt, logname, cfg); - obd_disconnect(exp); if (rc && lmd_is_client(lsi->lsi_lmd)) { int rc2; @@ -437,7 +426,6 @@ int lustre_get_process_log(struct super_block *sb, char *logname, CDEBUG(D_MOUNT, "after lustre_get_process_log %s\n", logname); class_obd_list(); -out: return (rc); } @@ -468,12 +456,13 @@ static int do_lcfg(char *cfgname, lnet_nid_t nid, int cmd, return(rc); } -static int lustre_start_simple(char *obdname, char *type, char *s1, char *s2) +static int lustre_start_simple(char *obdname, char *type, char *uuid, + char *s1, char *s2) { int rc; CDEBUG(D_MOUNT, "Starting obd %s\n", obdname); - rc = do_lcfg(obdname, 0, LCFG_ATTACH, type, obdname/*uuid*/, 0, 0); + rc = do_lcfg(obdname, 0, LCFG_ATTACH, type, uuid, 0, 0); if (rc) { CERROR("%s attach error %d\n", obdname, rc); return(rc); @@ -511,7 +500,7 @@ static int server_start_mgs(struct super_block *sb) rc = server_register_mount(mgsname, sb, mnt); if (!rc && - ((rc = lustre_start_simple(mgsname, LUSTRE_MGS_NAME, 0, 0)))) + ((rc = lustre_start_simple(mgsname, LUSTRE_MGS_NAME, mgsname, 0, 0)))) server_deregister_mount(mgsname); if (rc) @@ -542,13 +531,39 @@ static int server_stop_mgs(struct super_block *sb) return rc; } +static struct obd_export *get_mgs_export(struct obd_device *mgc) +{ + struct obd_export *exp, *n; + + /* FIXME is this a Bad Idea? Should I just store this export + somewhere in the u.cli? */ + + /* There should be exactly 2 exports in the mgc, the mgs export and + the mgc self-export, in that order. So just return the list head. */ + LASSERT(!list_empty(&mgc->obd_exports)); + LASSERT(mgc->obd_num_exports == 2); + list_for_each_entry_safe(exp, n, &mgc->obd_exports, exp_obd_chain) { + LASSERT(exp != mgc->obd_self_export); + break; + } + /*FIXME there's clearly a better way, but I'm too confused to sort it + out now... + exp = &list_entry(&mgc->obd_exports->head, export_obd, exp_obd_chain); + */ + return exp; +} + /* Set up a mgcobd to process startup logs */ static int lustre_start_mgc(struct super_block *sb) { + struct lustre_handle mgc_conn = {0, }; struct lustre_sb_info *lsi = s2lsi(sb); struct obd_device *obd; + struct obd_export *exp; + char *uuid; char mgcname[] = "MGC"; lnet_nid_t nid; + lnet_process_id_t id; int recov_bk; int rc = 0, i; @@ -578,9 +593,16 @@ static int lustre_start_mgc(struct super_block *sb) if (rc < 0) return rc; + /* Generate a unique uuid for each MGC - use the 1st non-loopback nid */ + /* FIXME if no loopback? Use lustre_generate_random_uuid? */ + rc = LNetGetId(1, &id); + OBD_ALLOC(uuid, sizeof(struct obd_uuid)); + sprintf(uuid, "mgc_"LPX64, id.nid); /* Start the MGC */ - if ((rc = lustre_start_simple(mgcname, LUSTRE_MGC_NAME, "MGS", - libcfs_nid2str(nid)))) + rc = lustre_start_simple(mgcname, LUSTRE_MGC_NAME, uuid, "MGS", + libcfs_nid2str(nid)); + OBD_FREE(uuid, sizeof(struct obd_uuid)); + if (rc) return rc; /* Add the redundant MGS nids */ @@ -600,7 +622,6 @@ static int lustre_start_mgc(struct super_block *sb) libcfs_nid2str(nid), rc); } - /* Keep the mgc info in the sb */ obd = class_name2obd(mgcname); if (!obd) { CERROR("Can't find mgcobd %s\n", mgcname); @@ -616,10 +637,23 @@ static int lustre_start_mgc(struct super_block *sb) CERROR("can't set init_recov_bk %d\n", rc); goto out; } - + + /* We connect to the MGS at setup, and don't disconnect until cleanup */ + rc = obd_connect(&mgc_conn, obd, &(obd->obd_uuid), NULL); + if (rc) { + CERROR("connect failed %d\n", rc); + goto out; + } + exp = class_conn2export(&mgc_conn); + LASSERT(exp == get_mgs_export(obd)); + + /* And keep a refcount of servers/clients who started with "mount", + so we know when we can get rid of the mgc. */ atomic_set(&obd->u.cli.cl_mgc_refcount, 1); + out: - /* note that many lsi's can point to the same mgc.*/ + /* Keep the mgc info in the sb. Note that many lsi's can point + to the same mgc.*/ lsi->lsi_mgc = obd; return rc; } @@ -644,6 +678,8 @@ static int lustre_stop_mgc(struct super_block *sb) return -EBUSY; } + obd_disconnect(get_mgs_export(obd)); + rc = class_manual_cleanup(obd); if (rc) return(rc); @@ -738,8 +774,6 @@ static int server_add_target(struct super_block *sb, struct vfsmount *mnt) struct lustre_sb_info *lsi = s2lsi(sb); struct obd_device *mgc = lsi->lsi_mgc; struct lustre_disk_data *ldd = lsi->lsi_ldd; - struct lustre_handle mgc_conn = {0, }; - struct obd_export *exp = NULL; struct mgmt_target_info *mti = NULL; lnet_process_id_t id; int rc; @@ -768,19 +802,10 @@ static int server_add_target(struct super_block *sb, struct vfsmount *mnt) mti->mti_stripe_size = 1024*1024; //FIXME mti->mti_stripe_offset = 0; //FIXME - CDEBUG(D_MOUNT, "Initial connect %s, fs=%s, %s, index=%04x\n", + CDEBUG(D_MOUNT, "Initial registration %s, fs=%s, %s, index=%04x\n", mti->mti_svname, mti->mti_fsname, libcfs_nid2str(mti->mti_nid), mti->mti_stripe_index); - /* Connect to the MGS */ - rc = obd_connect(&mgc_conn, mgc, &(mgc->obd_uuid), NULL); - if (rc) { - CERROR("connect failed %d\n", rc); - goto out; - } - exp = class_conn2export(&mgc_conn); - LASSERT(exp->exp_obd == mgc); - /* Register the target */ /* FIXME use ioctl instead? eg struct obd_ioctl_data ioc_data = { 0 }; @@ -790,11 +815,10 @@ static int server_add_target(struct super_block *sb, struct vfsmount *mnt) rc = obd_iocontrol(OBD_IOC_START, obd->obd_self_export, sizeof ioc_data, &ioc_data, NULL); */ - rc = obd_set_info(exp, + rc = obd_set_info(get_mgs_export(mgc), strlen("add_target"), "add_target", sizeof(*mti), mti); CDEBUG(D_MOUNT, "disconnect"); - obd_disconnect(exp); if (rc) { CERROR("add_target failed %d\n", rc); goto out; @@ -840,7 +864,8 @@ static int server_start_targets(struct super_block *sb, struct vfsmount *mnt) obd = class_name2obd("MDS"); if (!obd) { //FIXME pre-rename, should eventually be LUSTRE_MDS_NAME - rc = lustre_start_simple("MDS", LUSTRE_MDT_NAME, 0, 0); + rc = lustre_start_simple("MDS", LUSTRE_MDT_NAME, + "MDS_uuid", 0, 0); if (rc) { CERROR("failed to start MDS: %d\n", rc); goto out_servers; @@ -853,7 +878,8 @@ static int server_start_targets(struct super_block *sb, struct vfsmount *mnt) /* make sure OSS is started */ obd = class_name2obd("OSS"); if (!obd) { - rc = lustre_start_simple("OSS", LUSTRE_OSS_NAME, 0, 0); + rc = lustre_start_simple("OSS", LUSTRE_OSS_NAME, + "OSS_uuid", 0, 0); if (rc) { CERROR("failed to start OSS: %d\n", rc); goto out_servers; @@ -1082,8 +1108,12 @@ static void server_put_super(struct super_block *sb) /* If they wanted the mgs to stop separately from the mdt, they should have put it on a different device. */ - if (IS_MGMT(lsi->lsi_ldd)) + if (IS_MGMT(lsi->lsi_ldd)) { + /* stop the mgc before the mgs so the connection gets cleaned + up */ + lustre_stop_mgc(sb); server_stop_mgs(sb); + } /* clean the mgc and sb */ rc = lustre_common_put_super(sb); @@ -1241,14 +1271,14 @@ int lustre_common_put_super(struct super_block *sb) CDEBUG(D_MOUNT, "dropping sb %p\n", sb); rc = lustre_stop_mgc(sb); - if (rc) { + if (rc && (rc != -ENOENT)) { if (rc != -EBUSY) { CERROR("Can't stop MGC: %d\n", rc); return rc; } /* BUSY just means that there's some other obd that needs the mgc. Let him clean it up. */ - CDEBUG(D_MOUNT, "MGC busy, not stopping\n"); + CDEBUG(D_MOUNT, "MGC busy, will stop later\n"); } rc = lustre_free_lsi(sb); return rc; -- 1.8.3.1