Whamcloud - gitweb
Branch b1_4_mountconf
authornathan <nathan>
Thu, 15 Dec 2005 00:08:38 +0000 (00:08 +0000)
committernathan <nathan>
Thu, 15 Dec 2005 00:08:38 +0000 (00:08 +0000)
b=8007
keep mgc-mgs connection open forever for locks.

lustre/include/linux/lustre_idl.h
lustre/include/linux/obd.h
lustre/ldlm/ldlm_request.c
lustre/mgc/mgc_llog.c
lustre/mgc/mgc_lock.c
lustre/mgc/mgc_request.c
lustre/mgs/mgs_fs.c
lustre/mgs/mgs_handler.c
lustre/obdclass/obd_mount.c

index 30e8a3c..55b0cf0 100644 (file)
@@ -848,7 +848,6 @@ typedef enum {
         LDLM_EXTENT    = 11,
         LDLM_FLOCK     = 12,
 //      LDLM_IBITS     = 13,
-        LDLM_LLOG      = 14,
         LDLM_MAX_TYPE
 } ldlm_type_t;
 
index d5cbdd4..0208ea9 100644 (file)
@@ -323,9 +323,7 @@ struct client_obd {
         /* mgc datastruct */
         struct semaphore         cl_mgc_sem;
         struct vfsmount         *cl_mgc_vfsmnt;
-        struct super_block      *cl_mgc_sb;
         struct dentry           *cl_mgc_configs_dir;
-        struct list_head         cl_mgc_open_llogs;
         atomic_t                 cl_mgc_refcount;
 
         /* Flags section */
@@ -349,6 +347,7 @@ struct mgs_obd {
         struct llog_handle              *mgs_cfg_llh;
         spinlock_t                       mgs_system_db_lock;
         struct list_head                 mgs_system_db_list;
+        struct lustre_handle             mgs_pw_lock;  /* config update lock */
 };
 
 struct mds_obd {
index 90eed82..0a580a5 100644 (file)
@@ -89,6 +89,9 @@ int ldlm_completion_ast(struct ldlm_lock *lock, int flags, void *data)
         int rc = 0;
         ENTRY;
 
+        //FIXME remove
+        LDLM_ERROR(lock, "ldlm completion ast");
+
         if (flags == LDLM_FL_WAIT_NOREPROC) {
                 LDLM_DEBUG(lock, "client-side enqueue waiting on pending lock");
                 goto noreproc;
index e8005c5..16cac74 100644 (file)
 
 #include "mgc_internal.h"
 
-int mgc_get_process_llog(struct obd_device *obd, char *llog_name,
-                         struct config_llog_instance *cfg)
-{
-        struct llog_ctxt *ctxt;
-
-        ctxt = llog_get_context(obd, LLOG_CONFIG_REPL_CTXT);
-
-        rc = class_config_parse_llog(ctxt, llog_name, cfg);
-
-        if (!rc) {
-                if (rc == -EINVAL)
-                        LCONSOLE_ERROR("%s: The configuration '%s' could not " 
-                                       "be read from the MGS.  Make sure this " 
-                                       "client and the MGS are running " 
-                                       "compatible versions of Lustre.\n",
-                                       obd->obd_name, llog_name);
-                else
-                        CERROR("class_config_parse_llog failed: rc = %d\n", rc);
-        }
-        return 0;
-}
-
-EXPORT_SYMBOL(mgc_get_process_llog)
+                                    //REMOVE ME
index 262582c..69e7ceb 100644 (file)
 
 #include "mgc_internal.h"
 
-int mgc_enqueue(struct obd_export *exp, int lock_mode, 
-                struct mgc_op_data *data, struct lustre_handle *lockh,
-                ldlm_completion_callback cb_completion,
-                ldlm_blocking_callback cb_blocking,
-                void *cb_data)
-{    
-        struct ptlrpc_request *req;
-        struct obd_device *obddev = class_exp2obd(exp);
-        struct ldlm_res_id res_id =
-                { .name = {data->obj_id, 
-                           data->obj_version} 
-                };
-        int rc = 0, flags = 0;
-        struct ldlm_reply *dlm_rep;
-        struct ldlm_request *lockreq;
-        unsigned long irqflags;
-        int   reply_buffers = 0;
-        ENTRY;
-
-        /* Search for already existing locks.*/
-        rc = ldlm_lock_match(obd->obd_namespace, 0, &res_id, LDLM_LLOG, 
-                             NULL, mode, lockh);
-        if (rc == 1) 
-                RETURN(ELDLM_OK);
-
-        rc = ldlm_cli_enqueue(exp, req, obd->obd_namespace, res_id, LDLM_LLOG,
-                              NULL, mode, flags, bl_cb, cp_cb, gl_cb, data,
-                              NULL, 0, NULL, lockh);
-
-        if (req != NULL) {
-                if (rc == ELDLM_LOCK_ABORTED) {
-                        /* swabbed by ldlm_cli_enqueue() */
-                        LASSERT_REPSWABBED(req, 0);
-                        rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*rep));
-                        LASSERT(rep != NULL);
-                        if (rep->lock_policy_res1)
-                                rc = rep->lock_policy_res1;
-                }
-                ptlrpc_req_finished(req);
-        }
-
-        RETURN(rc);
-}
-EXPORT_SYMBOL(mgc_enqueue)
index d578331..0a1ea92 100644 (file)
 #include "mgc_internal.h"
 
 
-/* Get index and add to config llog, depending on flags */
-int mgc_target_add(struct obd_export *exp, struct mgmt_target_info *mti)
-{
-        struct ptlrpc_request *req;
-        struct mgmt_target_info *req_mti, *rep_mti;
-        int size = sizeof(*req_mti);
-        int rep_size = sizeof(*mti);
-        int rc;
-        ENTRY;
-
-        req = ptlrpc_prep_req(class_exp2cliimp(exp), MGMT_TARGET_ADD, 
-                              1, &size, NULL);
-        if (!req)
-                RETURN(rc = -ENOMEM);
-
-        req_mti = lustre_msg_buf(req->rq_reqmsg, 0, sizeof(*req_mti));
-        memcpy(req_mti, mti, sizeof(*req_mti));
-
-        req->rq_replen = lustre_msg_size(1, &rep_size);
-
-        CDEBUG(D_MGC, "requesting add for %s\n", mti->mti_svname);
-        
-        rc = ptlrpc_queue_wait(req);
-        if (!rc) {
-                rep_mti = lustre_swab_repbuf(req, 0, sizeof(*rep_mti),
-                                             lustre_swab_mgmt_target_info);
-                memcpy(mti, rep_mti, sizeof(*rep_mti));
-                CDEBUG(D_MGC, "target_add %s got index = %d\n",
-                       mti->mti_svname, mti->mti_stripe_index);
-        } else {
-                CERROR("target_add failed. rc=%d\n", rc);
-        }
-        ptlrpc_req_finished(req);
-
-        RETURN(rc);
-}
-
-/* Remove from config llog */
-int mgc_target_del(struct obd_export *exp, struct mgmt_target_info *mti)
-{
-        struct ptlrpc_request *req;
-        struct mgmt_target_info *req_mti, *rep_mti;
-        int size = sizeof(*req_mti);
-        int rc;
-        ENTRY;
-
-        req = ptlrpc_prep_req(class_exp2cliimp(exp), MGMT_TARGET_DEL,
-                              1, &size, NULL);
-        if (!req)
-                RETURN(rc = -ENOMEM);
-
-        req_mti = lustre_msg_buf(req->rq_reqmsg, 0, sizeof(*req_mti));
-        memcpy(req_mti, mti, sizeof(*req_mti));
-
-        rc = ptlrpc_queue_wait(req);
-        if (!rc) {
-                int index;
-                rep_mti = lustre_swab_repbuf(req, 0, sizeof(*rep_mti),
-                                             lustre_swab_mgmt_target_info);
-                index = rep_mti->mti_stripe_index;
-                if (index != mti->mti_stripe_index) {
-                        CERROR ("OST DEL failed. rc=%d\n", index);
-                        GOTO (out, rc = -EINVAL);
-                }
-                CERROR("OST DEL OK.(old index = %d)\n", index);
-        }
-out:
-        ptlrpc_req_finished(req);
-
-        RETURN(rc);
-}
 
 static int mgc_fs_setup(struct obd_device *obd, struct super_block *sb, 
                         struct vfsmount *mnt)
@@ -146,10 +75,9 @@ static int mgc_fs_setup(struct obd_device *obd, struct super_block *sb,
         }
 
         cli->cl_mgc_vfsmnt = mnt;
-        cli->cl_mgc_sb = mnt->mnt_root->d_inode->i_sb;
         // FIXME which is the right SB? - filter_common_setup also 
         CERROR("SB's: fill=%p mnt=%p root=%p\n", sb, mnt->mnt_sb, mnt->mnt_root->d_inode->i_sb);
-        fsfilt_setup(obd, cli->cl_mgc_sb);
+        fsfilt_setup(obd, mnt->mnt_root->d_inode->i_sb);
 
         OBD_SET_CTXT_MAGIC(&obd->obd_lvfs_ctxt);
         obd->obd_lvfs_ctxt.pwdmnt = mnt;
@@ -168,12 +96,14 @@ static int mgc_fs_setup(struct obd_device *obd, struct super_block *sb,
                 goto err_ops;
         }
         cli->cl_mgc_configs_dir = dentry;
+
+        /* We keep the cl_mgc_sem until mgc_fs_cleanup */
         return (0);
 
 err_ops:        
         fsfilt_put_ops(obd->obd_fsops);
         obd->obd_fsops = NULL;
-        cli->cl_mgc_sb = NULL;
+        cli->cl_mgc_vfsmnt = NULL;
         up(&cli->cl_mgc_sem);
         return(err);
 }
@@ -184,7 +114,6 @@ static int mgc_fs_cleanup(struct obd_device *obd)
         int rc = 0;
 
         LASSERT(cli->cl_mgc_vfsmnt != NULL);
-        LASSERT(cli->cl_mgc_sb != NULL);
 
         if (cli->cl_mgc_configs_dir != NULL) {
                 struct lvfs_run_ctxt saved;
@@ -201,7 +130,6 @@ static int mgc_fs_cleanup(struct obd_device *obd)
         */
 
         cli->cl_mgc_vfsmnt = NULL;
-        cli->cl_mgc_sb = NULL;
         
         if (obd->obd_fsops) 
                 fsfilt_put_ops(obd->obd_fsops);
@@ -219,7 +147,7 @@ static int mgc_cleanup(struct obd_device *obd)
         /* FIXME calls to mgc_fs_setup must take an obd ref to insure there's
            no fs by the time we get here. */
         LASSERT(cli->cl_mgc_vfsmnt == NULL);
-
+        
         rc = obd_llog_finish(obd, 0);
         if (rc != 0)
                 CERROR("failed to cleanup llogging subsystems\n");
@@ -255,6 +183,81 @@ err_decref:
         RETURN(rc);
 }
 
+/*
+ * Blocking/cancel callback for MGC locks (see ll_mdc_blocking_ast).
+ *
+ * LDLM_CB_BLOCKING:  cancel our lock via ldlm_cli_cancel(); a negative
+ *                    return from the cancel is passed back to the caller.
+ * LDLM_CB_CANCELING: only logged for now (FIXME below).
+ * Any other flag is treated as a bug.
+ *
+ * Returns 0 unless the cancel fails.
+ */
+static int mgc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
+                            void *data, int flag)
+{
+        ENTRY;
+
+        if (flag == LDLM_CB_BLOCKING) {
+                struct lustre_handle cancel_h;
+                int cancel_rc;
+
+                /* mgs wants the lock, give it up... */
+                LDLM_ERROR(lock, "MGC blocking CB");
+
+                ldlm_lock2handle(lock, &cancel_h);
+                cancel_rc = ldlm_cli_cancel(&cancel_h);
+                if (cancel_rc < 0) {
+                        CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", cancel_rc);
+                        RETURN(cancel_rc);
+                }
+        } else if (flag == LDLM_CB_CANCELING) {
+                /* We've given up the lock, prepare ourselves to update.
+                   FIXME */
+                LDLM_ERROR(lock, "MGC cancel CB");
+
+                /* <adilger> in the MGC case I suspect this callback will
+                   trigger a new enqueue for the same lock (in a separate
+                   thread likely, which won't match the just-being-cancelled
+                   lock due to CBPENDING flag) + config llog processing */
+        } else {
+                LBUG();
+        }
+
+        RETURN(0);
+}
+
+/*
+ * Obtain a concurrent-read (LCK_CR) config lock from the MGS and then drop
+ * our reference on it straight away, so that the lock stays granted but the
+ * MGS is able to revoke it.  See ll_get_dir_page for the enqueue pattern.
+ *
+ * exp    - export used for the enqueue; its obd supplies the namespace
+ * fsname - currently unused (see FIXME on res_id)
+ *
+ * Returns ELDLM_OK if a matching lock already existed or the enqueue
+ * succeeded, otherwise the error from ldlm_cli_enqueue().
+ */
+static int mgc_get_cfg_lock(struct obd_export *exp, char *fsname)
+{
+        struct lustre_handle lockh;
+        struct obd_device *obd = class_exp2obd(exp);
+        /* FIXME use fsname, vers and separate locks? see mgs_get_cfg_lock */
+        struct ldlm_res_id res_id = { .name = { 12321 } };
+        int rc, flags = 0;
+        ENTRY;
+
+        /* Search for already existing locks.*/
+        rc = ldlm_lock_match(obd->obd_namespace, 0, &res_id, LDLM_PLAIN,
+                             NULL, LCK_CR, &lockh);
+        if (rc == 1) {
+                /* A successful match hands back a referenced lock; release
+                   that reference just like the enqueue path does, or the
+                   MGS could never revoke the lock. */
+                ldlm_lock_decref(&lockh, LCK_CR);
+                RETURN(ELDLM_OK);
+        }
+
+        CDEBUG(D_MGC, "Taking a cfg reader lock\n");
+
+        /* see filter_prepare_destroy for a local (export-less) enqueue */
+        rc = ldlm_cli_enqueue(exp, NULL, obd->obd_namespace, res_id,
+                              LDLM_PLAIN, NULL, LCK_CR, &flags,
+                              mgc_blocking_ast, ldlm_completion_ast, NULL,
+                              NULL/*cb_data*/, NULL, 0, NULL, &lockh);
+        if (rc) {
+                /* lockh is not valid after a failed enqueue; nothing to
+                   drop */
+                CERROR("can't take cfg lock %d\n", rc);
+                RETURN(rc);
+        }
+
+        /* now drop the lock so MGS can revoke it; the mode must be the one
+           we enqueued with (LCK_CR) */
+        ldlm_lock_decref(&lockh, LCK_CR);
+
+        RETURN(rc);
+}
+
 static int mgc_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
                          void *karg, void *uarg)
 {
@@ -329,6 +332,78 @@ out:
         return rc;
 }
 
+/*
+ * Get index and add to config llog, depending on flags.
+ *
+ * Sends a copy of *mti to the MGS in a MGMT_TARGET_ADD request and, on
+ * success, overwrites *mti with the mgmt_target_info carried in the reply.
+ *
+ * Returns 0 on success, -ENOMEM if the request cannot be allocated, -EPROTO
+ * if the reply buffer cannot be unpacked, or the ptlrpc_queue_wait() error.
+ */
+int mgc_target_add(struct obd_export *exp, struct mgmt_target_info *mti)
+{
+        struct ptlrpc_request *req;
+        struct mgmt_target_info *req_mti, *rep_mti;
+        int size = sizeof(*req_mti);
+        int rep_size = sizeof(*mti);
+        int rc;
+        ENTRY;
+
+        req = ptlrpc_prep_req(class_exp2cliimp(exp), MGMT_TARGET_ADD,
+                              1, &size, NULL);
+        if (!req)
+                RETURN(rc = -ENOMEM);
+
+        req_mti = lustre_msg_buf(req->rq_reqmsg, 0, sizeof(*req_mti));
+        memcpy(req_mti, mti, sizeof(*req_mti));
+
+        req->rq_replen = lustre_msg_size(1, &rep_size);
+
+        CDEBUG(D_MGC, "requesting add for %s\n", mti->mti_svname);
+
+        rc = ptlrpc_queue_wait(req);
+        if (rc) {
+                CERROR("target_add failed. rc=%d\n", rc);
+                GOTO(out, rc);
+        }
+
+        rep_mti = lustre_swab_repbuf(req, 0, sizeof(*rep_mti),
+                                     lustre_swab_mgmt_target_info);
+        if (rep_mti == NULL) {
+                /* short or malformed reply: don't copy from NULL */
+                CERROR("Can't unpack mgmt_target_info\n");
+                GOTO(out, rc = -EPROTO);
+        }
+        memcpy(mti, rep_mti, sizeof(*rep_mti));
+        CDEBUG(D_MGC, "target_add %s got index = %d\n",
+               mti->mti_svname, mti->mti_stripe_index);
+out:
+        ptlrpc_req_finished(req);
+
+        RETURN(rc);
+}
+
+/*
+ * Remove from config llog.
+ *
+ * Sends a copy of *mti to the MGS in a MGMT_TARGET_DEL request and checks
+ * that the mti_stripe_index in the reply matches the one that was sent.
+ *
+ * Returns 0 on success, -ENOMEM if the request cannot be allocated, -EPROTO
+ * if the reply buffer cannot be unpacked, -EINVAL if the reply carries a
+ * different index, or the ptlrpc_queue_wait() error.
+ */
+int mgc_target_del(struct obd_export *exp, struct mgmt_target_info *mti)
+{
+        struct ptlrpc_request *req;
+        struct mgmt_target_info *req_mti, *rep_mti;
+        int size = sizeof(*req_mti);
+        int rep_size = sizeof(*mti);
+        int index;
+        int rc;
+        ENTRY;
+
+        req = ptlrpc_prep_req(class_exp2cliimp(exp), MGMT_TARGET_DEL,
+                              1, &size, NULL);
+        if (!req)
+                RETURN(rc = -ENOMEM);
+
+        req_mti = lustre_msg_buf(req->rq_reqmsg, 0, sizeof(*req_mti));
+        memcpy(req_mti, mti, sizeof(*req_mti));
+
+        /* we read a mgmt_target_info back out of the reply below, so the
+           reply buffer has to be sized for it (as in mgc_target_add) */
+        req->rq_replen = lustre_msg_size(1, &rep_size);
+
+        rc = ptlrpc_queue_wait(req);
+        if (rc)
+                GOTO(out, rc);
+
+        rep_mti = lustre_swab_repbuf(req, 0, sizeof(*rep_mti),
+                                     lustre_swab_mgmt_target_info);
+        if (rep_mti == NULL) {
+                /* short or malformed reply: don't dereference NULL */
+                CERROR("Can't unpack mgmt_target_info\n");
+                GOTO(out, rc = -EPROTO);
+        }
+
+        index = rep_mti->mti_stripe_index;
+        if (index != mti->mti_stripe_index) {
+                CERROR("OST DEL failed. index=%d, expected %d\n", index,
+                       mti->mti_stripe_index);
+                GOTO(out, rc = -EINVAL);
+        }
+        CERROR("OST DEL OK.(old index = %d)\n", index);
+out:
+        ptlrpc_req_finished(req);
+
+        RETURN(rc);
+}
+
 #define INIT_RECOV_BACKUP "init_recov_bk"
 int mgc_set_info(struct obd_export *exp, obd_count keylen,
                  void *key, obd_count vallen, void *val)
index 95bd148..e9e210f 100644 (file)
 #include <libcfs/list.h>
 #include "mgs_internal.h"
 
+/* Same as mds_fid2dentry */
+/* Look up an entry by inode number. */
+/* this function ONLY returns valid dget'd dentries with an initialized inode
+   or errors:
+     -ESTALE  the fid's inode number is 0
+     -ENOENT  no inode, zero generation/link count, or generation mismatch
+     or the error returned by ll_lookup_one_len().
+   A generation of 0 in the fid skips the generation comparison. */
+static struct dentry *mgs_fid2dentry(struct mgs_obd *mgs, struct ll_fid *fid)
+{
+        char fid_name[32];
+        unsigned long ino = fid->id;
+        __u32 generation = fid->generation;
+        struct inode *inode;
+        struct dentry *result;
+
+        CDEBUG(D_DENTRY|D_ERROR, "--> mgs_fid2dentry: ino/gen %lu/%u, sb %p\n",
+               ino, generation, mgs->mgs_sb);
+
+        if (ino == 0)
+                RETURN(ERR_PTR(-ESTALE));
+
+        /* the entry is looked up by the hex form of its inode number */
+        snprintf(fid_name, sizeof(fid_name), "0x%lx", ino);
+
+        /* under ext3 this is neither supposed to return bad inodes
+           nor NULL inodes. */
+        result = ll_lookup_one_len(fid_name, mgs->mgs_fid_de, strlen(fid_name));
+        if (IS_ERR(result))
+                RETURN(result);
+
+        inode = result->d_inode;
+        if (!inode) {
+                /* not expected (see above), but if we do get a negative
+                   dentry, release it like every other error path here */
+                l_dput(result);
+                RETURN(ERR_PTR(-ENOENT));
+        }
+
+        if (inode->i_generation == 0 || inode->i_nlink == 0) {
+                LCONSOLE_WARN("Found inode with zero generation or link -- this"
+                              " may indicate disk corruption (inode: %lu, link:"
+                              " %lu, count: %d)\n", inode->i_ino,
+                              (unsigned long)inode->i_nlink,
+                              atomic_read(&inode->i_count));
+                l_dput(result);
+                RETURN(ERR_PTR(-ENOENT));
+        }
+
+        if (generation && inode->i_generation != generation) {
+                /* we didn't find the right inode.. */
+                CDEBUG(D_INODE, "found wrong generation: inode %lu, link: %lu, "
+                       "count: %d, generation %u/%u\n", inode->i_ino,
+                       (unsigned long)inode->i_nlink,
+                       atomic_read(&inode->i_count), inode->i_generation,
+                       generation);
+                l_dput(result);
+                RETURN(ERR_PTR(-ENOENT));
+        }
+
+        RETURN(result);
+}
+
+/* lvfs callback: pack id/gen into an ll_fid and resolve it through
+   mgs_fid2dentry().  'data' is the obd_device; 'gr' is not used. */
+static struct dentry *mgs_lvfs_fid2dentry(__u64 id, __u32 gen, __u64 gr,
+                                          void *data)
+{
+        struct mgs_obd *mgs = &((struct obd_device *)data)->u.mgs;
+        struct ll_fid lookup_fid;
+
+        lookup_fid.id = id;
+        lookup_fid.generation = gen;
+
+        return mgs_fid2dentry(mgs, &lookup_fid);
+}
+
+/* lvfs callbacks for the MGS: only fid -> dentry resolution is provided */
+struct lvfs_callback_ops mgs_lvfs_ops = {
+        .l_fid2dentry = mgs_lvfs_fid2dentry,
+};
+
 int mgs_fs_setup(struct obd_device *obd, struct vfsmount *mnt)
 {
         struct mgs_obd *mgs = &obd->u.mgs;
index 99f29a0..83f8e5a 100644 (file)
@@ -28,7 +28,7 @@
 # define EXPORT_SYMTAB
 #endif
 #define DEBUG_SUBSYSTEM S_MGS
-#define D_MGS D_INFO|D_ERROR
+#define D_MGS D_CONFIG|D_ERROR
 
 #ifdef __KERNEL__
 # include <linux/module.h>
@@ -112,7 +112,7 @@ static int mgs_disconnect(struct obd_export *exp)
         RETURN(rc);
 }
 
-int mgs_handle(struct ptlrpc_request *req);
+static int mgs_handle(struct ptlrpc_request *req);
 
 /* Start the MGS obd */
 static int mgs_setup(struct obd_device *obd, obd_count len, void *buf)
@@ -260,12 +260,31 @@ static int mgs_cleanup(struct obd_device *obd)
         RETURN(0);
 }
 
+/* Enqueue a local LCK_PW lock on the config resource in the MGS namespace
+   and hand the lock handle back through 'lockh'.  'fsname' is not used yet.
+   Returns the ldlm_cli_enqueue() result, logging it when non-zero. */
+static int mgs_get_cfg_lock(struct obd_device *obd, char *fsname,
+                            struct lustre_handle *lockh)
+{
+        /* FIXME resource should be based on fsname,
+           one lock per fs.  One lock per config log? */
+        struct ldlm_res_id cfg_res = {.name = {12321}};
+        int enq_flags = 0;
+        int rc;
+
+        rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, cfg_res,
+                              LDLM_PLAIN, NULL, LCK_PW, &enq_flags,
+                              NULL, ldlm_completion_ast, NULL, NULL,
+                              NULL, 0, NULL, lockh);
+        if (rc == 0)
+                return 0;
+
+        CERROR("can't take cfg lock %d\n", rc);
+        return rc;
+}
+
 static int mgs_handle_target_add(struct ptlrpc_request *req)
 {    
         struct obd_device *obd = req->rq_export->exp_obd;
+        struct lustre_handle lockh;
         struct mgmt_target_info *mti, *rep_mti;
         int rep_size = sizeof(*mti);
-        int rc;
+        int rc, lockrc;
         ENTRY;
 
         mti = lustre_swab_reqbuf(req, 0, sizeof(*mti),
@@ -283,6 +302,16 @@ static int mgs_handle_target_add(struct ptlrpc_request *req)
                 }
         }
 
+        /* revoke the config lock so everyone will update */
+        lockrc = mgs_get_cfg_lock(obd, mti->mti_fsname, &lockh);
+        if (lockrc) {
+                LCONSOLE_ERROR("Can't signal other nodes to update their "
+                               "configuration (%d). Updating local logs "
+                               "anyhow; you might have to manually restart "
+                               "other servers to get the latest configuration."
+                               "\n", lockrc);
+        }
+
         /* create the log for the new target 
            and update the client/mdt logs */
         rc = mgs_write_log_target(obd, mti);
@@ -292,6 +321,10 @@ static int mgs_handle_target_add(struct ptlrpc_request *req)
                 GOTO(out, rc);
         }
 
+        /* done with log update */
+        if (!lockrc)
+                ldlm_lock_decref(&lockh, LCK_PW);
+
 out:
         CDEBUG(D_MGS, "replying with %s, index=%d, rc=%d\n", mti->mti_svname, 
                mti->mti_stripe_index, rc);
@@ -358,7 +391,7 @@ int mgs_handle(struct ptlrpc_request *req)
                 break;
 
         case OBD_PING:
-                DEBUG_REQ(D_MGS, req, "ping");
+                DEBUG_REQ(D_INFO, req, "ping");
                 rc = target_handle_ping(req);
                 break;
 
@@ -401,80 +434,23 @@ int mgs_handle(struct ptlrpc_request *req)
 
         LASSERT(current->journal_info == NULL);
         
-        CDEBUG(D_MGS, "MGS handle cmd=%d rc=%d\n", req->rq_reqmsg->opc, rc);
+        CDEBUG(D_CONFIG | (rc?D_ERROR:0), "MGS handle cmd=%d rc=%d\n",
+               req->rq_reqmsg->opc, rc);
 
  out:
         target_send_reply(req, rc, fail);
         RETURN(0);
 }
 
-/* Same as mds_fid2dentry */
-/* Look up an entry by inode number. */
-/* this function ONLY returns valid dget'd dentries with an initialized inode
-   or errors */
-struct dentry *mgs_fid2dentry(struct mgs_obd *mgs, struct ll_fid *fid)
+static inline int mgs_destroy_export(struct obd_export *exp)
 {
-        char fid_name[32];
-        unsigned long ino = fid->id;
-        __u32 generation = fid->generation;
-        struct inode *inode;
-        struct dentry *result;
-
-        CDEBUG(D_DENTRY|D_ERROR, "--> mgs_fid2dentry: ino/gen %lu/%u, sb %p\n",
-               ino, generation, mgs->mgs_sb);
-
-        if (ino == 0)
-                RETURN(ERR_PTR(-ESTALE));
-        
-        snprintf(fid_name, sizeof(fid_name), "0x%lx", ino);
-        
-        /* under ext3 this is neither supposed to return bad inodes
-           nor NULL inodes. */
-        result = ll_lookup_one_len(fid_name, mgs->mgs_fid_de, strlen(fid_name));
-        if (IS_ERR(result))
-                RETURN(result);
-
-        inode = result->d_inode;
-        if (!inode)
-                RETURN(ERR_PTR(-ENOENT));
-
-        if (inode->i_generation == 0 || inode->i_nlink == 0) {
-                LCONSOLE_WARN("Found inode with zero generation or link -- this"
-                              " may indicate disk corruption (inode: %lu, link:"
-                              " %lu, count: %d)\n", inode->i_ino,
-                              (unsigned long)inode->i_nlink,
-                              atomic_read(&inode->i_count));
-                l_dput(result);
-                RETURN(ERR_PTR(-ENOENT));
-        }
-
-        if (generation && inode->i_generation != generation) {
-                /* we didn't find the right inode.. */
-                CDEBUG(D_INODE, "found wrong generation: inode %lu, link: %lu, "
-                       "count: %d, generation %u/%u\n", inode->i_ino,
-                       (unsigned long)inode->i_nlink,
-                       atomic_read(&inode->i_count), inode->i_generation,
-                       generation);
-                l_dput(result);
-                RETURN(ERR_PTR(-ENOENT));
-        }
+        ENTRY;
 
-        RETURN(result);
-}
+        target_destroy_export(exp);
 
-static struct dentry *mgs_lvfs_fid2dentry(__u64 id, __u32 gen, __u64 gr,
-                                          void *data)
-{
-        struct obd_device *obd = data;
-        struct ll_fid fid;
-        fid.id = id;
-        fid.generation = gen;
-        return mgs_fid2dentry(&obd->u.mgs, &fid);
+        RETURN(0);
 }
 
-struct lvfs_callback_ops mgs_lvfs_ops = {
-        l_fid2dentry:     mgs_lvfs_fid2dentry,
-};
 
 /* use obd ops to offer management infrastructure */
 static struct obd_ops mgs_obd_ops = {
@@ -484,7 +460,7 @@ static struct obd_ops mgs_obd_ops = {
         .o_setup           = mgs_setup,
         .o_precleanup      = mgs_precleanup,
         .o_cleanup         = mgs_cleanup,
-        .o_destroy_export  = target_destroy_export,
+        .o_destroy_export  = mgs_destroy_export,
         .o_iocontrol       = mgs_iocontrol,
 };
 
index cc74b67..145dc49 100644 (file)
@@ -381,8 +381,6 @@ int lustre_get_process_log(struct super_block *sb, char *logname,
 {
         struct lustre_sb_info *lsi = s2lsi(sb);
         struct obd_device *mgc = lsi->lsi_mgc;
-        struct lustre_handle mgc_conn = {0, };
-        struct obd_export *exp = NULL;
         struct llog_ctxt *rctxt, *lctxt;
         int rc;
         LASSERT(mgc);
@@ -396,14 +394,6 @@ int lustre_get_process_log(struct super_block *sb, char *logname,
                 return(-EINVAL);
         }
 
-        rc = obd_connect(&mgc_conn, mgc, &(mgc->obd_uuid), NULL);
-        if (rc) {
-                CERROR("connect failed %d\n", rc);
-                goto out;
-        }
-        exp = class_conn2export(&mgc_conn);
-        LASSERT(exp->exp_obd == mgc);
-
         //FIXME Copy the mgs remote log to the local disk
 
 #if 0
@@ -411,7 +401,6 @@ int lustre_get_process_log(struct super_block *sb, char *logname,
         class_config_dump_llog(rctxt, logname, cfg);
 #endif
         rc = class_config_parse_llog(rctxt, logname, cfg);
-        obd_disconnect(exp);
 
         if (rc && lmd_is_client(lsi->lsi_lmd)) {
                 int rc2;
@@ -437,7 +426,6 @@ int lustre_get_process_log(struct super_block *sb, char *logname,
         CDEBUG(D_MOUNT, "after lustre_get_process_log %s\n", logname);
         class_obd_list();
 
-out:
         return (rc);
 }
 
@@ -468,12 +456,13 @@ static int do_lcfg(char *cfgname, lnet_nid_t nid, int cmd,
         return(rc);
 }
 
-static int lustre_start_simple(char *obdname, char *type, char *s1, char *s2)
+static int lustre_start_simple(char *obdname, char *type, char *uuid, 
+                               char *s1, char *s2)
 {
         int rc;
         CDEBUG(D_MOUNT, "Starting obd %s\n", obdname);
 
-        rc = do_lcfg(obdname, 0, LCFG_ATTACH, type, obdname/*uuid*/, 0, 0);
+        rc = do_lcfg(obdname, 0, LCFG_ATTACH, type, uuid, 0, 0);
         if (rc) {
                 CERROR("%s attach error %d\n", obdname, rc);
                 return(rc);
@@ -511,7 +500,7 @@ static int server_start_mgs(struct super_block *sb)
         rc = server_register_mount(mgsname, sb, mnt);
 
         if (!rc &&
-            ((rc = lustre_start_simple(mgsname, LUSTRE_MGS_NAME, 0, 0)))) 
+            ((rc = lustre_start_simple(mgsname, LUSTRE_MGS_NAME, mgsname, 0, 0)))) 
                 server_deregister_mount(mgsname);
         
         if (rc)                                
@@ -542,13 +531,39 @@ static int server_stop_mgs(struct super_block *sb)
         return rc;
 }
 
+/* Return the MGC's export to the MGS.
+
+   There should be exactly 2 exports in the mgc, the mgs export and
+   the mgc self-export, in that order, so the first entry on the
+   obd_exports list is the one we want.  Both assumptions are asserted.
+
+   FIXME is this a Bad Idea?  Should I just store this export
+   somewhere in the u.cli? */
+static struct obd_export *get_mgs_export(struct obd_device *mgc)
+{
+        struct obd_export *exp;
+
+        LASSERT(!list_empty(&mgc->obd_exports));
+        LASSERT(mgc->obd_num_exports == 2);
+
+        /* first element of the list, without abusing a loop to fetch it */
+        exp = list_entry(mgc->obd_exports.next, struct obd_export,
+                         exp_obd_chain);
+        LASSERT(exp != mgc->obd_self_export);
+
+        return exp;
+}
+
 /* Set up a mgcobd to process startup logs */
 static int lustre_start_mgc(struct super_block *sb)
 {
+        struct lustre_handle mgc_conn = {0, };
         struct lustre_sb_info *lsi = s2lsi(sb);
         struct obd_device *obd;
+        struct obd_export *exp;
+        char *uuid;
         char mgcname[] = "MGC";
         lnet_nid_t nid;
+        lnet_process_id_t id;
         int recov_bk;
         int rc = 0, i;
         
@@ -578,9 +593,16 @@ static int lustre_start_mgc(struct super_block *sb)
         if (rc < 0)
                 return rc;
 
+        /* Generate a unique uuid for each MGC - use the 1st non-loopback nid */
+        /* FIXME if no loopback? Use lustre_generate_random_uuid? */
+        rc = LNetGetId(1, &id);  
+        OBD_ALLOC(uuid, sizeof(struct obd_uuid));
+        sprintf(uuid, "mgc_"LPX64, id.nid);
         /* Start the MGC */
-        if ((rc = lustre_start_simple(mgcname, LUSTRE_MGC_NAME, "MGS", 
-                                       libcfs_nid2str(nid))))
+        rc = lustre_start_simple(mgcname, LUSTRE_MGC_NAME, uuid, "MGS", 
+                                 libcfs_nid2str(nid));
+        OBD_FREE(uuid, sizeof(struct obd_uuid));
+        if (rc) 
                 return rc;
         
         /* Add the redundant MGS nids */
@@ -600,7 +622,6 @@ static int lustre_start_mgc(struct super_block *sb)
                                libcfs_nid2str(nid), rc);
         }
         
-        /* Keep the mgc info in the sb */
         obd = class_name2obd(mgcname);
         if (!obd) {
                 CERROR("Can't find mgcobd %s\n", mgcname);
@@ -616,10 +637,23 @@ static int lustre_start_mgc(struct super_block *sb)
                 CERROR("can't set init_recov_bk %d\n", rc);
                 goto out;
         }
-        
+
+        /* We connect to the MGS at setup, and don't disconnect until cleanup */
+        rc = obd_connect(&mgc_conn, obd, &(obd->obd_uuid), NULL);
+        if (rc) {
+                CERROR("connect failed %d\n", rc);
+                goto out;
+        }
+        exp = class_conn2export(&mgc_conn);
+        LASSERT(exp == get_mgs_export(obd));
+
+        /* And keep a refcount of servers/clients who started with "mount",
+           so we know when we can get rid of the mgc. */
         atomic_set(&obd->u.cli.cl_mgc_refcount, 1);
+
 out:
-        /* note that many lsi's can point to the same mgc.*/
+        /* Keep the mgc info in the sb. Note that many lsi's can point
+           to the same mgc.*/
         lsi->lsi_mgc = obd;
         return rc;
 }
@@ -644,6 +678,8 @@ static int lustre_stop_mgc(struct super_block *sb)
                 return -EBUSY; 
         }
 
+        obd_disconnect(get_mgs_export(obd));
+
         rc = class_manual_cleanup(obd);
         if (rc)
                 return(rc);
@@ -738,8 +774,6 @@ static int server_add_target(struct super_block *sb, struct vfsmount *mnt)
         struct lustre_sb_info *lsi = s2lsi(sb);
         struct obd_device *mgc = lsi->lsi_mgc;
         struct lustre_disk_data *ldd = lsi->lsi_ldd;
-        struct lustre_handle mgc_conn = {0, };
-        struct obd_export *exp = NULL;
         struct mgmt_target_info *mti = NULL;
         lnet_process_id_t         id;
         int rc;
@@ -768,19 +802,10 @@ static int server_add_target(struct super_block *sb, struct vfsmount *mnt)
         mti->mti_stripe_size = 1024*1024;  //FIXME    
         mti->mti_stripe_offset = 0; //FIXME    
 
-        CDEBUG(D_MOUNT, "Initial connect %s, fs=%s, %s, index=%04x\n",
+        CDEBUG(D_MOUNT, "Initial registration %s, fs=%s, %s, index=%04x\n",
                mti->mti_svname, mti->mti_fsname,
                libcfs_nid2str(mti->mti_nid), mti->mti_stripe_index);
 
-        /* Connect to the MGS */
-        rc = obd_connect(&mgc_conn, mgc, &(mgc->obd_uuid), NULL);
-        if (rc) {
-                CERROR("connect failed %d\n", rc);
-                goto out;
-        }
-        exp = class_conn2export(&mgc_conn);
-        LASSERT(exp->exp_obd == mgc);
-        
         /* Register the target */
         /* FIXME use ioctl instead? eg 
         struct obd_ioctl_data ioc_data = { 0 };
@@ -790,11 +815,10 @@ static int server_add_target(struct super_block *sb, struct vfsmount *mnt)
         rc = obd_iocontrol(OBD_IOC_START, obd->obd_self_export,
                             sizeof ioc_data, &ioc_data, NULL);
         */
-        rc = obd_set_info(exp,
+        rc = obd_set_info(get_mgs_export(mgc),
                           strlen("add_target"), "add_target",
                           sizeof(*mti), mti);
         CDEBUG(D_MOUNT, "disconnect");
-        obd_disconnect(exp);
         if (rc) {
                 CERROR("add_target failed %d\n", rc);
                 goto out;
@@ -840,7 +864,8 @@ static int server_start_targets(struct super_block *sb, struct vfsmount *mnt)
                 obd = class_name2obd("MDS");
                 if (!obd) {
                         //FIXME pre-rename, should eventually be LUSTRE_MDS_NAME
-                        rc = lustre_start_simple("MDS", LUSTRE_MDT_NAME, 0, 0);
+                        rc = lustre_start_simple("MDS", LUSTRE_MDT_NAME, 
+                                                 "MDS_uuid", 0, 0);
                         if (rc) {
                                 CERROR("failed to start MDS: %d\n", rc);
                                 goto out_servers;
@@ -853,7 +878,8 @@ static int server_start_targets(struct super_block *sb, struct vfsmount *mnt)
                 /* make sure OSS is started */
                 obd = class_name2obd("OSS");
                 if (!obd) {
-                        rc = lustre_start_simple("OSS", LUSTRE_OSS_NAME, 0, 0);
+                        rc = lustre_start_simple("OSS", LUSTRE_OSS_NAME,
+                                                 "OSS_uuid", 0, 0);
                         if (rc) {
                                 CERROR("failed to start OSS: %d\n", rc);
                                 goto out_servers;
@@ -1082,8 +1108,12 @@ static void server_put_super(struct super_block *sb)
 
         /* If they wanted the mgs to stop separately from the mdt, they
            should have put it on a different device. */ 
-        if (IS_MGMT(lsi->lsi_ldd)) 
+        if (IS_MGMT(lsi->lsi_ldd)) {
+                /* stop the mgc before the mgs so the connection gets cleaned
+                   up */
+                lustre_stop_mgc(sb);
                 server_stop_mgs(sb);
+        }
 
         /* clean the mgc and sb */
         rc = lustre_common_put_super(sb);
@@ -1241,14 +1271,14 @@ int lustre_common_put_super(struct super_block *sb)
         CDEBUG(D_MOUNT, "dropping sb %p\n", sb);
         
         rc = lustre_stop_mgc(sb);
-        if (rc) {
+        if (rc && (rc != -ENOENT)) {
                 if (rc != -EBUSY) {
                         CERROR("Can't stop MGC: %d\n", rc);
                         return rc;
                 }
                 /* BUSY just means that there's some other obd that
                    needs the mgc.  Let him clean it up. */
-                CDEBUG(D_MOUNT, "MGC busy, not stopping\n");
+                CDEBUG(D_MOUNT, "MGC busy, will stop later\n");
         }
         rc = lustre_free_lsi(sb);
         return rc;