Whamcloud - gitweb
Branch b1_4_mountconf
authornathan <nathan>
Fri, 3 Feb 2006 18:18:29 +0000 (18:18 +0000)
committernathan <nathan>
Fri, 3 Feb 2006 18:18:29 +0000 (18:18 +0000)
b=9851
copy remote MGS llogs locally onto server disks, so that servers
can start up even if the MGS is no longer present.
(Note that this leads to the possibility of servers starting up using
out-of-date logs, and without taking a config lock.)

lustre/include/linux/obd.h
lustre/liblustre/llite_lib.c
lustre/mdc/mdc_request.c
lustre/mds/mds_fs.c
lustre/mgc/mgc_request.c
lustre/mgs/mgs_llog.c
lustre/obdclass/llog.c
lustre/obdclass/obd_mount.c
lustre/osc/osc_request.c
lustre/ptlrpc/import.c

index cdecea4..66bf44c 100644 (file)
@@ -366,6 +366,7 @@ struct client_obd {
         struct vfsmount         *cl_mgc_vfsmnt;
         struct dentry           *cl_mgc_configs_dir;
         atomic_t                 cl_mgc_refcount;
+        struct obd_export       *cl_mgc_mgsexp;
 
         /* Flags section */
         unsigned int             cl_checksum:1; /* debug checksums */
@@ -493,7 +494,7 @@ struct niobuf_local {
 };
 
 
-/* device types */
+/* device types (not names--FIXME) */
 /* FIXME all the references to these defines need to be updated */
 #define LUSTRE_MDS_NAME "mds"
 #define LUSTRE_MDT_NAME "mdt"
@@ -512,6 +513,9 @@ struct niobuf_local {
 #define LUSTRE_MGS_NAME "mgs"
 #define LUSTRE_MGC_NAME "mgc"
 
+#define LUSTRE_MGS_OBDNAME "MGS"
+#define LUSTRE_MGC_OBDNAME "MGC"
+
 
 /* Don't conflict with on-wire flags OBD_BRW_WRITE, etc */
 #define N_LOCAL_TEMP_PAGE 0x10000000
@@ -946,5 +950,7 @@ static inline void init_obd_quota_ops(quota_interface_t *interface,
 #define KEY_MDS_CONN "mds_conn"
 #define KEY_NEXT_ID  "next_id"
 #define KEY_LOVDESC  "lovdesc"
+#define KEY_INIT_RECOV "initial_recov"
+#define KEY_INIT_RECOV_BACKUP "init_recov_bk"
 
 #endif /* __OBD_H */
index ec62ef1..68b5992 100644 (file)
@@ -131,7 +131,7 @@ int liblustre_process_log(struct config_llog_instance *cfg,
 
         /* Disable initial recovery on this import */
         rc = obd_set_info(obd->obd_self_export,
-                          strlen("initial_recov"), "initial_recov",
+                          strlen(KEY_INIT_RECOV), KEY_INIT_RECOV,
                           sizeof(allow_recov), &allow_recov);
 
         rc = obd_connect(&mdc_conn, obd, &mdc_uuid, ocd);
index 6eec8b8..313950c 100644 (file)
@@ -886,7 +886,7 @@ int mdc_set_info(struct obd_export *exp, obd_count keylen,
         struct obd_import *imp = class_exp2cliimp(exp);
         int rc = -EINVAL;
 
-        if (KEY_IS("initial_recov")) {
+        if (KEY_IS(KEY_INIT_RECOV)) {
                 if (vallen != sizeof(int))
                         RETURN(-EINVAL);
                 imp->imp_initial_recov = *(int *)val;
@@ -895,7 +895,7 @@ int mdc_set_info(struct obd_export *exp, obd_count keylen,
                 RETURN(0);
         }
         /* Turn off initial_recov after we try all backup servers once */
-        if (KEY_IS("init_recov_bk")) {
+        if (KEY_IS(KEY_INIT_RECOV_BACKUP)) {
                 if (vallen != sizeof(int))
                         RETURN(-EINVAL);
                 imp->imp_initial_recov_bk = *(int *)val;
index 3fbf06c..7d9aa91 100644 (file)
@@ -477,6 +477,7 @@ int mds_fs_setup(struct obd_device *obd, struct vfsmount *mnt)
         }
         mds->mds_pending_dir = dentry;
 
+        /* COMPAT_146 */
         dentry = simple_mkdir(current->fs->pwd, MDT_LOGS_DIR, 0777, 1);
         if (IS_ERR(dentry)) {
                 rc = PTR_ERR(dentry);
@@ -485,6 +486,7 @@ int mds_fs_setup(struct obd_device *obd, struct vfsmount *mnt)
                 GOTO(err_pending, rc);
         }
         mds->mds_logs_dir = dentry;
+        /* end COMPAT_146 */
 
         dentry = simple_mkdir(current->fs->pwd, "OBJECTS", 0777, 1);
         if (IS_ERR(dentry)) {
index 30a2282..ccc5228 100644 (file)
@@ -228,7 +228,8 @@ static int mgc_fs_setup(struct obd_device *obd, struct super_block *sb,
 
         cli->cl_mgc_vfsmnt = mnt;
         // FIXME which is the right SB? - filter_common_setup also 
-        CERROR("SB's: fill=%p mnt=%p root=%p\n", sb, mnt->mnt_sb, mnt->mnt_root->d_inode->i_sb);
+        CERROR("SB's: fill=%p mnt=%p root=%p\n", sb, mnt->mnt_sb,
+               mnt->mnt_root->d_inode->i_sb);
         fsfilt_setup(obd, mnt->mnt_root->d_inode->i_sb);
 
         OBD_SET_CTXT_MAGIC(&obd->obd_lvfs_ctxt);
@@ -383,15 +384,15 @@ static int mgc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
         switch (flag) {
         case LDLM_CB_BLOCKING:
                 /* mgs wants the lock, give it up... */
-                LDLM_ERROR(lock, "MGC blocking CB");
+                LDLM_DEBUG(lock, "MGC blocking CB");
                 ldlm_lock2handle(lock, &lockh);
                 rc = ldlm_cli_cancel(&lockh);
                 break;
         case LDLM_CB_CANCELING: {
                 /* We've given up the lock, prepare ourselves to update. */
-                LDLM_ERROR(lock, "MGC cancel CB");
+                LDLM_DEBUG(lock, "MGC cancel CB");
                 
-                CERROR("Lock res "LPX64" (%.8s)\n",
+                CDEBUG(D_MGC, "Lock res "LPX64" (%.8s)\n",
                        lock->l_resource->lr_name.name[0], 
                        (char *)&lock->l_resource->lr_name.name[0]);
 
@@ -403,7 +404,7 @@ static int mgc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
                         break;
                 }
                 if (lock->l_req_mode != lock->l_granted_mode) {
-                        CERROR("original grant failed, don't requeue\n");
+                        CERROR("original grant failed, won't requeue\n");
                         break;
                 }
 
@@ -412,7 +413,7 @@ static int mgc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
                 rc = kernel_thread(mgc_async_requeue, data,
                                    CLONE_VM | CLONE_FS);
                 if (rc < 0) 
-                        CERROR("Cannot reenque thread: %d\n", rc);
+                        CERROR("Cannot re-enqueue thread: %d\n", rc);
                 else 
                         rc = 0;
                 break;
@@ -624,7 +625,6 @@ out:
         RETURN(rc);
 }
 
-#define INIT_RECOV_BACKUP "init_recov_bk"
 int mgc_set_info(struct obd_export *exp, obd_count keylen,
                  void *key, obd_count vallen, void *val)
 {
@@ -633,8 +633,7 @@ int mgc_set_info(struct obd_export *exp, obd_count keylen,
         ENTRY;
 
         /* Try to "recover" the initial connection; i.e. retry */
-        if (keylen == strlen("initial_recov") &&
-            memcmp(key, "initial_recov", keylen) == 0) {
+        if (KEY_IS(KEY_INIT_RECOV)) {
                 if (vallen != sizeof(int))
                         RETURN(-EINVAL);
                 imp->imp_initial_recov = *(int *)val;
@@ -643,8 +642,7 @@ int mgc_set_info(struct obd_export *exp, obd_count keylen,
                 RETURN(0);
         }
         /* Turn off initial_recov after we try all backup servers once */
-        if (keylen == strlen(INIT_RECOV_BACKUP) &&
-            memcmp(key, INIT_RECOV_BACKUP, keylen) == 0) {
+        if (KEY_IS(KEY_INIT_RECOV_BACKUP)) {
                 if (vallen != sizeof(int))
                         RETURN(-EINVAL);
                 imp->imp_initial_recov_bk = *(int *)val;
@@ -653,8 +651,7 @@ int mgc_set_info(struct obd_export *exp, obd_count keylen,
                 RETURN(0);
         }
         /* Hack alert */
-        if (keylen == strlen("add_target") &&
-            memcmp(key, "add_target", keylen) == 0) {
+        if (KEY_IS("add_target")) {
                 struct mgs_target_info *mti;
                 if (vallen != sizeof(struct mgs_target_info))
                         RETURN(-EINVAL);
@@ -664,8 +661,7 @@ int mgc_set_info(struct obd_export *exp, obd_count keylen,
                 rc =  mgc_target_add(exp, mti);
                 RETURN(rc);
         }
-        if (keylen == strlen("set_fs") &&
-            memcmp(key, "set_fs", keylen) == 0) {
+        if (KEY_IS("set_fs")) {
                 struct super_block *sb = (struct super_block *)val;
                 struct lustre_sb_info *lsi;
                 if (vallen != sizeof(struct super_block))
@@ -677,8 +673,7 @@ int mgc_set_info(struct obd_export *exp, obd_count keylen,
                 }
                 RETURN(rc);
         }
-        if (keylen == strlen("clear_fs") &&
-            memcmp(key, "clear_fs", keylen) == 0) {
+        if (KEY_IS("clear_fs")) {
                 if (vallen != 0)
                         RETURN(-EINVAL);
                 rc = mgc_fs_cleanup(exp->exp_obd);
@@ -752,27 +747,78 @@ static int mgc_llog_finish(struct obd_device *obd, int count)
         RETURN(rc);
 }
 
-/* Get the client export to the MGS */
-static struct obd_export *get_mgs_export(struct obd_device *mgc)
+static int mgc_copy_handler(struct llog_handle *llh, struct llog_rec_hdr *rec, 
+                            void *data)
 {
-        struct obd_export *exp, *n;
+        struct llog_rec_hdr local_rec = *rec;
+        struct llog_handle *local_llh = (struct llog_handle *)data;
+        char *cfg_buf = (char*) (rec + 1);
+        struct lustre_cfg *lcfg;
+        int rc = 0;
+        ENTRY;
 
-        /* FIXME is this a Bad Idea?  Should I just store this export 
-           somewhere in the u.cli? */
+        lcfg = (struct lustre_cfg *)cfg_buf;
 
-        /* There should be exactly 2 exports in the mgc, the mgs export and 
-           the mgc self-export, in that order. So just return the list head. */
-        LASSERT(!list_empty(&mgc->obd_exports));
-        LASSERT(mgc->obd_num_exports == 2);
-        list_for_each_entry_safe(exp, n, &mgc->obd_exports, exp_obd_chain) {
-                LASSERT(exp != mgc->obd_self_export);
-                break;
+        /* FIXME we should always write to an empty log, so remove this check.*/
+        /* append new records */
+        if (rec->lrh_index >= llog_get_size(local_llh)) { 
+                rc = llog_write_rec(local_llh, &local_rec, NULL, 0, 
+                                    (void *)cfg_buf, -1);
+
+                CDEBUG(D_INFO, "idx=%d, rc=%d, len=%d, cmd %x %s %s\n", 
+                       rec->lrh_index, rc, rec->lrh_len, lcfg->lcfg_command, 
+                       lustre_cfg_string(lcfg, 0), lustre_cfg_string(lcfg, 1));
+        } else {
+                CDEBUG(D_INFO, "skip idx=%d\n",  rec->lrh_index);
         }
-        /*FIXME there's clearly a better way, but I'm too confused to sort it 
-          out now...
-        exp = &list_entry(&mgc->obd_exports->head, export_obd, exp_obd_chain);
-        */
-        return exp;
+
+        RETURN(rc);
+}
+
+static int mgc_copy_llog(struct obd_device *obd, struct llog_ctxt *rctxt,
+                         struct llog_ctxt *lctxt, char *logname)
+{
+        struct llog_handle *local_llh, *remote_llh;
+        struct obd_uuid *uuid;
+        int rc, rc2;
+        ENTRY;
+
+        CDEBUG(D_MGC, "Copy remote log %s\n", logname);
+
+        /* open local log */
+        rc = llog_create(lctxt, &local_llh, NULL, logname);
+        if (rc)
+                RETURN(rc);
+        /* set the log header uuid for fun */
+        OBD_ALLOC_PTR(uuid);
+        obd_str2uuid(uuid, logname);
+        rc = llog_init_handle(local_llh, LLOG_F_IS_PLAIN, uuid);
+        OBD_FREE_PTR(uuid);
+        if (rc)
+                GOTO(out_closel, rc);
+
+        /* FIXME write new log to a temp name, then vfs_rename over logname
+           upon successful completion. */
+
+        /* open remote log */
+        rc = llog_create(rctxt, &remote_llh, NULL, logname);
+        if (rc)
+                GOTO(out_closel, rc);
+        rc = llog_init_handle(remote_llh, LLOG_F_IS_PLAIN, NULL);
+        if (rc)
+                GOTO(out_closer, rc);
+
+        rc = llog_process(remote_llh, mgc_copy_handler,(void *)local_llh, NULL);
+
+out_closer:
+        rc2 = llog_close(remote_llh);
+        if (!rc)
+                rc = rc2;
+out_closel:
+        rc2 = llog_close(local_llh);
+        if (!rc)
+                rc = rc2;
+        RETURN(rc);
 }
 
 /* Get a config log from the MGS and process it.
@@ -780,40 +826,67 @@ static struct obd_export *get_mgs_export(struct obd_device *mgc)
 static int mgc_process_log(struct obd_device *mgc, 
                            struct config_llog_data *cld)
 {
-        struct llog_ctxt *rctxt;
+        struct llog_ctxt *ctxt, *lctxt;
         struct lustre_handle lockh;
-        int rc, rcl, flags = 0;
+        struct client_obd *cli = &mgc->u.cli;
+        struct lvfs_run_ctxt saved;
+        int rc, rcl, flags = 0, must_pop = 0;
         ENTRY;
 
         CDEBUG(D_MGC, "Process log %s:%s from %d\n", cld->cld_logname, 
                cld->cld_cfg.cfg_instance, cld->cld_cfg.cfg_last_idx + 1);
 
-        rctxt = llog_get_context(mgc, LLOG_CONFIG_REPL_CTXT);
-        if (!rctxt) {
+        ctxt = llog_get_context(mgc, LLOG_CONFIG_REPL_CTXT);
+        if (!ctxt) {
                 CERROR("missing llog context\n");
                 RETURN(-EINVAL);
         }
 
         /* Get the cfg lock on the llog */
-        rcl = mgc_enqueue(get_mgs_export(mgc), NULL, LDLM_PLAIN, NULL, 
+        rcl = mgc_enqueue(mgc->u.cli.cl_mgc_mgsexp, NULL, LDLM_PLAIN, NULL, 
                           LCK_CR, &flags, NULL, NULL, NULL, 
                           cld, 0, NULL, &lockh);
-        if (rcl) {
+        if (rcl) 
                 CERROR("Can't get cfg lock: %d\n", rcl);
-                config_log_put();
-                RETURN(rcl);
-        }
         
-        //FIXME Copy the mgs remote log to the local disk
+        /* Copy the setup log locally if we can. Don't mess around if we're 
+           running an MGS though (logs are already local). */
+        /* FIXME What if MGC has a disk set up but a client/another server gets
+           updated in the meantime?  We'll copy the other log onto the 
+           currently set-up disk. This won't hurt the current disk, but
+           the other server won't get his update written to disk. Next time it
+           starts it will update, so this isn't a huge deal... */
+        if (cli->cl_mgc_vfsmnt && 
+            ((lctxt = llog_get_context(mgc, LLOG_CONFIG_ORIG_CTXT)) != NULL) &&
+            (class_name2obd(LUSTRE_MGS_OBDNAME) == NULL)) {
+                push_ctxt(&saved, &mgc->obd_lvfs_ctxt, NULL);
+                must_pop++;
+                if (!rcl) 
+                        /* Only try to copy log if we have the lock */
+                        rc = mgc_copy_llog(mgc, ctxt, lctxt, cld->cld_logname);
+                if (rcl || rc) 
+                        LCONSOLE_WARN("Failed to get MGS log %s, using local "
+                                      "copy.\n", cld->cld_logname);
+                /* Now, whether we copied or not, start using the local llog.
+                   If we failed to copy, we'll start using whatever the old 
+                   log has. */
+                ctxt = lctxt;
+        }
 
         /* logname and instance info should be the same, so use our 
-           copy for the update */
-        rc = class_config_parse_llog(rctxt, cld->cld_logname, &cld->cld_cfg);
+           copy of the instance for the update.  The cfg_last_idx will
+           be updated here. */
+        rc = class_config_parse_llog(ctxt, cld->cld_logname, &cld->cld_cfg);
         
+        if (must_pop) 
+                pop_ctxt(&saved, &mgc->obd_lvfs_ctxt, NULL);
+
         /* Now drop the lock so MGS can revoke it */ 
-        rcl = mgc_cancel(get_mgs_export(mgc), NULL, LCK_CR, &lockh);
-        if (rcl) {
-                CERROR("Can't drop cfg lock: %d\n", rcl);
+        if (!rcl) {
+                rcl = mgc_cancel(mgc->u.cli.cl_mgc_mgsexp, NULL, 
+                                 LCK_CR, &lockh);
+                if (rcl) 
+                        CERROR("Can't drop cfg lock: %d\n", rcl);
         }
         
         if (rc) {
@@ -821,7 +894,7 @@ static int mgc_process_log(struct obd_device *mgc,
                                "(%d) from the MGS.\n",
                                mgc->obd_name, cld->cld_logname, rc);
         }
-
+        
         RETURN(rc);
 }
 
@@ -843,7 +916,7 @@ static int mgc_process_config(struct obd_device *obd, obd_count len, void *buf)
                 mti = (struct mgs_target_info *)lustre_cfg_buf(lcfg, 1);
                 CDEBUG(D_MGC, "add_target %s %#x\n",    
                        mti->mti_svname, mti->mti_flags);
-                rc = mgc_target_add(get_mgs_export(obd), mti);
+                rc = mgc_target_add(obd->u.cli.cl_mgc_mgsexp, mti);
                 break;
         }
         case LCFG_LOV_DEL_OBD: 
@@ -865,9 +938,9 @@ static int mgc_process_config(struct obd_device *obd, obd_count len, void *buf)
 
                 cld = config_log_get(logname, cfg);
                 if (IS_ERR(cld)) 
-                        GOTO(out, rc = PTR_ERR(cld));
-
-                rc = mgc_process_log(obd, cld);
+                        rc = PTR_ERR(cld);
+                else
+                        rc = mgc_process_log(obd, cld);
                 config_log_put();
                 break;       
         }
index f3bf5cc..9fcce2e 100644 (file)
@@ -138,6 +138,7 @@ static int mgs_get_db_from_llog(struct obd_device *obd, char *logname,
         struct llog_handle *loghandle;
         struct lvfs_run_ctxt saved;
         int rc, rc2;
+        ENTRY;
 
         push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
 
index 9187a84..91cdfb5 100644 (file)
@@ -144,8 +144,11 @@ int llog_init_handle(struct llog_handle *handle, int flags,
         rc = llog_read_header(handle);
         if (rc == 0) {
                 flags = llh->llh_flags;
-                if (uuid)
-                        LASSERT(obd_uuid_equals(uuid, &llh->llh_tgtuuid));
+                if (uuid && !obd_uuid_equals(uuid, &llh->llh_tgtuuid)) {
+                        CERROR("uuid mismatch: %s/%s\n", (char *)uuid->uuid,
+                               (char *)llh->llh_tgtuuid.uuid);
+                        rc = -EEXIST;
+                }
                 GOTO(out, rc);
         } else if (rc != LLOG_EEMPTY || !flags) {
                 /* set a pesudo flag for initialization */
@@ -324,7 +327,7 @@ int llog_process(struct llog_handle *loghandle, llog_cb_t cb,
 }
 EXPORT_SYMBOL(llog_process);
 
-int llog_get_size(struct llog_handle *loghandle)
+inline int llog_get_size(struct llog_handle *loghandle)
 {
         if (loghandle && loghandle->lgh_hdr)
                 return loghandle->lgh_hdr->llh_count;
index a88dafa..827a1cc 100644 (file)
@@ -442,35 +442,10 @@ int lustre_process_log(struct super_block *sb, char *logname,
         rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
         lustre_cfg_free(lcfg);
 
-        if (rc && (rc != -EINTR) && !lmd_is_client(lsi->lsi_lmd)) {
-                int rc2;
-                struct llog_ctxt *lctxt;
-
-                LCONSOLE_INFO("%s: The configuration '%s' could not be read "
-                               "from the MGS (%d).  Trying local log.\n",
-                               mgc->obd_name, logname, rc);
-                /* If we couldn't connect to the MGS, try reading a copy
-                   of the config log stored locally on disk */
-                lctxt = llog_get_context(mgc, LLOG_CONFIG_ORIG_CTXT);
-                if (lctxt) {
-                        rc2 = class_config_parse_llog(lctxt, logname, cfg);
-                } else {
-                        CERROR("missing llog context\n");
-                        rc2 = -EINVAL;
-                }
-
-                if (rc2) {
-                        CERROR("%s: Can't read the local config (%d)\n",
-                                       mgc->obd_name, rc2);
-                } else {
-                        rc = 0;
-                }
-        }
-        if (rc) {
+        if (rc) 
                 LCONSOLE_ERROR("%s: The configuration '%s' could not be read "
                                "(%d), mount will fail.\n",
                                mgc->obd_name, logname, rc);
-        }
 
         class_obd_list();
         RETURN(rc);
@@ -554,14 +529,13 @@ static int server_start_mgs(struct super_block *sb)
         struct lustre_sb_info    *lsi = s2lsi(sb);
         struct vfsmount          *mnt = lsi->lsi_srv_mnt;
         struct lustre_mount_info *lmi;
-        char   mgsname[] = "MGS";
         int    rc = 0;
         ENTRY;
         LASSERT(mnt);
 
         /* It is impossible to have more than 1 MGS per node, since
            MGC wouldn't know which to connect to */
-        lmi = server_find_mount(mgsname);
+        lmi = server_find_mount(LUSTRE_MGS_OBDNAME);
         if (lmi) {
                 lsi = s2lsi(lmi->lmi_sb);
                 LCONSOLE_ERROR("The MGS service was already started from "
@@ -569,17 +543,18 @@ static int server_start_mgs(struct super_block *sb)
                 RETURN(-EALREADY);
         }
 
-        CDEBUG(D_CONFIG, "Start MGS service %s\n", mgsname);
+        CDEBUG(D_CONFIG, "Start MGS service %s\n", LUSTRE_MGS_OBDNAME);
 
-        rc = server_register_mount(mgsname, sb, mnt);
+        rc = server_register_mount(LUSTRE_MGS_OBDNAME, sb, mnt);
 
         if (!rc &&
-            ((rc = lustre_start_simple(mgsname, LUSTRE_MGS_NAME, mgsname, 0, 0)))) 
-                server_deregister_mount(mgsname);
+            ((rc = lustre_start_simple(LUSTRE_MGS_OBDNAME, LUSTRE_MGS_NAME, 
+                                       LUSTRE_MGS_OBDNAME, 0, 0)))) 
+                server_deregister_mount(LUSTRE_MGS_OBDNAME);
         
         if (rc)                                
-                LCONSOLE_ERROR("Failed to start MGS %s (%d).  Is the 'mgs' "
-                               "module loaded?\n", mgsname, rc);
+                LCONSOLE_ERROR("Failed to start MGS '%s' (%d).  Is the 'mgs' "
+                               "module loaded?\n", LUSTRE_MGS_OBDNAME, rc);
 
         RETURN(rc);
 }
@@ -587,16 +562,15 @@ static int server_start_mgs(struct super_block *sb)
 static int server_stop_mgs(struct super_block *sb)
 {
         struct obd_device *obd;
-        char mgsname[] = "MGS";
         int rc;
         ENTRY;
 
-        CDEBUG(D_MOUNT, "Stop MGS service %s\n", mgsname);
+        CDEBUG(D_MOUNT, "Stop MGS service %s\n", LUSTRE_MGS_OBDNAME);
 
         /* There better be only one MGS */
-        obd = class_name2obd(mgsname);
+        obd = class_name2obd(LUSTRE_MGS_OBDNAME);
         if (!obd) {
-                CDEBUG(D_CONFIG, "mgs %s not running\n", mgsname);
+                CDEBUG(D_CONFIG, "mgs %s not running\n", LUSTRE_MGS_OBDNAME);
                 RETURN(-EALREADY);
         }
 
@@ -606,8 +580,6 @@ static int server_stop_mgs(struct super_block *sb)
         RETURN(rc);
 }
 
-static struct obd_export *mgc_mgs_export = NULL;
-
 /* Set up a mgcobd to process startup logs */
 static int lustre_start_mgc(struct super_block *sb)
 {
@@ -617,7 +589,6 @@ static int lustre_start_mgc(struct super_block *sb)
         struct obd_device *obd;
         struct obd_export *exp;
         char *uuid;
-        char mgcname[] = "MGC";
         lnet_nid_t nid;
         lnet_process_id_t id;
         int recov_bk;
@@ -626,7 +597,7 @@ static int lustre_start_mgc(struct super_block *sb)
 
         LASSERT(lsi->lsi_lmd);
         
-        obd = class_name2obd(mgcname);
+        obd = class_name2obd(LUSTRE_MGC_OBDNAME);
         if (obd) {
                 atomic_inc(&obd->u.cli.cl_mgc_refcount);
                 /* FIXME There's only one MGC, but users could give different
@@ -634,7 +605,19 @@ static int lustre_start_mgc(struct super_block *sb)
                    or not?  If there's truly one MGS per site, the MGS uuids
                    _should_ all be the same. Maybe check here?
                 */
-                GOTO(out, rc);
+                
+                /* Try all connections, but only once (again). 
+                   We don't want to block another target from starting
+                   (using its local copy of the log), but we do want to connect
+                   if at all possible. */
+                CDEBUG(D_MOUNT, "Set MGS reconnect\n");
+                recov_bk = 1;
+                rc = obd_set_info(obd->obd_self_export,
+                                  strlen(KEY_INIT_RECOV_BACKUP),
+                                  KEY_INIT_RECOV_BACKUP,
+                                  sizeof(recov_bk), &recov_bk);
+
+                GOTO(out, rc = 0);
         }
 
         if (lsi->lsi_lmd->lmd_mgsnid_count == 0) {
@@ -642,11 +625,12 @@ static int lustre_start_mgc(struct super_block *sb)
                 RETURN(-EINVAL);
         }
 
-        CDEBUG(D_MOUNT, "Start MGC %s\n", mgcname);
+        CDEBUG(D_MOUNT, "Start MGC '%s'\n", LUSTRE_MGC_OBDNAME);
 
         /* Add the first uuid for the MGS */
         nid = lsi->lsi_lmd->lmd_mgsnid[0];
-        rc = do_lcfg(mgcname, nid, LCFG_ADD_UUID, libcfs_nid2str(nid), 0,0,0);
+        rc = do_lcfg(LUSTRE_MGC_OBDNAME, nid, LCFG_ADD_UUID, 
+                     libcfs_nid2str(nid), 0,0,0);
         if (rc < 0)
                 RETURN(rc);
 
@@ -660,8 +644,8 @@ static int lustre_start_mgc(struct super_block *sb)
         OBD_ALLOC(uuid, sizeof(struct obd_uuid));
         sprintf(uuid, "mgc_"LPX64, id.nid);
         /* Start the MGC */
-        rc = lustre_start_simple(mgcname, LUSTRE_MGC_NAME, uuid, "MGS"
-                                 libcfs_nid2str(nid));
+        rc = lustre_start_simple(LUSTRE_MGC_OBDNAME, LUSTRE_MGC_NAME
+                                 uuid, LUSTRE_MGS_OBDNAME, libcfs_nid2str(nid));
         OBD_FREE(uuid, sizeof(struct obd_uuid));
         if (rc) 
                 RETURN(rc);
@@ -669,35 +653,34 @@ static int lustre_start_mgc(struct super_block *sb)
         /* Add the redundant MGS nids */
         for (i = 1; i < lsi->lsi_lmd->lmd_mgsnid_count; i++) {
                 nid = lsi->lsi_lmd->lmd_mgsnid[i];
-                rc = do_lcfg(mgcname, nid, LCFG_ADD_UUID, libcfs_nid2str(nid),
-                              0, 0, 0);
+                rc = do_lcfg(LUSTRE_MGC_OBDNAME, nid, LCFG_ADD_UUID, 
+                             libcfs_nid2str(nid), 0, 0, 0);
                 if (rc) {
                         CERROR("Add uuid for %s failed %d\n", 
                                libcfs_nid2str(nid), rc);
                         continue;
                 }
-                rc = do_lcfg(mgcname, 0, LCFG_ADD_CONN, libcfs_nid2str(nid),
-                              0, 0, 0);
+                rc = do_lcfg(LUSTRE_MGC_OBDNAME, 0, LCFG_ADD_CONN,
+                             libcfs_nid2str(nid), 0, 0, 0);
                 if (rc) 
                         CERROR("Add conn for %s failed %d\n", 
                                libcfs_nid2str(nid), rc);
         }
         
-        obd = class_name2obd(mgcname);
+        obd = class_name2obd(LUSTRE_MGC_OBDNAME);
         if (!obd) {
-                CERROR("Can't find mgcobd %s\n", mgcname);
+                CERROR("Can't find mgcobd %s\n", LUSTRE_MGC_OBDNAME);
                 RETURN(-ENOTCONN);
         }
 
-        /* Don't try to recover the mgs connection */
+        /* Try all connections, but only once. */
         recov_bk = 1;
         rc = obd_set_info(obd->obd_self_export,
-                          strlen("init_recov_bk"), "init_recov_bk",
+                          strlen(KEY_INIT_RECOV_BACKUP), KEY_INIT_RECOV_BACKUP,
                           sizeof(recov_bk), &recov_bk);
-        if (rc) {
-                CERROR("can't set init_recov_bk %d\n", rc);
-                GOTO(out, rc);
-        }
+        if (rc) 
+                /* nonfatal */
+                CERROR("can't set %s %d\n", KEY_INIT_RECOV_BACKUP, rc);
        
         /* FIXME add ACL support? */
         //ocd.ocd_connect_flags = OBD_CONNECT_ACL;
@@ -710,8 +693,7 @@ static int lustre_start_mgc(struct super_block *sb)
         }
         
         exp = class_conn2export(&mgc_conn);
-        /* only 1 mgc, only 1 connection to the mgs */
-        mgc_mgs_export = exp;
+        obd->u.cli.cl_mgc_mgsexp = exp;
 
         /* And keep a refcount of servers/clients who started with "mount",
            so we know when we can get rid of the mgc. */
@@ -747,9 +729,8 @@ static int lustre_stop_mgc(struct super_block *sb)
                 RETURN(-EBUSY); 
         }
 
-        if (mgc_mgs_export) 
-                obd_disconnect(mgc_mgs_export);
-        mgc_mgs_export = NULL;
+        if (obd->u.cli.cl_mgc_mgsexp)
+                obd_disconnect(obd->u.cli.cl_mgc_mgsexp);
 
         rc = class_manual_cleanup(obd);
         if (rc)
@@ -898,7 +879,7 @@ static int server_add_target(struct super_block *sb, struct vfsmount *mnt)
 
         /* Register the target */
         /* FIXME use mdc_process_config instead */
-        rc = obd_set_info(mgc_mgs_export,
+        rc = obd_set_info(mgc->u.cli.cl_mgc_mgsexp,
                           strlen("add_target"), "add_target",
                           sizeof(*mti), mti);
         CDEBUG(D_MOUNT, "disconnect");
index c0ae408..361bc77 100644 (file)
@@ -3069,7 +3069,7 @@ static int osc_set_info(struct obd_export *exp, obd_count keylen,
                 RETURN(0);
         }
 
-        if (KEY_IS("initial_recov")) {
+        if (KEY_IS(KEY_INIT_RECOV)) {
                 struct obd_import *imp = exp->exp_obd->u.cli.cl_import;
                 if (vallen != sizeof(int))
                         RETURN(-EINVAL);
index eeea579..4c1cdb6 100644 (file)
@@ -362,7 +362,7 @@ int ptlrpc_connect_import(struct obd_import *imp, char * new_uuid)
                 /* Don't retry if connect fails */
                 rc = 0;
                 obd_set_info(obd->obd_self_export,
-                             strlen("initial_recov"), "initial_recov",
+                             strlen(KEY_INIT_RECOV), KEY_INIT_RECOV,
                              sizeof(rc), &rc);
         }