Whamcloud - gitweb
Branch b1_4_mountconf
author nathan <nathan>
Wed, 11 Jan 2006 01:32:53 +0000 (01:32 +0000)
committer nathan <nathan>
Wed, 11 Jan 2006 01:32:53 +0000 (01:32 +0000)
b=9861
- add lov info to mkfs.lustre
- add failover nids to registration
b=4482
- connect flags for online ost add
- better locking in mds_lov_sync
- adilger's dirty objids patch
- various fixme's

(warning: this is completely untested code, may not work)

14 files changed:
lustre/include/linux/lustre_disk.h
lustre/include/linux/lustre_idl.h
lustre/include/linux/obd.h
lustre/lov/lov_log.c
lustre/lov/lov_obd.c
lustre/mds/mds_lov.c
lustre/mgc/mgc_request.c
lustre/mgs/mgs_handler.c
lustre/mgs/mgs_internal.h
lustre/mgs/mgs_llog.c
lustre/obdclass/obd_mount.c
lustre/ptlrpc/pack_generic.c
lustre/ptlrpc/ptlrpc_module.c
lustre/utils/mkfs_lustre.c

index 5e2d051..d0a6039 100644 (file)
@@ -68,21 +68,33 @@ static inline char *mt_str(enum ldd_mount_type mt)
         return mount_type_string[mt];
 }
 
-#define MAX_FAILOVER_NIDS 10
+#ifndef MTI_NIDS_MAX  /* match lustre_idl.h */
+#define MTI_NIDS_MAX 10
+#endif
 
 struct lustre_disk_data {
         __u32      ldd_magic;
-        __u32      ldd_config_ver;      /* we have integrated all llog steps
-                                           through this llog ver. */
+        __u32      ldd_config_ver;      /* not used? */
         __u32      ldd_flags;           /* LDD_SV_TYPE */
         char       ldd_fsname[64];      /* filesystem this server is part of */
-        char       ldd_svname[64];      /* this server's name (lustre-mdt0001) */
-        __u16      ldd_svindex;         /* server index (0001), must match svname */
-        __u16      ldd_mgsnid_count;    /* how many failover nids we have for the MGS */
-        lnet_nid_t ldd_mgsnid[MAX_FAILOVER_NIDS]; /* mgmt nid list; lmd can override */
+        char       ldd_svname[64];      /* this server's name (lustre-mdt0001)*/
+        __u16      ldd_svindex;         /* server index (0001), must match 
+                                           svname */
+        __u16      ldd_mgsnid_count;
+        lnet_nid_t ldd_mgsnid[MTI_NIDS_MAX]; /* mgmt nid list; lmd can 
+                                                     override */
+        __u16      ldd_failnid_count;   /* server failover nid count */
+        lnet_nid_t ldd_failnid[MTI_NIDS_MAX]; /* server failover nids */
         enum ldd_mount_type ldd_mount_type;  /* target fs type LDD_MT_* */
         char       ldd_mount_opts[1024]; /* target fs mount opts */
-        char       ldd_pad[1024];
+        
+        /* Below here is required for writing mdt, ost,or client logs,
+           and is ignored after that. */
+        int   ldd_stripe_sz;
+        int   ldd_stripe_count;
+        int   ldd_stripe_pattern;
+        int   ldd_stripe_offset;
+        int   ldd_timeout;               /* obd timeout */
 };
         
 #define IS_MDT(data)   ((data)->ldd_flags & LDD_F_SV_TYPE_MDT)
@@ -114,8 +126,9 @@ static inline void ldd_make_sv_name(struct lustre_disk_data *ldd)
 struct lustre_mount_data {
         __u32      lmd_magic;
         __u32      lmd_flags;         /* lustre mount flags */
-        __u16      lmd_mgsnid_count;  /* how many failover nids we have for the MGS */
-        lnet_nid_t lmd_mgsnid[MAX_FAILOVER_NIDS];  /* who to contact at startup */
+        __u16      lmd_mgsnid_count;  /* how many failover nids we have for 
+                                         the MGS */
+        lnet_nid_t lmd_mgsnid[MTI_NIDS_MAX];/* who to contact at startup */
         char      *lmd_dev;           /* device or file system name */
         char      *lmd_opts;          /* lustre mount options (as opposed to 
                                          _device_ mount options) */
@@ -145,12 +158,6 @@ struct mkfs_opts {
         char  mo_loopdev[128];          /* in case a loop dev is needed */
         __u64 mo_device_sz;
         int   mo_flags; 
-
-        /* Below here is required for writing mdt,ost,or client logs */
-        int   mo_stripe_sz;
-        int   mo_stripe_count;
-        int   mo_stripe_pattern;
-        int   mo_timeout;               /* obd timeout */
 };
 
 /****************** last_rcvd file *********************/
index 339e86c..488fe60 100644 (file)
@@ -212,6 +212,7 @@ static inline void lustre_msg_set_op_flags(struct lustre_msg *msg, int flags)
 #define OBD_CONNECT_ACL         0x80ULL /* client using access control lists */
 #define OBD_CONNECT_XATTR      0x100ULL /* client using extended attributes*/
 #define OBD_CONNECT_CROW       0x200ULL /* MDS is expecting create-on-write */
+#define OBD_CONNECT_EMPTY 0x80000000ULL /* fake: these are empty connect flags*/
 /*
  * set by servers supporting taking extent locks during obd_punch(). Currently
  * is requested by liblustre clients only. See bug 9528.
@@ -939,22 +940,25 @@ typedef enum {
 
 #define MTI_NAME_MAXLEN 64
 #define MTI_UUID_MAXLEN MTI_NAME_MAXLEN + 5
+#define MTI_NIDS_MAX 10 /* match lustre_disk.h */
 
 struct mgmt_target_info {
         char             mti_fsname[MTI_NAME_MAXLEN];
         char             mti_svname[MTI_NAME_MAXLEN];
-        char             mti_nodename[MTI_NAME_MAXLEN];
-        char             mti_uuid[MTI_UUID_MAXLEN];
-        __u64            mti_nid;            /* lnet_nid_t */ //nid list?
-        __u32            mti_config_ver;
-        __u32            mti_flags;
-        __u32            mti_stripe_index;
-        __u32            mti_stripe_pattern;   /* PATTERN_RAID0, PATTERN_RAID1 */
+        __u64            mti_nids[MTI_NIDS_MAX];     /* lnet_nid_t host nids */
+        __u64            mti_failnids[MTI_NIDS_MAX]; /* partner nids */
         __u64            mti_stripe_size;      
         __u64            mti_stripe_offset;    
+        __u32            mti_stripe_count;     /* how many objects are used */
+        __u32            mti_stripe_pattern;   /* PATTERN_RAID0, PATTERN_RAID1*/
+        __u32            mti_stripe_index;
+        __u32            mti_nid_count;
+        __u32            mti_failnid_count;
+        __u32            mti_config_ver;
+        __u32            mti_flags;
 };
 
-extern void lustre_swab_mgmt_target_info(struct mgmt_target_info *oinfo);
+extern void lustre_swab_mgs_target_info(struct mgmt_target_info *oinfo);
 
 #define CM_START 0x01
 #define CM_END   0x02
index ad45929..806cb15 100644 (file)
@@ -377,7 +377,8 @@ struct mds_obd {
         struct semaphore                 mds_lov_sem;
         obd_id                          *mds_lov_objids;
         int                              mds_lov_objids_size;
-        __u32                            mds_lov_objids_red;
+        __u32                            mds_lov_objids_in_file;
+        unsigned int                     mds_lov_objids_dirty:1;
         int                              mds_lov_nextid_set;
         struct file                     *mds_lov_objid_filp;
         struct file                     *mds_health_check_filp;
@@ -425,6 +426,7 @@ struct lov_obd {
         struct semaphore lov_lock;
         atomic_t refcount;
         struct lov_desc desc;
+        struct obd_connect_data ocd;
         int bufsize;
         int connects;
         int death_row;      /* Do we have tgts scheduled to be deleted?
index 4f51a97..325d4b1 100644 (file)
@@ -101,9 +101,12 @@ static int lov_llog_origin_connect(struct llog_ctxt *ctxt, int count,
         int i, rc = 0;
         ENTRY;
 
-        if (count != lov->desc.ld_tgt_count )
+        if (count != lov->desc.ld_tgt_count)
                 CERROR("Origin connect mds cnt %d != lov cnt %d\n", count,
                        lov->desc.ld_tgt_count);
+        /* count must match if we're doing all */
+        LASSERT(uuid || (count == lov->desc.ld_tgt_count));
+
         for (i = 0, tgt = lov->tgts; i < count; i++, tgt++) {
                 struct obd_device *child;
                 struct llog_ctxt *cctxt;
index abd97db..1adcbff 100644 (file)
@@ -207,6 +207,10 @@ static int lov_connect(struct lustre_handle *conn, struct obd_device *obd,
         int rc, rc2, i;
         ENTRY;
 
+        lov->ocd.ocd_connect_flags = OBD_CONNECT_EMPTY; 
+        if (data) 
+                lov->ocd = *data;
+
         rc = class_connect(conn, obd, cluuid);
         if (rc)
                 RETURN(rc);
@@ -433,6 +437,7 @@ lov_add_obd(struct obd_device *obd, struct obd_uuid *uuidp, int index, int gen)
 {
         struct lov_obd *lov = &obd->u.lov;
         struct lov_tgt_desc *tgt;
+        struct obd_connect_data *ocd = NULL;
         __u32 bufsize, idx;
         int rc;
         ENTRY;
@@ -503,8 +508,16 @@ lov_add_obd(struct obd_device *obd, struct obd_uuid *uuidp, int index, int gen)
                         osc_obd->obd_no_recov = 0;
         }
 
-        /* NULL may need to change when we use flags for osc's */
-        rc = lov_connect_obd(obd, tgt, 1, NULL);
+        if (lov->ocd.ocd_connect_flags != OBD_CONNECT_EMPTY) { 
+                /* Keep the original connect flags pristine */
+                OBD_ALLOC(ocd, sizeof(*ocd));
+                if (!ocd) 
+                        RETURN(-ENOMEM);
+                *ocd = lov->ocd;
+        }
+        rc = lov_connect_obd(obd, tgt, 1, ocd);
+        if (ocd)
+                OBD_FREE(ocd, sizeof(*ocd));
         if (rc)
                 GOTO(out, rc);
 
@@ -516,7 +529,6 @@ lov_add_obd(struct obd_device *obd, struct obd_uuid *uuidp, int index, int gen)
         if (rc) {
                 CERROR("add failed (%d), deleting %s\n", rc, 
                        (char *)tgt->uuid.uuid);
-                //lov_disconnect_obd(obd, tgt);
                 lov_del_obd(obd, &tgt->uuid, index, 0);
         }
         RETURN(rc);
index 225b809..f7f1250 100644 (file)
@@ -49,8 +49,10 @@ void mds_lov_update_objids(struct obd_device *obd, obd_id *ids)
 
         lock_kernel();
         for (i = 0; i < mds->mds_lov_desc.ld_tgt_count; i++)
-                if (ids[i] > (mds->mds_lov_objids)[i])
+                if (ids[i] > (mds->mds_lov_objids)[i]) {
                         (mds->mds_lov_objids)[i] = ids[i];
+                        mds->mds_lov_objids_dirty = 1;
+                }
         unlock_kernel();
         EXIT;
 }
@@ -64,6 +66,7 @@ static int mds_lov_read_objids(struct obd_device *obd)
         ENTRY;
 
         LASSERT(!mds->mds_lov_objids_size);
+        LASSERT(!mds->mds_lov_objids_dirty);
 
         /* Read everything in the file, even if our current lov desc 
            has fewer targets. Old targets not in the lov descriptor 
@@ -83,17 +86,16 @@ static int mds_lov_read_objids(struct obd_device *obd)
         rc = fsfilt_read_record(obd, mds->mds_lov_objid_filp, ids, size, &off);
         if (rc < 0) {
                 CERROR("Error reading objids %d\n", rc);
-        } else {
-                mds->mds_lov_objids_red = size / sizeof(*ids); 
-                rc = 0;
+                RETURN(rc);
         }
-
-        for (i = 0; i < mds->mds_lov_objids_red; i++)
-                //FIXME D_ERROR
-                CDEBUG(D_INFO|D_ERROR, "read last object "LPU64" for idx %d\n",
+                
+        mds->mds_lov_objids_in_file = size / sizeof(*ids); 
+        
+        for (i = 0; i < mds->mds_lov_objids_in_file; i++) {
+                CDEBUG(D_INFO, "read last object "LPU64" for idx %d\n",
                        mds->mds_lov_objids[i], i);
-
-        RETURN(rc);
+        }
+        RETURN(0);
 }
 
 int mds_lov_write_objids(struct obd_device *obd)
@@ -103,19 +105,26 @@ int mds_lov_write_objids(struct obd_device *obd)
         int i, rc, tgts; 
         ENTRY;
 
-        tgts = max(mds->mds_lov_desc.ld_tgt_count, mds->mds_lov_objids_red);
+        if (!mds->mds_lov_objids_dirty)
+                RETURN(0);
+
+        tgts = max(mds->mds_lov_desc.ld_tgt_count, mds->mds_lov_objids_in_file);
 
         if (!tgts)
                 RETURN(0);
 
         for (i = 0; i < tgts; i++)
-                //FIXME D_ERROR
-                CDEBUG(D_INFO|D_ERROR, "writing last object "LPU64" for idx %d\n",
+                CDEBUG(D_INFO, "writing last object "LPU64" for idx %d\n",
                        mds->mds_lov_objids[i], i);
 
         rc = fsfilt_write_record(obd, mds->mds_lov_objid_filp,
                                  mds->mds_lov_objids, tgts * sizeof(obd_id),
                                  &off, 0);
+        if (rc >= 0) {
+                mds->mds_lov_objids_dirty = 0;
+                rc = 0;
+        }
+
         RETURN(rc);
 }
 
@@ -189,17 +198,23 @@ static int mds_lov_update_desc(struct obd_device *obd, struct obd_export *lov)
             (size > mds->mds_lov_objids_size)) {
                 obd_id *ids;
                 
-                /* add room for a bunch at a time */
-                size = (ld->ld_tgt_count + 8) * sizeof(obd_id);
+                /* add room by powers of 2 */
+                size = 1;
+                while (size < ld->ld_tgt_count) 
+                        size = size << 1;
+                CERROR("Next size=%d\n", size);
+                size = size * sizeof(obd_id);
 
                 OBD_ALLOC(ids, size);
                 if (ids == NULL)
                         GOTO(out, rc = -ENOMEM);
                 memset(ids, 0, size);
                 if (mds->mds_lov_objids_size) {
+                        obd_id *old_ids = mds->mds_lov_objids;
                         memcpy(ids, mds->mds_lov_objids, 
                                mds->mds_lov_objids_size);
-                        OBD_FREE(mds->mds_lov_objids, mds->mds_lov_objids_size);
+                        mds->mds_lov_objids = ids;
+                        OBD_FREE(old_ids, mds->mds_lov_objids_size);
                 }
                 mds->mds_lov_objids = ids;
                 mds->mds_lov_objids_size = size;
@@ -226,8 +241,8 @@ static int mds_lov_add_ost(struct obd_device *obd, struct obd_device *watched,
         int rc = 0;
         ENTRY;
 
-        //FIXME remove D_ERROR
-        CDEBUG(D_CONFIG|D_ERROR, "Updating mds lov for OST idx %d\n", idx);
+        //FIXME remove D_WARNING
+        CDEBUG(D_CONFIG|D_WARNING, "Updating mds lov for OST idx %d\n", idx);
 
         old_count = mds->mds_lov_desc.ld_tgt_count;
         rc = mds_lov_update_desc(obd, mds->mds_osc_exp);
@@ -240,7 +255,7 @@ static int mds_lov_add_ost(struct obd_device *obd, struct obd_device *watched,
                 RETURN(-EINVAL);
         }
         
-        if (idx >= mds->mds_lov_objids_red) {
+        if (idx >= mds->mds_lov_objids_in_file) {
                 /* We never read this lastid; ask the osc */
                 obd_id lastid;
                 __u32 size = sizeof(lastid);
@@ -252,6 +267,7 @@ static int mds_lov_add_ost(struct obd_device *obd, struct obd_device *watched,
                 mds->mds_lov_objids[idx] = lastid;
                 CWARN("got last object "LPU64" from OST %d\n",
                       mds->mds_lov_objids[idx], idx);
+                mds->mds_lov_objids_dirty = 1;
                 mds_lov_write_objids(obd);
         } else {
                 /* We did read this lastid; tell the osc */ 
@@ -322,7 +338,7 @@ int mds_lov_connect(struct obd_device *obd, char * lov_name)
 
         /* If we're mounting this code for the first time on an existing FS,
          * we need to populate the objids array from the real OST values */
-        if (mds->mds_lov_desc.ld_tgt_count > mds->mds_lov_objids_red) {
+        if (mds->mds_lov_desc.ld_tgt_count > mds->mds_lov_objids_in_file) {
                 int size = sizeof(obd_id) * mds->mds_lov_desc.ld_tgt_count;
                 rc = obd_get_info(mds->mds_osc_exp, strlen("last_id"),
                                   "last_id", &size, mds->mds_lov_objids);
@@ -330,6 +346,7 @@ int mds_lov_connect(struct obd_device *obd, char * lov_name)
                         for (i = 0; i < mds->mds_lov_desc.ld_tgt_count; i++)
                                 CWARN("got last object "LPU64" from OST %d\n",
                                       mds->mds_lov_objids[i], i);
+                        mds->mds_lov_objids_dirty = 1;
                         rc = mds_lov_write_objids(obd);
                         if (rc)
                                 CERROR("got last objids from OSTs, but error "
@@ -586,7 +603,7 @@ static int __mds_lov_synchronize(void *data)
         struct mds_obd *mds;
         struct obd_uuid *uuid = NULL;
         __u32  idx;
-        int rc = 0;
+        int rc = 0, have_sem = 0;
         ENTRY;
 
         obd = mlsi->mlsi_obd;
@@ -600,9 +617,15 @@ static int __mds_lov_synchronize(void *data)
 
         LASSERT(obd != NULL);
 
-        /* Hold this throughout a synchronize, and wherever we
-           reference the contents of mds_lov_desc */
-        down(&mds->mds_lov_sem);
+        /* We can't change the target count in one of these sync
+           threads while another sync thread is doing the clearorphans on
+           all the targets. */
+        if (!watched || (idx != MLSI_NO_INDEX)) {
+                /* if we're syncing a particular target, or we're not 
+                   changing the target_count, then we don't need the sem */
+                down(&mds->mds_lov_sem);
+                have_sem++;
+        }
 
         rc = obd_set_info(mds->mds_osc_exp, strlen(KEY_MDS_CONN),
                           KEY_MDS_CONN, 0, uuid);
@@ -643,7 +666,8 @@ static int __mds_lov_synchronize(void *data)
         }
 
 out:
-        up(&mds->mds_lov_sem);
+        if (have_sem) 
+                up(&mds->mds_lov_sem);
         class_decref(obd);
         RETURN(rc);
 }
index 914599e..96c88f1 100644 (file)
@@ -570,7 +570,7 @@ int mgc_target_add(struct obd_export *exp, struct mgmt_target_info *mti)
         rc = ptlrpc_queue_wait(req);
         if (!rc) {
                 rep_mti = lustre_swab_repbuf(req, 0, sizeof(*rep_mti),
-                                             lustre_swab_mgmt_target_info);
+                                             lustre_swab_mgs_target_info);
                 memcpy(mti, rep_mti, sizeof(*rep_mti));
                 CDEBUG(D_MGC, "target_add %s got index = %d\n",
                        mti->mti_svname, mti->mti_stripe_index);
@@ -603,7 +603,7 @@ int mgc_target_del(struct obd_export *exp, struct mgmt_target_info *mti)
         if (!rc) {
                 int index;
                 rep_mti = lustre_swab_repbuf(req, 0, sizeof(*rep_mti),
-                                             lustre_swab_mgmt_target_info);
+                                             lustre_swab_mgs_target_info);
                 index = rep_mti->mti_stripe_index;
                 if (index != mti->mti_stripe_index) {
                         CERROR ("OST DEL failed. rc=%d\n", index);
index c71287b..acd9df5 100644 (file)
@@ -304,18 +304,16 @@ static int mgs_handle_target_add(struct ptlrpc_request *req)
         ENTRY;
 
         mti = lustre_swab_reqbuf(req, 0, sizeof(*mti),
-                                 lustre_swab_mgmt_target_info);
+                                 lustre_swab_mgs_target_info);
         
         CDEBUG(D_MGS, "adding %s, index=%d\n", mti->mti_svname, 
                mti->mti_stripe_index);
 
         /* set the new target index if needed */
-        if (mti->mti_flags & LDD_F_NEED_INDEX) {
-                rc = mgs_set_next_index(obd, mti);
-                if (rc) {
-                        CERROR("Can't get index (%d)\n", rc);
-                        GOTO(out, rc);
-                }
+        rc = mgs_set_index(obd, mti);
+        if (rc) {
+                CERROR("Can't get index (%d)\n", rc);
+                GOTO(out, rc);
         }
 
         /* revoke the config lock so everyone will update */
index a142fc4..fad39a2 100644 (file)
@@ -14,7 +14,7 @@ extern struct lvfs_callback_ops mgs_lvfs_ops;
 
 int mgs_init_db_list(struct obd_device *obd);
 int mgs_cleanup_db_list(struct obd_device *obd);
-int mgs_set_next_index(struct obd_device *obd, struct mgmt_target_info *mti);
+int mgs_set_index(struct obd_device *obd, struct mgmt_target_info *mti);
 int mgs_write_log_target(struct obd_device *obd, struct mgmt_target_info *mti);
 
 #endif
index 45c69ff..91b3141 100644 (file)
@@ -135,7 +135,7 @@ static int next_ost_index(void *index_map, int map_len)
                          set_bit(i, index_map);
                          return i;
                  }
-        CERROR("max index exceeded.\n");
+        CERROR("max index %d exceeded.\n", i);
         return -1;
 }
 
@@ -269,7 +269,7 @@ static int mgs_find_or_make_db(struct obd_device *obd, char *name,
         return 0;
 }
 
-int mgs_set_next_index(struct obd_device *obd, struct mgmt_target_info *mti)
+int mgs_set_index(struct obd_device *obd, struct mgmt_target_info *mti)
 {
         struct fs_db *db;
         int rc = 0;
@@ -280,8 +280,25 @@ int mgs_set_next_index(struct obd_device *obd, struct mgmt_target_info *mti)
                 return rc;
         }
 
-        /* FIXME use mti->mti_stripe_index if given, report error if already 
-           in use */
+        if (!(mti->mti_flags & LDD_F_NEED_INDEX)) {
+                if (mti->mti_stripe_index >= INDEX_MAP_SIZE * 8) {
+                        LCONSOLE_ERROR("Server %s requested index %d, but the"
+                                       "max index is %d.\n", 
+                                       mti->mti_svname, mti->mti_stripe_index,
+                                       INDEX_MAP_SIZE * 8);
+                        return -ERANGE;
+                }
+                if (test_bit(mti->mti_stripe_index, db->fd_index_map)) {
+                        LCONSOLE_ERROR("Server %s requested index %d, but that"
+                                       "index is already in use.\n", 
+                                       mti->mti_svname, mti->mti_stripe_index);
+                        return -EADDRINUSE;
+                } else {
+                        set_bit(mti->mti_stripe_index, db->fd_index_map);
+                        return 0;
+                }
+        } 
+
         if (mti->mti_flags & LDD_F_SV_TYPE_OST) {
                 rc = next_ost_index(db->fd_index_map, INDEX_MAP_SIZE);
                 if (rc == -1)
@@ -369,6 +386,14 @@ static inline int record_add_uuid(struct obd_device *obd,
                                   uint64_t nid, char *uuid)
 {
         return record_base(obd,llh,NULL,nid,LCFG_ADD_UUID,uuid,0,0,0);
+
+}
+
+static inline int record_add_conn(struct obd_device *obd, 
+                                  struct llog_handle *llh, 
+                                  char *uuid)
+{
+        return record_base(obd,llh,NULL,0,LCFG_ADD_CONN,uuid,0,0,0);
 }
 
 static inline int record_attach(struct obd_device *obd, struct llog_handle *llh,
@@ -591,6 +616,7 @@ static int mgs_clear_log(struct obd_device *obd, char *name)
 
 /* lov is the first thing in the mdt and client logs */
 static int mgs_write_log_lov(struct obd_device *obd, struct fs_db *db, 
+                             struct mgmt_target_info *mti,
                              char *logname, char *lovname)
 {
         struct llog_handle *llh = NULL;
@@ -614,10 +640,10 @@ static int mgs_write_log_lov(struct obd_device *obd, struct fs_db *db,
         /* Use defaults here, will fix them later with LCFG_PARAM */
         lovdesc->ld_magic = LOV_DESC_MAGIC;
         lovdesc->ld_tgt_count = 0;
-        lovdesc->ld_pattern = 0;
-        lovdesc->ld_default_stripe_count = 1;
-        lovdesc->ld_default_stripe_size = 1024*1024;
-        lovdesc->ld_default_stripe_offset = 0;
+        lovdesc->ld_default_stripe_count = mti->mti_stripe_count;
+        lovdesc->ld_pattern = mti->mti_stripe_pattern;
+        lovdesc->ld_default_stripe_size = mti->mti_stripe_size;
+        lovdesc->ld_default_stripe_offset = mti->mti_stripe_offset;
         sprintf((char*)lovdesc->ld_uuid.uuid, "%s_UUID", lovname);
         /* can these be the same? */
         uuid = (char *)lovdesc->ld_uuid.uuid;
@@ -640,7 +666,8 @@ static int mgs_write_log_mdt(struct obd_device *obd, struct fs_db *db,
 {
         struct llog_handle *llh = NULL;
         char *cliname, *mdcname, *lovname, *nodeuuid, *mdsuuid, *mdcuuid;
-        int rc, first_log = 0;
+        lnet_nid_t nid;
+        int rc, i, first_log = 0;
 
         CDEBUG(D_MGS, "writing new mdt %s\n", mti->mti_svname);
 
@@ -650,7 +677,7 @@ static int mgs_write_log_mdt(struct obd_device *obd, struct fs_db *db,
                 /* This is the first time for all logs for this fs, 
                    since any ost should have already started the mdt log. */
                 first_log++;
-                rc = mgs_write_log_lov(obd, db, mti->mti_svname,
+                rc = mgs_write_log_lov(obd, db, mti, mti->mti_svname,
                                        lovname);
         } 
 
@@ -682,11 +709,11 @@ static int mgs_write_log_mdt(struct obd_device *obd, struct fs_db *db,
         name_create(mti->mti_fsname, "-clilov", &lovname);
         if (first_log) {
                 /* Start client log */
-                rc = mgs_write_log_lov(obd, db, cliname, lovname);
+                rc = mgs_write_log_lov(obd, db, mti, cliname, lovname);
         }
 
-        /* Add the mdt info to the client */
-        name_create(libcfs_nid2str(mti->mti_nid), "_UUID", &nodeuuid);
+        /* Add the mdt info to the client log */
+        name_create(libcfs_nid2str(mti->mti_nids[0]), /*"_UUID"*/"", &nodeuuid);
         name_create(mti->mti_svname, "-mdc", &mdcname);
         name_create(mdcname, "_UUID", &mdcuuid);
         /* 
@@ -698,13 +725,19 @@ static int mgs_write_log_mdt(struct obd_device *obd, struct fs_db *db,
         #14 L mount_option 0:  1:client  2:lov1  3:MDC_uml1_mdsA_MNT_client
         */
         rc = record_start_log(obd, &llh, cliname);
-        /* FIXME can we just use the nid as the node uuid, or do we really
-           need the hostname? */
         rc = record_marker(obd, llh, db, CM_START, "add mdc"); 
-        rc = record_add_uuid(obd, llh, mti->mti_nid, nodeuuid);
+        for (i = 0; i < mti->mti_nid_count; i++) {
+                CERROR("add nid %s\n", libcfs_nid2str(mti->mti_nids[i]));
+                rc = record_add_uuid(obd, llh, mti->mti_nids[i], nodeuuid);
+        }
         rc = record_attach(obd, llh, mdcname, LUSTRE_MDC_NAME, mdcuuid);
         rc = record_setup(obd,llh,mdcname,mdsuuid,nodeuuid,0,0);
-        /* FIXME add uuid, add_conn for failover mdt's */
+        for (i = 0; i < mti->mti_failnid_count; i++) {
+                nid = mti->mti_failnids[i];
+                CERROR("add failover nid %s\n", libcfs_nid2str(nid));
+                rc = record_add_uuid(obd, llh, nid, libcfs_nid2str(nid));
+                rc = record_add_conn(obd, llh, libcfs_nid2str(nid));
+        }
         rc = record_mount_opt(obd, llh, cliname, lovname, mdcname);
         rc = record_marker(obd, llh, db, CM_END, "add mdc"); 
         rc = record_end_log(obd, &llh);
@@ -726,17 +759,18 @@ static int mgs_write_log_osc(struct obd_device *obd, struct fs_db *db,
         struct llog_handle *llh = NULL;
         char *nodeuuid, *oscname, *oscuuid, *lovuuid;
         char index[5];
-        int rc;
+        lnet_nid_t nid;
+        int i, rc;
 
         if (mgs_log_is_empty(obd, logname)) {
                 /* The first time an osc is added, setup the lov */
-                rc = mgs_write_log_lov(obd, db, logname, lovname);
+                rc = mgs_write_log_lov(obd, db, mti, logname, lovname);
         }
   
         CDEBUG(D_MGS, "adding osc for %s to log %s\n",
                mti->mti_svname, logname);
 
-        name_create(libcfs_nid2str(mti->mti_nid), "_UUID", &nodeuuid);
+        name_create(libcfs_nid2str(mti->mti_nids[0]), /*"_UUID"*/"", &nodeuuid);
         name_create(mti->mti_svname, "-osc", &oscname);
         name_create(oscname, "_UUID", &oscuuid);
         name_create(lovname, "_UUID", &lovuuid);
@@ -751,10 +785,18 @@ static int mgs_write_log_osc(struct obd_device *obd, struct fs_db *db,
         */
         rc = record_start_log(obd, &llh, logname);
         rc = record_marker(obd, llh, db, CM_START, "add osc"); 
-        rc = record_add_uuid(obd, llh, mti->mti_nid, nodeuuid);
+        for (i = 0; i < mti->mti_nid_count; i++) {
+                CERROR("add nid %s\n", libcfs_nid2str(mti->mti_nids[i]));
+                rc = record_add_uuid(obd, llh, mti->mti_nids[i], nodeuuid);
+        }
         rc = record_attach(obd, llh, oscname, LUSTRE_OSC_NAME, lovuuid);
         rc = record_setup(obd, llh, oscname, ostuuid, nodeuuid, 0, 0);
-        /* FIXME add uuid, add_conn for failover ost's */
+        for (i = 0; i < mti->mti_failnid_count; i++) {
+                nid = mti->mti_failnids[i];
+                CERROR("add failover nid %s\n", libcfs_nid2str(nid));
+                rc = record_add_uuid(obd, llh, nid, libcfs_nid2str(nid));
+                rc = record_add_conn(obd, llh, libcfs_nid2str(nid));
+        }
         snprintf(index, sizeof(index), "%d", mti->mti_stripe_index);
         rc = record_lov_add(obd,llh, lovname, ostuuid, index,"1"/*generation*/);
         rc = record_marker(obd, llh, db, CM_END, "add osc"); 
@@ -786,8 +828,11 @@ static int mgs_write_log_ost(struct obd_device *obd, struct fs_db *db,
            Heck, what do we do about the client and mds logs? We better
            abort. */
         if (!mgs_log_is_empty(obd, mti->mti_svname)) {
-                CERROR("The config log for %s already exists, not adding.\n",
-                       mti->mti_svname);
+                LCONSOLE_ERROR("The config log for %s already exists, yet the "
+                               "server claims it never registered.  It may have"
+                               " been reformatted, or the index changed. This "
+                               "must be resolved before this server can be "
+                               "added.\n", mti->mti_svname);
                 return -EALREADY;
         }
         /*
index 5ac4bc2..df5ca72 100644 (file)
@@ -319,6 +319,8 @@ static int ldd_write(struct lvfs_run_ctxt *mount_ctxt,
         ENTRY;
 
         LASSERT(ldd->ldd_magic == LDD_MAGIC);
+        
+        ldd->ldd_config_ver++;  
 
         push_ctxt(&saved, mount_ctxt, NULL);
         
@@ -842,26 +844,33 @@ static int server_add_target(struct super_block *sb, struct vfsmount *mnt)
                 sizeof(mti->mti_fsname));
         strncpy(mti->mti_svname, ldd->ldd_svname,
                 sizeof(mti->mti_svname));
-        // char             mti_nodename[NAME_MAXLEN];
-        // char             mti_uuid[UUID_MAXLEN];
-        /* FIXME nid 0 is lo generally, need to send all non-lo nids */
+        
+        mti->mti_nid_count = 0;
         while ((rc = LNetGetId(i++, &id)) != -ENOENT) {
                 if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND) 
                         continue;
-                /* FIXME use all non-lo nids, not just first */
-                break;
+                mti->mti_nids[mti->mti_nid_count] = id.nid;
+                mti->mti_nid_count++;
+                if (mti->mti_nid_count >= MTI_NIDS_MAX) {
+                        CWARN("Only using first %d nids for %s\n",
+                              mti->mti_nid_count, mti->mti_svname);
+                        break;
+                }
         }       
-        mti->mti_nid = id.nid;
+      
+        memcpy(mti->mti_failnids, ldd->ldd_failnid, sizeof(mti->mti_failnids));
+        mti->mti_failnid_count = ldd->ldd_failnid_count;
         mti->mti_config_ver = 0;
         mti->mti_flags = ldd->ldd_flags;
         mti->mti_stripe_index = ldd->ldd_svindex;
-        mti->mti_stripe_pattern = 0; //FIXME
-        mti->mti_stripe_size = 1024*1024;  //FIXME    
-        mti->mti_stripe_offset = 0; //FIXME    
+        mti->mti_stripe_count = ldd->ldd_stripe_count;
+        mti->mti_stripe_pattern = ldd->ldd_stripe_pattern;
+        mti->mti_stripe_size = ldd->ldd_stripe_sz; 
+        mti->mti_stripe_offset = ldd->ldd_stripe_offset;  
 
         CDEBUG(D_MOUNT, "Initial registration %s, fs=%s, %s, index=%04x\n",
                mti->mti_svname, mti->mti_fsname,
-               libcfs_nid2str(mti->mti_nid), mti->mti_stripe_index);
+               libcfs_nid2str(mti->mti_nids[0]), mti->mti_stripe_index);
 
         /* Register the target */
         /* FIXME use mdc_process_config instead */
@@ -881,17 +890,17 @@ static int server_add_target(struct super_block *sb, struct vfsmount *mnt)
                        " %s\n",
                        ldd->ldd_svindex, mti->mti_stripe_index, 
                        mti->mti_svname);
-                ldd->ldd_flags &= ~(LDD_F_NEED_INDEX | LDD_F_NEED_REGISTER);
-                /* This server has never been started, so has no config */
-                ldd->ldd_config_ver = 0;  
                 ldd->ldd_svindex = mti->mti_stripe_index;
                 strncpy(ldd->ldd_svname, mti->mti_svname, 
                         sizeof(ldd->ldd_svname));
                 /* or ldd_make_sv_name(ldd); */
-                ldd_write(&mgc->obd_lvfs_ctxt, ldd);
                 /* FIXME write last_rcvd?, disk label? */
         }
 
+        /* Always write out the new flags */
+        ldd->ldd_flags &= ~(LDD_F_NEED_INDEX | LDD_F_NEED_REGISTER);
+        ldd_write(&mgc->obd_lvfs_ctxt, ldd);
+
 out:
         if (mti)        
                 OBD_FREE(mti, sizeof(*mti));
@@ -942,7 +951,7 @@ static int server_start_targets(struct super_block *sb, struct vfsmount *mnt)
            to read and write configs locally. */
         server_mgc_set_fs(lsi->lsi_mgc, sb);
 
-        /* Get a new index if needed */
+        /* Register if needed */
         if (lsi->lsi_ldd->ldd_flags & 
             (LDD_F_NEED_INDEX | LDD_F_NEED_REGISTER)) {
                 CDEBUG(D_MOUNT, "Need new target index from MGS\n");
@@ -1304,7 +1313,7 @@ static int server_fill_super(struct super_block *sb)
 
         /* append ldd nids to lmd nids */
         for (i = 0; (i < lsi->lsi_ldd->ldd_mgsnid_count) && 
-              (lsi->lsi_lmd->lmd_mgsnid_count < MAX_FAILOVER_NIDS); i++) {
+              (lsi->lsi_lmd->lmd_mgsnid_count < MTI_NIDS_MAX); i++) {
                 lsi->lsi_lmd->lmd_mgsnid[lsi->lsi_lmd->lmd_mgsnid_count++] = 
                         lsi->lsi_ldd->ldd_mgsnid[i];
         }
@@ -1475,7 +1484,7 @@ static int lmd_parse(char *options, struct lustre_mount_data *lmd)
                         LCONSOLE_ERROR("Can't parse NID '%s'\n", s1);
                         goto invalid;
                 }
-                if (lmd->lmd_mgsnid_count >= MAX_FAILOVER_NIDS) {
+                if (lmd->lmd_mgsnid_count >= MTI_NIDS_MAX) {
                         LCONSOLE_ERROR("Too many NIDs: '%s'\n", s1);
                         goto invalid;
                 }
index bd2da48..679d741 100644 (file)
@@ -566,15 +566,22 @@ void lustre_swab_mds_body (struct mds_body *b)
         CLASSERT(offsetof(typeof(*b), padding_4) != 0);
 }
 
-void lustre_swab_mgmt_target_info(struct mgmt_target_info *mti)
+void lustre_swab_mgs_target_info(struct mgmt_target_info *mti)
 {
-        __swab64s(&mti->mti_nid);
-        __swab32s(&mti->mti_config_ver);
-        __swab32s(&mti->mti_flags);
-        __swab32s(&mti->mti_stripe_index);
-        __swab32s(&mti->mti_stripe_pattern);
+        int i;
+        for (i = 0; i < MTI_NIDS_MAX; i++) {
+                __swab64s(&mti->mti_nids[i]);
+                __swab64s(&mti->mti_failnids[i]);
+        }
         __swab64s(&mti->mti_stripe_size);
         __swab64s(&mti->mti_stripe_offset);
+        __swab32s(&mti->mti_stripe_count);
+        __swab32s(&mti->mti_stripe_pattern);
+        __swab32s(&mti->mti_stripe_index);
+        __swab32s(&mti->mti_nid_count);
+        __swab32s(&mti->mti_failnid_count);
+        __swab32s(&mti->mti_config_ver);
+        __swab32s(&mti->mti_flags);
 }
 
 static void lustre_swab_obd_dqinfo (struct obd_dqinfo *i)
index 83b5766..5738266 100644 (file)
@@ -183,7 +183,7 @@ EXPORT_SYMBOL(lustre_swab_ldlm_lock_desc);
 EXPORT_SYMBOL(lustre_swab_ldlm_request);
 EXPORT_SYMBOL(lustre_swab_ldlm_reply);
 EXPORT_SYMBOL(lustre_swab_qdata);
-EXPORT_SYMBOL(lustre_swab_mgmt_target_info);
+EXPORT_SYMBOL(lustre_swab_mgs_target_info);
 
 /* recover.c */
 EXPORT_SYMBOL(ptlrpc_run_recovery_over_upcall);
index d53a6a6..c9f0f53 100644 (file)
@@ -138,6 +138,7 @@ int get_os_version()
         return version;
 }
 
+/* FIXME use popen */
 int run_command(char *cmd)
 {
        int i = 0,ret = 0;
@@ -440,11 +441,11 @@ int make_lustre_backfs(struct mkfs_opts *mop)
                 if (strstr(mop->mo_mkfsopts, "-I") == NULL) {
                         long inode_size = 0;
                         if (IS_MDT(&mop->mo_ldd)) {
-                                if (mop->mo_stripe_count > 77)
+                                if (mop->mo_ldd.ldd_stripe_count > 77)
                                         inode_size = 512; /* bz 7241 */
-                                else if (mop->mo_stripe_count > 34)
+                                else if (mop->mo_ldd.ldd_stripe_count > 34)
                                         inode_size = 2048;
-                                else if (mop->mo_stripe_count > 13)
+                                else if (mop->mo_ldd.ldd_stripe_count > 13)
                                         inode_size = 1024;
                                 else 
                                         inode_size = 512;
@@ -599,7 +600,7 @@ out_rmdir:
 void set_defaults(struct mkfs_opts *mop)
 {
         mop->mo_ldd.ldd_magic = LDD_MAGIC;
-        mop->mo_ldd.ldd_config_ver = 0;
+        mop->mo_ldd.ldd_config_ver = 1;
         mop->mo_ldd.ldd_flags = LDD_F_NEED_INDEX | LDD_F_NEED_REGISTER;
         mop->mo_ldd.ldd_mgsnid_count = 0;
         strcpy(mop->mo_ldd.ldd_fsname, "lustre");
@@ -609,7 +610,9 @@ void set_defaults(struct mkfs_opts *mop)
                 mop->mo_ldd.ldd_mount_type = LDD_MT_LDISKFS;
         
         mop->mo_ldd.ldd_svindex = -1;
-        mop->mo_stripe_count = 1;
+        mop->mo_ldd.ldd_stripe_count = 1;
+        mop->mo_ldd.ldd_stripe_sz = 1024 * 1024;
+        mop->mo_ldd.ldd_stripe_pattern = 0;
 }
 
 static inline void badopt(char opt, char *type)
@@ -685,7 +688,12 @@ int main(int argc , char *const argv[])
                 case 'c':
                         if (IS_MDT(&mop.mo_ldd)) {
                                 int stripe_count = atol(optarg);
-                                mop.mo_stripe_count = stripe_count;
+                                if (stripe_count <= 0) {
+                                        fprintf(stderr, "%s: bad stripe count "
+                                                "%d\n", progname, stripe_count);
+                                        exit(1);
+                                }
+                                mop.mo_ldd.ldd_stripe_count = stripe_count;
                         } else {
                                 badopt(opt, "MDT");
                         }
@@ -724,9 +732,9 @@ int main(int argc , char *const argv[])
                         while ((s2 = strsep(&s1, ","))) {
                                 mop.mo_ldd.ldd_mgsnid[i++] =
                                         libcfs_str2nid(s2);
-                                if (i >= MAX_FAILOVER_NIDS) {
-                                        fprintf(stderr, "%s: too many MGS nids, "
-                                                "ignoring %s\n", progname, s1);
+                                if (i >= MTI_NIDS_MAX) {
+                                        fprintf(stderr, "%s: too many MGS nids,"
+                                                " ignoring %s\n", progname, s1);
                                         break;
                                 }
                         }
@@ -762,12 +770,12 @@ int main(int argc , char *const argv[])
                         break;
                 case 's':
                         if (IS_MDT(&mop.mo_ldd)) 
-                                mop.mo_stripe_sz = atol(optarg) * 1024;
+                                mop.mo_ldd.ldd_stripe_sz = atol(optarg) * 1024;
                         else 
                                 badopt(opt, "MDT");
                         break;
                 case 't':
-                        mop.mo_timeout = atol(optarg);
+                        mop.mo_ldd.ldd_timeout = atol(optarg);
                         break;
                 case 'v':
                         verbose++;
@@ -815,8 +823,8 @@ int main(int argc , char *const argv[])
                                 "(is the lnet module loaded?)\n", progname);
                 } else {
                         if (i > 0) {
-                                if (i > MAX_FAILOVER_NIDS
-                                        i = MAX_FAILOVER_NIDS;
+                                if (i > MTI_NIDS_MAX)
+                                        i = MTI_NIDS_MAX;
                                 vprint("Adding %d local nids for MGS\n", i);
                                 memcpy(mop.mo_ldd.ldd_mgsnid, nids,
                                        sizeof(mop.mo_ldd.ldd_mgsnid));
@@ -833,9 +841,6 @@ int main(int argc , char *const argv[])
                 goto out;
         }
 
-        if (IS_MDT(&mop.mo_ldd) && (mop.mo_stripe_sz == 0))
-                mop.mo_stripe_sz = 1024 * 1024;
-        
         strcpy(mop.mo_device, argv[optind]);
         
         /* These are the permanent mount options (always included) */