Whamcloud - gitweb
land b1_5_qos (20051216_1716)
author niu <niu>
Fri, 16 Dec 2005 09:48:51 +0000 (09:48 +0000)
committer niu <niu>
Fri, 16 Dec 2005 09:48:51 +0000 (09:48 +0000)
15 files changed:
lustre/include/linux/lustre_idl.h
lustre/include/linux/obd.h
lustre/include/linux/obd_lov.h
lustre/lov/lov_internal.h
lustre/lov/lov_obd.c
lustre/lov/lov_qos.c
lustre/lov/lov_request.c
lustre/lov/lproc_lov.c
lustre/ptlrpc/pack_generic.c
lustre/tests/qos.sh [new file with mode: 0644]
lustre/tests/sanity.sh
lustre/utils/lctl.c
lustre/utils/lustre_cfg.c
lustre/utils/wirecheck.c
lustre/utils/wiretest.c

index 596a682..59b3256 100644 (file)
@@ -780,10 +780,10 @@ struct lov_desc {
         __u32 ld_pattern;                  /* PATTERN_RAID0, PATTERN_RAID1 */
         __u64 ld_default_stripe_size;      /* in bytes */
         __u64 ld_default_stripe_offset;    /* in bytes */
+        __u32 ld_qos_threshold;            /* in MB */
+        __u32 ld_qos_maxage;               /* in second */
         __u32 ld_padding_1;                /* also fix lustre_swab_lov_desc */
         __u32 ld_padding_2;                /* also fix lustre_swab_lov_desc */
-        __u32 ld_padding_3;                /* also fix lustre_swab_lov_desc */
-        __u32 ld_padding_4;                /* also fix lustre_swab_lov_desc */
         struct obd_uuid ld_uuid;
 };
 
index 7daf6f5..37500d1 100644 (file)
@@ -456,6 +456,8 @@ struct lov_tgt_desc {
         __u32                    ltd_gen;
         struct obd_export       *ltd_exp;
         int                      active; /* is this target up for requests */
+        int                      index;  /* index of target array in lov_obd */
+        struct list_head         qos_bavail_list; /* link entry to lov_obd */
 };
 
 struct lov_obd {
@@ -464,6 +466,7 @@ struct lov_obd {
         int bufsize;
         int refcount;
         unsigned int lo_catalog_loaded:1;
+        struct list_head qos_bavail_list; /* tgts list, sorted by available space, protected by lov_lock */
         struct lov_tgt_desc *tgts;
 };
 
index 78ac7bc..33ebf78 100644 (file)
@@ -24,4 +24,7 @@ static inline int lov_mds_md_v1_size(int stripes)
 #define IOC_LOV_SET_OSC_ACTIVE         _IOWR('g', 50, long)
 #define IOC_LOV_MAX_NR                 50
 
+#define QOS_DEFAULT_THRESHOLD           10 /* MB */
+#define QOS_DEFAULT_MAXAGE              5  /* Seconds */ 
+
 #endif
index 7bba3a0..12014ad 100644 (file)
@@ -130,8 +130,8 @@ int lov_stripe_number(struct lov_stripe_md *lsm, obd_off lov_off);
 
 /* lov_qos.c */
 void qos_shrink_lsm(struct lov_request_set *set);
-int qos_prep_create(struct lov_obd *lov, struct lov_request_set *set,
-                    int newea);
+int qos_prep_create(struct obd_export *exp, struct lov_request_set *set);
+void qos_update(struct lov_obd *lov, int idx, struct obd_statfs *osfs);
 int qos_remedy_create(struct lov_request_set *set, struct lov_request *req);
 
 /* lov_request.c */
index c045db2..f7673cf 100644 (file)
@@ -420,9 +420,15 @@ lov_add_obd(struct obd_device *obd, struct obd_uuid *uuidp, int index, int gen)
                         RETURN(-ENOMEM);
                 }
 
-                memset(tgt, 0, bufsize);
                 if (lov->tgts) {
+                        int i;
                         memcpy(tgt, lov->tgts, lov->bufsize);
+                        LASSERT(index == lov->desc.ld_tgt_count);
+                        for (i = 0; i < index; i++) {
+                                INIT_LIST_HEAD(&tgt[i].qos_bavail_list);
+                                list_splice(&lov->tgts[i].qos_bavail_list, 
+                                            &tgt[i].qos_bavail_list);
+                        }
                         OBD_FREE(lov->tgts, lov->bufsize);
                 }
 
@@ -442,6 +448,8 @@ lov_add_obd(struct obd_device *obd, struct obd_uuid *uuidp, int index, int gen)
         tgt->uuid = *uuidp;
         /* XXX - add a sanity check on the generation number. */
         tgt->ltd_gen = gen;
+        tgt->index = index;
+        INIT_LIST_HEAD(&tgt->qos_bavail_list);
 
         old_count = lov->desc.ld_tgt_count;
         if (index >= lov->desc.ld_tgt_count)
@@ -533,7 +541,8 @@ static int lov_setup(struct obd_device *obd, obd_count len, void *buf)
         struct lustre_cfg *lcfg = buf;
         struct lov_desc *desc;
         struct lov_obd *lov = &obd->u.lov;
-        int count;
+        struct lov_tgt_desc *tgts;
+        int count, i;
         ENTRY;
 
         if (LUSTRE_CFG_BUFLEN(lcfg, 1) < 1) {
@@ -598,11 +607,15 @@ static int lov_setup(struct obd_device *obd, obd_count len, void *buf)
                 CERROR("Out of memory\n");
                 RETURN(-EINVAL);
         }
-        memset(lov->tgts, 0, lov->bufsize);
+        for (i = 0, tgts = lov->tgts; i < max(count, 1); i++, tgts++) {
+                tgts->index = i;
+                INIT_LIST_HEAD(&tgts->qos_bavail_list);
+        }
 
         desc->ld_active_tgt_count = 0;
         lov->desc = *desc;
         spin_lock_init(&lov->lov_lock);
+        INIT_LIST_HEAD(&lov->qos_bavail_list);
 
         lprocfs_init_vars(lov, &lvars);
         lprocfs_obd_setup(obd, lvars.obd_vars);
@@ -760,11 +773,13 @@ static int lov_clear_orphans(struct obd_export *export, struct obdo *src_oa,
 /* the LOV expects oa->o_id to be set to the LOV object id */
 static int
 lov_create(struct obd_export *exp, struct obdo *src_oa,
-                      struct lov_stripe_md **ea, struct obd_trans_info *oti)
+           struct lov_stripe_md **ea, struct obd_trans_info *oti)
 {
         struct lov_request_set *set = NULL;
-        struct list_head *pos;
         struct lov_obd *lov;
+        struct obd_statfs osfs;
+        unsigned long maxage;
+        struct lov_request *req;
         int rc = 0;
         ENTRY;
 
@@ -785,15 +800,15 @@ lov_create(struct obd_export *exp, struct obdo *src_oa,
         lov = &exp->exp_obd->u.lov;
         if (!lov->desc.ld_active_tgt_count)
                 RETURN(-EIO);
+        
+        maxage = jiffies - lov->desc.ld_qos_maxage * HZ;
+        obd_statfs(exp->exp_obd, &osfs, maxage);                
 
         rc = lov_prep_create_set(exp, ea, src_oa, oti, &set);
         if (rc)
                 RETURN(rc);
 
-        list_for_each (pos, &set->set_list) {
-                struct lov_request *req =
-                        list_entry(pos, struct lov_request, rq_link);
-
+        list_for_each_entry(req, &set->set_list, rq_link) {
                 /* XXX: LOV STACKING: use real "obj_mdp" sub-data */
                 rc = obd_create(lov->tgts[req->rq_idx].ltd_exp,
                                 req->rq_oa, &req->rq_md, oti);
@@ -1780,6 +1795,7 @@ static int lov_statfs(struct obd_device *obd, struct obd_statfs *osfs,
                                 rc = err;
                         continue;
                 }
+                qos_update(lov, i, &lov_sfs);
 
                 if (!set) {
                         memcpy(osfs, &lov_sfs, sizeof(lov_sfs));
index 3a42f88..8ff1448 100644 (file)
@@ -28,6 +28,8 @@
 #define DEBUG_SUBSYSTEM S_LOV
 
 #ifdef __KERNEL__
+#include <linux/types.h>
+#include <linux/random.h>
 #else
 #include <liblustre.h>
 #endif
@@ -108,68 +110,343 @@ int qos_remedy_create(struct lov_request_set *set, struct lov_request *req)
 
 #define LOV_CREATE_RESEED_MULT 4
 #define LOV_CREATE_RESEED_MIN  1000
-/* FIXME use real qos data to prepare the lov create request */
-int qos_prep_create(struct lov_obd *lov, struct lov_request_set *set, int newea)
+/* alloc objects on osts with round-robin algorithm */
+static int alloc_rr(struct lov_obd *lov, int *idx_arr, int *stripe_cnt)
 {
-        static int ost_start_idx, ost_start_count;
+        static int ost_start_count, ost_start_idx;
         unsigned ost_idx, ost_count = lov->desc.ld_tgt_count;
         unsigned ost_active_count = lov->desc.ld_active_tgt_count;
-        struct lov_stripe_md *lsm = set->set_md;
-        struct obdo *src_oa = set->set_oa;
-        int i, rc = 0;
+        int i, *idx_pos = idx_arr;
         ENTRY;
-
-        LASSERT(src_oa->o_valid & OBD_MD_FLID);
-
-        lsm->lsm_object_id = src_oa->o_id;
-        if (!lsm->lsm_stripe_size)
-                lsm->lsm_stripe_size = lov->desc.ld_default_stripe_size;
-        if (!lsm->lsm_pattern) {
-                lsm->lsm_pattern = lov->desc.ld_pattern ?
-                        lov->desc.ld_pattern : LOV_PATTERN_RAID0;
+        
+        if (--ost_start_count <= 0) {
+                ost_start_idx = ll_insecure_random_int();
+                ost_start_count = 
+                        (LOV_CREATE_RESEED_MIN / max(ost_active_count, 1U) +
+                         LOV_CREATE_RESEED_MULT) * max(ost_active_count, 1U);
+        } else if (*stripe_cnt >= lov->desc.ld_active_tgt_count) {
+                /* If we allocate from all of the stripes, make the
+                 * next file start on the next OST. */
+                ++ost_start_idx;
         }
+        ost_idx = ost_start_idx % ost_count;
 
-        if (newea || lsm->lsm_oinfo[0].loi_ost_idx >= ost_count) {
-                if (--ost_start_count <= 0) {
-                        ost_start_idx = ll_insecure_random_int();
-                        ost_start_count =
-                          (LOV_CREATE_RESEED_MIN / max(ost_active_count, 1U) +
-                           LOV_CREATE_RESEED_MULT) * max(ost_active_count, 1U);
-                } else if (lsm->lsm_stripe_count >= ost_active_count) {
-                        /* If we allocate from all of the stripes, make the
-                         * next file start on the next OST. */
-                        ++ost_start_idx;
+        for (i = 0; i < ost_count; i++, ost_idx = (ost_idx + 1) % ost_count) {
+                ++ost_start_idx;
+                
+                if (lov->tgts[ost_idx].active == 0) {
+                        CDEBUG(D_HA, "lov idx %d inactive\n", ost_idx);
+                        continue;
                 }
-                ost_idx = ost_start_idx % ost_count;
-        } else {
-                ost_idx = lsm->lsm_oinfo[0].loi_ost_idx;
+                
+                *idx_pos = ost_idx;
+                idx_pos++;
+                /* got enough ost */
+                if (idx_pos - idx_arr == *stripe_cnt)
+                        RETURN(0);
         }
+        *stripe_cnt = idx_pos - idx_arr;
+        RETURN(0);
+}
 
-        CDEBUG(D_INODE, "allocating %d subobjs for objid "LPX64" at idx %d\n",
-               lsm->lsm_stripe_count, lsm->lsm_object_id, ost_idx);
+/* alloc objects on osts with specific stripe offset */
+static int alloc_specific(struct lov_obd *lov, struct lov_stripe_md *lsm,
+                          int *idx_arr)
+{
+        unsigned ost_idx, ost_count = lov->desc.ld_tgt_count;
+        int i, *idx_pos = idx_arr;
+        ENTRY;
 
+        ost_idx = lsm->lsm_oinfo[0].loi_ost_idx;
         for (i = 0; i < ost_count; i++, ost_idx = (ost_idx + 1) % ost_count) {
-                struct lov_request *req;
-
-                ++ost_start_idx;
                 if (lov->tgts[ost_idx].active == 0) {
                         CDEBUG(D_HA, "lov idx %d inactive\n", ost_idx);
                         continue;
                 }
+                *idx_pos = ost_idx;
+                idx_pos++;
+                /* got enough ost */
+                if (idx_pos - idx_arr == lsm->lsm_stripe_count)
+                        RETURN(0);
+        }
+        /* If we were passed specific striping params, then a failure to
+         * meet those requirements is an error, since we can't reallocate
+         * that memory (it might be part of a larger array or something).
+         *
+         * We can only get here if lsm_stripe_count was originally > 1.
+         */
+        CERROR("can't lstripe objid "LPX64": have %u want %u\n",
+               lsm->lsm_object_id, idx_pos - idx_arr, lsm->lsm_stripe_count);
+        RETURN(-EFBIG);
+}
+
+/* Minimum free space, in bytes, an OST must have to be used for object
+ * allocation.  ld_qos_threshold is a 32-bit count of MB, so widen it before
+ * shifting: otherwise thresholds of 4096 MB or more wrap around.
+ * Expects a local variable named "lov" to be in scope. */
+#define QOS_MIN                 ((__u64)lov->desc.ld_qos_threshold << 20)
+
+/* Available bytes / free inodes from the statfs data cached in the obd
+ * behind the target's export. */
+#define TGT_BAVAIL(tgt)         ((tgt)->ltd_exp->exp_obd->obd_osfs.os_bavail * \
+                                 (tgt)->ltd_exp->exp_obd->obd_osfs.os_bsize)
+#define TGT_FFREE(tgt)          ((tgt)->ltd_exp->exp_obd->obd_osfs.os_ffree)
+
+/*
+ * Choose OSTs for a new object, weighting the choice by available space.
+ *
+ * @exp:        LOV export; the aggregate statfs cached in its obd is compared
+ *              against QOS_MIN.
+ * @idx_arr:    output, filled with the chosen target indexes; the caller must
+ *              provide room for *stripe_cnt entries.
+ * @stripe_cnt: in: stripes wanted; out: stripes actually chosen, which is
+ *              reduced when fewer usable OSTs were found.
+ *
+ * Falls back to alloc_rr() (round-robin) when memory is short, fewer than two
+ * targets are configured, total free space is at or below QOS_MIN, the sorted
+ * target list is empty, or the largest and smallest free space differ by less
+ * than about 5%.  Returns 0 on success, -ENOSPC when no active target has
+ * both free space and free inodes, or whatever alloc_rr() returns.
+ */
+static int alloc_qos(struct obd_export *exp, int *idx_arr, int *stripe_cnt)
+{
+        struct lov_obd *lov = &exp->exp_obd->u.lov;
+        unsigned ost_count = lov->desc.ld_tgt_count;
+        __u64 cur_bavail, rand, *availspace, total_bavail = 0;
+        int *indexes, nfound, good_osts, i, warn = 0, rc = 0;
+        struct lov_tgt_desc *tgt;
+        int shift;
+        static time_t last_warn = 0;
+        time_t now = cfs_time_current_sec();
+        ENTRY;
+
+        availspace = NULL;
+        indexes = NULL;
+        OBD_ALLOC(availspace, sizeof(__u64) * ost_count);
+        /* The candidate scan below may record every target, not only
+         * *stripe_cnt of them, so size this array by the target count. */
+        OBD_ALLOC(indexes, sizeof(int) * ost_count);
+        if (!availspace || !indexes)
+                GOTO(out_free, rc = -EAGAIN);
+
+        spin_lock(&lov->lov_lock);
+        /* if free space is below some threshold, just go
+         * to do round-robin allocation */
+        total_bavail = (exp->exp_obd->obd_osfs.os_bavail * \
+                        exp->exp_obd->obd_osfs.os_bsize);
+        if (ost_count < 2 || total_bavail <= QOS_MIN) {
+                spin_unlock(&lov->lov_lock);
+                GOTO(out_free, rc = -EAGAIN);
+        }
+
+        /* if each ost has almost same free space, go to
+         * do rr allocation for better creation performance */
+        if (!list_empty(&lov->qos_bavail_list)) {
+                __u64 max, min, val;
+                /* first entry is taken as the fullest-free target, last
+                 * entry as the least-free one */
+                tgt = list_entry(lov->qos_bavail_list.next,
+                                 struct lov_tgt_desc, qos_bavail_list);
+                max = TGT_BAVAIL(tgt);
+                tgt = list_entry(lov->qos_bavail_list.prev,
+                                 struct lov_tgt_desc, qos_bavail_list);
+                min = TGT_BAVAIL(tgt);
+
+                val = (max >= min) ? (max - min) : (min - max);
+                min = (min * 13) >> 8;          /* less than 5% of gap */
+
+                if (val < min) {
+                        spin_unlock(&lov->lov_lock);
+                        GOTO(out_free, rc = -EAGAIN);
+                }
+        } else {
+                spin_unlock(&lov->lov_lock);
+                GOTO(out_free, rc = -EAGAIN);
+        }
+
+        total_bavail = 0;
+        good_osts = 0;
+        /* warn zero available space/inode every 30 min */
+        if (cfs_time_sub(now, last_warn) > 60 * 30)
+                warn = 1;
+        list_for_each_entry(tgt, &lov->qos_bavail_list, qos_bavail_list) {
+                if (!tgt->active)
+                        continue;
+                if (!TGT_BAVAIL(tgt)) {
+                        if (warn) {
+                                CWARN("avail space on %s is zero\n",
+                                      tgt->uuid.uuid);
+                                last_warn = now;
+                        }
+                        continue;
+                }
+                if (!TGT_FFREE(tgt)) {
+                        if (warn) {
+                                CWARN("free inode on %s is zero\n",
+                                      tgt->uuid.uuid);
+                                last_warn = now;
+                        }
+                        continue;
+                }
+                /* stop once we have enough candidates and the remaining
+                 * ones are at or below the threshold */
+                if ((TGT_BAVAIL(tgt) <= QOS_MIN) && (good_osts >= *stripe_cnt))
+                        break;
+                availspace[good_osts] = TGT_BAVAIL(tgt);
+                indexes[good_osts] = tgt->index;
+                total_bavail += availspace[good_osts];
+                good_osts++;
+        }
+
+        spin_unlock(&lov->lov_lock);
+
+        if (!total_bavail)
+                GOTO(out_free, rc = -ENOSPC);
+
+        /* if we don't have enough good OSTs, we reduce the stripe count. */
+        if (good_osts < *stripe_cnt)
+                *stripe_cnt = good_osts;
+
+        if (!*stripe_cnt)
+                GOTO(out_free, rc = -EAGAIN);
+
+        /* number of significant bits in total_bavail, plus one; never shift
+         * a 64-bit value by 64 or more */
+        nfound = shift = 0;
+        while (shift < 64 && (total_bavail >> shift) > 0)
+                shift++;
+        shift++;
+        /* search enough OSTs with free space weighted random allocation */
+        while (nfound < *stripe_cnt) {
+                cur_bavail = 0;
+
+                get_random_bytes(&rand, sizeof(rand));
+                /* the mask must be built in 64 bits: shift is normally
+                 * larger than the width of int */
+                if (shift < 64)
+                        rand &= ((1ULL << shift) - 1);
+                while (rand > total_bavail)
+                        rand -= total_bavail;
+
+                for (i = 0; i < good_osts; i++) {
+                        /* zero means this candidate was already chosen;
+                         * never hand out the same OST twice */
+                        if (availspace[i] == 0)
+                                continue;
+                        cur_bavail += availspace[i];
+                        if (cur_bavail >= rand) {
+                                total_bavail -= availspace[i];
+                                availspace[i] = 0;
+                                idx_arr[nfound] = indexes[i];
+                                nfound++;
+                                break;
+                        }
+                }
+                /* should never satisfy below condition */
+                if (cur_bavail == 0)
+                        break;
+        }
+        LASSERT(nfound == *stripe_cnt);
+
+out_free:
+        if (availspace)
+                OBD_FREE(availspace, sizeof(__u64) * ost_count);
+        if (indexes)
+                OBD_FREE(indexes, sizeof(int) * ost_count);
+        if (rc != -EAGAIN)
+                RETURN(rc);
+
+        /* -EAGAIN is internal: it means "use round-robin instead" */
+        rc = alloc_rr(lov, idx_arr, stripe_cnt);
+        RETURN(rc);
+}
 
+/*
+ * Allocate an array of lsm->lsm_stripe_count OST indexes and fill it.
+ *
+ * On success the array is handed back through @idx_arr, its allocated
+ * length through @arr_cnt, and the number of stripes actually chosen is
+ * returned.  On failure the array is released, *arr_cnt is reset to 0 and
+ * the error from the allocator is returned.
+ */
+static int alloc_idx_array(struct obd_export *exp, struct lov_stripe_md *lsm,
+                           int newea, int **idx_arr, int *arr_cnt)
+{
+        struct lov_obd *lov = &exp->exp_obd->u.lov;
+        int nstripes = lsm->lsm_stripe_count;
+        int *arr = NULL;
+        int pos, rc;
+        ENTRY;
+
+        *arr_cnt = nstripes;
+        OBD_ALLOC(arr, *arr_cnt * sizeof(int));
+        if (arr == NULL)
+                RETURN(-ENOMEM);
+
+        /* mark every slot as "not chosen yet" */
+        for (pos = 0; pos < *arr_cnt; pos++)
+                arr[pos] = -1;
+
+        /* an existing EA with a valid starting OST pins the layout;
+         * everything else goes through the QOS allocator */
+        if (!newea &&
+            lsm->lsm_oinfo[0].loi_ost_idx < lov->desc.ld_tgt_count)
+                rc = alloc_specific(lov, lsm, arr);
+        else
+                rc = alloc_qos(exp, arr, &nstripes);
+
+        if (rc != 0) {
+                OBD_FREE(arr, *arr_cnt * sizeof(int));
+                *arr_cnt = 0;
+                RETURN(rc);
+        }
+
+        *idx_arr = arr;
+        RETURN(nstripes);
+}
+
+/* Release an index array of @arr_cnt ints; a zero count means there is
+ * nothing to free. */
+static void free_idx_array(int *idx_arr, int arr_cnt)
+{
+        if (arr_cnt == 0)
+                return;
+
+        OBD_FREE(idx_arr, arr_cnt * sizeof(int));
+}
+
+int qos_prep_create(struct obd_export *exp, struct lov_request_set *set)
+{
+        struct lov_obd *lov = &exp->exp_obd->u.lov;
+        struct lov_stripe_md *lsm;
+        struct obdo *src_oa = set->set_oa;
+        struct obd_trans_info *oti = set->set_oti;
+        int i, stripes, rc = 0, newea = 0;
+        int *idx_arr, idx_cnt = 0;
+        ENTRY;
+
+        LASSERT(src_oa->o_valid & OBD_MD_FLID);
+        if (set->set_md == NULL) {
+                int stripe_cnt = lov_get_stripecnt(lov, 0);
+
+                /* If the MDS file was truncated up to some size, stripe over
+                 * enough OSTs to allow the file to be created at that size. */
+                if (src_oa->o_valid & OBD_MD_FLSIZE) {
+                        struct lov_tgt_desc *tgt;
+                        stripes = 1;
+                        
+                        spin_lock(&lov->lov_lock);
+                        list_for_each_entry(tgt, &lov->qos_bavail_list, 
+                                            qos_bavail_list) {
+                                if (!tgt->active)
+                                        continue;
+                                if (TGT_BAVAIL(tgt) * stripes > src_oa->o_size)
+                                        break;
+                                stripes++;
+                        }
+                        spin_unlock(&lov->lov_lock);
+
+                        if (stripes < stripe_cnt)
+                                stripes = stripe_cnt;
+                } else {
+                        stripes = stripe_cnt;
+                }
+
+                rc = lov_alloc_memmd(&set->set_md, stripes, 
+                                     lov->desc.ld_pattern ?
+                                     lov->desc.ld_pattern : LOV_PATTERN_RAID0);
+                if (rc < 0)
+                        GOTO(out_err, rc);
+                rc = 0;
+                newea = 1;
+        }
+        lsm = set->set_md;
+       
+        lsm->lsm_object_id = src_oa->o_id;
+        if (!lsm->lsm_stripe_size)
+                lsm->lsm_stripe_size = lov->desc.ld_default_stripe_size;
+        if (!lsm->lsm_pattern) {
+                LASSERT(lov->desc.ld_pattern);
+                lsm->lsm_pattern = lov->desc.ld_pattern;
+        }
+
+        stripes = alloc_idx_array(exp, lsm, newea, &idx_arr, &idx_cnt);
+        LASSERT(stripes <= lsm->lsm_stripe_count);
+        if (stripes <= 0)
+                GOTO(out_err, rc = stripes ? stripes : -EIO);
+        
+        for (i = 0; i < stripes; i++) {
+                struct lov_request *req;
+                int ost_idx = idx_arr[i];
+                LASSERT(ost_idx >= 0);
+                
                 OBD_ALLOC(req, sizeof(*req));
                 if (req == NULL)
-                        GOTO(out, rc = -ENOMEM);
+                        GOTO(out_err, rc = -ENOMEM);
+                lov_set_add_req(req, set);
 
                 req->rq_buflen = sizeof(*req->rq_md);
                 OBD_ALLOC(req->rq_md, req->rq_buflen);
                 if (req->rq_md == NULL)
-                        GOTO(out, rc = -ENOMEM);
-
+                        GOTO(out_err, rc = -ENOMEM);
+                
                 req->rq_oa = obdo_alloc();
                 if (req->rq_oa == NULL)
-                        GOTO(out, rc = -ENOMEM);
-
+                        GOTO(out_err, rc = -ENOMEM);
+                
                 req->rq_idx = ost_idx;
                 req->rq_stripe = i;
                 /* create data objects with "parent" OA */
@@ -180,40 +457,74 @@ int qos_prep_create(struct lov_obd *lov, struct lov_request_set *set, int newea)
                  *     stripe which holds the existing file size.
                  */
                 if (src_oa->o_valid & OBD_MD_FLSIZE) {
-                        if (lov_stripe_offset(lsm, src_oa->o_size, i,
-                                              &req->rq_oa->o_size) < 0 &&
-                            req->rq_oa->o_size)
-                                req->rq_oa->o_size--;
+                        req->rq_oa->o_size = 
+                                lov_size_to_stripe(lsm, src_oa->o_size, i);
 
                         CDEBUG(D_INODE, "stripe %d has size "LPU64"/"LPU64"\n",
                                i, req->rq_oa->o_size, src_oa->o_size);
                 }
 
-                lov_set_add_req(req, set);
-
-                /* If we have allocated enough objects, we are OK */
-                if (set->set_count == lsm->lsm_stripe_count)
-                        GOTO(out, rc = 0);
         }
+        LASSERT(set->set_count == stripes);
 
-        if (set->set_count == 0)
-                GOTO(out, rc = -EIO);
-
-        /* If we were passed specific striping params, then a failure to
-         * meet those requirements is an error, since we can't reallocate
-         * that memory (it might be part of a larger array or something).
-         *
-         * We can only get here if lsm_stripe_count was originally > 1.
-         */
-        if (!newea) {
-                CERROR("can't lstripe objid "LPX64": have %u want %u, rc %d\n",
-                       lsm->lsm_object_id, set->set_count,
-                       lsm->lsm_stripe_count, rc);
-                rc = rc ? rc : -EFBIG;
-        } else {
+        if (stripes < lsm->lsm_stripe_count)
                 qos_shrink_lsm(set);
-                rc = 0;
+
+        if (oti && (src_oa->o_valid & OBD_MD_FLCOOKIE)) {
+                oti_alloc_cookies(oti, set->set_count);
+                if (!oti->oti_logcookies)
+                        GOTO(out_err, rc = -ENOMEM);
+                set->set_cookies = oti->oti_logcookies;
         }
-out:
-        RETURN(rc);
+out_err:
+        if (newea && rc)
+                obd_free_memmd(exp, &set->set_md);
+        free_idx_array(idx_arr, idx_cnt);
+        EXIT;
+        return rc;
 }
+
+/*
+ * Re-position @tgt inside the list (lov)->list_name, which is kept sorted in
+ * descending order of value(entry).  If @tgt is not linked yet it is first
+ * added at the head.  The entry is then moved towards the tail past larger
+ * neighbours, and towards the head past smaller ones.
+ *
+ * Caveat: the entry is unlinked and re-added explicitly; do not use
+ * list_move() to move an entry within the same list.
+ */
+#define list_adjust(tgt, lov, list_name, value) \
+{ \
+        struct list_head *element; \
+        struct lov_tgt_desc *tmp;  \
+        if (list_empty(&(tgt)->list_name)) \
+                list_add(&(tgt)->list_name, &(lov)->list_name); \
+        /* walk forward over entries that are larger than tgt */ \
+        element = (tgt)->list_name.next; \
+        while((element != &(lov)->list_name) && \
+              (tmp = list_entry(element, struct lov_tgt_desc, list_name)) && \
+              (value(tgt) < value(tmp))) \
+                element = element->next; \
+        if (element != (tgt)->list_name.next) { \
+                /* re-insert directly in front of element */ \
+                list_del_init(&(tgt)->list_name); \
+                list_add(&(tgt)->list_name, element->prev); \
+        } \
+        /* walk backward over entries that are smaller than tgt */ \
+        element = (tgt)->list_name.prev; \
+        while ((element != &(lov)->list_name) && \
+               (tmp = list_entry(element, struct lov_tgt_desc, list_name)) && \
+               (value(tgt) > value(tmp))) \
+                element = element->prev; \
+        if (element != (tgt)->list_name.prev) { \
+                /* re-insert directly behind element (the list head or the \
+                 * nearest entry that is not smaller than tgt) */ \
+                list_del_init(&(tgt)->list_name); \
+                list_add(&(tgt)->list_name, element); \
+        } \
+}
+
+/*
+ * Re-sort target @idx within lov->qos_bavail_list, under lov_lock.
+ *
+ * @osfs is only used for the zero-space warning and the debug message.
+ * NOTE(review): the sort key is TGT_BAVAIL(tgt), i.e. the statfs cached in
+ * the obd behind the target's export, not @osfs -- presumably both carry the
+ * same data at this point; confirm against the statfs path.
+ */
+void qos_update(struct lov_obd *lov, int idx, struct obd_statfs *osfs)
+{
+        struct lov_tgt_desc *tgt = &lov->tgts[idx];
+        __u64 bavail;
+        ENTRY;
+
+        bavail = osfs->os_bavail * osfs->os_bsize;
+        if (!bavail)
+                CWARN("ost %d has zero avail space!\n", idx);
+
+        CDEBUG(D_OTHER, "QOS: bfree now "LPU64"\n", bavail);
+
+        spin_lock(&lov->lov_lock);
+        list_adjust(tgt, lov, qos_bavail_list, TGT_BAVAIL);
+        spin_unlock(&lov->lov_lock);
+        /* pair the ENTRY above, as qos_prep_create() does */
+        EXIT;
+}
+
index 4547ee6..b971416 100644 (file)
@@ -597,10 +597,8 @@ int lov_fini_create_set(struct lov_request_set *set,struct lov_stripe_md **lsmp)
         LASSERT(set->set_exp);
         if (set == NULL)
                 RETURN(0);
-        if (set->set_completes) {
+        if (set->set_completes) 
                 rc = create_done(set->set_exp, set, lsmp);
-                /* FIXME update qos data here */
-        }
 
         if (atomic_dec_and_test(&set->set_refcount))
                 lov_finish_set(set);
@@ -655,9 +653,8 @@ int lov_prep_create_set(struct obd_export *exp, struct lov_stripe_md **lsmp,
                         struct obdo *src_oa, struct obd_trans_info *oti,
                         struct lov_request_set **reqset)
 {
-        struct lov_obd *lov = &exp->exp_obd->u.lov;
         struct lov_request_set *set;
-        int rc = 0, newea = 0;
+        int rc = 0;
         ENTRY;
 
         OBD_ALLOC(set, sizeof(*set));
@@ -669,53 +666,14 @@ int lov_prep_create_set(struct obd_export *exp, struct lov_stripe_md **lsmp,
         set->set_md = *lsmp;
         set->set_oa = src_oa;
         set->set_oti = oti;
-
-        if (set->set_md == NULL) {
-                int stripes, stripe_cnt;
-                stripe_cnt = lov_get_stripecnt(lov, 0);
-
-                /* If the MDS file was truncated up to some size, stripe over
-                 * enough OSTs to allow the file to be created at that size. */
-                if (src_oa->o_valid & OBD_MD_FLSIZE) {
-                        stripes=((src_oa->o_size+LUSTRE_STRIPE_MAXBYTES)>>12)-1;
-                        do_div(stripes, (__u32)(LUSTRE_STRIPE_MAXBYTES >> 12));
-
-                        if (stripes > lov->desc.ld_active_tgt_count)
-                                GOTO(out_set, rc = -EFBIG);
-                        if (stripes < stripe_cnt)
-                                stripes = stripe_cnt;
-                } else {
-                        stripes = stripe_cnt;
-                }
-
-                rc = lov_alloc_memmd(&set->set_md, stripes,
-                                     lov->desc.ld_pattern ?
-                                     lov->desc.ld_pattern : LOV_PATTERN_RAID0);
-                if (rc < 0)
-                        goto out_set;
-                newea = 1;
-        }
-
-        rc = qos_prep_create(lov, set, newea);
+        
+        rc = qos_prep_create(exp, set);
         if (rc)
-                goto out_lsm;
-
-        if (oti && (src_oa->o_valid & OBD_MD_FLCOOKIE)) {
-                oti_alloc_cookies(oti, set->set_count);
-                if (!oti->oti_logcookies)
-                        goto out_lsm;
-                set->set_cookies = oti->oti_logcookies;
-        }
-        *reqset = set;
-        RETURN(rc);
-
-out_lsm:
-        if (*lsmp == NULL)
-                obd_free_memmd(exp, &set->set_md);
-out_set:
-        lov_fini_create_set(set, lsmp);
+                lov_fini_create_set(set, lsmp);
+        else
+                *reqset = set;
         RETURN(rc);
-}
+}                                                
 
 static int common_attr_done(struct lov_request_set *set)
 {
index 12b755b..6cdc1a2 100644 (file)
@@ -118,6 +118,68 @@ static int lov_rd_desc_uuid(char *page, char **start, off_t off, int count,
         return snprintf(page, count, "%s\n", lov->desc.ld_uuid.uuid);
 }
 
+/* proc read handler: print the LOV descriptor's QOS threshold as "<n> MB"
+ * and flag end-of-file. */
+static int lov_rd_qos_threshold(char *page, char **start, off_t off, int count,
+                                int *eof, void *data)
+{
+        struct obd_device *obd = data;
+
+        LASSERT(obd != NULL);
+        *eof = 1;
+        return snprintf(page, count, "%u MB\n",
+                        obd->u.lov.desc.ld_qos_threshold);
+}
+
+/* proc write handler: parse an integer from @buffer and store it as the
+ * LOV descriptor's QOS threshold.  Values below 1 are rejected with -EINVAL;
+ * on success the number of bytes consumed (@count) is returned. */
+static int lov_wr_qos_threshold(struct file *file, const char *buffer,
+                                unsigned long count, void *data)
+{
+        struct obd_device *obd = data;
+        int mb, rc;
+
+        LASSERT(obd != NULL);
+
+        rc = lprocfs_write_helper(buffer, count, &mb);
+        if (rc != 0)
+                return rc;
+        if (mb < 1)
+                return -EINVAL;
+
+        obd->u.lov.desc.ld_qos_threshold = mb;
+        return count;
+}
+
+/* proc read handler: print the LOV descriptor's QOS max age as "<n> Sec"
+ * and flag end-of-file. */
+static int lov_rd_qos_maxage(char *page, char **start, off_t off, int count,
+                             int *eof, void *data)
+{
+        struct obd_device *obd = data;
+
+        LASSERT(obd != NULL);
+        *eof = 1;
+        return snprintf(page, count, "%u Sec\n",
+                        obd->u.lov.desc.ld_qos_maxage);
+}
+
+/* proc write handler: parse an integer from @buffer and store it as the
+ * LOV descriptor's QOS max age.  Values below 1 are rejected with -EINVAL;
+ * on success the number of bytes consumed (@count) is returned. */
+static int lov_wr_qos_maxage(struct file *file, const char *buffer,
+                             unsigned long count, void *data)
+{
+        struct obd_device *obd = data;
+        int secs, rc;
+
+        LASSERT(obd != NULL);
+
+        rc = lprocfs_write_helper(buffer, count, &secs);
+        if (rc != 0)
+                return rc;
+        if (secs < 1)
+                return -EINVAL;
+
+        obd->u.lov.desc.ld_qos_maxage = secs;
+        return count;
+}
+
 static void *lov_tgt_seq_start(struct seq_file *p, loff_t *pos)
 {
         struct obd_device *dev = p->private;
@@ -188,6 +250,8 @@ struct lprocfs_vars lprocfs_obd_vars[] = {
         { "kbytesfree",   lprocfs_rd_kbytesfree,  0, 0 },
         { "kbytesavail",  lprocfs_rd_kbytesavail, 0, 0 },
         { "desc_uuid",    lov_rd_desc_uuid,       0, 0 },
+        { "qos_threshold",lov_rd_qos_threshold, lov_wr_qos_threshold, 0 },
+        { "qos_maxage",   lov_rd_qos_maxage, lov_wr_qos_maxage, 0 },
         { 0 }
 };
 
index db308d3..84f9ee8 100644 (file)
@@ -741,6 +741,8 @@ void lustre_swab_lov_desc (struct lov_desc *ld)
         __swab64s (&ld->ld_default_stripe_size);
         __swab64s (&ld->ld_default_stripe_offset);
         __swab32s (&ld->ld_pattern);
+        __swab32s (&ld->ld_qos_threshold);
+        __swab32s (&ld->ld_qos_maxage);
         /* uuid endian insensitive */
 }
 
@@ -1695,6 +1697,18 @@ void lustre_assert_wire_constants(void)
                  (long long)(int)offsetof(struct mds_body, aclsize));
         LASSERTF((int)sizeof(((struct mds_body *)0)->aclsize) == 4, " found %lld\n",
                  (long long)(int)sizeof(((struct mds_body *)0)->aclsize));
+        LASSERTF((int)offsetof(struct mds_body, padding_2) == 156, " found %lld\n",
+                 (long long)(int)offsetof(struct mds_body, padding_2));
+        LASSERTF((int)sizeof(((struct mds_body *)0)->padding_2) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct mds_body *)0)->padding_2));
+        LASSERTF((int)offsetof(struct mds_body, padding_3) == 160, " found %lld\n",
+                 (long long)(int)offsetof(struct mds_body, padding_3));
+        LASSERTF((int)sizeof(((struct mds_body *)0)->padding_3) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct mds_body *)0)->padding_3));
+        LASSERTF((int)offsetof(struct mds_body, padding_4) == 164, " found %lld\n",
+                 (long long)(int)offsetof(struct mds_body, padding_4));
+        LASSERTF((int)sizeof(((struct mds_body *)0)->padding_4) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct mds_body *)0)->padding_4));
         LASSERTF(FMODE_READ == 1, " found %lld\n",
                  (long long)FMODE_READ);
         LASSERTF(FMODE_WRITE == 2, " found %lld\n",
@@ -1977,6 +1991,14 @@ void lustre_assert_wire_constants(void)
                  (long long)(int)offsetof(struct lov_desc, ld_default_stripe_offset));
         LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_default_stripe_offset) == 8, " found %lld\n",
                  (long long)(int)sizeof(((struct lov_desc *)0)->ld_default_stripe_offset));
+        LASSERTF((int)offsetof(struct lov_desc, ld_qos_threshold) == 32, " found %lld\n",
+                 (long long)(int)offsetof(struct lov_desc, ld_qos_threshold));
+        LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_qos_threshold) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct lov_desc *)0)->ld_qos_threshold));
+        LASSERTF((int)offsetof(struct lov_desc, ld_qos_maxage) == 36, " found %lld\n",
+                 (long long)(int)offsetof(struct lov_desc, ld_qos_maxage));
+        LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_qos_maxage) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct lov_desc *)0)->ld_qos_maxage));
         LASSERTF((int)offsetof(struct lov_desc, ld_uuid) == 48, " found %lld\n",
                  (long long)(int)offsetof(struct lov_desc, ld_uuid));
         LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_uuid) == 40, " found %lld\n",
diff --git a/lustre/tests/qos.sh b/lustre/tests/qos.sh
new file mode 100644 (file)
index 0000000..572bef0
--- /dev/null
@@ -0,0 +1,142 @@
+#!/bin/bash
+ # QOS allocation tests; creates and removes files under $MOUNT.
+set -e                  # stop at the first command that fails
+
+export PATH=`dirname $0`/../utils:$PATH        # search ../utils (relative to this script) first
+
+LFS=${LFS:-lfs}
+LCTL=${LCTL:-lctl}     # NOTE(review): not referenced anywhere else in this script
+MOUNT=${MOUNT:-/mnt/lustre}
+MAXAGE=${MAXAGE:-1}    # passed to set_qos as the qos_maxage value
+
+QOSFILE=$MOUNT/qos_file        # file the tests write to in order to fill one OST
+TAB='--'               # prefix for per-step progress messages
+
+echo "remove all files on $MOUNT..."
+rm -fr $MOUNT/*                # destructive: deletes everything under $MOUNT
+sleep 1                # to ensure we get up-to-date statfs info
+
+set_qos() {    # usage: set_qos <threshold in KB> <maxage>
+       for f in $(ls /proc/fs/lustre/lov/*/qos_threshold); do
+               echo $(($1 / 1024)) > $f        # the proc file is in MB
+       done
+       for f in $(ls /proc/fs/lustre/lov/*/qos_maxage); do
+               echo $2 > $f
+       done
+}
+
+# assume all osts has same free space 
+OSTCOUNT=`cat /proc/fs/lustre/lov/*/activeobd | head -n 1`    # first LOV's activeobd value
+TOTALAVAIL=`cat /proc/fs/lustre/llite/*/kbytesavail | head -n 1`
+SINGLEAVAIL=$(($TOTALAVAIL/$OSTCOUNT))        # per-OST share of kbytesavail
+MINFREE=$((1024 * 4))  # 4M, expressed in KB
+TOTALFFREE=`cat /proc/fs/lustre/llite/*/filesfree | head -n 1`        # NOTE(review): unused in this script
+
+if [ $SINGLEAVAIL -lt $MINFREE ]; then
+       echo "ERROR: single ost free size($SINGLEAVAIL kb) is too low!"
+       exit 1;
+fi
+if [ $OSTCOUNT -lt 3 ]; then
+       echo "WARN: ost count($OSTCOUNT) must be greater than 2!"
+       exit 0;         # too few OSTs: exit with success without running any test
+fi
+
+qos_test_1() {         # new objects must avoid an OST whose free space is below threshold
+       echo "[qos test 1]: creation skip almost full OST (avail space < threshold)"
+
+       # threshold = half of one OST's available space (KB)
+       THRESHOLD=$(($SINGLEAVAIL/2))
+       set_qos $THRESHOLD $MAXAGE
+
+       # single-stripe file; remember which OST its object landed on
+       $LFS setstripe $QOSFILE 65536 -1 1
+       FULLOST=$($LFS find -q $QOSFILE | awk '/\s*\d*/ {print $1}')
+
+       # fill that OST so that less than THRESHOLD KB should remain available
+       echo "$TAB fill the OST $FULLOST to almost fullness..."
+       dd if=/dev/zero of=$QOSFILE bs=1k \
+               count=$(($SINGLEAVAIL - $THRESHOLD + 1500)) > /dev/null 2>&1 || return 1
+       echo "$TAB done"
+       sleep $(($MAXAGE * 2))
+
+       echo "$TAB create 10 files with 1 stripe"
+       for n in $(seq 10); do
+               rm -f $MOUNT/file-$n
+               $LFS setstripe $MOUNT/file-$n 65536 -1 1
+               idx=$($LFS find -q $MOUNT/file-$n | awk '/\s*\d*/ {print $1}')
+               if [ $idx -eq $FULLOST ]; then
+                       echo "$TAB ERROR: create object on full OST $FULLOST"
+                       return 1
+               fi
+       done
+       echo "$TAB no object created on OST $FULLOST"
+
+       # remove the test files and the filler file
+       for n in $(seq 10); do
+               rm -f $MOUNT/file-$n
+       done
+       rm -f $QOSFILE
+       # put threshold (10240 KB) and maxage (1) back to normal values
+       set_qos 10240 1
+       sleep 1
+       return 0
+}
+
+# Test 2: with OST 0 filled to 3/4, new 2-stripe files should favor the other OSTs.
+qos_test_2 () {
+       echo "[qos test 2]: creation balancing over all OSTs by free space"
+
+       if [ $OSTCOUNT -lt 3 ]; then
+               echo "$TAB WARN: OST count < 3, test skipped"
+               return 0
+       fi
+
+       WADSZ=$(($SINGLEAVAIL * 3 / 4))
+       # fill OST 0 to 3/4 fullness
+       $LFS setstripe $QOSFILE 65536 0 1
+       echo "$TAB fill the OST 0 to 3/4 fulness..."
+       dd if=/dev/zero of=$QOSFILE count=$WADSZ bs=1k > /dev/null 2>&1 || return 1
+       echo "$TAB done"
+
+       # write 2 stripe files to fill up other OSTs
+       LOOPCNT=500
+       echo "$TAB create $LOOPCNT files with 2 stripe..."
+       for i in `seq $LOOPCNT`; do
+               rm -f $MOUNT/file-$i
+               $LFS setstripe $MOUNT/file-$i 65536 -1 2
+       done
+       echo "$TAB done"
+
+       # per-OST object counts under $MOUNT; grep -x so OST 10 is not counted as OST 0 or 1
+       CNT0=`$LFS find -q $MOUNT | awk '/\s*\d*/ {print $1}'| grep -cx 0`
+       CNT0=$(($CNT0 - 1))     # presumably discounts $QOSFILE's own object on OST 0
+       echo "$TAB object created on OST 0: $CNT0"
+
+       # every other OST must hold at least twice as many objects as OST 0
+       CNT0=$(($CNT0 * 2))
+       for i in `seq $(($OSTCOUNT - 1))`; do
+               # "|| true": grep -c exits 1 on a zero count, which would abort under set -e
+               CNT=`$LFS find -q $MOUNT | awk '/\s*\d*/ {print $1}'| grep -cx $i || true`
+               echo "$TAB object created on OST $i: $CNT"
+               if [ $CNT0 -gt $CNT ] ; then
+                       echo "$TAB ERROR: too much objects created on OST 0"
+                       return 1
+               fi
+       done
+       echo "$TAB objects created on OST 0 is about 1/4 of others'"
+
+       # cleanup
+       for i in `seq $LOOPCNT`; do
+               rm -f $MOUNT/file-$i
+       done
+       rm -f $QOSFILE
+       return 0
+}
+       
+
+# run tests
+for j in `seq 2`; do
+       qos_test_$j             # expands to qos_test_1, then qos_test_2
+       [ $? -ne 0 ] && exit 1  # NOTE(review): with "set -e" a failing test likely aborts before this check - confirm
+done
+exit 0
index 59e2a8f..4712495 100644 (file)
@@ -11,7 +11,7 @@ ONLY=${ONLY:-"$*"}
 ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"42a 42c  45   68"}
 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
 
-[ "$SLOW" = "no" ] && EXCEPT="$EXCEPT 24o 51b 51c 64b 71 101"
+[ "$SLOW" = "no" ] && EXCEPT="$EXCEPT 24o 51b 51c 64b 71 75 101"
 
 case `uname -r` in
 2.4*) FSTYPE=${FSTYPE:-ext3} ;;
@@ -2598,6 +2598,11 @@ test_74() { # bug 6149, 6184
 }
 run_test 74 "ldlm_enqueue freed-export error path (shouldn't LBUG)"
 
+test_75() {    # sanity test 75: run the standalone QOS test script
+       sh qos.sh       # relative path: resolved against the current working directory
+}
+run_test 75 "qos test ============================================"
+
 # on the LLNL clusters, runas will still pick up root's $TMP settings,
 # which will not be writable for the runas user, and then you get a CVS
 # error message with a corrupt path string (CVS bug) and panic.
index a60a892..9156745 100644 (file)
@@ -236,7 +236,7 @@ command_t cmdlist[] = {
          "usage: add_conn <conn_uuid> [priority]\n"},
         {"del_conn ", jt_lcfg_del_conn, 0,
          "usage: del_conn <conn_uuid> \n"},
-       
+
         /* Llog operations */ 
         {"llog_catlist", jt_llog_catlist, 0, 
          "list all catalog logs on current device.\n"
index eea50e4..819e0b0 100644 (file)
@@ -38,6 +38,7 @@
 #include <linux/lustre_idl.h>
 #include <linux/lustre_dlm.h>
 #include <linux/obd.h>          /* for struct lov_stripe_md */
+#include <linux/obd_lov.h>
 #include <linux/lustre_build_version.h>
 
 #include <unistd.h>
@@ -394,6 +395,8 @@ int jt_lcfg_lov_setup(int argc, char **argv)
                         jt_cmdname(argv[0]), argv[5]);
                 return CMD_HELP;
         }
+        desc.ld_qos_threshold = QOS_DEFAULT_THRESHOLD;
+        desc.ld_qos_maxage = QOS_DEFAULT_MAXAGE;
 
         if (argc == 7) {
                 desc.ld_tgt_count = strtoul(argv[6], &end, 0);
index 128472c..8361eae 100644 (file)
@@ -445,6 +445,8 @@ check_lov_desc(void)
         CHECK_MEMBER(lov_desc, ld_pattern);
         CHECK_MEMBER(lov_desc, ld_default_stripe_size);
         CHECK_MEMBER(lov_desc, ld_default_stripe_offset);
+        CHECK_MEMBER(lov_desc, ld_qos_threshold);
+        CHECK_MEMBER(lov_desc, ld_qos_maxage);
         CHECK_MEMBER(lov_desc, ld_uuid);
 }
 
index 7c38ad8..00f424b 100644 (file)
@@ -25,8 +25,8 @@ int main()
 void lustre_assert_wire_constants(void)
 {
         /* Wire protocol assertions generated by 'wirecheck'
-         * running on Linux mustang 2.6.12-1.1456_FC4smp #1 SMP Thu Sep 22 02:22:14 EDT 2005 i686 i68
-         * with gcc version 4.0.1 20050727 (Red Hat 4.0.1-5) */
+         * running on Linux localhost.localdomain 2.6.9-1.667 #1 Tue Nov 2 14:41:25 EST 2004 i686 i68
+         * with gcc version 3.4.3 20050227 (Red Hat 3.4.3-22.fc3) */
 
 
         /* Constants... */
@@ -816,6 +816,18 @@ void lustre_assert_wire_constants(void)
                  (long long)(int)offsetof(struct mds_body, aclsize));
         LASSERTF((int)sizeof(((struct mds_body *)0)->aclsize) == 4, " found %lld\n",
                  (long long)(int)sizeof(((struct mds_body *)0)->aclsize));
+        LASSERTF((int)offsetof(struct mds_body, padding_2) == 156, " found %lld\n",
+                 (long long)(int)offsetof(struct mds_body, padding_2));
+        LASSERTF((int)sizeof(((struct mds_body *)0)->padding_2) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct mds_body *)0)->padding_2));
+        LASSERTF((int)offsetof(struct mds_body, padding_3) == 160, " found %lld\n",
+                 (long long)(int)offsetof(struct mds_body, padding_3));
+        LASSERTF((int)sizeof(((struct mds_body *)0)->padding_3) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct mds_body *)0)->padding_3));
+        LASSERTF((int)offsetof(struct mds_body, padding_4) == 164, " found %lld\n",
+                 (long long)(int)offsetof(struct mds_body, padding_4));
+        LASSERTF((int)sizeof(((struct mds_body *)0)->padding_4) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct mds_body *)0)->padding_4));
         LASSERTF(FMODE_READ == 1, " found %lld\n",
                  (long long)FMODE_READ);
         LASSERTF(FMODE_WRITE == 2, " found %lld\n",
@@ -1098,6 +1110,14 @@ void lustre_assert_wire_constants(void)
                  (long long)(int)offsetof(struct lov_desc, ld_default_stripe_offset));
         LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_default_stripe_offset) == 8, " found %lld\n",
                  (long long)(int)sizeof(((struct lov_desc *)0)->ld_default_stripe_offset));
+        LASSERTF((int)offsetof(struct lov_desc, ld_qos_threshold) == 32, " found %lld\n",
+                 (long long)(int)offsetof(struct lov_desc, ld_qos_threshold));
+        LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_qos_threshold) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct lov_desc *)0)->ld_qos_threshold));
+        LASSERTF((int)offsetof(struct lov_desc, ld_qos_maxage) == 36, " found %lld\n",
+                 (long long)(int)offsetof(struct lov_desc, ld_qos_maxage));
+        LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_qos_maxage) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct lov_desc *)0)->ld_qos_maxage));
         LASSERTF((int)offsetof(struct lov_desc, ld_uuid) == 48, " found %lld\n",
                  (long long)(int)offsetof(struct lov_desc, ld_uuid));
         LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_uuid) == 40, " found %lld\n",