Whamcloud - gitweb
LU-80 lov: large stripe count support
[fs/lustre-release.git] / lustre / lov / lov_pack.c
index 4163434..a3b94ff 100644 (file)
  * GPL HEADER END
  */
 /*
- * Copyright  2008 Sun Microsystems, Inc. All rights reserved
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  */
 /*
+ * Copyright (c) 2011 Whamcloud, Inc.
+ */
+/*
  * This file is part of Lustre, http://www.lustre.org/
  * Lustre is a trademark of Sun Microsystems, Inc.
  *
@@ -59,7 +62,7 @@ static void lov_dump_lmm_common(int level, void *lmmp)
         struct lov_mds_md *lmm = lmmp;
 
         CDEBUG(level, "objid "LPX64", magic 0x%08x, pattern %#x\n",
-               le64_to_cpu(lmm->lmm_object_id),
+               (__u64)le64_to_cpu(lmm->lmm_object_id),
                le32_to_cpu(lmm->lmm_magic),
                le32_to_cpu(lmm->lmm_pattern));
         CDEBUG(level,"stripe_size %u, stripe_count %u\n",
@@ -80,8 +83,8 @@ static void lov_dump_lmm_objects(int level, struct lov_ost_data *lod,
         for (i = 0; i < stripe_count; ++i, ++lod) {
                 CDEBUG(level, "stripe %u idx %u subobj "LPX64"/"LPX64"\n", i,
                        le32_to_cpu(lod->l_ost_idx),
-                       le64_to_cpu(lod->l_object_gr),
-                       le64_to_cpu(lod->l_object_id));
+                       (__u64)le64_to_cpu(lod->l_object_seq),
+                       (__u64)le64_to_cpu(lod->l_object_id));
         }
 }
 
@@ -145,24 +148,7 @@ int lov_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
 
         if (lsm) {
                 lmm_magic = lsm->lsm_magic;
-
-                /* If we are just sizing the EA, limit the stripe count
-                 * to the actual number of OSTs in this filesystem. */
-                if (!lmmp) {
-                        stripe_count = lov_get_stripecnt(lov,
-                                                         lsm->lsm_stripe_count);
-                        lsm->lsm_stripe_count = stripe_count;
-                } else {
-                        stripe_count = lsm->lsm_stripe_count;
-                }
         } else {
-                /* No needs to allocated more than LOV_MAX_STRIPE_COUNT.
-                 * Anyway, this is pretty inaccurate since ld_tgt_count now
-                 * represents max index and we should rely on the actual number
-                 * of OSTs instead */
-                stripe_count = min((__u32)LOV_MAX_STRIPE_COUNT,
-                                   lov->desc.ld_tgt_count);
-
                 if (lmmp && *lmmp)
                         lmm_magic = le32_to_cpu((*lmmp)->lmm_magic);
                 else
@@ -178,6 +164,27 @@ int lov_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
 
         }
 
+        if (lsm) {
+                /* If we are just sizing the EA, limit the stripe count
+                 * to the actual number of OSTs in this filesystem. */
+                if (!lmmp) {
+                        stripe_count = lov_get_stripecnt(lov, lmm_magic,
+                                                         lsm->lsm_stripe_count);
+                        lsm->lsm_stripe_count = stripe_count;
+                } else {
+                        stripe_count = lsm->lsm_stripe_count;
+                }
+        } else {
+                /* No need to allocate more than maximum supported stripes.
+                 * Anyway, this is pretty inaccurate since ld_tgt_count now
+                 * represents max index and we should rely on the actual number
+                 * of OSTs instead */
+                stripe_count = lov_mds_md_stripecnt(lov->lov_ocd.ocd_max_easize,
+                                                    lmm_magic);
+                if (stripe_count > lov->desc.ld_tgt_count)
+                        stripe_count = lov->desc.ld_tgt_count;
+        }
+
         /* XXX LOV STACKING call into osc for sizes */
         lmm_size = lov_mds_md_size(stripe_count, lmm_magic);
 
@@ -187,13 +194,13 @@ int lov_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
         if (*lmmp && !lsm) {
                 stripe_count = le32_to_cpu((*lmmp)->lmm_stripe_count);
                 lmm_size = lov_mds_md_size(stripe_count, lmm_magic);
-                OBD_FREE(*lmmp, lmm_size);
+                OBD_FREE_LARGE(*lmmp, lmm_size);
                 *lmmp = NULL;
                 RETURN(0);
         }
 
         if (!*lmmp) {
-                OBD_ALLOC(*lmmp, lmm_size);
+                OBD_ALLOC_LARGE(*lmmp, lmm_size);
                 if (!*lmmp)
                         RETURN(-ENOMEM);
         }
@@ -215,7 +222,7 @@ int lov_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
          * same first fields
          */
         lmmv1->lmm_object_id = cpu_to_le64(lsm->lsm_object_id);
-        lmmv1->lmm_object_gr = cpu_to_le64(lsm->lsm_object_gr);
+        lmmv1->lmm_object_seq = cpu_to_le64(lsm->lsm_object_seq);
         lmmv1->lmm_stripe_size = cpu_to_le32(lsm->lsm_stripe_size);
         lmmv1->lmm_stripe_count = cpu_to_le32(stripe_count);
         lmmv1->lmm_pattern = cpu_to_le32(lsm->lsm_pattern);
@@ -229,12 +236,11 @@ int lov_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
 
         for (i = 0; i < stripe_count; i++) {
                 struct lov_oinfo *loi = lsm->lsm_oinfo[i];
-
                 /* XXX LOV STACKING call down to osc_packmd() to do packing */
                 LASSERTF(loi->loi_id, "lmm_oid "LPU64" stripe %u/%u idx %u\n",
                          lmmv1->lmm_object_id, i, stripe_count, loi->loi_ost_idx);
                 lmm_objects[i].l_object_id = cpu_to_le64(loi->loi_id);
-                lmm_objects[i].l_object_gr = cpu_to_le64(loi->loi_gr);
+                lmm_objects[i].l_object_seq = cpu_to_le64(loi->loi_seq);
                 lmm_objects[i].l_ost_gen = cpu_to_le32(loi->loi_ost_gen);
                 lmm_objects[i].l_ost_idx = cpu_to_le32(loi->loi_ost_idx);
         }
@@ -243,19 +249,26 @@ int lov_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
 }
 
 /* Find the max stripecount we should use */
-int lov_get_stripecnt(struct lov_obd *lov, __u32 stripe_count)
+__u32 lov_get_stripecnt(struct lov_obd *lov, __u32 magic, __u32 stripe_count)
 {
+        __u32 max_stripes = LOV_MAX_STRIPE_COUNT_OLD; /* fallback limit */
+
         if (!stripe_count)
                 stripe_count = lov->desc.ld_default_stripe_count;
         if (stripe_count > lov->desc.ld_active_tgt_count)
                 stripe_count = lov->desc.ld_active_tgt_count;
         if (!stripe_count)
                 stripe_count = 1;
-        /* for now, we limit the stripe count directly, when bug 4424 is
-         * fixed this needs to be somewhat dynamic based on whether ext3
-         * can handle larger EA sizes. */
-        if (stripe_count > LOV_MAX_STRIPE_COUNT)
-                stripe_count = LOV_MAX_STRIPE_COUNT;
+
+        /* The old limit stays unless OBD_CONNECT_MAX_EASIZE is set and
+         * ocd_max_easize is non-zero; then lov_mds_md_stripecnt() gives it */
+        if (lov->lov_ocd.ocd_connect_flags & OBD_CONNECT_MAX_EASIZE &&
+            lov->lov_ocd.ocd_max_easize)
+                max_stripes = lov_mds_md_stripecnt(lov->lov_ocd.ocd_max_easize,
+                                                   magic);
+
+        if (stripe_count > max_stripes)
+                stripe_count = max_stripes;
 
         return stripe_count;
 }
@@ -272,7 +285,7 @@ static int lov_verify_lmm(void *lmm, int lmm_bytes, int *stripe_count)
                 CERROR("bad disk LOV MAGIC: 0x%08X; dumping LMM (size=%d):\n",
                        le32_to_cpu(*(__u32 *)lmm), lmm_bytes);
                 sz = lmm_bytes * 2 + 1;
-                OBD_ALLOC(buffer, sz);
+                OBD_ALLOC_LARGE(buffer, sz);
                 if (buffer != NULL) {
                         int i;
 
@@ -280,7 +293,7 @@ static int lov_verify_lmm(void *lmm, int lmm_bytes, int *stripe_count)
                                 sprintf(buffer+2*i, "%.2X", ((char *)lmm)[i]);
                         buffer[sz] = '\0';
                         CERROR("%s\n", buffer);
-                        OBD_FREE(buffer, sz);
+                        OBD_FREE_LARGE(buffer, sz);
                 }
                 return -EINVAL;
         }
@@ -290,7 +303,7 @@ static int lov_verify_lmm(void *lmm, int lmm_bytes, int *stripe_count)
 }
 
 int lov_alloc_memmd(struct lov_stripe_md **lsmp, int stripe_count,
-                      int pattern, int magic)
+                    int pattern, int magic)
 {
         int i, lsm_size;
         ENTRY;
@@ -347,8 +360,8 @@ int lov_unpackmd(struct obd_export *exp,  struct lov_stripe_md **lsmp,
                         RETURN(rc);
                 magic = le32_to_cpu(lmm->lmm_magic);
         } else {
-                stripe_count = lov_get_stripecnt(lov, 0);
                 magic = LOV_MAGIC;
+                stripe_count = lov_get_stripecnt(lov, magic, 0);
         }
 
         /* If we aren't passed an lsmp struct, we just want the size */
@@ -388,14 +401,15 @@ static int __lov_setstripe(struct obd_export *exp, int max_lmm_size,
 {
         struct obd_device *obd = class_exp2obd(exp);
         struct lov_obd *lov = &obd->u.lov;
-        struct lov_user_md_v3 lumv3;
-        struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
+        char buffer[sizeof(struct lov_user_md_v3)];
+        struct lov_user_md_v3 *lumv3 = (struct lov_user_md_v3 *)&buffer[0];
+        struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&buffer[0];
         int lmm_magic;
         int stripe_count;
         int rc;
         ENTRY;
 
-        if (cfs_copy_from_user(&lumv3, lump, sizeof(struct lov_user_md_v1)))
+        if (cfs_copy_from_user(lumv3, lump, sizeof(struct lov_user_md_v1)))
                 RETURN(-EFAULT);
 
         lmm_magic = lumv1->lmm_magic;
@@ -404,12 +418,12 @@ static int __lov_setstripe(struct obd_export *exp, int max_lmm_size,
                 lustre_swab_lov_user_md_v1(lumv1);
                 lmm_magic = LOV_USER_MAGIC_V1;
         } else if (lmm_magic == LOV_USER_MAGIC_V3) {
-                if (cfs_copy_from_user(&lumv3, lump, sizeof(lumv3)))
+                if (cfs_copy_from_user(lumv3, lump, sizeof(*lumv3)))
                         RETURN(-EFAULT);
         } else if (lmm_magic == __swab32(LOV_USER_MAGIC_V3)) {
-                if (cfs_copy_from_user(&lumv3, lump, sizeof(lumv3)))
+                if (cfs_copy_from_user(lumv3, lump, sizeof(*lumv3)))
                         RETURN(-EFAULT);
-                lustre_swab_lov_user_md_v3(&lumv3);
+                lustre_swab_lov_user_md_v3(lumv3);
                 lmm_magic = LOV_USER_MAGIC_V3;
         } else if (lmm_magic != LOV_USER_MAGIC_V1) {
                 CDEBUG(D_IOCTL,
@@ -447,7 +461,8 @@ static int __lov_setstripe(struct obd_export *exp, int max_lmm_size,
                        lumv1->lmm_stripe_offset, lov->desc.ld_tgt_count);
                 RETURN(-EINVAL);
         }
-        stripe_count = lov_get_stripecnt(lov, lumv1->lmm_stripe_count);
+        stripe_count = lov_get_stripecnt(lov, lmm_magic,
+                                         lumv1->lmm_stripe_count);
 
         if (max_lmm_size) {
                 int max_stripes = (max_lmm_size -
@@ -463,12 +478,12 @@ static int __lov_setstripe(struct obd_export *exp, int max_lmm_size,
         if (lmm_magic == LOV_USER_MAGIC_V3) {
                 struct pool_desc *pool;
 
-                pool = lov_find_pool(lov, lumv3.lmm_pool_name);
+                pool = lov_find_pool(lov, lumv3->lmm_pool_name);
                 if (pool != NULL) {
-                        if (lumv3.lmm_stripe_offset !=
-                            (typeof(lumv3.lmm_stripe_offset))(-1)) {
+                        if (lumv3->lmm_stripe_offset !=
+                            (typeof(lumv3->lmm_stripe_offset))(-1)) {
                                 rc = lov_check_index_in_pool(
-                                        lumv3.lmm_stripe_offset, pool);
+                                        lumv3->lmm_stripe_offset, pool);
                                 if (rc < 0) {
                                         lov_pool_putref(pool);
                                         RETURN(-EINVAL);
@@ -488,7 +503,7 @@ static int __lov_setstripe(struct obd_export *exp, int max_lmm_size,
                 (*lsmp)->lsm_oinfo[0]->loi_ost_idx = lumv1->lmm_stripe_offset;
                 (*lsmp)->lsm_stripe_size = lumv1->lmm_stripe_size;
                 if (lmm_magic == LOV_USER_MAGIC_V3)
-                        strncpy((*lsmp)->lsm_pool_name, lumv3.lmm_pool_name,
+                        strncpy((*lsmp)->lsm_pool_name, lumv3->lmm_pool_name,
                                 LOV_MAXPOOLNAME);
                 rc = 0;
         }
@@ -558,7 +573,7 @@ int lov_setea(struct obd_export *exp, struct lov_stripe_md **lsmp,
                 (*lsmp)->lsm_oinfo[i]->loi_ost_idx =
                         lmm_objects[i].l_ost_idx;
                 (*lsmp)->lsm_oinfo[i]->loi_id = lmm_objects[i].l_object_id;
-                (*lsmp)->lsm_oinfo[i]->loi_gr = lmm_objects[i].l_object_gr;
+                (*lsmp)->lsm_oinfo[i]->loi_seq = lmm_objects[i].l_object_seq;
         }
         RETURN(0);
 }