unsigned ddp_max_nlink;
unsigned ddp_block_shift;
mntopt_t ddp_mntopts;
+ unsigned ddp_max_ea_size;
};
/**
OBD_CONNECT_RMT_CLIENT_FORCE | OBD_CONNECT_VBR | \
OBD_CONNECT_MDS | OBD_CONNECT_SKIP_ORPHAN | \
OBD_CONNECT_GRANT_SHRINK | OBD_CONNECT_FULL20 | \
- OBD_CONNECT_64BITHASH | OBD_CONNECT_MAXBYTES)
+ OBD_CONNECT_64BITHASH | OBD_CONNECT_MAXBYTES | \
+ OBD_CONNECT_MAX_EASIZE)
#define ECHO_CONNECT_SUPPORTED (0)
#define MGS_CONNECT_SUPPORTED (OBD_CONNECT_VERSION | OBD_CONNECT_AT | \
OBD_CONNECT_FULL20 | OBD_CONNECT_IMP_RECOV)
#define LOV_MIN_STRIPE_BITS 16 /* maximum PAGE_SIZE (ia64), power of 2 */
#define LOV_MIN_STRIPE_SIZE (1<<LOV_MIN_STRIPE_BITS)
-#define LOV_MAX_STRIPE_COUNT 160 /* until bug 4424 is fixed */
+#define LOV_MAX_STRIPE_COUNT_OLD 160
+/* This calculation is crafted so that input of 4096 will result in 160
+ * which in turn is equal to old maximal stripe count.
+ * XXX: In fact this is too simplified for now; what it also needs is an
+ * ea_type argument to clearly know how much space each stripe consumes.
+ *
+ * The limit of 12 pages is somewhat arbitrary, but is a reasonably large
+ * allocation that is sufficient for the current generation of systems.
+ *
+ * (max buffer size - lov+rpc header) / sizeof(struct lov_ost_data_v1) */
+#define LOV_MAX_STRIPE_COUNT 2000 /* ((12 * 4096 - 256) / 24) */
#define LOV_V1_INSANE_STRIPE_COUNT 65532 /* maximum stripe count bz13933 */
#define LOV_MAX_UUID_BUFFER_SIZE 8192
#define MDT_MAX_THREADS 512UL
#endif
#define MDS_NBUFS (64 * cfs_num_online_cpus())
-#define MDS_BUFSIZE (8 * 1024)
/**
* Assume file name length = FNAME_MAX = 256 (true for ext3).
* path name length = PATH_MAX = 4096
- * LOV MD size max = EA_MAX = 4000
+ * LOV MD size max = EA_MAX = 48000 (2000 stripes)
* symlink: FNAME_MAX + PATH_MAX <- largest
* link: FNAME_MAX + PATH_MAX (mds_rec_link < mds_rec_create)
* rename: FNAME_MAX + FNAME_MAX
* Realistic size is about 512 bytes (20 character name + 128 char symlink),
* except in the open case where there are a large number of OSTs in a LOV.
*/
-#define MDS_MAXREQSIZE (5 * 1024)
-#define MDS_MAXREPSIZE max(9 * 1024, 362 + LOV_MAX_STRIPE_COUNT * 56)
+#define MDS_MAXREPSIZE max(10 * 1024, 362 + LOV_MAX_STRIPE_COUNT * 56)
+#define MDS_MAXREQSIZE MDS_MAXREPSIZE
+
+/** MDS_BUFSIZE = max_reqsize + max sptlrpc payload size */
+#define MDS_BUFSIZE (MDS_MAXREQSIZE + 1024)
/** FLD_MAXREQSIZE == lustre_msg + __u32 padding + ptlrpc_body + opc */
#define FLD_MAXREQSIZE (160)
struct ost_server_data;
+struct osd_properties {
+ size_t osd_max_ea_size; /* maximum EA size in bytes; used to bound how
+                          * many stripes a LOV EA may describe */
+};
+
#define OBT_MAGIC 0xBDDECEAE
/* hold common fields for "target" device */
struct obd_device_target {
cfs_rw_semaphore_t obt_rwsem;
struct vfsmount *obt_vfsmnt;
struct file *obt_health_check_filp;
+ struct osd_properties obt_osd_properties;
};
/* llog contexts */
return sizeof(struct lov_stripe_md) + stripes*sizeof(struct lov_oinfo*);
}
-static inline int lov_mds_md_size(int stripes, int lmm_magic)
+static inline __u32 lov_mds_md_size(int stripes, __u32 lmm_magic)
{
if (lmm_magic == LOV_MAGIC_V3)
return sizeof(struct lov_mds_md_v3) +
stripes * sizeof(struct lov_ost_data_v1);
}
+/**
+ * Sizes needed to convert an EA size into a stripe count for one LOV
+ * layout version.
+ */
+struct lov_version_size {
+        __u32   lvs_magic;      /* LOV magic this entry applies to */
+        size_t  lvs_lmm_size;   /* sizeof() the fixed lov_mds_md header */
+        size_t  lvs_lod_size;   /* sizeof() one per-stripe entry */
+};
+
+/**
+ * Compute how many stripes fit into an EA of \a ea_size bytes.
+ *
+ * \param ea_size    EA size in bytes available for the whole LOV EA
+ * \param lmm_magic  LOV magic selecting the layout version
+ *                   (LOV_MAGIC_V1 or LOV_MAGIC_V3)
+ *
+ * \retval number of per-stripe entries that fit after the fixed header
+ * \retval 0 if \a ea_size is not positive, does not exceed the header
+ *           size, or \a lmm_magic is not a known layout version
+ */
+static inline __u32 lov_mds_md_stripecnt(int ea_size, __u32 lmm_magic)
+{
+        static const struct lov_version_size lmm_ver_size[] = {
+                { .lvs_magic = LOV_MAGIC_V3,
+                  .lvs_lmm_size = sizeof(struct lov_mds_md_v3),
+                  .lvs_lod_size = sizeof(struct lov_ost_data_v1) },
+                { .lvs_magic = LOV_MAGIC_V1,
+                  .lvs_lmm_size = sizeof(struct lov_mds_md_v1),
+                  .lvs_lod_size = sizeof(struct lov_ost_data_v1) } };
+        size_t avail;
+        size_t i;
+
+        /* A negative size would be converted to a huge size_t by the
+         * comparison and subtraction below, slip past the header-size
+         * check and produce a bogus stripe count, so reject it here. */
+        if (ea_size <= 0)
+                return 0;
+        avail = (size_t)ea_size;
+
+        for (i = 0; i < ARRAY_SIZE(lmm_ver_size); i++) {
+                if (lmm_magic != lmm_ver_size[i].lvs_magic)
+                        continue;
+
+                /* Nothing is left for stripes once the header is stored */
+                if (avail <= lmm_ver_size[i].lvs_lmm_size)
+                        return 0;
+
+                return (avail - lmm_ver_size[i].lvs_lmm_size) /
+                       lmm_ver_size[i].lvs_lod_size;
+        }
+
+        /* Invalid LOV magic, so no stripes could fit */
+        return 0;
+}
+
#define IOC_LOV_TYPE 'g'
#define IOC_LOV_MIN_NR 50
#define IOC_LOV_SET_OSC_ACTIVE _IOWR('g', 50, long)
void lov_fix_desc_stripe_count(__u32 *val);
void lov_fix_desc_pattern(__u32 *val);
void lov_fix_desc_qos_maxage(__u32 *val);
-int lov_get_stripecnt(struct lov_obd *lov, __u32 stripe_count);
+__u32 lov_get_stripecnt(struct lov_obd *lov, __u32 magic, __u32 stripe_count);
int lov_connect_obd(struct obd_device *obd, __u32 index, int activate,
struct obd_connect_data *data);
int lov_setup(struct obd_device *obd, struct lustre_cfg *lcfg);
if (lsm) {
lmm_magic = lsm->lsm_magic;
-
- /* If we are just sizing the EA, limit the stripe count
- * to the actual number of OSTs in this filesystem. */
- if (!lmmp) {
- stripe_count = lov_get_stripecnt(lov,
- lsm->lsm_stripe_count);
- lsm->lsm_stripe_count = stripe_count;
- } else {
- stripe_count = lsm->lsm_stripe_count;
- }
} else {
- /* No needs to allocated more than LOV_MAX_STRIPE_COUNT.
- * Anyway, this is pretty inaccurate since ld_tgt_count now
- * represents max index and we should rely on the actual number
- * of OSTs instead */
- stripe_count = min((__u32)LOV_MAX_STRIPE_COUNT,
- lov->desc.ld_tgt_count);
-
if (lmmp && *lmmp)
lmm_magic = le32_to_cpu((*lmmp)->lmm_magic);
else
}
+ if (lsm) {
+ /* If we are just sizing the EA, limit the stripe count
+ * to the actual number of OSTs in this filesystem. */
+ if (!lmmp) {
+ stripe_count = lov_get_stripecnt(lov, lmm_magic,
+ lsm->lsm_stripe_count);
+ lsm->lsm_stripe_count = stripe_count;
+ } else {
+ stripe_count = lsm->lsm_stripe_count;
+ }
+ } else {
+ /* No need to allocate more than maximum supported stripes.
+ * Anyway, this is pretty inaccurate since ld_tgt_count now
+ * represents max index and we should rely on the actual number
+ * of OSTs instead */
+ stripe_count = lov_mds_md_stripecnt(lov->lov_ocd.ocd_max_easize,
+ lmm_magic);
+ if (stripe_count > lov->desc.ld_tgt_count)
+ stripe_count = lov->desc.ld_tgt_count;
+ }
+
/* XXX LOV STACKING call into osc for sizes */
lmm_size = lov_mds_md_size(stripe_count, lmm_magic);
}
/* Find the max stripecount we should use.
 *
 * A zero stripe_count selects lov->desc.ld_default_stripe_count. The value
 * is then limited to lov->desc.ld_active_tgt_count and raised to 1 if it is
 * still zero. Finally it is capped by a maximum that defaults to
 * LOV_MAX_STRIPE_COUNT_OLD, or, when OBD_CONNECT_MAX_EASIZE was negotiated
 * and ocd_max_easize is non-zero, by lov_mds_md_stripecnt() for the given
 * magic. That helper returns 0 for an unknown magic or an EA too small to
 * hold any stripe, so the result of this function can be 0 in that case. */
-int lov_get_stripecnt(struct lov_obd *lov, __u32 stripe_count)
+__u32 lov_get_stripecnt(struct lov_obd *lov, __u32 magic, __u32 stripe_count)
{
+ __u32 max_stripes = LOV_MAX_STRIPE_COUNT_OLD;
+
if (!stripe_count)
stripe_count = lov->desc.ld_default_stripe_count;
if (stripe_count > lov->desc.ld_active_tgt_count)
stripe_count = lov->desc.ld_active_tgt_count;
if (!stripe_count)
stripe_count = 1;
- /* for now, we limit the stripe count directly, when bug 4424 is
- * fixed this needs to be somewhat dynamic based on whether ext3
- * can handle larger EA sizes. */
- if (stripe_count > LOV_MAX_STRIPE_COUNT)
- stripe_count = LOV_MAX_STRIPE_COUNT;
+
+ /* stripe count is based on whether ldiskfs can handle
+ * larger EA sizes: the connect data's ocd_max_easize is only
+ * used when the OBD_CONNECT_MAX_EASIZE flag is set and the size
+ * is non-zero, otherwise the old fixed limit stays in effect */
+ if (lov->lov_ocd.ocd_connect_flags & OBD_CONNECT_MAX_EASIZE &&
+ lov->lov_ocd.ocd_max_easize)
+ max_stripes = lov_mds_md_stripecnt(lov->lov_ocd.ocd_max_easize,
+ magic);
+
+ if (stripe_count > max_stripes)
+ stripe_count = max_stripes;
return stripe_count;
}
RETURN(rc);
magic = le32_to_cpu(lmm->lmm_magic);
} else {
- stripe_count = lov_get_stripecnt(lov, 0);
magic = LOV_MAGIC;
+ stripe_count = lov_get_stripecnt(lov, magic, 0);
}
/* If we aren't passed an lsmp struct, we just want the size */
lumv1->lmm_stripe_offset, lov->desc.ld_tgt_count);
RETURN(-EINVAL);
}
- stripe_count = lov_get_stripecnt(lov, lumv1->lmm_stripe_count);
+ stripe_count = lov_get_stripecnt(lov, lmm_magic,
+ lumv1->lmm_stripe_count);
if (max_lmm_size) {
int max_stripes = (max_lmm_size -
LASSERT(src_oa->o_valid & OBD_MD_FLGROUP);
if (set->set_oi->oi_md == NULL) {
- int stripes_def = lov_get_stripecnt(lov, 0);
+ __u32 stripes_def = lov_get_stripecnt(lov, LOV_MAGIC, 0);
/* If the MDS file was truncated up to some size, stripe over
* enough OSTs to allow the file to be created at that size.
if (stripes < stripes_def)
stripes = stripes_def;
} else {
- flag = LOV_USES_DEFAULT_STRIPE;
- stripes = stripes_def;
+ flag = LOV_USES_DEFAULT_STRIPE;
+ stripes = stripes_def;
}
rc = lov_alloc_memmd(&set->set_oi->oi_md, stripes,
ENTRY;
if (success) {
- __u32 expected_stripes = lov_get_stripecnt(&obd->u.lov, 0);
-
+ __u32 expected_stripes = lov_get_stripecnt(&obd->u.lov,
+ LOV_MAGIC, 0);
if (osfs->os_files != LOV_U64_MAX)
do_div(osfs->os_files, expected_stripes);
if (osfs->os_ffree != LOV_U64_MAX)
obd->obd_recovering = 1;
cfs_spin_unlock(&obd->obd_dev_lock);
obd->u.mds.mds_id = mds_id;
+ obd->u.obt.obt_osd_properties.osd_max_ea_size =
+ mdd->mdd_dt_conf.ddp_max_ea_size;
+
rc = class_setup(obd, lcfg);
if (rc)
GOTO(class_detach, rc);
obd->obd_upcall.onu_upcall = mdd_notify;
obd->obd_upcall.onu_owner = mdd;
mdd->mdd_obd_dev = obd;
+
EXIT;
class_detach:
if (rc)
struct llog_ctxt *ctxt;
int rc;
+ if (cookies < lsm->lsm_stripe_count)
+ RETURN(rc = -EFBIG);
+
/* first prepare unlink log record */
OBD_ALLOC_PTR(lur);
if (!lur)
/* workaround - New target not in objids file; increase mdsize */
/* ld_tgt_count is used as the max index everywhere, despite its name. */
if (data[off] == 0) {
+ __u32 max_easize;
__u32 stripes;
+ max_easize = mds->mds_obt.obt_osd_properties.osd_max_ea_size;
data[off] = 1;
mds->mds_lov_objid_count++;
- stripes = min_t(__u32, LOV_MAX_STRIPE_COUNT,
- mds->mds_lov_objid_count);
+ stripes = min(lov_mds_md_stripecnt(max_easize, LOV_MAGIC_V3),
+ mds->mds_lov_objid_count);
mds->mds_max_mdsize = lov_mds_md_size(stripes, LOV_MAGIC_V3);
mds->mds_max_cookiesize = stripes * sizeof(struct llog_cookie);
static int mds_lov_update_from_read(struct mds_obd *mds, obd_id *data,
__u32 count)
{
- __u32 i;
- __u32 stripes;
+ __u32 max_easize = mds->mds_obt.obt_osd_properties.osd_max_ea_size;
+ __u32 i, stripes;
for (i = 0; i < count; i++) {
if (data[i] == 0)
mds->mds_lov_objid_count++;
}
- stripes = min_t(__u32, LOV_MAX_STRIPE_COUNT,
+ stripes = min(lov_mds_md_stripecnt(max_easize, LOV_MAGIC_V3),
mds->mds_lov_objid_count);
mds->mds_max_mdsize = lov_mds_md_size(stripes, LOV_MAGIC_V3);
OBD_CONNECT_OSS_CAPA | OBD_CONNECT_FULL20 |
OBD_CONNECT_CHANGE_QS | OBD_CONNECT_AT |
OBD_CONNECT_MDS | OBD_CONNECT_SKIP_ORPHAN |
- OBD_CONNECT_SOM;
+ OBD_CONNECT_SOM | OBD_CONNECT_MAX_EASIZE;
#ifdef HAVE_LRU_RESIZE_SUPPORT
data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE;
#endif
data->ocd_version = LUSTRE_VERSION_CODE;
data->ocd_group = mdt_to_obd_objseq(mds->mds_id);
+ data->ocd_max_easize = mds->mds_obt.obt_osd_properties.osd_max_ea_size;
+
/* send max bytes per rpc */
data->ocd_brw_size = PTLRPC_MAX_BRW_PAGES << CFS_PAGE_SHIFT;
/* send the list of supported checksum types */
param->ddp_mntopts |= MNTOPT_USERXATTR;
if (test_opt(sb, POSIX_ACL))
param->ddp_mntopts |= MNTOPT_ACL;
+
+#if defined(LDISKFS_FEATURE_INCOMPAT_EA_INODE)
+ if (LDISKFS_HAS_INCOMPAT_FEATURE(sb, LDISKFS_FEATURE_INCOMPAT_EA_INODE))
+ param->ddp_max_ea_size = LDISKFS_XATTR_MAX_LARGE_EA_SIZE;
+ else
+#endif
+ param->ddp_max_ea_size = sb->s_blocksize;
+
}
/**