Lustre currently limits EA size to either ~1 MiB (ldiskfs)
or 32 KiB (ZFS). The VFS has its own limit, XATTR_SIZE_MAX,
which we must respect to interoperate correctly with
userspace tools like tar and getfattr, and with the
getxattr() syscall.
Set this as the new max EA size for both ldiskfs and ZFS.
(The current 32 KiB limit on ZFS is too small for files
with LOV_MAX_STRIPE_COUNT [2000] stripes, so it needs to be
raised regardless.)
In order to use this correctly, we have to use the real EA
size on the client. The previous code for maximum EA size
on the client (KEY_MAX_EASIZE, llite.max_easize) used a
calculated value based on the number of targets.
With one exception, the mdc code already uses the default
EA size rather than the max. The default EA size adjusts
automatically to the largest size sent by the server.
The exception is the open code, which uses the max so it
never has to resend a layout request. This patch changes
it to use the default, which means that the first time a very
widely striped file is opened, the open will be resent.
Add limit checks on client & server so the xattr size limit
is honored.
Signed-off-by: Patrick Farrell <pfarrell@whamcloud.com>
Change-Id: I4da62691f30fa276d20959810116cf558cccc515
Reviewed-on: https://review.whamcloud.com/34058
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Alexandr Boyko <c17825@cray.com>
Reviewed-by: James Simmons <uja.ornl@yahoo.com>
Tested-by: Jenkins
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
* vmalloc(). Excessive use of vmalloc() may cause spinlock contention
* on the MDS.
*/
* vmalloc(). Excessive use of vmalloc() may cause spinlock contention
* on the MDS.
*/
-#define OBD_MAX_DEFAULT_EA_SIZE 4096
+#define OBD_MAX_DEFAULT_EA_SIZE 4096
+
+/*
+ * Lustre can handle larger xattrs internally, but we must respect the Linux
+ * VFS limitation or tools like tar cannot interact with Lustre volumes
+ * correctly.
+ */
+#define OBD_MAX_EA_SIZE XATTR_SIZE_MAX
+
enum obd_cl_sem_lock_class {
OBD_CLI_SEM_NORMAL,
enum obd_cl_sem_lock_class {
OBD_CLI_SEM_NORMAL,
+ CDEBUG(D_INFO, "max LOV ea size: %d\n", *lmmsize);
+
size = sizeof(int);
rc = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_MAX_EASIZE),
KEY_MAX_EASIZE, &size, lmmsize);
if (rc)
CERROR("Get max mdsize error rc %d\n", rc);
size = sizeof(int);
rc = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_MAX_EASIZE),
KEY_MAX_EASIZE, &size, lmmsize);
if (rc)
CERROR("Get max mdsize error rc %d\n", rc);
+ CDEBUG(D_INFO, "max LMV ea size: %d\n", *lmmsize);
+
lov_tgts_getref(obddev);
if (KEY_IS(KEY_MAX_EASIZE)) {
lov_tgts_getref(obddev);
if (KEY_IS(KEY_MAX_EASIZE)) {
- u32 max_stripe_count = min_t(u32, ld->ld_active_tgt_count,
- LOV_MAX_STRIPE_COUNT);
-
- *((u32 *)val) = lov_mds_md_size(max_stripe_count, LOV_MAGIC_V3);
+ *((u32 *)val) = exp->exp_connect_data.ocd_max_easize;
} else if (KEY_IS(KEY_DEFAULT_EASIZE)) {
u32 def_stripe_count = min_t(u32, ld->ld_default_stripe_count,
LOV_MAX_STRIPE_COUNT);
} else if (KEY_IS(KEY_DEFAULT_EASIZE)) {
u32 def_stripe_count = min_t(u32, ld->ld_default_stripe_count,
LOV_MAX_STRIPE_COUNT);
struct ldlm_intent *lit;
const void *lmm = op_data->op_data;
__u32 lmmsize = op_data->op_data_size;
struct ldlm_intent *lit;
const void *lmm = op_data->op_data;
__u32 lmmsize = op_data->op_data_size;
+ __u32 mdt_md_capsule_size;
struct list_head cancels = LIST_HEAD_INIT(cancels);
int count = 0;
enum ldlm_mode mode;
struct list_head cancels = LIST_HEAD_INIT(cancels);
int count = 0;
enum ldlm_mode mode;
+ mdt_md_capsule_size = obddev->u.cli.cl_default_mds_easize;
+
it->it_create_mode = (it->it_create_mode & ~S_IFMT) | S_IFREG;
/* XXX: openlock is not cancelled for cross-refs. */
it->it_create_mode = (it->it_create_mode & ~S_IFMT) | S_IFREG;
/* XXX: openlock is not cancelled for cross-refs. */
lmmsize);
req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER,
lmmsize);
req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER,
- obddev->u.cli.cl_max_mds_easize);
req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER, acl_bufsize);
if (!(it->it_op & IT_CREAT) && it->it_op & IT_OPEN &&
req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER, acl_bufsize);
if (!(it->it_op & IT_CREAT) && it->it_op & IT_OPEN &&
lustre_msg_early_size());
/* Estimate free space for DoM files in repbuf */
repsize_estimate = repsize - (req->rq_replen -
lustre_msg_early_size());
/* Estimate free space for DoM files in repbuf */
repsize_estimate = repsize - (req->rq_replen -
- obddev->u.cli.cl_max_mds_easize +
sizeof(struct lov_comp_md_v1) +
sizeof(struct lov_comp_md_entry_v1) +
lov_mds_md_size(0, LOV_MAGIC_V3));
sizeof(struct lov_comp_md_v1) +
sizeof(struct lov_comp_md_entry_v1) +
lov_mds_md_size(0, LOV_MAGIC_V3));
lit = req_capsule_client_get(&req->rq_pill, &RMF_LDLM_INTENT);
lit->opc = (__u64)it->it_op;
lit = req_capsule_client_get(&req->rq_pill, &RMF_LDLM_INTENT);
lit->opc = (__u64)it->it_op;
- if (obddev->u.cli.cl_default_mds_easize > 0)
- easize = obddev->u.cli.cl_default_mds_easize;
- else
- easize = obddev->u.cli.cl_max_mds_easize;
+ easize = obddev->u.cli.cl_default_mds_easize;
/* pack the intended request */
mdc_getattr_pack(req, valid, it->it_flags, op_data, easize);
/* pack the intended request */
mdc_getattr_pack(req, valid, it->it_flags, op_data, easize);
rr->rr_eadata = req_capsule_client_get(pill, &RMF_EADATA);
rr->rr_eadatalen = req_capsule_get_size(pill, &RMF_EADATA,
RCL_CLIENT);
rr->rr_eadata = req_capsule_client_get(pill, &RMF_EADATA);
rr->rr_eadatalen = req_capsule_get_size(pill, &RMF_EADATA,
RCL_CLIENT);
if (rr->rr_eadatalen > 0) {
const struct lmv_user_md *lum;
if (rr->rr_eadatalen > 0) {
const struct lmv_user_md *lum;
rr->rr_eadatalen = req_capsule_get_size(pill,
&RMF_EADATA,
RCL_CLIENT);
rr->rr_eadatalen = req_capsule_get_size(pill,
&RMF_EADATA,
RCL_CLIENT);
if (rr->rr_eadatalen > 0) {
rr->rr_eadata = req_capsule_client_get(pill,
&RMF_EADATA);
if (rr->rr_eadatalen > 0) {
rr->rr_eadata = req_capsule_client_get(pill,
&RMF_EADATA);
if (req_capsule_field_present(pill, &RMF_EADATA, RCL_CLIENT)) {
rr->rr_eadatalen = req_capsule_get_size(pill, &RMF_EADATA,
RCL_CLIENT);
if (req_capsule_field_present(pill, &RMF_EADATA, RCL_CLIENT)) {
rr->rr_eadatalen = req_capsule_get_size(pill, &RMF_EADATA,
RCL_CLIENT);
if (rr->rr_eadatalen > 0) {
rr->rr_eadata = req_capsule_client_get(pill,
&RMF_EADATA);
if (rr->rr_eadatalen > 0) {
rr->rr_eadata = req_capsule_client_get(pill,
&RMF_EADATA);
if (req_capsule_field_present(pill, &RMF_EADATA, RCL_CLIENT)) {
rr->rr_eadatalen = req_capsule_get_size(pill, &RMF_EADATA,
RCL_CLIENT);
if (req_capsule_field_present(pill, &RMF_EADATA, RCL_CLIENT)) {
rr->rr_eadatalen = req_capsule_get_size(pill, &RMF_EADATA,
RCL_CLIENT);
+
+ if (rr->rr_eadatalen > info->mti_mdt->mdt_max_ea_size)
+ RETURN(-E2BIG);
+
if (rr->rr_eadatalen > 0) {
rr->rr_eadata = req_capsule_client_get(pill,
&RMF_EADATA);
if (rr->rr_eadatalen > 0) {
rr->rr_eadata = req_capsule_client_get(pill,
&RMF_EADATA);
#endif
param->ddp_max_ea_size = sb->s_blocksize - ea_overhead;
#endif
param->ddp_max_ea_size = sb->s_blocksize - ea_overhead;
- if (param->ddp_max_ea_size > OSD_MAX_EA_SIZE)
- param->ddp_max_ea_size = OSD_MAX_EA_SIZE;
+ if (param->ddp_max_ea_size > OBD_MAX_EA_SIZE - ea_overhead)
+ param->ddp_max_ea_size = OBD_MAX_EA_SIZE - ea_overhead;
/*
* Preferred RPC size for efficient disk IO. 4MB shows good
/*
* Preferred RPC size for efficient disk IO. 4MB shows good
param->ddp_mntopts = MNTOPT_USERXATTR;
if (osd->od_posix_acl)
param->ddp_mntopts |= MNTOPT_ACL;
param->ddp_mntopts = MNTOPT_USERXATTR;
if (osd->od_posix_acl)
param->ddp_mntopts |= MNTOPT_ACL;
- param->ddp_max_ea_size = DXATTR_MAX_ENTRY_SIZE;
+ /* Previously DXATTR_MAX_ENTRY_SIZE */
+ param->ddp_max_ea_size = OBD_MAX_EA_SIZE;
/* for maxbytes, report same value as ZPL */
param->ddp_maxbytes = MAX_LFS_FILESIZE;
/* for maxbytes, report same value as ZPL */
param->ddp_maxbytes = MAX_LFS_FILESIZE;
return rc;
LASSERT(obj->oo_sa_xattr);
return rc;
LASSERT(obj->oo_sa_xattr);
- /* Limited to 32k to keep nvpair memory allocations small */
- if (buf->lb_len > DXATTR_MAX_ENTRY_SIZE) {
+ if (buf->lb_len > OBD_MAX_EA_SIZE) {
too_big = 1;
} else {
/* Prevent the DXATTR SA from consuming the entire SA
too_big = 1;
} else {
/* Prevent the DXATTR SA from consuming the entire SA
then
count=28 # hard coded of RPC protocol
elif [ $(facet_fstype $SINGLEMDS) != ldiskfs ]; then
then
count=28 # hard coded of RPC protocol
elif [ $(facet_fstype $SINGLEMDS) != ldiskfs ]; then
- count=4000 # max_num 4091 max_ea_size = 32768
+ count=4000 # max_num 4091 max_ea_size = ~65536
elif ! large_xattr_enabled; then
count=450 # max_num 497 max_ea_size = 4012
else
elif ! large_xattr_enabled; then
count=450 # max_num 497 max_ea_size = 4012
else
- count=4500 # max_num 8187 max_ea_size = 1048492
+ count=4500 # max_num 8187 max_ea_size = 65452
# not create too much (>5000) to save test time
fi
# not create too much (>5000) to save test time
fi
# Check max_easize.
local max_easize=$($LCTL get_param -n llite.*.max_easize)
# Check max_easize.
local max_easize=$($LCTL get_param -n llite.*.max_easize)
- [[ $max_easize -eq 128 ]] ||
- error "max_easize is $max_easize, should be 128 bytes"
+ if [ $MDS1_VERSION -lt $(version_code 2.12.51) ]
+ then
+ [[ $max_easize -eq 128 ]] ||
+ error "max_easize is $max_easize, should be 128 bytes"
+ else
+ # LU-11868
+ # 4012 is 4096 - ldiskfs ea overhead
+ [[ $max_easize -ge 4012 ]] ||
+ error "max_easize is $max_easize, should be at least 4012 bytes"
+
+ # 65452 is XATTR_SIZE_MAX - ldiskfs ea overhead
+ if large_xattr_enabled;
+ then
+ [[ $max_easize -ge 65452 ]] ||
+ error "max_easize is $max_easize, should be at least 65452 bytes"
+ fi
+ fi
local file=$DIR/$tfile
local value="$(generate_string $xsize)"
local xbig=trusted.big
local file=$DIR/$tfile
local value="$(generate_string $xsize)"
local xbig=trusted.big
touch $file
log "save $xbig on $file"
touch $file
log "save $xbig on $file"
- setfattr -n $xbig -v $value $file ||
- error "saving $xbig on $file failed"
+ if [ -z "$toobig" ]
+ then
+ setfattr -n $xbig -v $value $file ||
+ error "saving $xbig on $file failed"
+ else
+ setfattr -n $xbig -v $value $file &&
+ error "saving $xbig on $file succeeded"
+ return 0
+ fi
local orig=$(get_xattr_value $xbig $file)
[[ "$orig" != "$value" ]] && error "$xbig different after saving $xbig"
local orig=$(get_xattr_value $xbig $file)
[[ "$orig" != "$value" ]] && error "$xbig different after saving $xbig"
test_102ha() {
large_xattr_enabled || skip_env "ea_inode feature disabled"
test_102ha() {
large_xattr_enabled || skip_env "ea_inode feature disabled"
+ echo "setting xattr of max xattr size: $(max_xattr_size)"
grow_xattr $(max_xattr_size)
grow_xattr $(max_xattr_size)
+
+ echo "setting xattr of > max xattr size: $(max_xattr_size) + 10"
+ echo "This should fail:"
+ grow_xattr $(($(max_xattr_size) + 10)) 1
}
run_test 102ha "grow xattr from inside inode to external inode"
}
run_test 102ha "grow xattr from inside inode to external inode"
local size
if large_xattr_enabled; then
local size
if large_xattr_enabled; then
- # include/linux/limits.h: #define XATTR_SIZE_MAX 65536
- size=65536
+ size=$($LCTL get_param -n llite.*.max_easize)
else
local mds_dev=$(mdsdevname ${SINGLEMDS//mds/})
local block_size=$(get_block_size $SINGLEMDS $mds_dev)
else
local mds_dev=$(mdsdevname ${SINGLEMDS//mds/})
local block_size=$(get_block_size $SINGLEMDS $mds_dev)