From 3ec712bd183a859a7bb09280b8a5a1776ec5e2c2 Mon Sep 17 00:00:00 2001 From: Patrick Farrell Date: Fri, 29 Mar 2019 15:00:15 -0400 Subject: [PATCH] LU-11868 osd: Set max ea size to XATTR_SIZE_MAX Lustre currently limits EA size to either ~1 MiB (ldiskfs) or 32K (ZFS). VFS has its own limit, XATTR_SIZE_MAX, which we must respect to interoperate correctly with userspace tools like tar, getfattr, and the getxattr() syscall. Set this as the new max EA size for both ldiskfs and ZFS. (The current 32K on ZFS is too small for files with LOV_MAX_STRIPE_COUNT [2000] stripes, so it needs to be raised regardless.) In order to use this correctly, we have to use the real EA size on the client. The previous code for maximum EA size on the client (KEY_MAX_EASIZE, llite.max_easize) used a calculated value based on the number of targets. With one exception, the mdc code already uses the default EA size rather than the max. The default EA size adjusts automatically to the largest size sent by the server. The exception is the open code, which uses the max so it never has to resend a layout request. This patch changes it to use the default, which means that the first time a very widely striped file is opened, the open will be resent. Add limit checks on client & server so the xattr size limit is honored. 
Signed-off-by: Patrick Farrell Change-Id: I4da62691f30fa276d20959810116cf558cccc515 Reviewed-on: https://review.whamcloud.com/34058 Reviewed-by: Andreas Dilger Reviewed-by: Alexandr Boyko Reviewed-by: James Simmons Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Oleg Drokin --- lustre/include/obd.h | 10 +++++++++- lustre/llite/llite_lib.c | 4 ++++ lustre/lov/lov_obd.c | 5 +---- lustre/mdc/mdc_locks.c | 12 ++++++------ lustre/mdt/mdt_lib.c | 7 +++++++ lustre/osd-ldiskfs/osd_handler.c | 4 ++-- lustre/osd-zfs/osd_handler.c | 3 ++- lustre/osd-zfs/osd_xattr.c | 3 +-- lustre/tests/conf-sanity.sh | 23 +++++++++++++++++++---- lustre/tests/sanity.sh | 17 +++++++++++++++-- lustre/tests/test-framework.sh | 3 +-- 11 files changed, 67 insertions(+), 24 deletions(-) diff --git a/lustre/include/obd.h b/lustre/include/obd.h index 242bfb8..ce41c32e 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -151,7 +151,15 @@ enum { * vmalloc(). Excessive use of vmalloc() may cause spinlock contention * on the MDS. */ -#define OBD_MAX_DEFAULT_EA_SIZE 4096 +#define OBD_MAX_DEFAULT_EA_SIZE 4096 + +/* + * Lustre can handle larger xattrs internally, but we must respect the Linux + * VFS limitation or tools like tar cannot interact with Lustre volumes + * correctly. 
+ */ +#define OBD_MAX_EA_SIZE XATTR_SIZE_MAX + enum obd_cl_sem_lock_class { OBD_CLI_SEM_NORMAL, diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index 97305de..456cb84 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -677,12 +677,16 @@ int ll_get_max_mdsize(struct ll_sb_info *sbi, int *lmmsize) RETURN(rc); } + CDEBUG(D_INFO, "max LOV ea size: %d\n", *lmmsize); + size = sizeof(int); rc = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_MAX_EASIZE), KEY_MAX_EASIZE, &size, lmmsize); if (rc) CERROR("Get max mdsize error rc %d\n", rc); + CDEBUG(D_INFO, "max LMV ea size: %d\n", *lmmsize); + RETURN(rc); } diff --git a/lustre/lov/lov_obd.c b/lustre/lov/lov_obd.c index e151cc9..be9e51e 100644 --- a/lustre/lov/lov_obd.c +++ b/lustre/lov/lov_obd.c @@ -1163,10 +1163,7 @@ static int lov_get_info(const struct lu_env *env, struct obd_export *exp, lov_tgts_getref(obddev); if (KEY_IS(KEY_MAX_EASIZE)) { - u32 max_stripe_count = min_t(u32, ld->ld_active_tgt_count, - LOV_MAX_STRIPE_COUNT); - - *((u32 *)val) = lov_mds_md_size(max_stripe_count, LOV_MAGIC_V3); + *((u32 *)val) = exp->exp_connect_data.ocd_max_easize; } else if (KEY_IS(KEY_DEFAULT_EASIZE)) { u32 def_stripe_count = min_t(u32, ld->ld_default_stripe_count, LOV_MAX_STRIPE_COUNT); diff --git a/lustre/mdc/mdc_locks.c b/lustre/mdc/mdc_locks.c index 13e63ba..f8dc200 100644 --- a/lustre/mdc/mdc_locks.c +++ b/lustre/mdc/mdc_locks.c @@ -256,6 +256,7 @@ mdc_intent_open_pack(struct obd_export *exp, struct lookup_intent *it, struct ldlm_intent *lit; const void *lmm = op_data->op_data; __u32 lmmsize = op_data->op_data_size; + __u32 mdt_md_capsule_size; struct list_head cancels = LIST_HEAD_INIT(cancels); int count = 0; enum ldlm_mode mode; @@ -264,6 +265,8 @@ mdc_intent_open_pack(struct obd_export *exp, struct lookup_intent *it, ENTRY; + mdt_md_capsule_size = obddev->u.cli.cl_default_mds_easize; + it->it_create_mode = (it->it_create_mode & ~S_IFMT) | S_IFREG; /* XXX: openlock is not cancelled for 
cross-refs. */ @@ -352,7 +355,7 @@ mdc_intent_open_pack(struct obd_export *exp, struct lookup_intent *it, lmmsize); req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER, - obddev->u.cli.cl_max_mds_easize); + mdt_md_capsule_size); req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER, acl_bufsize); if (!(it->it_op & IT_CREAT) && it->it_op & IT_OPEN && @@ -391,7 +394,7 @@ mdc_intent_open_pack(struct obd_export *exp, struct lookup_intent *it, lustre_msg_early_size()); /* Estimate free space for DoM files in repbuf */ repsize_estimate = repsize - (req->rq_replen - - obddev->u.cli.cl_max_mds_easize + + mdt_md_capsule_size + sizeof(struct lov_comp_md_v1) + sizeof(struct lov_comp_md_entry_v1) + lov_mds_md_size(0, LOV_MAGIC_V3)); @@ -546,10 +549,7 @@ mdc_intent_getattr_pack(struct obd_export *exp, struct lookup_intent *it, lit = req_capsule_client_get(&req->rq_pill, &RMF_LDLM_INTENT); lit->opc = (__u64)it->it_op; - if (obddev->u.cli.cl_default_mds_easize > 0) - easize = obddev->u.cli.cl_default_mds_easize; - else - easize = obddev->u.cli.cl_max_mds_easize; + easize = obddev->u.cli.cl_default_mds_easize; /* pack the intended request */ mdc_getattr_pack(req, valid, it->it_flags, op_data, easize); diff --git a/lustre/mdt/mdt_lib.c b/lustre/mdt/mdt_lib.c index 428abef..4cda802 100644 --- a/lustre/mdt/mdt_lib.c +++ b/lustre/mdt/mdt_lib.c @@ -1162,6 +1162,7 @@ static int mdt_setattr_unpack(struct mdt_thread_info *info) rr->rr_eadata = req_capsule_client_get(pill, &RMF_EADATA); rr->rr_eadatalen = req_capsule_get_size(pill, &RMF_EADATA, RCL_CLIENT); + if (rr->rr_eadatalen > 0) { const struct lmv_user_md *lum; @@ -1503,6 +1504,7 @@ static int mdt_migrate_unpack(struct mdt_thread_info *info) rr->rr_eadatalen = req_capsule_get_size(pill, &RMF_EADATA, RCL_CLIENT); + if (rr->rr_eadatalen > 0) { rr->rr_eadata = req_capsule_client_get(pill, &RMF_EADATA); @@ -1591,6 +1593,7 @@ static int mdt_open_unpack(struct mdt_thread_info *info) if (req_capsule_field_present(pill, 
&RMF_EADATA, RCL_CLIENT)) { rr->rr_eadatalen = req_capsule_get_size(pill, &RMF_EADATA, RCL_CLIENT); + if (rr->rr_eadatalen > 0) { rr->rr_eadata = req_capsule_client_get(pill, &RMF_EADATA); @@ -1662,6 +1665,10 @@ static int mdt_setxattr_unpack(struct mdt_thread_info *info) if (req_capsule_field_present(pill, &RMF_EADATA, RCL_CLIENT)) { rr->rr_eadatalen = req_capsule_get_size(pill, &RMF_EADATA, RCL_CLIENT); + + if (rr->rr_eadatalen > info->mti_mdt->mdt_max_ea_size) + RETURN(-E2BIG); + if (rr->rr_eadatalen > 0) { rr->rr_eadata = req_capsule_client_get(pill, &RMF_EADATA); diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index 41ee352..ea81256 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -2297,8 +2297,8 @@ static void osd_conf_get(const struct lu_env *env, #endif param->ddp_max_ea_size = sb->s_blocksize - ea_overhead; - if (param->ddp_max_ea_size > OSD_MAX_EA_SIZE) - param->ddp_max_ea_size = OSD_MAX_EA_SIZE; + if (param->ddp_max_ea_size > OBD_MAX_EA_SIZE - ea_overhead) + param->ddp_max_ea_size = OBD_MAX_EA_SIZE - ea_overhead; /* * Preferred RPC size for efficient disk IO. 
4MB shows good diff --git a/lustre/osd-zfs/osd_handler.c b/lustre/osd-zfs/osd_handler.c index 9f23711..9a84648 100644 --- a/lustre/osd-zfs/osd_handler.c +++ b/lustre/osd-zfs/osd_handler.c @@ -608,7 +608,8 @@ static void osd_conf_get(const struct lu_env *env, param->ddp_mntopts = MNTOPT_USERXATTR; if (osd->od_posix_acl) param->ddp_mntopts |= MNTOPT_ACL; - param->ddp_max_ea_size = DXATTR_MAX_ENTRY_SIZE; + /* Previously DXATTR_MAX_ENTRY_SIZE */ + param->ddp_max_ea_size = OBD_MAX_EA_SIZE; /* for maxbytes, report same value as ZPL */ param->ddp_maxbytes = MAX_LFS_FILESIZE; diff --git a/lustre/osd-zfs/osd_xattr.c b/lustre/osd-zfs/osd_xattr.c index 67be1b4..ab4d683 100644 --- a/lustre/osd-zfs/osd_xattr.c +++ b/lustre/osd-zfs/osd_xattr.c @@ -583,8 +583,7 @@ int __osd_sa_xattr_set(const struct lu_env *env, struct osd_object *obj, return rc; LASSERT(obj->oo_sa_xattr); - /* Limited to 32k to keep nvpair memory allocations small */ - if (buf->lb_len > DXATTR_MAX_ENTRY_SIZE) { + if (buf->lb_len > OBD_MAX_EA_SIZE) { too_big = 1; } else { /* Prevent the DXATTR SA from consuming the entire SA diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index 8373e9a..613c9e1 100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -3494,11 +3494,11 @@ test_48() { # bz-17636 LU-7473 then count=28 # hard coded of RPC protocol elif [ $(facet_fstype $SINGLEMDS) != ldiskfs ]; then - count=4000 # max_num 4091 max_ea_size = 32768 + count=4000 # max_num 4091 max_ea_size = ~65536 elif ! large_xattr_enabled; then count=450 # max_num 497 max_ea_size = 4012 else - count=4500 # max_num 8187 max_ea_size = 1048492 + count=4500 # max_num 8187 max_ea_size = 65452 # not create too much (>5000) to save test time fi @@ -5847,8 +5847,23 @@ test_81() { # LU-4665 # Check max_easize. 
local max_easize=$($LCTL get_param -n llite.*.max_easize) - [[ $max_easize -eq 128 ]] || - error "max_easize is $max_easize, should be 128 bytes" + if [ $MDS1_VERSION -lt $(version_code 2.12.51) ] + then + [[ $max_easize -eq 128 ]] || + error "max_easize is $max_easize, should be 128 bytes" + else + # LU-11868 + # 4012 is 4096 - ldiskfs ea overhead + [[ $max_easize -ge 4012 ]] || + error "max_easize is $max_easize, should be at least 4012 bytes" + + # 65452 is XATTR_SIZE_MAX - ldiskfs ea overhead + if large_xattr_enabled; + then + [[ $max_easize -ge 65452 ]] || + error "max_easize is $max_easize, should be at least 65452 bytes" + fi + fi restore_ostindex } diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 0c9fc6a..3b71e70 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -8355,11 +8355,19 @@ grow_xattr() { local file=$DIR/$tfile local value="$(generate_string $xsize)" local xbig=trusted.big + local toobig=$2 touch $file log "save $xbig on $file" - setfattr -n $xbig -v $value $file || - error "saving $xbig on $file failed" + if [ -z "$toobig" ] + then + setfattr -n $xbig -v $value $file || + error "saving $xbig on $file failed" + else + setfattr -n $xbig -v $value $file && + error "saving $xbig on $file succeeded" + return 0 + fi local orig=$(get_xattr_value $xbig $file) [[ "$orig" != "$value" ]] && error "$xbig different after saving $xbig" @@ -8390,7 +8398,12 @@ run_test 102h "grow xattr from inside inode to external block" test_102ha() { large_xattr_enabled || skip_env "ea_inode feature disabled" + echo "setting xattr of max xattr size: $(max_xattr_size)" grow_xattr $(max_xattr_size) + + echo "setting xattr of > max xattr size: $(max_xattr_size) + 10" + echo "This should fail:" + grow_xattr $(($(max_xattr_size) + 10)) 1 } run_test 102ha "grow xattr from inside inode to external inode" diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 158eeb1..5139539 100755 --- a/lustre/tests/test-framework.sh 
+++ b/lustre/tests/test-framework.sh @@ -8202,8 +8202,7 @@ max_xattr_size() { local size if large_xattr_enabled; then - # include/linux/limits.h: #define XATTR_SIZE_MAX 65536 - size=65536 + size=$($LCTL get_param -n llite.*.max_easize) else local mds_dev=$(mdsdevname ${SINGLEMDS//mds/}) local block_size=$(get_block_size $SINGLEMDS $mds_dev) -- 1.8.3.1