X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fmdc%2Fmdc_locks.c;h=66694f5a3ad7742db1986d11c3606af98182a333;hp=bb1d0a3ab424087c550bf699300cc6768d0e9b89;hb=2b6311e58e9cebc6076b86e209ea774c86d47c72;hpb=5185ba6381ac459ad60e11952dc55c17e3167c5e diff --git a/lustre/mdc/mdc_locks.c b/lustre/mdc/mdc_locks.c index bb1d0a3..66694f5 100644 --- a/lustre/mdc/mdc_locks.c +++ b/lustre/mdc/mdc_locks.c @@ -256,6 +256,8 @@ mdc_intent_open_pack(struct obd_export *exp, struct lookup_intent *it, int count = 0; enum ldlm_mode mode; int rc; + int repsize, repsize_estimate; + ENTRY; it->it_create_mode = (it->it_create_mode & ~S_IFMT) | S_IFREG; @@ -338,8 +340,46 @@ mdc_intent_open_pack(struct obd_export *exp, struct lookup_intent *it, req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER, obddev->u.cli.cl_max_mds_easize); req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER, acl_bufsize); - ptlrpc_request_set_replen(req); - return req; + + /** + * Inline buffer for possible data from Data-on-MDT files. + */ + req_capsule_set_size(&req->rq_pill, &RMF_NIOBUF_INLINE, RCL_SERVER, + sizeof(struct niobuf_remote)); + ptlrpc_request_set_replen(req); + + /* Get real repbuf allocated size as rounded up power of 2 */ + repsize = size_roundup_power2(req->rq_replen + + lustre_msg_early_size()); + /* Estimate free space for DoM files in repbuf */ + repsize_estimate = repsize - (req->rq_replen - + obddev->u.cli.cl_max_mds_easize + + sizeof(struct lov_comp_md_v1) + + sizeof(struct lov_comp_md_entry_v1) + + lov_mds_md_size(0, LOV_MAGIC_V3)); + + if (repsize_estimate < obddev->u.cli.cl_dom_min_inline_repsize) { + repsize = obddev->u.cli.cl_dom_min_inline_repsize - + repsize_estimate + sizeof(struct niobuf_remote); + req_capsule_set_size(&req->rq_pill, &RMF_NIOBUF_INLINE, + RCL_SERVER, + sizeof(struct niobuf_remote) + repsize); + ptlrpc_request_set_replen(req); + CDEBUG(D_INFO, "Increase repbuf by %d bytes, total: %d\n", + repsize, req->rq_replen); + repsize = size_roundup_power2(req->rq_replen + + lustre_msg_early_size()); + } + /* The only way to report real allocated repbuf size to the server + * is the lm_repsize but it must be set prior buffer allocation itself + * due to security reasons - it is part of buffer used in signature + * calculation (see LU-11414). Therefore the saved size is predicted + * value as rq_replen rounded to the next higher power of 2. + * Such estimation is safe. Though the final allocated buffer might + * be even larger, it is not possible to know that at this point. + */ + req->rq_reqmsg->lm_repsize = repsize; + RETURN(req); } #define GA_DEFAULT_EA_NAME_LEN 20 @@ -355,6 +395,7 @@ mdc_intent_getxattr_pack(struct obd_export *exp, struct ldlm_intent *lit; int rc, count = 0; struct list_head cancels = LIST_HEAD_INIT(cancels); + u32 ea_vals_buf_size = GA_DEFAULT_EA_VAL_LEN * GA_DEFAULT_EA_NUM; ENTRY; @@ -373,19 +414,32 @@ mdc_intent_getxattr_pack(struct obd_export *exp, lit = req_capsule_client_get(&req->rq_pill, &RMF_LDLM_INTENT); lit->opc = IT_GETXATTR; +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 0, 53, 0) + /* If the supplied buffer is too small then the server will + * return -ERANGE and llite will fallback to using non cached + * xattr operations. On servers before 2.10.1 a (non-cached) + * listxattr RPC for an orphan or dead file causes an oops. So + * let's try to avoid sending too small a buffer to too old a + * server. This is effectively undoing the memory conservation + * of LU-9417 when it would be *more* likely to crash the + * server. See LU-9856. */ + if (exp->exp_connect_data.ocd_version < OBD_OCD_VERSION(2, 10, 1, 0)) + ea_vals_buf_size = max_t(u32, ea_vals_buf_size, + exp->exp_connect_data.ocd_max_easize); +#endif + /* pack the intended request */ mdc_pack_body(req, &op_data->op_fid1, op_data->op_valid, - GA_DEFAULT_EA_NAME_LEN * GA_DEFAULT_EA_NUM, - -1, 0); + ea_vals_buf_size, -1, 0); req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_SERVER, GA_DEFAULT_EA_NAME_LEN * GA_DEFAULT_EA_NUM); req_capsule_set_size(&req->rq_pill, &RMF_EAVALS, RCL_SERVER, - GA_DEFAULT_EA_VAL_LEN * GA_DEFAULT_EA_NUM); + ea_vals_buf_size); req_capsule_set_size(&req->rq_pill, &RMF_EAVALS_LENS, RCL_SERVER, - sizeof(__u32) * GA_DEFAULT_EA_NUM); + sizeof(u32) * GA_DEFAULT_EA_NUM); req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER, 0); @@ -394,43 +448,6 @@ mdc_intent_getxattr_pack(struct obd_export *exp, RETURN(req); } -static struct ptlrpc_request *mdc_intent_unlink_pack(struct obd_export *exp, - struct lookup_intent *it, - struct md_op_data *op_data) -{ - struct ptlrpc_request *req; - struct obd_device *obddev = class_exp2obd(exp); - struct ldlm_intent *lit; - int rc; - ENTRY; - - req = ptlrpc_request_alloc(class_exp2cliimp(exp), - &RQF_LDLM_INTENT_UNLINK); - if (req == NULL) - RETURN(ERR_PTR(-ENOMEM)); - - req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT, - op_data->op_namelen + 1); - - rc = ldlm_prep_enqueue_req(exp, req, NULL, 0); - if (rc) { - ptlrpc_request_free(req); - RETURN(ERR_PTR(rc)); - } - - /* pack the intent */ - lit = req_capsule_client_get(&req->rq_pill, &RMF_LDLM_INTENT); - lit->opc = (__u64)it->it_op; - - /* pack the intended request */ - mdc_unlink_pack(req, op_data); - - req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER, - obddev->u.cli.cl_default_mds_easize); - ptlrpc_request_set_replen(req); - RETURN(req); -} - static struct ptlrpc_request * mdc_intent_getattr_pack(struct obd_export *exp, struct lookup_intent *it, struct md_op_data *op_data, __u32 acl_bufsize) @@ -602,14 +619,14 @@ static int mdc_finish_enqueue(struct obd_export *exp, * It's important that we do this first! Otherwise we might exit the * function without doing so, and try to replay a failed create * (bug 3440) */ - if (it->it_op & IT_OPEN && req->rq_replay && + if (it->it_op & IT_OPEN && req->rq_replay && (!it_disposition(it, DISP_OPEN_OPEN) || it->it_status != 0)) mdc_clear_replay_flag(req, it->it_status); - DEBUG_REQ(D_RPCTRACE, req, "op: %d disposition: %x, status: %d", + DEBUG_REQ(D_RPCTRACE, req, "op: %x disposition: %x, status: %d", it->it_op, it->it_disposition, it->it_status); - /* We know what to expect, so we do any byte flipping required here */ + /* We know what to expect, so we do any byte flipping required here */ if (it_has_reply_body(it)) { body = req_capsule_server_get(pill, &RMF_MDT_BODY); if (body == NULL) { @@ -670,6 +687,8 @@ static int mdc_finish_enqueue(struct obd_export *exp, /* maybe the lock was granted right away and layout * is packed into RMF_DLM_LVB of req */ lvb_len = req_capsule_get_size(pill, &RMF_DLM_LVB, RCL_SERVER); + CDEBUG(D_INFO, "%s: layout return lvb %d transno %lld\n", + class_exp2obd(exp)->obd_name, lvb_len, req->rq_transno); if (lvb_len > 0) { lvb_data = req_capsule_server_sized_get(pill, &RMF_DLM_LVB, lvb_len); @@ -781,19 +800,20 @@ static int mdc_enqueue_base(struct obd_export *exp, LASSERT(policy == NULL); saved_flags |= LDLM_FL_HAS_INTENT; - if (it->it_op & (IT_UNLINK | IT_GETATTR | IT_READDIR)) + if (it->it_op & (IT_GETATTR | IT_READDIR)) policy = &update_policy; else if (it->it_op & IT_LAYOUT) policy = &layout_policy; - else if (it->it_op & (IT_GETXATTR | IT_SETXATTR)) + else if (it->it_op & IT_GETXATTR) policy = &getxattr_policy; else policy = &lookup_policy; } generation = obddev->u.cli.cl_import->imp_generation; - if (!it || (it->it_op & (IT_CREAT | IT_OPEN_CREAT))) - acl_bufsize = imp->imp_connect_data.ocd_max_easize; + if (!it || (it->it_op & (IT_OPEN | IT_CREAT))) + acl_bufsize = MIN(imp->imp_connect_data.ocd_max_easize, + XATTR_SIZE_MAX); else acl_bufsize = LUSTRE_POSIX_ACL_MAX_SIZE_OLD; @@ -806,8 +826,6 @@ resend: res_id.name[3] = LDLM_FLOCK; } else if (it->it_op & IT_OPEN) { req = mdc_intent_open_pack(exp, it, op_data, acl_bufsize); - } else if (it->it_op & IT_UNLINK) { - req = mdc_intent_unlink_pack(exp, it, op_data); } else if (it->it_op & (IT_GETATTR | IT_LOOKUP)) { req = mdc_intent_getattr_pack(exp, it, op_data, acl_bufsize); } else if (it->it_op & IT_READDIR) { @@ -917,10 +935,11 @@ resend: if ((int)lockrep->lock_policy_res2 == -ERANGE && it->it_op & (IT_OPEN | IT_GETATTR | IT_LOOKUP) && - acl_bufsize != imp->imp_connect_data.ocd_max_easize) { + acl_bufsize == LUSTRE_POSIX_ACL_MAX_SIZE_OLD) { mdc_clear_replay_flag(req, -ERANGE); ptlrpc_req_finished(req); - acl_bufsize = imp->imp_connect_data.ocd_max_easize; + acl_bufsize = MIN(imp->imp_connect_data.ocd_max_easize, + XATTR_SIZE_MAX); goto resend; } @@ -1211,18 +1230,18 @@ int mdc_intent_lock(struct obd_export *exp, struct md_op_data *op_data, } static int mdc_intent_getattr_async_interpret(const struct lu_env *env, - struct ptlrpc_request *req, - void *args, int rc) + struct ptlrpc_request *req, + void *args, int rc) { struct mdc_getattr_args *ga = args; - struct obd_export *exp = ga->ga_exp; - struct md_enqueue_info *minfo = ga->ga_minfo; + struct obd_export *exp = ga->ga_exp; + struct md_enqueue_info *minfo = ga->ga_minfo; struct ldlm_enqueue_info *einfo = &minfo->mi_einfo; - struct lookup_intent *it; - struct lustre_handle *lockh; - struct obd_device *obddev; - struct ldlm_reply *lockrep; - __u64 flags = LDLM_FL_HAS_INTENT; + struct lookup_intent *it; + struct lustre_handle *lockh; + struct obd_device *obddev; + struct ldlm_reply *lockrep; + __u64 flags = LDLM_FL_HAS_INTENT; ENTRY; it = &minfo->mi_it;