X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fmdc%2Fmdc_request.c;h=646db7385da17938f5d6de47d90df5dcc7b58485;hp=ad3f62baed3daf69b9f9918dd95685b36b4b4157;hb=f843facff59226d3788d855d1d6948523ab8d944;hpb=ccabce23bd9e366c345c852f565766a799f61238 diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index ad3f62ba..646db73 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -53,7 +53,6 @@ #include #include #include -#include #include #include #include @@ -192,20 +191,34 @@ static int mdc_getattr_common(struct obd_export *exp, RETURN(0); } +static void mdc_reset_acl_req(struct ptlrpc_request *req) +{ + spin_lock(&req->rq_early_free_lock); + sptlrpc_cli_free_repbuf(req); + req->rq_repbuf = NULL; + req->rq_repbuf_len = 0; + req->rq_repdata = NULL; + req->rq_reqdata_len = 0; + spin_unlock(&req->rq_early_free_lock); +} + static int mdc_getattr(struct obd_export *exp, struct md_op_data *op_data, struct ptlrpc_request **request) { - struct ptlrpc_request *req; - int rc; - ENTRY; + struct ptlrpc_request *req; + struct obd_import *imp = class_exp2cliimp(exp); + __u32 acl_bufsize = LUSTRE_POSIX_ACL_MAX_SIZE_OLD; + int rc; + ENTRY; /* Single MDS without an LMV case */ if (op_data->op_flags & MF_GET_MDT_IDX) { op_data->op_mds = 0; RETURN(0); } - *request = NULL; - req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_MDS_GETATTR); + + *request = NULL; + req = ptlrpc_request_alloc(imp, &RQF_MDS_GETATTR); if (req == NULL) RETURN(-ENOMEM); @@ -215,33 +228,42 @@ static int mdc_getattr(struct obd_export *exp, struct md_op_data *op_data, RETURN(rc); } +again: mdc_pack_body(req, &op_data->op_fid1, op_data->op_valid, op_data->op_mode, -1, 0); + req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER, acl_bufsize); + req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER, + op_data->op_mode); + ptlrpc_request_set_replen(req); - req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER, - req->rq_import->imp_connect_data.ocd_max_easize); - req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER, - op_data->op_mode); - ptlrpc_request_set_replen(req); + rc = mdc_getattr_common(exp, req); + if (rc) { + if (rc == -ERANGE) { + acl_bufsize = MIN(imp->imp_connect_data.ocd_max_easize, + XATTR_SIZE_MAX); + mdc_reset_acl_req(req); + goto again; + } - rc = mdc_getattr_common(exp, req); - if (rc) - ptlrpc_req_finished(req); - else - *request = req; - RETURN(rc); + ptlrpc_req_finished(req); + } else { + *request = req; + } + + RETURN(rc); } static int mdc_getattr_name(struct obd_export *exp, struct md_op_data *op_data, struct ptlrpc_request **request) { - struct ptlrpc_request *req; - int rc; - ENTRY; + struct ptlrpc_request *req; + struct obd_import *imp = class_exp2cliimp(exp); + __u32 acl_bufsize = LUSTRE_POSIX_ACL_MAX_SIZE_OLD; + int rc; + ENTRY; - *request = NULL; - req = ptlrpc_request_alloc(class_exp2cliimp(exp), - &RQF_MDS_GETATTR_NAME); + *request = NULL; + req = ptlrpc_request_alloc(imp, &RQF_MDS_GETATTR_NAME); if (req == NULL) RETURN(-ENOMEM); @@ -254,9 +276,6 @@ static int mdc_getattr_name(struct obd_export *exp, struct md_op_data *op_data, RETURN(rc); } - mdc_pack_body(req, &op_data->op_fid1, op_data->op_valid, - op_data->op_mode, op_data->op_suppgids[0], 0); - if (op_data->op_name) { char *name = req_capsule_client_get(&req->rq_pill, &RMF_NAME); LASSERT(strnlen(op_data->op_name, op_data->op_namelen) == @@ -264,18 +283,29 @@ static int mdc_getattr_name(struct obd_export *exp, struct md_op_data *op_data, memcpy(name, op_data->op_name, op_data->op_namelen); } - req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER, - op_data->op_mode); - req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER, - req->rq_import->imp_connect_data.ocd_max_easize); - ptlrpc_request_set_replen(req); +again: + mdc_pack_body(req, &op_data->op_fid1, op_data->op_valid, + op_data->op_mode, op_data->op_suppgids[0], 0); + req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER, + op_data->op_mode); + req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER, acl_bufsize); + ptlrpc_request_set_replen(req); - rc = mdc_getattr_common(exp, req); - if (rc) - ptlrpc_req_finished(req); - else - *request = req; - RETURN(rc); + rc = mdc_getattr_common(exp, req); + if (rc) { + if (rc == -ERANGE) { + acl_bufsize = MIN(imp->imp_connect_data.ocd_max_easize, + XATTR_SIZE_MAX); + mdc_reset_acl_req(req); + goto again; + } + + ptlrpc_req_finished(req); + } else { + *request = req; + } + + RETURN(rc); } static int mdc_xattr_common(struct obd_export *exp,const struct req_format *fmt, @@ -295,16 +325,25 @@ static int mdc_xattr_common(struct obd_export *exp,const struct req_format *fmt, if (req == NULL) RETURN(-ENOMEM); - if (xattr_name) { - xattr_namelen = strlen(xattr_name) + 1; - req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT, - xattr_namelen); - } - if (input_size) { - LASSERT(input); - req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_CLIENT, - input_size); - } + if (xattr_name) { + xattr_namelen = strlen(xattr_name) + 1; + req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT, + xattr_namelen); + } + if (input_size) + LASSERT(input); + req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_CLIENT, + input_size); + + /* get SELinux policy info if any */ + rc = sptlrpc_get_sepol(req); + if (rc < 0) { + ptlrpc_request_free(req); + RETURN(rc); + } + req_capsule_set_size(&req->rq_pill, &RMF_SELINUX_POL, RCL_CLIENT, + strlen(req->rq_sepol) ? + strlen(req->rq_sepol) + 1 : 0); /* Flush local XATTR locks to get rid of a possible cancel RPC */ if (opcode == MDS_REINT && fid_is_sane(fid) && @@ -364,6 +403,8 @@ static int mdc_xattr_common(struct obd_export *exp,const struct req_format *fmt, memcpy(tmp, input, input_size); } + mdc_file_sepol_pack(req); + if (req_capsule_has_field(&req->rq_pill, &RMF_EADATA, RCL_SERVER)) req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_SERVER, output_size); @@ -386,26 +427,77 @@ static int mdc_xattr_common(struct obd_export *exp,const struct req_format *fmt, } static int mdc_setxattr(struct obd_export *exp, const struct lu_fid *fid, - u64 valid, const char *xattr_name, - const char *input, int input_size, int output_size, - int flags, __u32 suppgid, - struct ptlrpc_request **request) + u64 obd_md_valid, const char *name, + const void *value, size_t value_size, + unsigned int xattr_flags, u32 suppgid, + struct ptlrpc_request **req) { + LASSERT(obd_md_valid == OBD_MD_FLXATTR || + obd_md_valid == OBD_MD_FLXATTRRM); + return mdc_xattr_common(exp, &RQF_MDS_REINT_SETXATTR, - fid, MDS_REINT, valid, xattr_name, - input, input_size, output_size, flags, - suppgid, request); + fid, MDS_REINT, obd_md_valid, name, + value, value_size, 0, xattr_flags, suppgid, + req); } static int mdc_getxattr(struct obd_export *exp, const struct lu_fid *fid, - u64 valid, const char *xattr_name, - const char *input, int input_size, int output_size, - int flags, struct ptlrpc_request **request) + u64 obd_md_valid, const char *name, size_t buf_size, + struct ptlrpc_request **req) { - return mdc_xattr_common(exp, &RQF_MDS_GETXATTR, - fid, MDS_GETXATTR, valid, xattr_name, - input, input_size, output_size, flags, - -1, request); + struct mdt_body *body; + int rc; + + LASSERT(obd_md_valid == OBD_MD_FLXATTR || + obd_md_valid == OBD_MD_FLXATTRLS); + + CDEBUG(D_INFO, "%s: get xattr '%s' for "DFID"\n", + exp->exp_obd->obd_name, name, PFID(fid)); + rc = mdc_xattr_common(exp, &RQF_MDS_GETXATTR, fid, MDS_GETXATTR, + obd_md_valid, name, NULL, 0, buf_size, 0, -1, + req); + if (rc < 0) + GOTO(out, rc); + + body = req_capsule_server_get(&(*req)->rq_pill, &RMF_MDT_BODY); + if (body == NULL) + GOTO(out, rc = -EPROTO); + + /* only detect the xattr size */ + if (buf_size == 0) { + /* LU-11109: Older MDTs do not distinguish + * between nonexistent xattrs and zero length + * values in this case. Newer MDTs will return + * -ENODATA or set OBD_MD_FLXATTR. */ + GOTO(out, rc = body->mbo_eadatasize); + } + + if (body->mbo_eadatasize == 0) { + /* LU-11109: Newer MDTs set OBD_MD_FLXATTR on + * success so that we can distinguish between + * zero length value and nonexistent xattr. + * + * If OBD_MD_FLXATTR is not set then we keep + * the old behavior and return -ENODATA for + * getxattr() when mbo_eadatasize is 0. But + * -ENODATA only makes sense for getxattr() + * and not for listxattr(). */ + if (body->mbo_valid & OBD_MD_FLXATTR) + GOTO(out, rc = 0); + else if (obd_md_valid == OBD_MD_FLXATTR) + GOTO(out, rc = -ENODATA); + else + GOTO(out, rc = 0); + } + + GOTO(out, rc = body->mbo_eadatasize); +out: + if (rc < 0) { + ptlrpc_req_finished(*req); + *req = NULL; + } + + return rc; } #ifdef CONFIG_FS_POSIX_ACL @@ -492,14 +584,14 @@ int mdc_get_lustre_md(struct obd_export *exp, struct ptlrpc_request *req, GOTO(out, rc = -EPROTO); } - lmv_size = md->body->mbo_eadatasize; - if (lmv_size == 0) { - CDEBUG(D_INFO, "OBD_MD_FLDIREA is set, " - "but eadatasize 0\n"); - RETURN(-EPROTO); - } - if (md->body->mbo_valid & OBD_MD_MEA) { + lmv_size = md->body->mbo_eadatasize; + if (lmv_size == 0) { + CDEBUG(D_INFO, "OBD_MD_FLDIREA is set, " + "but eadatasize 0\n"); + RETURN(-EPROTO); + } + lmv = req_capsule_server_sized_get(pill, &RMF_MDT_MD, lmv_size); if (lmv == NULL) @@ -509,14 +601,43 @@ int mdc_get_lustre_md(struct obd_export *exp, struct ptlrpc_request *req, if (rc < 0) GOTO(out, rc); - if (rc < (typeof(rc))sizeof(*md->lmv)) { - CDEBUG(D_INFO, "size too small: " - "rc < sizeof(*md->lmv) (%d < %d)\n", - rc, (int)sizeof(*md->lmv)); + if (rc < (int)sizeof(*md->lmv)) { + struct lmv_foreign_md *lfm = md->lfm; + + /* short (< sizeof(struct lmv_stripe_md)) + * foreign LMV case + */ + if (lfm->lfm_magic != LMV_MAGIC_FOREIGN) { + CDEBUG(D_INFO, + "lmv size too small: %d < %d\n", + rc, (int)sizeof(*md->lmv)); + GOTO(out, rc = -EPROTO); + } + } + } + + /* since 2.12.58 intent_getattr fetches default LMV */ + if (md->body->mbo_valid & OBD_MD_DEFAULT_MEA) { + lmv_size = sizeof(struct lmv_user_md); + lmv = req_capsule_server_sized_get(pill, + &RMF_DEFAULT_MDT_MD, + lmv_size); + if (!lmv) + GOTO(out, rc = -EPROTO); + + rc = md_unpackmd(md_exp, &md->default_lmv, lmv, + lmv_size); + if (rc < 0) + GOTO(out, rc); + + if (rc < (int)sizeof(*md->default_lmv)) { + CDEBUG(D_INFO, + "default lmv size too small: %d < %d\n", + rc, (int)sizeof(*md->default_lmv)); GOTO(out, rc = -EPROTO); } } - } + } rc = 0; if (md->body->mbo_valid & OBD_MD_FLACL) { @@ -553,41 +674,41 @@ int mdc_free_lustre_md(struct obd_export *exp, struct lustre_md *md) void mdc_replay_open(struct ptlrpc_request *req) { - struct md_open_data *mod = req->rq_cb_data; - struct ptlrpc_request *close_req; - struct obd_client_handle *och; - struct lustre_handle old; - struct mdt_body *body; - ENTRY; + struct md_open_data *mod = req->rq_cb_data; + struct ptlrpc_request *close_req; + struct obd_client_handle *och; + struct lustre_handle old_open_handle = { }; + struct mdt_body *body; + ENTRY; - if (mod == NULL) { - DEBUG_REQ(D_ERROR, req, - "Can't properly replay without open data."); - EXIT; - return; - } + if (mod == NULL) { + DEBUG_REQ(D_ERROR, req, + "Can't properly replay without open data."); + EXIT; + return; + } - body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); - LASSERT(body != NULL); + body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); + LASSERT(body != NULL); spin_lock(&req->rq_lock); och = mod->mod_och; - if (och && och->och_fh.cookie) + if (och && och->och_open_handle.cookie) req->rq_early_free_repbuf = 1; else req->rq_early_free_repbuf = 0; spin_unlock(&req->rq_lock); if (req->rq_early_free_repbuf) { - struct lustre_handle *file_fh; + struct lustre_handle *file_open_handle; LASSERT(och->och_magic == OBD_CLIENT_HANDLE_MAGIC); - file_fh = &och->och_fh; + file_open_handle = &och->och_open_handle; CDEBUG(D_HA, "updating handle from %#llx to %#llx\n", - file_fh->cookie, body->mbo_handle.cookie); - old = *file_fh; - *file_fh = body->mbo_handle; + file_open_handle->cookie, body->mbo_open_handle.cookie); + old_open_handle = *file_open_handle; + *file_open_handle = body->mbo_open_handle; } close_req = mod->mod_close_req; @@ -601,10 +722,11 @@ void mdc_replay_open(struct ptlrpc_request *req) LASSERT(epoch); if (req->rq_early_free_repbuf) - LASSERT(!memcmp(&old, &epoch->mio_handle, sizeof(old))); + LASSERT(old_open_handle.cookie == + epoch->mio_open_handle.cookie); DEBUG_REQ(D_HA, close_req, "updating close body with new fh"); - epoch->mio_handle = body->mbo_handle; + epoch->mio_open_handle = body->mbo_open_handle; } EXIT; } @@ -686,20 +808,20 @@ int mdc_set_open_replay_data(struct obd_export *exp, open_req->rq_commit_cb = mdc_commit_open; open_req->rq_early_free_repbuf = 1; spin_unlock(&open_req->rq_lock); - } + } rec->cr_fid2 = body->mbo_fid1; - rec->cr_ioepoch = body->mbo_ioepoch; - rec->cr_old_handle.cookie = body->mbo_handle.cookie; + rec->cr_open_handle_old = body->mbo_open_handle; open_req->rq_replay_cb = mdc_replay_open; if (!fid_is_sane(&body->mbo_fid1)) { - DEBUG_REQ(D_ERROR, open_req, "Saving replay request with " - "insane fid"); - LBUG(); - } + DEBUG_REQ(D_ERROR, open_req, + "saving replay request with insane FID " DFID, + PFID(&body->mbo_fid1)); + LBUG(); + } - DEBUG_REQ(D_RPCTRACE, open_req, "Set up open replay data"); - RETURN(0); + DEBUG_REQ(D_RPCTRACE, open_req, "Set up open replay data"); + RETURN(0); } static void mdc_free_open(struct md_open_data *mod) @@ -743,7 +865,7 @@ int mdc_clear_open_replay_data(struct obd_export *exp, spin_lock(&mod->mod_open_req->rq_lock); if (mod->mod_och) - mod->mod_och->och_fh.cookie = 0; + mod->mod_och->och_open_handle.cookie = 0; mod->mod_open_req->rq_early_free_repbuf = 0; spin_unlock(&mod->mod_open_req->rq_lock); mdc_free_open(mod); @@ -808,7 +930,7 @@ static int mdc_close(struct obd_export *exp, struct md_op_data *op_data, mod->mod_close_req = req; - DEBUG_REQ(D_HA, mod->mod_open_req, "matched open"); + DEBUG_REQ(D_RPCTRACE, mod->mod_open_req, "matched open"); /* We no longer want to preserve this open for replay even * though the open was committed. b=3632, b=3633 */ spin_lock(&mod->mod_open_req->rq_lock); @@ -844,6 +966,9 @@ static int mdc_close(struct obd_export *exp, struct md_op_data *op_data, req->rq_request_portal = MDS_READPAGE_PORTAL; ptlrpc_at_set_req_timeout(req); + if (!(exp_connect_flags2(exp) & OBD_CONNECT2_LSOM)) + op_data->op_xvalid &= ~(OP_XVALID_LAZYSIZE | + OP_XVALID_LAZYBLOCKS); mdc_close_pack(req, op_data); @@ -991,7 +1116,7 @@ static void mdc_release_page(struct page *page, int remove) if (remove) { lock_page(page); if (likely(page->mapping != NULL)) - truncate_complete_page(page->mapping, page); + delete_from_page_cache(page); unlock_page(page); } put_page(page); @@ -1009,14 +1134,14 @@ static struct page *mdc_page_locate(struct address_space *mapping, __u64 *hash, struct page *page; int found; - spin_lock_irq(&mapping->tree_lock); + xa_lock_irq(&mapping->i_pages); found = radix_tree_gang_lookup(&mapping->page_tree, (void **)&page, offset, 1); if (found > 0 && !radix_tree_exceptional_entry(page)) { struct lu_dirpage *dp; get_page(page); - spin_unlock_irq(&mapping->tree_lock); + xa_unlock_irq(&mapping->i_pages); /* * In contrast to find_lock_page() we are sure that directory * page cannot be truncated (while DLM lock is held) and, @@ -1065,7 +1190,7 @@ static struct page *mdc_page_locate(struct address_space *mapping, __u64 *hash, page = ERR_PTR(-EIO); } } else { - spin_unlock_irq(&mapping->tree_lock); + xa_unlock_irq(&mapping->i_pages); page = NULL; } return page; @@ -1127,12 +1252,12 @@ static void mdc_adjust_dirpages(struct page **pages, int cfs_pgs, int lu_pgs) int i; for (i = 0; i < cfs_pgs; i++) { - struct lu_dirpage *dp = kmap(pages[i]); - struct lu_dirpage *first = dp; - struct lu_dirent *end_dirent = NULL; - struct lu_dirent *ent; - __u64 hash_end = le64_to_cpu(dp->ldp_hash_end); - __u32 flags = le32_to_cpu(dp->ldp_flags); + struct lu_dirpage *dp = kmap(pages[i]); + struct lu_dirpage *first = dp; + struct lu_dirent *end_dirent = NULL; + struct lu_dirent *ent; + __u64 hash_end = dp->ldp_hash_end; + __u32 flags = dp->ldp_flags; while (--lu_pgs > 0) { ent = lu_dirent_start(dp); @@ -1147,8 +1272,8 @@ static void mdc_adjust_dirpages(struct page **pages, int cfs_pgs, int lu_pgs) break; /* Save the hash and flags of this lu_dirpage. */ - hash_end = le64_to_cpu(dp->ldp_hash_end); - flags = le32_to_cpu(dp->ldp_flags); + hash_end = dp->ldp_hash_end; + flags = dp->ldp_flags; /* Check if lu_dirpage contains no entries. */ if (end_dirent == NULL) @@ -1183,14 +1308,6 @@ struct readpage_param { struct md_callback *rp_cb; }; -#ifndef HAVE_DELETE_FROM_PAGE_CACHE -static inline void delete_from_page_cache(struct page *page) -{ - remove_from_page_cache(page); - put_page(page); -} -#endif - /** * Read pages from server. * @@ -1232,7 +1349,8 @@ static int mdc_read_page_remote(void *data, struct page *page0) } for (npages = 1; npages < max_pages; npages++) { - page = page_cache_alloc_cold(inode->i_mapping); + page = __page_cache_alloc(mapping_gfp_mask(inode->i_mapping) + | __GFP_COLD); if (page == NULL) break; page_pool[npages] = page; @@ -1445,33 +1563,95 @@ fail: goto out_unlock; } +static int mdc_statfs_interpret(const struct lu_env *env, + struct ptlrpc_request *req, void *args, int rc) +{ + struct obd_info *oinfo = args; + struct obd_statfs *osfs; + + if (!rc) { + osfs = req_capsule_server_get(&req->rq_pill, &RMF_OBD_STATFS); + if (!osfs) + return -EPROTO; + + oinfo->oi_osfs = osfs; + + CDEBUG(D_CACHE, "blocks=%llu free=%llu avail=%llu " + "objects=%llu free=%llu state=%x\n", + osfs->os_blocks, osfs->os_bfree, osfs->os_bavail, + osfs->os_files, osfs->os_ffree, osfs->os_state); + } + + oinfo->oi_cb_up(oinfo, rc); + + return rc; +} + +static int mdc_statfs_async(struct obd_export *exp, + struct obd_info *oinfo, time64_t max_age, + struct ptlrpc_request_set *unused) +{ + struct ptlrpc_request *req; + struct obd_info *aa; + + req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp), &RQF_MDS_STATFS, + LUSTRE_MDS_VERSION, MDS_STATFS); + if (req == NULL) + return -ENOMEM; + + ptlrpc_request_set_replen(req); + req->rq_interpret_reply = mdc_statfs_interpret; + + aa = ptlrpc_req_async_args(aa, req); + *aa = *oinfo; + + ptlrpcd_add_req(req); + + return 0; +} static int mdc_statfs(const struct lu_env *env, struct obd_export *exp, struct obd_statfs *osfs, - __u64 max_age, __u32 flags) + time64_t max_age, __u32 flags) { - struct obd_device *obd = class_exp2obd(exp); - struct ptlrpc_request *req; - struct obd_statfs *msfs; - struct obd_import *imp = NULL; - int rc; - ENTRY; + struct obd_device *obd = class_exp2obd(exp); + struct req_format *fmt; + struct ptlrpc_request *req; + struct obd_statfs *msfs; + struct obd_import *imp = NULL; + int rc; + ENTRY; /* * Since the request might also come from lprocfs, so we need * sync this with client_disconnect_export Bug15684 */ down_read(&obd->u.cli.cl_sem); - if (obd->u.cli.cl_import) - imp = class_import_get(obd->u.cli.cl_import); + if (obd->u.cli.cl_import) + imp = class_import_get(obd->u.cli.cl_import); up_read(&obd->u.cli.cl_sem); - if (!imp) - RETURN(-ENODEV); + if (!imp) + RETURN(-ENODEV); + + fmt = &RQF_MDS_STATFS; + if ((exp_connect_flags2(exp) & OBD_CONNECT2_SUM_STATFS) && + (flags & OBD_STATFS_SUM)) + fmt = &RQF_MDS_STATFS_NEW; + req = ptlrpc_request_alloc_pack(imp, fmt, LUSTRE_MDS_VERSION, + MDS_STATFS); + if (req == NULL) + GOTO(output, rc = -ENOMEM); - req = ptlrpc_request_alloc_pack(imp, &RQF_MDS_STATFS, - LUSTRE_MDS_VERSION, MDS_STATFS); - if (req == NULL) - GOTO(output, rc = -ENOMEM); + if ((flags & OBD_STATFS_SUM) && + (exp_connect_flags2(exp) & OBD_CONNECT2_SUM_STATFS)) { + /* request aggregated states */ + struct mdt_body *body; + + body = req_capsule_client_get(&req->rq_pill, &RMF_MDT_BODY); + if (body == NULL) + GOTO(out, rc = -EPROTO); + body->mbo_valid = OBD_MD_FLAGSTATFS; + } ptlrpc_request_set_replen(req); @@ -1587,29 +1767,53 @@ out: ptlrpc_req_finished(req); return rc; } - -static int mdc_ioc_hsm_ct_register(struct obd_import *imp, __u32 archives) +/** + * Send hsm_ct_register to MDS + * + * \param[in] imp import + * \param[in] archive_count if in bitmap format, it is the bitmap, + * else it is the count of archive_ids + * \param[in] archives if in bitmap format, it is NULL, + * else it is archive_id lists + */ +static int mdc_ioc_hsm_ct_register(struct obd_import *imp, __u32 archive_count, + __u32 *archives) { - __u32 *archive_mask; - struct ptlrpc_request *req; - int rc; + struct ptlrpc_request *req; + __u32 *archive_array; + size_t archives_size; + int rc; ENTRY; - req = ptlrpc_request_alloc_pack(imp, &RQF_MDS_HSM_CT_REGISTER, - LUSTRE_MDS_VERSION, - MDS_HSM_CT_REGISTER); + req = ptlrpc_request_alloc(imp, &RQF_MDS_HSM_CT_REGISTER); if (req == NULL) - GOTO(out, rc = -ENOMEM); + RETURN(-ENOMEM); + + if (archives != NULL) + archives_size = sizeof(*archive_array) * archive_count; + else + archives_size = sizeof(archive_count); + + req_capsule_set_size(&req->rq_pill, &RMF_MDS_HSM_ARCHIVE, + RCL_CLIENT, archives_size); + + rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_HSM_CT_REGISTER); + if (rc) { + ptlrpc_request_free(req); + RETURN(-ENOMEM); + } mdc_pack_body(req, NULL, 0, 0, -1, 0); - /* Copy hsm_progress struct */ - archive_mask = req_capsule_client_get(&req->rq_pill, - &RMF_MDS_HSM_ARCHIVE); - if (archive_mask == NULL) + archive_array = req_capsule_client_get(&req->rq_pill, + &RMF_MDS_HSM_ARCHIVE); + if (archive_array == NULL) GOTO(out, rc = -EPROTO); - *archive_mask = archives; + if (archives != NULL) + memcpy(archive_array, archives, archives_size); + else + *archive_array = archive_count; ptlrpc_request_set_replen(req); @@ -1834,38 +2038,37 @@ static int mdc_ioc_hsm_ct_start(struct obd_export *exp, static int mdc_quotactl(struct obd_device *unused, struct obd_export *exp, struct obd_quotactl *oqctl) { - struct ptlrpc_request *req; - struct obd_quotactl *oqc; - int rc; - ENTRY; + struct ptlrpc_request *req; + struct obd_quotactl *oqc; + int rc; + ENTRY; - req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp), - &RQF_MDS_QUOTACTL, LUSTRE_MDS_VERSION, - MDS_QUOTACTL); - if (req == NULL) - RETURN(-ENOMEM); + req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp), + &RQF_MDS_QUOTACTL, LUSTRE_MDS_VERSION, + MDS_QUOTACTL); + if (req == NULL) + RETURN(-ENOMEM); - oqc = req_capsule_client_get(&req->rq_pill, &RMF_OBD_QUOTACTL); - *oqc = *oqctl; + oqc = req_capsule_client_get(&req->rq_pill, &RMF_OBD_QUOTACTL); + *oqc = *oqctl; - ptlrpc_request_set_replen(req); - ptlrpc_at_set_req_timeout(req); - req->rq_no_resend = 1; + ptlrpc_request_set_replen(req); + ptlrpc_at_set_req_timeout(req); - rc = ptlrpc_queue_wait(req); - if (rc) - CERROR("ptlrpc_queue_wait failed, rc: %d\n", rc); - - if (req->rq_repmsg && - (oqc = req_capsule_server_get(&req->rq_pill, &RMF_OBD_QUOTACTL))) { - *oqctl = *oqc; - } else if (!rc) { - CERROR ("Can't unpack obd_quotactl\n"); - rc = -EPROTO; - } - ptlrpc_req_finished(req); + rc = ptlrpc_queue_wait(req); + if (rc) + CERROR("ptlrpc_queue_wait failed, rc: %d\n", rc); + + if (req->rq_repmsg && + (oqc = req_capsule_server_get(&req->rq_pill, &RMF_OBD_QUOTACTL))) { + *oqctl = *oqc; + } else if (!rc) { + CERROR ("Can't unpack obd_quotactl\n"); + rc = -EPROTO; + } + ptlrpc_req_finished(req); - RETURN(rc); + RETURN(rc); } static int mdc_ioc_swap_layouts(struct obd_export *exp, @@ -1971,9 +2174,6 @@ static int mdc_iocontrol(unsigned int cmd, struct obd_export *exp, int len, case IOC_OSC_SET_ACTIVE: rc = ptlrpc_set_import_active(imp, data->ioc_offset); GOTO(out, rc); - case OBD_IOC_PING_TARGET: - rc = ptlrpc_obd_ping(obd); - GOTO(out, rc); /* * Normally IOC_OBD_STATFS, OBD_IOC_QUOTACTL iocontrol are handled by * LMV instead of MDC. But when the cluster is upgraded from 1.8, @@ -1994,7 +2194,7 @@ static int mdc_iocontrol(unsigned int cmd, struct obd_export *exp, int len, GOTO(out, rc = -EFAULT); rc = mdc_statfs(NULL, obd->obd_self_export, &stat_buf, - cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS), + ktime_get_seconds() - OBD_STATFS_CACHE_SECONDS, 0); if (rc != 0) GOTO(out, rc); @@ -2131,9 +2331,8 @@ static void lustre_swab_kuch(struct kuc_hdr *l) static int mdc_ioc_hsm_ct_start(struct obd_export *exp, struct lustre_kernelcomm *lk) { - struct obd_import *imp = class_exp2cliimp(exp); - __u32 archive = lk->lk_data; - int rc = 0; + struct obd_import *imp = class_exp2cliimp(exp); + int rc = 0; if (lk->lk_group != KUC_GRP_HSM) { CERROR("Bad copytool group %d\n", lk->lk_group); @@ -2147,7 +2346,12 @@ static int mdc_ioc_hsm_ct_start(struct obd_export *exp, /* Unregister with the coordinator */ rc = mdc_ioc_hsm_ct_unregister(imp); } else { - rc = mdc_ioc_hsm_ct_register(imp, archive); + __u32 *archives = NULL; + + if ((lk->lk_flags & LK_FLG_DATANR) && lk->lk_data_count > 0) + archives = lk->lk_data; + + rc = mdc_ioc_hsm_ct_register(imp, lk->lk_data_count, archives); } return rc; @@ -2198,17 +2402,29 @@ static int mdc_hsm_copytool_send(const struct obd_uuid *uuid, */ static int mdc_hsm_ct_reregister(void *data, void *cb_arg) { - struct kkuc_ct_data *kcd = data; - struct obd_import *imp = (struct obd_import *)cb_arg; - int rc; + struct obd_import *imp = (struct obd_import *)cb_arg; + struct kkuc_ct_data *kcd = data; + __u32 *archives = NULL; + int rc; - if (kcd == NULL || kcd->kcd_magic != KKUC_CT_DATA_MAGIC) + if (kcd == NULL || + (kcd->kcd_magic != KKUC_CT_DATA_ARRAY_MAGIC && + kcd->kcd_magic != KKUC_CT_DATA_BITMAP_MAGIC)) return -EPROTO; - CDEBUG(D_HA, "%s: recover copytool registration to MDT (archive=%#x)\n", - imp->imp_obd->obd_name, kcd->kcd_archive); - rc = mdc_ioc_hsm_ct_register(imp, kcd->kcd_archive); + if (kcd->kcd_magic == KKUC_CT_DATA_BITMAP_MAGIC) { + CDEBUG(D_HA, "%s: recover copytool registration to MDT " + "(archive=%#x)\n", imp->imp_obd->obd_name, + kcd->kcd_nr_archives); + } else { + CDEBUG(D_HA, "%s: recover copytool registration to MDT " + "(archive nr = %u)\n", + imp->imp_obd->obd_name, kcd->kcd_nr_archives); + if (kcd->kcd_nr_archives != 0) + archives = kcd->kcd_archives; + } + rc = mdc_ioc_hsm_ct_register(imp, kcd->kcd_nr_archives, archives); /* ignore error if the copytool is already registered */ return (rc == -EEXIST) ? 0 : rc; } @@ -2349,6 +2565,81 @@ static int mdc_fsync(struct obd_export *exp, const struct lu_fid *fid, RETURN(rc); } +struct mdc_rmfid_args { + int *mra_rcs; + int mra_nr; +}; + +int mdc_rmfid_interpret(const struct lu_env *env, struct ptlrpc_request *req, + void *args, int rc) +{ + struct mdc_rmfid_args *aa; + int *rcs, size; + ENTRY; + + if (!rc) { + aa = ptlrpc_req_async_args(aa, req); + + size = req_capsule_get_size(&req->rq_pill, &RMF_RCS, + RCL_SERVER); + LASSERT(size == sizeof(int) * aa->mra_nr); + rcs = req_capsule_server_get(&req->rq_pill, &RMF_RCS); + LASSERT(rcs); + LASSERT(aa->mra_rcs); + LASSERT(aa->mra_nr); + memcpy(aa->mra_rcs, rcs, size); + } + + RETURN(rc); +} + +static int mdc_rmfid(struct obd_export *exp, struct fid_array *fa, + int *rcs, struct ptlrpc_request_set *set) +{ + struct ptlrpc_request *req; + struct mdc_rmfid_args *aa; + struct mdt_body *b; + struct lu_fid *tmp; + int rc, flen; + ENTRY; + + req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_MDS_RMFID); + if (req == NULL) + RETURN(-ENOMEM); + + flen = fa->fa_nr * sizeof(struct lu_fid); + req_capsule_set_size(&req->rq_pill, &RMF_FID_ARRAY, + RCL_CLIENT, flen); + req_capsule_set_size(&req->rq_pill, &RMF_FID_ARRAY, + RCL_SERVER, flen); + req_capsule_set_size(&req->rq_pill, &RMF_RCS, + RCL_SERVER, fa->fa_nr * sizeof(__u32)); + rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_RMFID); + if (rc) { + ptlrpc_request_free(req); + RETURN(rc); + } + tmp = req_capsule_client_get(&req->rq_pill, &RMF_FID_ARRAY); + memcpy(tmp, fa->fa_fids, flen); + + mdc_pack_body(req, NULL, 0, 0, -1, 0); + b = req_capsule_client_get(&req->rq_pill, &RMF_MDT_BODY); + b->mbo_ctime = ktime_get_real_seconds(); + + ptlrpc_request_set_replen(req); + + LASSERT(rcs); + aa = ptlrpc_req_async_args(aa, req); + aa->mra_rcs = rcs; + aa->mra_nr = fa->fa_nr; + req->rq_interpret_reply = mdc_rmfid_interpret; + + ptlrpc_set_add_req(set, req); + ptlrpc_check_set(NULL, set); + + RETURN(rc); +} + static int mdc_import_event(struct obd_device *obd, struct obd_import *imp, enum obd_import_event event) { @@ -2463,6 +2754,12 @@ static int mdc_cancel_weight(struct ldlm_lock *lock) if (lock->l_policy_data.l_inodebits.bits & MDS_INODELOCK_OPEN) RETURN(0); + /* Special case for DoM locks, cancel only unused and granted locks */ + if (ldlm_has_dom(lock) && + (lock->l_granted_mode != lock->l_req_mode || + osc_ldlm_weigh_ast(lock) != 0)) + RETURN(0); + RETURN(1); } @@ -2521,14 +2818,11 @@ int mdc_setup(struct obd_device *obd, struct lustre_cfg *cfg) if (rc < 0) RETURN(rc); -#ifdef CONFIG_PROC_FS - obd->obd_vars = lprocfs_mdc_obd_vars; - lprocfs_obd_setup(obd, false); - lprocfs_alloc_md_stats(obd, 0); -#endif + rc = mdc_tunables_init(obd); + if (rc) + GOTO(err_osc_cleanup, rc); - sptlrpc_lprocfs_cliobd_attach(obd); - ptlrpc_lprocfs_register_obd(obd); + obd->u.cli.cl_dom_min_inline_repsize = MDC_DOM_DEF_INLINE_REPSIZE; ns_register_cancel(obd->obd_namespace, mdc_cancel_weight); @@ -2553,9 +2847,9 @@ int mdc_setup(struct obd_device *obd, struct lustre_cfg *cfg) err_changelog_cleanup: mdc_llog_finish(obd); err_llog_cleanup: - ptlrpc_lprocfs_unregister_obd(obd); lprocfs_free_md_stats(obd); - + ptlrpc_lprocfs_unregister_obd(obd); +err_osc_cleanup: osc_cleanup_common(obd); return rc; } @@ -2603,66 +2897,58 @@ static int mdc_cleanup(struct obd_device *obd) return osc_cleanup_common(obd); } -int mdc_process_config(struct obd_device *obd, size_t len, void *buf) -{ - struct lustre_cfg *lcfg = buf; - int rc; - - rc = class_process_proc_param(PARAM_MDC, obd->obd_vars, lcfg, obd); - return (rc > 0 ? 0: rc); -} - static struct obd_ops mdc_obd_ops = { - .o_owner = THIS_MODULE, - .o_setup = mdc_setup, - .o_precleanup = mdc_precleanup, - .o_cleanup = mdc_cleanup, - .o_add_conn = client_import_add_conn, - .o_del_conn = client_import_del_conn, - .o_connect = client_connect_import, + .o_owner = THIS_MODULE, + .o_setup = mdc_setup, + .o_precleanup = mdc_precleanup, + .o_cleanup = mdc_cleanup, + .o_add_conn = client_import_add_conn, + .o_del_conn = client_import_del_conn, + .o_connect = client_connect_import, .o_reconnect = osc_reconnect, .o_disconnect = osc_disconnect, - .o_iocontrol = mdc_iocontrol, - .o_set_info_async = mdc_set_info_async, - .o_statfs = mdc_statfs, + .o_iocontrol = mdc_iocontrol, + .o_set_info_async = mdc_set_info_async, + .o_statfs = mdc_statfs, + .o_statfs_async = mdc_statfs_async, .o_fid_init = client_fid_init, .o_fid_fini = client_fid_fini, - .o_fid_alloc = mdc_fid_alloc, - .o_import_event = mdc_import_event, - .o_get_info = mdc_get_info, - .o_process_config = mdc_process_config, - .o_get_uuid = mdc_get_uuid, - .o_quotactl = mdc_quotactl, + .o_fid_alloc = mdc_fid_alloc, + .o_import_event = mdc_import_event, + .o_get_info = mdc_get_info, + .o_get_uuid = mdc_get_uuid, + .o_quotactl = mdc_quotactl, }; static struct md_ops mdc_md_ops = { .m_get_root = mdc_get_root, - .m_null_inode = mdc_null_inode, - .m_close = mdc_close, - .m_create = mdc_create, - .m_enqueue = mdc_enqueue, - .m_getattr = mdc_getattr, - .m_getattr_name = mdc_getattr_name, - .m_intent_lock = mdc_intent_lock, - .m_link = mdc_link, - .m_rename = mdc_rename, - .m_setattr = mdc_setattr, - .m_setxattr = mdc_setxattr, - .m_getxattr = mdc_getxattr, + .m_null_inode = mdc_null_inode, + .m_close = mdc_close, + .m_create = mdc_create, + .m_enqueue = mdc_enqueue, + .m_getattr = mdc_getattr, + .m_getattr_name = mdc_getattr_name, + .m_intent_lock = mdc_intent_lock, + .m_link = mdc_link, + .m_rename = mdc_rename, + .m_setattr = mdc_setattr, + .m_setxattr = mdc_setxattr, + .m_getxattr = mdc_getxattr, .m_fsync = mdc_fsync, .m_file_resync = mdc_file_resync, .m_read_page = mdc_read_page, - .m_unlink = mdc_unlink, - .m_cancel_unused = mdc_cancel_unused, - .m_init_ea_size = mdc_init_ea_size, - .m_set_lock_data = mdc_set_lock_data, - .m_lock_match = mdc_lock_match, - .m_get_lustre_md = mdc_get_lustre_md, - .m_free_lustre_md = mdc_free_lustre_md, - .m_set_open_replay_data = mdc_set_open_replay_data, - .m_clear_open_replay_data = mdc_clear_open_replay_data, - .m_intent_getattr_async = mdc_intent_getattr_async, - .m_revalidate_lock = mdc_revalidate_lock + .m_unlink = mdc_unlink, + .m_cancel_unused = mdc_cancel_unused, + .m_init_ea_size = mdc_init_ea_size, + .m_set_lock_data = mdc_set_lock_data, + .m_lock_match = mdc_lock_match, + .m_get_lustre_md = mdc_get_lustre_md, + .m_free_lustre_md = mdc_free_lustre_md, + .m_set_open_replay_data = mdc_set_open_replay_data, + .m_clear_open_replay_data = mdc_clear_open_replay_data, + .m_intent_getattr_async = mdc_intent_getattr_async, + .m_revalidate_lock = mdc_revalidate_lock, + .m_rmfid = mdc_rmfid, }; static int __init mdc_init(void)