4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright (c) 2011, 2017, Intel Corporation.
29 * This file is part of Lustre, http://www.lustre.org/
30 * Lustre is a trademark of Sun Microsystems, Inc.
32 * lustre/mdt/mdt_xattr.c
34 * Lustre Metadata Target (mdt) extended attributes management.
36 * Author: Peter Braam <braam@clusterfs.com>
37 * Author: Andreas Dilger <adilger@clusterfs.com>
38 * Author: Phil Schwan <phil@clusterfs.com>
39 * Author: Huang Hua <huanghua@clusterfs.com>
42 #define DEBUG_SUBSYSTEM S_MDS
44 #include <linux/xattr.h>
45 #include <obd_class.h>
46 #include <lustre_nodemap.h>
47 #include <lustre_acl.h>
48 #include "mdt_internal.h"
51 /*
 * Size the reply buffers for a getxattr-style request and pack the reply.
 *
 * The request type is mbo_valid masked with
 * OBD_MD_FLXATTR | OBD_MD_FLXATTRLS:
 *  - OBD_MD_FLXATTR:    size of one named xattr, from mo_xattr_get()
 *  - OBD_MD_FLXATTRLS:  size of the name list, from mo_xattr_list()
 *  - OBD_MD_FLXATTRALL: the client supplied mbo_eadatasize, after
 *                       validation (presumably both bits set; confirm
 *                       against the flag definitions)
 *
 * Return EADATA length to the caller; a negative value means error.
 */
52 static int mdt_getxattr_pack_reply(struct mdt_thread_info * info)
54 struct req_capsule *pill = info->mti_pill;
55 struct ptlrpc_request *req = mdt_info_req(info);
56 const char *xattr_name;
58 static const char user_string[] = "user.";
64 valid = info->mti_body->mbo_valid & (OBD_MD_FLXATTR | OBD_MD_FLXATTRLS);
66 /* Determine how many bytes we need */
67 if (valid == OBD_MD_FLXATTR) {
68 xattr_name = req_capsule_client_get(pill, &RMF_NAME);
/* Names with the "user." prefix are special-cased when the export
 * did not negotiate OBD_CONNECT_XATTR. */
72 if (!(exp_connect_flags(req->rq_export) & OBD_CONNECT_XATTR) &&
73 !strncmp(xattr_name, user_string, sizeof(user_string) - 1))
/* LU_BUF_NULL is passed as the buffer: only the size is wanted here
 * (presumably nothing is copied; confirm in mo_xattr_get()). */
76 size = mo_xattr_get(info->mti_env,
77 mdt_object_child(info->mti_object),
78 &LU_BUF_NULL, xattr_name);
79 if (size == -ENODATA) {
80 /* XXX: Some client code will not handle -ENODATA
81 * for XATTR_NAME_LOV (trusted.lov) properly. */
82 if (strcmp(xattr_name, XATTR_NAME_LOV) == 0)
89 } else if (valid == OBD_MD_FLXATTRLS) {
91 size = mo_xattr_list(info->mti_env,
92 mdt_object_child(info->mti_object),
94 } else if (valid == OBD_MD_FLXATTRALL) {
96 /* N.B. eadatasize = 0 is not valid for FLXATTRALL */
97 /* We could calculate accurate sizes, but this would
98 * introduce a lot of overhead, let's do it later... */
99 size = info->mti_body->mbo_eadatasize;
/* Reject a size that is not positive, exceeds mdt_max_ea_size, or is
 * not a multiple of sizeof(__u32). */
100 if (size <= 0 || size > info->mti_mdt->mdt_max_ea_size ||
101 size & (sizeof(__u32) - 1)) {
102 DEBUG_REQ(D_ERROR, req,
103 "%s: invalid EA size(%d) for FLXATTRALL\n",
104 mdt_obd_name(info->mti_mdt), size);
/* Both the values buffer and the lengths buffer are reserved at the
 * full requested size. */
107 req_capsule_set_size(pill, &RMF_EAVALS, RCL_SERVER, size);
108 req_capsule_set_size(pill, &RMF_EAVALS_LENS, RCL_SERVER, size);
110 CDEBUG(D_INFO, "Valid bits: %#llx\n",
111 info->mti_body->mbo_valid);
/* -EOPNOTSUPP and -ENOENT are not reported through CERROR. */
116 if (size != -EOPNOTSUPP && size != -ENOENT)
117 CERROR("%s: error geting EA size for '%s': rc = %d\n",
118 mdt_obd_name(info->mti_mdt), xattr_name, size);
/* If the reply format carries an ACL field, reserve
 * LUSTRE_POSIX_ACL_MAX_SIZE_OLD bytes for it. */
122 if (req_capsule_has_field(pill, &RMF_ACL, RCL_SERVER))
123 req_capsule_set_size(pill, &RMF_ACL, RCL_SERVER,
124 LUSTRE_POSIX_ACL_MAX_SIZE_OLD);
/* No EADATA space is reserved when the request carried
 * mbo_eadatasize == 0; otherwise the computed size is used. */
126 req_capsule_set_size(pill, &RMF_EADATA, RCL_SERVER,
127 info->mti_body->mbo_eadatasize == 0 ? 0 : size);
129 rc2 = req_capsule_server_pack(pill);
/* Fail-check hook for OBD_FAIL_MDS_GETXATTR_PACK. */
133 if (OBD_FAIL_CHECK(OBD_FAIL_MDS_GETXATTR_PACK))
/* A negative rc takes precedence over the computed size. */
136 RETURN(rc < 0 ? rc : size);
/*
 * Apply nodemap mapping to a POSIX ACL xattr value held in \a buf.
 *
 * Only names equal to XATTR_NAME_ACL_ACCESS or XATTR_NAME_ACL_DEFAULT are
 * processed. \a size is first checked against mdt_max_ea_size and, for
 * exports without large ACL support, against
 * LUSTRE_POSIX_ACL_MAX_SIZE_OLD; exceeding either limit gives -ERANGE.
 * The buffer is then handed to nodemap_map_acl() with the direction given
 * by \a tree_type.
 *
 * NOTE(review): callers store the result as a value length, so the return
 * is presumably the mapped size or a negative errno -- confirm.
 */
139 static int mdt_nodemap_map_acl(struct mdt_thread_info *info, void *buf,
140 size_t size, const char *name,
141 enum nodemap_tree_type tree_type)
143 struct lu_nodemap *nodemap;
144 struct obd_export *exp = info->mti_exp;
149 if (strcmp(name, XATTR_NAME_ACL_ACCESS) == 0 ||
150 strcmp(name, XATTR_NAME_ACL_DEFAULT) == 0) {
151 if (size > info->mti_mdt->mdt_max_ea_size ||
152 (!exp_connect_large_acl(exp) &&
153 size > LUSTRE_POSIX_ACL_MAX_SIZE_OLD))
154 GOTO(out, rc = -ERANGE);
/* Look up the nodemap for this export; the reference is dropped with
 * nodemap_putref() once the ACL has been mapped. */
156 nodemap = nodemap_get_from_exp(exp);
158 GOTO(out, rc = PTR_ERR(nodemap));
160 rc = nodemap_map_acl(nodemap, buf, size, tree_type);
161 nodemap_putref(nodemap);
/*
 * Fill the reply for an OBD_MD_FLXATTRALL request.
 *
 * The xattr names are listed into \a buf (the EADATA field) with
 * mo_xattr_list(); then, for every name, the value is fetched with
 * mo_xattr_get() into the EAVALS field and its length is recorded in the
 * EAVALS_LENS array. The three reply fields are shrunk to the space
 * actually used before returning.
 *
 * \a reqbody supplies the client buffer limit (mbo_eadatasize);
 * \a repbody receives the totals; \a next is the object to read from.
 */
169 static int mdt_getxattr_all(struct mdt_thread_info *info,
170 struct mdt_body *reqbody, struct mdt_body *repbody,
171 struct lu_buf *buf, struct md_object *next)
173 const struct lu_env *env = info->mti_env;
174 char *v, *b, *eadatahead, *eadatatail;
176 int eadatasize, eavallen = 0, eavallens = 0, rc;
181 * The format of the pill is the following:
182 * EADATA: attr1\0attr2\0...attrn\0
183 * EAVALS: val1val2...valn
184 * EAVALS_LENS: 4,4,...4
187 eadatahead = buf->lb_buf;
189 /* Fill out EADATA first */
190 rc = mo_xattr_list(env, next, buf);
192 GOTO(out_shrink, rc);
/* eadatatail marks the end of the name list just written. */
195 eadatatail = eadatahead + eadatasize;
197 v = req_capsule_server_get(info->mti_pill, &RMF_EAVALS);
198 sizes = req_capsule_server_get(info->mti_pill, &RMF_EAVALS_LENS);
200 /* Fill out EAVALS and EAVALS_LENS */
/* b steps over one NUL-terminated name per iteration; v advances by rc,
 * the length stored for the value just processed. */
201 for (b = eadatahead; b < eadatatail; b += strlen(b) + 1, v += rc) {
/* Space offered for this value: client limit minus eavallen. */
203 buf->lb_len = reqbody->mbo_eadatasize - eavallen;
204 rc = mo_xattr_get(env, next, buf, b);
206 GOTO(out_shrink, rc);
/* ACL values are mapped in the filesystem-to-client direction. */
207 rc = mdt_nodemap_map_acl(info, buf->lb_buf, rc, b,
208 NODEMAP_FS_TO_CLIENT);
210 GOTO(out_shrink, rc);
211 sizes[eavallens] = rc;
/* The totals are reported through mbo_aclsize (bytes of values) and
 * mbo_max_mdsize (number of values). */
222 repbody->mbo_aclsize = eavallen;
223 repbody->mbo_max_mdsize = eavallens;
/* Trim each reply field down to what was actually filled in. */
225 req_capsule_shrink(info->mti_pill, &RMF_EAVALS, eavallen, RCL_SERVER);
226 req_capsule_shrink(info->mti_pill, &RMF_EAVALS_LENS,
227 eavallens * sizeof(__u32), RCL_SERVER);
228 req_capsule_shrink(info->mti_pill, &RMF_EADATA, eadatasize, RCL_SERVER);
/*
 * Handle a getxattr request for info->mti_object.
 *
 * Sequence: req_check_sepol(), fetch the request body, mdt_init_ucred(),
 * size and pack the reply with mdt_getxattr_pack_reply(), then dispatch
 * on the xattr bits of mbo_valid:
 *  - OBD_MD_FLXATTR:    mo_xattr_get() followed by ACL nodemap mapping
 *  - OBD_MD_FLXATTRLS:  mo_xattr_list()
 *  - OBD_MD_FLXATTRALL: mdt_getxattr_all()
 * The result is reported to the client in repbody->mbo_eadatasize.
 */
235 int mdt_getxattr(struct mdt_thread_info *info)
237 struct ptlrpc_request *req = mdt_info_req(info);
238 struct mdt_body *reqbody;
239 struct mdt_body *repbody = NULL;
240 struct md_object *next;
246 LASSERT(info->mti_object != NULL);
247 LASSERT(lu_object_assert_exists(&info->mti_object->mot_obj));
249 CDEBUG(D_INODE, "getxattr "DFID"\n", PFID(&info->mti_body->mbo_fid1));
251 rc = req_check_sepol(info->mti_pill);
253 RETURN(err_serious(rc));
255 reqbody = req_capsule_client_get(info->mti_pill, &RMF_MDT_BODY);
257 RETURN(err_serious(-EFAULT));
259 rc = mdt_init_ucred(info, reqbody);
261 RETURN(err_serious(rc));
263 next = mdt_object_child(info->mti_object);
264 easize = mdt_getxattr_pack_reply(info);
/* -ENODATA is returned as is, without the err_serious() wrapping that
 * is applied on the other error path below. */
265 if (easize == -ENODATA)
266 GOTO(out, rc = easize);
268 GOTO(out, rc = err_serious(easize));
270 repbody = req_capsule_server_get(info->mti_pill, &RMF_MDT_BODY);
271 LASSERT(repbody != NULL);
273 /* No need further getxattr. */
274 if (easize == 0 || reqbody->mbo_eadatasize == 0)
275 GOTO(out, rc = easize);
/* The EADATA reply field is the output buffer, easize bytes long. */
277 buf = &info->mti_buf;
278 buf->lb_buf = req_capsule_server_get(info->mti_pill, &RMF_EADATA);
279 buf->lb_len = easize;
281 valid = info->mti_body->mbo_valid & (OBD_MD_FLXATTR | OBD_MD_FLXATTRLS);
283 if (valid == OBD_MD_FLXATTR) {
284 const char *xattr_name = req_capsule_client_get(info->mti_pill,
286 rc = mo_xattr_get(info->mti_env, next, buf, xattr_name);
/* ACL values are mapped in the filesystem-to-client direction. */
290 rc = mdt_nodemap_map_acl(info, buf->lb_buf, rc, xattr_name,
291 NODEMAP_FS_TO_CLIENT);
292 } else if (valid == OBD_MD_FLXATTRLS) {
293 CDEBUG(D_INODE, "listxattr\n");
295 rc = mo_xattr_list(info->mti_env, next, buf);
297 CDEBUG(D_INFO, "listxattr failed: %d\n", rc);
298 } else if (valid == OBD_MD_FLXATTRALL) {
299 rc = mdt_getxattr_all(info, reqbody, repbody,
/* Account for the request in the LPROC_MDT_GETXATTR counter. */
307 mdt_counter_incr(req, LPROC_MDT_GETXATTR);
308 /* LU-11109: Set OBD_MD_FLXATTR on success so that
309 * newer clients can distinguish between nonexistent
310 * xattrs and zero length values. */
311 repbody->mbo_valid |= OBD_MD_FLXATTR;
312 repbody->mbo_eadatasize = rc;
/* Pairs with the mdt_init_ucred() call above. */
315 mdt_exit_ucred(info);
319 /*
 * Shrink dir layout after migration.
 *
 * The user supplied LMV (rr->rr_eadata, a struct lmv_user_md) is checked
 * against the LMV currently stored on the directory rr->rr_fid1 and, if
 * they agree, written as the new XATTR_NAME_LMV.
 *
 * Preconditions checked here: mdt_enable_dir_migration is set, and the
 * caller has CFS_CAP_SYS_ADMIN, or its gid equals
 * mdt_enable_remote_dir_gid, or that gid setting is -1.
 *
 * -EALREADY is returned when the directory has no LMV or its hash type
 * lacks LMV_HASH_FLAG_MIGRATION; -EINVAL when stripe count, master MDT
 * index or hash type do not match the stored LMV.
 */
320 static int mdt_dir_layout_shrink(struct mdt_thread_info *info)
322 const struct lu_env *env = info->mti_env;
323 struct mdt_device *mdt = info->mti_mdt;
324 struct lu_ucred *uc = mdt_ucred(info);
325 struct mdt_reint_record *rr = &info->mti_rr;
326 struct lmv_user_md *lmu = rr->rr_eadata;
327 __u32 lum_stripe_count = lmu->lum_stripe_count;
328 struct lu_buf *buf = &info->mti_buf;
329 struct lmv_mds_md_v1 *lmv;
330 struct md_attr *ma = &info->mti_attr;
331 struct ldlm_enqueue_info *einfo = &info->mti_einfo[0];
332 struct mdt_object *pobj = NULL;
333 struct mdt_object *obj;
334 struct mdt_lock_handle *lhp = NULL;
335 struct mdt_lock_handle *lhc;
340 if (!mdt->mdt_enable_dir_migration)
343 if (!md_capable(uc, CFS_CAP_SYS_ADMIN) &&
344 uc->uc_gid != mdt->mdt_enable_remote_dir_gid &&
345 mdt->mdt_enable_remote_dir_gid != -1)
348 /* mti_big_lmm is used to save LMV, but it may be uninitialized. */
349 if (unlikely(!info->mti_big_lmm)) {
/* NOTE(review): 64 is presumably a stripe count chosen to size the
 * buffer generously -- confirm. */
350 info->mti_big_lmmsize = lmv_mds_md_size(64, LMV_MAGIC);
351 OBD_ALLOC(info->mti_big_lmm, info->mti_big_lmmsize);
352 if (!info->mti_big_lmm)
356 obj = mdt_object_find(env, mdt, rr->rr_fid1);
358 RETURN(PTR_ERR(obj));
360 /* get parent from PFID */
361 rc = mdt_attr_get_pfid(info, obj, &ma->ma_pfid);
365 pobj = mdt_object_find(env, mdt, &ma->ma_pfid);
367 GOTO(put_obj, rc = PTR_ERR(pobj));
369 /* revoke object remote LOOKUP lock */
370 if (mdt_object_remote(pobj)) {
371 rc = mdt_revoke_remote_lookup_lock(info, pobj, obj);
377 * lock parent if dir will be shrunk to 1 stripe, because dir will be
378 * converted to normal directory, as will change dir fid and update
379 * namespace of parent.
381 lhp = &info->mti_lh[MDT_LH_PARENT];
382 mdt_lock_reg_init(lhp, LCK_PW);
384 if (le32_to_cpu(lmu->lum_stripe_count) < 2) {
385 rc = mdt_reint_object_lock(info, pobj, lhp,
386 MDS_INODELOCK_UPDATE, true);
/* Lock the directory itself (striped lock, LCK_EX, all inode bits). */
392 lhc = &info->mti_lh[MDT_LH_CHILD];
393 mdt_lock_reg_init(lhc, LCK_EX);
394 rc = mdt_reint_striped_lock(info, obj, lhc, MDS_INODELOCK_FULL, einfo,
397 GOTO(unlock_pobj, rc);
/* Read the current LMV into the mti_big_lmm buffer. */
399 ma->ma_lmv = info->mti_big_lmm;
400 ma->ma_lmv_size = info->mti_big_lmmsize;
402 rc = mdt_stripe_get(info, obj, ma, XATTR_NAME_LMV);
404 GOTO(unlock_obj, rc);
406 /* user may run 'lfs migrate' multiple times, so it's shrunk already */
407 if (!(ma->ma_valid & MA_LMV))
408 GOTO(unlock_obj, rc = -EALREADY);
410 lmv = &ma->ma_lmv->lmv_md_v1;
/* Without the migration flag there is nothing left to shrink. */
413 if (!(le32_to_cpu(lmv->lmv_hash_type) & LMV_HASH_FLAG_MIGRATION))
414 GOTO(unlock_obj, rc = -EALREADY);
/* A zero stripe count from the user is treated as 1; the value is kept
 * in little-endian form (cpu_to_le32) for the raw comparison below. */
416 lum_stripe_count = lmu->lum_stripe_count;
417 if (!lum_stripe_count)
418 lum_stripe_count = cpu_to_le32(1);
420 if (lmv->lmv_migrate_offset != lum_stripe_count) {
421 CERROR("%s: "DFID" migrate mdt count mismatch %u != %u\n",
422 mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1),
423 lmv->lmv_migrate_offset, lmu->lum_stripe_count);
424 GOTO(unlock_obj, rc = -EINVAL);
427 if (lmv->lmv_master_mdt_index != lmu->lum_stripe_offset) {
428 CERROR("%s: "DFID" migrate mdt index mismatch %u != %u\n",
429 mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1),
430 lmv->lmv_master_mdt_index, lmu->lum_stripe_offset);
431 GOTO(unlock_obj, rc = -EINVAL);
/* The hash type is only compared when lum_stripe_count > 1. */
434 if (lum_stripe_count > 1 &&
435 (lmv->lmv_hash_type & cpu_to_le32(LMV_HASH_TYPE_MASK)) !=
436 lmu->lum_hash_type) {
437 CERROR("%s: "DFID" migrate mdt hash mismatch %u != %u\n",
438 mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1),
439 lmv->lmv_hash_type, lmu->lum_hash_type);
440 GOTO(unlock_obj, rc = -EINVAL);
/* All checks passed: store the user supplied LMV as XATTR_NAME_LMV. */
443 buf->lb_buf = rr->rr_eadata;
444 buf->lb_len = rr->rr_eadatalen;
445 rc = mo_xattr_set(env, mdt_object_child(obj), buf, XATTR_NAME_LMV, 0);
446 GOTO(unlock_obj, rc);
/* Cleanup runs in reverse order of acquisition: child lock, parent lock,
 * parent reference, child reference. */
449 mdt_reint_striped_unlock(info, obj, lhc, einfo, rc);
451 mdt_object_unlock(info, pobj, lhp, rc);
453 mdt_object_put(env, pobj);
455 mdt_object_put(env, obj);
/*
 * Handle a setxattr / removexattr reint request on rr->rr_fid1.
 *
 * The xattr name (rr->rr_name) is first vetted by namespace:
 *  - XATTR_USER_PREFIX names need OBD_CONNECT_XATTR on the export;
 *  - XATTR_TRUSTED_PREFIX names need CFS_CAP_SYS_ADMIN, with
 *    XATTR_NAME_LMV carrying a user LMV magic routed to
 *    mdt_dir_layout_shrink();
 *  - POSIX ACL names being set are mapped client-to-filesystem;
 *  - XATTR_LUSTRE_LOV sub-operations accept only .add, .set and .del.
 * The object is then locked with the inode bits collected in lockpart,
 * and the xattr is set (OBD_MD_FLXATTR) or deleted (OBD_MD_FLXATTRRM),
 * followed by a ctime update.
 *
 * The second parameter is not used.
 */
460 int mdt_reint_setxattr(struct mdt_thread_info *info,
461 struct mdt_lock_handle *unused)
463 struct ptlrpc_request *req = mdt_info_req(info);
464 struct mdt_lock_handle *lh;
465 const struct lu_env *env = info->mti_env;
466 struct lu_buf *buf = &info->mti_buf;
467 struct mdt_reint_record *rr = &info->mti_rr;
468 struct md_attr *ma = &info->mti_attr;
469 struct lu_attr *attr = &info->mti_attr.ma_attr;
470 struct mdt_object *obj;
471 struct md_object *child;
472 __u64 valid = attr->la_valid;
473 const char *xattr_name = rr->rr_name.ln_name;
474 int xattr_len = rr->rr_eadatalen;
475 __u64 lockpart = MDS_INODELOCK_UPDATE;
479 CDEBUG(D_INODE, "setxattr for "DFID": %s %s\n", PFID(rr->rr_fid1),
480 valid & OBD_MD_FLXATTR ? "set" : "remove", xattr_name);
482 if (info->mti_dlm_req)
483 ldlm_request_cancel(req, info->mti_dlm_req, 0, LATF_SKIP);
/* Fail-check hook: OBD_FAIL_MDS_SETXATTR makes the request fail with
 * -ENOMEM. */
485 if (OBD_FAIL_CHECK(OBD_FAIL_MDS_SETXATTR))
486 RETURN(err_serious(-ENOMEM));
488 rc = mdt_init_ucred_reint(info);
/* user.* names are refused with -EOPNOTSUPP unless the export
 * negotiated OBD_CONNECT_XATTR. */
492 if (strncmp(xattr_name, XATTR_USER_PREFIX,
493 sizeof(XATTR_USER_PREFIX) - 1) == 0) {
494 if (!(exp_connect_flags(req->rq_export) & OBD_CONNECT_XATTR))
495 GOTO(out, rc = -EOPNOTSUPP);
496 } else if (strncmp(xattr_name, XATTR_TRUSTED_PREFIX,
497 sizeof(XATTR_TRUSTED_PREFIX) - 1) == 0) {
499 /* setxattr(LMV) with lum is used to shrink dir layout */
500 if (strcmp(xattr_name, XATTR_NAME_LMV) == 0) {
501 __u32 *magic = rr->rr_eadata;
503 /* removing LMV is not allowed? */
507 if (le32_to_cpu(*magic) == LMV_USER_MAGIC ||
508 le32_to_cpu(*magic) == LMV_USER_MAGIC_SPECIFIC) {
509 rc = mdt_dir_layout_shrink(info);
/* Other trusted.* names require CFS_CAP_SYS_ADMIN. */
514 if (!md_capable(mdt_ucred(info), CFS_CAP_SYS_ADMIN))
515 GOTO(out, rc = -EPERM);
/* Names of Lustre internal xattrs are special-cased here. */
517 if (strcmp(xattr_name, XATTR_NAME_LOV) == 0 ||
518 strcmp(xattr_name, XATTR_NAME_LMA) == 0 ||
519 strcmp(xattr_name, XATTR_NAME_LMV) == 0 ||
520 strcmp(xattr_name, XATTR_NAME_LINK) == 0 ||
521 strcmp(xattr_name, XATTR_NAME_FID) == 0 ||
522 strcmp(xattr_name, XATTR_NAME_VERSION) == 0 ||
523 strcmp(xattr_name, XATTR_NAME_SOM) == 0 ||
524 strcmp(xattr_name, XATTR_NAME_HSM) == 0 ||
525 strcmp(xattr_name, XATTR_NAME_LFSCK_NAMESPACE) == 0)
/* ACL values being set are mapped in place, client to filesystem. */
527 } else if ((valid & OBD_MD_FLXATTR) &&
528 (strcmp(xattr_name, XATTR_NAME_ACL_ACCESS) == 0 ||
529 strcmp(xattr_name, XATTR_NAME_ACL_DEFAULT) == 0)) {
530 rc = mdt_nodemap_map_acl(info, rr->rr_eadata, xattr_len,
531 xattr_name, NODEMAP_CLIENT_TO_FS);
534 /* ACLs were mapped out, return an error so the user knows */
536 GOTO(out, rc = -EPERM);
/* Names that extend XATTR_LUSTRE_LOV by more than one character:
 * only the .add, .set and .del forms are accepted. */
537 } else if ((strlen(xattr_name) > strlen(XATTR_LUSTRE_LOV) + 1) &&
538 strncmp(xattr_name, XATTR_LUSTRE_LOV,
539 strlen(XATTR_LUSTRE_LOV)) == 0) {
541 if (strncmp(xattr_name, XATTR_LUSTRE_LOV".add",
542 strlen(XATTR_LUSTRE_LOV".add")) &&
543 strncmp(xattr_name, XATTR_LUSTRE_LOV".set",
544 strlen(XATTR_LUSTRE_LOV".set")) &&
545 strncmp(xattr_name, XATTR_LUSTRE_LOV".del",
546 strlen(XATTR_LUSTRE_LOV".del"))) {
547 CERROR("%s: invalid xattr name: %s\n",
548 mdt_obd_name(info->mti_mdt), xattr_name);
549 GOTO(out, rc = -EINVAL);
/* Add the layout bit to the set of inode bits to lock. */
552 lockpart |= MDS_INODELOCK_LAYOUT;
555 /* Revoke all clients' lookup lock, since the access
556 * permissions for this inode is changed when ACL_ACCESS is
557 * set. This isn't needed for ACL_DEFAULT, since that does
558 * not change the access permissions of this inode, nor any
559 * other existing inodes. It is setting the ACLs inherited
560 * by new directories/files at create time. */
561 /* We need revoke both LOOKUP|PERM lock here, see mdt_attr_set. */
562 if (!strcmp(xattr_name, XATTR_NAME_ACL_ACCESS))
563 lockpart |= MDS_INODELOCK_PERM | MDS_INODELOCK_LOOKUP;
564 /* We need to take the lock on behalf of old clients so that newer
565 * clients flush their xattr caches */
567 lockpart |= MDS_INODELOCK_XATTR;
/* The MDT_LH_PARENT handle slot is used for the lock on the object
 * itself (rr->rr_fid1). */
569 lh = &info->mti_lh[MDT_LH_PARENT];
570 /* ACLs were sent to clients under LCK_CR locks, so taking LCK_EX
572 mdt_lock_reg_init(lh, LCK_EX);
573 obj = mdt_object_find_lock(info, rr->rr_fid1, lh, lockpart);
575 GOTO(out, rc = PTR_ERR(obj));
577 tgt_vbr_obj_set(env, mdt_obj2dt(obj));
578 rc = mdt_version_get_check_save(info, obj, 0);
580 GOTO(out_unlock, rc);
582 if (unlikely(!(valid & OBD_MD_FLCTIME))) {
583 /* This isn't strictly an error, but all current clients
584 * should set OBD_MD_FLCTIME when setting attributes. */
585 CWARN("%s: client miss to set OBD_MD_FLCTIME when "
586 "setxattr %s: [object "DFID"] [valid %llu]\n",
587 mdt_obd_name(info->mti_mdt), xattr_name,
588 PFID(rr->rr_fid1), valid);
/* Fall back to the current server time for ctime. */
589 attr->la_ctime = ktime_get_real_seconds();
/* From here on only ctime is marked valid in attr, so the
 * mo_attr_set() calls below update ctime alone. */
591 attr->la_valid = LA_CTIME;
592 child = mdt_object_child(obj);
593 if (valid & OBD_MD_FLXATTR) {
/* Translate XATTR_REPLACE / XATTR_CREATE into LU_XATTR_* flags. */
596 if (attr->la_flags & XATTR_REPLACE)
597 flags |= LU_XATTR_REPLACE;
599 if (attr->la_flags & XATTR_CREATE)
600 flags |= LU_XATTR_CREATE;
/* Fail-injection hook for OBD_FAIL_MDS_SETXATTR_WRITE. */
602 mdt_fail_write(env, info->mti_mdt->mdt_bottom,
603 OBD_FAIL_MDS_SETXATTR_WRITE);
605 buf->lb_buf = rr->rr_eadata;
606 buf->lb_len = xattr_len;
607 rc = mo_xattr_set(env, child, buf, xattr_name, flags);
608 /* update ctime after xattr changed */
/* NOTE(review): the result of mo_attr_set() is not checked.
 * MDS_PERM_BYPASS presumably skips permission checks for this
 * ctime update -- confirm. */
610 ma->ma_attr_flags |= MDS_PERM_BYPASS;
611 mo_attr_set(env, child, ma);
613 } else if (valid & OBD_MD_FLXATTRRM) {
614 rc = mo_xattr_del(env, child, xattr_name);
615 /* update ctime after xattr changed */
/* NOTE(review): the result of mo_attr_set() is not checked here
 * either. */
617 ma->ma_attr_flags |= MDS_PERM_BYPASS;
618 mo_attr_set(env, child, ma);
/* Neither set nor remove was requested: log the valid bits. */
621 CDEBUG(D_INFO, "valid bits: %#llx\n", valid);
/* Account for the request in the LPROC_MDT_SETXATTR counter. */
626 mdt_counter_incr(req, LPROC_MDT_SETXATTR);
/* Drop the lock and the object reference taken by
 * mdt_object_find_lock(). */
630 mdt_object_unlock_put(info, obj, lh, rc);
/* Pairs with the mdt_init_ucred_reint() call above. */
632 mdt_exit_ucred(info);