1 // SPDX-License-Identifier: GPL-2.0
4 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
5 * Use is subject to license terms.
7 * Copyright (c) 2012, 2017, Intel Corporation.
11 * This file is part of Lustre, http://www.lustre.org/
13 * Functions to manipulate extended attributes and system attributes
15 * Author: Alex Zhuravlev <bzzz@whamcloud.com>
16 * Author: Mike Pershin <tappro@whamcloud.com>
19 #define DEBUG_SUBSYSTEM S_OSD
21 #include <libcfs/libcfs.h>
22 #include <obd_support.h>
23 #include <lustre_net.h>
25 #include <obd_class.h>
26 #include <lustre_disk.h>
27 #include <lustre_fid.h>
28 #include <lustre_linkea.h>
30 #include "osd_internal.h"
32 #include <sys/dnode.h>
37 #include <sys/spa_impl.h>
38 #include <sys/zfs_znode.h>
39 #include <sys/dmu_tx.h>
40 #include <sys/dmu_objset.h>
41 #include <sys/dsl_prop.h>
42 #include <sys/sa_impl.h>
45 #include <linux/posix_acl_xattr.h>
46 #include <lustre_scrub.h>
/*
 * Load the packed xattr nvlist stored in the SA_ZPL_DXATTR system attribute
 * of \a hdl and unpack it into \a sa.
 *
 * The visible steps are: query the attribute size with sa_size(), allocate a
 * zio buffer of that size, read the attribute with sa_lookup(), unpack it
 * with nvlist_unpack() and free the buffer again. nvlist_alloc() provides a
 * fresh NV_UNIQUE_NAME list instead -- presumably when the attribute does
 * not exist yet; confirm against the error check that guards it.
 *
 * The results of the ZFS/nvlist calls are negated before being kept in rc.
 */
48 int __osd_xattr_load(struct osd_device *osd, sa_handle_t *hdl, nvlist_t **sa)
53 rc = -sa_size(hdl, SA_ZPL_DXATTR(osd), &size);
56 rc = -nvlist_alloc(sa, NV_UNIQUE_NAME, KM_SLEEP);
60 buf = zio_buf_alloc(size);
65 rc = -sa_lookup(hdl, SA_ZPL_DXATTR(osd), buf, size);
67 rc = -nvlist_unpack(buf, size, sa, KM_SLEEP);
68 zio_buf_free(buf, size);
/*
 * Populate obj->oo_sa_xattr, the in-memory nvlist of SA-resident xattrs.
 *
 * Requires obj->oo_sa_hdl to be set. An already populated obj->oo_sa_xattr
 * is checked for first; the load itself is delegated to __osd_xattr_load(),
 * whose result is returned.
 */
74 static inline int __osd_xattr_cache(struct osd_object *obj)
76 LASSERT(obj->oo_sa_hdl);
77 if (obj->oo_sa_xattr != NULL)
79 return __osd_xattr_load(osd_obj2dev(obj),
80 obj->oo_sa_hdl, &obj->oo_sa_xattr);
/*
 * Fetch xattr \a name from the object's SA-resident xattr nvlist.
 *
 * The nvlist is brought into obj->oo_sa_xattr via __osd_xattr_cache() and
 * then searched with nvlist_lookup_byte_array(). With no destination buffer
 * (\a buf or buf->lb_buf is NULL) only the size is reported through
 * \a sizep; a value larger than buf->lb_len yields -ERANGE; otherwise the
 * value is copied into buf->lb_buf.
 */
84 __osd_sa_xattr_get(const struct lu_env *env, struct osd_object *obj,
85 const struct lu_buf *buf, const char *name, int *sizep)
90 rc = __osd_xattr_cache(obj);
94 LASSERT(obj->oo_sa_xattr);
95 rc = -nvlist_lookup_byte_array(obj->oo_sa_xattr, name,
100 if (buf == NULL || buf->lb_buf == NULL) {
101 /* return the required size by *sizep */
105 if (*sizep > buf->lb_len)
106 return -ERANGE; /* match ldiskfs error */
108 memcpy(buf->lb_buf, nv_value, *sizep);
/*
 * Fetch xattr \a name whose value lives in its own object, referenced from
 * the xattr ZAP object \a xattr.
 *
 * The ZAP entry for \a name gives the object number of the value object;
 * that object's dnode and a private SA handle are obtained, and its
 * SA_ZPL_SIZE attribute is read as the value length (lengths above INT_MAX
 * are special-cased). With no destination buffer only the size is needed;
 * a value larger than buf->lb_len yields -ERANGE; otherwise the value is
 * read with dmu_read(). The SA handle and the dnode are released at the end
 * of the function.
 */
112 int __osd_xattr_get_large(const struct lu_env *env, struct osd_device *osd,
113 uint64_t xattr, struct lu_buf *buf,
114 const char *name, int *sizep)
117 sa_handle_t *sa_hdl = NULL;
118 uint64_t xa_data_obj, size;
121 /* are there any extended attributes? */
122 if (xattr == ZFS_NO_OBJECT)
125 /* Lookup the object number containing the xattr data */
126 rc = -zap_lookup(osd->od_os, xattr, name, sizeof(uint64_t), 1,
131 rc = __osd_obj2dnode(osd->od_os, xa_data_obj, &xa_data_dn);
135 rc = -sa_handle_get(osd->od_os, xa_data_obj, NULL, SA_HDL_PRIVATE,
140 /* Get the xattr value length / object size */
141 rc = -sa_lookup(sa_hdl, SA_ZPL_SIZE(osd), &size, 8);
145 if (size > INT_MAX) {
152 if (buf == NULL || buf->lb_buf == NULL) {
153 /* We only need to return the required size */
156 if (*sizep > buf->lb_len) {
157 rc = -ERANGE; /* match ldiskfs error */
161 rc = -dmu_read(osd->od_os, xa_data_dn->dn_object, 0,
162 size, buf->lb_buf, DMU_READ_PREFETCH);
165 sa_handle_destroy(sa_hdl);
167 osd_dnode_rele(xa_data_dn);
173 * Copy an extended attribute into the buffer provided, or compute
174 * the required buffer size if \a buf is NULL.
176 * On success, the number of bytes used or required is stored in \a sizep.
178 * Note that no locking is done here.
180 * \param[in] env execution environment
181 * \param[in] obj object for which to retrieve xattr
182 * \param[out] buf buffer to store xattr value in
183 * \param[in] name name of xattr to copy
184 * \param[out] sizep bytes used or required to store xattr
186 * \retval 0 on success
187 * \retval negative negated errno on failure
189 int osd_xattr_get_internal(const struct lu_env *env, struct osd_object *obj,
190 struct lu_buf *buf, const char *name, int *sizep)
/* guard: the object must exist and must not be destroyed */
194 if (unlikely(!dt_object_exists(&obj->oo_dt) || obj->oo_destroyed))
197 /* check SA_ZPL_DXATTR first then fallback to directory xattr */
198 rc = __osd_sa_xattr_get(env, obj, buf, name, sizep);
/* second source: the value object referenced from the obj->oo_xattr ZAP */
202 return __osd_xattr_get_large(env, osd_obj2dev(obj), obj->oo_xattr,
207 * Copy LMA extended attribute into provided buffer
209 * Note that no locking is done here.
211 * \param[in] env execution environment
212 * \param[in] obj object for which to retrieve xattr
213 * \param[out] buf buffer to store xattr value in
215 * \retval 0 on success
216 * \retval negative negated errno on failure
218 int osd_xattr_get_lma(const struct lu_env *env, struct osd_object *obj,
/* guard against objects that are already destroyed */
227 if (unlikely(obj->oo_destroyed))
230 /* check SA_ZPL_DXATTR first then fallback to directory xattr */
231 rc = __osd_sa_xattr_get(env, obj, buf, XATTR_NAME_LMA, &size);
/* a successful read shorter than struct lustre_mdt_attrs is singled out */
232 if (!rc && unlikely(size < sizeof(struct lustre_mdt_attrs)))
237 rc = __osd_xattr_get_large(env, osd_obj2dev(obj), obj->oo_xattr,
238 buf, XATTR_NAME_LMA, &size);
/* same minimum-size check for the value read from the xattr ZAP */
239 if (!rc && unlikely(size < sizeof(struct lustre_mdt_attrs)))
/*
 * Produce the PFID EA (struct filter_fid) from the parent FID and layout
 * information embedded in the object's LMA xattr.
 *
 * The LMA is read into the per-thread oti_buf (the BUILD_BUG_ON ensures it
 * can hold a struct lustre_ost_attrs), byte-swapped with lustre_loa_swab()
 * and must carry LMAC_STRIPE_INFO. \a sizep is set to sizeof(struct
 * filter_fid); an empty \a buf and a too-small \a buf are both checked
 * before the stripe, parent-FID and -- when LMAC_COMP_INFO is set --
 * component fields are converted to little-endian form.
 */
246 static int osd_get_pfid_from_lma(const struct lu_env *env,
247 struct osd_object *obj,
248 struct lu_buf *buf, int *sizep)
250 struct osd_thread_info *info = osd_oti_get(env);
251 struct lustre_ost_attrs *loa =
252 (struct lustre_ost_attrs *)&info->oti_buf;
253 struct lustre_mdt_attrs *lma = &loa->loa_lma;
254 struct filter_fid *ff;
255 struct ost_layout *ol;
256 struct lu_buf tbuf = {
258 .lb_len = sizeof(info->oti_buf),
263 BUILD_BUG_ON(sizeof(info->oti_buf) < sizeof(*loa));
264 rc = osd_xattr_get_internal(env, obj, &tbuf,
265 XATTR_NAME_LMA, sizep);
269 lustre_loa_swab(loa, true);
270 LASSERT(lma->lma_compat & LMAC_STRIPE_INFO);
272 *sizep = sizeof(*ff);
273 if (buf->lb_len == 0 || !buf->lb_buf)
276 if (buf->lb_len < *sizep)
/* the upper bits of f_ver feed ol_stripe_count; f_ver is masked afterwards */
281 ol->ol_stripe_count = cpu_to_le32(loa->loa_parent_fid.f_ver >>
282 PFID_STRIPE_IDX_BITS);
283 ol->ol_stripe_size = cpu_to_le32(loa->loa_stripe_size);
284 loa->loa_parent_fid.f_ver &= PFID_STRIPE_COUNT_MASK;
285 fid_cpu_to_le(&ff->ff_parent, &loa->loa_parent_fid);
286 if (lma->lma_compat & LMAC_COMP_INFO) {
287 ol->ol_comp_start = cpu_to_le64(loa->loa_comp_start);
288 ol->ol_comp_end = cpu_to_le64(loa->loa_comp_end);
289 ol->ol_comp_id = cpu_to_le32(loa->loa_comp_id);
291 ol->ol_comp_start = 0;
/*
 * dt_object xattr-get entry point: read xattr \a name of \a dt into \a buf.
 *
 * POSIX ACL names are special-cased when the device has od_posix_acl unset.
 * The lookup runs under the read side of obj->oo_guard and fails with
 * -ENOENT for objects that do not exist or are destroyed. XATTR_NAME_FID is
 * served from the LMA when obj->oo_pfid_in_lma is set;
 * osd_xattr_get_internal() handles the general case.
 */
299 int osd_xattr_get(const struct lu_env *env, struct dt_object *dt,
300 struct lu_buf *buf, const char *name)
302 struct osd_object *obj = osd_dt_obj(dt);
306 if (!osd_obj2dev(obj)->od_posix_acl &&
307 (strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS) == 0 ||
308 strcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
311 down_read(&obj->oo_guard);
312 if (unlikely(!dt_object_exists(dt) || obj->oo_destroyed))
313 GOTO(out, rc = -ENOENT);
314 LASSERT(obj->oo_dn != NULL);
316 /* For the OST migrated from ldiskfs, the PFID EA may
317 * be stored in LMA because of ldiskfs inode size.
319 if (strcmp(name, XATTR_NAME_FID) == 0 && obj->oo_pfid_in_lma)
320 rc = osd_get_pfid_from_lma(env, obj, buf, &size);
322 rc = osd_xattr_get_internal(env, obj, buf, name, &size);
329 up_read(&obj->oo_guard);
333 /* the function is used to declare EAs when SA is not supported */
/*
 * Holds are declared for one of three situations:
 *  - the object has no xattr ZAP yet (oo_xattr == ZFS_NO_OBJECT): a new ZAP
 *    with one entry plus a new value object of \a vallen bytes;
 *  - \a name is already present in the xattr ZAP: the bonus of the existing
 *    value object, a free of its tail beyond \a vallen and a write of
 *    \a vallen bytes;
 *  - \a name is absent (-ENOENT): the ZAP bonus, a new ZAP entry and a new
 *    value object of \a vallen bytes.
 */
334 static void __osd_xattr_declare_legacy(const struct lu_env *env,
335 struct osd_object *obj,
336 int vallen, const char *name,
337 struct osd_thandle *oh)
339 struct osd_device *osd = osd_obj2dev(obj);
340 dmu_tx_t *tx = oh->ot_tx;
341 uint64_t xa_data_obj;
344 if (obj->oo_xattr == ZFS_NO_OBJECT) {
345 /* xattr zap + entry */
346 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, TRUE, (char *) name);
347 /* xattr value obj */
348 dmu_tx_hold_sa_create(tx, ZFS_SA_BASE_ATTR_SIZE);
349 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, vallen);
353 rc = -zap_lookup(osd->od_os, obj->oo_xattr, name, sizeof(uint64_t), 1,
357 * Entry already exists.
358 * We'll truncate the existing object.
360 dmu_tx_hold_bonus(tx, xa_data_obj);
361 dmu_tx_hold_free(tx, xa_data_obj, vallen, DMU_OBJECT_END);
362 dmu_tx_hold_write(tx, xa_data_obj, 0, vallen);
363 } else if (rc == -ENOENT) {
365 * Entry doesn't exist, we need to create a new one and a new
366 * object to store the value.
368 dmu_tx_hold_bonus(tx, obj->oo_xattr);
369 dmu_tx_hold_zap(tx, obj->oo_xattr, TRUE, (char *) name);
370 dmu_tx_hold_sa_create(tx, ZFS_SA_BASE_ATTR_SIZE);
371 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, vallen);
/*
 * Declare the transaction holds needed to set xattr \a name with a value of
 * \a vallen bytes on \a obj.
 *
 * linkEA updates additionally hold the remote parent directory ZAP when the
 * device has one. Devices without od_xattr_in_sa use the legacy (ZAP based)
 * declaration. Otherwise the EA goes into the SA: an existing object gets a
 * spill hold, while for a not-yet-created object oo_ea_in_bonus tracks the
 * EA bytes planned so far and a spill hold for DMU_NEW_OBJECT is declared
 * once they no longer fit in osd_obj_bonuslen(obj).
 */
375 void __osd_xattr_declare_set(const struct lu_env *env, struct osd_object *obj,
376 int vallen, const char *name,
377 struct osd_thandle *oh)
379 struct osd_device *osd = osd_obj2dev(obj);
380 dmu_tx_t *tx = oh->ot_tx;
383 if (unlikely(obj->oo_destroyed))
386 if (strcmp(name, XATTR_NAME_LINK) == 0 &&
387 osd->od_remote_parent_dir != ZFS_NO_OBJECT) {
388 /* If some name entry resides on remote MDT, then will create
389 * agent entry under remote parent. On the other hand, if the
390 * remote entry will be removed, then related agent entry may
391 * need to be removed from the remote parent. So there may be
392 * kinds of cases, let's declare enough credits. The credits
393 * for create agent entry is enough for remove case.
395 osd_tx_hold_zap(tx, osd->od_remote_parent_dir,
399 if (unlikely(!osd_obj2dev(obj)->od_xattr_in_sa)) {
400 __osd_xattr_declare_legacy(env, obj, vallen, name, oh);
404 /* declare EA in SA */
405 if (dt_object_exists(&obj->oo_dt)) {
406 LASSERT(obj->oo_sa_hdl);
407 /* XXX: it should be possible to skip spill declaration if
408 * specific EA is part of bonus and doesn't grow
410 dmu_tx_hold_spill(tx, obj->oo_dn->dn_object);
414 bonuslen = osd_obj_bonuslen(obj);
416 /* the object doesn't exist, but we've declared bonus
417 * in osd_declare_object_create() yet
419 if (obj->oo_ea_in_bonus > bonuslen) {
420 /* spill has been declared already */
421 } else if (obj->oo_ea_in_bonus + vallen > bonuslen) {
422 /* we're about to exceed bonus, let's declare spill */
423 dmu_tx_hold_spill(tx, DMU_NEW_OBJECT);
425 obj->oo_ea_in_bonus += vallen;
/*
 * dt_object declare-xattr-set entry point: takes the read side of
 * obj->oo_guard and declares the holds for a value of buf->lb_len bytes via
 * __osd_xattr_declare_set(). \a handle must not be NULL.
 */
428 int osd_declare_xattr_set(const struct lu_env *env, struct dt_object *dt,
429 const struct lu_buf *buf, const char *name,
430 int fl, struct thandle *handle)
432 struct osd_object *obj = osd_dt_obj(dt);
433 struct osd_thandle *oh;
436 LASSERT(handle != NULL);
437 oh = container_of(handle, struct osd_thandle, ot_super);
439 down_read(&obj->oo_guard);
440 __osd_xattr_declare_set(env, obj, buf->lb_len, name, oh);
441 up_read(&obj->oo_guard);
/*
 * Write the complete initial SA layout of \a obj in one go.
 *
 * Clears the oo_late_xattr / oo_late_attr_set flags, copies the cached
 * attributes from obj->oo_attr into the per-thread osa_attr, records the
 * birth time (LA_BTIME) and, when ZFS_PROJINHERIT is compiled in and the
 * device has od_projectused_dn, the project id. The attributes are collected
 * into the per-thread bulk array, the xattr nvlist in obj->oo_sa_xattr is
 * packed (XDR encoding) into the per-thread xattr buffer and appended as
 * SA_ZPL_DXATTR, and everything is applied with
 * sa_replace_all_by_template().
 */
446 int __osd_sa_attr_init(const struct lu_env *env, struct osd_object *obj,
447 struct osd_thandle *oh)
449 sa_bulk_attr_t *bulk = osd_oti_get(env)->oti_attr_bulk;
450 struct osa_attr *osa = &osd_oti_get(env)->oti_osa;
451 struct lu_buf *lb = &osd_oti_get(env)->oti_xattr_lbuf;
452 struct osd_device *osd = osd_obj2dev(obj);
454 inode_timespec_t now;
458 obj->oo_late_xattr = 0;
459 obj->oo_late_attr_set = 0;
/* the txg of the transaction is what gets stored as SA_ZPL_GEN below */
461 gen = dmu_tx_get_txg(oh->ot_tx);
463 ZFS_TIME_ENCODE(&now, osa->btime);
465 obj->oo_attr.la_valid |= LA_BTIME;
466 obj->oo_attr.la_btime = osa->btime[0];
467 osa->atime[0] = obj->oo_attr.la_atime;
468 osa->ctime[0] = obj->oo_attr.la_ctime;
469 osa->mtime[0] = obj->oo_attr.la_mtime;
470 osa->mode = obj->oo_attr.la_mode;
471 osa->uid = obj->oo_attr.la_uid;
472 osa->gid = obj->oo_attr.la_gid;
473 osa->rdev = obj->oo_attr.la_rdev;
474 osa->nlink = obj->oo_attr.la_nlink;
475 osa->flags = attrs_fs2zfs(obj->oo_attr.la_flags);
476 osa->size = obj->oo_attr.la_size;
477 #ifdef ZFS_PROJINHERIT
478 if (osd->od_projectused_dn) {
479 if (obj->oo_attr.la_valid & LA_PROJID)
480 osa->projid = obj->oo_attr.la_projid;
482 osa->projid = ZFS_DEFAULT_PROJID;
483 osa->flags |= ZFS_PROJID;
484 obj->oo_with_projid = 1;
/* collect every attribute into the bulk array; cnt counts the entries */
489 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MODE(osd), NULL, &osa->mode, 8);
490 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_SIZE(osd), NULL, &osa->size, 8);
491 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_GEN(osd), NULL, &gen, 8);
492 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_UID(osd), NULL, &osa->uid, 8);
493 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_GID(osd), NULL, &osa->gid, 8);
494 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_PARENT(osd), NULL,
496 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_FLAGS(osd), NULL, &osa->flags, 8);
497 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_ATIME(osd), NULL, osa->atime, 16);
498 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MTIME(osd), NULL, osa->mtime, 16);
499 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CTIME(osd), NULL, osa->ctime, 16);
500 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CRTIME(osd), NULL, osa->btime, 16);
501 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_LINKS(osd), NULL, &osa->nlink, 8);
502 #ifdef ZFS_PROJINHERIT
503 if (osd->od_projectused_dn)
504 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_PROJID(osd), NULL,
507 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_RDEV(osd), NULL, &osa->rdev, 8);
508 LASSERT(cnt <= ARRAY_SIZE(osd_oti_get(env)->oti_attr_bulk));
510 /* Update the SA for additions, modifications, and removals. */
511 rc = -nvlist_size(obj->oo_sa_xattr, &size, NV_ENCODE_XDR);
515 lu_buf_check_and_alloc(lb, size);
516 if (lb->lb_buf == NULL) {
518 CERROR("%s: can't allocate buffer for xattr update: rc = %d\n",
523 rc = -nvlist_pack(obj->oo_sa_xattr, (char **)&lb->lb_buf, &size,
524 NV_ENCODE_XDR, KM_SLEEP);
528 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_DXATTR(osd), NULL, lb->lb_buf, size);
530 rc = -sa_replace_all_by_template(obj->oo_sa_hdl, bulk, cnt, oh->ot_tx);
/*
 * Pack the in-memory xattr nvlist (obj->oo_sa_xattr) with XDR encoding into
 * the per-thread xattr buffer and store it as the SA_ZPL_DXATTR attribute.
 * Clears obj->oo_late_xattr first.
 *
 * NOTE(review): the sa_update() call at the end does not capture its return
 * value -- confirm that a failure there is meant to go unreported.
 */
535 int __osd_sa_xattr_update(const struct lu_env *env, struct osd_object *obj,
536 struct osd_thandle *oh)
538 struct lu_buf *lb = &osd_oti_get(env)->oti_xattr_lbuf;
539 struct osd_device *osd = osd_obj2dev(obj);
544 obj->oo_late_xattr = 0;
546 /* Update the SA for additions, modifications, and removals. */
547 rc = -nvlist_size(obj->oo_sa_xattr, &size, NV_ENCODE_XDR);
551 lu_buf_check_and_alloc(lb, size);
552 if (lb->lb_buf == NULL) {
554 CERROR("%s: can't allocate buffer for xattr update: rc = %d\n",
560 rc = -nvlist_pack(obj->oo_sa_xattr, &dxattr, &size,
561 NV_ENCODE_XDR, KM_SLEEP);
/* nvlist_pack() is expected to have packed into the buffer it was given */
564 LASSERT(dxattr == lb->lb_buf);
566 sa_update(obj->oo_sa_hdl, SA_ZPL_DXATTR(osd), dxattr, size, oh->ot_tx);
572 * Set an extended attribute.
573 * This transaction must have called udmu_xattr_declare_set() first.
575 * Returns 0 on success or a negative error number on failure.
577 * No locking is done here.
/*
 * Defer the SA_ZPL_DXATTR write-back: mark the object with oo_late_xattr
 * and hand it to osd_object_sa_dirty_add() together with the transaction
 * handle. Both obj->oo_sa_hdl and obj->oo_sa_xattr must be set.
 */
579 int __osd_sa_xattr_schedule_update(const struct lu_env *env,
580 struct osd_object *obj,
581 struct osd_thandle *oh)
584 LASSERT(obj->oo_sa_hdl);
585 LASSERT(obj->oo_sa_xattr);
587 /* schedule batched SA update in osd_object_sa_dirty_rele() */
588 obj->oo_late_xattr = 1;
589 osd_object_sa_dirty_add(obj, oh);
/*
 * Set xattr \a name in the SA-resident xattr nvlist of \a obj.
 *
 * Values larger than OBD_MAX_EA_SIZE, and values that would push the packed
 * nvlist beyond DXATTR_MAX_SA_SIZE, are singled out; in the too-big case an
 * existing SA copy is removed and -EFBIG is returned once that removal has
 * been scheduled. LU_XATTR_CREATE / LU_XATTR_REPLACE are checked against
 * the result of the nvlist lookup. A copy of the same name in the xattr ZAP
 * is looked up and freed before the new value is added to the nvlist. The
 * write-back is batched only for dnodes allocated in the current txg.
 */
595 int __osd_sa_xattr_set(const struct lu_env *env, struct osd_object *obj,
596 const struct lu_buf *buf, const char *name, int fl,
597 struct osd_thandle *oh)
605 rc = __osd_xattr_cache(obj);
609 LASSERT(obj->oo_sa_xattr);
610 if (buf->lb_len > OBD_MAX_EA_SIZE) {
613 /* Prevent the DXATTR SA from consuming the entire SA region */
614 rc = -nvlist_size(obj->oo_sa_xattr, &size, NV_ENCODE_XDR);
618 if (size + buf->lb_len > DXATTR_MAX_SA_SIZE)
622 /* even in case of -EFBIG we must lookup xattr and check can we
623 * rewrite it then delete from SA
625 rc = -nvlist_lookup_byte_array(obj->oo_sa_xattr, name, &nv_value,
628 if (fl & LU_XATTR_CREATE) {
630 } else if (too_big) {
631 rc = -nvlist_remove(obj->oo_sa_xattr, name,
632 DATA_TYPE_BYTE_ARRAY);
635 rc = __osd_sa_xattr_schedule_update(env, obj, oh);
636 return rc == 0 ? -EFBIG : rc;
638 } else if (rc == -ENOENT) {
639 if (fl & LU_XATTR_REPLACE)
647 /* Ensure xattr doesn't exist in ZAP */
648 if (obj->oo_xattr != ZFS_NO_OBJECT) {
649 struct osd_device *osd = osd_obj2dev(obj);
652 rc = -zap_lookup(osd->od_os, obj->oo_xattr,
655 rc = -dmu_object_free(osd->od_os, objid, oh->ot_tx);
657 zap_remove(osd->od_os, obj->oo_xattr,
662 rc = -nvlist_add_byte_array(obj->oo_sa_xattr, name,
663 (uchar_t *)buf->lb_buf, buf->lb_len);
667 /* batch updates only for just created dnodes where we
668 * used to set number of EAs in a single transaction
670 if (obj->oo_dn->dn_allocated_txg == oh->ot_tx->tx_txg)
671 rc = __osd_sa_xattr_schedule_update(env, obj, oh);
673 rc = __osd_sa_xattr_update(env, obj, oh);
/*
 * Set xattr \a name of \a obj in the xattr ZAP, with the value kept in a
 * separate object.
 *
 * An xattr ZAP is created and recorded in SA_ZPL_XATTR if the object has
 * none. If \a name already exists its value object is truncated with
 * dmu_free_range(); if it does not, a new value object is created and added
 * to the ZAP. LU_XATTR_CREATE and LU_XATTR_REPLACE are checked in the
 * respective branches. Finally the value is written with dmu_write() and
 * the value object's SA_ZPL_SIZE is updated. The private SA handle and any
 * dnodes obtained here are released at the end.
 */
679 __osd_xattr_set(const struct lu_env *env, struct osd_object *obj,
680 const struct lu_buf *buf, const char *name, int fl,
681 struct osd_thandle *oh)
683 struct osd_device *osd = osd_obj2dev(obj);
684 dnode_t *xa_zap_dn = NULL;
685 dnode_t *xa_data_dn = NULL;
686 uint64_t xa_data_obj;
687 sa_handle_t *sa_hdl = NULL;
688 dmu_tx_t *tx = oh->ot_tx;
692 LASSERT(obj->oo_sa_hdl);
694 if (obj->oo_xattr == ZFS_NO_OBJECT) {
695 struct lu_attr *la = &osd_oti_get(env)->oti_la;
697 la->la_valid = LA_MODE;
698 la->la_mode = S_IFDIR | 0755;
699 rc = __osd_zap_create(env, osd, &xa_zap_dn, tx, la, 0, 0);
703 obj->oo_xattr = xa_zap_dn->dn_object;
704 rc = osd_object_sa_update(obj, SA_ZPL_XATTR(osd),
705 &obj->oo_xattr, 8, oh);
710 rc = -zap_lookup(osd->od_os, obj->oo_xattr, name, sizeof(uint64_t), 1,
713 if (fl & LU_XATTR_CREATE) {
717 /* Entry already exists. We'll truncate the existing object. */
718 rc = __osd_obj2dnode(osd->od_os, xa_data_obj, &xa_data_dn);
722 rc = -sa_handle_get(osd->od_os, xa_data_obj, NULL,
723 SA_HDL_PRIVATE, &sa_hdl);
727 rc = -sa_lookup(sa_hdl, SA_ZPL_SIZE(osd), &size, 8);
731 rc = -dmu_free_range(osd->od_os, xa_data_dn->dn_object,
732 0, DMU_OBJECT_END, tx);
735 } else if (rc == -ENOENT) {
736 struct lu_attr *la = &osd_oti_get(env)->oti_la;
738 * Entry doesn't exist, we need to create a new one and a new
739 * object to store the value.
741 if (fl & LU_XATTR_REPLACE) {
742 /* should be ENOATTR according to the
743 * man, but that is undefined here
749 la->la_valid = LA_MODE;
750 la->la_mode = S_IFREG | 0644;
751 rc = __osd_object_create(env, osd, obj,
752 lu_object_fid(&obj->oo_dt.do_lu),
753 &xa_data_dn, tx, la);
756 xa_data_obj = xa_data_dn->dn_object;
758 rc = -sa_handle_get(osd->od_os, xa_data_obj, NULL,
759 SA_HDL_PRIVATE, &sa_hdl);
763 rc = -zap_add(osd->od_os, obj->oo_xattr, name, sizeof(uint64_t),
764 1, &xa_data_obj, tx);
768 /* There was an error looking up the xattr name */
772 /* Finally write the xattr value */
773 dmu_write(osd->od_os, xa_data_obj, 0, buf->lb_len, buf->lb_buf, tx);
776 rc = -sa_update(sa_hdl, SA_ZPL_SIZE(osd), &size, 8, tx);
779 sa_handle_destroy(sa_hdl);
781 if (xa_data_dn != NULL)
782 osd_dnode_rele(xa_data_dn);
783 if (xa_zap_dn != NULL)
784 osd_dnode_rele(xa_zap_dn);
/*
 * Rewrite the LMA xattr without the embedded PFID/layout information.
 *
 * The LMA is read into the per-thread oti_buf and byte-swapped, the
 * LMAC_STRIPE_INFO and LMAC_COMP_INFO compat flags are cleared, and only the
 * leading struct lustre_mdt_attrs is written back with LU_XATTR_REPLACE.
 * obj->oo_pfid_in_lma is reset to 0 afterwards (presumably only on
 * success -- confirm against the check that guards it).
 */
789 static int osd_xattr_split_pfid(const struct lu_env *env,
790 struct osd_object *obj, struct osd_thandle *oh)
792 struct osd_thread_info *info = osd_oti_get(env);
793 struct lustre_ost_attrs *loa =
794 (struct lustre_ost_attrs *)&info->oti_buf;
795 struct lustre_mdt_attrs *lma = &loa->loa_lma;
796 struct lu_buf buf = {
798 .lb_len = sizeof(info->oti_buf),
804 BUILD_BUG_ON(sizeof(info->oti_buf) < sizeof(*loa));
805 rc = osd_xattr_get_internal(env, obj, &buf, XATTR_NAME_LMA, &size);
809 lustre_loa_swab(loa, true);
810 LASSERT(lma->lma_compat & LMAC_STRIPE_INFO);
812 lma->lma_compat &= ~(LMAC_STRIPE_INFO | LMAC_COMP_INFO);
813 lustre_lma_swab(lma);
/* shrink the buffer so only the plain LMA part is written back */
815 buf.lb_len = sizeof(*lma);
816 rc = osd_xattr_set_internal(env, obj, &buf, XATTR_NAME_LMA,
817 LU_XATTR_REPLACE, oh);
819 obj->oo_pfid_in_lma = 0;
825 * In a DNE environment, an object (whether a regular file or a directory)
826 * and its name entry may reside on different MDTs. In that case, we create
827 * an agent entry on the MDT where the object resides. The agent entry
828 * references the object locally, which makes the object visible to
829 * userspace when the target is mounted directly as 'zfs', so that userspace
830 * tools such as 'tar' can handle the object properly.
832 * We handle the agent entry when setting the linkEA, which is the common
833 * interface for both regular files and directories and covers all the
834 * relevant cases, such as create/link/unlink/rename.
836 * NOTE: this must be done for both directories and regular files, so it can
837 * NOT be done in ea_{insert,delete}, which are directory-based operations.
839 static int osd_xattr_handle_linkea(const struct lu_env *env,
840 struct osd_device *osd,
841 struct osd_object *obj,
842 const struct lu_buf *buf,
843 struct osd_thandle *oh)
845 const struct lu_fid *fid = lu_object_fid(&obj->oo_dt.do_lu);
846 struct lu_fid *tfid = &osd_oti_get(env)->oti_fid;
847 struct linkea_data ldata = { .ld_buf = (struct lu_buf *)buf };
848 struct lu_name tmpname;
/* parse the new linkEA value handed in through buf */
853 rc = linkea_init_with_rec(&ldata);
855 linkea_first_entry(&ldata);
/* walk the link entries until one has been flagged as remote */
856 while (ldata.ld_lee != NULL && !remote) {
857 linkea_entry_unpack(ldata.ld_lee, &ldata.ld_reclen,
859 if (osd_remote_fid(env, osd, tfid) > 0)
862 linkea_next_entry(&ldata);
864 } else if (rc == -ENODATA) {
/*
 * Reconcile the agent entry with the linkEA: drop it when no remote name
 * is left, add it when a remote name exists but no agent entry does.
 */
870 if (lu_object_has_agent_entry(&obj->oo_dt.do_lu) && !remote) {
871 rc = osd_delete_from_remote_parent(env, osd, obj, oh, false);
873 CERROR("%s: failed to remove agent entry for "DFID": rc = %d\n",
874 osd_name(osd), PFID(fid), rc);
875 } else if (!lu_object_has_agent_entry(&obj->oo_dt.do_lu) && remote) {
876 rc = osd_add_to_remote_parent(env, osd, obj, oh);
878 CWARN("%s: failed to create agent entry for "DFID": rc = %d\n",
879 osd_name(osd), PFID(fid), rc);
/*
 * dt_object xattr-set entry point.
 *
 * POSIX ACL names are special-cased when the device has od_posix_acl unset.
 * Under the write side of obj->oo_guard: a PFID EA still embedded in the
 * LMA is first split out (the flags are then switched to LU_XATTR_CREATE),
 * a linkEA update on a device with a remote parent directory first
 * maintains the agent entry, and the value is stored via
 * osd_xattr_set_internal().
 */
885 int osd_xattr_set(const struct lu_env *env, struct dt_object *dt,
886 const struct lu_buf *buf, const char *name, int fl,
887 struct thandle *handle)
889 struct osd_object *obj = osd_dt_obj(dt);
890 struct osd_device *osd = osd_obj2dev(obj);
891 struct osd_thandle *oh;
895 LASSERT(handle != NULL);
896 LASSERT(osd_invariant(obj));
898 if (!osd_obj2dev(obj)->od_posix_acl &&
899 (strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS) == 0 ||
900 strcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
903 oh = container_of(handle, struct osd_thandle, ot_super);
905 down_write(&obj->oo_guard);
906 CDEBUG(D_INODE, "Setting xattr %s with size %d\n",
907 name, (int)buf->lb_len);
908 /* For the OST migrated from ldiskfs, the PFID EA may
909 * be stored in LMA because of ldiskfs inode size.
911 if (unlikely(strcmp(name, XATTR_NAME_FID) == 0 &&
912 obj->oo_pfid_in_lma)) {
913 rc = osd_xattr_split_pfid(env, obj, oh);
915 fl = LU_XATTR_CREATE;
916 } else if (strcmp(name, XATTR_NAME_LINK) == 0 &&
917 osd->od_remote_parent_dir != ZFS_NO_OBJECT) {
918 rc = osd_xattr_handle_linkea(env, osd, obj, buf, oh);
922 rc = osd_xattr_set_internal(env, obj, buf, name, fl, oh);
923 up_write(&obj->oo_guard);
/*
 * Declare the transaction holds for deleting xattr \a name.
 *
 * The object's SA is always held, since the xattr may live in
 * SA_ZPL_DXATTR. If the object has an xattr ZAP and \a name is found in it,
 * the value object (bonus and full free) and the ZAP entry are held too; a
 * missing entry (-ENOENT) needs nothing further.
 */
929 __osd_xattr_declare_del(const struct lu_env *env, struct osd_object *obj,
930 const char *name, struct osd_thandle *oh)
932 struct osd_device *osd = osd_obj2dev(obj);
933 dmu_tx_t *tx = oh->ot_tx;
934 uint64_t xa_data_obj;
937 /* update SA_ZPL_DXATTR if xattr was in SA */
938 dmu_tx_hold_sa(tx, obj->oo_sa_hdl, 0);
940 if (obj->oo_xattr == ZFS_NO_OBJECT)
943 rc = -zap_lookup(osd->od_os, obj->oo_xattr, name, 8, 1, &xa_data_obj);
945 /* Entry exists. Will delete the existing obj and ZAP entry */
946 dmu_tx_hold_bonus(tx, xa_data_obj);
947 dmu_tx_hold_free(tx, xa_data_obj, 0, DMU_OBJECT_END);
948 dmu_tx_hold_zap(tx, obj->oo_xattr, FALSE, (char *) name);
950 } else if (rc == -ENOENT) {
952 * Entry doesn't exist, nothing to be changed.
957 /* An error happened */
/*
 * dt_object declare-xattr-del entry point: under the read side of
 * obj->oo_guard, declares the deletion holds for objects that exist and
 * are not destroyed. \a handle and its transaction must be set.
 */
961 int osd_declare_xattr_del(const struct lu_env *env, struct dt_object *dt,
962 const char *name, struct thandle *handle)
964 struct osd_object *obj = osd_dt_obj(dt);
965 struct osd_thandle *oh;
968 LASSERT(handle != NULL);
969 LASSERT(osd_invariant(obj));
971 oh = container_of(handle, struct osd_thandle, ot_super);
972 LASSERT(oh->ot_tx != NULL);
974 down_read(&obj->oo_guard);
975 if (likely(dt_object_exists(&obj->oo_dt) && !obj->oo_destroyed)) {
976 LASSERT(obj->oo_dn != NULL);
977 __osd_xattr_declare_del(env, obj, name, oh);
979 up_read(&obj->oo_guard);
/*
 * Remove xattr \a name from the SA-resident xattr nvlist.
 *
 * XATTR_NAME_LMV is written back immediately with __osd_sa_xattr_update();
 * __osd_sa_xattr_schedule_update() defers the write-back in the other case.
 */
984 static int __osd_sa_xattr_del(const struct lu_env *env, struct osd_object *obj,
985 const char *name, struct osd_thandle *oh)
989 rc = __osd_xattr_cache(obj);
993 rc = -nvlist_remove(obj->oo_sa_xattr, name, DATA_TYPE_BYTE_ARRAY);
998 * only migrate delete LMV, and it needs to be done immediately, because
999 * it's used in deleting sub stripes, and if this is delayed, later when
1000 * destroying the master object, it will delete sub stripes again.
1002 if (!strcmp(name, XATTR_NAME_LMV))
1003 rc = __osd_sa_xattr_update(env, obj, oh);
1005 rc = __osd_sa_xattr_schedule_update(env, obj, oh);
/*
 * Delete xattr \a name from \a obj: the SA copy is tried first, then the
 * xattr ZAP. A ZAP hit frees the value object and removes the ZAP entry.
 */
1009 static int __osd_xattr_del(const struct lu_env *env, struct osd_object *obj,
1010 const char *name, struct osd_thandle *oh)
1012 struct osd_device *osd = osd_obj2dev(obj);
1013 uint64_t xa_data_obj;
1016 if (unlikely(!dt_object_exists(&obj->oo_dt) || obj->oo_destroyed))
1019 /* try remove xattr from SA at first */
1020 rc = __osd_sa_xattr_del(env, obj, name, oh);
/* nothing more to look at when the object has no xattr ZAP */
1024 if (obj->oo_xattr == ZFS_NO_OBJECT)
1027 rc = -zap_lookup(osd->od_os, obj->oo_xattr, name, sizeof(uint64_t), 1,
1029 if (rc == -ENOENT) {
1031 } else if (rc == 0) {
1032 /* Entry exists. We'll delete the existing obj and ZAP entry */
1033 rc = -dmu_object_free(osd->od_os, xa_data_obj, oh->ot_tx);
1037 rc = -zap_remove(osd->od_os, obj->oo_xattr, name, oh->ot_tx);
/*
 * dt_object xattr-del entry point.
 *
 * POSIX ACL names return -EOPNOTSUPP when the device has od_posix_acl
 * unset. Under the write side of obj->oo_guard, objects that do not exist
 * or are destroyed yield -ENOENT; a PFID EA embedded in the LMA is removed
 * by rewriting the LMA via osd_xattr_split_pfid(), and __osd_xattr_del()
 * handles the general case.
 */
1043 int osd_xattr_del(const struct lu_env *env, struct dt_object *dt,
1044 const char *name, struct thandle *handle)
1046 struct osd_object *obj = osd_dt_obj(dt);
1047 struct osd_thandle *oh;
1051 LASSERT(handle != NULL);
1052 oh = container_of(handle, struct osd_thandle, ot_super);
1053 LASSERT(oh->ot_tx != NULL);
1055 if (!osd_obj2dev(obj)->od_posix_acl &&
1056 (strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS) == 0 ||
1057 strcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
1058 RETURN(-EOPNOTSUPP);
1060 down_write(&obj->oo_guard);
1061 if (unlikely(!dt_object_exists(dt) || obj->oo_destroyed))
1062 GOTO(out, rc = -ENOENT);
1063 LASSERT(obj->oo_dn != NULL);
1064 /* For the OST migrated from ldiskfs, the PFID EA may
1065 * be stored in LMA because of ldiskfs inode size.
1067 if (unlikely(strcmp(name, XATTR_NAME_FID) == 0 && obj->oo_pfid_in_lma))
1068 rc = osd_xattr_split_pfid(env, obj, oh);
1070 rc = __osd_xattr_del(env, obj, name, oh);
1073 up_write(&obj->oo_guard);
/*
 * Declare the holds for destroying all ZAP-based xattrs of \a obj: a full
 * free of the xattr ZAP itself and of every value object it references.
 * Objects without an xattr ZAP need nothing. An error met while walking
 * the ZAP is checked against tx->tx_err at the end.
 */
1077 void osd_declare_xattrs_destroy(const struct lu_env *env,
1078 struct osd_object *obj, struct osd_thandle *oh)
1080 struct osd_device *osd = osd_obj2dev(obj);
1081 zap_attribute_t *za = &osd_oti_get(env)->oti_za;
1082 uint64_t oid = obj->oo_xattr, xid;
1083 dmu_tx_t *tx = oh->ot_tx;
1087 if (oid == ZFS_NO_OBJECT)
1088 return; /* Nothing to do for SA xattrs */
1090 /* Declare to free the ZAP holding xattrs */
1091 dmu_tx_hold_free(tx, oid, 0, DMU_OBJECT_END);
1093 rc = osd_zap_cursor_init(&zc, osd->od_os, oid, 0);
/* each ZAP entry is expected to hold one 64-bit object number */
1097 while (zap_cursor_retrieve(zc, za) == 0) {
1098 LASSERT(za->za_num_integers == 1);
1099 LASSERT(za->za_integer_length == sizeof(uint64_t));
1101 rc = -zap_lookup(osd->od_os, oid, za->za_name,
1102 sizeof(uint64_t), 1, &xid);
1104 CERROR("%s: xattr %s lookup failed: rc = %d\n",
1105 osd->od_svname, za->za_name, rc);
1108 dmu_tx_hold_free(tx, xid, 0, DMU_OBJECT_END);
1110 zap_cursor_advance(zc);
1113 osd_zap_cursor_fini(zc);
1115 if (rc && tx->tx_err == 0)
/*
 * Destroy all ZAP-based xattrs of \a obj inside an assigned transaction:
 * every value object referenced from the xattr ZAP is freed, then the ZAP
 * object itself. Lookup and free failures are logged with CERROR().
 */
1119 int osd_xattrs_destroy(const struct lu_env *env,
1120 struct osd_object *obj, struct osd_thandle *oh)
1122 struct osd_device *osd = osd_obj2dev(obj);
1123 dmu_tx_t *tx = oh->ot_tx;
1124 zap_attribute_t *za = &osd_oti_get(env)->oti_za;
1129 /* The transaction must have been assigned to a transaction group. */
1130 LASSERT(tx->tx_txg != 0);
1132 if (obj->oo_xattr == ZFS_NO_OBJECT)
1133 return 0; /* Nothing to do for SA xattrs */
1135 /* Free the ZAP holding the xattrs */
1136 rc = osd_zap_cursor_init(&zc, osd->od_os, obj->oo_xattr, 0);
/* each ZAP entry is expected to hold one 64-bit object number */
1140 while (zap_cursor_retrieve(zc, za) == 0) {
1141 LASSERT(za->za_num_integers == 1);
1142 LASSERT(za->za_integer_length == sizeof(uint64_t));
1144 rc = -zap_lookup(osd->od_os, obj->oo_xattr, za->za_name,
1145 sizeof(uint64_t), 1, &xid);
1147 CERROR("%s: lookup xattr %s failed: rc = %d\n",
1148 osd->od_svname, za->za_name, rc);
1150 rc = -dmu_object_free(osd->od_os, xid, tx);
1152 CERROR("%s: free xattr %s failed: rc = %d\n",
1153 osd->od_svname, za->za_name, rc);
1155 zap_cursor_advance(zc);
1157 osd_zap_cursor_fini(zc);
1159 rc = -dmu_object_free(osd->od_os, obj->oo_xattr, tx);
1161 CERROR("%s: free xattr %llu failed: rc = %d\n",
1162 osd->od_svname, obj->oo_xattr, rc);
/*
 * List the names of the SA-resident xattrs of \a obj.
 *
 * POSIX ACL names are filtered when the device has od_posix_acl unset. When
 * lb->lb_buf is set, each name is copied including its terminating NUL at
 * the running offset, after checking that it still fits in lb->lb_len.
 */
1168 osd_sa_xattr_list(const struct lu_env *env, struct osd_object *obj,
1169 const struct lu_buf *lb)
1171 nvpair_t *nvp = NULL;
1172 int len, counted = 0;
1175 rc = __osd_xattr_cache(obj);
1179 while ((nvp = nvlist_next_nvpair(obj->oo_sa_xattr, nvp)) != NULL) {
1180 const char *name = nvpair_name(nvp);
1182 if (!osd_obj2dev(obj)->od_posix_acl &&
1183 (strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS) == 0 ||
1184 strcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
1188 if (lb->lb_buf != NULL) {
1189 if (counted + len + 1 > lb->lb_len)
1192 memcpy(lb->lb_buf + counted, name, len + 1);
/*
 * dt_object xattr-list entry point: SA-resident names first, then the names
 * found in the xattr ZAP (if the object has one).
 *
 * Runs under the read side of obj->oo_guard; objects that do not exist or
 * are destroyed yield -ENOENT. POSIX ACL names are skipped when the device
 * has od_posix_acl unset. With a destination buffer, a ZAP name that does
 * not fit yields -ERANGE.
 */
1199 int osd_xattr_list(const struct lu_env *env, struct dt_object *dt,
1200 const struct lu_buf *lb)
1202 struct osd_object *obj = osd_dt_obj(dt);
1203 struct osd_device *osd = osd_obj2dev(obj);
1204 zap_attribute_t *za = &osd_oti_get(env)->oti_za;
1209 down_read(&obj->oo_guard);
1210 if (unlikely(!dt_object_exists(dt) || obj->oo_destroyed))
1211 GOTO(out, rc = -ENOENT);
1212 LASSERT(obj->oo_dn != NULL);
1214 rc = osd_sa_xattr_list(env, obj, lb);
1220 /* continue with dnode xattr if any */
1221 if (obj->oo_xattr == ZFS_NO_OBJECT)
1222 GOTO(out, rc = counted);
1224 rc = osd_zap_cursor_init(&zc, osd->od_os, obj->oo_xattr, 0);
1228 while ((rc = -zap_cursor_retrieve(zc, za)) == 0) {
1229 if (!osd_obj2dev(obj)->od_posix_acl &&
1230 (strcmp(za->za_name, XATTR_NAME_POSIX_ACL_ACCESS) == 0 ||
1231 strcmp(za->za_name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0)) {
1232 zap_cursor_advance(zc);
/* rc temporarily holds the name length here */
1236 rc = strlen(za->za_name);
1237 if (lb->lb_buf != NULL) {
1238 if (counted + rc + 1 > lb->lb_len)
1239 GOTO(out_fini, rc = -ERANGE);
1241 memcpy(lb->lb_buf + counted, za->za_name, rc + 1);
1245 zap_cursor_advance(zc);
1247 if (rc == -ENOENT) /* no more keys in the index */
1249 else if (unlikely(rc < 0))
1254 osd_zap_cursor_fini(zc);
1256 up_read(&obj->oo_guard);