4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright (c) 2012, 2017, Intel Corporation.
29 * This file is part of Lustre, http://www.lustre.org/
31 * lustre/osd-zfs/osd_xattr.c
32 * functions to manipulate extended attributes and system attributes
34 * Author: Alex Zhuravlev <bzzz@whamcloud.com>
35 * Author: Mike Pershin <tappro@whamcloud.com>
38 #define DEBUG_SUBSYSTEM S_OSD
40 #include <libcfs/libcfs.h>
41 #include <obd_support.h>
42 #include <lustre_net.h>
44 #include <obd_class.h>
45 #include <lustre_disk.h>
46 #include <lustre_fid.h>
47 #include <lustre_linkea.h>
49 #include "osd_internal.h"
51 #include <sys/dnode.h>
56 #include <sys/spa_impl.h>
57 #include <sys/zfs_znode.h>
58 #include <sys/dmu_tx.h>
59 #include <sys/dmu_objset.h>
60 #include <sys/dsl_prop.h>
61 #include <sys/sa_impl.h>
64 #include <linux/posix_acl_xattr.h>
65 #include <lustre_scrub.h>
/*
 * Load the SA_ZPL_DXATTR system attribute behind handle \a hdl into the
 * nvlist returned through \a sa.
 *
 * Visible sequence: sa_size() obtains the packed size, a zio buffer of that
 * size is allocated, sa_lookup() fills it, nvlist_unpack() decodes it into
 * *sa, and the buffer is freed again. Each ZFS return code is negated
 * before it is stored in rc.
 *
 * NOTE(review): this extract omits the declarations, braces and the
 * conditions between these calls; the nvlist_alloc() call is presumably the
 * fallback taken when the DXATTR attribute is absent -- confirm against the
 * full file.
 */
67 int __osd_xattr_load(struct osd_device *osd, sa_handle_t *hdl, nvlist_t **sa)
72 rc = -sa_size(hdl, SA_ZPL_DXATTR(osd), &size);
75 rc = -nvlist_alloc(sa, NV_UNIQUE_NAME, KM_SLEEP);
79 buf = zio_buf_alloc(size);
84 rc = -sa_lookup(hdl, SA_ZPL_DXATTR(osd), buf, size);
86 rc = -nvlist_unpack(buf, size, sa, KM_SLEEP);
/* the temporary buffer is released with the same size it was allocated with */
87 zio_buf_free(buf, size);
/*
 * Populate obj->oo_sa_xattr on demand.
 *
 * Asserts that the object has an SA handle, tests whether the cached nvlist
 * is already present, and otherwise returns the result of
 * __osd_xattr_load() for the object's device and SA handle.
 *
 * NOTE(review): the statement executed when the cache is already non-NULL
 * is not part of this extract (presumably an early "return 0" -- confirm).
 */
93 static inline int __osd_xattr_cache(struct osd_object *obj)
95 LASSERT(obj->oo_sa_hdl);
96 if (obj->oo_sa_xattr != NULL)
98 return __osd_xattr_load(osd_obj2dev(obj),
99 obj->oo_sa_hdl, &obj->oo_sa_xattr);
/*
 * Look up xattr \a name in the object's cached SA xattr nvlist.
 *
 * The nvlist is loaded on demand through __osd_xattr_cache() and queried
 * with nvlist_lookup_byte_array(). When \a buf or buf->lb_buf is NULL only
 * the required size is reported via \a sizep; when *sizep is larger than
 * buf->lb_len the function returns -ERANGE; otherwise *sizep bytes of the
 * value are copied into buf->lb_buf.
 *
 * NOTE(review): the return-type line, the local declarations and the rc
 * checks that follow each call are missing from this extract.
 */
103 __osd_sa_xattr_get(const struct lu_env *env, struct osd_object *obj,
104 const struct lu_buf *buf, const char *name, int *sizep)
109 rc = __osd_xattr_cache(obj);
113 LASSERT(obj->oo_sa_xattr);
114 rc = -nvlist_lookup_byte_array(obj->oo_sa_xattr, name,
119 if (buf == NULL || buf->lb_buf == NULL) {
120 /* return the required size by *sizep */
124 if (*sizep > buf->lb_len)
125 return -ERANGE; /* match ldiskfs error */
127 memcpy(buf->lb_buf, nv_value, *sizep);
/*
 * Read xattr \a name from the xattr ZAP object \a xattr, where each entry
 * names a separate object that holds the value.
 *
 * Visible steps: test \a xattr against ZFS_NO_OBJECT, zap_lookup() the
 * entry to find the data object number, take a dnode hold and a private SA
 * handle on that object, read SA_ZPL_SIZE as the value length, reject
 * lengths above INT_MAX, and then either report only the size (no buffer),
 * fail with -ERANGE (buffer too small), or dmu_read() the value into
 * buf->lb_buf. The SA handle is destroyed and the dnode released on the
 * way out.
 *
 * NOTE(review): the result returned for ZFS_NO_OBJECT and for oversized
 * values, the labels, and the assignment to *sizep are not visible in this
 * extract.
 */
131 int __osd_xattr_get_large(const struct lu_env *env, struct osd_device *osd,
132 uint64_t xattr, struct lu_buf *buf,
133 const char *name, int *sizep)
136 sa_handle_t *sa_hdl = NULL;
137 uint64_t xa_data_obj, size;
140 /* are there any extended attributes? */
141 if (xattr == ZFS_NO_OBJECT)
144 /* Lookup the object number containing the xattr data */
145 rc = -zap_lookup(osd->od_os, xattr, name, sizeof(uint64_t), 1,
150 rc = __osd_obj2dnode(osd->od_os, xa_data_obj, &xa_data_dn);
154 rc = -sa_handle_get(osd->od_os, xa_data_obj, NULL, SA_HDL_PRIVATE,
159 /* Get the xattr value length / object size */
160 rc = -sa_lookup(sa_hdl, SA_ZPL_SIZE(osd), &size, 8);
164 if (size > INT_MAX) {
171 if (buf == NULL || buf->lb_buf == NULL) {
172 /* We only need to return the required size */
175 if (*sizep > buf->lb_len) {
176 rc = -ERANGE; /* match ldiskfs error */
/* read the whole value, starting at offset 0 of the data object */
180 rc = -dmu_read(osd->od_os, xa_data_dn->dn_object, 0,
181 size, buf->lb_buf, DMU_READ_PREFETCH);
184 sa_handle_destroy(sa_hdl);
186 osd_dnode_rele(xa_data_dn);
192 * Copy an extended attribute into the buffer provided, or compute
193 * the required buffer size if \a buf is NULL.
195 * On success, the number of bytes used or required is stored in \a sizep.
197 * Note that no locking is done here.
199 * \param[in] env execution environment
200 * \param[in] obj object for which to retrieve xattr
201 * \param[out] buf buffer to store xattr value in
202 * \param[in] name name of xattr to copy
203 * \param[out] sizep bytes used or required to store xattr
205 * \retval 0 on success
206 * \retval negative negated errno on failure
/*
 * Implementation note: the lookup is two-staged. The SA-resident nvlist is
 * consulted first through __osd_sa_xattr_get(); the ZAP-based store rooted
 * at obj->oo_xattr is then queried through __osd_xattr_get_large().
 *
 * NOTE(review): the value returned for a missing or destroyed object and
 * the rc test that decides when to fall back are not visible in this
 * extract.
 */
208 int osd_xattr_get_internal(const struct lu_env *env, struct osd_object *obj,
209 struct lu_buf *buf, const char *name, int *sizep)
213 if (unlikely(!dt_object_exists(&obj->oo_dt) || obj->oo_destroyed))
216 /* check SA_ZPL_DXATTR first then fallback to directory xattr */
217 rc = __osd_sa_xattr_get(env, obj, buf, name, sizep);
221 return __osd_xattr_get_large(env, osd_obj2dev(obj), obj->oo_xattr,
226 * Copy LMA extended attribute into provided buffer
228 * Note that no locking is done here.
230 * \param[in] env execution environment
231 * \param[in] obj object for which to retrieve xattr
232 * \param[out] buf buffer to store xattr value in
234 * \retval 0 on success
235 * \retval negative negated errno on failure
/*
 * Implementation note: XATTR_NAME_LMA is looked up in the SA nvlist first
 * and then in the ZAP-based store. After either lookup succeeds, a value
 * shorter than struct lustre_mdt_attrs is treated as a special case.
 *
 * NOTE(review): the remaining parameters, the action taken for destroyed
 * objects and for short values, and the condition that selects the second
 * lookup are not visible in this extract.
 */
237 int osd_xattr_get_lma(const struct lu_env *env, struct osd_object *obj,
246 if (unlikely(obj->oo_destroyed))
249 /* check SA_ZPL_DXATTR first then fallback to directory xattr */
250 rc = __osd_sa_xattr_get(env, obj, buf, XATTR_NAME_LMA, &size);
251 if (!rc && unlikely(size < sizeof(struct lustre_mdt_attrs)))
256 rc = __osd_xattr_get_large(env, osd_obj2dev(obj), obj->oo_xattr,
257 buf, XATTR_NAME_LMA, &size);
258 if (!rc && unlikely(size < sizeof(struct lustre_mdt_attrs)))
/*
 * Build a filter_fid (PFID EA) image from parent-FID data stored inside
 * the object's LMA xattr.
 *
 * The LMA is read into the per-thread buffer info->oti_buf, viewed as a
 * struct lustre_ost_attrs, and byte-swapped with lustre_loa_swab(). The
 * function asserts that LMAC_STRIPE_INFO is set, reports sizeof(*ff)
 * through \a sizep, and then fills the layout fields: the stripe count is
 * taken from the upper bits of loa_parent_fid.f_ver, the stripe size from
 * loa_stripe_size, and the parent FID is stored little-endian after f_ver
 * has been masked. Component start/end/id are copied only when
 * LMAC_COMP_INFO is set.
 *
 * NOTE(review): the assignments of ff and ol, the results returned for an
 * empty or too-small \a buf, and the else branch around the final
 * "ol_comp_start = 0" are not visible in this extract.
 */
265 static int osd_get_pfid_from_lma(const struct lu_env *env,
266 struct osd_object *obj,
267 struct lu_buf *buf, int *sizep)
269 struct osd_thread_info *info = osd_oti_get(env);
270 struct lustre_ost_attrs *loa =
271 (struct lustre_ost_attrs *)&info->oti_buf;
272 struct lustre_mdt_attrs *lma = &loa->loa_lma;
273 struct filter_fid *ff;
274 struct ost_layout *ol;
275 struct lu_buf tbuf = {
277 .lb_len = sizeof(info->oti_buf),
/* compile-time guarantee that the thread buffer can hold a full LOA */
282 BUILD_BUG_ON(sizeof(info->oti_buf) < sizeof(*loa));
283 rc = osd_xattr_get_internal(env, obj, &tbuf,
284 XATTR_NAME_LMA, sizep);
288 lustre_loa_swab(loa, true);
289 LASSERT(lma->lma_compat & LMAC_STRIPE_INFO);
291 *sizep = sizeof(*ff);
292 if (buf->lb_len == 0 || !buf->lb_buf)
295 if (buf->lb_len < *sizep)
300 ol->ol_stripe_count = cpu_to_le32(loa->loa_parent_fid.f_ver >>
301 PFID_STRIPE_IDX_BITS);
302 ol->ol_stripe_size = cpu_to_le32(loa->loa_stripe_size);
/* strip the bits consumed above before exporting the parent FID */
303 loa->loa_parent_fid.f_ver &= PFID_STRIPE_COUNT_MASK;
304 fid_cpu_to_le(&ff->ff_parent, &loa->loa_parent_fid);
305 if (lma->lma_compat & LMAC_COMP_INFO) {
306 ol->ol_comp_start = cpu_to_le64(loa->loa_comp_start);
307 ol->ol_comp_end = cpu_to_le64(loa->loa_comp_end);
308 ol->ol_comp_id = cpu_to_le32(loa->loa_comp_id);
310 ol->ol_comp_start = 0;
/*
 * dt-level xattr read for object \a dt.
 *
 * POSIX ACL names get special handling when the device has od_posix_acl
 * disabled. The lookup itself runs under the read side of obj->oo_guard:
 * a missing or destroyed object yields -ENOENT, XATTR_NAME_FID is served
 * from the LMA when obj->oo_pfid_in_lma is set, and every other name goes
 * through osd_xattr_get_internal().
 *
 * NOTE(review): the result returned for the filtered ACL names, the else
 * keyword between the two lookups, and the conversion of rc/size into the
 * final return value are not visible in this extract.
 */
318 int osd_xattr_get(const struct lu_env *env, struct dt_object *dt,
319 struct lu_buf *buf, const char *name)
321 struct osd_object *obj = osd_dt_obj(dt);
325 if (!osd_obj2dev(obj)->od_posix_acl &&
326 (strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS) == 0 ||
327 strcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
330 down_read(&obj->oo_guard);
331 if (unlikely(!dt_object_exists(dt) || obj->oo_destroyed))
332 GOTO(out, rc = -ENOENT);
333 LASSERT(obj->oo_dn != NULL);
335 /* For the OST migrated from ldiskfs, the PFID EA may
336 * be stored in LMA because of ldiskfs inode size. */
337 if (strcmp(name, XATTR_NAME_FID) == 0 && obj->oo_pfid_in_lma)
338 rc = osd_get_pfid_from_lma(env, obj, buf, &size);
340 rc = osd_xattr_get_internal(env, obj, buf, name, &size);
347 up_read(&obj->oo_guard);
351 /* the function is used to declare EAs when SA is not supported */
/*
 * Three cases are declared on the DMU transaction oh->ot_tx:
 *  - the object has no xattr ZAP yet (oo_xattr == ZFS_NO_OBJECT): hold a
 *    new ZAP with one entry plus a new SA-bearing value object of
 *    \a vallen bytes;
 *  - the entry \a name already exists in the ZAP: hold the bonus of the
 *    existing value object, free it from offset \a vallen to the end, and
 *    write \a vallen bytes at offset 0;
 *  - the entry does not exist (-ENOENT): hold the ZAP bonus and a ZAP add,
 *    plus a new value object as in the first case.
 *
 * NOTE(review): the early return after the first case, the "rc == 0" test
 * and the handling of other zap_lookup() errors are not visible in this
 * extract.
 */
352 static void __osd_xattr_declare_legacy(const struct lu_env *env,
353 struct osd_object *obj,
354 int vallen, const char *name,
355 struct osd_thandle *oh)
357 struct osd_device *osd = osd_obj2dev(obj);
358 dmu_tx_t *tx = oh->ot_tx;
359 uint64_t xa_data_obj;
362 if (obj->oo_xattr == ZFS_NO_OBJECT) {
363 /* xattr zap + entry */
364 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, TRUE, (char *) name);
365 /* xattr value obj */
366 dmu_tx_hold_sa_create(tx, ZFS_SA_BASE_ATTR_SIZE);
367 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, vallen);
371 rc = -zap_lookup(osd->od_os, obj->oo_xattr, name, sizeof(uint64_t), 1,
375 * Entry already exists.
376 * We'll truncate the existing object.
378 dmu_tx_hold_bonus(tx, xa_data_obj);
379 dmu_tx_hold_free(tx, xa_data_obj, vallen, DMU_OBJECT_END);
380 dmu_tx_hold_write(tx, xa_data_obj, 0, vallen);
381 } else if (rc == -ENOENT) {
383 * Entry doesn't exist, we need to create a new one and a new
384 * object to store the value.
386 dmu_tx_hold_bonus(tx, obj->oo_xattr);
387 dmu_tx_hold_zap(tx, obj->oo_xattr, TRUE, (char *) name);
388 dmu_tx_hold_sa_create(tx, ZFS_SA_BASE_ATTR_SIZE);
389 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, vallen);
/*
 * Declare, on the DMU transaction oh->ot_tx, the holds needed to set xattr
 * \a name with a value of \a vallen bytes.
 *
 * Visible decisions, in order:
 *  - destroyed objects are special-cased first;
 *  - for XATTR_NAME_LINK on a device with a remote parent directory, an
 *    extra ZAP hold on that directory is taken for the agent entry;
 *  - when the device does not keep xattrs in SA, declaration is delegated
 *    to __osd_xattr_declare_legacy();
 *  - for an existing object, a spill-block hold on its dnode is taken;
 *  - for an object being created, obj->oo_ea_in_bonus tracks the bytes
 *    declared so far and a spill hold on DMU_NEW_OBJECT is taken once the
 *    running total would exceed the bonus length.
 *
 * NOTE(review): the return statements that end the early cases and the
 * remaining arguments of osd_tx_hold_zap() are not visible in this extract.
 */
393 void __osd_xattr_declare_set(const struct lu_env *env, struct osd_object *obj,
394 int vallen, const char *name,
395 struct osd_thandle *oh)
397 struct osd_device *osd = osd_obj2dev(obj);
398 dmu_tx_t *tx = oh->ot_tx;
401 if (unlikely(obj->oo_destroyed))
404 if (strcmp(name, XATTR_NAME_LINK) == 0 &&
405 osd->od_remote_parent_dir != ZFS_NO_OBJECT) {
406 /* If some name entry resides on remote MDT, then will create
407 * agent entry under remote parent. On the other hand, if the
408 * remote entry will be removed, then related agent entry may
409 * need to be removed from the remote parent. So there may be
410 * kinds of cases, let's declare enough credits. The credits
411 * for create agent entry is enough for remove case. */
412 osd_tx_hold_zap(tx, osd->od_remote_parent_dir,
416 if (unlikely(!osd_obj2dev(obj)->od_xattr_in_sa)) {
417 __osd_xattr_declare_legacy(env, obj, vallen, name, oh);
421 /* declare EA in SA */
422 if (dt_object_exists(&obj->oo_dt)) {
423 LASSERT(obj->oo_sa_hdl);
424 /* XXX: it should be possible to skip spill
425 * declaration if specific EA is part of
426 * bonus and doesn't grow */
427 dmu_tx_hold_spill(tx, obj->oo_dn->dn_object);
431 bonuslen = osd_obj_bonuslen(obj);
433 /* the object doesn't exist, but we've declared bonus
434 * in osd_declare_object_create() already */
435 if (obj->oo_ea_in_bonus > bonuslen) {
436 /* spill has been declared already */
437 } else if (obj->oo_ea_in_bonus + vallen > bonuslen) {
438 /* we're about to exceed bonus, let's declare spill */
439 dmu_tx_hold_spill(tx, DMU_NEW_OBJECT);
/* account for this EA so later declarations see the running total */
441 obj->oo_ea_in_bonus += vallen;
/*
 * dt-level declaration of an xattr set.
 *
 * Converts \a handle into the enclosing osd_thandle and calls
 * __osd_xattr_declare_set() with buf->lb_len as the value length while
 * holding the read side of obj->oo_guard. \a fl is not used by any
 * visible line.
 *
 * NOTE(review): the return statement is not visible in this extract.
 */
444 int osd_declare_xattr_set(const struct lu_env *env, struct dt_object *dt,
445 const struct lu_buf *buf, const char *name,
446 int fl, struct thandle *handle)
448 struct osd_object *obj = osd_dt_obj(dt);
449 struct osd_thandle *oh;
452 LASSERT(handle != NULL);
453 oh = container_of(handle, struct osd_thandle, ot_super);
455 down_read(&obj->oo_guard);
456 __osd_xattr_declare_set(env, obj, buf->lb_len, name, oh);
457 up_read(&obj->oo_guard);
/*
 * Write the complete initial set of system attributes for \a obj in one
 * call to sa_replace_all_by_template().
 *
 * The function clears the oo_late_xattr and oo_late_attr_set flags, takes
 * the transaction group number as the generation, records a birth time
 * (also mirrored into obj->oo_attr with LA_BTIME), copies the remaining
 * values from obj->oo_attr into the per-thread osa_attr, and builds the
 * bulk attribute array. The cached xattr nvlist obj->oo_sa_xattr is
 * XDR-packed into the per-thread oti_xattr_lbuf and appended as
 * SA_ZPL_DXATTR before the template is applied.
 *
 * With ZFS_PROJINHERIT and a project-used dnode on the device, the visible
 * lines also set a project ID, the ZFS_PROJID flag and oo_with_projid.
 *
 * NOTE(review): the initialisation of "now", the else keyword between the
 * two projid assignments, the rc checks after each call and the final
 * return are not visible in this extract.
 */
462 int __osd_sa_attr_init(const struct lu_env *env, struct osd_object *obj,
463 struct osd_thandle *oh)
465 sa_bulk_attr_t *bulk = osd_oti_get(env)->oti_attr_bulk;
466 struct osa_attr *osa = &osd_oti_get(env)->oti_osa;
467 struct lu_buf *lb = &osd_oti_get(env)->oti_xattr_lbuf;
468 struct osd_device *osd = osd_obj2dev(obj);
470 inode_timespec_t now;
474 obj->oo_late_xattr = 0;
475 obj->oo_late_attr_set = 0;
477 gen = dmu_tx_get_txg(oh->ot_tx);
479 ZFS_TIME_ENCODE(&now, osa->btime);
481 obj->oo_attr.la_valid |= LA_BTIME;
482 obj->oo_attr.la_btime = osa->btime[0];
483 osa->atime[0] = obj->oo_attr.la_atime;
484 osa->ctime[0] = obj->oo_attr.la_ctime;
485 osa->mtime[0] = obj->oo_attr.la_mtime;
486 osa->mode = obj->oo_attr.la_mode;
487 osa->uid = obj->oo_attr.la_uid;
488 osa->gid = obj->oo_attr.la_gid;
489 osa->rdev = obj->oo_attr.la_rdev;
490 osa->nlink = obj->oo_attr.la_nlink;
491 osa->flags = attrs_fs2zfs(obj->oo_attr.la_flags);
492 osa->size = obj->oo_attr.la_size;
493 #ifdef ZFS_PROJINHERIT
494 if (osd->od_projectused_dn) {
495 if (obj->oo_attr.la_valid & LA_PROJID)
496 osa->projid = obj->oo_attr.la_projid;
498 osa->projid = ZFS_DEFAULT_PROJID;
499 osa->flags |= ZFS_PROJID;
500 obj->oo_with_projid = 1;
/* assemble the bulk list; cnt is advanced by SA_ADD_BULK_ATTR */
505 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MODE(osd), NULL, &osa->mode, 8);
506 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_SIZE(osd), NULL, &osa->size, 8);
507 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_GEN(osd), NULL, &gen, 8);
508 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_UID(osd), NULL, &osa->uid, 8);
509 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_GID(osd), NULL, &osa->gid, 8);
510 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_PARENT(osd), NULL,
512 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_FLAGS(osd), NULL, &osa->flags, 8);
513 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_ATIME(osd), NULL, osa->atime, 16);
514 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MTIME(osd), NULL, osa->mtime, 16);
515 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CTIME(osd), NULL, osa->ctime, 16);
516 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CRTIME(osd), NULL, osa->btime, 16);
517 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_LINKS(osd), NULL, &osa->nlink, 8);
518 #ifdef ZFS_PROJINHERIT
519 if (osd->od_projectused_dn)
520 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_PROJID(osd), NULL,
523 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_RDEV(osd), NULL, &osa->rdev, 8);
/* the bulk array lives in per-thread storage; make sure it was not overrun */
524 LASSERT(cnt <= ARRAY_SIZE(osd_oti_get(env)->oti_attr_bulk));
526 /* Update the SA for additions, modifications, and removals. */
527 rc = -nvlist_size(obj->oo_sa_xattr, &size, NV_ENCODE_XDR);
531 lu_buf_check_and_alloc(lb, size);
532 if (lb->lb_buf == NULL) {
533 CERROR("%s: can't allocate buffer for xattr update\n",
538 rc = -nvlist_pack(obj->oo_sa_xattr, (char **)&lb->lb_buf, &size,
539 NV_ENCODE_XDR, KM_SLEEP);
543 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_DXATTR(osd), NULL, lb->lb_buf, size);
545 rc = -sa_replace_all_by_template(obj->oo_sa_hdl, bulk, cnt, oh->ot_tx);
/*
 * Write the object's cached xattr nvlist back to the SA_ZPL_DXATTR system
 * attribute inside transaction oh->ot_tx.
 *
 * Clears oo_late_xattr, sizes the XDR encoding with nvlist_size(), makes
 * sure the per-thread oti_xattr_lbuf is large enough, packs the nvlist,
 * asserts that the packed pointer equals lb->lb_buf, and stores the result
 * with sa_update().
 *
 * NOTE(review): the line that points dxattr at lb->lb_buf, the rc checks
 * and the final return are not visible in this extract. On the visible
 * line the result of sa_update() is not assigned to anything -- confirm
 * whether that is intentional.
 */
550 int __osd_sa_xattr_update(const struct lu_env *env, struct osd_object *obj,
551 struct osd_thandle *oh)
553 struct lu_buf *lb = &osd_oti_get(env)->oti_xattr_lbuf;
554 struct osd_device *osd = osd_obj2dev(obj);
559 obj->oo_late_xattr = 0;
561 /* Update the SA for additions, modifications, and removals. */
562 rc = -nvlist_size(obj->oo_sa_xattr, &size, NV_ENCODE_XDR);
566 lu_buf_check_and_alloc(lb, size);
567 if (lb->lb_buf == NULL) {
568 CERROR("%s: can't allocate buffer for xattr update\n",
574 rc = -nvlist_pack(obj->oo_sa_xattr, &dxattr, &size,
575 NV_ENCODE_XDR, KM_SLEEP);
578 LASSERT(dxattr == lb->lb_buf);
580 sa_update(obj->oo_sa_hdl, SA_ZPL_DXATTR(osd), dxattr, size, oh->ot_tx);
586 * Set an extended attribute.
587 * This transaction must have called udmu_xattr_declare_set() first.
589 * Returns 0 on success or a negative error number on failure.
591 * No locking is done here.
/*
 * Defer the SA_ZPL_DXATTR write-back for \a obj: the function only marks
 * the object (oo_late_xattr = 1) and registers it with
 * osd_object_sa_dirty_add() for transaction \a oh; no xattr value is
 * written here.
 *
 * NOTE(review): the header fragment preceding this function in the file
 * talks about setting an attribute and looks stale for this helper. The
 * return statement is not visible in this extract.
 */
593 int __osd_sa_xattr_schedule_update(const struct lu_env *env,
594 struct osd_object *obj,
595 struct osd_thandle *oh)
598 LASSERT(obj->oo_sa_hdl);
599 LASSERT(obj->oo_sa_xattr);
601 /* schedule batched SA update in osd_object_sa_dirty_rele() */
602 obj->oo_late_xattr = 1;
603 osd_object_sa_dirty_add(obj, oh);
/*
 * Store xattr \a name with value \a buf in the object's SA-resident xattr
 * nvlist.
 *
 * Visible flow:
 *  - the nvlist cache is loaded through __osd_xattr_cache();
 *  - two size checks are made: the value against OBD_MAX_EA_SIZE, and the
 *    current packed nvlist size plus the value against DXATTR_MAX_SA_SIZE;
 *  - the existing entry is looked up; if it exists, LU_XATTR_CREATE is
 *    special-cased, and a value that is too big causes the entry to be
 *    removed from the nvlist, an update to be scheduled, and -EFBIG to be
 *    returned (or the scheduling error);
 *  - if it does not exist (-ENOENT), LU_XATTR_REPLACE is special-cased;
 *  - any entry of the same name in the xattr ZAP is looked up and its value
 *    object freed, and zap_remove() is called for the entry;
 *  - the value is added with nvlist_add_byte_array();
 *  - the write-back is batched through __osd_sa_xattr_schedule_update()
 *    when the dnode was allocated in the current txg, and done at once
 *    through __osd_sa_xattr_update() otherwise.
 *
 * NOTE(review): the local declarations (including too_big), the bodies of
 * the size-check branches, the results returned for the CREATE and REPLACE
 * conflicts, and several rc checks are not visible in this extract. The
 * result of zap_remove() is not assigned on the visible line.
 */
609 int __osd_sa_xattr_set(const struct lu_env *env, struct osd_object *obj,
610 const struct lu_buf *buf, const char *name, int fl,
611 struct osd_thandle *oh)
619 rc = __osd_xattr_cache(obj);
623 LASSERT(obj->oo_sa_xattr);
624 if (buf->lb_len > OBD_MAX_EA_SIZE) {
627 /* Prevent the DXATTR SA from consuming the entire SA
629 rc = -nvlist_size(obj->oo_sa_xattr, &size, NV_ENCODE_XDR);
633 if (size + buf->lb_len > DXATTR_MAX_SA_SIZE)
637 /* even in case of -EFBIG we must lookup xattr and check can we
638 * rewrite it then delete from SA */
639 rc = -nvlist_lookup_byte_array(obj->oo_sa_xattr, name, &nv_value,
642 if (fl & LU_XATTR_CREATE) {
644 } else if (too_big) {
645 rc = -nvlist_remove(obj->oo_sa_xattr, name,
646 DATA_TYPE_BYTE_ARRAY);
649 rc = __osd_sa_xattr_schedule_update(env, obj, oh);
650 return rc == 0 ? -EFBIG : rc;
652 } else if (rc == -ENOENT) {
653 if (fl & LU_XATTR_REPLACE)
661 /* Ensure xattr doesn't exist in ZAP */
662 if (obj->oo_xattr != ZFS_NO_OBJECT) {
663 struct osd_device *osd = osd_obj2dev(obj);
665 rc = -zap_lookup(osd->od_os, obj->oo_xattr,
668 rc = -dmu_object_free(osd->od_os, objid, oh->ot_tx);
670 zap_remove(osd->od_os, obj->oo_xattr,
675 rc = -nvlist_add_byte_array(obj->oo_sa_xattr, name,
676 (uchar_t *)buf->lb_buf, buf->lb_len);
680 /* batch updates only for just created dnodes where we
681 * used to set number of EAs in a single transaction */
682 if (obj->oo_dn->dn_allocated_txg == oh->ot_tx->tx_txg)
683 rc = __osd_sa_xattr_schedule_update(env, obj, oh);
685 rc = __osd_sa_xattr_update(env, obj, oh);
/*
 * Store xattr \a name with value \a buf in the ZAP-based xattr store, where
 * every attribute value lives in its own object.
 *
 * Visible flow:
 *  - if the object has no xattr ZAP yet, one is created as a directory-mode
 *    object and its number is recorded in obj->oo_xattr and in the
 *    SA_ZPL_XATTR system attribute;
 *  - the entry \a name is looked up in the ZAP;
 *  - if it exists, LU_XATTR_CREATE is special-cased; otherwise the value
 *    object is opened (dnode hold plus private SA handle), its current
 *    size is read, and its contents are freed from offset 0 to the end;
 *  - if it does not exist (-ENOENT), LU_XATTR_REPLACE is special-cased;
 *    otherwise a new regular-file-mode object is created, a private SA
 *    handle is taken on it, and a ZAP entry pointing at it is added;
 *  - the value is written with dmu_write() at offset 0 and SA_ZPL_SIZE is
 *    updated;
 *  - the SA handle and both dnode holds are dropped on the way out.
 *
 * NOTE(review): the return-type line, the results returned for the CREATE
 * and REPLACE conflicts, the assignment of the new size before
 * sa_update(), the rc checks and the labels are not visible in this
 * extract.
 */
691 __osd_xattr_set(const struct lu_env *env, struct osd_object *obj,
692 const struct lu_buf *buf, const char *name, int fl,
693 struct osd_thandle *oh)
695 struct osd_device *osd = osd_obj2dev(obj);
696 dnode_t *xa_zap_dn = NULL;
697 dnode_t *xa_data_dn = NULL;
698 uint64_t xa_data_obj;
699 sa_handle_t *sa_hdl = NULL;
700 dmu_tx_t *tx = oh->ot_tx;
704 LASSERT(obj->oo_sa_hdl);
706 if (obj->oo_xattr == ZFS_NO_OBJECT) {
707 struct lu_attr *la = &osd_oti_get(env)->oti_la;
709 la->la_valid = LA_MODE;
710 la->la_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
711 rc = __osd_zap_create(env, osd, &xa_zap_dn, tx, la, 0, 0);
715 obj->oo_xattr = xa_zap_dn->dn_object;
716 rc = osd_object_sa_update(obj, SA_ZPL_XATTR(osd),
717 &obj->oo_xattr, 8, oh);
722 rc = -zap_lookup(osd->od_os, obj->oo_xattr, name, sizeof(uint64_t), 1,
725 if (fl & LU_XATTR_CREATE) {
730 * Entry already exists.
731 * We'll truncate the existing object.
733 rc = __osd_obj2dnode(osd->od_os, xa_data_obj, &xa_data_dn);
737 rc = -sa_handle_get(osd->od_os, xa_data_obj, NULL,
738 SA_HDL_PRIVATE, &sa_hdl);
742 rc = -sa_lookup(sa_hdl, SA_ZPL_SIZE(osd), &size, 8);
746 rc = -dmu_free_range(osd->od_os, xa_data_dn->dn_object,
747 0, DMU_OBJECT_END, tx);
750 } else if (rc == -ENOENT) {
751 struct lu_attr *la = &osd_oti_get(env)->oti_la;
753 * Entry doesn't exist, we need to create a new one and a new
754 * object to store the value.
756 if (fl & LU_XATTR_REPLACE) {
757 /* should be ENOATTR according to the
758 * man, but that is undefined here */
763 la->la_valid = LA_MODE;
764 la->la_mode = S_IFREG | S_IRUGO | S_IWUSR;
765 rc = __osd_object_create(env, osd, obj,
766 lu_object_fid(&obj->oo_dt.do_lu),
767 &xa_data_dn, tx, la);
770 xa_data_obj = xa_data_dn->dn_object;
772 rc = -sa_handle_get(osd->od_os, xa_data_obj, NULL,
773 SA_HDL_PRIVATE, &sa_hdl);
777 rc = -zap_add(osd->od_os, obj->oo_xattr, name, sizeof(uint64_t),
778 1, &xa_data_obj, tx);
782 /* There was an error looking up the xattr name */
786 /* Finally write the xattr value */
787 dmu_write(osd->od_os, xa_data_obj, 0, buf->lb_len, buf->lb_buf, tx);
790 rc = -sa_update(sa_hdl, SA_ZPL_SIZE(osd), &size, 8, tx);
793 sa_handle_destroy(sa_hdl);
/* both dnode pointers start out NULL, so only holds actually taken are dropped */
795 if (xa_data_dn != NULL)
796 osd_dnode_rele(xa_data_dn);
797 if (xa_zap_dn != NULL)
798 osd_dnode_rele(xa_zap_dn);
/*
 * Remove the embedded PFID information from the object's LMA xattr.
 *
 * The LMA is read into the per-thread buffer info->oti_buf and swabbed as
 * a struct lustre_ost_attrs. After asserting that LMAC_STRIPE_INFO is set,
 * the LMAC_STRIPE_INFO and LMAC_COMP_INFO compat flags are cleared, the
 * LMA part is swabbed again, and the xattr is rewritten with
 * LU_XATTR_REPLACE using a length of sizeof(*lma) only. Afterwards
 * obj->oo_pfid_in_lma is reset to 0.
 *
 * NOTE(review): the rc checks (including the one that presumably guards
 * the oo_pfid_in_lma reset) and the return are not visible in this
 * extract.
 */
803 static int osd_xattr_split_pfid(const struct lu_env *env,
804 struct osd_object *obj, struct osd_thandle *oh)
806 struct osd_thread_info *info = osd_oti_get(env);
807 struct lustre_ost_attrs *loa =
808 (struct lustre_ost_attrs *)&info->oti_buf;
809 struct lustre_mdt_attrs *lma = &loa->loa_lma;
810 struct lu_buf buf = {
812 .lb_len = sizeof(info->oti_buf),
818 BUILD_BUG_ON(sizeof(info->oti_buf) < sizeof(*loa));
819 rc = osd_xattr_get_internal(env, obj, &buf, XATTR_NAME_LMA, &size);
823 lustre_loa_swab(loa, true);
824 LASSERT(lma->lma_compat & LMAC_STRIPE_INFO);
826 lma->lma_compat &= ~(LMAC_STRIPE_INFO | LMAC_COMP_INFO);
827 lustre_lma_swab(lma);
/* write back only the plain LMA part of the buffer */
829 buf.lb_len = sizeof(*lma);
830 rc = osd_xattr_set_internal(env, obj, &buf, XATTR_NAME_LMA,
831 LU_XATTR_REPLACE, oh);
833 obj->oo_pfid_in_lma = 0;
839 * In DNE environment, the object (in spite of regular file or directory)
840 * and its name entry may reside on different MDTs. Under such case, we will
841 * create an agent entry on the MDT where the object resides. The agent entry
842 * references the object locally, that makes the object to be visible to the
843 * userspace when mounted as 'zfs' directly. Then the userspace tools, such
844 * as 'tar' can handle the object properly.
846 * We handle the agent entry during set linkEA that is the common interface
847 * for both regular file and directory, can handle kinds of cases, such as
848 * create/link/unlink/rename, and so on.
850 * NOTE: we need to do that for both directory and regular file, so we can NOT
851 * do that when ea_{insert,delete} that are directory based operations.
/*
 * Implementation note: the new linkEA in \a buf is walked entry by entry
 * until one parent FID is found for which osd_remote_fid() returns a
 * positive value. The result is compared with the object's current
 * agent-entry state: an agent entry that is no longer needed is removed
 * with osd_delete_from_remote_parent(), and a missing one is added with
 * osd_add_to_remote_parent(). Failures are logged (CERROR for removal,
 * CWARN for creation).
 *
 * NOTE(review): the declaration and updates of "remote", the handling of
 * -ENODATA and other linkea_init_with_rec() results, the rc tests before
 * the log calls, and the return are not visible in this extract.
 */
853 static int osd_xattr_handle_linkea(const struct lu_env *env,
854 struct osd_device *osd,
855 struct osd_object *obj,
856 const struct lu_buf *buf,
857 struct osd_thandle *oh)
859 const struct lu_fid *fid = lu_object_fid(&obj->oo_dt.do_lu);
860 struct lu_fid *tfid = &osd_oti_get(env)->oti_fid;
861 struct linkea_data ldata = { .ld_buf = (struct lu_buf *)buf };
862 struct lu_name tmpname;
867 rc = linkea_init_with_rec(&ldata);
869 linkea_first_entry(&ldata);
/* stop scanning as soon as one remote parent has been seen */
870 while (ldata.ld_lee != NULL && !remote) {
871 linkea_entry_unpack(ldata.ld_lee, &ldata.ld_reclen,
873 if (osd_remote_fid(env, osd, tfid) > 0)
876 linkea_next_entry(&ldata);
878 } else if (rc == -ENODATA) {
884 if (lu_object_has_agent_entry(&obj->oo_dt.do_lu) && !remote) {
885 rc = osd_delete_from_remote_parent(env, osd, obj, oh, false);
887 CERROR("%s: failed to remove agent entry for "DFID
888 ": rc = %d\n", osd_name(osd), PFID(fid), rc);
889 } else if (!lu_object_has_agent_entry(&obj->oo_dt.do_lu) && remote) {
890 rc = osd_add_to_remote_parent(env, osd, obj, oh);
892 CWARN("%s: failed to create agent entry for "DFID
893 ": rc = %d\n", osd_name(osd), PFID(fid), rc);
/*
 * dt-level xattr write for object \a dt inside transaction \a handle.
 *
 * POSIX ACL names get special handling when the device has od_posix_acl
 * disabled. The rest runs under the write side of obj->oo_guard:
 *  - XATTR_NAME_FID on an object whose PFID lives in the LMA first splits
 *    the PFID out through osd_xattr_split_pfid(), and the flags are then
 *    forced to LU_XATTR_CREATE;
 *  - XATTR_NAME_LINK on a device with a remote parent directory first
 *    goes through osd_xattr_handle_linkea();
 *  - the value is finally stored with osd_xattr_set_internal().
 *
 * NOTE(review): the result returned for the filtered ACL names, the rc
 * checks after the two pre-processing calls, and the return are not
 * visible in this extract.
 */
899 int osd_xattr_set(const struct lu_env *env, struct dt_object *dt,
900 const struct lu_buf *buf, const char *name, int fl,
901 struct thandle *handle)
903 struct osd_object *obj = osd_dt_obj(dt);
904 struct osd_device *osd = osd_obj2dev(obj);
905 struct osd_thandle *oh;
909 LASSERT(handle != NULL);
910 LASSERT(osd_invariant(obj));
912 if (!osd_obj2dev(obj)->od_posix_acl &&
913 (strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS) == 0 ||
914 strcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
917 oh = container_of(handle, struct osd_thandle, ot_super);
919 down_write(&obj->oo_guard);
920 CDEBUG(D_INODE, "Setting xattr %s with size %d\n",
921 name, (int)buf->lb_len);
922 /* For the OST migrated from ldiskfs, the PFID EA may
923 * be stored in LMA because of ldiskfs inode size. */
924 if (unlikely(strcmp(name, XATTR_NAME_FID) == 0 &&
925 obj->oo_pfid_in_lma)) {
926 rc = osd_xattr_split_pfid(env, obj, oh);
928 fl = LU_XATTR_CREATE;
929 } else if (strcmp(name, XATTR_NAME_LINK) == 0 &&
930 osd->od_remote_parent_dir != ZFS_NO_OBJECT) {
931 rc = osd_xattr_handle_linkea(env, osd, obj, buf, oh);
935 rc = osd_xattr_set_internal(env, obj, buf, name, fl, oh);
936 up_write(&obj->oo_guard);
/*
 * Declare, on the DMU transaction oh->ot_tx, the holds needed to delete
 * xattr \a name from \a obj.
 *
 * An SA hold on the object is always taken so that SA_ZPL_DXATTR can be
 * rewritten. If the object also has an xattr ZAP and the entry exists
 * there, the value object's bonus, its full range and the ZAP entry
 * removal are declared as well. A missing entry (-ENOENT) needs nothing
 * further.
 *
 * NOTE(review): the return-type line, the early return for
 * ZFS_NO_OBJECT, the "rc == 0" test and the handling of other lookup
 * errors are not visible in this extract.
 */
942 __osd_xattr_declare_del(const struct lu_env *env, struct osd_object *obj,
943 const char *name, struct osd_thandle *oh)
945 struct osd_device *osd = osd_obj2dev(obj);
946 dmu_tx_t *tx = oh->ot_tx;
947 uint64_t xa_data_obj;
950 /* update SA_ZPL_DXATTR if xattr was in SA */
951 dmu_tx_hold_sa(tx, obj->oo_sa_hdl, 0);
953 if (obj->oo_xattr == ZFS_NO_OBJECT)
956 rc = -zap_lookup(osd->od_os, obj->oo_xattr, name, 8, 1, &xa_data_obj);
960 * We'll delete the existing object and ZAP entry.
962 dmu_tx_hold_bonus(tx, xa_data_obj);
963 dmu_tx_hold_free(tx, xa_data_obj, 0, DMU_OBJECT_END);
964 dmu_tx_hold_zap(tx, obj->oo_xattr, FALSE, (char *) name);
966 } else if (rc == -ENOENT) {
968 * Entry doesn't exist, nothing to be changed.
973 /* An error happened */
/*
 * dt-level declaration of an xattr delete.
 *
 * Converts \a handle into the enclosing osd_thandle, asserts that it
 * carries a DMU transaction, and, while holding the read side of
 * obj->oo_guard, calls __osd_xattr_declare_del() only if the object
 * exists and has not been destroyed.
 *
 * NOTE(review): the return statement is not visible in this extract.
 */
977 int osd_declare_xattr_del(const struct lu_env *env, struct dt_object *dt,
978 const char *name, struct thandle *handle)
980 struct osd_object *obj = osd_dt_obj(dt);
981 struct osd_thandle *oh;
984 LASSERT(handle != NULL);
985 LASSERT(osd_invariant(obj));
987 oh = container_of(handle, struct osd_thandle, ot_super);
988 LASSERT(oh->ot_tx != NULL);
990 down_read(&obj->oo_guard);
991 if (likely(dt_object_exists(&obj->oo_dt) && !obj->oo_destroyed)) {
992 LASSERT(obj->oo_dn != NULL);
993 __osd_xattr_declare_del(env, obj, name, oh);
995 up_read(&obj->oo_guard);
/*
 * Remove xattr \a name from the object's SA-resident xattr nvlist.
 *
 * The nvlist cache is loaded through __osd_xattr_cache() and the byte-array
 * entry is dropped with nvlist_remove(). The write-back is immediate
 * (__osd_sa_xattr_update()) for XATTR_NAME_LMV and scheduled
 * (__osd_sa_xattr_schedule_update()) for every other name.
 *
 * NOTE(review): the rc checks after the cache load and the removal, the
 * else keyword between the two write-back calls, and the return are not
 * visible in this extract.
 */
1000 static int __osd_sa_xattr_del(const struct lu_env *env, struct osd_object *obj,
1001 const char *name, struct osd_thandle *oh)
1005 rc = __osd_xattr_cache(obj);
1009 rc = -nvlist_remove(obj->oo_sa_xattr, name, DATA_TYPE_BYTE_ARRAY);
1014 * only migrate delete LMV, and it needs to be done immediately, because
1015 * it's used in deleting sub stripes, and if this is delayed, later when
1016 * destroying the master object, it will delete sub stripes again.
1018 if (!strcmp(name, XATTR_NAME_LMV))
1019 rc = __osd_sa_xattr_update(env, obj, oh);
1021 rc = __osd_sa_xattr_schedule_update(env, obj, oh);
/*
 * Delete xattr \a name from \a obj, trying the SA-resident nvlist first and
 * the ZAP-based store second.
 *
 * For the ZAP path, the entry is looked up to find the value object; on
 * success that object is freed with dmu_object_free() and the entry is
 * removed with zap_remove(), both inside oh->ot_tx.
 *
 * NOTE(review): the result returned for missing or destroyed objects, the
 * rc test that decides whether the SA attempt was sufficient, the result
 * for ZFS_NO_OBJECT and for -ENOENT, and the final return are not visible
 * in this extract.
 */
1025 static int __osd_xattr_del(const struct lu_env *env, struct osd_object *obj,
1026 const char *name, struct osd_thandle *oh)
1028 struct osd_device *osd = osd_obj2dev(obj);
1029 uint64_t xa_data_obj;
1032 if (unlikely(!dt_object_exists(&obj->oo_dt) || obj->oo_destroyed))
1035 /* try remove xattr from SA at first */
1036 rc = __osd_sa_xattr_del(env, obj, name, oh);
1040 if (obj->oo_xattr == ZFS_NO_OBJECT)
1043 rc = -zap_lookup(osd->od_os, obj->oo_xattr, name, sizeof(uint64_t), 1,
1045 if (rc == -ENOENT) {
1047 } else if (rc == 0) {
1050 * We'll delete the existing object and ZAP entry.
1052 rc = -dmu_object_free(osd->od_os, xa_data_obj, oh->ot_tx);
1056 rc = -zap_remove(osd->od_os, obj->oo_xattr, name, oh->ot_tx);
/*
 * dt-level xattr delete for object \a dt inside transaction \a handle.
 *
 * Returns -EOPNOTSUPP for the POSIX ACL names when the device has
 * od_posix_acl disabled. Otherwise, under the write side of
 * obj->oo_guard: a missing or destroyed object yields -ENOENT; deleting
 * XATTR_NAME_FID from an object whose PFID lives in the LMA is done by
 * osd_xattr_split_pfid(); every other case goes to __osd_xattr_del().
 *
 * NOTE(review): the else keyword between the two delete paths, the "out"
 * label and the final return are not visible in this extract.
 */
1062 int osd_xattr_del(const struct lu_env *env, struct dt_object *dt,
1063 const char *name, struct thandle *handle)
1065 struct osd_object *obj = osd_dt_obj(dt);
1066 struct osd_thandle *oh;
1070 LASSERT(handle != NULL);
1071 oh = container_of(handle, struct osd_thandle, ot_super);
1072 LASSERT(oh->ot_tx != NULL);
1074 if (!osd_obj2dev(obj)->od_posix_acl &&
1075 (strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS) == 0 ||
1076 strcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
1077 RETURN(-EOPNOTSUPP);
1079 down_write(&obj->oo_guard);
1080 if (unlikely(!dt_object_exists(dt) || obj->oo_destroyed))
1081 GOTO(out, rc = -ENOENT);
1082 LASSERT(obj->oo_dn != NULL);
1083 /* For the OST migrated from ldiskfs, the PFID EA may
1084 * be stored in LMA because of ldiskfs inode size. */
1085 if (unlikely(strcmp(name, XATTR_NAME_FID) == 0 && obj->oo_pfid_in_lma))
1086 rc = osd_xattr_split_pfid(env, obj, oh);
1088 rc = __osd_xattr_del(env, obj, name, oh);
1091 up_write(&obj->oo_guard);
/*
 * Declare, on the DMU transaction oh->ot_tx, the frees needed to destroy
 * every ZAP-stored xattr of \a obj.
 *
 * Nothing is declared when the object has no xattr ZAP. Otherwise the ZAP
 * itself is declared for freeing, and a cursor walks its entries: each
 * name is looked up to obtain the value object number, which is declared
 * for freeing too. Lookup failures are logged with CERROR.
 *
 * NOTE(review): the cursor declaration, the rc tests inside the loop
 * (including whether a failed lookup ends the walk), and the statement
 * guarded by the final "rc && tx->tx_err == 0" test are not visible in
 * this extract; the latter presumably records the error in the
 * transaction -- confirm.
 */
1095 void osd_declare_xattrs_destroy(const struct lu_env *env,
1096 struct osd_object *obj, struct osd_thandle *oh)
1098 struct osd_device *osd = osd_obj2dev(obj);
1099 zap_attribute_t *za = &osd_oti_get(env)->oti_za;
1100 uint64_t oid = obj->oo_xattr, xid;
1101 dmu_tx_t *tx = oh->ot_tx;
1105 if (oid == ZFS_NO_OBJECT)
1106 return; /* Nothing to do for SA xattrs */
1108 /* Declare to free the ZAP holding xattrs */
1109 dmu_tx_hold_free(tx, oid, 0, DMU_OBJECT_END);
1111 rc = osd_zap_cursor_init(&zc, osd->od_os, oid, 0);
1115 while (zap_cursor_retrieve(zc, za) == 0) {
/* every entry is expected to hold exactly one 64-bit object number */
1116 LASSERT(za->za_num_integers == 1);
1117 LASSERT(za->za_integer_length == sizeof(uint64_t));
1119 rc = -zap_lookup(osd->od_os, oid, za->za_name,
1120 sizeof(uint64_t), 1, &xid);
1122 CERROR("%s: xattr %s lookup failed: rc = %d\n",
1123 osd->od_svname, za->za_name, rc);
1126 dmu_tx_hold_free(tx, xid, 0, DMU_OBJECT_END);
1128 zap_cursor_advance(zc);
1131 osd_zap_cursor_fini(zc);
1133 if (rc && tx->tx_err == 0)
/*
 * Free every ZAP-stored xattr of \a obj, and then the xattr ZAP itself,
 * inside the already-assigned DMU transaction oh->ot_tx.
 *
 * Returns 0 right away when the object has no xattr ZAP. Otherwise a
 * cursor walks the ZAP: each name is looked up to obtain the value object
 * number, and that object is freed with dmu_object_free(). After the walk
 * the ZAP object is freed as well. Failures are logged with CERROR.
 *
 * NOTE(review): the cursor and xid declarations, the rc tests that guard
 * the CERROR calls (and whether a failure ends the walk), and the final
 * return are not visible in this extract.
 */
1137 int osd_xattrs_destroy(const struct lu_env *env,
1138 struct osd_object *obj, struct osd_thandle *oh)
1140 struct osd_device *osd = osd_obj2dev(obj);
1141 dmu_tx_t *tx = oh->ot_tx;
1142 zap_attribute_t *za = &osd_oti_get(env)->oti_za;
1147 /* The transaction must have been assigned to a transaction group. */
1148 LASSERT(tx->tx_txg != 0);
1150 if (obj->oo_xattr == ZFS_NO_OBJECT)
1151 return 0; /* Nothing to do for SA xattrs */
1153 /* Free the ZAP holding the xattrs */
1154 rc = osd_zap_cursor_init(&zc, osd->od_os, obj->oo_xattr, 0);
1158 while (zap_cursor_retrieve(zc, za) == 0) {
1159 LASSERT(za->za_num_integers == 1);
1160 LASSERT(za->za_integer_length == sizeof(uint64_t));
1162 rc = -zap_lookup(osd->od_os, obj->oo_xattr, za->za_name,
1163 sizeof(uint64_t), 1, &xid);
1165 CERROR("%s: lookup xattr %s failed: rc = %d\n",
1166 osd->od_svname, za->za_name, rc);
1168 rc = -dmu_object_free(osd->od_os, xid, tx);
1170 CERROR("%s: free xattr %s failed: rc = %d\n",
1171 osd->od_svname, za->za_name, rc);
1173 zap_cursor_advance(zc);
1175 osd_zap_cursor_fini(zc);
/* with the value objects handled, release the ZAP object itself */
1177 rc = -dmu_object_free(osd->od_os, obj->oo_xattr, tx);
1179 CERROR("%s: free xattr %llu failed: rc = %d\n",
1180 osd->od_svname, obj->oo_xattr, rc);
/*
 * List the names of the xattrs held in the object's SA-resident nvlist.
 *
 * The nvlist cache is loaded through __osd_xattr_cache() and iterated with
 * nvlist_next_nvpair(). The POSIX ACL names are special-cased when the
 * device has od_posix_acl disabled. When lb->lb_buf is non-NULL each name
 * is copied, including its terminating NUL (len + 1 bytes), at offset
 * "counted", after a check that it fits within lb->lb_len.
 *
 * NOTE(review): the return-type line, the computation of len, the action
 * taken for filtered ACL names and for an overflowing buffer, the update
 * of "counted" and the return value are not visible in this extract.
 */
1186 osd_sa_xattr_list(const struct lu_env *env, struct osd_object *obj,
1187 const struct lu_buf *lb)
1189 nvpair_t *nvp = NULL;
1190 int len, counted = 0;
1193 rc = __osd_xattr_cache(obj);
1197 while ((nvp = nvlist_next_nvpair(obj->oo_sa_xattr, nvp)) != NULL) {
1198 const char *name = nvpair_name(nvp);
1200 if (!osd_obj2dev(obj)->od_posix_acl &&
1201 (strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS) == 0 ||
1202 strcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
1206 if (lb->lb_buf != NULL) {
1207 if (counted + len + 1 > lb->lb_len)
1210 memcpy(lb->lb_buf + counted, name, len + 1);
/*
 * dt-level listing of all xattr names of object \a dt into \a lb.
 *
 * Runs under the read side of obj->oo_guard. A missing or destroyed
 * object yields -ENOENT. The SA-resident names are listed first through
 * osd_sa_xattr_list(); if the object has no xattr ZAP the running count
 * is the result. Otherwise a ZAP cursor walks the remaining names: the
 * POSIX ACL names are skipped when the device has od_posix_acl disabled,
 * and each other name is copied with its terminating NUL when lb->lb_buf
 * is non-NULL, failing with -ERANGE if it would not fit in lb->lb_len.
 * A cursor result of -ENOENT marks the end of the index.
 *
 * NOTE(review): the cursor and "counted" declarations, the handling of
 * the osd_sa_xattr_list() result, the update of "counted", the statements
 * selected by the two tests on rc after the loop, the labels and the
 * final return are not visible in this extract.
 */
1217 int osd_xattr_list(const struct lu_env *env, struct dt_object *dt,
1218 const struct lu_buf *lb)
1220 struct osd_object *obj = osd_dt_obj(dt);
1221 struct osd_device *osd = osd_obj2dev(obj);
1222 zap_attribute_t *za = &osd_oti_get(env)->oti_za;
1227 down_read(&obj->oo_guard);
1228 if (unlikely(!dt_object_exists(dt) || obj->oo_destroyed))
1229 GOTO(out, rc = -ENOENT);
1230 LASSERT(obj->oo_dn != NULL);
1232 rc = osd_sa_xattr_list(env, obj, lb);
1238 /* continue with dnode xattr if any */
1239 if (obj->oo_xattr == ZFS_NO_OBJECT)
1240 GOTO(out, rc = counted);
1242 rc = osd_zap_cursor_init(&zc, osd->od_os, obj->oo_xattr, 0);
1246 while ((rc = -zap_cursor_retrieve(zc, za)) == 0) {
1247 if (!osd_obj2dev(obj)->od_posix_acl &&
1248 (strcmp(za->za_name, XATTR_NAME_POSIX_ACL_ACCESS) == 0 ||
1249 strcmp(za->za_name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0)) {
1250 zap_cursor_advance(zc);
/* from here on rc temporarily holds the length of the current name */
1254 rc = strlen(za->za_name);
1255 if (lb->lb_buf != NULL) {
1256 if (counted + rc + 1 > lb->lb_len)
1257 GOTO(out_fini, rc = -ERANGE);
1259 memcpy(lb->lb_buf + counted, za->za_name, rc + 1);
1263 zap_cursor_advance(zc);
1265 if (rc == -ENOENT) /* no more keys in the index */
1267 else if (unlikely(rc < 0))
1272 osd_zap_cursor_fini(zc);
1274 up_read(&obj->oo_guard);