4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright (c) 2012, 2017, Intel Corporation.
29 * This file is part of Lustre, http://www.lustre.org/
31 * lustre/osd-zfs/osd_xattr.c
32 * functions to manipulate extended attributes and system attributes
34 * Author: Alex Zhuravlev <bzzz@whamcloud.com>
35 * Author: Mike Pershin <tappro@whamcloud.com>
38 #define DEBUG_SUBSYSTEM S_OSD
40 #include <libcfs/libcfs.h>
41 #include <obd_support.h>
42 #include <lustre_net.h>
44 #include <obd_class.h>
45 #include <lustre_disk.h>
46 #include <lustre_fid.h>
47 #include <lustre_linkea.h>
49 #include "osd_internal.h"
51 #include <sys/dnode.h>
56 #include <sys/spa_impl.h>
57 #include <sys/zfs_znode.h>
58 #include <sys/dmu_tx.h>
59 #include <sys/dmu_objset.h>
60 #include <sys/dsl_prop.h>
61 #include <sys/sa_impl.h>
64 #include <linux/posix_acl_xattr.h>
65 #include <lustre_scrub.h>
/*
 * Load the SA_ZPL_DXATTR system attribute behind \a hdl into the nvlist
 * returned through \a sa.
 *
 * The visible steps are: query the packed size with sa_size(), read the
 * raw bytes into a temporary osd_zio_buf_alloc() buffer with sa_lookup(),
 * unpack them with nvlist_unpack() and release the temporary buffer.
 * nvlist_alloc(NV_UNIQUE_NAME) is also called on \a sa; NOTE(review): this
 * is presumably the path taken when the attribute does not exist yet, but
 * the guarding condition is not visible here -- confirm.
 *
 * Every sa_*()/nvlist_*() result is negated before it is stored in rc.
 */
67 int __osd_xattr_load(struct osd_device *osd, sa_handle_t *hdl, nvlist_t **sa)
72 rc = -sa_size(hdl, SA_ZPL_DXATTR(osd), &size);
75 rc = -nvlist_alloc(sa, NV_UNIQUE_NAME, KM_SLEEP);
79 buf = osd_zio_buf_alloc(size);
84 rc = -sa_lookup(hdl, SA_ZPL_DXATTR(osd), buf, size);
86 rc = -nvlist_unpack(buf, size, sa, KM_SLEEP);
87 osd_zio_buf_free(buf, size);
/*
 * Ensure that obj->oo_sa_xattr is available.
 *
 * The object must already own an SA handle (asserted).  When the nvlist
 * has not been set up yet it is loaded from that handle with
 * __osd_xattr_load().  NOTE(review): the statement taken when
 * oo_sa_xattr is already non-NULL is not visible here, presumably an
 * early successful return -- confirm.
 */
93 static inline int __osd_xattr_cache(struct osd_object *obj)
95 LASSERT(obj->oo_sa_hdl);
96 if (obj->oo_sa_xattr != NULL)
98 return __osd_xattr_load(osd_obj2dev(obj),
99 obj->oo_sa_hdl, &obj->oo_sa_xattr);
/*
 * Look up xattr \a name in the nvlist cached in obj->oo_sa_xattr.
 *
 * The cache is populated first through __osd_xattr_cache().  When \a buf
 * or buf->lb_buf is NULL only the required size is reported through
 * \a sizep.  Otherwise -ERANGE is returned when *sizep exceeds
 * buf->lb_len, and *sizep bytes of the value are copied into buf->lb_buf
 * when it fits.
 */
103 __osd_sa_xattr_get(const struct lu_env *env, struct osd_object *obj,
104 const struct lu_buf *buf, const char *name, int *sizep)
109 rc = __osd_xattr_cache(obj);
113 LASSERT(obj->oo_sa_xattr);
114 rc = -nvlist_lookup_byte_array(obj->oo_sa_xattr, name,
119 if (buf == NULL || buf->lb_buf == NULL) {
120 /* return the required size by *sizep */
124 if (*sizep > buf->lb_len)
125 return -ERANGE; /* match ldiskfs error */
127 memcpy(buf->lb_buf, nv_value, *sizep);
/*
 * Read xattr \a name from the xattr ZAP object \a xattr.
 *
 * \a xattr == ZFS_NO_OBJECT means there are no extended attributes.
 * Otherwise the name is looked up in the ZAP to find the object holding
 * the value, a dnode and a private SA handle are obtained for that
 * object, and the value length is taken from its SA_ZPL_SIZE attribute.
 * Lengths above INT_MAX get special handling (not visible here).  When
 * \a buf or buf->lb_buf is NULL only the size is needed; when *sizep
 * exceeds buf->lb_len the result is -ERANGE; otherwise the value is read
 * from offset 0 with dmu_read().  The SA handle and the dnode are
 * released at the end of the function.
 */
131 int __osd_xattr_get_large(const struct lu_env *env, struct osd_device *osd,
132 uint64_t xattr, struct lu_buf *buf,
133 const char *name, int *sizep)
136 sa_handle_t *sa_hdl = NULL;
137 uint64_t xa_data_obj, size;
140 /* are there any extended attributes? */
141 if (xattr == ZFS_NO_OBJECT)
144 /* Lookup the object number containing the xattr data */
145 rc = -zap_lookup(osd->od_os, xattr, name, sizeof(uint64_t), 1,
150 rc = __osd_obj2dnode(osd->od_os, xa_data_obj, &xa_data_dn);
154 rc = -sa_handle_get(osd->od_os, xa_data_obj, NULL, SA_HDL_PRIVATE,
159 /* Get the xattr value length / object size */
160 rc = -sa_lookup(sa_hdl, SA_ZPL_SIZE(osd), &size, 8);
164 if (size > INT_MAX) {
171 if (buf == NULL || buf->lb_buf == NULL) {
172 /* We only need to return the required size */
175 if (*sizep > buf->lb_len) {
176 rc = -ERANGE; /* match ldiskfs error */
180 rc = -dmu_read(osd->od_os, xa_data_dn->dn_object, 0,
181 size, buf->lb_buf, DMU_READ_PREFETCH);
184 sa_handle_destroy(sa_hdl);
186 osd_dnode_rele(xa_data_dn);
192 * Copy an extended attribute into the buffer provided, or compute
193 * the required buffer size if \a buf is NULL.
195 * On success, the number of bytes used or required is stored in \a sizep.
197 * Note that no locking is done here.
199 * \param[in] env execution environment
200 * \param[in] obj object for which to retrieve xattr
201 * \param[out] buf buffer to store xattr value in
202 * \param[in] name name of xattr to copy
203 * \param[out] sizep bytes used or required to store xattr
205 * \retval 0 on success
206 * \retval negative negated errno on failure
/*
 * Fetch xattr \a name of \a obj, trying the SA-resident copy first
 * (__osd_sa_xattr_get) and falling back to the xattr ZAP object
 * obj->oo_xattr (__osd_xattr_get_large).
 *
 * Objects that do not exist or are marked oo_destroyed are rejected up
 * front.  NOTE(review): the error code returned in that case, and the
 * condition that triggers the fallback, are not visible here.
 */
208 int osd_xattr_get_internal(const struct lu_env *env, struct osd_object *obj,
209 struct lu_buf *buf, const char *name, int *sizep)
213 if (unlikely(!dt_object_exists(&obj->oo_dt) || obj->oo_destroyed))
216 /* check SA_ZPL_DXATTR first then fallback to directory xattr */
217 rc = __osd_sa_xattr_get(env, obj, buf, name, sizep);
221 return __osd_xattr_get_large(env, osd_obj2dev(obj), obj->oo_xattr,
226 * Copy LMA extended attribute into provided buffer
228 * Note that no locking is done here.
230 * \param[in] env execution environment
231 * \param[in] obj object for which to retrieve xattr
232 * \param[out] buf buffer to store xattr value in
234 * \retval 0 on success
235 * \retval negative negated errno on failure
/*
 * Read the XATTR_NAME_LMA xattr of \a obj into \a buf.
 *
 * The SA-resident xattrs are consulted first and the xattr ZAP object
 * second.  In both cases a successful read that yields fewer bytes than
 * sizeof(struct lustre_mdt_attrs) is singled out.  NOTE(review): the
 * resulting error code is not visible here.  Destroyed objects are
 * checked for before any lookup.
 */
237 int osd_xattr_get_lma(const struct lu_env *env, struct osd_object *obj,
246 if (unlikely(obj->oo_destroyed))
249 /* check SA_ZPL_DXATTR first then fallback to directory xattr */
250 rc = __osd_sa_xattr_get(env, obj, buf, XATTR_NAME_LMA, &size);
251 if (!rc && unlikely(size < sizeof(struct lustre_mdt_attrs)))
256 rc = __osd_xattr_get_large(env, osd_obj2dev(obj), obj->oo_xattr,
257 buf, XATTR_NAME_LMA, &size);
258 if (!rc && unlikely(size < sizeof(struct lustre_mdt_attrs)))
/*
 * Build a struct filter_fid from the stripe information embedded in the
 * LMA xattr of \a obj.
 *
 * The LMA is read into the per-thread info->oti_buf (the build-time check
 * guarantees it can hold a struct lustre_ost_attrs), byte-swapped with
 * lustre_loa_swab() and must carry LMAC_STRIPE_INFO (asserted).  *sizep
 * is set to sizeof(struct filter_fid).  Buffers that are absent or zero
 * length, and buffers shorter than *sizep, are checked separately before
 * any field is written.  ol_stripe_count is taken from
 * f_ver >> PFID_STRIPE_IDX_BITS before f_ver is masked with
 * PFID_STRIPE_COUNT_MASK and the parent FID is stored little-endian.
 * Component start/end/id are copied only when LMAC_COMP_INFO is set.
 */
265 static int osd_get_pfid_from_lma(const struct lu_env *env,
266 struct osd_object *obj,
267 struct lu_buf *buf, int *sizep)
269 struct osd_thread_info *info = osd_oti_get(env);
270 struct lustre_ost_attrs *loa =
271 (struct lustre_ost_attrs *)&info->oti_buf;
272 struct lustre_mdt_attrs *lma = &loa->loa_lma;
273 struct filter_fid *ff;
274 struct ost_layout *ol;
275 struct lu_buf tbuf = {
277 .lb_len = sizeof(info->oti_buf),
282 BUILD_BUG_ON(sizeof(info->oti_buf) < sizeof(*loa));
283 rc = osd_xattr_get_internal(env, obj, &tbuf,
284 XATTR_NAME_LMA, sizep);
288 lustre_loa_swab(loa, true);
289 LASSERT(lma->lma_compat & LMAC_STRIPE_INFO);
291 *sizep = sizeof(*ff);
292 if (buf->lb_len == 0 || !buf->lb_buf)
295 if (buf->lb_len < *sizep)
300 ol->ol_stripe_count = cpu_to_le32(loa->loa_parent_fid.f_ver >>
301 PFID_STRIPE_IDX_BITS);
302 ol->ol_stripe_size = cpu_to_le32(loa->loa_stripe_size);
303 loa->loa_parent_fid.f_ver &= PFID_STRIPE_COUNT_MASK;
304 fid_cpu_to_le(&ff->ff_parent, &loa->loa_parent_fid);
305 if (lma->lma_compat & LMAC_COMP_INFO) {
306 ol->ol_comp_start = cpu_to_le64(loa->loa_comp_start);
307 ol->ol_comp_end = cpu_to_le64(loa->loa_comp_end);
308 ol->ol_comp_id = cpu_to_le32(loa->loa_comp_id);
310 ol->ol_comp_start = 0;
/*
 * Return xattr \a name of \a dt in \a buf.
 *
 * POSIX ACL xattrs are special-cased when the device has od_posix_acl
 * off (the result is not visible here).  The lookup runs under a read
 * lock on obj->oo_guard, which is dropped early for objects that do not
 * exist or are destroyed.  A request for XATTR_NAME_FID on an object with
 * oo_pfid_in_lma set is served from the LMA via osd_get_pfid_from_lma();
 * everything else goes through osd_xattr_get_internal().
 */
318 int osd_xattr_get(const struct lu_env *env, struct dt_object *dt,
319 struct lu_buf *buf, const char *name)
321 struct osd_object *obj = osd_dt_obj(dt);
325 LASSERT(obj->oo_dn != NULL);
326 LASSERT(osd_invariant(obj));
328 if (!osd_obj2dev(obj)->od_posix_acl &&
329 (strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS) == 0 ||
330 strcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
333 down_read(&obj->oo_guard);
334 if (unlikely(!dt_object_exists(dt) || obj->oo_destroyed)) {
335 up_read(&obj->oo_guard);
339 /* For the OST migrated from ldiskfs, the PFID EA may
340 * be stored in LMA because of ldiskfs inode size. */
341 if (strcmp(name, XATTR_NAME_FID) == 0 && obj->oo_pfid_in_lma)
342 rc = osd_get_pfid_from_lma(env, obj, buf, &size);
344 rc = osd_xattr_get_internal(env, obj, buf, name, &size);
345 up_read(&obj->oo_guard);
354 /* the function is used to declare EAs when SA is not supported */
/*
 * Declare the DMU transaction holds needed to store xattr \a name with a
 * value of \a vallen bytes in the xattr ZAP of \a obj.
 *
 * Three situations are covered:
 *  - the object has no xattr ZAP yet (oo_xattr == ZFS_NO_OBJECT): holds
 *    for a new ZAP plus entry, a new SA and a new value object;
 *  - the name already exists in the ZAP: holds to free the tail of, and
 *    rewrite, the existing value object;
 *  - the lookup returns -ENOENT: holds on the existing ZAP for a new
 *    entry, plus a new SA and a new value object.
 */
355 void __osd_xattr_declare_legacy(const struct lu_env *env,
356 struct osd_object *obj,
357 int vallen, const char *name,
358 struct osd_thandle *oh)
360 struct osd_device *osd = osd_obj2dev(obj);
361 dmu_tx_t *tx = oh->ot_tx;
362 uint64_t xa_data_obj;
365 if (obj->oo_xattr == ZFS_NO_OBJECT) {
366 /* xattr zap + entry */
367 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, TRUE, (char *) name);
368 /* xattr value obj */
369 dmu_tx_hold_sa_create(tx, ZFS_SA_BASE_ATTR_SIZE);
370 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, vallen);
374 rc = -zap_lookup(osd->od_os, obj->oo_xattr, name, sizeof(uint64_t), 1,
378 * Entry already exists.
379 * We'll truncate the existing object.
381 dmu_tx_hold_bonus(tx, xa_data_obj);
382 dmu_tx_hold_free(tx, xa_data_obj, vallen, DMU_OBJECT_END);
383 dmu_tx_hold_write(tx, xa_data_obj, 0, vallen);
384 } else if (rc == -ENOENT) {
386 * Entry doesn't exist, we need to create a new one and a new
387 * object to store the value.
389 dmu_tx_hold_bonus(tx, obj->oo_xattr);
390 dmu_tx_hold_zap(tx, obj->oo_xattr, TRUE, (char *) name);
391 dmu_tx_hold_sa_create(tx, ZFS_SA_BASE_ATTR_SIZE);
392 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, vallen);
/*
 * Declare the transaction holds for setting xattr \a name (\a vallen
 * bytes) on \a obj.
 *
 * Destroyed objects are checked for up front.  For XATTR_NAME_LINK on a
 * device with a remote parent directory an extra ZAP hold on
 * od_remote_parent_dir is taken for the agent entry.  Devices without
 * od_xattr_in_sa use __osd_xattr_declare_legacy().  Otherwise the xattr
 * goes into the SA: an existing object gets a spill hold on its dnode,
 * while for an object that does not exist yet oo_ea_in_bonus accumulates
 * the declared bytes and a spill hold on DMU_NEW_OBJECT is added once
 * the bonus length from osd_obj_bonuslen() would be exceeded.
 */
396 void __osd_xattr_declare_set(const struct lu_env *env, struct osd_object *obj,
397 int vallen, const char *name,
398 struct osd_thandle *oh)
400 struct osd_device *osd = osd_obj2dev(obj);
401 dmu_tx_t *tx = oh->ot_tx;
404 if (unlikely(obj->oo_destroyed))
407 if (strcmp(name, XATTR_NAME_LINK) == 0 &&
408 osd->od_remote_parent_dir != ZFS_NO_OBJECT) {
409 /* If some name entry resides on remote MDT, then will create
410 * agent entry under remote parent. On the other hand, if the
411 * remote entry will be removed, then related agent entry may
412 * need to be removed from the remote parent. So there may be
413 * kinds of cases, let's declare enough credits. The credits
414 * for create agent entry is enough for remove case. */
415 osd_tx_hold_zap(tx, osd->od_remote_parent_dir,
419 if (unlikely(!osd_obj2dev(obj)->od_xattr_in_sa)) {
420 __osd_xattr_declare_legacy(env, obj, vallen, name, oh);
424 /* declare EA in SA */
425 if (dt_object_exists(&obj->oo_dt)) {
426 LASSERT(obj->oo_sa_hdl);
427 /* XXX: it should be possible to skip spill
428 * declaration if specific EA is part of
429 * bonus and doesn't grow */
430 dmu_tx_hold_spill(tx, obj->oo_dn->dn_object);
434 bonuslen = osd_obj_bonuslen(obj);
436 /* the object doesn't exist, but we've declared bonus
437 * in osd_declare_object_create() yet */
438 if (obj->oo_ea_in_bonus > bonuslen) {
439 /* spill has been declared already */
440 } else if (obj->oo_ea_in_bonus + vallen > bonuslen) {
441 /* we're about to exceed bonus, let's declare spill */
442 dmu_tx_hold_spill(tx, DMU_NEW_OBJECT);
444 obj->oo_ea_in_bonus += vallen;
/*
 * Declare an xattr set of \a name with a value of buf->lb_len bytes.
 *
 * Converts \a handle (must be non-NULL) to the enclosing osd_thandle and
 * calls __osd_xattr_declare_set() under a read lock on obj->oo_guard.
 * \a fl is not used in the visible code.
 */
447 int osd_declare_xattr_set(const struct lu_env *env, struct dt_object *dt,
448 const struct lu_buf *buf, const char *name,
449 int fl, struct thandle *handle)
451 struct osd_object *obj = osd_dt_obj(dt);
452 struct osd_thandle *oh;
455 LASSERT(handle != NULL);
456 oh = container_of(handle, struct osd_thandle, ot_super);
458 down_read(&obj->oo_guard);
459 __osd_xattr_declare_set(env, obj, buf->lb_len, name, oh);
460 up_read(&obj->oo_guard);
/*
 * Write the complete set of system attributes of \a obj in a single
 * sa_replace_all_by_template() call.
 *
 * The pending flags oo_late_xattr and oo_late_attr_set are cleared, the
 * values cached in obj->oo_attr are copied into the per-thread osa
 * structure, and a bulk attribute array is assembled from them.  The
 * transaction group number from dmu_tx_get_txg() is stored as SA_ZPL_GEN
 * and the encoded value of now as SA_ZPL_CRTIME (also published as
 * la_btime with LA_BTIME set).  With ZFS_PROJINHERIT and
 * osd->od_projectused_dn set, the project ID is included and
 * oo_with_projid is raised.  Finally obj->oo_sa_xattr is XDR-packed into
 * the per-thread oti_xattr_lbuf and added as SA_ZPL_DXATTR.  The
 * assertion verifies that the bulk array has not been overrun.
 */
465 int __osd_sa_attr_init(const struct lu_env *env, struct osd_object *obj,
466 struct osd_thandle *oh)
468 sa_bulk_attr_t *bulk = osd_oti_get(env)->oti_attr_bulk;
469 struct osa_attr *osa = &osd_oti_get(env)->oti_osa;
470 struct lu_buf *lb = &osd_oti_get(env)->oti_xattr_lbuf;
471 struct osd_device *osd = osd_obj2dev(obj);
473 inode_timespec_t now;
477 obj->oo_late_xattr = 0;
478 obj->oo_late_attr_set = 0;
480 gen = dmu_tx_get_txg(oh->ot_tx);
482 ZFS_TIME_ENCODE(&now, osa->btime);
484 obj->oo_attr.la_valid |= LA_BTIME;
485 obj->oo_attr.la_btime = osa->btime[0];
486 osa->atime[0] = obj->oo_attr.la_atime;
487 osa->ctime[0] = obj->oo_attr.la_ctime;
488 osa->mtime[0] = obj->oo_attr.la_mtime;
489 osa->mode = obj->oo_attr.la_mode;
490 osa->uid = obj->oo_attr.la_uid;
491 osa->gid = obj->oo_attr.la_gid;
492 osa->rdev = obj->oo_attr.la_rdev;
493 osa->nlink = obj->oo_attr.la_nlink;
494 osa->flags = attrs_fs2zfs(obj->oo_attr.la_flags);
495 osa->size = obj->oo_attr.la_size;
496 #ifdef ZFS_PROJINHERIT
497 if (osd->od_projectused_dn) {
498 if (obj->oo_attr.la_valid & LA_PROJID)
499 osa->projid = obj->oo_attr.la_projid;
501 osa->projid = ZFS_DEFAULT_PROJID;
502 osa->flags |= ZFS_PROJID;
503 obj->oo_with_projid = 1;
508 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MODE(osd), NULL, &osa->mode, 8);
509 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_SIZE(osd), NULL, &osa->size, 8);
510 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_GEN(osd), NULL, &gen, 8);
511 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_UID(osd), NULL, &osa->uid, 8);
512 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_GID(osd), NULL, &osa->gid, 8);
513 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_PARENT(osd), NULL,
515 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_FLAGS(osd), NULL, &osa->flags, 8);
516 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_ATIME(osd), NULL, osa->atime, 16);
517 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MTIME(osd), NULL, osa->mtime, 16);
518 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CTIME(osd), NULL, osa->ctime, 16);
519 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CRTIME(osd), NULL, osa->btime, 16);
520 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_LINKS(osd), NULL, &osa->nlink, 8);
521 #ifdef ZFS_PROJINHERIT
522 if (osd->od_projectused_dn)
523 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_PROJID(osd), NULL,
526 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_RDEV(osd), NULL, &osa->rdev, 8);
527 LASSERT(cnt <= ARRAY_SIZE(osd_oti_get(env)->oti_attr_bulk));
529 /* Update the SA for additions, modifications, and removals. */
530 rc = -nvlist_size(obj->oo_sa_xattr, &size, NV_ENCODE_XDR);
534 lu_buf_check_and_alloc(lb, size);
535 if (lb->lb_buf == NULL) {
536 CERROR("%s: can't allocate buffer for xattr update\n",
541 rc = -nvlist_pack(obj->oo_sa_xattr, (char **)&lb->lb_buf, &size,
542 NV_ENCODE_XDR, KM_SLEEP);
546 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_DXATTR(osd), NULL, lb->lb_buf, size);
548 rc = -sa_replace_all_by_template(obj->oo_sa_hdl, bulk, cnt, oh->ot_tx);
/*
 * Pack obj->oo_sa_xattr and write it to the SA_ZPL_DXATTR attribute of
 * \a obj within the transaction of \a oh.
 *
 * oo_late_xattr is cleared first.  The per-thread oti_xattr_lbuf is sized
 * to the XDR-encoded length reported by nvlist_size(), and nvlist_pack()
 * is expected to pack into that buffer (the assertion checks that the
 * returned pointer equals lb->lb_buf).  Allocation failure is logged with
 * CERROR.  NOTE(review): the return value of sa_update() is not captured
 * on the visible line -- confirm this is intended.
 */
553 int __osd_sa_xattr_update(const struct lu_env *env, struct osd_object *obj,
554 struct osd_thandle *oh)
556 struct lu_buf *lb = &osd_oti_get(env)->oti_xattr_lbuf;
557 struct osd_device *osd = osd_obj2dev(obj);
562 obj->oo_late_xattr = 0;
564 /* Update the SA for additions, modifications, and removals. */
565 rc = -nvlist_size(obj->oo_sa_xattr, &size, NV_ENCODE_XDR);
569 lu_buf_check_and_alloc(lb, size);
570 if (lb->lb_buf == NULL) {
571 CERROR("%s: can't allocate buffer for xattr update\n",
577 rc = -nvlist_pack(obj->oo_sa_xattr, &dxattr, &size,
578 NV_ENCODE_XDR, KM_SLEEP);
581 LASSERT(dxattr == lb->lb_buf);
583 sa_update(obj->oo_sa_hdl, SA_ZPL_DXATTR(osd), dxattr, size, oh->ot_tx);
589 * Set an extended attribute.
590 * This transaction must have called __osd_xattr_declare_set() first.
592 * Returns 0 on success or a negative error number on failure.
594 * No locking is done here.
/*
 * Defer writing the SA xattr nvlist of \a obj: raise oo_late_xattr and
 * register the object with \a oh through osd_object_sa_dirty_add().
 * Both the SA handle and the cached nvlist must exist (asserted).
 */
596 int __osd_sa_xattr_schedule_update(const struct lu_env *env,
597 struct osd_object *obj,
598 struct osd_thandle *oh)
601 LASSERT(obj->oo_sa_hdl);
602 LASSERT(obj->oo_sa_xattr);
604 /* schedule batched SA update in osd_object_sa_dirty_rele() */
605 obj->oo_late_xattr = 1;
606 osd_object_sa_dirty_add(obj, oh);
/*
 * Store xattr \a name in the SA-resident nvlist of \a obj.
 *
 * Values longer than OBD_MAX_EA_SIZE, or that would push the packed
 * nvlist beyond DXATTR_MAX_SA_SIZE, appear to be flagged as too_big
 * (the assignments are not visible here).  If the name already exists:
 * LU_XATTR_CREATE is checked, and a too-big replacement removes the SA
 * copy, schedules an update and returns -EFBIG (or the scheduling
 * error).  If the lookup gives -ENOENT: LU_XATTR_REPLACE is checked.
 * When the object has an xattr ZAP, an entry of the same name is looked
 * up there and its value object and ZAP entry are removed.  The value is
 * then added to the nvlist.  The SA write is batched only when the dnode
 * was allocated in the transaction group of \a oh; NOTE(review):
 * otherwise it presumably goes out at once through
 * __osd_sa_xattr_update(), the else line is not visible here.
 */
612 int __osd_sa_xattr_set(const struct lu_env *env, struct osd_object *obj,
613 const struct lu_buf *buf, const char *name, int fl,
614 struct osd_thandle *oh)
622 rc = __osd_xattr_cache(obj);
626 LASSERT(obj->oo_sa_xattr);
627 if (buf->lb_len > OBD_MAX_EA_SIZE) {
630 /* Prevent the DXATTR SA from consuming the entire SA
632 rc = -nvlist_size(obj->oo_sa_xattr, &size, NV_ENCODE_XDR);
636 if (size + buf->lb_len > DXATTR_MAX_SA_SIZE)
640 /* even in case of -EFBIG we must lookup xattr and check can we
641 * rewrite it then delete from SA */
642 rc = -nvlist_lookup_byte_array(obj->oo_sa_xattr, name, &nv_value,
645 if (fl & LU_XATTR_CREATE) {
647 } else if (too_big) {
648 rc = -nvlist_remove(obj->oo_sa_xattr, name,
649 DATA_TYPE_BYTE_ARRAY);
652 rc = __osd_sa_xattr_schedule_update(env, obj, oh);
653 return rc == 0 ? -EFBIG : rc;
655 } else if (rc == -ENOENT) {
656 if (fl & LU_XATTR_REPLACE)
664 /* Ensure xattr doesn't exist in ZAP */
665 if (obj->oo_xattr != ZFS_NO_OBJECT) {
666 struct osd_device *osd = osd_obj2dev(obj);
668 rc = -zap_lookup(osd->od_os, obj->oo_xattr,
671 rc = -dmu_object_free(osd->od_os, objid, oh->ot_tx);
673 zap_remove(osd->od_os, obj->oo_xattr,
678 rc = -nvlist_add_byte_array(obj->oo_sa_xattr, name,
679 (uchar_t *)buf->lb_buf, buf->lb_len);
683 /* batch updates only for just created dnodes where we
684 * used to set number of EAs in a single transaction */
685 if (obj->oo_dn->dn_allocated_txg == oh->ot_tx->tx_txg)
686 rc = __osd_sa_xattr_schedule_update(env, obj, oh);
688 rc = __osd_sa_xattr_update(env, obj, oh);
/*
 * Store xattr \a name in the xattr ZAP of \a obj, using one value object
 * per xattr.
 *
 * When the object has no xattr ZAP yet, one is created with mode
 * S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO and its object number is saved
 * in SA_ZPL_XATTR.  An existing entry is checked against
 * LU_XATTR_CREATE and its value object is emptied with dmu_free_range();
 * a missing entry is checked against LU_XATTR_REPLACE and gets a new
 * regular-file value object that is added to the ZAP.  The value is then
 * written at offset 0 with dmu_write() and SA_ZPL_SIZE of the value
 * object is updated.  The private SA handle and any dnodes taken along
 * the way are released at the end.
 */
694 __osd_xattr_set(const struct lu_env *env, struct osd_object *obj,
695 const struct lu_buf *buf, const char *name, int fl,
696 struct osd_thandle *oh)
698 struct osd_device *osd = osd_obj2dev(obj);
699 dnode_t *xa_zap_dn = NULL;
700 dnode_t *xa_data_dn = NULL;
701 uint64_t xa_data_obj;
702 sa_handle_t *sa_hdl = NULL;
703 dmu_tx_t *tx = oh->ot_tx;
707 LASSERT(obj->oo_sa_hdl);
709 if (obj->oo_xattr == ZFS_NO_OBJECT) {
710 struct lu_attr *la = &osd_oti_get(env)->oti_la;
712 la->la_valid = LA_MODE;
713 la->la_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
714 rc = __osd_zap_create(env, osd, &xa_zap_dn, tx, la, 0, 0);
718 obj->oo_xattr = xa_zap_dn->dn_object;
719 rc = osd_object_sa_update(obj, SA_ZPL_XATTR(osd),
720 &obj->oo_xattr, 8, oh);
725 rc = -zap_lookup(osd->od_os, obj->oo_xattr, name, sizeof(uint64_t), 1,
728 if (fl & LU_XATTR_CREATE) {
733 * Entry already exists.
734 * We'll truncate the existing object.
736 rc = __osd_obj2dnode(osd->od_os, xa_data_obj, &xa_data_dn);
740 rc = -sa_handle_get(osd->od_os, xa_data_obj, NULL,
741 SA_HDL_PRIVATE, &sa_hdl);
745 rc = -sa_lookup(sa_hdl, SA_ZPL_SIZE(osd), &size, 8);
749 rc = -dmu_free_range(osd->od_os, xa_data_dn->dn_object,
750 0, DMU_OBJECT_END, tx);
753 } else if (rc == -ENOENT) {
754 struct lu_attr *la = &osd_oti_get(env)->oti_la;
756 * Entry doesn't exist, we need to create a new one and a new
757 * object to store the value.
759 if (fl & LU_XATTR_REPLACE) {
760 /* should be ENOATTR according to the
761 * man, but that is undefined here */
766 la->la_valid = LA_MODE;
767 la->la_mode = S_IFREG | S_IRUGO | S_IWUSR;
768 rc = __osd_object_create(env, osd, obj,
769 lu_object_fid(&obj->oo_dt.do_lu),
770 &xa_data_dn, tx, la);
773 xa_data_obj = xa_data_dn->dn_object;
775 rc = -sa_handle_get(osd->od_os, xa_data_obj, NULL,
776 SA_HDL_PRIVATE, &sa_hdl);
780 rc = -zap_add(osd->od_os, obj->oo_xattr, name, sizeof(uint64_t),
781 1, &xa_data_obj, tx);
785 /* There was an error looking up the xattr name */
789 /* Finally write the xattr value */
790 dmu_write(osd->od_os, xa_data_obj, 0, buf->lb_len, buf->lb_buf, tx);
793 rc = -sa_update(sa_hdl, SA_ZPL_SIZE(osd), &size, 8, tx);
796 sa_handle_destroy(sa_hdl);
798 if (xa_data_dn != NULL)
799 osd_dnode_rele(xa_data_dn);
800 if (xa_zap_dn != NULL)
801 osd_dnode_rele(xa_zap_dn);
/*
 * Strip the embedded stripe/component information from the LMA xattr of
 * \a obj.
 *
 * The LMA is read into the per-thread info->oti_buf and byte-swapped; it
 * must carry LMAC_STRIPE_INFO (asserted).  LMAC_STRIPE_INFO and
 * LMAC_COMP_INFO are cleared from lma_compat, the LMA is swabbed back and
 * the xattr is rewritten with LU_XATTR_REPLACE using only
 * sizeof(struct lustre_mdt_attrs) bytes.  obj->oo_pfid_in_lma is reset
 * to 0; NOTE(review): presumably only on success, the guarding condition
 * is not visible here.
 */
806 static int osd_xattr_split_pfid(const struct lu_env *env,
807 struct osd_object *obj, struct osd_thandle *oh)
809 struct osd_thread_info *info = osd_oti_get(env);
810 struct lustre_ost_attrs *loa =
811 (struct lustre_ost_attrs *)&info->oti_buf;
812 struct lustre_mdt_attrs *lma = &loa->loa_lma;
813 struct lu_buf buf = {
815 .lb_len = sizeof(info->oti_buf),
821 BUILD_BUG_ON(sizeof(info->oti_buf) < sizeof(*loa));
822 rc = osd_xattr_get_internal(env, obj, &buf, XATTR_NAME_LMA, &size);
826 lustre_loa_swab(loa, true);
827 LASSERT(lma->lma_compat & LMAC_STRIPE_INFO);
829 lma->lma_compat &= ~(LMAC_STRIPE_INFO | LMAC_COMP_INFO);
830 lustre_lma_swab(lma);
832 buf.lb_len = sizeof(*lma);
833 rc = osd_xattr_set_internal(env, obj, &buf, XATTR_NAME_LMA,
834 LU_XATTR_REPLACE, oh);
836 obj->oo_pfid_in_lma = 0;
842 * In DNE environment, the object (in spite of regular file or directory)
843 * and its name entry may reside on different MDTs. Under such case, we will
844 * create an agent entry on the MDT where the object resides. The agent entry
845 * references the object locally, that makes the object to be visible to the
846 * userspace when mounted as 'zfs' directly. Then the userspace tools, such
847 * as 'tar' can handle the object properly.
849 * We handle the agent entry during set linkEA that is the common interface
850 * for both regular file and directory, can handle kinds of cases, such as
851 * create/link/unlink/rename, and so on.
853 * NOTE: we need to do that for both directory and regular file, so we can NOT
854 * do that when ea_{insert,delete} that are directory based operations.
/*
 * Keep the agent entry of \a obj in sync with the linkEA held in \a buf.
 *
 * The linkEA entries are walked until one whose unpacked FID is reported
 * remote by osd_remote_fid() is found.  If the object has an agent entry
 * but no remote link was found, the entry is removed with
 * osd_delete_from_remote_parent(); if it has none but a remote link
 * exists, one is created with osd_add_to_remote_parent().  Failures are
 * logged with CERROR and CWARN respectively.  -ENODATA from
 * linkea_init_with_rec() is handled as a separate case (not visible
 * here).
 */
856 static int osd_xattr_handle_linkea(const struct lu_env *env,
857 struct osd_device *osd,
858 struct osd_object *obj,
859 const struct lu_buf *buf,
860 struct osd_thandle *oh)
862 const struct lu_fid *fid = lu_object_fid(&obj->oo_dt.do_lu);
863 struct lu_fid *tfid = &osd_oti_get(env)->oti_fid;
864 struct linkea_data ldata = { .ld_buf = (struct lu_buf *)buf };
865 struct lu_name tmpname;
870 rc = linkea_init_with_rec(&ldata);
872 linkea_first_entry(&ldata);
873 while (ldata.ld_lee != NULL && !remote) {
874 linkea_entry_unpack(ldata.ld_lee, &ldata.ld_reclen,
876 if (osd_remote_fid(env, osd, tfid) > 0)
879 linkea_next_entry(&ldata);
881 } else if (rc == -ENODATA) {
887 if (lu_object_has_agent_entry(&obj->oo_dt.do_lu) && !remote) {
888 rc = osd_delete_from_remote_parent(env, osd, obj, oh, false);
890 CERROR("%s: failed to remove agent entry for "DFID
891 ": rc = %d\n", osd_name(osd), PFID(fid), rc);
892 } else if (!lu_object_has_agent_entry(&obj->oo_dt.do_lu) && remote) {
893 rc = osd_add_to_remote_parent(env, osd, obj, oh);
895 CWARN("%s: failed to create agent entry for "DFID
896 ": rc = %d\n", osd_name(osd), PFID(fid), rc);
/*
 * Set xattr \a name of \a dt to the value in \a buf.
 *
 * POSIX ACL xattrs are special-cased when od_posix_acl is off (the
 * result is not visible here).  The work is done under a write lock on
 * obj->oo_guard.  Setting XATTR_NAME_FID on an object whose PFID lives in
 * the LMA first splits it out with osd_xattr_split_pfid() and switches
 * \a fl to LU_XATTR_CREATE; setting XATTR_NAME_LINK on a device with a
 * remote parent directory first updates the agent entry through
 * osd_xattr_handle_linkea().  The value itself is stored with
 * osd_xattr_set_internal(); NOTE(review): any check of the earlier rc
 * before that call is not visible here.
 */
902 int osd_xattr_set(const struct lu_env *env, struct dt_object *dt,
903 const struct lu_buf *buf, const char *name, int fl,
904 struct thandle *handle)
906 struct osd_object *obj = osd_dt_obj(dt);
907 struct osd_device *osd = osd_obj2dev(obj);
908 struct osd_thandle *oh;
912 LASSERT(handle != NULL);
913 LASSERT(osd_invariant(obj));
915 if (!osd_obj2dev(obj)->od_posix_acl &&
916 (strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS) == 0 ||
917 strcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
920 oh = container_of(handle, struct osd_thandle, ot_super);
922 down_write(&obj->oo_guard);
923 CDEBUG(D_INODE, "Setting xattr %s with size %d\n",
924 name, (int)buf->lb_len);
925 /* For the OST migrated from ldiskfs, the PFID EA may
926 * be stored in LMA because of ldiskfs inode size. */
927 if (unlikely(strcmp(name, XATTR_NAME_FID) == 0 &&
928 obj->oo_pfid_in_lma)) {
929 rc = osd_xattr_split_pfid(env, obj, oh);
931 fl = LU_XATTR_CREATE;
932 } else if (strcmp(name, XATTR_NAME_LINK) == 0 &&
933 osd->od_remote_parent_dir != ZFS_NO_OBJECT) {
934 rc = osd_xattr_handle_linkea(env, osd, obj, buf, oh);
938 rc = osd_xattr_set_internal(env, obj, buf, name, fl, oh);
939 up_write(&obj->oo_guard);
/*
 * Declare the transaction holds for deleting xattr \a name from \a obj.
 *
 * An SA hold on the object is always taken because the xattr may live in
 * SA_ZPL_DXATTR.  If the object has an xattr ZAP and the name is found
 * there, holds are added to free the value object and to update the ZAP.
 * -ENOENT from the lookup means nothing else needs to be declared; any
 * other lookup error is handled separately (not visible here).
 */
945 __osd_xattr_declare_del(const struct lu_env *env, struct osd_object *obj,
946 const char *name, struct osd_thandle *oh)
948 struct osd_device *osd = osd_obj2dev(obj);
949 dmu_tx_t *tx = oh->ot_tx;
950 uint64_t xa_data_obj;
953 /* update SA_ZPL_DXATTR if xattr was in SA */
954 dmu_tx_hold_sa(tx, obj->oo_sa_hdl, 0);
956 if (obj->oo_xattr == ZFS_NO_OBJECT)
959 rc = -zap_lookup(osd->od_os, obj->oo_xattr, name, 8, 1, &xa_data_obj);
963 * We'll delete the existing object and ZAP entry.
965 dmu_tx_hold_bonus(tx, xa_data_obj);
966 dmu_tx_hold_free(tx, xa_data_obj, 0, DMU_OBJECT_END);
967 dmu_tx_hold_zap(tx, obj->oo_xattr, FALSE, (char *) name);
969 } else if (rc == -ENOENT) {
971 * Entry doesn't exist, nothing to be changed.
976 /* An error happened */
/*
 * Declare deletion of xattr \a name from \a dt.
 *
 * The handle must be non-NULL and already carry a DMU transaction, and
 * the object must have a dnode (all asserted).  Under a read lock on
 * obj->oo_guard, __osd_xattr_declare_del() is called only for objects
 * that exist and are not destroyed.
 */
980 int osd_declare_xattr_del(const struct lu_env *env, struct dt_object *dt,
981 const char *name, struct thandle *handle)
983 struct osd_object *obj = osd_dt_obj(dt);
984 struct osd_thandle *oh;
987 LASSERT(handle != NULL);
988 LASSERT(osd_invariant(obj));
990 oh = container_of(handle, struct osd_thandle, ot_super);
991 LASSERT(oh->ot_tx != NULL);
992 LASSERT(obj->oo_dn != NULL);
994 down_read(&obj->oo_guard);
995 if (likely(dt_object_exists(&obj->oo_dt) && !obj->oo_destroyed))
996 __osd_xattr_declare_del(env, obj, name, oh);
997 up_read(&obj->oo_guard);
/*
 * Remove xattr \a name from the SA-resident nvlist of \a obj.
 *
 * The nvlist is populated through __osd_xattr_cache() and the byte-array
 * entry is removed from it.  The SA is then rewritten immediately for
 * XATTR_NAME_LMV and scheduled for a batched update for other names.
 */
1002 static int __osd_sa_xattr_del(const struct lu_env *env, struct osd_object *obj,
1003 const char *name, struct osd_thandle *oh)
1007 rc = __osd_xattr_cache(obj);
1011 rc = -nvlist_remove(obj->oo_sa_xattr, name, DATA_TYPE_BYTE_ARRAY);
1016 * only migrate delete LMV, and it needs to be done immediately, because
1017 * it's used in deleting sub stripes, and if this is delayed, later when
1018 * destroying the master object, it will delete sub stripes again.
1020 if (!strcmp(name, XATTR_NAME_LMV))
1021 rc = __osd_sa_xattr_update(env, obj, oh);
1023 rc = __osd_sa_xattr_schedule_update(env, obj, oh);
/*
 * Delete xattr \a name from \a obj.
 *
 * Objects that do not exist or are destroyed are checked for up front.
 * Removal from the SA is attempted first via __osd_sa_xattr_del().  The
 * xattr ZAP, if the object has one, is then consulted: when the name is
 * found its value object is freed and the ZAP entry removed.
 * NOTE(review): the conditions under which the SA result short-circuits
 * the ZAP lookup, and the -ENOENT handling, are not visible here.
 */
1027 static int __osd_xattr_del(const struct lu_env *env, struct osd_object *obj,
1028 const char *name, struct osd_thandle *oh)
1030 struct osd_device *osd = osd_obj2dev(obj);
1031 uint64_t xa_data_obj;
1034 if (unlikely(!dt_object_exists(&obj->oo_dt) || obj->oo_destroyed))
1037 /* try remove xattr from SA at first */
1038 rc = __osd_sa_xattr_del(env, obj, name, oh);
1042 if (obj->oo_xattr == ZFS_NO_OBJECT)
1045 rc = -zap_lookup(osd->od_os, obj->oo_xattr, name, sizeof(uint64_t), 1,
1047 if (rc == -ENOENT) {
1049 } else if (rc == 0) {
1052 * We'll delete the existing object and ZAP entry.
1054 rc = -dmu_object_free(osd->od_os, xa_data_obj, oh->ot_tx);
1058 rc = -zap_remove(osd->od_os, obj->oo_xattr, name, oh->ot_tx);
/*
 * Delete xattr \a name from \a dt.
 *
 * Returns -EOPNOTSUPP for POSIX ACL xattrs when od_posix_acl is off.
 * Under a write lock on obj->oo_guard, deleting XATTR_NAME_FID from an
 * object with oo_pfid_in_lma set strips the PFID from the LMA via
 * osd_xattr_split_pfid(); any other case goes through __osd_xattr_del().
 * The object must exist and the handle must carry a DMU transaction
 * (asserted).
 */
1064 int osd_xattr_del(const struct lu_env *env, struct dt_object *dt,
1065 const char *name, struct thandle *handle)
1067 struct osd_object *obj = osd_dt_obj(dt);
1068 struct osd_thandle *oh;
1072 LASSERT(handle != NULL);
1073 LASSERT(obj->oo_dn != NULL);
1074 LASSERT(osd_invariant(obj));
1075 LASSERT(dt_object_exists(dt));
1076 oh = container_of(handle, struct osd_thandle, ot_super);
1077 LASSERT(oh->ot_tx != NULL);
1079 if (!osd_obj2dev(obj)->od_posix_acl &&
1080 (strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS) == 0 ||
1081 strcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
1082 RETURN(-EOPNOTSUPP);
1084 down_write(&obj->oo_guard);
1085 /* For the OST migrated from ldiskfs, the PFID EA may
1086 * be stored in LMA because of ldiskfs inode size. */
1087 if (unlikely(strcmp(name, XATTR_NAME_FID) == 0 && obj->oo_pfid_in_lma))
1088 rc = osd_xattr_split_pfid(env, obj, oh);
1090 rc = __osd_xattr_del(env, obj, name, oh);
1091 up_write(&obj->oo_guard);
/*
 * Declare the holds needed to destroy every ZAP-based xattr of \a obj.
 *
 * Nothing is declared when the object has no xattr ZAP.  Otherwise a
 * free hold is taken on the ZAP itself and, while walking it with a
 * cursor, on each value object it references.  Every entry is expected
 * to be a single uint64_t (asserted).  A failed per-entry lookup is
 * logged with CERROR.  At the end a non-zero rc is checked together with
 * tx->tx_err == 0; NOTE(review): the action taken is not visible here.
 */
1096 void osd_declare_xattrs_destroy(const struct lu_env *env,
1097 struct osd_object *obj, struct osd_thandle *oh)
1099 struct osd_device *osd = osd_obj2dev(obj);
1100 zap_attribute_t *za = &osd_oti_get(env)->oti_za;
1101 uint64_t oid = obj->oo_xattr, xid;
1102 dmu_tx_t *tx = oh->ot_tx;
1106 if (oid == ZFS_NO_OBJECT)
1107 return; /* Nothing to do for SA xattrs */
1109 /* Declare to free the ZAP holding xattrs */
1110 dmu_tx_hold_free(tx, oid, 0, DMU_OBJECT_END);
1112 rc = osd_zap_cursor_init(&zc, osd->od_os, oid, 0);
1116 while (zap_cursor_retrieve(zc, za) == 0) {
1117 LASSERT(za->za_num_integers == 1);
1118 LASSERT(za->za_integer_length == sizeof(uint64_t));
1120 rc = -zap_lookup(osd->od_os, oid, za->za_name,
1121 sizeof(uint64_t), 1, &xid);
1123 CERROR("%s: xattr %s lookup failed: rc = %d\n",
1124 osd->od_svname, za->za_name, rc);
1127 dmu_tx_hold_free(tx, xid, 0, DMU_OBJECT_END);
1129 zap_cursor_advance(zc);
1132 osd_zap_cursor_fini(zc);
1134 if (rc && tx->tx_err == 0)
/*
 * Free every ZAP-based xattr of \a obj and then the xattr ZAP itself.
 *
 * The transaction must already be assigned to a transaction group
 * (asserted).  Returns 0 right away when the object has no xattr ZAP.
 * Otherwise each entry of the ZAP is looked up and its value object is
 * freed with dmu_object_free(), after which the ZAP object is freed.
 * Lookup and free failures are logged with CERROR.
 */
1138 int osd_xattrs_destroy(const struct lu_env *env,
1139 struct osd_object *obj, struct osd_thandle *oh)
1141 struct osd_device *osd = osd_obj2dev(obj);
1142 dmu_tx_t *tx = oh->ot_tx;
1143 zap_attribute_t *za = &osd_oti_get(env)->oti_za;
1148 /* The transaction must have been assigned to a transaction group. */
1149 LASSERT(tx->tx_txg != 0);
1151 if (obj->oo_xattr == ZFS_NO_OBJECT)
1152 return 0; /* Nothing to do for SA xattrs */
1154 /* Free the ZAP holding the xattrs */
1155 rc = osd_zap_cursor_init(&zc, osd->od_os, obj->oo_xattr, 0);
1159 while (zap_cursor_retrieve(zc, za) == 0) {
1160 LASSERT(za->za_num_integers == 1);
1161 LASSERT(za->za_integer_length == sizeof(uint64_t));
1163 rc = -zap_lookup(osd->od_os, obj->oo_xattr, za->za_name,
1164 sizeof(uint64_t), 1, &xid);
1166 CERROR("%s: lookup xattr %s failed: rc = %d\n",
1167 osd->od_svname, za->za_name, rc);
1169 rc = -dmu_object_free(osd->od_os, xid, tx);
1171 CERROR("%s: free xattr %s failed: rc = %d\n",
1172 osd->od_svname, za->za_name, rc);
1174 zap_cursor_advance(zc);
1176 osd_zap_cursor_fini(zc);
1178 rc = -dmu_object_free(osd->od_os, obj->oo_xattr, tx);
1180 CERROR("%s: free xattr %llu failed: rc = %d\n",
1181 osd->od_svname, obj->oo_xattr, rc);
/*
 * List the names of the SA-resident xattrs of \a obj.
 *
 * Walks obj->oo_sa_xattr after populating it via __osd_xattr_cache().
 * POSIX ACL names are special-cased when od_posix_acl is off.  When
 * lb->lb_buf is non-NULL, len + 1 bytes of each name are copied to
 * offset counted after checking that they fit in lb->lb_len.
 * NOTE(review): how len and counted are updated, and what is returned,
 * is not visible here.
 */
1187 osd_sa_xattr_list(const struct lu_env *env, struct osd_object *obj,
1188 const struct lu_buf *lb)
1190 nvpair_t *nvp = NULL;
1191 int len, counted = 0;
1194 rc = __osd_xattr_cache(obj);
1198 while ((nvp = nvlist_next_nvpair(obj->oo_sa_xattr, nvp)) != NULL) {
1199 const char *name = nvpair_name(nvp);
1201 if (!osd_obj2dev(obj)->od_posix_acl &&
1202 (strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS) == 0 ||
1203 strcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
1207 if (lb->lb_buf != NULL) {
1208 if (counted + len + 1 > lb->lb_len)
1211 memcpy(lb->lb_buf + counted, name, len + 1);
/*
 * List the xattr names of \a dt into \a lb.
 *
 * Under a read lock on obj->oo_guard the SA-resident names are listed
 * first via osd_sa_xattr_list(); if the object has no xattr ZAP the
 * running count is the result.  Otherwise the ZAP is walked with a
 * cursor.  POSIX ACL names are passed over when od_posix_acl is off.
 * When lb->lb_buf is non-NULL each name is copied together with one
 * trailing byte (strlen + 1), and -ERANGE is returned when it would not
 * fit in lb->lb_len.  -ENOENT from the cursor marks the end of the ZAP.
 */
1218 int osd_xattr_list(const struct lu_env *env, struct dt_object *dt,
1219 const struct lu_buf *lb)
1221 struct osd_object *obj = osd_dt_obj(dt);
1222 struct osd_device *osd = osd_obj2dev(obj);
1223 zap_attribute_t *za = &osd_oti_get(env)->oti_za;
1228 LASSERT(obj->oo_dn != NULL);
1229 LASSERT(osd_invariant(obj));
1230 LASSERT(dt_object_exists(dt));
1232 down_read(&obj->oo_guard);
1234 rc = osd_sa_xattr_list(env, obj, lb);
1240 /* continue with dnode xattr if any */
1241 if (obj->oo_xattr == ZFS_NO_OBJECT)
1242 GOTO(out, rc = counted);
1244 rc = osd_zap_cursor_init(&zc, osd->od_os, obj->oo_xattr, 0);
1248 while ((rc = -zap_cursor_retrieve(zc, za)) == 0) {
1249 if (!osd_obj2dev(obj)->od_posix_acl &&
1250 (strcmp(za->za_name, XATTR_NAME_POSIX_ACL_ACCESS) == 0 ||
1251 strcmp(za->za_name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0)) {
1252 zap_cursor_advance(zc);
1256 rc = strlen(za->za_name);
1257 if (lb->lb_buf != NULL) {
1258 if (counted + rc + 1 > lb->lb_len)
1259 GOTO(out_fini, rc = -ERANGE);
1261 memcpy(lb->lb_buf + counted, za->za_name, rc + 1);
1265 zap_cursor_advance(zc);
1267 if (rc == -ENOENT) /* no more keys in the index */
1269 else if (unlikely(rc < 0))
1274 osd_zap_cursor_fini(zc);
1276 up_read(&obj->oo_guard);