4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright (c) 2012, 2017, Intel Corporation.
29 * This file is part of Lustre, http://www.lustre.org/
30 * Lustre is a trademark of Sun Microsystems, Inc.
32 * lustre/osd-zfs/osd_xattr.c
33 * functions to manipulate extended attributes and system attributes
35 * Author: Alex Zhuravlev <bzzz@whamcloud.com>
36 * Author: Mike Pershin <tappro@whamcloud.com>
39 #define DEBUG_SUBSYSTEM S_OSD
41 #include <libcfs/libcfs.h>
42 #include <obd_support.h>
43 #include <lustre_net.h>
45 #include <obd_class.h>
46 #include <lustre_disk.h>
47 #include <lustre_fid.h>
48 #include <lustre_linkea.h>
50 #include "osd_internal.h"
52 #include <sys/dnode.h>
57 #include <sys/spa_impl.h>
58 #include <sys/zfs_znode.h>
59 #include <sys/dmu_tx.h>
60 #include <sys/dmu_objset.h>
61 #include <sys/dsl_prop.h>
62 #include <sys/sa_impl.h>
65 #include <linux/posix_acl_xattr.h>
66 #include <lustre_scrub.h>
/*
 * Load the packed SA_ZPL_DXATTR system attribute of \a hdl into the
 * nvlist returned through \a sa.
 *
 * The attribute size is queried with sa_size(), a temporary buffer of that
 * size is taken from osd_zio_buf_alloc(), filled by sa_lookup() and decoded
 * with nvlist_unpack(); the temporary buffer is then freed.
 * nvlist_alloc() is used to create a fresh list (NOTE(review): presumably
 * when the attribute does not exist yet -- confirm).
 *
 * The result of every ZFS call is negated before being stored in rc.
 */
68 int __osd_xattr_load(struct osd_device *osd, sa_handle_t *hdl, nvlist_t **sa)
73 rc = -sa_size(hdl, SA_ZPL_DXATTR(osd), &size);
76 rc = -nvlist_alloc(sa, NV_UNIQUE_NAME, KM_SLEEP);
80 buf = osd_zio_buf_alloc(size);
85 rc = -sa_lookup(hdl, SA_ZPL_DXATTR(osd), buf, size);
87 rc = -nvlist_unpack(buf, size, sa, KM_SLEEP);
88 osd_zio_buf_free(buf, size);
/*
 * Make sure obj->oo_sa_xattr is populated.
 *
 * The object must have an SA handle (asserted). The cached nvlist is
 * checked first; otherwise it is loaded with __osd_xattr_load().
 */
94 static inline int __osd_xattr_cache(struct osd_object *obj)
96 LASSERT(obj->oo_sa_hdl);
97 if (obj->oo_sa_xattr != NULL)
99 return __osd_xattr_load(osd_obj2dev(obj),
100 obj->oo_sa_hdl, &obj->oo_sa_xattr);
/*
 * Look up xattr \a name in the object's cached SA xattr nvlist.
 *
 * When \a buf or buf->lb_buf is NULL only the size is reported through
 * \a sizep. Otherwise -ERANGE is returned if *sizep exceeds buf->lb_len,
 * else *sizep bytes of the value are copied into buf->lb_buf.
 */
104 __osd_sa_xattr_get(const struct lu_env *env, struct osd_object *obj,
105 const struct lu_buf *buf, const char *name, int *sizep)
110 rc = __osd_xattr_cache(obj);
114 LASSERT(obj->oo_sa_xattr);
115 rc = -nvlist_lookup_byte_array(obj->oo_sa_xattr, name,
120 if (buf == NULL || buf->lb_buf == NULL) {
121 /* return the required size by *sizep */
125 if (*sizep > buf->lb_len)
126 return -ERANGE; /* match ldiskfs error */
128 memcpy(buf->lb_buf, nv_value, *sizep);
/*
 * Read xattr \a name that is stored as a separate object referenced from
 * the xattr ZAP \a xattr.
 *
 * The ZAP entry is resolved to the data object; its dnode and a private SA
 * handle are obtained in order to read SA_ZPL_SIZE. A size above INT_MAX
 * is special-cased (NOTE(review): presumably rejected -- confirm). With no
 * destination buffer only the size is needed; a too-small buffer yields
 * -ERANGE; otherwise the value is read from offset 0 with dmu_read().
 * The SA handle and the dnode reference are released at the end.
 */
132 int __osd_xattr_get_large(const struct lu_env *env, struct osd_device *osd,
133 uint64_t xattr, struct lu_buf *buf,
134 const char *name, int *sizep)
137 sa_handle_t *sa_hdl = NULL;
138 uint64_t xa_data_obj, size;
141 /* are there any extended attributes? */
142 if (xattr == ZFS_NO_OBJECT)
145 /* Lookup the object number containing the xattr data */
146 rc = -zap_lookup(osd->od_os, xattr, name, sizeof(uint64_t), 1,
151 rc = __osd_obj2dnode(osd->od_os, xa_data_obj, &xa_data_dn);
155 rc = -sa_handle_get(osd->od_os, xa_data_obj, NULL, SA_HDL_PRIVATE,
160 /* Get the xattr value length / object size */
161 rc = -sa_lookup(sa_hdl, SA_ZPL_SIZE(osd), &size, 8);
165 if (size > INT_MAX) {
172 if (buf == NULL || buf->lb_buf == NULL) {
173 /* We only need to return the required size */
176 if (*sizep > buf->lb_len) {
177 rc = -ERANGE; /* match ldiskfs error */
181 rc = -dmu_read(osd->od_os, xa_data_dn->dn_object, 0,
182 size, buf->lb_buf, DMU_READ_PREFETCH);
185 sa_handle_destroy(sa_hdl);
187 osd_dnode_rele(xa_data_dn);
193 * Copy an extended attribute into the buffer provided, or compute
194 * the required buffer size if \a buf is NULL.
196 * On success, the number of bytes used or required is stored in \a sizep.
198 * Note that no locking is done here.
200 * \param[in] env execution environment
201 * \param[in] obj object for which to retrieve xattr
202 * \param[out] buf buffer to store xattr value in
203 * \param[in] name name of xattr to copy
204 * \param[out] sizep bytes used or required to store xattr
206 * \retval 0 on success
207 * \retval negative negated errno on failure
/*
 * Lookup order: the SA-packed xattrs are tried first via
 * __osd_sa_xattr_get(); the ZAP-based xattrs under obj->oo_xattr are the
 * fallback via __osd_xattr_get_large(). An object that does not exist or
 * is marked destroyed is checked for before any lookup.
 */
209 int osd_xattr_get_internal(const struct lu_env *env, struct osd_object *obj,
210 struct lu_buf *buf, const char *name, int *sizep)
214 if (unlikely(!dt_object_exists(&obj->oo_dt) || obj->oo_destroyed))
217 /* check SA_ZPL_DXATTR first then fallback to directory xattr */
218 rc = __osd_sa_xattr_get(env, obj, buf, name, sizep);
222 return __osd_xattr_get_large(env, osd_obj2dev(obj), obj->oo_xattr,
227 * Copy LMA extended attribute into provided buffer
229 * Note that no locking is done here.
231 * \param[in] env execution environment
232 * \param[in] obj object for which to retrieve xattr
233 * \param[out] buf buffer to store xattr value in
235 * \retval 0 on success
236 * \retval negative negated errno on failure
/*
 * Read the XATTR_NAME_LMA xattr of \a obj into \a buf.
 *
 * The SA copy is tried first, then the ZAP-based xattr. In both cases a
 * successful read that returned fewer bytes than
 * sizeof(struct lustre_mdt_attrs) is explicitly checked for.
 */
238 int osd_xattr_get_lma(const struct lu_env *env, struct osd_object *obj,
247 if (unlikely(obj->oo_destroyed))
250 /* check SA_ZPL_DXATTR first then fallback to directory xattr */
251 rc = __osd_sa_xattr_get(env, obj, buf, XATTR_NAME_LMA, &size);
252 if (!rc && unlikely(size < sizeof(struct lustre_mdt_attrs)))
257 rc = __osd_xattr_get_large(env, osd_obj2dev(obj), obj->oo_xattr,
258 buf, XATTR_NAME_LMA, &size);
259 if (!rc && unlikely(size < sizeof(struct lustre_mdt_attrs)))
/*
 * Build a filter_fid (PFID xattr) value from the stripe information kept
 * in the object's LMA.
 *
 * The LMA is read into the per-thread oti_buf, interpreted as a
 * struct lustre_ost_attrs, swabbed, and must carry LMAC_STRIPE_INFO
 * (asserted). *sizep is set to sizeof(struct filter_fid). Stripe count and
 * size, the parent FID and, when LMAC_COMP_INFO is set, the component
 * start/end/id are converted to little-endian while filling the
 * filter_fid / ost_layout.
 */
266 static int osd_get_pfid_from_lma(const struct lu_env *env,
267 struct osd_object *obj,
268 struct lu_buf *buf, int *sizep)
270 struct osd_thread_info *info = osd_oti_get(env);
271 struct lustre_ost_attrs *loa =
272 (struct lustre_ost_attrs *)&info->oti_buf;
273 struct lustre_mdt_attrs *lma = &loa->loa_lma;
274 struct filter_fid *ff;
275 struct ost_layout *ol;
276 struct lu_buf tbuf = {
278 .lb_len = sizeof(info->oti_buf),
283 BUILD_BUG_ON(sizeof(info->oti_buf) < sizeof(*loa));
284 rc = osd_xattr_get_internal(env, obj, &tbuf,
285 XATTR_NAME_LMA, sizep);
289 lustre_loa_swab(loa, true);
290 LASSERT(lma->lma_compat & LMAC_STRIPE_INFO);
292 *sizep = sizeof(*ff);
293 if (buf->lb_len == 0 || !buf->lb_buf)
296 if (buf->lb_len < *sizep)
301 ol->ol_stripe_count = cpu_to_le32(loa->loa_parent_fid.f_ver >>
302 PFID_STRIPE_IDX_BITS);
303 ol->ol_stripe_size = cpu_to_le32(loa->loa_stripe_size);
304 loa->loa_parent_fid.f_ver &= PFID_STRIPE_COUNT_MASK;
305 fid_cpu_to_le(&ff->ff_parent, &loa->loa_parent_fid);
306 if (lma->lma_compat & LMAC_COMP_INFO) {
307 ol->ol_comp_start = cpu_to_le64(loa->loa_comp_start);
308 ol->ol_comp_end = cpu_to_le64(loa->loa_comp_end);
309 ol->ol_comp_id = cpu_to_le32(loa->loa_comp_id);
311 ol->ol_comp_start = 0;
/*
 * dt-level xattr read for \a dt.
 *
 * The POSIX ACL xattr names are special-cased when the device has
 * od_posix_acl unset. The lookup itself runs under a read lock on
 * oo_guard, after re-checking that the object exists and is not destroyed.
 * A XATTR_NAME_FID request on an object with oo_pfid_in_lma is served from
 * the LMA; everything else goes through osd_xattr_get_internal().
 */
319 int osd_xattr_get(const struct lu_env *env, struct dt_object *dt,
320 struct lu_buf *buf, const char *name)
322 struct osd_object *obj = osd_dt_obj(dt);
326 LASSERT(obj->oo_dn != NULL);
327 LASSERT(osd_invariant(obj));
329 if (!osd_obj2dev(obj)->od_posix_acl &&
330 (strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS) == 0 ||
331 strcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
334 down_read(&obj->oo_guard);
335 if (unlikely(!dt_object_exists(dt) || obj->oo_destroyed)) {
336 up_read(&obj->oo_guard);
340 /* For the OST migrated from ldiskfs, the PFID EA may
341 * be stored in LMA because of ldiskfs inode size. */
342 if (strcmp(name, XATTR_NAME_FID) == 0 && obj->oo_pfid_in_lma)
343 rc = osd_get_pfid_from_lma(env, obj, buf, &size);
345 rc = osd_xattr_get_internal(env, obj, buf, name, &size);
346 up_read(&obj->oo_guard);
355 /* the function is used to declare EAs when SA is not supported */
/*
 * Three situations are declared against the tx:
 *  - the object has no xattr ZAP yet: a new ZAP with one entry, an SA
 *    create and a write of \a vallen bytes to a new object;
 *  - the name already exists in the ZAP: bonus update, free of the tail
 *    beyond \a vallen and rewrite of the existing data object;
 *  - the name is missing (-ENOENT): bonus and ZAP update on the xattr ZAP
 *    plus an SA create and write for a new data object.
 */
356 void __osd_xattr_declare_legacy(const struct lu_env *env,
357 struct osd_object *obj,
358 int vallen, const char *name,
359 struct osd_thandle *oh)
361 struct osd_device *osd = osd_obj2dev(obj);
362 dmu_tx_t *tx = oh->ot_tx;
363 uint64_t xa_data_obj;
366 if (obj->oo_xattr == ZFS_NO_OBJECT) {
367 /* xattr zap + entry */
368 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, TRUE, (char *) name);
369 /* xattr value obj */
370 dmu_tx_hold_sa_create(tx, ZFS_SA_BASE_ATTR_SIZE);
371 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, vallen);
375 rc = -zap_lookup(osd->od_os, obj->oo_xattr, name, sizeof(uint64_t), 1,
379 * Entry already exists.
380 * We'll truncate the existing object.
382 dmu_tx_hold_bonus(tx, xa_data_obj);
383 dmu_tx_hold_free(tx, xa_data_obj, vallen, DMU_OBJECT_END);
384 dmu_tx_hold_write(tx, xa_data_obj, 0, vallen);
385 } else if (rc == -ENOENT) {
387 * Entry doesn't exist, we need to create a new one and a new
388 * object to store the value.
390 dmu_tx_hold_bonus(tx, obj->oo_xattr);
391 dmu_tx_hold_zap(tx, obj->oo_xattr, TRUE, (char *) name);
392 dmu_tx_hold_sa_create(tx, ZFS_SA_BASE_ATTR_SIZE);
393 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, vallen);
/*
 * Declare the transaction holds needed to set xattr \a name with a value
 * of \a vallen bytes on \a obj.
 *
 * A destroyed object is checked for first. XATTR_NAME_LINK additionally
 * holds the remote parent directory ZAP when the device has one. Devices
 * without od_xattr_in_sa use __osd_xattr_declare_legacy(). For an existing
 * object the spill block of its dnode is held. For an object that does not
 * exist yet, oo_ea_in_bonus accumulates the declared EA bytes and a spill
 * block is declared once they would exceed the bonus length.
 */
397 void __osd_xattr_declare_set(const struct lu_env *env, struct osd_object *obj,
398 int vallen, const char *name,
399 struct osd_thandle *oh)
401 struct osd_device *osd = osd_obj2dev(obj);
402 dmu_tx_t *tx = oh->ot_tx;
405 if (unlikely(obj->oo_destroyed))
408 if (strcmp(name, XATTR_NAME_LINK) == 0 &&
409 osd->od_remote_parent_dir != ZFS_NO_OBJECT) {
410 /* If some name entry resides on remote MDT, then will create
411 * agent entry under remote parent. On the other hand, if the
412 * remote entry will be removed, then related agent entry may
413 * need to be removed from the remote parent. So there may be
414 * kinds of cases, let's declare enough credits. The credits
415 * for create agent entry is enough for remove case. */
416 osd_tx_hold_zap(tx, osd->od_remote_parent_dir,
420 if (unlikely(!osd_obj2dev(obj)->od_xattr_in_sa)) {
421 __osd_xattr_declare_legacy(env, obj, vallen, name, oh);
425 /* declare EA in SA */
426 if (dt_object_exists(&obj->oo_dt)) {
427 LASSERT(obj->oo_sa_hdl);
428 /* XXX: it should be possible to skip spill
429 * declaration if specific EA is part of
430 * bonus and doesn't grow */
431 dmu_tx_hold_spill(tx, obj->oo_dn->dn_object);
435 bonuslen = osd_obj_bonuslen(obj);
437 /* the object doesn't exist, but we've already declared bonus
438 * in osd_declare_object_create() */
439 if (obj->oo_ea_in_bonus > bonuslen) {
440 /* spill has been declared already */
441 } else if (obj->oo_ea_in_bonus + vallen > bonuslen) {
442 /* we're about to exceed bonus, let's declare spill */
443 dmu_tx_hold_spill(tx, DMU_NEW_OBJECT);
445 obj->oo_ea_in_bonus += vallen;
/*
 * dt-level wrapper that declares an xattr set: converts \a handle to the
 * osd_thandle and calls __osd_xattr_declare_set() with buf->lb_len as the
 * value length, under a read lock on oo_guard.
 */
448 int osd_declare_xattr_set(const struct lu_env *env, struct dt_object *dt,
449 const struct lu_buf *buf, const char *name,
450 int fl, struct thandle *handle)
452 struct osd_object *obj = osd_dt_obj(dt);
453 struct osd_thandle *oh;
456 LASSERT(handle != NULL);
457 oh = container_of(handle, struct osd_thandle, ot_super);
459 down_read(&obj->oo_guard);
460 __osd_xattr_declare_set(env, obj, buf->lb_len, name, oh);
461 up_read(&obj->oo_guard);
/*
 * Write the complete set of system attributes of \a obj in one call.
 *
 * The deferred-update flags oo_late_xattr and oo_late_attr_set are
 * cleared. The per-thread osa_attr scratch area is filled from
 * obj->oo_attr, with btime encoded from the local timestamp 'now' and the
 * generation taken from the tx's txg. When ZFS_PROJINHERIT is defined and
 * the device has od_projectused_dn, a project ID is set as well. The bulk
 * array is then populated (its count is asserted against the array size),
 * the cached xattr nvlist is packed into oti_xattr_lbuf and appended as
 * SA_ZPL_DXATTR, and everything is applied with
 * sa_replace_all_by_template().
 */
466 int __osd_sa_attr_init(const struct lu_env *env, struct osd_object *obj,
467 struct osd_thandle *oh)
469 sa_bulk_attr_t *bulk = osd_oti_get(env)->oti_attr_bulk;
470 struct osa_attr *osa = &osd_oti_get(env)->oti_osa;
471 struct lu_buf *lb = &osd_oti_get(env)->oti_xattr_lbuf;
472 struct osd_device *osd = osd_obj2dev(obj);
474 inode_timespec_t now;
478 obj->oo_late_xattr = 0;
479 obj->oo_late_attr_set = 0;
481 gen = dmu_tx_get_txg(oh->ot_tx);
483 ZFS_TIME_ENCODE(&now, osa->btime);
485 obj->oo_attr.la_valid |= LA_BTIME;
486 obj->oo_attr.la_btime = osa->btime[0];
487 osa->atime[0] = obj->oo_attr.la_atime;
488 osa->ctime[0] = obj->oo_attr.la_ctime;
489 osa->mtime[0] = obj->oo_attr.la_mtime;
490 osa->mode = obj->oo_attr.la_mode;
491 osa->uid = obj->oo_attr.la_uid;
492 osa->gid = obj->oo_attr.la_gid;
493 osa->rdev = obj->oo_attr.la_rdev;
494 osa->nlink = obj->oo_attr.la_nlink;
495 osa->flags = attrs_fs2zfs(obj->oo_attr.la_flags);
496 osa->size = obj->oo_attr.la_size;
497 #ifdef ZFS_PROJINHERIT
498 if (osd->od_projectused_dn) {
499 if (obj->oo_attr.la_valid & LA_PROJID)
500 osa->projid = obj->oo_attr.la_projid;
502 osa->projid = ZFS_DEFAULT_PROJID;
503 osa->flags |= ZFS_PROJID;
504 obj->oo_with_projid = 1;
509 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MODE(osd), NULL, &osa->mode, 8);
510 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_SIZE(osd), NULL, &osa->size, 8);
511 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_GEN(osd), NULL, &gen, 8);
512 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_UID(osd), NULL, &osa->uid, 8);
513 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_GID(osd), NULL, &osa->gid, 8);
514 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_PARENT(osd), NULL,
516 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_FLAGS(osd), NULL, &osa->flags, 8);
517 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_ATIME(osd), NULL, osa->atime, 16);
518 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MTIME(osd), NULL, osa->mtime, 16);
519 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CTIME(osd), NULL, osa->ctime, 16);
520 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CRTIME(osd), NULL, osa->btime, 16);
521 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_LINKS(osd), NULL, &osa->nlink, 8);
522 #ifdef ZFS_PROJINHERIT
523 if (osd->od_projectused_dn)
524 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_PROJID(osd), NULL,
527 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_RDEV(osd), NULL, &osa->rdev, 8);
528 LASSERT(cnt <= ARRAY_SIZE(osd_oti_get(env)->oti_attr_bulk));
530 /* Update the SA for additions, modifications, and removals. */
531 rc = -nvlist_size(obj->oo_sa_xattr, &size, NV_ENCODE_XDR);
535 lu_buf_check_and_alloc(lb, size);
536 if (lb->lb_buf == NULL) {
537 CERROR("%s: can't allocate buffer for xattr update\n",
542 rc = -nvlist_pack(obj->oo_sa_xattr, (char **)&lb->lb_buf, &size,
543 NV_ENCODE_XDR, KM_SLEEP);
547 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_DXATTR(osd), NULL, lb->lb_buf, size);
549 rc = -sa_replace_all_by_template(obj->oo_sa_hdl, bulk, cnt, oh->ot_tx);
/*
 * Pack the cached xattr nvlist of \a obj and store it as SA_ZPL_DXATTR
 * right away.
 *
 * oo_late_xattr is cleared, the per-thread oti_xattr_lbuf is sized from
 * nvlist_size(), and after nvlist_pack() the packed data is asserted to
 * live in lb->lb_buf.
 *
 * NOTE(review): the sa_update() result is not assigned to rc here, unlike
 * the other SA calls in this file -- confirm that ignoring it is intended.
 */
554 int __osd_sa_xattr_update(const struct lu_env *env, struct osd_object *obj,
555 struct osd_thandle *oh)
557 struct lu_buf *lb = &osd_oti_get(env)->oti_xattr_lbuf;
558 struct osd_device *osd = osd_obj2dev(obj);
563 obj->oo_late_xattr = 0;
565 /* Update the SA for additions, modifications, and removals. */
566 rc = -nvlist_size(obj->oo_sa_xattr, &size, NV_ENCODE_XDR);
570 lu_buf_check_and_alloc(lb, size);
571 if (lb->lb_buf == NULL) {
572 CERROR("%s: can't allocate buffer for xattr update\n",
578 rc = -nvlist_pack(obj->oo_sa_xattr, &dxattr, &size,
579 NV_ENCODE_XDR, KM_SLEEP);
582 LASSERT(dxattr == lb->lb_buf);
584 sa_update(obj->oo_sa_hdl, SA_ZPL_DXATTR(osd), dxattr, size, oh->ot_tx);
590 * Set an extended attribute.
591 * This transaction must have called udmu_xattr_declare_set() first.
593 * Returns 0 on success or a negative error number on failure.
595 * No locking is done here.
/*
 * Defer writing the SA xattrs of \a obj: sets oo_late_xattr and calls
 * osd_object_sa_dirty_add(obj, oh). Both an SA handle and a loaded xattr
 * nvlist are required (asserted).
 */
597 int __osd_sa_xattr_schedule_update(const struct lu_env *env,
598 struct osd_object *obj,
599 struct osd_thandle *oh)
602 LASSERT(obj->oo_sa_hdl);
603 LASSERT(obj->oo_sa_xattr);
605 /* schedule batched SA update in osd_object_sa_dirty_rele() */
606 obj->oo_late_xattr = 1;
607 osd_object_sa_dirty_add(obj, oh);
/*
 * Store xattr \a name in the SA xattr nvlist of \a obj.
 *
 * The value length is compared against OBD_MAX_EA_SIZE, and the packed
 * nvlist size plus the value length against DXATTR_MAX_SA_SIZE
 * (NOTE(review): presumably both set too_big -- confirm). An existing
 * entry is looked up first so that LU_XATTR_CREATE and LU_XATTR_REPLACE
 * can be honoured; when the value is too big the existing SA entry is
 * removed, an update is scheduled and -EFBIG is reported. If the object
 * has an xattr ZAP, a same-named entry there is looked up, its object
 * freed and the entry removed (NOTE(review): the zap_remove() result is
 * not assigned -- confirm that is intended). The value is then added to
 * the nvlist. The SA is rewritten immediately unless the dnode was
 * allocated in the current txg, in which case the update is batched.
 */
613 int __osd_sa_xattr_set(const struct lu_env *env, struct osd_object *obj,
614 const struct lu_buf *buf, const char *name, int fl,
615 struct osd_thandle *oh)
623 rc = __osd_xattr_cache(obj);
627 LASSERT(obj->oo_sa_xattr);
628 if (buf->lb_len > OBD_MAX_EA_SIZE) {
631 /* Prevent the DXATTR SA from consuming the entire SA
633 rc = -nvlist_size(obj->oo_sa_xattr, &size, NV_ENCODE_XDR);
637 if (size + buf->lb_len > DXATTR_MAX_SA_SIZE)
641 /* even in case of -EFBIG we must lookup xattr and check can we
642 * rewrite it then delete from SA */
643 rc = -nvlist_lookup_byte_array(obj->oo_sa_xattr, name, &nv_value,
646 if (fl & LU_XATTR_CREATE) {
648 } else if (too_big) {
649 rc = -nvlist_remove(obj->oo_sa_xattr, name,
650 DATA_TYPE_BYTE_ARRAY);
653 rc = __osd_sa_xattr_schedule_update(env, obj, oh);
654 return rc == 0 ? -EFBIG : rc;
656 } else if (rc == -ENOENT) {
657 if (fl & LU_XATTR_REPLACE)
665 /* Ensure xattr doesn't exist in ZAP */
666 if (obj->oo_xattr != ZFS_NO_OBJECT) {
667 struct osd_device *osd = osd_obj2dev(obj);
669 rc = -zap_lookup(osd->od_os, obj->oo_xattr,
672 rc = -dmu_object_free(osd->od_os, objid, oh->ot_tx);
674 zap_remove(osd->od_os, obj->oo_xattr,
679 rc = -nvlist_add_byte_array(obj->oo_sa_xattr, name,
680 (uchar_t *)buf->lb_buf, buf->lb_len);
684 /* batch updates only for just created dnodes where we
685 * used to set number of EAs in a single transaction */
686 if (obj->oo_dn->dn_allocated_txg == oh->ot_tx->tx_txg)
687 rc = __osd_sa_xattr_schedule_update(env, obj, oh);
689 rc = __osd_sa_xattr_update(env, obj, oh);
/*
 * Store xattr \a name as a separate data object referenced from the
 * object's xattr ZAP.
 *
 * If the object has no xattr ZAP yet, one is created with
 * __osd_zap_create() and its object number is recorded in SA_ZPL_XATTR.
 * When the name already exists (and LU_XATTR_CREATE is checked for), the
 * existing data object is truncated with dmu_free_range(). When the name
 * is missing (and LU_XATTR_REPLACE is checked for), a new regular object
 * is created and added to the ZAP. The value is then written at offset 0
 * and SA_ZPL_SIZE of the data object is updated. The private SA handle
 * and any dnode references taken here are dropped at the end.
 */
695 __osd_xattr_set(const struct lu_env *env, struct osd_object *obj,
696 const struct lu_buf *buf, const char *name, int fl,
697 struct osd_thandle *oh)
699 struct osd_device *osd = osd_obj2dev(obj);
700 dnode_t *xa_zap_dn = NULL;
701 dnode_t *xa_data_dn = NULL;
702 uint64_t xa_data_obj;
703 sa_handle_t *sa_hdl = NULL;
704 dmu_tx_t *tx = oh->ot_tx;
708 LASSERT(obj->oo_sa_hdl);
710 if (obj->oo_xattr == ZFS_NO_OBJECT) {
711 struct lu_attr *la = &osd_oti_get(env)->oti_la;
713 la->la_valid = LA_MODE;
714 la->la_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
715 rc = __osd_zap_create(env, osd, &xa_zap_dn, tx, la, 0, 0);
719 obj->oo_xattr = xa_zap_dn->dn_object;
720 rc = osd_object_sa_update(obj, SA_ZPL_XATTR(osd),
721 &obj->oo_xattr, 8, oh);
726 rc = -zap_lookup(osd->od_os, obj->oo_xattr, name, sizeof(uint64_t), 1,
729 if (fl & LU_XATTR_CREATE) {
734 * Entry already exists.
735 * We'll truncate the existing object.
737 rc = __osd_obj2dnode(osd->od_os, xa_data_obj, &xa_data_dn);
741 rc = -sa_handle_get(osd->od_os, xa_data_obj, NULL,
742 SA_HDL_PRIVATE, &sa_hdl);
746 rc = -sa_lookup(sa_hdl, SA_ZPL_SIZE(osd), &size, 8);
750 rc = -dmu_free_range(osd->od_os, xa_data_dn->dn_object,
751 0, DMU_OBJECT_END, tx);
754 } else if (rc == -ENOENT) {
755 struct lu_attr *la = &osd_oti_get(env)->oti_la;
757 * Entry doesn't exist, we need to create a new one and a new
758 * object to store the value.
760 if (fl & LU_XATTR_REPLACE) {
761 /* should be ENOATTR according to the
762 * man, but that is undefined here */
767 la->la_valid = LA_MODE;
768 la->la_mode = S_IFREG | S_IRUGO | S_IWUSR;
769 rc = __osd_object_create(env, osd, obj,
770 lu_object_fid(&obj->oo_dt.do_lu),
771 &xa_data_dn, tx, la);
774 xa_data_obj = xa_data_dn->dn_object;
776 rc = -sa_handle_get(osd->od_os, xa_data_obj, NULL,
777 SA_HDL_PRIVATE, &sa_hdl);
781 rc = -zap_add(osd->od_os, obj->oo_xattr, name, sizeof(uint64_t),
782 1, &xa_data_obj, tx);
786 /* There was an error looking up the xattr name */
790 /* Finally write the xattr value */
791 dmu_write(osd->od_os, xa_data_obj, 0, buf->lb_len, buf->lb_buf, tx);
794 rc = -sa_update(sa_hdl, SA_ZPL_SIZE(osd), &size, 8, tx);
797 sa_handle_destroy(sa_hdl);
799 if (xa_data_dn != NULL)
800 osd_dnode_rele(xa_data_dn);
801 if (xa_zap_dn != NULL)
802 osd_dnode_rele(xa_zap_dn);
/*
 * Strip the stripe (PFID) information out of the LMA xattr of \a obj.
 *
 * The LMA is read into the per-thread oti_buf as a lustre_ost_attrs and
 * swabbed; it must carry LMAC_STRIPE_INFO (asserted). LMAC_STRIPE_INFO and
 * LMAC_COMP_INFO are cleared from lma_compat, the LMA is swabbed back and
 * rewritten with LU_XATTR_REPLACE at sizeof(struct lustre_mdt_attrs).
 * oo_pfid_in_lma is then cleared (NOTE(review): presumably only on
 * success -- confirm).
 */
807 static int osd_xattr_split_pfid(const struct lu_env *env,
808 struct osd_object *obj, struct osd_thandle *oh)
810 struct osd_thread_info *info = osd_oti_get(env);
811 struct lustre_ost_attrs *loa =
812 (struct lustre_ost_attrs *)&info->oti_buf;
813 struct lustre_mdt_attrs *lma = &loa->loa_lma;
814 struct lu_buf buf = {
816 .lb_len = sizeof(info->oti_buf),
822 BUILD_BUG_ON(sizeof(info->oti_buf) < sizeof(*loa));
823 rc = osd_xattr_get_internal(env, obj, &buf, XATTR_NAME_LMA, &size);
827 lustre_loa_swab(loa, true);
828 LASSERT(lma->lma_compat & LMAC_STRIPE_INFO);
830 lma->lma_compat &= ~(LMAC_STRIPE_INFO | LMAC_COMP_INFO);
831 lustre_lma_swab(lma);
833 buf.lb_len = sizeof(*lma);
834 rc = osd_xattr_set_internal(env, obj, &buf, XATTR_NAME_LMA,
835 LU_XATTR_REPLACE, oh);
837 obj->oo_pfid_in_lma = 0;
843 * In DNE environment, the object (in spite of regular file or directory)
844 * and its name entry may reside on different MDTs. Under such case, we will
845 * create an agent entry on the MDT where the object resides. The agent entry
846 * references the object locally, that makes the object to be visible to the
847 * userspace when mounted as 'zfs' directly. Then the userspace tools, such
848 * as 'tar' can handle the object properly.
850 * We handle the agent entry when setting the linkEA, which is the common
851 * interface for both regular files and directories and covers all the cases,
852 * such as create/link/unlink/rename, and so on.
854 * NOTE: we need to do that for both directory and regular file, so we can NOT
855 * do that when ea_{insert,delete} that are directory based operations.
/*
 * Walk the linkEA in \a buf until an entry whose FID osd_remote_fid()
 * reports as remote is found, then bring the agent entry in line with the
 * result: it is removed when the object has one but no remote parent was
 * found, and added when a remote parent was found but the object has none.
 * A linkEA that yields -ENODATA on init is handled as a separate case.
 */
857 static int osd_xattr_handle_linkea(const struct lu_env *env,
858 struct osd_device *osd,
859 struct osd_object *obj,
860 const struct lu_buf *buf,
861 struct osd_thandle *oh)
863 const struct lu_fid *fid = lu_object_fid(&obj->oo_dt.do_lu);
864 struct lu_fid *tfid = &osd_oti_get(env)->oti_fid;
865 struct linkea_data ldata = { .ld_buf = (struct lu_buf *)buf };
866 struct lu_name tmpname;
871 rc = linkea_init_with_rec(&ldata);
873 linkea_first_entry(&ldata);
874 while (ldata.ld_lee != NULL && !remote) {
875 linkea_entry_unpack(ldata.ld_lee, &ldata.ld_reclen,
877 if (osd_remote_fid(env, osd, tfid) > 0)
880 linkea_next_entry(&ldata);
882 } else if (rc == -ENODATA) {
888 if (lu_object_has_agent_entry(&obj->oo_dt.do_lu) && !remote) {
889 rc = osd_delete_from_remote_parent(env, osd, obj, oh, false);
891 CERROR("%s: failed to remove agent entry for "DFID
892 ": rc = %d\n", osd_name(osd), PFID(fid), rc);
893 } else if (!lu_object_has_agent_entry(&obj->oo_dt.do_lu) && remote) {
894 rc = osd_add_to_remote_parent(env, osd, obj, oh);
896 CWARN("%s: failed to create agent entry for "DFID
897 ": rc = %d\n", osd_name(osd), PFID(fid), rc);
/*
 * dt-level xattr write, performed under a write lock on oo_guard.
 *
 * The POSIX ACL xattr names are special-cased when the device has
 * od_posix_acl unset. A XATTR_NAME_FID update on an object with
 * oo_pfid_in_lma first splits the PFID out of the LMA and switches the
 * flags to LU_XATTR_CREATE. A XATTR_NAME_LINK update on a device with a
 * remote parent directory goes through osd_xattr_handle_linkea() first.
 * The value itself is stored by osd_xattr_set_internal().
 */
903 int osd_xattr_set(const struct lu_env *env, struct dt_object *dt,
904 const struct lu_buf *buf, const char *name, int fl,
905 struct thandle *handle)
907 struct osd_object *obj = osd_dt_obj(dt);
908 struct osd_device *osd = osd_obj2dev(obj);
909 struct osd_thandle *oh;
913 LASSERT(handle != NULL);
914 LASSERT(osd_invariant(obj));
916 if (!osd_obj2dev(obj)->od_posix_acl &&
917 (strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS) == 0 ||
918 strcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
921 oh = container_of(handle, struct osd_thandle, ot_super);
923 down_write(&obj->oo_guard);
924 CDEBUG(D_INODE, "Setting xattr %s with size %d\n",
925 name, (int)buf->lb_len);
926 /* For the OST migrated from ldiskfs, the PFID EA may
927 * be stored in LMA because of ldiskfs inode size. */
928 if (unlikely(strcmp(name, XATTR_NAME_FID) == 0 &&
929 obj->oo_pfid_in_lma)) {
930 rc = osd_xattr_split_pfid(env, obj, oh);
932 fl = LU_XATTR_CREATE;
933 } else if (strcmp(name, XATTR_NAME_LINK) == 0 &&
934 osd->od_remote_parent_dir != ZFS_NO_OBJECT) {
935 rc = osd_xattr_handle_linkea(env, osd, obj, buf, oh);
939 rc = osd_xattr_set_internal(env, obj, buf, name, fl, oh);
940 up_write(&obj->oo_guard);
/*
 * Declare the holds for deleting xattr \a name from \a obj.
 *
 * The object's SA is always held, since the xattr may live in
 * SA_ZPL_DXATTR. If the object has an xattr ZAP and the name is found in
 * it, the data object (bonus and full free) and the ZAP entry are held as
 * well; -ENOENT from the lookup means nothing more is needed.
 */
946 __osd_xattr_declare_del(const struct lu_env *env, struct osd_object *obj,
947 const char *name, struct osd_thandle *oh)
949 struct osd_device *osd = osd_obj2dev(obj);
950 dmu_tx_t *tx = oh->ot_tx;
951 uint64_t xa_data_obj;
954 /* update SA_ZPL_DXATTR if xattr was in SA */
955 dmu_tx_hold_sa(tx, obj->oo_sa_hdl, 0);
957 if (obj->oo_xattr == ZFS_NO_OBJECT)
960 rc = -zap_lookup(osd->od_os, obj->oo_xattr, name, 8, 1, &xa_data_obj);
964 * We'll delete the existing object and ZAP entry.
966 dmu_tx_hold_bonus(tx, xa_data_obj);
967 dmu_tx_hold_free(tx, xa_data_obj, 0, DMU_OBJECT_END);
968 dmu_tx_hold_zap(tx, obj->oo_xattr, FALSE, (char *) name);
970 } else if (rc == -ENOENT) {
972 * Entry doesn't exist, nothing to be changed.
977 /* An error happened */
/*
 * dt-level declaration of an xattr deletion. __osd_xattr_declare_del() is
 * called, under a read lock on oo_guard, only when the object exists and
 * is not marked destroyed.
 */
981 int osd_declare_xattr_del(const struct lu_env *env, struct dt_object *dt,
982 const char *name, struct thandle *handle)
984 struct osd_object *obj = osd_dt_obj(dt);
985 struct osd_thandle *oh;
988 LASSERT(handle != NULL);
989 LASSERT(osd_invariant(obj));
991 oh = container_of(handle, struct osd_thandle, ot_super);
992 LASSERT(oh->ot_tx != NULL);
993 LASSERT(obj->oo_dn != NULL);
995 down_read(&obj->oo_guard);
996 if (likely(dt_object_exists(&obj->oo_dt) && !obj->oo_destroyed))
997 __osd_xattr_declare_del(env, obj, name, oh);
998 up_read(&obj->oo_guard);
/*
 * Remove xattr \a name from the SA xattr nvlist of \a obj.
 *
 * After the removal, XATTR_NAME_LMV is written back immediately with
 * __osd_sa_xattr_update(); any other name only schedules a batched update.
 */
1003 static int __osd_sa_xattr_del(const struct lu_env *env, struct osd_object *obj,
1004 const char *name, struct osd_thandle *oh)
1008 rc = __osd_xattr_cache(obj);
1012 rc = -nvlist_remove(obj->oo_sa_xattr, name, DATA_TYPE_BYTE_ARRAY);
1017 * only migrate delete LMV, and it needs to be done immediately, because
1018 * it's used in deleting sub stripes, and if this is delayed, later when
1019 * destroying the master object, it will delete sub stripes again.
1021 if (!strcmp(name, XATTR_NAME_LMV))
1022 rc = __osd_sa_xattr_update(env, obj, oh);
1024 rc = __osd_sa_xattr_schedule_update(env, obj, oh);
/*
 * Delete xattr \a name from \a obj.
 *
 * An object that does not exist or is destroyed is checked for first. The
 * SA copy is tried before the xattr ZAP; in the ZAP case the data object
 * is freed with dmu_object_free() and the entry removed with zap_remove().
 */
1028 static int __osd_xattr_del(const struct lu_env *env, struct osd_object *obj,
1029 const char *name, struct osd_thandle *oh)
1031 struct osd_device *osd = osd_obj2dev(obj);
1032 uint64_t xa_data_obj;
1035 if (unlikely(!dt_object_exists(&obj->oo_dt) || obj->oo_destroyed))
1038 /* try remove xattr from SA at first */
1039 rc = __osd_sa_xattr_del(env, obj, name, oh);
1043 if (obj->oo_xattr == ZFS_NO_OBJECT)
1046 rc = -zap_lookup(osd->od_os, obj->oo_xattr, name, sizeof(uint64_t), 1,
1048 if (rc == -ENOENT) {
1050 } else if (rc == 0) {
1053 * We'll delete the existing object and ZAP entry.
1055 rc = -dmu_object_free(osd->od_os, xa_data_obj, oh->ot_tx);
1059 rc = -zap_remove(osd->od_os, obj->oo_xattr, name, oh->ot_tx);
/*
 * dt-level xattr deletion, performed under a write lock on oo_guard.
 *
 * The POSIX ACL xattr names return -EOPNOTSUPP when the device has
 * od_posix_acl unset. Deleting XATTR_NAME_FID from an object with
 * oo_pfid_in_lma is done by splitting the PFID out of the LMA; all other
 * cases go through __osd_xattr_del().
 */
1065 int osd_xattr_del(const struct lu_env *env, struct dt_object *dt,
1066 const char *name, struct thandle *handle)
1068 struct osd_object *obj = osd_dt_obj(dt);
1069 struct osd_thandle *oh;
1073 LASSERT(handle != NULL);
1074 LASSERT(obj->oo_dn != NULL);
1075 LASSERT(osd_invariant(obj));
1076 LASSERT(dt_object_exists(dt));
1077 oh = container_of(handle, struct osd_thandle, ot_super);
1078 LASSERT(oh->ot_tx != NULL);
1080 if (!osd_obj2dev(obj)->od_posix_acl &&
1081 (strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS) == 0 ||
1082 strcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
1083 RETURN(-EOPNOTSUPP);
1085 down_write(&obj->oo_guard);
1086 /* For the OST migrated from ldiskfs, the PFID EA may
1087 * be stored in LMA because of ldiskfs inode size. */
1088 if (unlikely(strcmp(name, XATTR_NAME_FID) == 0 && obj->oo_pfid_in_lma))
1089 rc = osd_xattr_split_pfid(env, obj, oh);
1091 rc = __osd_xattr_del(env, obj, name, oh);
1092 up_write(&obj->oo_guard);
/*
 * Declare the frees needed to destroy every ZAP-based xattr of \a obj.
 *
 * Nothing is declared when the object has no xattr ZAP. Otherwise a free
 * of the ZAP itself is held, then the ZAP is walked with a cursor and a
 * free is held for the data object of every entry. A lookup failure is
 * logged; a non-zero rc is checked against tx->tx_err at the end.
 */
1097 void osd_declare_xattrs_destroy(const struct lu_env *env,
1098 struct osd_object *obj, struct osd_thandle *oh)
1100 struct osd_device *osd = osd_obj2dev(obj);
1101 zap_attribute_t *za = &osd_oti_get(env)->oti_za;
1102 uint64_t oid = obj->oo_xattr, xid;
1103 dmu_tx_t *tx = oh->ot_tx;
1107 if (oid == ZFS_NO_OBJECT)
1108 return; /* Nothing to do for SA xattrs */
1110 /* Declare to free the ZAP holding xattrs */
1111 dmu_tx_hold_free(tx, oid, 0, DMU_OBJECT_END);
1113 rc = osd_zap_cursor_init(&zc, osd->od_os, oid, 0);
1117 while (zap_cursor_retrieve(zc, za) == 0) {
1118 LASSERT(za->za_num_integers == 1);
1119 LASSERT(za->za_integer_length == sizeof(uint64_t));
1121 rc = -zap_lookup(osd->od_os, oid, za->za_name,
1122 sizeof(uint64_t), 1, &xid);
1124 CERROR("%s: xattr %s lookup failed: rc = %d\n",
1125 osd->od_svname, za->za_name, rc);
1128 dmu_tx_hold_free(tx, xid, 0, DMU_OBJECT_END);
1130 zap_cursor_advance(zc);
1133 osd_zap_cursor_fini(zc);
1135 if (rc && tx->tx_err == 0)
/*
 * Free every ZAP-based xattr of \a obj.
 *
 * The tx must already be assigned to a txg (asserted). Returns 0 when the
 * object has no xattr ZAP. Otherwise the ZAP is walked with a cursor,
 * the data object of each entry is freed with dmu_object_free(), and
 * finally the xattr ZAP object itself is freed. Failures are logged with
 * CERROR.
 */
1139 int osd_xattrs_destroy(const struct lu_env *env,
1140 struct osd_object *obj, struct osd_thandle *oh)
1142 struct osd_device *osd = osd_obj2dev(obj);
1143 dmu_tx_t *tx = oh->ot_tx;
1144 zap_attribute_t *za = &osd_oti_get(env)->oti_za;
1149 /* The transaction must have been assigned to a transaction group. */
1150 LASSERT(tx->tx_txg != 0);
1152 if (obj->oo_xattr == ZFS_NO_OBJECT)
1153 return 0; /* Nothing to do for SA xattrs */
1155 /* Free the ZAP holding the xattrs */
1156 rc = osd_zap_cursor_init(&zc, osd->od_os, obj->oo_xattr, 0);
1160 while (zap_cursor_retrieve(zc, za) == 0) {
1161 LASSERT(za->za_num_integers == 1);
1162 LASSERT(za->za_integer_length == sizeof(uint64_t));
1164 rc = -zap_lookup(osd->od_os, obj->oo_xattr, za->za_name,
1165 sizeof(uint64_t), 1, &xid);
1167 CERROR("%s: lookup xattr %s failed: rc = %d\n",
1168 osd->od_svname, za->za_name, rc);
1170 rc = -dmu_object_free(osd->od_os, xid, tx);
1172 CERROR("%s: free xattr %s failed: rc = %d\n",
1173 osd->od_svname, za->za_name, rc);
1175 zap_cursor_advance(zc);
1177 osd_zap_cursor_fini(zc);
1179 rc = -dmu_object_free(osd->od_os, obj->oo_xattr, tx);
1181 CERROR("%s: free xattr %llu failed: rc = %d\n",
1182 osd->od_svname, obj->oo_xattr, rc);
/*
 * Walk the SA xattr nvlist of \a obj and collect the xattr names.
 *
 * The POSIX ACL names are special-cased when the device has od_posix_acl
 * unset. When lb->lb_buf is set, each name is first checked to fit in
 * lb->lb_len and then len + 1 bytes of it are copied at offset 'counted'
 * (NOTE(review): presumably len is strlen(name), so the NUL terminator is
 * included -- confirm).
 */
1188 osd_sa_xattr_list(const struct lu_env *env, struct osd_object *obj,
1189 const struct lu_buf *lb)
1191 nvpair_t *nvp = NULL;
1192 int len, counted = 0;
1195 rc = __osd_xattr_cache(obj);
1199 while ((nvp = nvlist_next_nvpair(obj->oo_sa_xattr, nvp)) != NULL) {
1200 const char *name = nvpair_name(nvp);
1202 if (!osd_obj2dev(obj)->od_posix_acl &&
1203 (strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS) == 0 ||
1204 strcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
1208 if (lb->lb_buf != NULL) {
1209 if (counted + len + 1 > lb->lb_len)
1212 memcpy(lb->lb_buf + counted, name, len + 1);
/*
 * dt-level xattr listing, performed under a read lock on oo_guard.
 *
 * Names from the SA xattrs are listed first via osd_sa_xattr_list(); if
 * the object has an xattr ZAP, its entries follow. The POSIX ACL names are
 * skipped in the ZAP walk when the device has od_posix_acl unset. When
 * lb->lb_buf is set, each ZAP name is copied together with its NUL
 * terminator, and -ERANGE is returned if it does not fit in lb->lb_len.
 * -ENOENT from the cursor marks the end of the ZAP.
 */
1219 int osd_xattr_list(const struct lu_env *env, struct dt_object *dt,
1220 const struct lu_buf *lb)
1222 struct osd_object *obj = osd_dt_obj(dt);
1223 struct osd_device *osd = osd_obj2dev(obj);
1224 zap_attribute_t *za = &osd_oti_get(env)->oti_za;
1229 LASSERT(obj->oo_dn != NULL);
1230 LASSERT(osd_invariant(obj));
1231 LASSERT(dt_object_exists(dt));
1233 down_read(&obj->oo_guard);
1235 rc = osd_sa_xattr_list(env, obj, lb);
1241 /* continue with dnode xattr if any */
1242 if (obj->oo_xattr == ZFS_NO_OBJECT)
1243 GOTO(out, rc = counted);
1245 rc = osd_zap_cursor_init(&zc, osd->od_os, obj->oo_xattr, 0);
1249 while ((rc = -zap_cursor_retrieve(zc, za)) == 0) {
1250 if (!osd_obj2dev(obj)->od_posix_acl &&
1251 (strcmp(za->za_name, XATTR_NAME_POSIX_ACL_ACCESS) == 0 ||
1252 strcmp(za->za_name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0)) {
1253 zap_cursor_advance(zc);
1257 rc = strlen(za->za_name);
1258 if (lb->lb_buf != NULL) {
1259 if (counted + rc + 1 > lb->lb_len)
1260 GOTO(out_fini, rc = -ERANGE);
1262 memcpy(lb->lb_buf + counted, za->za_name, rc + 1);
1266 zap_cursor_advance(zc);
1268 if (rc == -ENOENT) /* no more keys in the index */
1270 else if (unlikely(rc < 0))
1275 osd_zap_cursor_fini(zc);
1277 up_read(&obj->oo_guard);