4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright (c) 2012, 2017, Intel Corporation.
29 * This file is part of Lustre, http://www.lustre.org/
30 * Lustre is a trademark of Sun Microsystems, Inc.
32 * lustre/osd-zfs/osd_xattr.c
33 * functions to manipulate extended attributes and system attributes
35 * Author: Alex Zhuravlev <bzzz@whamcloud.com>
36 * Author: Mike Pershin <tappro@whamcloud.com>
39 #define DEBUG_SUBSYSTEM S_OSD
41 #include <libcfs/libcfs.h>
42 #include <obd_support.h>
43 #include <lustre_net.h>
45 #include <obd_class.h>
46 #include <lustre_disk.h>
47 #include <lustre_fid.h>
48 #include <lustre_linkea.h>
50 #include "osd_internal.h"
52 #include <sys/dnode.h>
57 #include <sys/spa_impl.h>
58 #include <sys/zfs_znode.h>
59 #include <sys/dmu_tx.h>
60 #include <sys/dmu_objset.h>
61 #include <sys/dsl_prop.h>
62 #include <sys/sa_impl.h>
65 #include <linux/posix_acl_xattr.h>
66 #include <lustre_scrub.h>
/*
 * Load the packed xattr nvlist kept in the SA_ZPL_DXATTR system attribute.
 *
 * The visible steps are: query the attribute size with sa_size(), read the
 * blob into a temporary osd_zio buffer with sa_lookup(), unpack it into
 * \a sa with nvlist_unpack() and free the temporary buffer.  There is also
 * an nvlist_alloc(NV_UNIQUE_NAME) call that creates an empty list.
 * NOTE(review): that allocation presumably covers the case where the
 * attribute is not present yet -- confirm against the branch condition.
 *
 * Return codes of the ZFS calls are negated before being stored in rc.
 */
68 int __osd_xattr_load(struct osd_device *osd, sa_handle_t *hdl, nvlist_t **sa)
73 rc = -sa_size(hdl, SA_ZPL_DXATTR(osd), &size);
76 rc = -nvlist_alloc(sa, NV_UNIQUE_NAME, KM_SLEEP);
80 buf = osd_zio_buf_alloc(size);
85 rc = -sa_lookup(hdl, SA_ZPL_DXATTR(osd), buf, size);
87 rc = -nvlist_unpack(buf, size, sa, KM_SLEEP);
88 osd_zio_buf_free(buf, size);
/*
 * Make sure the per-object SA xattr nvlist (obj->oo_sa_xattr) is populated.
 *
 * Requires an SA handle on the object (asserted).  When the cached pointer
 * is already set the load is skipped; otherwise the list is filled in by
 * __osd_xattr_load() and that call's result is returned.
 * NOTE(review): the value returned on the already-cached path is
 * presumably 0 -- confirm.
 */
94 static inline int __osd_xattr_cache(struct osd_object *obj)
96 LASSERT(obj->oo_sa_hdl);
97 if (obj->oo_sa_xattr != NULL)
99 return __osd_xattr_load(osd_obj2dev(obj),
100 obj->oo_sa_hdl, &obj->oo_sa_xattr);
/*
 * Fetch xattr \a name from the SA-resident nvlist of \a obj.
 *
 * The nvlist cache is loaded first, then the value is looked up as a byte
 * array.  If \a buf is NULL or has no data pointer only the size is
 * reported through \a sizep.  If the value does not fit in buf->lb_len
 * the function returns -ERANGE; otherwise *sizep bytes are copied into
 * buf->lb_buf.
 */
104 __osd_sa_xattr_get(const struct lu_env *env, struct osd_object *obj,
105 const struct lu_buf *buf, const char *name, int *sizep)
110 rc = __osd_xattr_cache(obj);
114 LASSERT(obj->oo_sa_xattr);
115 rc = -nvlist_lookup_byte_array(obj->oo_sa_xattr, name,
120 if (buf == NULL || buf->lb_buf == NULL) {
121 /* return the required size by *sizep */
125 if (*sizep > buf->lb_len)
126 return -ERANGE; /* match ldiskfs error */
128 memcpy(buf->lb_buf, nv_value, *sizep);
/*
 * Read xattr \a name from the xattr ZAP object \a xattr, where every entry
 * maps a name to the number of a separate object holding the value.
 *
 * Visible flow: bail out when \a xattr is ZFS_NO_OBJECT, look the name up
 * in the ZAP, take a dnode reference and a private SA handle on the value
 * object, read its SA_ZPL_SIZE, special-case sizes above INT_MAX, then
 * either report only the size (no buffer supplied), fail with -ERANGE
 * (buffer too small) or dmu_read() the value into buf->lb_buf.  The SA
 * handle and the dnode reference are dropped on the way out.
 * NOTE(review): the result codes chosen for the "no xattr object" and
 * "size > INT_MAX" branches are not visible here -- confirm.
 */
132 int __osd_xattr_get_large(const struct lu_env *env, struct osd_device *osd,
133 uint64_t xattr, struct lu_buf *buf,
134 const char *name, int *sizep)
137 sa_handle_t *sa_hdl = NULL;
138 uint64_t xa_data_obj, size;
141 /* are there any extended attributes? */
142 if (xattr == ZFS_NO_OBJECT)
145 /* Lookup the object number containing the xattr data */
146 rc = -zap_lookup(osd->od_os, xattr, name, sizeof(uint64_t), 1,
151 rc = __osd_obj2dnode(osd->od_os, xa_data_obj, &xa_data_dn);
155 rc = -sa_handle_get(osd->od_os, xa_data_obj, NULL, SA_HDL_PRIVATE,
160 /* Get the xattr value length / object size */
161 rc = -sa_lookup(sa_hdl, SA_ZPL_SIZE(osd), &size, 8);
165 if (size > INT_MAX) {
172 if (buf == NULL || buf->lb_buf == NULL) {
173 /* We only need to return the required size */
176 if (*sizep > buf->lb_len) {
177 rc = -ERANGE; /* match ldiskfs error */
181 rc = -dmu_read(osd->od_os, xa_data_dn->dn_object, 0,
182 size, buf->lb_buf, DMU_READ_PREFETCH);
185 sa_handle_destroy(sa_hdl);
187 osd_dnode_rele(xa_data_dn);
193 * Copy an extended attribute into the buffer provided, or compute
194 * the required buffer size if \a buf is NULL.
196 * On success, the number of bytes used or required is stored in \a sizep.
198 * Note that no locking is done here.
200 * \param[in] env execution environment
201 * \param[in] obj object for which to retrieve xattr
202 * \param[out] buf buffer to store xattr value in
203 * \param[in] name name of xattr to copy
204 * \param[out] sizep bytes used or required to store xattr
206 * \retval 0 on success
207 * \retval negative negated errno on failure
209 int osd_xattr_get_internal(const struct lu_env *env, struct osd_object *obj,
210 struct lu_buf *buf, const char *name, int *sizep)
/* Objects that do not exist or are marked destroyed take an early exit. */
214 if (unlikely(!dt_object_exists(&obj->oo_dt) || obj->oo_destroyed))
217 /* check SA_ZPL_DXATTR first then fallback to directory xattr */
218 rc = __osd_sa_xattr_get(env, obj, buf, name, sizep);
/* Second source: the xattr ZAP object referenced by obj->oo_xattr. */
222 return __osd_xattr_get_large(env, osd_obj2dev(obj), obj->oo_xattr,
/*
 * Synthesize the parent FID xattr (struct filter_fid) from stripe
 * information embedded in the LMA xattr.
 *
 * The LMA is read into the per-thread scratch buffer oti_buf (its size is
 * checked at build time against struct lustre_ost_attrs), byte-swapped
 * with lustre_loa_swab() and must carry LMAC_STRIPE_INFO (asserted).
 * *sizep is set to sizeof(struct filter_fid).  When \a buf has no room
 * pointer or zero length, or is too small, separate branches are taken
 * (their results are not visible here).  Otherwise the ost_layout fields
 * and ff_parent are filled in little-endian form; the component fields
 * are copied only when LMAC_COMP_INFO is set.
 *
 * Note that loa_parent_fid.f_ver is split in place: the stripe count is
 * taken from the bits above PFID_STRIPE_IDX_BITS and the field is then
 * masked with PFID_STRIPE_COUNT_MASK before being stored as ff_parent.
 */
226 static int osd_get_pfid_from_lma(const struct lu_env *env,
227 struct osd_object *obj,
228 struct lu_buf *buf, int *sizep)
230 struct osd_thread_info *info = osd_oti_get(env);
231 struct lustre_ost_attrs *loa =
232 (struct lustre_ost_attrs *)&info->oti_buf;
233 struct lustre_mdt_attrs *lma = &loa->loa_lma;
234 struct filter_fid *ff;
235 struct ost_layout *ol;
236 struct lu_buf tbuf = {
238 .lb_len = sizeof(info->oti_buf),
243 BUILD_BUG_ON(sizeof(info->oti_buf) < sizeof(*loa));
244 rc = osd_xattr_get_internal(env, obj, &tbuf,
245 XATTR_NAME_LMA, sizep);
249 lustre_loa_swab(loa, true);
250 LASSERT(lma->lma_compat & LMAC_STRIPE_INFO);
252 *sizep = sizeof(*ff);
253 if (buf->lb_len == 0 || !buf->lb_buf)
256 if (buf->lb_len < *sizep)
261 ol->ol_stripe_count = cpu_to_le32(loa->loa_parent_fid.f_ver >>
262 PFID_STRIPE_IDX_BITS);
263 ol->ol_stripe_size = cpu_to_le32(loa->loa_stripe_size);
264 loa->loa_parent_fid.f_ver &= PFID_STRIPE_COUNT_MASK;
265 fid_cpu_to_le(&ff->ff_parent, &loa->loa_parent_fid);
266 if (lma->lma_compat & LMAC_COMP_INFO) {
267 ol->ol_comp_start = cpu_to_le64(loa->loa_comp_start);
268 ol->ol_comp_end = cpu_to_le64(loa->loa_comp_end);
269 ol->ol_comp_id = cpu_to_le32(loa->loa_comp_id);
271 ol->ol_comp_start = 0;
/*
 * Read xattr \a name of \a dt into \a buf.
 *
 * POSIX ACL xattr names are filtered out when the device has od_posix_acl
 * disabled.  The lookup itself runs under the read side of obj->oo_guard;
 * objects that do not exist or are destroyed release the lock and exit
 * early.  XATTR_NAME_FID is served from the LMA when obj->oo_pfid_in_lma
 * is set, every other request goes through osd_xattr_get_internal().
 * NOTE(review): how the resulting size is turned into the return value is
 * not visible here -- confirm.
 */
279 int osd_xattr_get(const struct lu_env *env, struct dt_object *dt,
280 struct lu_buf *buf, const char *name)
282 struct osd_object *obj = osd_dt_obj(dt);
286 LASSERT(obj->oo_dn != NULL);
287 LASSERT(osd_invariant(obj));
289 if (!osd_obj2dev(obj)->od_posix_acl &&
290 (strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS) == 0 ||
291 strcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
294 down_read(&obj->oo_guard);
295 if (unlikely(!dt_object_exists(dt) || obj->oo_destroyed)) {
296 up_read(&obj->oo_guard);
300 /* For the OST migrated from ldiskfs, the PFID EA may
301 * be stored in LMA because of ldiskfs inode size. */
302 if (strcmp(name, XATTR_NAME_FID) == 0 && obj->oo_pfid_in_lma)
303 rc = osd_get_pfid_from_lma(env, obj, buf, &size);
305 rc = osd_xattr_get_internal(env, obj, buf, name, &size);
306 up_read(&obj->oo_guard);
/*
 * Reserve transaction space for storing xattr \a name (value length
 * \a vallen) in the ZAP-based xattr layout.
 *
 * Three situations are distinguished by the visible code:
 *  - the object has no xattr ZAP yet: hold a new ZAP plus entry, an SA
 *    create and a write to a new value object;
 *  - the name already exists in the ZAP: hold the bonus of the value
 *    object, a free of its tail and a write of \a vallen bytes;
 *  - the name is absent (-ENOENT): hold the xattr ZAP (bonus and entry),
 *    an SA create and a write to a new value object.
 */
315 /* the function is used to declare EAs when SA is not supported */
316 void __osd_xattr_declare_legacy(const struct lu_env *env,
317 struct osd_object *obj,
318 int vallen, const char *name,
319 struct osd_thandle *oh)
321 struct osd_device *osd = osd_obj2dev(obj);
322 dmu_tx_t *tx = oh->ot_tx;
323 uint64_t xa_data_obj;
326 if (obj->oo_xattr == ZFS_NO_OBJECT) {
327 /* xattr zap + entry */
328 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, TRUE, (char *) name);
329 /* xattr value obj */
330 dmu_tx_hold_sa_create(tx, ZFS_SA_BASE_ATTR_SIZE);
331 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, vallen);
335 rc = -zap_lookup(osd->od_os, obj->oo_xattr, name, sizeof(uint64_t), 1,
339 * Entry already exists.
340 * We'll truncate the existing object.
342 dmu_tx_hold_bonus(tx, xa_data_obj);
343 dmu_tx_hold_free(tx, xa_data_obj, vallen, DMU_OBJECT_END);
344 dmu_tx_hold_write(tx, xa_data_obj, 0, vallen);
345 } else if (rc == -ENOENT) {
347 * Entry doesn't exist, we need to create a new one and a new
348 * object to store the value.
350 dmu_tx_hold_bonus(tx, obj->oo_xattr);
351 dmu_tx_hold_zap(tx, obj->oo_xattr, TRUE, (char *) name);
352 dmu_tx_hold_sa_create(tx, ZFS_SA_BASE_ATTR_SIZE);
353 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, vallen);
/*
 * Reserve transaction space for setting xattr \a name with a value of
 * \a vallen bytes on \a obj.
 *
 * Destroyed objects take an early exit.  Setting XATTR_NAME_LINK on a
 * device that has a remote parent directory additionally holds that
 * directory ZAP for a possible agent entry.  Devices without
 * od_xattr_in_sa delegate to __osd_xattr_declare_legacy().  Otherwise:
 *  - for an existing object the spill block of its dnode is held;
 *  - for an object still being created the running total
 *    obj->oo_ea_in_bonus is compared with osd_obj_bonuslen() and a spill
 *    block for the new object is held once the total would exceed it;
 *    the total is then increased by \a vallen.
 */
357 void __osd_xattr_declare_set(const struct lu_env *env, struct osd_object *obj,
358 int vallen, const char *name,
359 struct osd_thandle *oh)
361 struct osd_device *osd = osd_obj2dev(obj);
362 dmu_tx_t *tx = oh->ot_tx;
365 if (unlikely(obj->oo_destroyed))
368 if (strcmp(name, XATTR_NAME_LINK) == 0 &&
369 osd->od_remote_parent_dir != ZFS_NO_OBJECT) {
370 /* If some name entry resides on remote MDT, then will create
371 * agent entry under remote parent. On the other hand, if the
372 * remote entry will be removed, then related agent entry may
373 * need to be removed from the remote parent. So there may be
374 * kinds of cases, let's declare enough credits. The credits
375 * for create agent entry is enough for remove case. */
376 osd_tx_hold_zap(tx, osd->od_remote_parent_dir,
380 if (unlikely(!osd_obj2dev(obj)->od_xattr_in_sa)) {
381 __osd_xattr_declare_legacy(env, obj, vallen, name, oh);
385 /* declare EA in SA */
386 if (dt_object_exists(&obj->oo_dt)) {
387 LASSERT(obj->oo_sa_hdl);
388 /* XXX: it should be possible to skip spill
389 * declaration if specific EA is part of
390 * bonus and doesn't grow */
391 dmu_tx_hold_spill(tx, obj->oo_dn->dn_object);
395 bonuslen = osd_obj_bonuslen(obj);
397 /* the object doesn't exist, but we've declared bonus
398 * in osd_declare_object_create() yet */
399 if (obj->oo_ea_in_bonus > bonuslen) {
400 /* spill has been declared already */
401 } else if (obj->oo_ea_in_bonus + vallen > bonuslen) {
402 /* we're about to exceed bonus, let's declare spill */
403 dmu_tx_hold_spill(tx, DMU_NEW_OBJECT);
405 obj->oo_ea_in_bonus += vallen;
/*
 * Declare a later xattr set of \a name on \a dt within transaction
 * \a handle.
 *
 * Converts the generic handle to the osd_thandle that embeds it and calls
 * __osd_xattr_declare_set() with buf->lb_len as the value length while
 * holding the read side of obj->oo_guard.  \a fl is not used on the
 * visible lines.
 */
408 int osd_declare_xattr_set(const struct lu_env *env, struct dt_object *dt,
409 const struct lu_buf *buf, const char *name,
410 int fl, struct thandle *handle)
412 struct osd_object *obj = osd_dt_obj(dt);
413 struct osd_thandle *oh;
416 LASSERT(handle != NULL);
417 oh = container_of(handle, struct osd_thandle, ot_super);
419 down_read(&obj->oo_guard);
420 __osd_xattr_declare_set(env, obj, buf->lb_len, name, oh);
421 up_read(&obj->oo_guard);
/*
 * Write the complete set of system attributes of \a obj in one call,
 * including the packed xattr nvlist.
 *
 * The pending flags oo_late_xattr and oo_late_attr_set are cleared, the
 * per-thread osa_attr scratch area is filled from obj->oo_attr (the
 * current time becomes the birth time, which is also recorded in
 * oo_attr with LA_BTIME), and a bulk attribute array is assembled.  With
 * ZFS_PROJINHERIT and project accounting available the project id is
 * added as well, defaulting to ZFS_DEFAULT_PROJID.  The xattr nvlist is
 * sized, packed with XDR encoding into the per-thread oti_xattr_lbuf and
 * appended as SA_ZPL_DXATTR.  Everything is stored through
 * sa_replace_all_by_template().
 *
 * The number of bulk entries is asserted not to exceed the size of the
 * per-thread oti_attr_bulk array.
 */
426 int __osd_sa_attr_init(const struct lu_env *env, struct osd_object *obj,
427 struct osd_thandle *oh)
429 sa_bulk_attr_t *bulk = osd_oti_get(env)->oti_attr_bulk;
430 struct osa_attr *osa = &osd_oti_get(env)->oti_osa;
431 struct lu_buf *lb = &osd_oti_get(env)->oti_xattr_lbuf;
432 struct osd_device *osd = osd_obj2dev(obj);
434 inode_timespec_t now;
438 obj->oo_late_xattr = 0;
439 obj->oo_late_attr_set = 0;
441 gen = dmu_tx_get_txg(oh->ot_tx);
443 ZFS_TIME_ENCODE(&now, osa->btime);
445 obj->oo_attr.la_valid |= LA_BTIME;
446 obj->oo_attr.la_btime = osa->btime[0];
447 osa->atime[0] = obj->oo_attr.la_atime;
448 osa->ctime[0] = obj->oo_attr.la_ctime;
449 osa->mtime[0] = obj->oo_attr.la_mtime;
450 osa->mode = obj->oo_attr.la_mode;
451 osa->uid = obj->oo_attr.la_uid;
452 osa->gid = obj->oo_attr.la_gid;
453 osa->rdev = obj->oo_attr.la_rdev;
454 osa->nlink = obj->oo_attr.la_nlink;
455 osa->flags = attrs_fs2zfs(obj->oo_attr.la_flags);
456 osa->size = obj->oo_attr.la_size;
457 #ifdef ZFS_PROJINHERIT
458 if (osd->od_projectused_dn) {
459 if (obj->oo_attr.la_valid & LA_PROJID)
460 osa->projid = obj->oo_attr.la_projid;
462 osa->projid = ZFS_DEFAULT_PROJID;
463 osa->flags |= ZFS_PROJID;
464 obj->oo_with_projid = 1;
469 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MODE(osd), NULL, &osa->mode, 8);
470 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_SIZE(osd), NULL, &osa->size, 8);
471 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_GEN(osd), NULL, &gen, 8);
472 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_UID(osd), NULL, &osa->uid, 8);
473 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_GID(osd), NULL, &osa->gid, 8);
474 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_PARENT(osd), NULL,
476 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_FLAGS(osd), NULL, &osa->flags, 8);
477 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_ATIME(osd), NULL, osa->atime, 16);
478 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MTIME(osd), NULL, osa->mtime, 16);
479 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CTIME(osd), NULL, osa->ctime, 16);
480 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CRTIME(osd), NULL, osa->btime, 16);
481 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_LINKS(osd), NULL, &osa->nlink, 8);
482 #ifdef ZFS_PROJINHERIT
483 if (osd->od_projectused_dn)
484 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_PROJID(osd), NULL,
487 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_RDEV(osd), NULL, &osa->rdev, 8);
488 LASSERT(cnt <= ARRAY_SIZE(osd_oti_get(env)->oti_attr_bulk));
490 /* Update the SA for additions, modifications, and removals. */
491 rc = -nvlist_size(obj->oo_sa_xattr, &size, NV_ENCODE_XDR);
495 lu_buf_check_and_alloc(lb, size);
496 if (lb->lb_buf == NULL) {
497 CERROR("%s: can't allocate buffer for xattr update\n",
502 rc = -nvlist_pack(obj->oo_sa_xattr, (char **)&lb->lb_buf, &size,
503 NV_ENCODE_XDR, KM_SLEEP);
507 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_DXATTR(osd), NULL, lb->lb_buf, size);
509 rc = -sa_replace_all_by_template(obj->oo_sa_hdl, bulk, cnt, oh->ot_tx);
/*
 * Flush the in-memory xattr nvlist of \a obj to its SA_ZPL_DXATTR
 * attribute right away.
 *
 * Clears oo_late_xattr, sizes the nvlist, makes sure the per-thread
 * oti_xattr_lbuf is large enough, packs the list with XDR encoding and
 * stores the result with sa_update() in transaction oh->ot_tx.  The
 * assertion checks that nvlist_pack() packed into the supplied buffer
 * rather than allocating a new one.
 * NOTE(review): the result of sa_update() is not captured on the visible
 * line -- confirm this is intentional.
 */
514 int __osd_sa_xattr_update(const struct lu_env *env, struct osd_object *obj,
515 struct osd_thandle *oh)
517 struct lu_buf *lb = &osd_oti_get(env)->oti_xattr_lbuf;
518 struct osd_device *osd = osd_obj2dev(obj);
523 obj->oo_late_xattr = 0;
525 /* Update the SA for additions, modifications, and removals. */
526 rc = -nvlist_size(obj->oo_sa_xattr, &size, NV_ENCODE_XDR);
530 lu_buf_check_and_alloc(lb, size);
531 if (lb->lb_buf == NULL) {
532 CERROR("%s: can't allocate buffer for xattr update\n",
538 rc = -nvlist_pack(obj->oo_sa_xattr, &dxattr, &size,
539 NV_ENCODE_XDR, KM_SLEEP);
542 LASSERT(dxattr == lb->lb_buf);
544 sa_update(obj->oo_sa_hdl, SA_ZPL_DXATTR(osd), dxattr, size, oh->ot_tx);
550 * Set an extended attribute.
551 * This transaction must have called __osd_xattr_declare_set() first.
553 * Returns 0 on success or a negative error number on failure.
555 * No locking is done here.
/*
 * Defer the write-back of the xattr nvlist of \a obj.
 *
 * Requires both the SA handle and the cached nvlist (asserted).  The
 * object is flagged with oo_late_xattr and queued with
 * osd_object_sa_dirty_add() so that the SA update happens in a batch.
 */
557 int __osd_sa_xattr_schedule_update(const struct lu_env *env,
558 struct osd_object *obj,
559 struct osd_thandle *oh)
562 LASSERT(obj->oo_sa_hdl);
563 LASSERT(obj->oo_sa_xattr);
565 /* schedule batched SA update in osd_object_sa_dirty_rele() */
566 obj->oo_late_xattr = 1;
567 osd_object_sa_dirty_add(obj, oh);
/*
 * Store xattr \a name in the SA-resident nvlist of \a obj.
 *
 * Visible flow:
 *  - load the nvlist cache;
 *  - values longer than OBD_MAX_EA_SIZE are special-cased, otherwise the
 *    packed size of the list plus the new value is compared against
 *    DXATTR_MAX_SA_SIZE;
 *  - look the name up: an existing entry is checked against
 *    LU_XATTR_CREATE; when the value is too big for the SA the entry is
 *    removed from the nvlist, an update is scheduled and -EFBIG is
 *    returned (or the scheduling error); a missing entry is checked
 *    against LU_XATTR_REPLACE;
 *  - if the object has an xattr ZAP, a stale copy under the same name is
 *    looked up, its value object freed and its ZAP entry removed;
 *  - add the value to the nvlist and write it back: batched for dnodes
 *    allocated in the current txg, immediately otherwise.
 * NOTE(review): the result of zap_remove() is not captured on the visible
 * line, and the error codes of the flag checks are not visible -- confirm.
 */
573 int __osd_sa_xattr_set(const struct lu_env *env, struct osd_object *obj,
574 const struct lu_buf *buf, const char *name, int fl,
575 struct osd_thandle *oh)
583 rc = __osd_xattr_cache(obj);
587 LASSERT(obj->oo_sa_xattr);
588 if (buf->lb_len > OBD_MAX_EA_SIZE) {
591 /* Prevent the DXATTR SA from consuming the entire SA
593 rc = -nvlist_size(obj->oo_sa_xattr, &size, NV_ENCODE_XDR);
597 if (size + buf->lb_len > DXATTR_MAX_SA_SIZE)
601 /* even in case of -EFBIG we must lookup xattr and check can we
602 * rewrite it then delete from SA */
603 rc = -nvlist_lookup_byte_array(obj->oo_sa_xattr, name, &nv_value,
606 if (fl & LU_XATTR_CREATE) {
608 } else if (too_big) {
609 rc = -nvlist_remove(obj->oo_sa_xattr, name,
610 DATA_TYPE_BYTE_ARRAY);
613 rc = __osd_sa_xattr_schedule_update(env, obj, oh);
614 return rc == 0 ? -EFBIG : rc;
616 } else if (rc == -ENOENT) {
617 if (fl & LU_XATTR_REPLACE)
625 /* Ensure xattr doesn't exist in ZAP */
626 if (obj->oo_xattr != ZFS_NO_OBJECT) {
627 struct osd_device *osd = osd_obj2dev(obj);
629 rc = -zap_lookup(osd->od_os, obj->oo_xattr,
632 rc = -dmu_object_free(osd->od_os, objid, oh->ot_tx);
634 zap_remove(osd->od_os, obj->oo_xattr,
639 rc = -nvlist_add_byte_array(obj->oo_sa_xattr, name,
640 (uchar_t *)buf->lb_buf, buf->lb_len);
644 /* batch updates only for just created dnodes where we
645 * used to set number of EAs in a single transaction */
646 if (obj->oo_dn->dn_allocated_txg == oh->ot_tx->tx_txg)
647 rc = __osd_sa_xattr_schedule_update(env, obj, oh);
649 rc = __osd_sa_xattr_update(env, obj, oh);
/*
 * Store xattr \a name in the ZAP-based layout: the xattr ZAP of \a obj
 * maps the name to a separate object that holds the value.
 *
 * Visible flow:
 *  - create the xattr ZAP when the object has none and record its object
 *    number in SA_ZPL_XATTR;
 *  - look the name up; an existing entry is checked against
 *    LU_XATTR_CREATE and its value object is truncated with
 *    dmu_free_range(); a missing entry is checked against
 *    LU_XATTR_REPLACE, after which a new regular-file object is created
 *    and added to the ZAP; any other lookup result is an error;
 *  - write the value with dmu_write() and store the size in SA_ZPL_SIZE;
 *  - drop the private SA handle and the dnode references.
 * NOTE(review): the error codes returned for the flag checks are not
 * visible here -- confirm.
 */
655 __osd_xattr_set(const struct lu_env *env, struct osd_object *obj,
656 const struct lu_buf *buf, const char *name, int fl,
657 struct osd_thandle *oh)
659 struct osd_device *osd = osd_obj2dev(obj);
660 dnode_t *xa_zap_dn = NULL;
661 dnode_t *xa_data_dn = NULL;
662 uint64_t xa_data_obj;
663 sa_handle_t *sa_hdl = NULL;
664 dmu_tx_t *tx = oh->ot_tx;
668 LASSERT(obj->oo_sa_hdl);
670 if (obj->oo_xattr == ZFS_NO_OBJECT) {
671 struct lu_attr *la = &osd_oti_get(env)->oti_la;
673 la->la_valid = LA_MODE;
674 la->la_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
675 rc = __osd_zap_create(env, osd, &xa_zap_dn, tx, la, 0, 0);
679 obj->oo_xattr = xa_zap_dn->dn_object;
680 rc = osd_object_sa_update(obj, SA_ZPL_XATTR(osd),
681 &obj->oo_xattr, 8, oh);
686 rc = -zap_lookup(osd->od_os, obj->oo_xattr, name, sizeof(uint64_t), 1,
689 if (fl & LU_XATTR_CREATE) {
694 * Entry already exists.
695 * We'll truncate the existing object.
697 rc = __osd_obj2dnode(osd->od_os, xa_data_obj, &xa_data_dn);
701 rc = -sa_handle_get(osd->od_os, xa_data_obj, NULL,
702 SA_HDL_PRIVATE, &sa_hdl);
706 rc = -sa_lookup(sa_hdl, SA_ZPL_SIZE(osd), &size, 8);
710 rc = -dmu_free_range(osd->od_os, xa_data_dn->dn_object,
711 0, DMU_OBJECT_END, tx);
714 } else if (rc == -ENOENT) {
715 struct lu_attr *la = &osd_oti_get(env)->oti_la;
717 * Entry doesn't exist, we need to create a new one and a new
718 * object to store the value.
720 if (fl & LU_XATTR_REPLACE) {
721 /* should be ENOATTR according to the
722 * man, but that is undefined here */
727 la->la_valid = LA_MODE;
728 la->la_mode = S_IFREG | S_IRUGO | S_IWUSR;
729 rc = __osd_object_create(env, osd, obj,
730 lu_object_fid(&obj->oo_dt.do_lu),
731 &xa_data_dn, tx, la);
734 xa_data_obj = xa_data_dn->dn_object;
736 rc = -sa_handle_get(osd->od_os, xa_data_obj, NULL,
737 SA_HDL_PRIVATE, &sa_hdl);
741 rc = -zap_add(osd->od_os, obj->oo_xattr, name, sizeof(uint64_t),
742 1, &xa_data_obj, tx);
746 /* There was an error looking up the xattr name */
750 /* Finally write the xattr value */
751 dmu_write(osd->od_os, xa_data_obj, 0, buf->lb_len, buf->lb_buf, tx);
754 rc = -sa_update(sa_hdl, SA_ZPL_SIZE(osd), &size, 8, tx);
757 sa_handle_destroy(sa_hdl);
759 if (xa_data_dn != NULL)
760 osd_dnode_rele(xa_data_dn);
761 if (xa_zap_dn != NULL)
762 osd_dnode_rele(xa_zap_dn);
/*
 * Strip the embedded parent-FID information from the LMA xattr of \a obj.
 *
 * The LMA is read into the per-thread scratch buffer oti_buf, swabbed and
 * must carry LMAC_STRIPE_INFO (asserted).  The LMAC_STRIPE_INFO and
 * LMAC_COMP_INFO compat bits are cleared, the LMA is swabbed back and
 * rewritten with LU_XATTR_REPLACE using only sizeof(struct
 * lustre_mdt_attrs) bytes, which drops the trailing stripe fields.
 * obj->oo_pfid_in_lma is reset afterwards.
 * NOTE(review): whether the flag is reset only on success is not visible
 * here -- confirm.
 */
767 static int osd_xattr_split_pfid(const struct lu_env *env,
768 struct osd_object *obj, struct osd_thandle *oh)
770 struct osd_thread_info *info = osd_oti_get(env);
771 struct lustre_ost_attrs *loa =
772 (struct lustre_ost_attrs *)&info->oti_buf;
773 struct lustre_mdt_attrs *lma = &loa->loa_lma;
774 struct lu_buf buf = {
776 .lb_len = sizeof(info->oti_buf),
782 BUILD_BUG_ON(sizeof(info->oti_buf) < sizeof(*loa));
783 rc = osd_xattr_get_internal(env, obj, &buf, XATTR_NAME_LMA, &size);
787 lustre_loa_swab(loa, true);
788 LASSERT(lma->lma_compat & LMAC_STRIPE_INFO);
790 lma->lma_compat &= ~(LMAC_STRIPE_INFO | LMAC_COMP_INFO);
791 lustre_lma_swab(lma);
793 buf.lb_len = sizeof(*lma);
794 rc = osd_xattr_set_internal(env, obj, &buf, XATTR_NAME_LMA,
795 LU_XATTR_REPLACE, oh);
797 obj->oo_pfid_in_lma = 0;
803 * In DNE environment, the object (in spite of regular file or directory)
804 * and its name entry may reside on different MDTs. Under such case, we will
805 * create an agent entry on the MDT where the object resides. The agent entry
806 * references the object locally, that makes the object to be visible to the
807 * userspace when mounted as 'zfs' directly. Then the userspace tools, such
808 * as 'tar' can handle the object properly.
810 * We handle the agent entry during set linkEA that is the common interface
811 * for both regular file and directory, can handle kinds of cases, such as
812 * create/link/unlink/rename, and so on.
814 * NOTE: we need to do that for both directory and regular file, so we can NOT
815 * do that when ea_{insert,delete} that are directory based operations.
/*
 * Keep the agent entry of \a obj in sync with the linkEA in \a buf.
 *
 * The linkEA records are walked until one of them names a parent FID for
 * which osd_remote_fid() returns a positive value, or the records run
 * out.  A linkEA that yields -ENODATA is handled on a separate branch.
 * Afterwards:
 *  - an object that has an agent entry but no remote parent gets the
 *    entry removed via osd_delete_from_remote_parent();
 *  - an object without an agent entry but with a remote parent gets one
 *    via osd_add_to_remote_parent().
 * Failures are logged with CERROR (removal) or CWARN (creation).
 */
817 static int osd_xattr_handle_linkea(const struct lu_env *env,
818 struct osd_device *osd,
819 struct osd_object *obj,
820 const struct lu_buf *buf,
821 struct osd_thandle *oh)
823 const struct lu_fid *fid = lu_object_fid(&obj->oo_dt.do_lu);
824 struct lu_fid *tfid = &osd_oti_get(env)->oti_fid;
825 struct linkea_data ldata = { .ld_buf = (struct lu_buf *)buf };
826 struct lu_name tmpname;
831 rc = linkea_init_with_rec(&ldata);
833 linkea_first_entry(&ldata);
834 while (ldata.ld_lee != NULL && !remote) {
835 linkea_entry_unpack(ldata.ld_lee, &ldata.ld_reclen,
837 if (osd_remote_fid(env, osd, tfid) > 0)
840 linkea_next_entry(&ldata);
842 } else if (rc == -ENODATA) {
848 if (lu_object_has_agent_entry(&obj->oo_dt.do_lu) && !remote) {
849 rc = osd_delete_from_remote_parent(env, osd, obj, oh, false);
851 CERROR("%s: failed to remove agent entry for "DFID
852 ": rc = %d\n", osd_name(osd), PFID(fid), rc);
853 } else if (!lu_object_has_agent_entry(&obj->oo_dt.do_lu) && remote) {
854 rc = osd_add_to_remote_parent(env, osd, obj, oh);
856 CWARN("%s: failed to create agent entry for "DFID
857 ": rc = %d\n", osd_name(osd), PFID(fid), rc);
/*
 * Set xattr \a name of \a dt to the value in \a buf within transaction
 * \a handle.
 *
 * POSIX ACL xattr names are filtered out when the device has od_posix_acl
 * disabled.  The update runs under the write side of obj->oo_guard.  Two
 * names get extra processing before the value is stored with
 * osd_xattr_set_internal():
 *  - XATTR_NAME_FID while the PFID still lives in the LMA: the LMA is
 *    split first and the flags are replaced by LU_XATTR_CREATE;
 *  - XATTR_NAME_LINK on a device with a remote parent directory: the
 *    agent entry is brought up to date.
 * NOTE(review): how a failure of either pre-step affects the final set is
 * not visible here -- confirm.
 */
863 int osd_xattr_set(const struct lu_env *env, struct dt_object *dt,
864 const struct lu_buf *buf, const char *name, int fl,
865 struct thandle *handle)
867 struct osd_object *obj = osd_dt_obj(dt);
868 struct osd_device *osd = osd_obj2dev(obj);
869 struct osd_thandle *oh;
873 LASSERT(handle != NULL);
874 LASSERT(osd_invariant(obj));
876 if (!osd_obj2dev(obj)->od_posix_acl &&
877 (strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS) == 0 ||
878 strcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
881 oh = container_of(handle, struct osd_thandle, ot_super);
883 down_write(&obj->oo_guard);
884 CDEBUG(D_INODE, "Setting xattr %s with size %d\n",
885 name, (int)buf->lb_len);
886 /* For the OST migrated from ldiskfs, the PFID EA may
887 * be stored in LMA because of ldiskfs inode size. */
888 if (unlikely(strcmp(name, XATTR_NAME_FID) == 0 &&
889 obj->oo_pfid_in_lma)) {
890 rc = osd_xattr_split_pfid(env, obj, oh);
892 fl = LU_XATTR_CREATE;
893 } else if (strcmp(name, XATTR_NAME_LINK) == 0 &&
894 osd->od_remote_parent_dir != ZFS_NO_OBJECT) {
895 rc = osd_xattr_handle_linkea(env, osd, obj, buf, oh);
899 rc = osd_xattr_set_internal(env, obj, buf, name, fl, oh);
900 up_write(&obj->oo_guard);
/*
 * Reserve transaction space for deleting xattr \a name from \a obj.
 *
 * The SA of the object is always held because the xattr may live in
 * SA_ZPL_DXATTR.  If the object has an xattr ZAP the name is looked up
 * there: when found, the bonus and the full range of the value object
 * plus the ZAP entry are held; -ENOENT needs nothing more; any other
 * lookup result is treated as an error.
 */
906 __osd_xattr_declare_del(const struct lu_env *env, struct osd_object *obj,
907 const char *name, struct osd_thandle *oh)
909 struct osd_device *osd = osd_obj2dev(obj);
910 dmu_tx_t *tx = oh->ot_tx;
911 uint64_t xa_data_obj;
914 /* update SA_ZPL_DXATTR if xattr was in SA */
915 dmu_tx_hold_sa(tx, obj->oo_sa_hdl, 0);
917 if (obj->oo_xattr == ZFS_NO_OBJECT)
920 rc = -zap_lookup(osd->od_os, obj->oo_xattr, name, 8, 1, &xa_data_obj);
924 * We'll delete the existing object and ZAP entry.
926 dmu_tx_hold_bonus(tx, xa_data_obj);
927 dmu_tx_hold_free(tx, xa_data_obj, 0, DMU_OBJECT_END);
928 dmu_tx_hold_zap(tx, obj->oo_xattr, FALSE, (char *) name);
930 } else if (rc == -ENOENT) {
932 * Entry doesn't exist, nothing to be changed.
937 /* An error happened */
/*
 * Declare a later deletion of xattr \a name on \a dt within transaction
 * \a handle.
 *
 * After sanity assertions on the handle, its DMU transaction and the
 * object dnode, __osd_xattr_declare_del() is called under the read side
 * of obj->oo_guard, but only for objects that exist and are not marked
 * destroyed.
 */
941 int osd_declare_xattr_del(const struct lu_env *env, struct dt_object *dt,
942 const char *name, struct thandle *handle)
944 struct osd_object *obj = osd_dt_obj(dt);
945 struct osd_thandle *oh;
948 LASSERT(handle != NULL);
949 LASSERT(osd_invariant(obj));
951 oh = container_of(handle, struct osd_thandle, ot_super);
952 LASSERT(oh->ot_tx != NULL);
953 LASSERT(obj->oo_dn != NULL);
955 down_read(&obj->oo_guard);
956 if (likely(dt_object_exists(&obj->oo_dt) && !obj->oo_destroyed))
957 __osd_xattr_declare_del(env, obj, name, oh);
958 up_read(&obj->oo_guard);
/*
 * Remove xattr \a name from the SA-resident nvlist of \a obj.
 *
 * The nvlist cache is loaded, the byte-array entry is removed and the
 * change is written back: immediately for XATTR_NAME_LMV, through the
 * batched late update for every other name.
 */
963 static int __osd_sa_xattr_del(const struct lu_env *env, struct osd_object *obj,
964 const char *name, struct osd_thandle *oh)
968 rc = __osd_xattr_cache(obj);
972 rc = -nvlist_remove(obj->oo_sa_xattr, name, DATA_TYPE_BYTE_ARRAY);
977 * only migrate delete LMV, and it needs to be done immediately, because
978 * it's used in deleting sub stripes, and if this is delayed, later when
979 * destroying the master object, it will delete sub stripes again.
981 if (!strcmp(name, XATTR_NAME_LMV))
982 rc = __osd_sa_xattr_update(env, obj, oh);
984 rc = __osd_sa_xattr_schedule_update(env, obj, oh);
/*
 * Delete xattr \a name from \a obj, trying the SA copy first and the
 * ZAP-based copy second.
 *
 * Objects that do not exist or are destroyed take an early exit.  After
 * __osd_sa_xattr_del() the xattr ZAP (if the object has one) is searched
 * for the name; when found, the value object is freed and the ZAP entry
 * removed.
 * NOTE(review): which results of the SA deletion fall through to the ZAP
 * path, and what is returned for -ENOENT, is not visible here -- confirm.
 */
988 static int __osd_xattr_del(const struct lu_env *env, struct osd_object *obj,
989 const char *name, struct osd_thandle *oh)
991 struct osd_device *osd = osd_obj2dev(obj);
992 uint64_t xa_data_obj;
995 if (unlikely(!dt_object_exists(&obj->oo_dt) || obj->oo_destroyed))
998 /* try remove xattr from SA at first */
999 rc = __osd_sa_xattr_del(env, obj, name, oh);
1003 if (obj->oo_xattr == ZFS_NO_OBJECT)
1006 rc = -zap_lookup(osd->od_os, obj->oo_xattr, name, sizeof(uint64_t), 1,
1008 if (rc == -ENOENT) {
1010 } else if (rc == 0) {
1013 * We'll delete the existing object and ZAP entry.
1015 rc = -dmu_object_free(osd->od_os, xa_data_obj, oh->ot_tx);
1019 rc = -zap_remove(osd->od_os, obj->oo_xattr, name, oh->ot_tx);
/*
 * Delete xattr \a name of \a dt within transaction \a handle.
 *
 * The object must exist and the handle must carry a DMU transaction
 * (asserted).  POSIX ACL xattr names return -EOPNOTSUPP when the device
 * has od_posix_acl disabled.  Under the write side of obj->oo_guard,
 * XATTR_NAME_FID is removed by splitting it out of the LMA while
 * obj->oo_pfid_in_lma is set; every other case goes through
 * __osd_xattr_del().
 */
1025 int osd_xattr_del(const struct lu_env *env, struct dt_object *dt,
1026 const char *name, struct thandle *handle)
1028 struct osd_object *obj = osd_dt_obj(dt);
1029 struct osd_thandle *oh;
1033 LASSERT(handle != NULL);
1034 LASSERT(obj->oo_dn != NULL);
1035 LASSERT(osd_invariant(obj));
1036 LASSERT(dt_object_exists(dt));
1037 oh = container_of(handle, struct osd_thandle, ot_super);
1038 LASSERT(oh->ot_tx != NULL);
1040 if (!osd_obj2dev(obj)->od_posix_acl &&
1041 (strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS) == 0 ||
1042 strcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
1043 RETURN(-EOPNOTSUPP);
1045 down_write(&obj->oo_guard);
1046 /* For the OST migrated from ldiskfs, the PFID EA may
1047 * be stored in LMA because of ldiskfs inode size. */
1048 if (unlikely(strcmp(name, XATTR_NAME_FID) == 0 && obj->oo_pfid_in_lma))
1049 rc = osd_xattr_split_pfid(env, obj, oh);
1051 rc = __osd_xattr_del(env, obj, name, oh);
1052 up_write(&obj->oo_guard);
/*
 * Reserve transaction space for destroying every ZAP-based xattr of
 * \a obj.
 *
 * Nothing is needed when the object has no xattr ZAP.  Otherwise a free of
 * the ZAP object is held and a ZAP cursor walks all entries; each entry
 * is expected to be a single 64-bit integer (asserted), is looked up to
 * obtain the value object number, and a free of that object is held.
 * Lookup failures are logged with CERROR.
 * NOTE(review): the last visible check suggests that a failure is
 * recorded in the transaction when tx->tx_err is still zero -- confirm.
 */
1057 void osd_declare_xattrs_destroy(const struct lu_env *env,
1058 struct osd_object *obj, struct osd_thandle *oh)
1060 struct osd_device *osd = osd_obj2dev(obj);
1061 zap_attribute_t *za = &osd_oti_get(env)->oti_za;
1062 uint64_t oid = obj->oo_xattr, xid;
1063 dmu_tx_t *tx = oh->ot_tx;
1067 if (oid == ZFS_NO_OBJECT)
1068 return; /* Nothing to do for SA xattrs */
1070 /* Declare to free the ZAP holding xattrs */
1071 dmu_tx_hold_free(tx, oid, 0, DMU_OBJECT_END);
1073 rc = osd_zap_cursor_init(&zc, osd->od_os, oid, 0);
1077 while (zap_cursor_retrieve(zc, za) == 0) {
1078 LASSERT(za->za_num_integers == 1);
1079 LASSERT(za->za_integer_length == sizeof(uint64_t));
1081 rc = -zap_lookup(osd->od_os, oid, za->za_name,
1082 sizeof(uint64_t), 1, &xid);
1084 CERROR("%s: xattr %s lookup failed: rc = %d\n",
1085 osd->od_svname, za->za_name, rc);
1088 dmu_tx_hold_free(tx, xid, 0, DMU_OBJECT_END);
1090 zap_cursor_advance(zc);
1093 osd_zap_cursor_fini(zc);
1095 if (rc && tx->tx_err == 0)
/*
 * Destroy every ZAP-based xattr of \a obj inside an already assigned
 * transaction (tx->tx_txg != 0 is asserted).
 *
 * Returns 0 right away when the object has no xattr ZAP.  Otherwise a ZAP
 * cursor walks all entries; each one is expected to be a single 64-bit
 * integer (asserted), is looked up to obtain the value object number and
 * that object is freed.  Finally the xattr ZAP object itself is freed.
 * Lookup and free failures are logged with CERROR.
 */
1099 int osd_xattrs_destroy(const struct lu_env *env,
1100 struct osd_object *obj, struct osd_thandle *oh)
1102 struct osd_device *osd = osd_obj2dev(obj);
1103 dmu_tx_t *tx = oh->ot_tx;
1104 zap_attribute_t *za = &osd_oti_get(env)->oti_za;
1109 /* The transaction must have been assigned to a transaction group. */
1110 LASSERT(tx->tx_txg != 0);
1112 if (obj->oo_xattr == ZFS_NO_OBJECT)
1113 return 0; /* Nothing to do for SA xattrs */
1115 /* Free the ZAP holding the xattrs */
1116 rc = osd_zap_cursor_init(&zc, osd->od_os, obj->oo_xattr, 0);
1120 while (zap_cursor_retrieve(zc, za) == 0) {
1121 LASSERT(za->za_num_integers == 1);
1122 LASSERT(za->za_integer_length == sizeof(uint64_t));
1124 rc = -zap_lookup(osd->od_os, obj->oo_xattr, za->za_name,
1125 sizeof(uint64_t), 1, &xid);
1127 CERROR("%s: lookup xattr %s failed: rc = %d\n",
1128 osd->od_svname, za->za_name, rc);
1130 rc = -dmu_object_free(osd->od_os, xid, tx);
1132 CERROR("%s: free xattr %s failed: rc = %d\n",
1133 osd->od_svname, za->za_name, rc);
1135 zap_cursor_advance(zc);
1137 osd_zap_cursor_fini(zc);
1139 rc = -dmu_object_free(osd->od_os, obj->oo_xattr, tx);
1141 CERROR("%s: free xattr %llu failed: rc = %d\n",
1142 osd->od_svname, obj->oo_xattr, rc);
/*
 * List the names of the SA-resident xattrs of \a obj.
 *
 * The nvlist cache is loaded and its pairs are walked.  POSIX ACL names
 * are skipped when the device has od_posix_acl disabled.  When \a lb has
 * a data pointer each name is copied with its terminating NUL at offset
 * "counted"; a name that would not fit in lb->lb_len takes an error
 * branch.
 * NOTE(review): the return value is presumably the accumulated byte count
 * and the overflow error presumably -ERANGE -- neither is visible here,
 * confirm.
 */
1148 osd_sa_xattr_list(const struct lu_env *env, struct osd_object *obj,
1149 const struct lu_buf *lb)
1151 nvpair_t *nvp = NULL;
1152 int len, counted = 0;
1155 rc = __osd_xattr_cache(obj);
1159 while ((nvp = nvlist_next_nvpair(obj->oo_sa_xattr, nvp)) != NULL) {
1160 const char *name = nvpair_name(nvp);
1162 if (!osd_obj2dev(obj)->od_posix_acl &&
1163 (strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS) == 0 ||
1164 strcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
1168 if (lb->lb_buf != NULL) {
1169 if (counted + len + 1 > lb->lb_len)
1172 memcpy(lb->lb_buf + counted, name, len + 1);
/*
 * List all xattr names of \a dt into \a lb as consecutive NUL-terminated
 * strings.
 *
 * Runs under the read side of obj->oo_guard.  The SA-resident names are
 * listed first through osd_sa_xattr_list().  If the object has no xattr
 * ZAP the count gathered so far is returned; otherwise a ZAP cursor walks
 * the xattr ZAP, skipping POSIX ACL names when the device has od_posix_acl
 * disabled.  When \a lb has a data pointer each name is copied with its
 * terminating NUL, and -ERANGE is returned if it would not fit in
 * lb->lb_len.  -ENOENT from the cursor marks the normal end of the walk.
 */
1179 int osd_xattr_list(const struct lu_env *env, struct dt_object *dt,
1180 const struct lu_buf *lb)
1182 struct osd_object *obj = osd_dt_obj(dt);
1183 struct osd_device *osd = osd_obj2dev(obj);
1184 zap_attribute_t *za = &osd_oti_get(env)->oti_za;
1189 LASSERT(obj->oo_dn != NULL);
1190 LASSERT(osd_invariant(obj));
1191 LASSERT(dt_object_exists(dt));
1193 down_read(&obj->oo_guard);
1195 rc = osd_sa_xattr_list(env, obj, lb);
1201 /* continue with dnode xattr if any */
1202 if (obj->oo_xattr == ZFS_NO_OBJECT)
1203 GOTO(out, rc = counted);
1205 rc = osd_zap_cursor_init(&zc, osd->od_os, obj->oo_xattr, 0);
1209 while ((rc = -zap_cursor_retrieve(zc, za)) == 0) {
1210 if (!osd_obj2dev(obj)->od_posix_acl &&
1211 (strcmp(za->za_name, XATTR_NAME_POSIX_ACL_ACCESS) == 0 ||
1212 strcmp(za->za_name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0)) {
1213 zap_cursor_advance(zc);
1217 rc = strlen(za->za_name);
1218 if (lb->lb_buf != NULL) {
1219 if (counted + rc + 1 > lb->lb_len)
1220 GOTO(out_fini, rc = -ERANGE);
1222 memcpy(lb->lb_buf + counted, za->za_name, rc + 1);
1226 zap_cursor_advance(zc);
1228 if (rc == -ENOENT) /* no more keys in the index */
1230 else if (unlikely(rc < 0))
1235 osd_zap_cursor_fini(zc);
1237 up_read(&obj->oo_guard);