4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright (c) 2012, 2017, Intel Corporation.
29 * This file is part of Lustre, http://www.lustre.org/
30 * Lustre is a trademark of Sun Microsystems, Inc.
32 * lustre/osd-zfs/osd_xattr.c
33 * functions to manipulate extended attributes and system attributes
35 * Author: Alex Zhuravlev <bzzz@whamcloud.com>
36 * Author: Mike Pershin <tappro@whamcloud.com>
39 #define DEBUG_SUBSYSTEM S_OSD
41 #include <libcfs/libcfs.h>
42 #include <obd_support.h>
43 #include <lustre_net.h>
45 #include <obd_class.h>
46 #include <lustre_disk.h>
47 #include <lustre_fid.h>
48 #include <lustre_linkea.h>
50 #include "osd_internal.h"
52 #include <sys/dnode.h>
57 #include <sys/spa_impl.h>
58 #include <sys/zfs_znode.h>
59 #include <sys/dmu_tx.h>
60 #include <sys/dmu_objset.h>
61 #include <sys/dsl_prop.h>
62 #include <sys/sa_impl.h>
65 #include <linux/posix_acl_xattr.h>
66 #include <lustre_scrub.h>
/*
 * Read the packed xattr nvlist kept in the SA_ZPL_DXATTR system attribute of
 * \a hdl and unpack it into \a *sa.
 *
 * Visible steps: sa_size() obtains the packed size, osd_zio_buf_alloc()
 * provides a temporary buffer, sa_lookup() copies the packed bytes into it,
 * nvlist_unpack() builds the nvlist, and the buffer is released right after
 * the unpack step. On one path nvlist_alloc() creates an empty
 * NV_UNIQUE_NAME list instead. The return codes of the ZFS calls are negated
 * before being stored in rc.
 *
 * NOTE(review): the branch conditions and return statements are not visible
 * in this excerpt; presumably the empty list is created when the attribute
 * is absent -- confirm against the full source.
 */
68 int __osd_xattr_load(struct osd_device *osd, sa_handle_t *hdl, nvlist_t **sa)
73 rc = -sa_size(hdl, SA_ZPL_DXATTR(osd), &size);
76 rc = -nvlist_alloc(sa, NV_UNIQUE_NAME, KM_SLEEP);
80 buf = osd_zio_buf_alloc(size);
85 rc = -sa_lookup(hdl, SA_ZPL_DXATTR(osd), buf, size);
87 rc = -nvlist_unpack(buf, size, sa, KM_SLEEP);
88 osd_zio_buf_free(buf, size);
/*
 * Populate the in-memory xattr nvlist obj->oo_sa_xattr.
 *
 * obj->oo_sa_hdl must be set (asserted). The list is loaded from the SA
 * handle through __osd_xattr_load(), which fills obj->oo_sa_xattr.
 *
 * NOTE(review): the statement executed when the list is already non-NULL is
 * not visible in this excerpt; presumably it returns 0 without reloading.
 */
94 static inline int __osd_xattr_cache(struct osd_object *obj)
96 LASSERT(obj->oo_sa_hdl);
97 if (obj->oo_sa_xattr != NULL)
99 return __osd_xattr_load(osd_obj2dev(obj),
100 obj->oo_sa_hdl, &obj->oo_sa_xattr);
/*
 * Fetch xattr \a name from the SA-resident nvlist of \a obj.
 *
 * The nvlist is cached first via __osd_xattr_cache(), then the value is
 * looked up as a byte array. When \a buf is NULL or has no lb_buf, only the
 * required size is reported through \a *sizep. When the value is larger than
 * buf->lb_len, -ERANGE is returned; otherwise *sizep bytes are copied into
 * buf->lb_buf.
 *
 * NOTE(review): the error checks after the cache and lookup calls are not
 * visible in this excerpt.
 */
104 __osd_sa_xattr_get(const struct lu_env *env, struct osd_object *obj,
105 const struct lu_buf *buf, const char *name, int *sizep)
110 rc = __osd_xattr_cache(obj);
114 LASSERT(obj->oo_sa_xattr);
115 rc = -nvlist_lookup_byte_array(obj->oo_sa_xattr, name,
120 if (buf == NULL || buf->lb_buf == NULL) {
121 /* return the required size by *sizep */
125 if (*sizep > buf->lb_len)
126 return -ERANGE; /* match ldiskfs error */
128 memcpy(buf->lb_buf, nv_value, *sizep);
/*
 * Read xattr \a name whose value is stored in its own object, referenced by
 * name from the xattr ZAP object \a xattr.
 *
 * Visible steps: zap_lookup() maps the name to an object number, the data
 * object is opened as a dnode plus a private SA handle, and SA_ZPL_SIZE
 * yields the value length. A length above INT_MAX is special-cased. Without
 * a destination buffer only the size is reported. A value larger than
 * buf->lb_len yields -ERANGE; otherwise dmu_read() copies the value from
 * offset 0 into buf->lb_buf. The SA handle is destroyed and the dnode
 * released before returning.
 *
 * NOTE(review): the result for xattr == ZFS_NO_OBJECT, the handling of an
 * oversized value, and the assignment of *sizep are not visible in this
 * excerpt.
 */
132 int __osd_xattr_get_large(const struct lu_env *env, struct osd_device *osd,
133 uint64_t xattr, struct lu_buf *buf,
134 const char *name, int *sizep)
137 sa_handle_t *sa_hdl = NULL;
138 uint64_t xa_data_obj, size;
141 /* are there any extended attributes? */
142 if (xattr == ZFS_NO_OBJECT)
145 /* Lookup the object number containing the xattr data */
146 rc = -zap_lookup(osd->od_os, xattr, name, sizeof(uint64_t), 1,
151 rc = __osd_obj2dnode(osd->od_os, xa_data_obj, &xa_data_dn);
155 rc = -sa_handle_get(osd->od_os, xa_data_obj, NULL, SA_HDL_PRIVATE,
160 /* Get the xattr value length / object size */
161 rc = -sa_lookup(sa_hdl, SA_ZPL_SIZE(osd), &size, 8);
165 if (size > INT_MAX) {
172 if (buf == NULL || buf->lb_buf == NULL) {
173 /* We only need to return the required size */
176 if (*sizep > buf->lb_len) {
177 rc = -ERANGE; /* match ldiskfs error */
181 rc = -dmu_read(osd->od_os, xa_data_dn->dn_object, 0,
182 size, buf->lb_buf, DMU_READ_PREFETCH);
185 sa_handle_destroy(sa_hdl);
187 osd_dnode_rele(xa_data_dn);
193 * Copy an extended attribute into the buffer provided, or compute
194 * the required buffer size if \a buf is NULL.
196 * On success, the number of bytes used or required is stored in \a sizep.
198 * Note that no locking is done here.
200 * \param[in] env execution environment
201 * \param[in] obj object for which to retrieve xattr
202 * \param[out] buf buffer to store xattr value in
203 * \param[in] name name of xattr to copy
204 * \param[out] sizep bytes used or required to store xattr
206 * \retval 0 on success
207 * \retval negative negated errno on failure
209 int osd_xattr_get_internal(const struct lu_env *env, struct osd_object *obj,
210 struct lu_buf *buf, const char *name, int *sizep)
/*
 * A missing or already destroyed object is screened out before any lookup.
 * NOTE(review): the value returned in that case is not visible here.
 */
214 if (unlikely(!dt_object_exists(&obj->oo_dt) || obj->oo_destroyed))
217 /* check SA_ZPL_DXATTR first then fallback to directory xattr */
218 rc = __osd_sa_xattr_get(env, obj, buf, name, sizep);
/*
 * Second stage: the ZAP-based lookup through obj->oo_xattr.
 * NOTE(review): the condition that selects this fallback is not visible.
 */
222 return __osd_xattr_get_large(env, osd_obj2dev(obj), obj->oo_xattr,
/*
 * Rebuild the PFID xattr (struct filter_fid) of \a obj from the parent FID
 * and stripe information embedded in its LMA xattr.
 *
 * The LMA is read into the per-thread scratch buffer info->oti_buf (the
 * CLASSERT guarantees it can hold a struct lustre_ost_attrs), passed through
 * lustre_loa_swab(), and must carry LMAC_STRIPE_INFO (asserted). *sizep is
 * set to sizeof(struct filter_fid). The parent FID and the layout fields are
 * stored in little-endian form. Component start/end/id are copied only when
 * LMAC_COMP_INFO is set.
 *
 * NOTE(review): the assignments of ff and ol, and the returns taken for an
 * empty or too-small \a buf, are not visible in this excerpt.
 */
226 static int osd_get_pfid_from_lma(const struct lu_env *env,
227 struct osd_object *obj,
228 struct lu_buf *buf, int *sizep)
230 struct osd_thread_info *info = osd_oti_get(env);
231 struct lustre_ost_attrs *loa =
232 (struct lustre_ost_attrs *)&info->oti_buf;
233 struct lustre_mdt_attrs *lma = &loa->loa_lma;
234 struct filter_fid *ff;
235 struct ost_layout *ol;
236 struct lu_buf tbuf = {
238 .lb_len = sizeof(info->oti_buf),
243 CLASSERT(sizeof(info->oti_buf) >= sizeof(*loa));
244 rc = osd_xattr_get_internal(env, obj, &tbuf,
245 XATTR_NAME_LMA, sizep);
249 lustre_loa_swab(loa, true);
250 LASSERT(lma->lma_compat & LMAC_STRIPE_INFO);
252 *sizep = sizeof(*ff);
253 if (buf->lb_len == 0 || !buf->lb_buf)
256 if (buf->lb_len < *sizep)
/*
 * loa_parent_fid.f_ver packs stripe data: the bits above
 * PFID_STRIPE_IDX_BITS go to ol_stripe_count, and f_ver is masked with
 * PFID_STRIPE_COUNT_MASK before the FID is exported to ff_parent.
 */
261 ol->ol_stripe_count = cpu_to_le32(loa->loa_parent_fid.f_ver >>
262 PFID_STRIPE_IDX_BITS);
263 ol->ol_stripe_size = cpu_to_le32(loa->loa_stripe_size);
264 loa->loa_parent_fid.f_ver &= PFID_STRIPE_COUNT_MASK;
265 fid_cpu_to_le(&ff->ff_parent, &loa->loa_parent_fid);
266 if (lma->lma_compat & LMAC_COMP_INFO) {
267 ol->ol_comp_start = cpu_to_le64(loa->loa_comp_start);
268 ol->ol_comp_end = cpu_to_le64(loa->loa_comp_end);
269 ol->ol_comp_id = cpu_to_le32(loa->loa_comp_id);
271 ol->ol_comp_start = 0;
/*
 * dt-layer entry point for reading xattr \a name of \a dt into \a buf.
 *
 * The object must have a dnode and satisfy osd_invariant() (asserted). The
 * POSIX ACL names are screened when the device has od_posix_acl disabled.
 * The lookup runs under the read side of obj->oo_guard, and existence of
 * the object is re-checked once the lock is held. XATTR_NAME_FID is served
 * from the LMA when obj->oo_pfid_in_lma is set; every other request goes
 * through osd_xattr_get_internal().
 *
 * NOTE(review): the return statements (ACL screening, missing object, final
 * result derived from rc/size) are not visible in this excerpt.
 */
279 int osd_xattr_get(const struct lu_env *env, struct dt_object *dt,
280 struct lu_buf *buf, const char *name)
282 struct osd_object *obj = osd_dt_obj(dt);
286 LASSERT(obj->oo_dn != NULL);
287 LASSERT(osd_invariant(obj));
289 if (!osd_obj2dev(obj)->od_posix_acl &&
290 (strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS) == 0 ||
291 strcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
294 down_read(&obj->oo_guard);
295 if (unlikely(!dt_object_exists(dt) || obj->oo_destroyed)) {
296 up_read(&obj->oo_guard);
300 /* For the OST migrated from ldiskfs, the PFID EA may
301 * be stored in LMA because of ldiskfs inode size. */
302 if (strcmp(name, XATTR_NAME_FID) == 0 && obj->oo_pfid_in_lma)
303 rc = osd_get_pfid_from_lma(env, obj, buf, &size);
305 rc = osd_xattr_get_internal(env, obj, buf, name, &size);
306 up_read(&obj->oo_guard);
315 /* the function is used to declare EAs when SA is not supported */
/*
 * Declare the transaction holds for storing xattr \a name (value length
 * \a vallen) in the ZAP-based xattr layout.
 *
 * Three situations are covered by the visible code:
 * - the object has no xattr ZAP yet (oo_xattr == ZFS_NO_OBJECT): holds for a
 *   new ZAP with one entry, a new SA-bearing object and a write of vallen
 *   bytes to it;
 * - the name already exists in the ZAP: holds on the value object for a
 *   bonus update, a free from offset vallen to the end and a write of the
 *   first vallen bytes;
 * - the name does not exist (-ENOENT): holds on the ZAP itself (bonus and
 *   entry) plus a new value object and its write.
 *
 * NOTE(review): the handling of other zap_lookup() errors is not visible in
 * this excerpt.
 */
316 void __osd_xattr_declare_legacy(const struct lu_env *env,
317 struct osd_object *obj,
318 int vallen, const char *name,
319 struct osd_thandle *oh)
321 struct osd_device *osd = osd_obj2dev(obj);
322 dmu_tx_t *tx = oh->ot_tx;
323 uint64_t xa_data_obj;
326 if (obj->oo_xattr == ZFS_NO_OBJECT) {
327 /* xattr zap + entry */
328 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, TRUE, (char *) name);
329 /* xattr value obj */
330 dmu_tx_hold_sa_create(tx, ZFS_SA_BASE_ATTR_SIZE);
331 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, vallen);
335 rc = -zap_lookup(osd->od_os, obj->oo_xattr, name, sizeof(uint64_t), 1,
339 * Entry already exists.
340 * We'll truncate the existing object.
342 dmu_tx_hold_bonus(tx, xa_data_obj);
343 dmu_tx_hold_free(tx, xa_data_obj, vallen, DMU_OBJECT_END);
344 dmu_tx_hold_write(tx, xa_data_obj, 0, vallen);
345 } else if (rc == -ENOENT) {
347 * Entry doesn't exist, we need to create a new one and a new
348 * object to store the value.
350 dmu_tx_hold_bonus(tx, obj->oo_xattr);
351 dmu_tx_hold_zap(tx, obj->oo_xattr, TRUE, (char *) name);
352 dmu_tx_hold_sa_create(tx, ZFS_SA_BASE_ATTR_SIZE);
353 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, vallen);
/*
 * Declare the transaction holds needed to set xattr \a name of length
 * \a vallen on \a obj.
 *
 * - a destroyed object is checked for first;
 * - for XATTR_NAME_LINK on a device that has a remote parent directory, a
 *   ZAP hold on od_remote_parent_dir is added for a possible agent entry;
 * - devices without od_xattr_in_sa use __osd_xattr_declare_legacy();
 * - otherwise the xattr lives in the SA: an existing object gets a spill
 *   hold on its dnode, while a not-yet-created object accumulates the
 *   declared size in obj->oo_ea_in_bonus and declares a spill hold on
 *   DMU_NEW_OBJECT once the total would exceed osd_obj_bonuslen().
 *
 * NOTE(review): the early-return statements are not visible in this excerpt.
 */
357 void __osd_xattr_declare_set(const struct lu_env *env, struct osd_object *obj,
358 int vallen, const char *name,
359 struct osd_thandle *oh)
361 struct osd_device *osd = osd_obj2dev(obj);
362 dmu_tx_t *tx = oh->ot_tx;
365 if (unlikely(obj->oo_destroyed))
368 if (strcmp(name, XATTR_NAME_LINK) == 0 &&
369 osd->od_remote_parent_dir != ZFS_NO_OBJECT) {
370 /* If some name entry resides on remote MDT, then will create
371 * agent entry under remote parent. On the other hand, if the
372 * remote entry will be removed, then related agent entry may
373 * need to be removed from the remote parent. So there may be
374 * kinds of cases, let's declare enough credits. The credits
375 * for create agent entry is enough for remove case. */
376 osd_tx_hold_zap(tx, osd->od_remote_parent_dir,
380 if (unlikely(!osd_obj2dev(obj)->od_xattr_in_sa)) {
381 __osd_xattr_declare_legacy(env, obj, vallen, name, oh);
385 /* declare EA in SA */
386 if (dt_object_exists(&obj->oo_dt)) {
387 LASSERT(obj->oo_sa_hdl);
388 /* XXX: it should be possible to skip spill
389 * declaration if specific EA is part of
390 * bonus and doesn't grow */
391 dmu_tx_hold_spill(tx, obj->oo_dn->dn_object);
395 bonuslen = osd_obj_bonuslen(obj);
397 /* the object doesn't exist yet, but its bonus has already been
398 * declared in osd_declare_object_create() */
399 if (obj->oo_ea_in_bonus > bonuslen) {
400 /* spill has been declared already */
401 } else if (obj->oo_ea_in_bonus + vallen > bonuslen) {
402 /* we're about to exceed bonus, let's declare spill */
403 dmu_tx_hold_spill(tx, DMU_NEW_OBJECT);
405 obj->oo_ea_in_bonus += vallen;
/*
 * dt-layer entry point declaring a later xattr set of \a name on \a dt.
 *
 * Converts \a handle (must be non-NULL, asserted) to the enclosing
 * osd_thandle and calls __osd_xattr_declare_set() with buf->lb_len as the
 * value length, under the read side of obj->oo_guard. \a fl is not used by
 * the visible code.
 *
 * NOTE(review): the return statement is not visible in this excerpt.
 */
408 int osd_declare_xattr_set(const struct lu_env *env, struct dt_object *dt,
409 const struct lu_buf *buf, const char *name,
410 int fl, struct thandle *handle)
412 struct osd_object *obj = osd_dt_obj(dt);
413 struct osd_thandle *oh;
416 LASSERT(handle != NULL);
417 oh = container_of0(handle, struct osd_thandle, ot_super);
419 down_read(&obj->oo_guard);
420 __osd_xattr_declare_set(env, obj, buf->lb_len, name, oh);
421 up_read(&obj->oo_guard);
/*
 * Write the complete set of system attributes of \a obj in one step.
 *
 * The attribute values are taken from obj->oo_attr and staged in the
 * per-thread osa_attr scratch area. The generation is the txg of the
 * transaction and crtime is encoded from the local timestamp "now". The
 * xattr nvlist obj->oo_sa_xattr is packed (XDR encoding) into the per-thread
 * lu_buf and appended as SA_ZPL_DXATTR. Everything is then handed to
 * sa_replace_all_by_template(). Both deferred-update flags (oo_late_xattr,
 * oo_late_attr_set) are cleared up front.
 *
 * With ZFS_PROJINHERIT and a project-used dnode on the device, a project ID
 * is stored too, the ZFS_PROJID flag is set, and the object is marked with
 * oo_with_projid.
 *
 * NOTE(review): how "now" is obtained, the error checks between the calls,
 * and the return statement are not visible in this excerpt.
 */
426 int __osd_sa_attr_init(const struct lu_env *env, struct osd_object *obj,
427 struct osd_thandle *oh)
429 sa_bulk_attr_t *bulk = osd_oti_get(env)->oti_attr_bulk;
430 struct osa_attr *osa = &osd_oti_get(env)->oti_osa;
431 struct lu_buf *lb = &osd_oti_get(env)->oti_xattr_lbuf;
432 struct osd_device *osd = osd_obj2dev(obj);
433 uint64_t crtime[2], gen;
434 inode_timespec_t now;
438 obj->oo_late_xattr = 0;
439 obj->oo_late_attr_set = 0;
441 gen = dmu_tx_get_txg(oh->ot_tx);
443 ZFS_TIME_ENCODE(&now, crtime);
445 osa->atime[0] = obj->oo_attr.la_atime;
446 osa->ctime[0] = obj->oo_attr.la_ctime;
447 osa->mtime[0] = obj->oo_attr.la_mtime;
448 osa->mode = obj->oo_attr.la_mode;
449 osa->uid = obj->oo_attr.la_uid;
450 osa->gid = obj->oo_attr.la_gid;
451 osa->rdev = obj->oo_attr.la_rdev;
452 osa->nlink = obj->oo_attr.la_nlink;
453 osa->flags = attrs_fs2zfs(obj->oo_attr.la_flags);
454 osa->size = obj->oo_attr.la_size;
455 #ifdef ZFS_PROJINHERIT
456 if (osd->od_projectused_dn) {
457 if (obj->oo_attr.la_valid & LA_PROJID)
458 osa->projid = obj->oo_attr.la_projid;
460 osa->projid = ZFS_DEFAULT_PROJID;
461 osa->flags |= ZFS_PROJID;
462 obj->oo_with_projid = 1;
/* Stage every attribute in the bulk array; cnt counts the entries added. */
467 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MODE(osd), NULL, &osa->mode, 8);
468 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_SIZE(osd), NULL, &osa->size, 8);
469 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_GEN(osd), NULL, &gen, 8);
470 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_UID(osd), NULL, &osa->uid, 8);
471 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_GID(osd), NULL, &osa->gid, 8);
472 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_PARENT(osd), NULL,
474 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_FLAGS(osd), NULL, &osa->flags, 8);
475 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_ATIME(osd), NULL, osa->atime, 16);
476 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MTIME(osd), NULL, osa->mtime, 16);
477 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CTIME(osd), NULL, osa->ctime, 16);
478 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CRTIME(osd), NULL, crtime, 16);
479 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_LINKS(osd), NULL, &osa->nlink, 8);
480 #ifdef ZFS_PROJINHERIT
481 if (osd->od_projectused_dn)
482 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_PROJID(osd), NULL,
485 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_RDEV(osd), NULL, &osa->rdev, 8);
/* The bulk array lives in the thread info; make sure it was not overrun. */
486 LASSERT(cnt <= ARRAY_SIZE(osd_oti_get(env)->oti_attr_bulk));
488 /* Update the SA for additions, modifications, and removals. */
489 rc = -nvlist_size(obj->oo_sa_xattr, &size, NV_ENCODE_XDR);
493 lu_buf_check_and_alloc(lb, size);
494 if (lb->lb_buf == NULL) {
495 CERROR("%s: can't allocate buffer for xattr update\n",
500 rc = -nvlist_pack(obj->oo_sa_xattr, (char **)&lb->lb_buf, &size,
501 NV_ENCODE_XDR, KM_SLEEP);
505 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_DXATTR(osd), NULL, lb->lb_buf, size);
507 rc = -sa_replace_all_by_template(obj->oo_sa_hdl, bulk, cnt, oh->ot_tx);
/*
 * Write the in-memory xattr nvlist of \a obj back to its SA_ZPL_DXATTR
 * system attribute within transaction \a oh.
 *
 * Clears obj->oo_late_xattr, sizes the XDR-encoded list with nvlist_size(),
 * makes sure the per-thread lu_buf can hold it, packs the list into that
 * buffer (the LASSERT checks that nvlist_pack() used the supplied buffer
 * rather than substituting another one), and stores the packed bytes with
 * sa_update().
 *
 * NOTE(review): the return value of sa_update() is discarded by the visible
 * call, so a failed SA write is not reported through rc -- confirm whether
 * that is intentional. The error checks and the return statement are not
 * visible in this excerpt.
 */
512 int __osd_sa_xattr_update(const struct lu_env *env, struct osd_object *obj,
513 struct osd_thandle *oh)
515 struct lu_buf *lb = &osd_oti_get(env)->oti_xattr_lbuf;
516 struct osd_device *osd = osd_obj2dev(obj);
521 obj->oo_late_xattr = 0;
523 /* Update the SA for additions, modifications, and removals. */
524 rc = -nvlist_size(obj->oo_sa_xattr, &size, NV_ENCODE_XDR);
528 lu_buf_check_and_alloc(lb, size);
529 if (lb->lb_buf == NULL) {
530 CERROR("%s: can't allocate buffer for xattr update\n",
536 rc = -nvlist_pack(obj->oo_sa_xattr, &dxattr, &size,
537 NV_ENCODE_XDR, KM_SLEEP);
540 LASSERT(dxattr == lb->lb_buf);
542 sa_update(obj->oo_sa_hdl, SA_ZPL_DXATTR(osd), dxattr, size, oh->ot_tx);
548 * Set an extended attribute.
549 * This transaction must have called udmu_xattr_declare_set() first.
551 * Returns 0 on success or a negative error number on failure.
553 * No locking is done here.
/*
 * Defer the write-back of the xattr nvlist of \a obj.
 *
 * Requires both the SA handle and the cached nvlist to be present
 * (asserted). Sets obj->oo_late_xattr and registers the object through
 * osd_object_sa_dirty_add().
 *
 * NOTE(review): the return statement is not visible in this excerpt.
 */
555 int __osd_sa_xattr_schedule_update(const struct lu_env *env,
556 struct osd_object *obj,
557 struct osd_thandle *oh)
560 LASSERT(obj->oo_sa_hdl);
561 LASSERT(obj->oo_sa_xattr);
563 /* schedule batched SA update in osd_object_sa_dirty_rele() */
564 obj->oo_late_xattr = 1;
565 osd_object_sa_dirty_add(obj, oh);
/*
 * Store xattr \a name with the value in \a buf in the SA-resident nvlist of
 * \a obj.
 *
 * Visible behaviour:
 * - the nvlist is cached first via __osd_xattr_cache();
 * - a value longer than DXATTR_MAX_ENTRY_SIZE, or one that would push the
 *   packed list past DXATTR_MAX_SA_SIZE, is treated as too big for the SA;
 * - when the name already exists: LU_XATTR_CREATE is special-cased, and a
 *   too-big replacement removes the entry from the nvlist, schedules an SA
 *   update and returns -EFBIG (or the scheduling error);
 * - when the name does not exist (-ENOENT): LU_XATTR_REPLACE is
 *   special-cased;
 * - a copy of the same name in the xattr ZAP is dropped: its value object is
 *   freed and the ZAP entry removed (the zap_remove() result is not checked);
 * - the value is added as a byte array, and the SA update is batched when
 *   the dnode was allocated in the txg of this transaction, else written
 *   immediately with __osd_sa_xattr_update().
 *
 * NOTE(review): the assignments of the too_big flag, the results returned
 * for the CREATE/REPLACE conflicts, and the remaining error checks are not
 * visible in this excerpt.
 */
571 int __osd_sa_xattr_set(const struct lu_env *env, struct osd_object *obj,
572 const struct lu_buf *buf, const char *name, int fl,
573 struct osd_thandle *oh)
581 rc = __osd_xattr_cache(obj);
585 LASSERT(obj->oo_sa_xattr);
586 /* Limited to 32k to keep nvpair memory allocations small */
587 if (buf->lb_len > DXATTR_MAX_ENTRY_SIZE) {
590 /* Prevent the DXATTR SA from consuming the entire SA
592 rc = -nvlist_size(obj->oo_sa_xattr, &size, NV_ENCODE_XDR);
596 if (size + buf->lb_len > DXATTR_MAX_SA_SIZE)
600 /* even in case of -EFBIG we must lookup xattr and check can we
601 * rewrite it then delete from SA */
602 rc = -nvlist_lookup_byte_array(obj->oo_sa_xattr, name, &nv_value,
605 if (fl & LU_XATTR_CREATE) {
607 } else if (too_big) {
608 rc = -nvlist_remove(obj->oo_sa_xattr, name,
609 DATA_TYPE_BYTE_ARRAY);
612 rc = __osd_sa_xattr_schedule_update(env, obj, oh);
613 return rc == 0 ? -EFBIG : rc;
615 } else if (rc == -ENOENT) {
616 if (fl & LU_XATTR_REPLACE)
624 /* Ensure xattr doesn't exist in ZAP */
625 if (obj->oo_xattr != ZFS_NO_OBJECT) {
626 struct osd_device *osd = osd_obj2dev(obj);
628 rc = -zap_lookup(osd->od_os, obj->oo_xattr,
631 rc = -dmu_object_free(osd->od_os, objid, oh->ot_tx);
633 zap_remove(osd->od_os, obj->oo_xattr,
638 rc = -nvlist_add_byte_array(obj->oo_sa_xattr, name,
639 (uchar_t *)buf->lb_buf, buf->lb_len);
643 /* batch updates only for just created dnodes where we
644 * used to set number of EAs in a single transaction */
645 if (obj->oo_dn->dn_allocated_txg == oh->ot_tx->tx_txg)
646 rc = __osd_sa_xattr_schedule_update(env, obj, oh);
648 rc = __osd_sa_xattr_update(env, obj, oh);
/*
 * Store xattr \a name in the ZAP-based layout: each value lives in its own
 * object, and the xattr ZAP of \a obj maps names to those objects.
 *
 * Visible steps:
 * - if the object has no xattr ZAP yet, one is created (directory mode) and
 *   its object number is recorded in obj->oo_xattr and in SA_ZPL_XATTR;
 * - the name is looked up in the ZAP; an existing value object is opened
 *   and its whole range freed, while for -ENOENT a new regular-file-mode
 *   object is created and added to the ZAP under \a name;
 * - LU_XATTR_CREATE on an existing name and LU_XATTR_REPLACE on a missing
 *   one are special-cased;
 * - buf->lb_len bytes are written at offset 0 of the value object and
 *   SA_ZPL_SIZE is updated on it;
 * - the private SA handle and any dnodes taken along the way are released.
 *
 * NOTE(review): the error checks, the results returned for the flag
 * conflicts, and the assignment of size before the final sa_update() are
 * not visible in this excerpt.
 */
654 __osd_xattr_set(const struct lu_env *env, struct osd_object *obj,
655 const struct lu_buf *buf, const char *name, int fl,
656 struct osd_thandle *oh)
658 struct osd_device *osd = osd_obj2dev(obj);
659 dnode_t *xa_zap_dn = NULL;
660 dnode_t *xa_data_dn = NULL;
661 uint64_t xa_data_obj;
662 sa_handle_t *sa_hdl = NULL;
663 dmu_tx_t *tx = oh->ot_tx;
667 LASSERT(obj->oo_sa_hdl);
669 if (obj->oo_xattr == ZFS_NO_OBJECT) {
670 struct lu_attr *la = &osd_oti_get(env)->oti_la;
672 la->la_valid = LA_MODE;
673 la->la_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
674 rc = __osd_zap_create(env, osd, &xa_zap_dn, tx, la, 0, 0);
678 obj->oo_xattr = xa_zap_dn->dn_object;
679 rc = osd_object_sa_update(obj, SA_ZPL_XATTR(osd),
680 &obj->oo_xattr, 8, oh);
685 rc = -zap_lookup(osd->od_os, obj->oo_xattr, name, sizeof(uint64_t), 1,
688 if (fl & LU_XATTR_CREATE) {
693 * Entry already exists.
694 * We'll truncate the existing object.
696 rc = __osd_obj2dnode(osd->od_os, xa_data_obj, &xa_data_dn);
700 rc = -sa_handle_get(osd->od_os, xa_data_obj, NULL,
701 SA_HDL_PRIVATE, &sa_hdl);
705 rc = -sa_lookup(sa_hdl, SA_ZPL_SIZE(osd), &size, 8);
709 rc = -dmu_free_range(osd->od_os, xa_data_dn->dn_object,
710 0, DMU_OBJECT_END, tx);
713 } else if (rc == -ENOENT) {
714 struct lu_attr *la = &osd_oti_get(env)->oti_la;
716 * Entry doesn't exist, we need to create a new one and a new
717 * object to store the value.
719 if (fl & LU_XATTR_REPLACE) {
720 /* should be ENOATTR according to the
721 * man, but that is undefined here */
726 la->la_valid = LA_MODE;
727 la->la_mode = S_IFREG | S_IRUGO | S_IWUSR;
728 rc = __osd_object_create(env, osd, obj,
729 lu_object_fid(&obj->oo_dt.do_lu),
730 &xa_data_dn, tx, la);
733 xa_data_obj = xa_data_dn->dn_object;
735 rc = -sa_handle_get(osd->od_os, xa_data_obj, NULL,
736 SA_HDL_PRIVATE, &sa_hdl);
740 rc = -zap_add(osd->od_os, obj->oo_xattr, name, sizeof(uint64_t),
741 1, &xa_data_obj, tx);
745 /* There was an error looking up the xattr name */
749 /* Finally write the xattr value */
750 dmu_write(osd->od_os, xa_data_obj, 0, buf->lb_len, buf->lb_buf, tx);
753 rc = -sa_update(sa_hdl, SA_ZPL_SIZE(osd), &size, 8, tx);
756 sa_handle_destroy(sa_hdl);
758 if (xa_data_dn != NULL)
759 osd_dnode_rele(xa_data_dn);
760 if (xa_zap_dn != NULL)
761 osd_dnode_rele(xa_zap_dn);
/*
 * Strip the PFID information that is embedded in the LMA xattr of \a obj.
 *
 * The LMA is read into the per-thread scratch buffer (the CLASSERT
 * guarantees it can hold a struct lustre_ost_attrs) and passed through
 * lustre_loa_swab(). It must carry LMAC_STRIPE_INFO (asserted). The
 * LMAC_STRIPE_INFO and LMAC_COMP_INFO compat bits are cleared, and the LMA
 * is passed through lustre_lma_swab() and written back with
 * LU_XATTR_REPLACE, truncated to sizeof(struct lustre_mdt_attrs).
 * obj->oo_pfid_in_lma is reset to 0.
 *
 * NOTE(review): the error checks around the get/set calls, including
 * whether oo_pfid_in_lma is only reset on success, are not visible in this
 * excerpt.
 */
766 static int osd_xattr_split_pfid(const struct lu_env *env,
767 struct osd_object *obj, struct osd_thandle *oh)
769 struct osd_thread_info *info = osd_oti_get(env);
770 struct lustre_ost_attrs *loa =
771 (struct lustre_ost_attrs *)&info->oti_buf;
772 struct lustre_mdt_attrs *lma = &loa->loa_lma;
773 struct lu_buf buf = {
775 .lb_len = sizeof(info->oti_buf),
781 CLASSERT(sizeof(info->oti_buf) >= sizeof(*loa));
782 rc = osd_xattr_get_internal(env, obj, &buf, XATTR_NAME_LMA, &size);
786 lustre_loa_swab(loa, true);
787 LASSERT(lma->lma_compat & LMAC_STRIPE_INFO);
789 lma->lma_compat &= ~(LMAC_STRIPE_INFO | LMAC_COMP_INFO);
790 lustre_lma_swab(lma);
792 buf.lb_len = sizeof(*lma);
793 rc = osd_xattr_set_internal(env, obj, &buf, XATTR_NAME_LMA,
794 LU_XATTR_REPLACE, oh);
796 obj->oo_pfid_in_lma = 0;
802 * In DNE environment, the object (in spite of regular file or directory)
803 * and its name entry may reside on different MDTs. Under such case, we will
804 * create an agent entry on the MDT where the object resides. The agent entry
805 * references the object locally, that makes the object to be visible to the
806 * userspace when mounted as 'zfs' directly. Then the userspace tools, such
807 * as 'tar' can handle the object properly.
809 * We handle the agent entry when setting the linkEA, which is the common
810 * interface for both regular files and directories and covers all the
811 * relevant cases, such as create/link/unlink/rename, and so on.
813 * NOTE: we need to do that for both directory and regular file, so we can NOT
814 * do that when ea_{insert,delete} that are directory based operations.
/*
 * Keep the agent entry of \a obj in sync with the linkEA in \a buf.
 *
 * The linkEA records are walked until one is found whose unpacked FID is
 * reported as remote by osd_remote_fid() (result > 0). An existing agent
 * entry is removed via osd_delete_from_remote_parent() when no remote FID
 * was found. A missing one is created via osd_add_to_remote_parent() when
 * a remote FID was found. A failed removal is logged with CERROR, a failed
 * creation with CWARN.
 *
 * NOTE(review): the handling of linkea_init_with_rec() results (including
 * the -ENODATA branch), the conditions guarding the log messages, and the
 * return statement are not visible in this excerpt.
 */
816 static int osd_xattr_handle_linkea(const struct lu_env *env,
817 struct osd_device *osd,
818 struct osd_object *obj,
819 const struct lu_buf *buf,
820 struct osd_thandle *oh)
822 const struct lu_fid *fid = lu_object_fid(&obj->oo_dt.do_lu);
823 struct lu_fid *tfid = &osd_oti_get(env)->oti_fid;
824 struct linkea_data ldata = { .ld_buf = (struct lu_buf *)buf };
825 struct lu_name tmpname;
830 rc = linkea_init_with_rec(&ldata);
832 linkea_first_entry(&ldata);
/* stop scanning as soon as one remote parent has been seen */
833 while (ldata.ld_lee != NULL && !remote) {
834 linkea_entry_unpack(ldata.ld_lee, &ldata.ld_reclen,
836 if (osd_remote_fid(env, osd, tfid) > 0)
839 linkea_next_entry(&ldata);
841 } else if (rc == -ENODATA) {
847 if (lu_object_has_agent_entry(&obj->oo_dt.do_lu) && !remote) {
848 rc = osd_delete_from_remote_parent(env, osd, obj, oh, false);
850 CERROR("%s: failed to remove agent entry for "DFID
851 ": rc = %d\n", osd_name(osd), PFID(fid), rc);
852 } else if (!lu_object_has_agent_entry(&obj->oo_dt.do_lu) && remote) {
853 rc = osd_add_to_remote_parent(env, osd, obj, oh);
855 CWARN("%s: failed to create agent entry for "DFID
856 ": rc = %d\n", osd_name(osd), PFID(fid), rc);
/*
 * dt-layer entry point for setting xattr \a name of \a dt to the value in
 * \a buf within transaction \a handle.
 *
 * The POSIX ACL names are screened when the device has od_posix_acl
 * disabled. The work runs under the write side of obj->oo_guard:
 * - XATTR_NAME_FID on an object with oo_pfid_in_lma first splits the PFID
 *   out of the LMA, and the flags are switched to LU_XATTR_CREATE;
 * - XATTR_NAME_LINK on a device with a remote parent directory first
 *   updates the agent entry via osd_xattr_handle_linkea();
 * - the value is then stored through osd_xattr_set_internal().
 *
 * NOTE(review): the error checks after the two preparatory calls and the
 * return statements are not visible in this excerpt.
 */
862 int osd_xattr_set(const struct lu_env *env, struct dt_object *dt,
863 const struct lu_buf *buf, const char *name, int fl,
864 struct thandle *handle)
866 struct osd_object *obj = osd_dt_obj(dt);
867 struct osd_device *osd = osd_obj2dev(obj);
868 struct osd_thandle *oh;
872 LASSERT(handle != NULL);
873 LASSERT(osd_invariant(obj));
875 if (!osd_obj2dev(obj)->od_posix_acl &&
876 (strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS) == 0 ||
877 strcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
880 oh = container_of0(handle, struct osd_thandle, ot_super);
882 down_write(&obj->oo_guard);
883 CDEBUG(D_INODE, "Setting xattr %s with size %d\n",
884 name, (int)buf->lb_len);
885 /* For the OST migrated from ldiskfs, the PFID EA may
886 * be stored in LMA because of ldiskfs inode size. */
887 if (unlikely(strcmp(name, XATTR_NAME_FID) == 0 &&
888 obj->oo_pfid_in_lma)) {
889 rc = osd_xattr_split_pfid(env, obj, oh);
891 fl = LU_XATTR_CREATE;
892 } else if (strcmp(name, XATTR_NAME_LINK) == 0 &&
893 osd->od_remote_parent_dir != ZFS_NO_OBJECT) {
894 rc = osd_xattr_handle_linkea(env, osd, obj, buf, oh);
898 rc = osd_xattr_set_internal(env, obj, buf, name, fl, oh);
899 up_write(&obj->oo_guard);
/*
 * Declare the transaction holds for deleting xattr \a name from \a obj.
 *
 * An SA hold on the object is always taken, because the xattr may live in
 * SA_ZPL_DXATTR. If the object also has an xattr ZAP, the name is looked up
 * there. When it is found, holds are added for the bonus of the value
 * object, for freeing its whole range, and for removing the ZAP entry.
 * -ENOENT needs no additional holds.
 *
 * NOTE(review): how other lookup errors are recorded is not visible in this
 * excerpt.
 */
905 __osd_xattr_declare_del(const struct lu_env *env, struct osd_object *obj,
906 const char *name, struct osd_thandle *oh)
908 struct osd_device *osd = osd_obj2dev(obj);
909 dmu_tx_t *tx = oh->ot_tx;
910 uint64_t xa_data_obj;
913 /* update SA_ZPL_DXATTR if xattr was in SA */
914 dmu_tx_hold_sa(tx, obj->oo_sa_hdl, 0);
916 if (obj->oo_xattr == ZFS_NO_OBJECT)
919 rc = -zap_lookup(osd->od_os, obj->oo_xattr, name, 8, 1, &xa_data_obj);
923 * We'll delete the existing object and ZAP entry.
925 dmu_tx_hold_bonus(tx, xa_data_obj);
926 dmu_tx_hold_free(tx, xa_data_obj, 0, DMU_OBJECT_END);
927 dmu_tx_hold_zap(tx, obj->oo_xattr, FALSE, (char *) name);
929 } else if (rc == -ENOENT) {
931 * Entry doesn't exist, nothing to be changed.
936 /* An error happened */
/*
 * dt-layer entry point declaring a later deletion of xattr \a name on
 * \a dt.
 *
 * Asserts a valid handle with an open DMU transaction, a dnode on the
 * object, and osd_invariant(). Under the read side of obj->oo_guard, the
 * holds are declared through __osd_xattr_declare_del() only when the object
 * exists and is not marked destroyed.
 *
 * NOTE(review): the return statement is not visible in this excerpt.
 */
940 int osd_declare_xattr_del(const struct lu_env *env, struct dt_object *dt,
941 const char *name, struct thandle *handle)
943 struct osd_object *obj = osd_dt_obj(dt);
944 struct osd_thandle *oh;
947 LASSERT(handle != NULL);
948 LASSERT(osd_invariant(obj));
950 oh = container_of0(handle, struct osd_thandle, ot_super);
951 LASSERT(oh->ot_tx != NULL);
952 LASSERT(obj->oo_dn != NULL);
954 down_read(&obj->oo_guard);
955 if (likely(dt_object_exists(&obj->oo_dt) && !obj->oo_destroyed))
956 __osd_xattr_declare_del(env, obj, name, oh);
957 up_read(&obj->oo_guard);
/*
 * Remove xattr \a name from the SA-resident nvlist of \a obj.
 *
 * The nvlist is cached first, and the byte-array entry is removed from it.
 * The SA is written back immediately for XATTR_NAME_LMV, while the other
 * visible path only schedules a batched update.
 *
 * NOTE(review): the error checks after the cache and remove calls, and the
 * else line between the two update calls, are not visible in this excerpt.
 */
962 static int __osd_sa_xattr_del(const struct lu_env *env, struct osd_object *obj,
963 const char *name, struct osd_thandle *oh)
967 rc = __osd_xattr_cache(obj);
971 rc = -nvlist_remove(obj->oo_sa_xattr, name, DATA_TYPE_BYTE_ARRAY);
976 * only migrate delete LMV, and it needs to be done immediately, because
977 * it's used in deleting sub stripes, and if this is delayed, later when
978 * destroying the master object, it will delete sub stripes again.
980 if (!strcmp(name, XATTR_NAME_LMV))
981 rc = __osd_sa_xattr_update(env, obj, oh);
983 rc = __osd_sa_xattr_schedule_update(env, obj, oh);
/*
 * Delete xattr \a name from \a obj, trying the SA nvlist first and the
 * xattr ZAP second.
 *
 * A missing or destroyed object is screened out up front. For the ZAP path
 * the name is looked up. When it is found, the value object is freed with
 * dmu_object_free() and the entry removed with zap_remove().
 *
 * NOTE(review): the condition under which the ZAP path is taken after the
 * SA attempt, the result for -ENOENT, and the return statements are not
 * visible in this excerpt.
 */
987 static int __osd_xattr_del(const struct lu_env *env, struct osd_object *obj,
988 const char *name, struct osd_thandle *oh)
990 struct osd_device *osd = osd_obj2dev(obj);
991 uint64_t xa_data_obj;
994 if (unlikely(!dt_object_exists(&obj->oo_dt) || obj->oo_destroyed))
997 /* try remove xattr from SA at first */
998 rc = __osd_sa_xattr_del(env, obj, name, oh);
1002 if (obj->oo_xattr == ZFS_NO_OBJECT)
1005 rc = -zap_lookup(osd->od_os, obj->oo_xattr, name, sizeof(uint64_t), 1,
1007 if (rc == -ENOENT) {
1009 } else if (rc == 0) {
1012 * We'll delete the existing object and ZAP entry.
1014 rc = -dmu_object_free(osd->od_os, xa_data_obj, oh->ot_tx);
1018 rc = -zap_remove(osd->od_os, obj->oo_xattr, name, oh->ot_tx);
/*
 * dt-layer entry point for deleting xattr \a name of \a dt within
 * transaction \a handle.
 *
 * Asserts a valid handle with an open DMU transaction and an existing
 * object with a dnode. Returns -EOPNOTSUPP for the POSIX ACL names when the
 * device has od_posix_acl disabled. Under the write side of obj->oo_guard,
 * XATTR_NAME_FID on an object with oo_pfid_in_lma is handled by splitting
 * the PFID out of the LMA, and the other visible path calls
 * __osd_xattr_del().
 *
 * NOTE(review): the else line between the two calls and the final return
 * are not visible in this excerpt.
 */
1024 int osd_xattr_del(const struct lu_env *env, struct dt_object *dt,
1025 const char *name, struct thandle *handle)
1027 struct osd_object *obj = osd_dt_obj(dt);
1028 struct osd_thandle *oh;
1032 LASSERT(handle != NULL);
1033 LASSERT(obj->oo_dn != NULL);
1034 LASSERT(osd_invariant(obj));
1035 LASSERT(dt_object_exists(dt));
1036 oh = container_of0(handle, struct osd_thandle, ot_super);
1037 LASSERT(oh->ot_tx != NULL);
1039 if (!osd_obj2dev(obj)->od_posix_acl &&
1040 (strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS) == 0 ||
1041 strcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
1042 RETURN(-EOPNOTSUPP);
1044 down_write(&obj->oo_guard);
1045 /* For the OST migrated from ldiskfs, the PFID EA may
1046 * be stored in LMA because of ldiskfs inode size. */
1047 if (unlikely(strcmp(name, XATTR_NAME_FID) == 0 && obj->oo_pfid_in_lma))
1048 rc = osd_xattr_split_pfid(env, obj, oh);
1050 rc = __osd_xattr_del(env, obj, name, oh);
1051 up_write(&obj->oo_guard);
/*
 * Declare the transaction holds for destroying every ZAP-based xattr of
 * \a obj.
 *
 * Nothing is declared when the object has no xattr ZAP. Otherwise a free
 * hold is taken on the ZAP object itself, and a ZAP cursor then walks its
 * entries. Each entry must be a single 64-bit integer (asserted), its
 * object number is looked up by name, and a free hold is taken on that
 * value object. A failed lookup is logged with CERROR. The cursor is
 * finalized afterwards.
 *
 * NOTE(review): the error checks after cursor init and lookup, and the
 * statement guarded by the final "rc && tx->tx_err == 0" test (presumably
 * recording the error in the transaction), are not visible in this excerpt.
 */
1056 void osd_declare_xattrs_destroy(const struct lu_env *env,
1057 struct osd_object *obj, struct osd_thandle *oh)
1059 struct osd_device *osd = osd_obj2dev(obj);
1060 zap_attribute_t *za = &osd_oti_get(env)->oti_za;
1061 uint64_t oid = obj->oo_xattr, xid;
1062 dmu_tx_t *tx = oh->ot_tx;
1066 if (oid == ZFS_NO_OBJECT)
1067 return; /* Nothing to do for SA xattrs */
1069 /* Declare to free the ZAP holding xattrs */
1070 dmu_tx_hold_free(tx, oid, 0, DMU_OBJECT_END);
1072 rc = osd_zap_cursor_init(&zc, osd->od_os, oid, 0);
1076 while (zap_cursor_retrieve(zc, za) == 0) {
1077 LASSERT(za->za_num_integers == 1);
1078 LASSERT(za->za_integer_length == sizeof(uint64_t));
1080 rc = -zap_lookup(osd->od_os, oid, za->za_name,
1081 sizeof(uint64_t), 1, &xid);
1083 CERROR("%s: xattr %s lookup failed: rc = %d\n",
1084 osd->od_svname, za->za_name, rc);
1087 dmu_tx_hold_free(tx, xid, 0, DMU_OBJECT_END);
1089 zap_cursor_advance(zc);
1092 osd_zap_cursor_fini(zc);
1094 if (rc && tx->tx_err == 0)
/*
 * Destroy every ZAP-based xattr of \a obj inside the already assigned
 * transaction of \a oh (tx_txg must be non-zero, asserted).
 *
 * Returns 0 immediately when the object has no xattr ZAP. Otherwise a ZAP
 * cursor walks the entries. Each entry must be a single 64-bit integer
 * (asserted), its object number is looked up by name, and that value object
 * is freed with dmu_object_free(). Once the cursor is finalized, the xattr
 * ZAP object itself is freed. Failures of lookup and free are logged with
 * CERROR.
 *
 * NOTE(review): the error checks that guard the log messages, whether the
 * loop stops on the first failure, and the return statement are not visible
 * in this excerpt.
 */
1098 int osd_xattrs_destroy(const struct lu_env *env,
1099 struct osd_object *obj, struct osd_thandle *oh)
1101 struct osd_device *osd = osd_obj2dev(obj);
1102 dmu_tx_t *tx = oh->ot_tx;
1103 zap_attribute_t *za = &osd_oti_get(env)->oti_za;
1108 /* The transaction must have been assigned to a transaction group. */
1109 LASSERT(tx->tx_txg != 0);
1111 if (obj->oo_xattr == ZFS_NO_OBJECT)
1112 return 0; /* Nothing to do for SA xattrs */
1114 /* Free the ZAP holding the xattrs */
1115 rc = osd_zap_cursor_init(&zc, osd->od_os, obj->oo_xattr, 0);
1119 while (zap_cursor_retrieve(zc, za) == 0) {
1120 LASSERT(za->za_num_integers == 1);
1121 LASSERT(za->za_integer_length == sizeof(uint64_t));
1123 rc = -zap_lookup(osd->od_os, obj->oo_xattr, za->za_name,
1124 sizeof(uint64_t), 1, &xid);
1126 CERROR("%s: lookup xattr %s failed: rc = %d\n",
1127 osd->od_svname, za->za_name, rc);
1129 rc = -dmu_object_free(osd->od_os, xid, tx);
1131 CERROR("%s: free xattr %s failed: rc = %d\n",
1132 osd->od_svname, za->za_name, rc);
1134 zap_cursor_advance(zc);
1136 osd_zap_cursor_fini(zc);
1138 rc = -dmu_object_free(osd->od_os, obj->oo_xattr, tx);
1140 CERROR("%s: free xattr %llu failed: rc = %d\n",
1141 osd->od_svname, obj->oo_xattr, rc);
/*
 * List the names of the xattrs kept in the SA nvlist of \a obj.
 *
 * The nvlist is cached first and then walked pair by pair. The POSIX ACL
 * names are screened when the device has od_posix_acl disabled. When
 * lb->lb_buf is set, each name is copied including its terminating NUL at
 * offset "counted", after checking that it still fits into lb->lb_len.
 *
 * NOTE(review): the computation of len, the update of counted, the result
 * for a too-small buffer, and the return value are not visible in this
 * excerpt.
 */
1147 osd_sa_xattr_list(const struct lu_env *env, struct osd_object *obj,
1148 const struct lu_buf *lb)
1150 nvpair_t *nvp = NULL;
1151 int len, counted = 0;
1154 rc = __osd_xattr_cache(obj);
1158 while ((nvp = nvlist_next_nvpair(obj->oo_sa_xattr, nvp)) != NULL) {
1159 const char *name = nvpair_name(nvp);
1161 if (!osd_obj2dev(obj)->od_posix_acl &&
1162 (strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS) == 0 ||
1163 strcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
1167 if (lb->lb_buf != NULL) {
1168 if (counted + len + 1 > lb->lb_len)
1171 memcpy(lb->lb_buf + counted, name, len + 1);
/*
 * dt-layer entry point listing the xattr names of \a dt into \a lb.
 *
 * Runs under the read side of obj->oo_guard on an existing object
 * (asserted). The names from the SA nvlist are listed first via
 * osd_sa_xattr_list(). If the object has no xattr ZAP, the count gathered
 * so far is the result. Otherwise a ZAP cursor walks the xattr ZAP: the
 * POSIX ACL names are skipped when od_posix_acl is disabled, and when
 * lb->lb_buf is set each remaining name is copied with its terminating NUL,
 * or -ERANGE is returned if it does not fit into lb->lb_len. The cursor is
 * finalized and the lock released on the way out.
 *
 * NOTE(review): the update of counted inside the loop, the handling of the
 * osd_sa_xattr_list() result, and the final return are not visible in this
 * excerpt.
 */
1178 int osd_xattr_list(const struct lu_env *env, struct dt_object *dt,
1179 const struct lu_buf *lb)
1181 struct osd_object *obj = osd_dt_obj(dt);
1182 struct osd_device *osd = osd_obj2dev(obj);
1183 zap_attribute_t *za = &osd_oti_get(env)->oti_za;
1188 LASSERT(obj->oo_dn != NULL);
1189 LASSERT(osd_invariant(obj));
1190 LASSERT(dt_object_exists(dt));
1192 down_read(&obj->oo_guard);
1194 rc = osd_sa_xattr_list(env, obj, lb);
1200 /* continue with dnode xattr if any */
1201 if (obj->oo_xattr == ZFS_NO_OBJECT)
1202 GOTO(out, rc = counted);
1204 rc = osd_zap_cursor_init(&zc, osd->od_os, obj->oo_xattr, 0);
1208 while ((rc = -zap_cursor_retrieve(zc, za)) == 0) {
1209 if (!osd_obj2dev(obj)->od_posix_acl &&
1210 (strcmp(za->za_name, XATTR_NAME_POSIX_ACL_ACCESS) == 0 ||
1211 strcmp(za->za_name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0)) {
1212 zap_cursor_advance(zc);
/* rc is reused here to hold the length of the current name */
1216 rc = strlen(za->za_name);
1217 if (lb->lb_buf != NULL) {
1218 if (counted + rc + 1 > lb->lb_len)
1219 GOTO(out_fini, rc = -ERANGE);
1221 memcpy(lb->lb_buf + counted, za->za_name, rc + 1);
1225 zap_cursor_advance(zc);
1227 if (rc == -ENOENT) /* no more keys in the index */
1229 else if (unlikely(rc < 0))
1234 osd_zap_cursor_fini(zc);
1236 up_read(&obj->oo_guard);