X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fosd-zfs%2Fosd_internal.h;h=6ad66cbbd7d03a0304af026ac5e7db97a0afade3;hp=9f567d616eb1b0e84d7d5c79d39910a97b140d7c;hb=69940d2b3a445284e7e10ea04e8cb72ee37a8724;hpb=11b048aaffd6ced90892c4e6e9b763aa3a81fd6f;ds=sidebyside diff --git a/lustre/osd-zfs/osd_internal.h b/lustre/osd-zfs/osd_internal.h index 9f567d6..6ad66cb 100644 --- a/lustre/osd-zfs/osd_internal.h +++ b/lustre/osd-zfs/osd_internal.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -27,7 +23,7 @@ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2012, 2014, Intel Corporation. + * Copyright (c) 2012, 2016, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -54,6 +50,8 @@ #include #include #include +#include +#include /** * By design including kmem.h overrides the Linux slab interfaces to provide @@ -88,8 +86,8 @@ /* Statfs {minimum, safe estimate, and maximum} dnodes per block */ #define OSD_DNODE_MIN_BLKSHIFT (DNODES_PER_BLOCK_SHIFT) -#define OSD_DNODE_EST_BLKSHIFT (DNODES_PER_BLOCK_SHIFT >> 1) -#define OSD_DNODE_EST_COUNT 1024 +#define OSD_DNODE_EST_BLKSHIFT (12) /* est 4KB/dnode */ +#define OSD_DNODE_EST_COUNT 4096 #define OSD_GRANT_FOR_LOCAL_OIDS (2ULL << 20) /* 2MB for last_rcvd, ... 
*/ @@ -163,6 +161,21 @@ struct osa_attr { uint64_t ctime[2]; }; + +#define OSD_INS_CACHE_SIZE 8 + +/* OI cache entry */ +struct osd_idmap_cache { + struct osd_device *oic_dev; + struct lu_fid oic_fid; + /** max 2^48 dnodes per dataset, avoid spilling into another word */ + uint64_t oic_dnode:DN_MAX_OBJECT_SHIFT, + oic_remote:1; /* FID isn't local */ +}; + +/* max. number of regular attributes the callers may ask for */ +#define OSD_MAX_IN_BULK 13 + struct osd_thread_info { const struct lu_env *oti_env; @@ -180,6 +193,7 @@ struct osd_thread_info { union { char oti_key[MAXNAMELEN + 1]; __u64 oti_key64[(MAXNAMELEN + 1)/sizeof(__u64)]; + sa_bulk_attr_t oti_attr_bulk[OSD_MAX_IN_BULK]; }; struct lustre_mdt_attrs oti_mdt_attrs; @@ -191,6 +205,11 @@ struct osd_thread_info { struct lquota_id_info oti_qi; struct lu_seq_range oti_seq_range; + + /* dedicated OI cache for insert (which needs inum) */ + struct osd_idmap_cache *oti_ins_cache; + int oti_ins_cache_size; + int oti_ins_cache_used; }; extern struct lu_context_key osd_key; @@ -221,6 +240,7 @@ struct osd_thandle { struct osd_oi { char oi_name[OSD_OI_NAME_SIZE]; /* unused */ uint64_t oi_zapid; + dnode_t *oi_dn; }; struct osd_seq { @@ -262,9 +282,9 @@ struct osd_device { unsigned int od_oi_count; struct osd_seq_list od_seq_list; - unsigned int od_rdonly:1, + unsigned int od_dev_set_rdonly:1, /**< osd_ro() called */ + od_prop_rdonly:1, /**< ZFS property readonly */ od_xattr_in_sa:1, - od_quota_iused_est:1, od_is_ost:1, od_posix_acl:1; @@ -313,33 +333,47 @@ struct osd_object { * Not modified concurrently (either setup early during object * creation, or assigned by osd_object_create() under write lock). */ - dmu_buf_t *oo_db; + dnode_t *oo_dn; sa_handle_t *oo_sa_hdl; nvlist_t *oo_sa_xattr; struct list_head oo_sa_linkage; - struct list_head oo_unlinked_linkage; + /* used to implement osd_object_*_{lock|unlock} */ struct rw_semaphore oo_sem; + /* to serialize some updates: destroy vs. 
others, + * xattr_set, object block size change etc */ + struct rw_semaphore oo_guard; + + /* protected by oo_guard */ + struct list_head oo_unlinked_linkage; + /* cached attributes */ rwlock_t oo_attr_lock; struct lu_attr oo_attr; - /* protects extended attributes and oo_unlinked_linkage */ - struct semaphore oo_guard; + /* external dnode holding large EAs, protected by oo_guard */ uint64_t oo_xattr; enum osd_destroy_type oo_destroy; __u32 oo_destroyed:1; - /* record size for index file */ - unsigned char oo_keysize; - unsigned char oo_recsize; - unsigned char oo_recusize; /* unit size */ + + /* the i_flags in LMA */ + __u32 oo_lma_flags; + union { + int oo_ea_in_bonus; /* EA bytes we expect */ + struct { + /* record size for index file */ + unsigned char oo_keysize; + unsigned char oo_recsize; + unsigned char oo_recusize; /* unit size */ + }; + }; }; int osd_statfs(const struct lu_env *, struct dt_device *, struct obd_statfs *); extern const struct dt_index_operations osd_acct_index_ops; -uint64_t osd_quota_fid2dmu(const struct lu_fid *fid); +int osd_quota_fid2dmu(const struct lu_fid *fid, uint64_t *oid); extern struct lu_device_operations osd_lu_ops; extern struct dt_index_operations osd_dir_ops; int osd_declare_quota(const struct lu_env *env, struct osd_device *osd, @@ -400,6 +434,17 @@ static inline int osd_invariant(const struct osd_object *obj) return 1; } +/** + * Put the osd object once done with it. 
+ * + * \param obj osd object that needs to be put + */ +static inline void osd_object_put(const struct lu_env *env, + struct osd_object *obj) +{ + dt_object_put(env, &obj->oo_dt); +} + static inline int osd_object_invariant(const struct lu_object *l) { return osd_invariant(osd_obj(l)); @@ -440,19 +485,21 @@ int osd_procfs_fini(struct osd_device *osd); /* osd_object.c */ extern char *osd_obj_tag; void osd_object_sa_dirty_rele(struct osd_thandle *oh); -int __osd_obj2dbuf(const struct lu_env *env, objset_t *os, - uint64_t oid, dmu_buf_t **dbp); +int __osd_obj2dnode(const struct lu_env *env, objset_t *os, + uint64_t oid, dnode_t **dnp); struct lu_object *osd_object_alloc(const struct lu_env *env, const struct lu_object_header *hdr, struct lu_device *d); int osd_object_sa_update(struct osd_object *obj, sa_attr_type_t type, void *buf, uint32_t buflen, struct osd_thandle *oh); int __osd_zap_create(const struct lu_env *env, struct osd_device *osd, - dmu_buf_t **zap_dbp, dmu_tx_t *tx, struct lu_attr *la, - uint64_t parent, zap_flags_t flags); + dnode_t **zap_dnp, dmu_tx_t *tx, struct lu_attr *la, + zap_flags_t flags); int __osd_object_create(const struct lu_env *env, struct osd_object *obj, - dmu_buf_t **dbp, dmu_tx_t *tx, struct lu_attr *la, - uint64_t parent); + dnode_t **dnp, dmu_tx_t *tx, struct lu_attr *la); +int __osd_attr_init(const struct lu_env *env, struct osd_device *osd, + sa_handle_t *sa_hdl, dmu_tx_t *tx, + struct lu_attr *la, uint64_t parent); /* osd_oi.c */ int osd_oi_init(const struct lu_env *env, struct osd_device *o); @@ -460,12 +507,19 @@ void osd_oi_fini(const struct lu_env *env, struct osd_device *o); int osd_fid_lookup(const struct lu_env *env, struct osd_device *, const struct lu_fid *, uint64_t *); uint64_t osd_get_name_n_idx(const struct lu_env *env, struct osd_device *osd, - const struct lu_fid *fid, char *buf); + const struct lu_fid *fid, char *buf, int bufsize); int osd_options_init(void); -int osd_convert_root_to_new_seq(const struct lu_env 
*env, - struct osd_device *o); int osd_ost_seq_exists(const struct lu_env *env, struct osd_device *osd, __u64 seq); +int osd_idc_find_and_init(const struct lu_env *env, struct osd_device *osd, + struct osd_object *obj); +struct osd_idmap_cache *osd_idc_find_or_init(const struct lu_env *env, + struct osd_device *osd, + const struct lu_fid *fid); +struct osd_idmap_cache *osd_idc_find(const struct lu_env *env, + struct osd_device *osd, + const struct lu_fid *fid); + /* osd_index.c */ int osd_index_try(const struct lu_env *env, struct dt_object *dt, const struct dt_index_features *feat); @@ -477,6 +531,8 @@ int osd_zap_cursor_init(zap_cursor_t **zc, struct objset *os, uint64_t id, uint64_t dirhash); void osd_zap_cursor_fini(zap_cursor_t *zc); uint64_t osd_zap_cursor_serialize(zap_cursor_t *zc); +int osd_remote_fid(const struct lu_env *env, struct osd_device *osd, + const struct lu_fid *fid); /* osd_xattr.c */ int __osd_xattr_load(struct osd_device *osd, uint64_t dnode, @@ -511,6 +567,8 @@ int __osd_sa_xattr_set(const struct lu_env *env, struct osd_object *obj, int __osd_xattr_set(const struct lu_env *env, struct osd_object *obj, const struct lu_buf *buf, const char *name, int fl, struct osd_thandle *oh); +int __osd_sa_xattr_update(const struct lu_env *env, struct osd_object *obj, + struct osd_thandle *oh); static inline int osd_xattr_set_internal(const struct lu_env *env, struct osd_object *obj, const struct lu_buf *buf, const char *name, int fl, @@ -518,6 +576,10 @@ osd_xattr_set_internal(const struct lu_env *env, struct osd_object *obj, { int rc; + if (unlikely(!dt_object_exists(&obj->oo_dt) || obj->oo_destroyed)) + return -ENOENT; + + LASSERT(obj->oo_dn); if (osd_obj2dev(obj)->od_xattr_in_sa) { rc = __osd_sa_xattr_set(env, obj, buf, name, fl, oh); if (rc == -EFBIG) @@ -531,16 +593,16 @@ osd_xattr_set_internal(const struct lu_env *env, struct osd_object *obj, static inline uint64_t attrs_fs2zfs(const uint32_t flags) { - return (((flags & FS_APPEND_FL) ? 
ZFS_APPENDONLY : 0) | - ((flags & FS_NODUMP_FL) ? ZFS_NODUMP : 0) | - ((flags & FS_IMMUTABLE_FL) ? ZFS_IMMUTABLE : 0)); + return (flags & LUSTRE_APPEND_FL ? ZFS_APPENDONLY : 0) | + (flags & LUSTRE_NODUMP_FL ? ZFS_NODUMP : 0) | + (flags & LUSTRE_IMMUTABLE_FL ? ZFS_IMMUTABLE : 0); } static inline uint32_t attrs_zfs2fs(const uint64_t flags) { - return (((flags & ZFS_APPENDONLY) ? FS_APPEND_FL : 0) | - ((flags & ZFS_NODUMP) ? FS_NODUMP_FL : 0) | - ((flags & ZFS_IMMUTABLE) ? FS_IMMUTABLE_FL : 0)); + return (flags & ZFS_APPENDONLY ? LUSTRE_APPEND_FL : 0) | + (flags & ZFS_NODUMP ? LUSTRE_NODUMP_FL : 0) | + (flags & ZFS_IMMUTABLE ? LUSTRE_IMMUTABLE_FL : 0); } #endif @@ -581,4 +643,116 @@ osd_zio_buf_free(void *buf, size_t size) #define osd_zio_buf_free(buf, size) zio_buf_free(buf, size) #endif +#ifdef HAVE_DMU_OBJECT_ALLOC_DNSIZE +static inline uint64_t +osd_dmu_object_alloc(objset_t *os, dmu_object_type_t objtype, int blocksize, + int dnodesize, dmu_tx_t *tx) +{ + if (dnodesize == 0) + dnodesize = MAX(dmu_objset_dnodesize(os), DNODE_MIN_SIZE); + + return dmu_object_alloc_dnsize(os, objtype, blocksize, DMU_OT_SA, + DN_BONUS_SIZE(dnodesize), dnodesize, tx); +} + +static inline uint64_t +osd_zap_create_flags(objset_t *os, int normflags, zap_flags_t flags, + dmu_object_type_t ot, int leaf_blockshift, + int indirect_blockshift, int dnodesize, dmu_tx_t *tx) +{ + if (dnodesize == 0) + dnodesize = MAX(dmu_objset_dnodesize(os), DNODE_MIN_SIZE); + + return zap_create_flags_dnsize(os, normflags, flags, ot, + leaf_blockshift, indirect_blockshift, + DMU_OT_SA, DN_BONUS_SIZE(dnodesize), + dnodesize, tx); +} + +static inline int +osd_obj_bonuslen(struct osd_object *obj) +{ + int bonuslen = DN_BONUS_SIZE(DNODE_MIN_SIZE); + + if (obj->oo_dn != NULL && obj->oo_dn->dn_num_slots != 0) { + bonuslen = DN_SLOTS_TO_BONUSLEN(obj->oo_dn->dn_num_slots); + } else { + objset_t *os = osd_dtobj2objset(&obj->oo_dt); + int dnodesize; + + if (os != NULL) { + dnodesize = dmu_objset_dnodesize(os); + if 
(dnodesize != 0) + bonuslen = DN_BONUS_SIZE(dnodesize); + } + } + + return bonuslen; +} +#else +static inline uint64_t +osd_dmu_object_alloc(objset_t *os, dmu_object_type_t objtype, int blocksize, + int dnodesize, dmu_tx_t *tx) +{ + return dmu_object_alloc(os, objtype, blocksize, DMU_OT_SA, + DN_MAX_BONUSLEN, tx); +} + +static inline uint64_t +osd_zap_create_flags(objset_t *os, int normflags, zap_flags_t flags, + dmu_object_type_t ot, int leaf_blockshift, + int indirect_blockshift, int dnodesize, dmu_tx_t *tx) +{ + return zap_create_flags(os, normflags, flags, ot, leaf_blockshift, + indirect_blockshift, DMU_OT_SA, + DN_MAX_BONUSLEN, tx); +} + +static inline int +osd_obj_bonuslen(struct osd_object *obj) +{ + return DN_MAX_BONUSLEN; +} +#endif /* HAVE_DMU_OBJECT_ALLOC_DNSIZE */ + +#ifdef HAVE_DMU_PREFETCH_6ARG +#define osd_dmu_prefetch(os, obj, lvl, off, len, pri) \ + dmu_prefetch((os), (obj), (lvl), (off), (len), (pri)) +#else +#define osd_dmu_prefetch(os, obj, lvl, off, len, pri) \ + dmu_prefetch((os), (obj), (lvl), (off)) +#endif + +static inline void osd_dnode_rele(dnode_t *dn) +{ + dmu_buf_impl_t *db; + LASSERT(dn); + LASSERT(dn->dn_bonus); + db = dn->dn_bonus; + + DB_DNODE_EXIT(db); + dmu_buf_rele(&db->db, osd_obj_tag); +} + +#ifdef HAVE_DMU_USEROBJ_ACCOUNTING + +#define OSD_DMU_USEROBJ_PREFIX DMU_OBJACCT_PREFIX + +static inline bool osd_dmu_userobj_accounting_available(struct osd_device *osd) +{ + if (unlikely(dmu_objset_userobjspace_upgradable(osd->od_os))) + dmu_objset_userobjspace_upgrade(osd->od_os); + + return dmu_objset_userobjspace_present(osd->od_os); +} +#else + +#define OSD_DMU_USEROBJ_PREFIX "obj-" + +static inline bool osd_dmu_userobj_accounting_available(struct osd_device *osd) +{ + return false; +} +#endif /* #ifdef HAVE_DMU_USEROBJ_ACCOUNTING */ + #endif /* _OSD_INTERNAL_H */