X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fosd-zfs%2Fosd_internal.h;h=fa0c3f2cb6c31f95344530a9fa96d660aca6341a;hp=9fee6d52ff5606b23a0f4987820f8ff9283f42c3;hb=f9ca359284357d145819beb08b316e932f7a3060;hpb=069deb0686030fcbe4a5f844b03423eb7d8cde41 diff --git a/lustre/osd-zfs/osd_internal.h b/lustre/osd-zfs/osd_internal.h index 9fee6d5..fa0c3f2 100644 --- a/lustre/osd-zfs/osd_internal.h +++ b/lustre/osd-zfs/osd_internal.h @@ -26,10 +26,8 @@ /* * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. - */ -/* - * Copyright (c) 2012, Intel Corporation. - * Use is subject to license terms. + * + * Copyright (c) 2012, 2015, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -47,12 +45,36 @@ #define _OSD_INTERNAL_H #include +#include #include +#ifdef SHRINK_STOP +#undef SHRINK_STOP +#endif #include - #include +#include +#include + +/** + * By design including kmem.h overrides the Linux slab interfaces to provide + * the Illumos kmem cache interfaces. To override this and gain access to + * the Linux interfaces these preprocessor macros must be undefined. + */ +#ifdef kmem_cache_destroy +#undef kmem_cache_destroy +#endif + +#ifdef kmem_cache_create +#undef kmem_cache_create +#endif -#include "udmu.h" +#ifdef kmem_cache_alloc +#undef kmem_cache_alloc +#endif + +#ifdef kmem_cache_free +#undef kmem_cache_free +#endif #define LUSTRE_ROOT_FID_SEQ 0 #define DMU_OSD_SVNAME "svname" @@ -60,6 +82,17 @@ #define OSD_GFP_IO (GFP_NOFS | __GFP_HIGHMEM) +/* Statfs space reservation for grant, fragmentation, and unlink space. */ +#define OSD_STATFS_RESERVED_SIZE (16ULL << 20) /* reserve 16MB minimum */ +#define OSD_STATFS_RESERVED_SHIFT (7) /* reserve 0.78% of all space */ + +/* Statfs {minimum, safe estimate, and maximum} dnodes per block */ +#define OSD_DNODE_MIN_BLKSHIFT (DNODES_PER_BLOCK_SHIFT) +#define OSD_DNODE_EST_BLKSHIFT (DNODES_PER_BLOCK_SHIFT >> 1) +#define OSD_DNODE_EST_COUNT 1024 + +#define OSD_GRANT_FOR_LOCAL_OIDS (2ULL << 20) /* 2MB for last_rcvd, ... */ + /** * Iterator's in-memory data structure for quota file. */ @@ -84,7 +117,6 @@ struct osd_it_quota { struct osd_zap_it { zap_cursor_t *ozi_zc; struct osd_object *ozi_obj; - struct lustre_capa *ozi_capa; unsigned ozi_reset:1; /* 1 -- no need to advance */ /* ozi_pos - position of the cursor: * 0 - before any record @@ -93,7 +125,7 @@ struct osd_zap_it { * 3 - real records */ unsigned ozi_pos:3; union { - char ozi_name[NAME_MAX + 1]; /* file name for dir */ + char ozi_name[MAXNAMELEN]; /* file name for dir */ __u64 ozi_key; /* binary key for index files */ }; }; @@ -139,24 +171,16 @@ struct osd_thread_info { * XXX temporary: for ->i_op calls. */ struct timespec oti_time; - /* - * XXX temporary: for capa operations. - */ - struct lustre_capa_key oti_capa_key; - struct lustre_capa oti_capa; struct ost_id oti_ostid; char oti_buf[64]; - /** osd iterator context used for iterator session */ + char oti_str[64]; union { - struct osd_zap_it oti_it_zap; - struct osd_it_quota oti_it_quota; + char oti_key[MAXNAMELEN + 1]; + __u64 oti_key64[(MAXNAMELEN + 1)/sizeof(__u64)]; }; - - char oti_str[64]; - char oti_key[MAXNAMELEN + 1]; struct lustre_mdt_attrs oti_mdt_attrs; struct lu_attr oti_la; @@ -178,8 +202,10 @@ static inline struct osd_thread_info *osd_oti_get(const struct lu_env *env) struct osd_thandle { struct thandle ot_super; - cfs_list_t ot_dcb_list; - cfs_list_t ot_sa_list; + struct list_head ot_dcb_list; + struct list_head ot_stop_dcb_list; + struct list_head ot_unlinked_list; + struct list_head ot_sa_list; struct semaphore ot_sa_lock; dmu_tx_t *ot_tx; struct lquota_trans ot_quota_trans; @@ -200,13 +226,13 @@ struct osd_oi { struct osd_seq { uint64_t *os_compat_dirs; int os_subdir_count; /* subdir count for each seq */ - obd_seq os_seq; /* seq number */ - cfs_list_t os_seq_list; /* list to seq_list */ + u64 os_seq; /* seq number */ + struct list_head os_seq_list; /* list to seq_list */ }; struct osd_seq_list { - rwlock_t osl_seq_list_lock; /* lock for seq_list */ - cfs_list_t osl_seq_list; /* list head for seq */ + rwlock_t osl_seq_list_lock; /* lock for seq_list */ + struct list_head osl_seq_list; /* list head for seq */ struct semaphore osl_seq_init_sem; }; @@ -219,28 +245,30 @@ struct osd_device { /* super-class */ struct dt_device od_dt_dev; /* information about underlying file system */ - udmu_objset_t od_objset; - - /* - * Fid Capability - */ - unsigned int od_fl_capa:1; - unsigned long od_capa_timeout; - __u32 od_capa_alg; - struct lustre_capa_key *od_capa_keys; - cfs_hlist_head_t *od_capa_hash; - - cfs_proc_dir_entry_t *od_proc_entry; + struct objset *od_os; + uint64_t od_rootid; /* id of root znode */ + uint64_t od_unlinkedid; /* id of unlinked zapobj */ + /* SA attr mapping->id, + * name is the same as in ZFS to use defines SA_ZPL_...*/ + sa_attr_type_t *z_attr_table; + + struct proc_dir_entry *od_proc_entry; struct lprocfs_stats *od_stats; + uint64_t od_max_blksz; uint64_t od_root; + uint64_t od_O_id; struct osd_oi **od_oi_table; unsigned int od_oi_count; struct osd_seq_list od_seq_list; - unsigned int od_rdonly:1, + unsigned int od_dev_set_rdonly:1, /**< osd_ro() called */ + od_prop_rdonly:1, /**< ZFS property readonly */ od_xattr_in_sa:1, - od_quota_iused_est:1; + od_quota_iused_est:1, + od_is_ost:1, + od_posix_acl:1; + char od_mntdev[128]; char od_svname[128]; @@ -254,14 +282,27 @@ struct osd_device { /* quota slave instance */ struct qsd_instance *od_quota_slave; + struct brw_stats od_brw_stats; + atomic_t od_r_in_flight; + atomic_t od_w_in_flight; + /* used to debug zerocopy logic: the fields track all * allocated, loaned and referenced buffers in use. * to be removed once the change is tested well. */ - cfs_atomic_t od_zerocopy_alloc; - cfs_atomic_t od_zerocopy_loan; - cfs_atomic_t od_zerocopy_pin; + atomic_t od_zerocopy_alloc; + atomic_t od_zerocopy_loan; + atomic_t od_zerocopy_pin; arc_prune_t *arc_prune_cb; + + /* osd seq instance */ + struct lu_client_seq *od_cl_seq; +}; + +enum osd_destroy_type { + OSD_DESTROY_NONE = 0, + OSD_DESTROY_SYNC = 1, + OSD_DESTROY_ASYNC = 2, }; struct osd_object { @@ -276,30 +317,47 @@ struct osd_object { dmu_buf_t *oo_db; sa_handle_t *oo_sa_hdl; nvlist_t *oo_sa_xattr; - cfs_list_t oo_sa_linkage; + struct list_head oo_sa_linkage; + /* used to implement osd_object_*_{lock|unlock} */ struct rw_semaphore oo_sem; + /* to serialize some updates: destroy vs. others, + * xattr_set, etc */ + struct rw_semaphore oo_guard; + + /* protected by oo_guard */ + struct list_head oo_unlinked_linkage; + /* cached attributes */ rwlock_t oo_attr_lock; struct lu_attr oo_attr; - /* protects extended attributes */ - struct semaphore oo_guard; + /* external dnode holding large EAs, protected by oo_guard */ uint64_t oo_xattr; + enum osd_destroy_type oo_destroy; + + __u32 oo_destroyed:1; + /* the i_flags in LMA */ + __u32 oo_lma_flags; /* record size for index file */ - int oo_recsize; + unsigned char oo_keysize; + unsigned char oo_recsize; + unsigned char oo_recusize; /* unit size */ }; int osd_statfs(const struct lu_env *, struct dt_device *, struct obd_statfs *); extern const struct dt_index_operations osd_acct_index_ops; uint64_t osd_quota_fid2dmu(const struct lu_fid *fid); extern struct lu_device_operations osd_lu_ops; +extern struct dt_index_operations osd_dir_ops; int osd_declare_quota(const struct lu_env *env, struct osd_device *osd, qid_t uid, qid_t gid, long long space, struct osd_thandle *oh, bool is_blk, int *flags, bool force); +uint64_t osd_objs_count_estimate(uint64_t refdbytes, uint64_t usedobjs, + uint64_t nrblocks, uint64_t est_maxblockshift); /* * Helpers. @@ -344,7 +402,7 @@ static inline struct lu_device *osd2lu_dev(struct osd_device *osd) static inline struct objset * osd_dtobj2objset(struct dt_object *o) { - return osd_dev(o->do_lu.lo_dev)->od_objset.os; + return osd_dev(o->do_lu.lo_dev)->od_os; } static inline int osd_invariant(const struct osd_object *obj) @@ -367,7 +425,7 @@ static inline char *osd_name(struct osd_device *osd) return osd->od_dt_dev.dd_lu_dev.ld_obd->obd_name; } -#ifdef LPROCFS +#ifdef CONFIG_PROC_FS enum { LPROC_OSD_READ_BYTES = 0, LPROC_OSD_WRITE_BYTES = 1, @@ -382,35 +440,29 @@ enum { LPROC_OSD_LAST, }; +extern struct kmem_cache *osd_zapit_cachep; /* osd_lproc.c */ extern struct lprocfs_vars lprocfs_osd_obd_vars[]; -extern struct lprocfs_vars lprocfs_osd_module_vars[]; int osd_procfs_init(struct osd_device *osd, const char *name); int osd_procfs_fini(struct osd_device *osd); -int udmu_zap_cursor_retrieve_key(const struct lu_env *env, - zap_cursor_t *zc, char *key, int max); -int udmu_zap_cursor_retrieve_value(const struct lu_env *env, - zap_cursor_t *zc, char *buf, - int buf_size, int *bytes_read); - /* osd_object.c */ +extern char *osd_obj_tag; void osd_object_sa_dirty_rele(struct osd_thandle *oh); int __osd_obj2dbuf(const struct lu_env *env, objset_t *os, - uint64_t oid, dmu_buf_t **dbp, void *tag); + uint64_t oid, dmu_buf_t **dbp); struct lu_object *osd_object_alloc(const struct lu_env *env, const struct lu_object_header *hdr, struct lu_device *d); int osd_object_sa_update(struct osd_object *obj, sa_attr_type_t type, void *buf, uint32_t buflen, struct osd_thandle *oh); -int __osd_zap_create(const struct lu_env *env, udmu_objset_t *uos, +int __osd_zap_create(const struct lu_env *env, struct osd_device *osd, dmu_buf_t **zap_dbp, dmu_tx_t *tx, struct lu_attr *la, - uint64_t parent, void *tag, zap_flags_t flags); -int __osd_object_create(const struct lu_env *env, udmu_objset_t *uos, + uint64_t parent, zap_flags_t flags); +int __osd_object_create(const struct lu_env *env, struct osd_object *obj, dmu_buf_t **dbp, dmu_tx_t *tx, struct lu_attr *la, - uint64_t parent, void *tag); -int __osd_object_free(udmu_objset_t *uos, uint64_t oid, dmu_tx_t *tx); + uint64_t parent); /* osd_oi.c */ int osd_oi_init(const struct lu_env *env, struct osd_device *o); @@ -420,36 +472,45 @@ int osd_fid_lookup(const struct lu_env *env, uint64_t osd_get_name_n_idx(const struct lu_env *env, struct osd_device *osd, const struct lu_fid *fid, char *buf); int osd_options_init(void); -int osd_convert_root_to_new_seq(const struct lu_env *env, - struct osd_device *o); - +int osd_ost_seq_exists(const struct lu_env *env, struct osd_device *osd, + __u64 seq); /* osd_index.c */ int osd_index_try(const struct lu_env *env, struct dt_object *dt, const struct dt_index_features *feat); int osd_fld_lookup(const struct lu_env *env, struct osd_device *osd, - const struct lu_fid *fid, struct lu_seq_range *range); + u64 seq, struct lu_seq_range *range); +void osd_zap_cursor_init_serialized(zap_cursor_t *zc, struct objset *os, + uint64_t id, uint64_t dirhash); +int osd_zap_cursor_init(zap_cursor_t **zc, struct objset *os, + uint64_t id, uint64_t dirhash); +void osd_zap_cursor_fini(zap_cursor_t *zc); +uint64_t osd_zap_cursor_serialize(zap_cursor_t *zc); /* osd_xattr.c */ -int __osd_xattr_load(udmu_objset_t *uos, uint64_t dnode, nvlist_t **sa_xattr); -int __osd_xattr_get_large(const struct lu_env *env, udmu_objset_t *uos, +int __osd_xattr_load(struct osd_device *osd, uint64_t dnode, + nvlist_t **sa_xattr); +int __osd_xattr_get_large(const struct lu_env *env, struct osd_device *osd, uint64_t xattr, struct lu_buf *buf, const char *name, int *sizep); int osd_xattr_get(const struct lu_env *env, struct dt_object *dt, - struct lu_buf *buf, const char *name, - struct lustre_capa *capa); + struct lu_buf *buf, const char *name); int osd_declare_xattr_set(const struct lu_env *env, struct dt_object *dt, const struct lu_buf *buf, const char *name, int fl, struct thandle *handle); int osd_xattr_set(const struct lu_env *env, struct dt_object *dt, const struct lu_buf *buf, const char *name, int fl, - struct thandle *handle, struct lustre_capa *capa); + struct thandle *handle); int osd_declare_xattr_del(const struct lu_env *env, struct dt_object *dt, const char *name, struct thandle *handle); int osd_xattr_del(const struct lu_env *env, struct dt_object *dt, - const char *name, struct thandle *handle, - struct lustre_capa *capa); + const char *name, struct thandle *handle); +void osd_declare_xattrs_destroy(const struct lu_env *env, + struct osd_object *obj, + struct osd_thandle *oh); +int osd_xattrs_destroy(const struct lu_env *env, + struct osd_object *obj, struct osd_thandle *oh); int osd_xattr_list(const struct lu_env *env, struct dt_object *dt, - struct lu_buf *lb, struct lustre_capa *capa); + const struct lu_buf *lb); void __osd_xattr_declare_set(const struct lu_env *env, struct osd_object *obj, int vallen, const char *name, struct osd_thandle *oh); int __osd_sa_xattr_set(const struct lu_env *env, struct osd_object *obj, @@ -461,10 +522,14 @@ int __osd_xattr_set(const struct lu_env *env, struct osd_object *obj, static inline int osd_xattr_set_internal(const struct lu_env *env, struct osd_object *obj, const struct lu_buf *buf, const char *name, int fl, - struct osd_thandle *oh, struct lustre_capa *capa) + struct osd_thandle *oh) { int rc; + if (unlikely(!dt_object_exists(&obj->oo_dt) || obj->oo_destroyed)) + return -ENOENT; + + LASSERT(obj->oo_db); if (osd_obj2dev(obj)->od_xattr_in_sa) { rc = __osd_sa_xattr_set(env, obj, buf, name, fl, oh); if (rc == -EFBIG) @@ -476,5 +541,56 @@ osd_xattr_set_internal(const struct lu_env *env, struct osd_object *obj, return rc; } +static inline uint64_t attrs_fs2zfs(const uint32_t flags) +{ + return (flags & LUSTRE_APPEND_FL ? ZFS_APPENDONLY : 0) | + (flags & LUSTRE_NODUMP_FL ? ZFS_NODUMP : 0) | + (flags & LUSTRE_IMMUTABLE_FL ? ZFS_IMMUTABLE : 0); +} + +static inline uint32_t attrs_zfs2fs(const uint64_t flags) +{ + return (flags & ZFS_APPENDONLY ? LUSTRE_APPEND_FL : 0) | + (flags & ZFS_NODUMP ? LUSTRE_NODUMP_FL : 0) | + (flags & ZFS_IMMUTABLE ? LUSTRE_IMMUTABLE_FL : 0); +} + +#endif + +#ifndef HAVE_DSL_POOL_CONFIG +static inline void dsl_pool_config_enter(dsl_pool_t *dp, char *name) +{ +} + +static inline void dsl_pool_config_exit(dsl_pool_t *dp, char *name) +{ +} +#endif + +#ifdef HAVE_SPA_MAXBLOCKSIZE +#define osd_spa_maxblocksize(spa) spa_maxblocksize(spa) +#define osd_spa_maxblockshift(spa) fls64(spa_maxblocksize(spa) - 1) +#else +#define osd_spa_maxblocksize(spa) SPA_MAXBLOCKSIZE +#define osd_spa_maxblockshift(spa) SPA_MAXBLOCKSHIFT +#define SPA_OLD_MAXBLOCKSIZE SPA_MAXBLOCKSIZE #endif + +#ifdef HAVE_SA_SPILL_ALLOC +static inline void * +osd_zio_buf_alloc(size_t size) +{ + return sa_spill_alloc(KM_SLEEP); +} + +static inline void +osd_zio_buf_free(void *buf, size_t size) +{ + sa_spill_free(buf); +} +#else +#define osd_zio_buf_alloc(size) zio_buf_alloc(size) +#define osd_zio_buf_free(buf, size) zio_buf_free(buf, size) +#endif + #endif /* _OSD_INTERNAL_H */