X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fosd-zfs%2Fosd_internal.h;h=527eb799d84b6e716233e9ddeb9ebee5782f1b99;hb=22856f4e5e2a84be332b6c70f039c47f24cf7b44;hp=dd3822f1299b2636efab5d0ea1eeb259dcf604ef;hpb=736ffdc4d6975aaeac83019dd2a88100f4e3d028;p=fs%2Flustre-release.git diff --git a/lustre/osd-zfs/osd_internal.h b/lustre/osd-zfs/osd_internal.h index dd3822f..527eb79 100644 --- a/lustre/osd-zfs/osd_internal.h +++ b/lustre/osd-zfs/osd_internal.h @@ -26,10 +26,8 @@ /* * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. - */ -/* - * Copyright (c) 2012, 2013, Intel Corporation. - * Use is subject to license terms. + * + * Copyright (c) 2012, 2015, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -49,21 +47,34 @@ #include #include #include +#ifdef SHRINK_STOP +#undef SHRINK_STOP +#endif +#include +#include +#include +#include -#define _SPL_KMEM_H -#include -#define kmem_zalloc(a, b) kzalloc(a, b) -#define kmem_free(ptr, sz) ((void)(sz), kfree(ptr)) -#ifndef KM_SLEEP -#define KM_SLEEP GFP_KERNEL +/** + * By design including kmem.h overrides the Linux slab interfaces to provide + * the Illumos kmem cache interfaces. To override this and gain access to + * the Linux interfaces these preprocessor macros must be undefined. + */ +#ifdef kmem_cache_destroy +#undef kmem_cache_destroy #endif -#include +#ifdef kmem_cache_create +#undef kmem_cache_create +#endif -#include +#ifdef kmem_cache_alloc +#undef kmem_cache_alloc +#endif -#include -#include "udmu.h" +#ifdef kmem_cache_free +#undef kmem_cache_free +#endif #define LUSTRE_ROOT_FID_SEQ 0 #define DMU_OSD_SVNAME "svname" @@ -71,6 +82,17 @@ #define OSD_GFP_IO (GFP_NOFS | __GFP_HIGHMEM) +/* Statfs space reservation for grant, fragmentation, and unlink space. */ +#define OSD_STATFS_RESERVED_SIZE (16ULL << 20) /* reserve 16MB minimum */ +#define OSD_STATFS_RESERVED_SHIFT (7) /* reserve 0.78% of all space */ + +/* Statfs {minimum, safe estimate, and maximum} dnodes per block */ +#define OSD_DNODE_MIN_BLKSHIFT (DNODES_PER_BLOCK_SHIFT) +#define OSD_DNODE_EST_BLKSHIFT (DNODES_PER_BLOCK_SHIFT >> 1) +#define OSD_DNODE_EST_COUNT 1024 + +#define OSD_GRANT_FOR_LOCAL_OIDS (2ULL << 20) /* 2MB for last_rcvd, ... */ + /** * Iterator's in-memory data structure for quota file. */ @@ -95,7 +117,6 @@ struct osd_it_quota { struct osd_zap_it { zap_cursor_t *ozi_zc; struct osd_object *ozi_obj; - struct lustre_capa *ozi_capa; unsigned ozi_reset:1; /* 1 -- no need to advance */ /* ozi_pos - position of the cursor: * 0 - before any record @@ -150,22 +171,11 @@ struct osd_thread_info { * XXX temporary: for ->i_op calls. */ struct timespec oti_time; - /* - * XXX temporary: for capa operations. - */ - struct lustre_capa_key oti_capa_key; - struct lustre_capa oti_capa; struct ost_id oti_ostid; char oti_buf[64]; - /** osd iterator context used for iterator session */ - union { - struct osd_zap_it oti_it_zap; - struct osd_it_quota oti_it_quota; - }; - char oti_str[64]; union { char oti_key[MAXNAMELEN + 1]; @@ -193,6 +203,8 @@ static inline struct osd_thread_info *osd_oti_get(const struct lu_env *env) struct osd_thandle { struct thandle ot_super; struct list_head ot_dcb_list; + struct list_head ot_stop_dcb_list; + struct list_head ot_unlinked_list; struct list_head ot_sa_list; struct semaphore ot_sa_lock; dmu_tx_t *ot_tx; @@ -214,7 +226,7 @@ struct osd_oi { struct osd_seq { uint64_t *os_compat_dirs; int os_subdir_count; /* subdir count for each seq */ - obd_seq os_seq; /* seq number */ + u64 os_seq; /* seq number */ struct list_head os_seq_list; /* list to seq_list */ }; @@ -233,27 +245,25 @@ struct osd_device { /* super-class */ struct dt_device od_dt_dev; /* information about underlying file system */ - udmu_objset_t od_objset; - - /* - * Fid Capability - */ - unsigned int od_fl_capa:1; - unsigned long od_capa_timeout; - __u32 od_capa_alg; - struct lustre_capa_key *od_capa_keys; - struct hlist_head *od_capa_hash; + struct objset *od_os; + uint64_t od_rootid; /* id of root znode */ + uint64_t od_unlinkedid; /* id of unlinked zapobj */ + /* SA attr mapping->id, + * name is the same as in ZFS to use defines SA_ZPL_...*/ + sa_attr_type_t *z_attr_table; struct proc_dir_entry *od_proc_entry; struct lprocfs_stats *od_stats; + uint64_t od_max_blksz; uint64_t od_root; uint64_t od_O_id; struct osd_oi **od_oi_table; unsigned int od_oi_count; struct osd_seq_list od_seq_list; - unsigned int od_rdonly:1, + unsigned int od_dev_set_rdonly:1, /**< osd_ro() called */ + od_prop_rdonly:1, /**< ZFS property readonly */ od_xattr_in_sa:1, od_quota_iused_est:1, od_is_ost:1, @@ -272,6 +282,10 @@ struct osd_device { /* quota slave instance */ struct qsd_instance *od_quota_slave; + struct brw_stats od_brw_stats; + atomic_t od_r_in_flight; + atomic_t od_w_in_flight; + /* used to debug zerocopy logic: the fields track all * allocated, loaned and referenced buffers in use. * to be removed once the change is tested well. */ @@ -285,6 +299,12 @@ struct osd_device { struct lu_client_seq *od_cl_seq; }; +enum osd_destroy_type { + OSD_DESTROY_NONE = 0, + OSD_DESTROY_SYNC = 1, + OSD_DESTROY_ASYNC = 2, +}; + struct osd_object { struct dt_object oo_dt; /* @@ -298,6 +318,7 @@ struct osd_object { sa_handle_t *oo_sa_hdl; nvlist_t *oo_sa_xattr; struct list_head oo_sa_linkage; + struct list_head oo_unlinked_linkage; struct rw_semaphore oo_sem; @@ -305,10 +326,15 @@ struct osd_object { rwlock_t oo_attr_lock; struct lu_attr oo_attr; - /* protects extended attributes */ + /* protects extended attributes and oo_unlinked_linkage */ struct semaphore oo_guard; uint64_t oo_xattr; + enum osd_destroy_type oo_destroy; + __u32 oo_destroyed:1; + + /* the i_flags in LMA */ + __u32 oo_lma_flags; /* record size for index file */ unsigned char oo_keysize; unsigned char oo_recsize; @@ -319,10 +345,13 @@ int osd_statfs(const struct lu_env *, struct dt_device *, struct obd_statfs *); extern const struct dt_index_operations osd_acct_index_ops; uint64_t osd_quota_fid2dmu(const struct lu_fid *fid); extern struct lu_device_operations osd_lu_ops; +extern struct dt_index_operations osd_dir_ops; int osd_declare_quota(const struct lu_env *env, struct osd_device *osd, qid_t uid, qid_t gid, long long space, struct osd_thandle *oh, bool is_blk, int *flags, bool force); +uint64_t osd_objs_count_estimate(uint64_t refdbytes, uint64_t usedobjs, + uint64_t nrblocks, uint64_t est_maxblockshift); /* * Helpers. @@ -367,7 +396,7 @@ static inline struct lu_device *osd2lu_dev(struct osd_device *osd) static inline struct objset * osd_dtobj2objset(struct dt_object *o) { - return osd_dev(o->do_lu.lo_dev)->od_objset.os; + return osd_dev(o->do_lu.lo_dev)->od_os; } static inline int osd_invariant(const struct osd_object *obj) @@ -390,7 +419,7 @@ static inline char *osd_name(struct osd_device *osd) return osd->od_dt_dev.dd_lu_dev.ld_obd->obd_name; } -#ifdef LPROCFS +#ifdef CONFIG_PROC_FS enum { LPROC_OSD_READ_BYTES = 0, LPROC_OSD_WRITE_BYTES = 1, @@ -405,34 +434,29 @@ enum { LPROC_OSD_LAST, }; +extern struct kmem_cache *osd_zapit_cachep; /* osd_lproc.c */ -extern struct lprocfs_seq_vars lprocfs_osd_obd_vars[]; +extern struct lprocfs_vars lprocfs_osd_obd_vars[]; int osd_procfs_init(struct osd_device *osd, const char *name); int osd_procfs_fini(struct osd_device *osd); -int udmu_zap_cursor_retrieve_key(const struct lu_env *env, - zap_cursor_t *zc, char *key, int max); -int udmu_zap_cursor_retrieve_value(const struct lu_env *env, - zap_cursor_t *zc, char *buf, - int buf_size, int *bytes_read); - /* osd_object.c */ +extern char *osd_obj_tag; void osd_object_sa_dirty_rele(struct osd_thandle *oh); int __osd_obj2dbuf(const struct lu_env *env, objset_t *os, - uint64_t oid, dmu_buf_t **dbp, void *tag); + uint64_t oid, dmu_buf_t **dbp); struct lu_object *osd_object_alloc(const struct lu_env *env, const struct lu_object_header *hdr, struct lu_device *d); int osd_object_sa_update(struct osd_object *obj, sa_attr_type_t type, void *buf, uint32_t buflen, struct osd_thandle *oh); -int __osd_zap_create(const struct lu_env *env, udmu_objset_t *uos, +int __osd_zap_create(const struct lu_env *env, struct osd_device *osd, dmu_buf_t **zap_dbp, dmu_tx_t *tx, struct lu_attr *la, - uint64_t parent, void *tag, zap_flags_t flags); -int __osd_object_create(const struct lu_env *env, udmu_objset_t *uos, + uint64_t parent, zap_flags_t flags); +int __osd_object_create(const struct lu_env *env, struct osd_object *obj, dmu_buf_t **dbp, dmu_tx_t *tx, struct lu_attr *la, - uint64_t parent, void *tag); -int __osd_object_free(udmu_objset_t *uos, uint64_t oid, dmu_tx_t *tx); + uint64_t parent); /* osd_oi.c */ int osd_oi_init(const struct lu_env *env, struct osd_device *o); @@ -442,37 +466,45 @@ int osd_fid_lookup(const struct lu_env *env, uint64_t osd_get_name_n_idx(const struct lu_env *env, struct osd_device *osd, const struct lu_fid *fid, char *buf); int osd_options_init(void); -int osd_convert_root_to_new_seq(const struct lu_env *env, - struct osd_device *o); int osd_ost_seq_exists(const struct lu_env *env, struct osd_device *osd, __u64 seq); /* osd_index.c */ int osd_index_try(const struct lu_env *env, struct dt_object *dt, const struct dt_index_features *feat); int osd_fld_lookup(const struct lu_env *env, struct osd_device *osd, - obd_seq seq, struct lu_seq_range *range); + u64 seq, struct lu_seq_range *range); +void osd_zap_cursor_init_serialized(zap_cursor_t *zc, struct objset *os, + uint64_t id, uint64_t dirhash); +int osd_zap_cursor_init(zap_cursor_t **zc, struct objset *os, + uint64_t id, uint64_t dirhash); +void osd_zap_cursor_fini(zap_cursor_t *zc); +uint64_t osd_zap_cursor_serialize(zap_cursor_t *zc); /* osd_xattr.c */ -int __osd_xattr_load(udmu_objset_t *uos, uint64_t dnode, nvlist_t **sa_xattr); -int __osd_xattr_get_large(const struct lu_env *env, udmu_objset_t *uos, +int __osd_xattr_load(struct osd_device *osd, uint64_t dnode, + nvlist_t **sa_xattr); +int __osd_xattr_get_large(const struct lu_env *env, struct osd_device *osd, uint64_t xattr, struct lu_buf *buf, const char *name, int *sizep); int osd_xattr_get(const struct lu_env *env, struct dt_object *dt, - struct lu_buf *buf, const char *name, - struct lustre_capa *capa); + struct lu_buf *buf, const char *name); int osd_declare_xattr_set(const struct lu_env *env, struct dt_object *dt, const struct lu_buf *buf, const char *name, int fl, struct thandle *handle); int osd_xattr_set(const struct lu_env *env, struct dt_object *dt, const struct lu_buf *buf, const char *name, int fl, - struct thandle *handle, struct lustre_capa *capa); + struct thandle *handle); int osd_declare_xattr_del(const struct lu_env *env, struct dt_object *dt, const char *name, struct thandle *handle); int osd_xattr_del(const struct lu_env *env, struct dt_object *dt, - const char *name, struct thandle *handle, - struct lustre_capa *capa); + const char *name, struct thandle *handle); +void osd_declare_xattrs_destroy(const struct lu_env *env, + struct osd_object *obj, + struct osd_thandle *oh); +int osd_xattrs_destroy(const struct lu_env *env, + struct osd_object *obj, struct osd_thandle *oh); int osd_xattr_list(const struct lu_env *env, struct dt_object *dt, - struct lu_buf *lb, struct lustre_capa *capa); + const struct lu_buf *lb); void __osd_xattr_declare_set(const struct lu_env *env, struct osd_object *obj, int vallen, const char *name, struct osd_thandle *oh); int __osd_sa_xattr_set(const struct lu_env *env, struct osd_object *obj, @@ -484,7 +516,7 @@ int __osd_xattr_set(const struct lu_env *env, struct osd_object *obj, static inline int osd_xattr_set_internal(const struct lu_env *env, struct osd_object *obj, const struct lu_buf *buf, const char *name, int fl, - struct osd_thandle *oh, struct lustre_capa *capa) + struct osd_thandle *oh) { int rc; @@ -501,16 +533,16 @@ osd_xattr_set_internal(const struct lu_env *env, struct osd_object *obj, static inline uint64_t attrs_fs2zfs(const uint32_t flags) { - return (((flags & FS_APPEND_FL) ? ZFS_APPENDONLY : 0) | - ((flags & FS_NODUMP_FL) ? ZFS_NODUMP : 0) | - ((flags & FS_IMMUTABLE_FL) ? ZFS_IMMUTABLE : 0)); + return (flags & LUSTRE_APPEND_FL ? ZFS_APPENDONLY : 0) | + (flags & LUSTRE_NODUMP_FL ? ZFS_NODUMP : 0) | + (flags & LUSTRE_IMMUTABLE_FL ? ZFS_IMMUTABLE : 0); } static inline uint32_t attrs_zfs2fs(const uint64_t flags) { - return (((flags & ZFS_APPENDONLY) ? FS_APPEND_FL : 0) | - ((flags & ZFS_NODUMP) ? FS_NODUMP_FL : 0) | - ((flags & ZFS_IMMUTABLE) ? FS_IMMUTABLE_FL : 0)); + return (flags & ZFS_APPENDONLY ? LUSTRE_APPEND_FL : 0) | + (flags & ZFS_NODUMP ? LUSTRE_NODUMP_FL : 0) | + (flags & ZFS_IMMUTABLE ? LUSTRE_IMMUTABLE_FL : 0); } #endif @@ -523,7 +555,32 @@ static inline void dsl_pool_config_enter(dsl_pool_t *dp, char *name) static inline void dsl_pool_config_exit(dsl_pool_t *dp, char *name) { } +#endif + +#ifdef HAVE_SPA_MAXBLOCKSIZE +#define osd_spa_maxblocksize(spa) spa_maxblocksize(spa) +#define osd_spa_maxblockshift(spa) fls64(spa_maxblocksize(spa) - 1) +#else +#define osd_spa_maxblocksize(spa) SPA_MAXBLOCKSIZE +#define osd_spa_maxblockshift(spa) SPA_MAXBLOCKSHIFT +#define SPA_OLD_MAXBLOCKSIZE SPA_MAXBLOCKSIZE +#endif + +#ifdef HAVE_SA_SPILL_ALLOC +static inline void * +osd_zio_buf_alloc(size_t size) +{ + return sa_spill_alloc(KM_SLEEP); +} +static inline void +osd_zio_buf_free(void *buf, size_t size) +{ + sa_spill_free(buf); +} +#else +#define osd_zio_buf_alloc(size) zio_buf_alloc(size) +#define osd_zio_buf_free(buf, size) zio_buf_free(buf, size) #endif #endif /* _OSD_INTERNAL_H */