X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Finclude%2Flu_object.h;h=8a9635e4cf735f1e10777e5d7b80cdf422fa92b9;hp=b3125392996530e33e6adddc0fac3d5d6a1c2deb;hb=45222b2ef279d62ac3aab0e7babc55d77e3c93a2;hpb=908f7dc914c3dfc67ee8aa9246844ae92e63a3a0 diff --git a/lustre/include/lu_object.h b/lustre/include/lu_object.h index b312539..8a9635e 100644 --- a/lustre/include/lu_object.h +++ b/lustre/include/lu_object.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -27,7 +23,7 @@ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2011, 2013, Intel Corporation. + * Copyright (c) 2011, 2017, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -39,13 +35,15 @@ #include #include -#include +#include #include +#include struct seq_file; struct proc_dir_entry; struct lustre_cfg; struct lprocfs_stats; +struct obd_type; /** \defgroup lu lu * lu_* data-types represent server-side entities shared by data and meta-data @@ -169,10 +167,6 @@ typedef enum { /* This is a new object to be allocated, or the file * corresponding to the object does not exists. */ LOC_F_NEW = 0x00000001, - - /* When find a dying object, just return -EAGAIN at once instead of - * blocking the thread. */ - LOC_F_NOWAIT = 0x00000002, } loc_flags_t; /** @@ -327,10 +321,6 @@ struct lu_device_type { */ const struct lu_device_type_operations *ldt_ops; /** - * \todo XXX: temporary pointer to associated obd_type. - */ - struct obd_type *ldt_obd_type; - /** * \todo XXX: temporary: context tags used by obd_*() calls. */ __u32 ldt_ctx_tags; @@ -338,12 +328,6 @@ struct lu_device_type { * Number of existing device type instances. */ atomic_t ldt_device_nr; - /** - * Linkage into a global list of all device types. - * - * \see lu_device_types. - */ - struct list_head ldt_linkage; }; /** @@ -405,14 +389,20 @@ static inline int lu_device_is_md(const struct lu_device *d) * Common object attributes. */ struct lu_attr { + /** + * valid bits + * + * \see enum la_valid + */ + __u64 la_valid; /** size in bytes */ __u64 la_size; - /** modification time in seconds since Epoch */ - obd_time la_mtime; - /** access time in seconds since Epoch */ - obd_time la_atime; - /** change time in seconds since Epoch */ - obd_time la_ctime; + /** modification time in seconds since Epoch */ + s64 la_mtime; + /** access time in seconds since Epoch */ + s64 la_atime; + /** change time in seconds since Epoch */ + s64 la_ctime; /** 512-byte blocks allocated to object */ __u64 la_blocks; /** permission bits and file type */ @@ -431,31 +421,10 @@ struct lu_attr { __u32 la_blksize; /** real device */ __u32 la_rdev; - /** - * valid bits - * - * \see enum la_valid - */ - __u64 la_valid; -}; - -/** Bit-mask of valid attributes */ -enum la_valid { - LA_ATIME = 1 << 0, - LA_MTIME = 1 << 1, - LA_CTIME = 1 << 2, - LA_SIZE = 1 << 3, - LA_MODE = 1 << 4, - LA_UID = 1 << 5, - LA_GID = 1 << 6, - LA_BLOCKS = 1 << 7, - LA_TYPE = 1 << 8, - LA_FLAGS = 1 << 9, - LA_NLINK = 1 << 10, - LA_RDEV = 1 << 11, - LA_BLKSIZE = 1 << 12, - LA_KILL_SUID = 1 << 13, - LA_KILL_SGID = 1 << 14, + /** project id */ + __u32 la_projid; + /** set layout version to OST objects. */ + __u32 la_layout_version; }; /** @@ -494,17 +463,23 @@ enum lu_object_header_flags { /** * Mark this object has already been taken out of cache. */ - LU_OBJECT_UNHASHED = 1, + LU_OBJECT_UNHASHED = 1, + /** + * Object is initialized, when object is found in cache, it may not be + * intialized yet, the object allocator will initialize it. + */ + LU_OBJECT_INITED = 2 }; enum lu_object_header_attr { - LOHA_EXISTS = 1 << 0, - LOHA_REMOTE = 1 << 1, - /** - * UNIX file type is stored in S_IFMT bits. - */ - LOHA_FT_START = 001 << 12, /**< S_IFIFO */ - LOHA_FT_END = 017 << 12, /**< S_IFMT */ + LOHA_EXISTS = 1 << 0, + LOHA_REMOTE = 1 << 1, + LOHA_HAS_AGENT_ENTRY = 1 << 2, + /** + * UNIX file type is stored in S_IFMT bits. + */ + LOHA_FT_START = 001 << 12, /**< S_IFIFO */ + LOHA_FT_END = 017 << 12, /**< S_IFMT */ }; /** @@ -558,39 +533,14 @@ struct lu_object_header { struct fld; -struct lu_site_bkt_data { - /** - * number of busy object on this bucket - */ - long lsb_busy; - /** - * LRU list, updated on each access to object. Protected by - * bucket lock of lu_site::ls_obj_hash. - * - * "Cold" end of LRU is lu_site::ls_lru.next. Accessed object are - * moved to the lu_site::ls_lru.prev (this is due to the non-existence - * of list_for_each_entry_safe_reverse()). - */ - struct list_head lsb_lru; - /** - * Wait-queue signaled when an object in this site is ultimately - * destroyed (lu_object_free()). It is used by lu_object_find() to - * wait before re-trying when object in the process of destruction is - * found in the hash table. - * - * \see htable_lookup(). - */ - wait_queue_head_t lsb_marche_funebre; -}; - enum { - LU_SS_CREATED = 0, - LU_SS_CACHE_HIT, - LU_SS_CACHE_MISS, - LU_SS_CACHE_RACE, - LU_SS_CACHE_DEATH_RACE, - LU_SS_LRU_PURGED, - LU_SS_LAST_STAT + LU_SS_CREATED = 0, + LU_SS_CACHE_HIT, + LU_SS_CACHE_MISS, + LU_SS_CACHE_RACE, + LU_SS_CACHE_DEATH_RACE, + LU_SS_LRU_PURGED, + LU_SS_LAST_STAT }; /** @@ -607,7 +557,7 @@ struct lu_site { /** * objects hash table */ - cfs_hash_t *ls_obj_hash; + struct cfs_hash *ls_obj_hash; /** * index of bucket on hash table while purging */ @@ -642,16 +592,19 @@ struct lu_site { * XXX: a hack! fld has to find md_site via site, remove when possible */ struct seq_server_site *ld_seq_site; + /** + * Pointer to the lu_target for this site. + */ + struct lu_target *ls_tgt; + + /** + * Number of objects in lsb_lru_lists - used for shrinking + */ + struct percpu_counter ls_lru_len_counter; }; -static inline struct lu_site_bkt_data * -lu_site_bkt_from_fid(struct lu_site *site, struct lu_fid *fid) -{ - cfs_hash_bd_t bd; - - cfs_hash_bd_get(site->ls_obj_hash, fid, &bd); - return cfs_hash_bd_extra_get(site->ls_obj_hash, &bd); -} +wait_queue_head_t * +lu_site_wq_from_fid(struct lu_site *site, struct lu_fid *fid); static inline struct seq_server_site *lu_site2seq(const struct lu_site *s) { @@ -708,7 +661,7 @@ static inline void lu_object_get(struct lu_object *o) } /** - * Return true of object will not be cached after last reference to it is + * Return true if object will not be cached after last reference to it is * released. */ static inline int lu_object_is_dying(const struct lu_object_header *h) @@ -716,11 +669,25 @@ static inline int lu_object_is_dying(const struct lu_object_header *h) return test_bit(LU_OBJECT_HEARD_BANSHEE, &h->loh_flags); } +/** + * Return true if object is initialized. + */ +static inline int lu_object_is_inited(const struct lu_object_header *h) +{ + return test_bit(LU_OBJECT_INITED, &h->loh_flags); +} + void lu_object_put(const struct lu_env *env, struct lu_object *o); void lu_object_put_nocache(const struct lu_env *env, struct lu_object *o); void lu_object_unhash(const struct lu_env *env, struct lu_object *o); +int lu_site_purge_objects(const struct lu_env *env, struct lu_site *s, int nr, + int canblock); -int lu_site_purge(const struct lu_env *env, struct lu_site *s, int nr); +static inline int lu_site_purge(const struct lu_env *env, struct lu_site *s, + int nr) +{ + return lu_site_purge_objects(env, s, nr, 1); +} void lu_site_print(const struct lu_env *env, struct lu_site *s, void *cookie, lu_printer_t printer); @@ -731,8 +698,6 @@ struct lu_object *lu_object_find_at(const struct lu_env *env, struct lu_device *dev, const struct lu_fid *f, const struct lu_object_conf *conf); -void lu_object_purge(const struct lu_env *env, struct lu_device *dev, - const struct lu_fid *f); struct lu_object *lu_object_find_slice(const struct lu_env *env, struct lu_device *dev, const struct lu_fid *f, @@ -799,7 +764,7 @@ do { \ if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \ LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \ lu_object_print(env, &msgdata, lu_cdebug_printer, object);\ - CDEBUG(mask, format , ## __VA_ARGS__); \ + CDEBUG(mask, format "\n", ## __VA_ARGS__); \ } \ } while (0) @@ -841,6 +806,22 @@ int lu_object_invariant(const struct lu_object *o); */ #define lu_object_remote(o) unlikely((o)->lo_header->loh_attr & LOHA_REMOTE) +/** + * Check whether the object as agent entry on current target + */ +#define lu_object_has_agent_entry(o) \ + unlikely((o)->lo_header->loh_attr & LOHA_HAS_AGENT_ENTRY) + +static inline void lu_object_set_agent_entry(struct lu_object *o) +{ + o->lo_header->loh_attr |= LOHA_HAS_AGENT_ENTRY; +} + +static inline void lu_object_clear_agent_entry(struct lu_object *o) +{ + o->lo_header->loh_attr &= ~LOHA_HAS_AGENT_ENTRY; +} + static inline int lu_object_assert_exists(const struct lu_object *o) { return lu_object_exists(o); @@ -857,7 +838,8 @@ static inline int lu_object_assert_not_exists(const struct lu_object *o) static inline __u32 lu_object_attr(const struct lu_object *o) { LASSERT(lu_object_exists(o) != 0); - return o->lo_header->loh_attr; + + return o->lo_header->loh_attr & S_IFMT; } static inline void lu_object_ref_add(struct lu_object *o, @@ -904,7 +886,9 @@ struct lu_rdpg { enum lu_xattr_flags { LU_XATTR_REPLACE = (1 << 0), - LU_XATTR_CREATE = (1 << 1) + LU_XATTR_CREATE = (1 << 1), + LU_XATTR_MERGE = (1 << 2), + LU_XATTR_SPLIT = (1 << 3), }; /** @} helpers */ @@ -916,6 +900,7 @@ enum lu_xattr_flags { enum lu_context_state { LCS_INITIALIZED = 1, LCS_ENTERED, + LCS_LEAVING, LCS_LEFT, LCS_FINALIZED }; @@ -995,10 +980,6 @@ enum lu_context_tag { */ LCT_DT_THREAD = 1 << 1, /** - * Context for transaction handle - */ - LCT_TX_HANDLE = 1 << 2, - /** * Thread on client */ LCT_CL_THREAD = 1 << 3, @@ -1130,20 +1111,20 @@ struct lu_context_key { }; #define LU_KEY_INIT(mod, type) \ - static void* mod##_key_init(const struct lu_context *ctx, \ - struct lu_context_key *key) \ - { \ - type *value; \ - \ - CLASSERT(PAGE_CACHE_SIZE >= sizeof (*value)); \ + static void *mod##_key_init(const struct lu_context *ctx, \ + struct lu_context_key *key) \ + { \ + type *value; \ \ - OBD_ALLOC_PTR(value); \ - if (value == NULL) \ - value = ERR_PTR(-ENOMEM); \ + CLASSERT(PAGE_SIZE >= sizeof(*value)); \ \ - return value; \ - } \ - struct __##mod##__dummy_init {;} /* semicolon catcher */ + OBD_ALLOC_PTR(value); \ + if (value == NULL) \ + value = ERR_PTR(-ENOMEM); \ + \ + return value; \ + } \ + struct __##mod##__dummy_init { ; } /* semicolon catcher */ #define LU_KEY_FINI(mod, type) \ static void mod##_key_fini(const struct lu_context *ctx, \ @@ -1279,6 +1260,10 @@ void lu_env_fini (struct lu_env *env); int lu_env_refill(struct lu_env *env); int lu_env_refill_by_tags(struct lu_env *env, __u32 ctags, __u32 stags); +struct lu_env *lu_env_find(void); +int lu_env_add(struct lu_env *env); +void lu_env_remove(struct lu_env *env); + /** @} lu_context */ /** @@ -1286,7 +1271,6 @@ int lu_env_refill_by_tags(struct lu_env *env, __u32 ctags, __u32 stags); * ll_rd_*()-style functions. */ int lu_site_stats_seq_print(const struct lu_site *s, struct seq_file *m); -int lu_site_stats_print(const struct lu_site *s, char *page, int count); /** * Common name structure to be passed around for various name related methods. @@ -1296,6 +1280,26 @@ struct lu_name { int ln_namelen; }; +static inline bool name_is_dot_or_dotdot(const char *name, int namelen) +{ + return name[0] == '.' && + (namelen == 1 || (namelen == 2 && name[1] == '.')); +} + +static inline bool lu_name_is_dot_or_dotdot(const struct lu_name *lname) +{ + return name_is_dot_or_dotdot(lname->ln_name, lname->ln_namelen); +} + +static inline bool lu_name_is_valid_len(const char *name, size_t name_len) +{ + return name != NULL && + name_len > 0 && + name_len < INT_MAX && + strlen(name) == name_len && + memchr(name, '/', name_len) == NULL; +} + /** * Validate names (path components) * @@ -1307,12 +1311,7 @@ struct lu_name { */ static inline bool lu_name_is_valid_2(const char *name, size_t name_len) { - return name != NULL && - name_len > 0 && - name_len < INT_MAX && - name[name_len] == '\0' && - strlen(name) == name_len && - memchr(name, '/', name_len) == NULL; + return lu_name_is_valid_len(name, name_len) && name[name_len] == '\0'; } static inline bool lu_name_is_valid(const struct lu_name *ln) @@ -1336,6 +1335,15 @@ struct lu_buf { #define DLUBUF "(%p %zu)" #define PLUBUF(buf) (buf)->lb_buf, (buf)->lb_len + +/* read buffer params, should be filled out by out */ +struct lu_rdbuf { + /** number of buffers */ + unsigned int rb_nbufs; + /** pointers to buffers */ + struct lu_buf rb_bufs[]; +}; + /** * One-time initializers, called at obdclass module initialization, not * exported. @@ -1376,5 +1384,184 @@ void lu_buf_realloc(struct lu_buf *buf, size_t size); int lu_buf_check_and_grow(struct lu_buf *buf, size_t len); struct lu_buf *lu_buf_check_and_alloc(struct lu_buf *buf, size_t len); +extern __u32 lu_context_tags_default; +extern __u32 lu_session_tags_default; + +static inline bool lu_device_is_cl(const struct lu_device *d) +{ + return d->ld_type->ldt_tags & LU_DEVICE_CL; +} + +static inline bool lu_object_is_cl(const struct lu_object *o) +{ + return lu_device_is_cl(o->lo_dev); +} + +/* Generic subset of tgts */ +struct lu_tgt_pool { + __u32 *op_array; /* array of index of + * lov_obd->lov_tgts */ + unsigned int op_count; /* number of tgts in the array */ + unsigned int op_size; /* allocated size of op_array */ + struct rw_semaphore op_rw_sem; /* to protect lu_tgt_pool use */ +}; + +/* round-robin QoS data for LOD/LMV */ +struct lu_qos_rr { + spinlock_t lqr_alloc; /* protect allocation index */ + __u32 lqr_start_idx; /* start index of new inode */ + __u32 lqr_offset_idx;/* aliasing for start_idx */ + int lqr_start_count;/* reseed counter */ + struct lu_tgt_pool lqr_pool; /* round-robin optimized list */ + unsigned long lqr_dirty:1; /* recalc round-robin list */ +}; + +/* QoS data per MDS/OSS */ +struct lu_svr_qos { + struct obd_uuid lsq_uuid; /* ptlrpc's c_remote_uuid */ + struct list_head lsq_svr_list; /* link to lq_svr_list */ + __u64 lsq_bavail; /* total bytes avail on svr */ + __u64 lsq_iavail; /* tital inode avail on svr */ + __u64 lsq_penalty; /* current penalty */ + __u64 lsq_penalty_per_obj; /* penalty decrease + * every obj*/ + time64_t lsq_used; /* last used time, seconds */ + __u32 lsq_tgt_count; /* number of tgts on this svr */ + __u32 lsq_id; /* unique svr id */ +}; + +/* QoS data per MDT/OST */ +struct lu_tgt_qos { + struct lu_svr_qos *ltq_svr; /* svr info */ + __u64 ltq_penalty; /* current penalty */ + __u64 ltq_penalty_per_obj; /* penalty decrease + * every obj*/ + __u64 ltq_weight; /* net weighting */ + time64_t ltq_used; /* last used time, seconds */ + bool ltq_usable:1; /* usable for striping */ +}; + +/* target descriptor */ +struct lu_tgt_desc { + union { + struct dt_device *ltd_tgt; + struct obd_device *ltd_obd; + }; + struct obd_export *ltd_exp; + struct obd_uuid ltd_uuid; + __u32 ltd_index; + __u32 ltd_gen; + struct list_head ltd_kill; + struct ptlrpc_thread *ltd_recovery_thread; + struct mutex ltd_fid_mutex; + struct lu_tgt_qos ltd_qos; /* qos info per target */ + struct obd_statfs ltd_statfs; + time64_t ltd_statfs_age; + unsigned long ltd_active:1,/* is this target up for requests */ + ltd_activate:1,/* should target be activated */ + ltd_reap:1, /* should this target be deleted */ + ltd_got_update_log:1, /* Already got update log */ + ltd_connecting:1; /* target is connecting */ +}; + +/* number of pointers at 1st level */ +#define TGT_PTRS (PAGE_SIZE / sizeof(void *)) +/* number of pointers at 2nd level */ +#define TGT_PTRS_PER_BLOCK (PAGE_SIZE / sizeof(void *)) + +struct lu_tgt_desc_idx { + struct lu_tgt_desc *ldi_tgt[TGT_PTRS_PER_BLOCK]; +}; + +/* QoS data for LOD/LMV */ +struct lu_qos { + struct list_head lq_svr_list; /* lu_svr_qos list */ + struct rw_semaphore lq_rw_sem; + __u32 lq_active_svr_count; + unsigned int lq_prio_free; /* priority for free space */ + unsigned int lq_threshold_rr;/* priority for rr */ + struct lu_qos_rr lq_rr; /* round robin qos data */ + unsigned long lq_dirty:1, /* recalc qos data */ + lq_same_space:1,/* the servers all have approx. + * the same space avail */ + lq_reset:1; /* zero current penalties */ +}; + +struct lu_tgt_descs { + union { + struct lov_desc ltd_lov_desc; + struct lmv_desc ltd_lmv_desc; + }; + /* list of known TGTs */ + struct lu_tgt_desc_idx *ltd_tgt_idx[TGT_PTRS]; + /* Size of the lu_tgts array, granted to be a power of 2 */ + __u32 ltd_tgts_size; + /* bitmap of TGTs available */ + struct cfs_bitmap *ltd_tgt_bitmap; + /* TGTs scheduled to be deleted */ + __u32 ltd_death_row; + /* Table refcount used for delayed deletion */ + int ltd_refcount; + /* mutex to serialize concurrent updates to the tgt table */ + struct mutex ltd_mutex; + /* read/write semaphore used for array relocation */ + struct rw_semaphore ltd_rw_sem; + /* QoS */ + struct lu_qos ltd_qos; + /* all tgts in a packed array */ + struct lu_tgt_pool ltd_tgt_pool; + /* true if tgt is MDT */ + bool ltd_is_mdt; +}; + +#define LTD_TGT(ltd, index) \ + (ltd)->ltd_tgt_idx[(index) / \ + TGT_PTRS_PER_BLOCK]->ldi_tgt[(index) % TGT_PTRS_PER_BLOCK] + +u64 lu_prandom_u64_max(u64 ep_ro); +void lu_qos_rr_init(struct lu_qos_rr *lqr); +int lu_qos_add_tgt(struct lu_qos *qos, struct lu_tgt_desc *ltd); +void lu_tgt_qos_weight_calc(struct lu_tgt_desc *tgt); + +int lu_tgt_descs_init(struct lu_tgt_descs *ltd, bool is_mdt); +void lu_tgt_descs_fini(struct lu_tgt_descs *ltd); +int ltd_add_tgt(struct lu_tgt_descs *ltd, struct lu_tgt_desc *tgt); +void ltd_del_tgt(struct lu_tgt_descs *ltd, struct lu_tgt_desc *tgt); +bool ltd_qos_is_usable(struct lu_tgt_descs *ltd); +int ltd_qos_penalties_calc(struct lu_tgt_descs *ltd); +int ltd_qos_update(struct lu_tgt_descs *ltd, struct lu_tgt_desc *tgt, + __u64 *total_wt); + +static inline struct lu_tgt_desc *ltd_first_tgt(struct lu_tgt_descs *ltd) +{ + int index; + + index = find_first_bit(ltd->ltd_tgt_bitmap->data, + ltd->ltd_tgt_bitmap->size); + return (index < ltd->ltd_tgt_bitmap->size) ? LTD_TGT(ltd, index) : NULL; +} + +static inline struct lu_tgt_desc *ltd_next_tgt(struct lu_tgt_descs *ltd, + struct lu_tgt_desc *tgt) +{ + int index; + + if (!tgt) + return NULL; + + index = tgt->ltd_index; + LASSERT(index < ltd->ltd_tgt_bitmap->size); + index = find_next_bit(ltd->ltd_tgt_bitmap->data, + ltd->ltd_tgt_bitmap->size, index + 1); + return (index < ltd->ltd_tgt_bitmap->size) ? LTD_TGT(ltd, index) : NULL; +} + +#define ltd_foreach_tgt(ltd, tgt) \ + for (tgt = ltd_first_tgt(ltd); tgt; tgt = ltd_next_tgt(ltd, tgt)) + +#define ltd_foreach_tgt_safe(ltd, tgt, tmp) \ + for (tgt = ltd_first_tgt(ltd), tmp = ltd_next_tgt(ltd, tgt); tgt; \ + tgt = tmp, tmp = ltd_next_tgt(ltd, tgt)) + /** @} lu */ #endif /* __LUSTRE_LU_OBJECT_H */