X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;ds=sidebyside;f=lustre%2Finclude%2Flu_object.h;h=15ebba65f4d5d50187fd39e20d3eed393d27722e;hb=45222b2ef279d62ac3aab0e7babc55d77e3c93a2;hp=64384b79a6eb8405be3d0ee64c92a11966e26725;hpb=9832985e363eb68ecfa904b274adedddb93becd7;p=fs%2Flustre-release.git diff --git a/lustre/include/lu_object.h b/lustre/include/lu_object.h index 64384b7..8a9635e 100644 --- a/lustre/include/lu_object.h +++ b/lustre/include/lu_object.h @@ -1,6 +1,4 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * +/* * GPL HEADER START * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -17,17 +15,15 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. + * + * Copyright (c) 2011, 2017, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -38,20 +34,16 @@ #define __LUSTRE_LU_OBJECT_H #include - -/* - * struct lu_fid - */ -#include - #include - +#include #include +#include struct seq_file; struct proc_dir_entry; struct lustre_cfg; struct lprocfs_stats; +struct obd_type; /** \defgroup lu lu * lu_* data-types represent server-side entities shared by data and meta-data @@ -143,7 +135,7 @@ struct lu_device_operations { * repeatedly, until no new objects are created. * * \post ergo(!IS_ERR(result), result->lo_dev == d && - * result->lo_ops != NULL); + * result->lo_ops != NULL); */ struct lu_object *(*ldo_object_alloc)(const struct lu_env *env, const struct lu_object_header *h, @@ -156,14 +148,37 @@ struct lu_device_operations { int (*ldo_recovery_complete)(const struct lu_env *, struct lu_device *); + /** + * initialize local objects for device. this method called after layer has + * been initialized (after LCFG_SETUP stage) and before it starts serving + * user requests. + */ + + int (*ldo_prepare)(const struct lu_env *, + struct lu_device *parent, + struct lu_device *dev); + }; /** + * For lu_object_conf flags + */ +typedef enum { + /* This is a new object to be allocated, or the file + * corresponding to the object does not exists. */ + LOC_F_NEW = 0x00000001, +} loc_flags_t; + +/** * Object configuration, describing particulars of object being created. On * server this is not used, as server objects are full identified by fid. On * client configuration contains struct lustre_md. */ struct lu_object_conf { + /** + * Some hints for obj find and alloc. + */ + loc_flags_t loc_flags; }; /** @@ -177,7 +192,7 @@ typedef int (*lu_printer_t)(const struct lu_env *env, void *cookie, const char *format, ...) __attribute__ ((format (printf, 3, 4))); -/* +/** * Operations specific for particular lu_object. */ struct lu_object_operations { @@ -241,17 +256,17 @@ struct lu_device_type; * Device: a layer in the server side abstraction stacking. */ struct lu_device { - /** - * reference count. This is incremented, in particular, on each object - * created at this layer. - * - * \todo XXX which means that atomic_t is probably too small. 
- */ - atomic_t ld_ref; - /** - * Pointer to device type. Never modified once set. - */ - struct lu_device_type *ld_type; + /** + * reference count. This is incremented, in particular, on each object + * created at this layer. + * + * \todo XXX which means that atomic_t is probably too small. + */ + atomic_t ld_ref; + /** + * Pointer to device type. Never modified once set. + */ + struct lu_device_type *ld_type; /** * Operation vector for this device. */ @@ -259,15 +274,19 @@ struct lu_device { /** * Stack this device belongs to. */ - struct lu_site *ld_site; - struct proc_dir_entry *ld_proc_entry; + struct lu_site *ld_site; + struct proc_dir_entry *ld_proc_entry; /** \todo XXX: temporary back pointer into obd. */ - struct obd_device *ld_obd; + struct obd_device *ld_obd; /** * A list of references to this object, for debugging. */ struct lu_ref ld_reference; + /** + * Link the device to the site. + **/ + struct list_head ld_linkage; }; struct lu_device_type_operations; @@ -292,33 +311,23 @@ struct lu_device_type { /** * Tag bits. Taken from enum lu_device_tag. Never modified once set. */ - __u32 ldt_tags; + __u32 ldt_tags; /** * Name of this class. Unique system-wide. Never modified once set. */ - char *ldt_name; + char *ldt_name; /** * Operations for this type. */ const struct lu_device_type_operations *ldt_ops; /** - * \todo XXX: temporary pointer to associated obd_type. - */ - struct obd_type *ldt_obd_type; - /** * \todo XXX: temporary: context tags used by obd_*() calls. */ - __u32 ldt_ctx_tags; + __u32 ldt_ctx_tags; /** * Number of existing device type instances. */ - unsigned ldt_device_nr; - /** - * Linkage into a global list of all device types. - * - * \see lu_device_types. - */ - struct list_head ldt_linkage; + atomic_t ldt_device_nr; }; /** @@ -371,29 +380,29 @@ struct lu_device_type_operations { void (*ldto_stop)(struct lu_device_type *t); }; -/** - * Flags for the object layers. - */ -enum lu_object_flags { - /** - * this flags is set if lu_object_operations::loo_object_init() has - * been called for this layer. Used by lu_object_alloc(). - */ - LU_OBJECT_ALLOCATED = (1 << 0) -}; +static inline int lu_device_is_md(const struct lu_device *d) +{ + return ergo(d != NULL, d->ld_type->ldt_tags & LU_DEVICE_MD); +} /** * Common object attributes. */ struct lu_attr { + /** + * valid bits + * + * \see enum la_valid + */ + __u64 la_valid; /** size in bytes */ __u64 la_size; - /** modification time in seconds since Epoch */ - __u64 la_mtime; - /** access time in seconds since Epoch */ - __u64 la_atime; - /** change time in seconds since Epoch */ - __u64 la_ctime; + /** modification time in seconds since Epoch */ + s64 la_mtime; + /** access time in seconds since Epoch */ + s64 la_atime; + /** change time in seconds since Epoch */ + s64 la_ctime; /** 512-byte blocks allocated to object */ __u64 la_blocks; /** permission bits and file type */ @@ -412,85 +421,68 @@ struct lu_attr { __u32 la_blksize; /** real device */ __u32 la_rdev; - /** - * valid bits - * - * \see enum la_valid - */ - __u64 la_valid; -}; - -/** Bit-mask of valid attributes */ -enum la_valid { - LA_ATIME = 1 << 0, - LA_MTIME = 1 << 1, - LA_CTIME = 1 << 2, - LA_SIZE = 1 << 3, - LA_MODE = 1 << 4, - LA_UID = 1 << 5, - LA_GID = 1 << 6, - LA_BLOCKS = 1 << 7, - LA_TYPE = 1 << 8, - LA_FLAGS = 1 << 9, - LA_NLINK = 1 << 10, - LA_RDEV = 1 << 11, - LA_BLKSIZE = 1 << 12, + /** project id */ + __u32 la_projid; + /** set layout version to OST objects. */ + __u32 la_layout_version; }; -/* +/** * Layer in the layered object. 
*/ struct lu_object { - /* + /** * Header for this object. */ - struct lu_object_header *lo_header; - /* + struct lu_object_header *lo_header; + /** * Device for this layer. */ - struct lu_device *lo_dev; - /* + struct lu_device *lo_dev; + /** * Operations for this object. */ - struct lu_object_operations *lo_ops; - /* - * Linkage into list of all layers. - */ - struct list_head lo_linkage; - /* - * Depth. Top level layer depth is 0. - */ - int lo_depth; - /* - * Flags from enum lu_object_flags. - */ - unsigned long lo_flags; + const struct lu_object_operations *lo_ops; /** - * Link to the device, for debugging. + * Linkage into list of all layers. */ - struct lu_ref_link *lo_dev_ref; + struct list_head lo_linkage; + /** + * Link to the device, for debugging. + */ + struct lu_ref_link lo_dev_ref; }; enum lu_object_header_flags { - /* - * Don't keep this object in cache. Object will be destroyed as soon - * as last reference to it is released. This flag cannot be cleared - * once set. - */ - LU_OBJECT_HEARD_BANSHEE = 0 + /** + * Don't keep this object in cache. Object will be destroyed as soon + * as last reference to it is released. This flag cannot be cleared + * once set. + */ + LU_OBJECT_HEARD_BANSHEE = 0, + /** + * Mark this object has already been taken out of cache. + */ + LU_OBJECT_UNHASHED = 1, + /** + * Object is initialized, when object is found in cache, it may not be + * intialized yet, the object allocator will initialize it. + */ + LU_OBJECT_INITED = 2 }; enum lu_object_header_attr { - LOHA_EXISTS = 1 << 0, - LOHA_REMOTE = 1 << 1, - /* - * UNIX file type is stored in S_IFMT bits. - */ - LOHA_FT_START = 1 << 12, /* S_IFIFO */ - LOHA_FT_END = 1 << 15, /* S_IFREG */ + LOHA_EXISTS = 1 << 0, + LOHA_REMOTE = 1 << 1, + LOHA_HAS_AGENT_ENTRY = 1 << 2, + /** + * UNIX file type is stored in S_IFMT bits. + */ + LOHA_FT_START = 001 << 12, /**< S_IFIFO */ + LOHA_FT_END = 017 << 12, /**< S_IFMT */ }; -/* +/** * "Compound" object, consisting of multiple layers. * * Compound object with given fid is unique with given lu_site. @@ -502,45 +494,55 @@ enum lu_object_header_attr { * whether object is backed by persistent storage entity. */ struct lu_object_header { - /** - * Object flags from enum lu_object_header_flags. Set and checked - * atomically. - */ - unsigned long loh_flags; - /** - * Object reference count. Protected by lu_site::ls_guard. - */ - atomic_t loh_ref; - /** - * Fid, uniquely identifying this object. - */ - struct lu_fid loh_fid; - /** - * Common object attributes, cached for efficiency. From enum - * lu_object_header_attr. - */ - __u32 loh_attr; - /** - * Linkage into per-site hash table. Protected by lu_site::ls_guard. - */ - struct hlist_node loh_hash; - /** - * Linkage into per-site LRU list. Protected by lu_site::ls_guard. - */ - struct list_head loh_lru; - /** - * Linkage into list of layers. Never modified once set (except lately - * during object destruction). No locking is necessary. - */ - struct list_head loh_layers; - /** - * A list of references to this object, for debugging. - */ - struct lu_ref loh_reference; + /** + * Fid, uniquely identifying this object. + */ + struct lu_fid loh_fid; + /** + * Object flags from enum lu_object_header_flags. Set and checked + * atomically. + */ + unsigned long loh_flags; + /** + * Object reference count. Protected by lu_site::ls_guard. + */ + atomic_t loh_ref; + /** + * Common object attributes, cached for efficiency. From enum + * lu_object_header_attr. + */ + __u32 loh_attr; + /** + * Linkage into per-site hash table. 
Protected by lu_site::ls_guard. + */ + struct hlist_node loh_hash; + /** + * Linkage into per-site LRU list. Protected by lu_site::ls_guard. + */ + struct list_head loh_lru; + /** + * Linkage into list of layers. Never modified once set (except lately + * during object destruction). No locking is necessary. + */ + struct list_head loh_layers; + /** + * A list of references to this object, for debugging. + */ + struct lu_ref loh_reference; }; struct fld; +enum { + LU_SS_CREATED = 0, + LU_SS_CACHE_HIT, + LU_SS_CACHE_MISS, + LU_SS_CACHE_RACE, + LU_SS_CACHE_DEATH_RACE, + LU_SS_LRU_PURGED, + LU_SS_LAST_STAT +}; + /** * lu_site is a "compartment" within which objects are unique, and LRU * discipline is maintained. @@ -553,118 +555,61 @@ struct fld; */ struct lu_site { /** - * Site-wide lock. - * - * lock protecting: - * - * - lu_site::ls_hash hash table (and its linkages in objects); - * - * - lu_site::ls_lru list (and its linkages in objects); - * - * - 0/1 transitions of object lu_object_header::loh_ref - * reference count; - * - * yes, it's heavy. - */ - rwlock_t ls_guard; - /** - * Hash-table where objects are indexed by fid. - */ - struct hlist_head *ls_hash; - /** - * Bit-mask for hash-table size. - */ - int ls_hash_mask; - /** - * Order of hash-table. - */ - int ls_hash_bits; - /** - * Number of buckets in the hash-table. - */ - int ls_hash_size; - - /** - * LRU list, updated on each access to object. Protected by - * lu_site::ls_guard. - * - * "Cold" end of LRU is lu_site::ls_lru.next. Accessed object are - * moved to the lu_site::ls_lru.prev (this is due to the non-existence - * of list_for_each_entry_safe_reverse()). - */ - struct list_head ls_lru; - /** - * Total number of objects in this site. Protected by - * lu_site::ls_guard. - */ - unsigned ls_total; - /** - * Total number of objects in this site with reference counter greater - * than 0. Protected by lu_site::ls_guard. - */ - unsigned ls_busy; - - /** - * Top-level device for this stack. - */ - struct lu_device *ls_top_dev; - /* - * mds number of this site. - */ - mdsno_t ls_node_id; - /* - * Fid location database - */ - struct lu_server_fld *ls_server_fld; - struct lu_client_fld *ls_client_fld; - - /* - * Server Seq Manager - */ - struct lu_server_seq *ls_server_seq; - - /* - * Controller Seq Manager - */ - struct lu_server_seq *ls_control_seq; - struct obd_export *ls_control_exp; + * objects hash table + */ + struct cfs_hash *ls_obj_hash; + /** + * index of bucket on hash table while purging + */ + unsigned int ls_purge_start; + /** + * Top-level device for this stack. + */ + struct lu_device *ls_top_dev; + /** + * Bottom-level device for this stack + */ + struct lu_device *ls_bottom_dev; + /** + * Linkage into global list of sites. + */ + struct list_head ls_linkage; + /** + * List for lu device for this site, protected + * by ls_ld_lock. + **/ + struct list_head ls_ld_linkage; + spinlock_t ls_ld_lock; + /** + * Lock to serialize site purge. + */ + struct mutex ls_purge_mutex; + /** + * lu_site stats + */ + struct lprocfs_stats *ls_stats; + /** + * XXX: a hack! fld has to find md_site via site, remove when possible + */ + struct seq_server_site *ld_seq_site; + /** + * Pointer to the lu_target for this site. + */ + struct lu_target *ls_tgt; + + /** + * Number of objects in lsb_lru_lists - used for shrinking + */ + struct percpu_counter ls_lru_len_counter; +}; - /* - * Client Seq Manager - */ - struct lu_client_seq *ls_client_seq; - - /* statistical counters. Protected by nothing, races are accepted. 
*/ - struct { - __u32 s_created; - __u32 s_cache_hit; - __u32 s_cache_miss; - /* - * Number of hash-table entry checks made. - * - * ->s_cache_check / (->s_cache_miss + ->s_cache_hit) - * - * is an average number of hash slots inspected during single - * lookup. - */ - __u32 s_cache_check; - /** Races with cache insertions. */ - __u32 s_cache_race; - /** - * Races with object destruction. - * - * \see lu_site::ls_marche_funebre. - */ - __u32 s_cache_death_race; - __u32 s_lru_purged; - } ls_stats; +wait_queue_head_t * +lu_site_wq_from_fid(struct lu_site *site, struct lu_fid *fid); - /** - * Linkage into global list of sites. - */ - struct list_head ls_linkage; - struct lprocfs_stats *ls_time_stats; -}; +static inline struct seq_server_site *lu_site2seq(const struct lu_site *s) +{ + return s->ld_seq_site; +} /** \name ctors * Constructors/destructors. @@ -679,21 +624,23 @@ void lu_device_get (struct lu_device *d); void lu_device_put (struct lu_device *d); int lu_device_init (struct lu_device *d, struct lu_device_type *t); void lu_device_fini (struct lu_device *d); -int lu_object_header_init(struct lu_object_header *h); +int lu_object_header_init(struct lu_object_header *h); void lu_object_header_fini(struct lu_object_header *h); int lu_object_init (struct lu_object *o, - struct lu_object_header *h, struct lu_device *d); + struct lu_object_header *h, struct lu_device *d); void lu_object_fini (struct lu_object *o); void lu_object_add_top (struct lu_object_header *h, struct lu_object *o); void lu_object_add (struct lu_object *before, struct lu_object *o); +void lu_dev_add_linkage(struct lu_site *s, struct lu_device *d); +void lu_dev_del_linkage(struct lu_site *s, struct lu_device *d); + /** * Helpers to initialize and finalize device types. */ int lu_device_type_init(struct lu_device_type *ldt); void lu_device_type_fini(struct lu_device_type *ldt); -void lu_types_stop(void); /** @} ctors */ @@ -709,22 +656,38 @@ void lu_types_stop(void); */ static inline void lu_object_get(struct lu_object *o) { - LASSERT(atomic_read(&o->lo_header->loh_ref) > 0); - atomic_inc(&o->lo_header->loh_ref); + LASSERT(atomic_read(&o->lo_header->loh_ref) > 0); + atomic_inc(&o->lo_header->loh_ref); } /** - * Return true of object will not be cached after last reference to it is + * Return true if object will not be cached after last reference to it is * released. */ static inline int lu_object_is_dying(const struct lu_object_header *h) { - return test_bit(LU_OBJECT_HEARD_BANSHEE, &h->loh_flags); + return test_bit(LU_OBJECT_HEARD_BANSHEE, &h->loh_flags); +} + +/** + * Return true if object is initialized. 
+ */ +static inline int lu_object_is_inited(const struct lu_object_header *h) +{ + return test_bit(LU_OBJECT_INITED, &h->loh_flags); } void lu_object_put(const struct lu_env *env, struct lu_object *o); +void lu_object_put_nocache(const struct lu_env *env, struct lu_object *o); +void lu_object_unhash(const struct lu_env *env, struct lu_object *o); +int lu_site_purge_objects(const struct lu_env *env, struct lu_site *s, int nr, + int canblock); -int lu_site_purge(const struct lu_env *env, struct lu_site *s, int nr); +static inline int lu_site_purge(const struct lu_env *env, struct lu_site *s, + int nr) +{ + return lu_site_purge_objects(env, s, nr, 1); +} void lu_site_print(const struct lu_env *env, struct lu_site *s, void *cookie, lu_printer_t printer); @@ -751,8 +714,8 @@ struct lu_object *lu_object_find_slice(const struct lu_env *env, */ static inline struct lu_object *lu_object_top(struct lu_object_header *h) { - LASSERT(!list_empty(&h->loh_layers)); - return container_of0(h->loh_layers.next, struct lu_object, lo_linkage); + LASSERT(!list_empty(&h->loh_layers)); + return container_of0(h->loh_layers.next, struct lu_object, lo_linkage); } /** @@ -771,97 +734,102 @@ static inline const struct lu_fid *lu_object_fid(const struct lu_object *o) return &o->lo_header->loh_fid; } -/* +/** * return device operations vector for this object */ -static inline struct lu_device_operations * +static const inline struct lu_device_operations * lu_object_ops(const struct lu_object *o) { return o->lo_dev->ld_ops; } -/* +/** * Given a compound object, find its slice, corresponding to the device type - * @dtype. + * \a dtype. */ struct lu_object *lu_object_locate(struct lu_object_header *h, - struct lu_device_type *dtype); - -struct lu_cdebug_print_info { - int lpi_subsys; - int lpi_mask; - const char *lpi_file; - const char *lpi_fn; - int lpi_line; -}; + const struct lu_device_type *dtype); -/* +/** * Printer function emitting messages through libcfs_debug_msg(). */ int lu_cdebug_printer(const struct lu_env *env, void *cookie, const char *format, ...); -#define DECLARE_LU_CDEBUG_PRINT_INFO(var, mask) \ - struct lu_cdebug_print_info var = { \ - .lpi_subsys = DEBUG_SUBSYSTEM, \ - .lpi_mask = (mask), \ - .lpi_file = __FILE__, \ - .lpi_fn = __FUNCTION__, \ - .lpi_line = __LINE__ \ - }; - -/* - * Print object description followed by user-supplied message. +/** + * Print object description followed by a user-supplied message. */ -#define LU_OBJECT_DEBUG(mask, env, object, format, ...) \ -({ \ - static DECLARE_LU_CDEBUG_PRINT_INFO(__info, mask); \ - \ - lu_object_print(env, &__info, lu_cdebug_printer, object); \ - CDEBUG(mask, format , ## __VA_ARGS__); \ -}) +#define LU_OBJECT_DEBUG(mask, env, object, format, ...) \ +do { \ + if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \ + LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \ + lu_object_print(env, &msgdata, lu_cdebug_printer, object);\ + CDEBUG(mask, format "\n", ## __VA_ARGS__); \ + } \ +} while (0) -/* - * Print human readable representation of the @o to the @f. +/** + * Print short object description followed by a user-supplied message. */ -void lu_object_print(const struct lu_env *env, void *cookie, - lu_printer_t printer, const struct lu_object *o); +#define LU_OBJECT_HEADER(mask, env, object, format, ...) 
\ +do { \ + if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \ + LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \ + lu_object_header_print(env, &msgdata, lu_cdebug_printer,\ + (object)->lo_header); \ + lu_cdebug_printer(env, &msgdata, "\n"); \ + CDEBUG(mask, format , ## __VA_ARGS__); \ + } \ +} while (0) -/* +void lu_object_print (const struct lu_env *env, void *cookie, + lu_printer_t printer, const struct lu_object *o); +void lu_object_header_print(const struct lu_env *env, void *cookie, + lu_printer_t printer, + const struct lu_object_header *hdr); + +/** * Check object consistency. */ int lu_object_invariant(const struct lu_object *o); -/* - * Finalize and free devices in the device stack. + +/** + * Check whether object exists, no matter on local or remote storage. + * Note: LOHA_EXISTS will be set once some one created the object, + * and it does not needs to be committed to storage. */ -void lu_stack_fini(const struct lu_env *env, struct lu_device *top); +#define lu_object_exists(o) ((o)->lo_header->loh_attr & LOHA_EXISTS) -/* - * Returns 1 iff object @o exists on the stable storage, - * returns -1 iff object @o is on remote server. +/** + * Check whether object on the remote storage. + */ +#define lu_object_remote(o) unlikely((o)->lo_header->loh_attr & LOHA_REMOTE) + +/** + * Check whether the object as agent entry on current target */ -static inline int lu_object_exists(const struct lu_object *o) +#define lu_object_has_agent_entry(o) \ + unlikely((o)->lo_header->loh_attr & LOHA_HAS_AGENT_ENTRY) + +static inline void lu_object_set_agent_entry(struct lu_object *o) +{ + o->lo_header->loh_attr |= LOHA_HAS_AGENT_ENTRY; +} + +static inline void lu_object_clear_agent_entry(struct lu_object *o) { - __u32 attr; - - attr = o->lo_header->loh_attr; - if (attr & LOHA_REMOTE) - return -1; - else if (attr & LOHA_EXISTS) - return +1; - else - return 0; + o->lo_header->loh_attr &= ~LOHA_HAS_AGENT_ENTRY; } static inline int lu_object_assert_exists(const struct lu_object *o) { - return lu_object_exists(o) != 0; + return lu_object_exists(o); } static inline int lu_object_assert_not_exists(const struct lu_object *o) { - return lu_object_exists(o) <= 0; + return !lu_object_exists(o); } /** @@ -869,15 +837,24 @@ static inline int lu_object_assert_not_exists(const struct lu_object *o) */ static inline __u32 lu_object_attr(const struct lu_object *o) { - LASSERT(lu_object_exists(o) > 0); - return o->lo_header->loh_attr; + LASSERT(lu_object_exists(o) != 0); + + return o->lo_header->loh_attr & S_IFMT; +} + +static inline void lu_object_ref_add(struct lu_object *o, + const char *scope, + const void *source) +{ + lu_ref_add(&o->lo_header->loh_reference, scope, source); } -static inline struct lu_ref_link *lu_object_ref_add(struct lu_object *o, - const char *scope, - const void *source) +static inline void lu_object_ref_add_at(struct lu_object *o, + struct lu_ref_link *link, + const char *scope, + const void *source) { - return lu_ref_add(&o->lo_header->loh_reference, scope, source); + lu_ref_add_at(&o->lo_header->loh_reference, link, scope, source); } static inline void lu_object_ref_del(struct lu_object *o, @@ -893,17 +870,25 @@ static inline void lu_object_ref_del_at(struct lu_object *o, lu_ref_del_at(&o->lo_header->loh_reference, link, scope, source); } +/** input params, should be filled out by mdt */ struct lu_rdpg { - /* input params, should be filled out by mdt */ - __u64 rp_hash; /* hash */ - int rp_count; /* count in bytes */ - int rp_npages; /* number of pages */ - struct page **rp_pages; /* pointers 
to pages */ + /** hash */ + __u64 rp_hash; + /** count in bytes */ + unsigned int rp_count; + /** number of pages */ + unsigned int rp_npages; + /** requested attr */ + __u32 rp_attrs; + /** pointers to pages */ + struct page **rp_pages; }; enum lu_xattr_flags { - LU_XATTR_REPLACE = (1 << 0), - LU_XATTR_CREATE = (1 << 1) + LU_XATTR_REPLACE = (1 << 0), + LU_XATTR_CREATE = (1 << 1), + LU_XATTR_MERGE = (1 << 2), + LU_XATTR_SPLIT = (1 << 3), }; /** @} helpers */ @@ -915,6 +900,7 @@ enum lu_xattr_flags { enum lu_context_state { LCS_INITIALIZED = 1, LCS_ENTERED, + LCS_LEAVING, LCS_LEFT, LCS_FINALIZED }; @@ -952,6 +938,7 @@ struct lu_context { * from enum lu_context_tag. */ __u32 lc_tags; + enum lu_context_state lc_state; /** * Pointer to the home service thread. NULL for other execution * contexts. @@ -961,19 +948,22 @@ struct lu_context { * Pointer to an array with key values. Internal implementation * detail. */ - void **lc_value; - enum lu_context_state lc_state; - /** - * Linkage into a list of all remembered contexts. Only - * `non-transient' contexts, i.e., ones created for service threads - * are placed here. - */ - struct list_head lc_remember; - /** - * Version counter used to skip calls to lu_context_refill() when no - * keys were registered. - */ - unsigned lc_version; + void **lc_value; + /** + * Linkage into a list of all remembered contexts. Only + * `non-transient' contexts, i.e., ones created for service threads + * are placed here. + */ + struct list_head lc_remember; + /** + * Version counter used to skip calls to lu_context_refill() when no + * keys were registered. + */ + unsigned lc_version; + /** + * Debugging cookie. + */ + unsigned lc_cookie; }; /** @@ -990,10 +980,6 @@ enum lu_context_tag { */ LCT_DT_THREAD = 1 << 1, /** - * Context for transaction handle - */ - LCT_TX_HANDLE = 1 << 2, - /** * Thread on client */ LCT_CL_THREAD = 1 << 3, @@ -1002,7 +988,22 @@ enum lu_context_tag { * a client. */ LCT_SESSION = 1 << 4, - + /** + * A per-request data on OSP device + */ + LCT_OSP_THREAD = 1 << 5, + /** + * MGS device thread + */ + LCT_MG_THREAD = 1 << 6, + /** + * Context for local operations + */ + LCT_LOCAL = 1 << 7, + /** + * session for server thread + **/ + LCT_SERVER_SESSION = 1 << 8, /** * Set when at least one of keys, having values in this context has * non-NULL lu_context_key::lct_exit() method. This is used to @@ -1089,41 +1090,41 @@ struct lu_context_key { */ void (*lct_exit)(const struct lu_context *ctx, struct lu_context_key *key, void *data); - /** - * Internal implementation detail: index within lu_context::lc_value[] - * reserved for this key. - */ - int lct_index; - /** - * Internal implementation detail: number of values created for this - * key. - */ - atomic_t lct_used; - /** - * Internal implementation detail: module for this key. - */ - struct module *lct_owner; - /** - * References to this key. For debugging. - */ - struct lu_ref lct_reference; + /** + * Internal implementation detail: index within lu_context::lc_value[] + * reserved for this key. + */ + int lct_index; + /** + * Internal implementation detail: number of values created for this + * key. + */ + atomic_t lct_used; + /** + * Internal implementation detail: module for this key. + */ + struct module *lct_owner; + /** + * References to this key. For debugging. 
+ */ + struct lu_ref lct_reference; }; #define LU_KEY_INIT(mod, type) \ - static void* mod##_key_init(const struct lu_context *ctx, \ - struct lu_context_key *key) \ - { \ - type *value; \ + static void *mod##_key_init(const struct lu_context *ctx, \ + struct lu_context_key *key) \ + { \ + type *value; \ \ - CLASSERT(CFS_PAGE_SIZE >= sizeof (*value)); \ + CLASSERT(PAGE_SIZE >= sizeof(*value)); \ \ - OBD_ALLOC_PTR(value); \ - if (value == NULL) \ - value = ERR_PTR(-ENOMEM); \ - \ - return value; \ - } \ - struct __##mod##__dummy_init {;} /* semicolon catcher */ + OBD_ALLOC_PTR(value); \ + if (value == NULL) \ + value = ERR_PTR(-ENOMEM); \ + \ + return value; \ + } \ + struct __##mod##__dummy_init { ; } /* semicolon catcher */ #define LU_KEY_FINI(mod, type) \ static void mod##_key_fini(const struct lu_context *ctx, \ @@ -1166,50 +1167,52 @@ void lu_context_key_revive (struct lu_context_key *key); #define LU_KEY_INIT_GENERIC(mod) \ static void mod##_key_init_generic(struct lu_context_key *k, ...) \ - { \ + { \ struct lu_context_key *key = k; \ - va_list args; \ - \ - va_start(args, k); \ - do { \ - LU_CONTEXT_KEY_INIT(key); \ + va_list args; \ + \ + va_start(args, k); \ + do { \ + LU_CONTEXT_KEY_INIT(key); \ key = va_arg(args, struct lu_context_key *); \ - } while (key != NULL); \ - va_end(args); \ + } while (key != NULL); \ + va_end(args); \ } -#define LU_TYPE_INIT(mod, ...) \ +#define LU_TYPE_INIT(mod, ...) \ LU_KEY_INIT_GENERIC(mod) \ - static int mod##_type_init(struct lu_device_type *t) \ - { \ + static int mod##_type_init(struct lu_device_type *t) \ + { \ mod##_key_init_generic(__VA_ARGS__, NULL); \ return lu_context_key_register_many(__VA_ARGS__, NULL); \ - } \ + } \ struct __##mod##_dummy_type_init {;} -#define LU_TYPE_FINI(mod, ...) \ - static void mod##_type_fini(struct lu_device_type *t) \ - { \ +#define LU_TYPE_FINI(mod, ...) \ + static void mod##_type_fini(struct lu_device_type *t) \ + { \ lu_context_key_degister_many(__VA_ARGS__, NULL); \ - } \ + } \ struct __##mod##_dummy_type_fini {;} #define LU_TYPE_START(mod, ...) \ static void mod##_type_start(struct lu_device_type *t) \ { \ + lu_context_key_revive_many(__VA_ARGS__, NULL); \ } \ struct __##mod##_dummy_type_start {;} #define LU_TYPE_STOP(mod, ...) \ static void mod##_type_stop(struct lu_device_type *t) \ { \ + lu_context_key_quiesce_many(__VA_ARGS__, NULL); \ } \ struct __##mod##_dummy_type_stop {;} -#define LU_TYPE_INIT_FINI(mod, ...) \ - LU_TYPE_INIT(mod, __VA_ARGS__); \ +#define LU_TYPE_INIT_FINI(mod, ...) \ + LU_TYPE_INIT(mod, __VA_ARGS__); \ LU_TYPE_FINI(mod, __VA_ARGS__); \ LU_TYPE_START(mod, __VA_ARGS__); \ LU_TYPE_STOP(mod, __VA_ARGS__) @@ -1230,6 +1233,14 @@ void lu_context_key_degister_many(struct lu_context_key *k, ...); void lu_context_key_revive_many (struct lu_context_key *k, ...); void lu_context_key_quiesce_many (struct lu_context_key *k, ...); +/* + * update/clear ctx/ses tags. + */ +void lu_context_tags_update(__u32 tags); +void lu_context_tags_clear(__u32 tags); +void lu_session_tags_update(__u32 tags); +void lu_session_tags_clear(__u32 tags); + /** * Environment. 
*/ @@ -1244,8 +1255,14 @@ struct lu_env { struct lu_context *le_ses; }; -int lu_env_init(struct lu_env *env, struct lu_context *ses, __u32 tags); -void lu_env_fini(struct lu_env *env); +int lu_env_init (struct lu_env *env, __u32 tags); +void lu_env_fini (struct lu_env *env); +int lu_env_refill(struct lu_env *env); +int lu_env_refill_by_tags(struct lu_env *env, __u32 ctags, __u32 stags); + +struct lu_env *lu_env_find(void); +int lu_env_add(struct lu_env *env); +void lu_env_remove(struct lu_env *env); /** @} lu_context */ @@ -1253,30 +1270,80 @@ void lu_env_fini(struct lu_env *env); * Output site statistical counters into a buffer. Suitable for * ll_rd_*()-style functions. */ -int lu_site_stats_print(const struct lu_site *s, char *page, int count); +int lu_site_stats_seq_print(const struct lu_site *s, struct seq_file *m); /** * Common name structure to be passed around for various name related methods. */ struct lu_name { - char *ln_name; - int ln_namelen; + const char *ln_name; + int ln_namelen; }; +static inline bool name_is_dot_or_dotdot(const char *name, int namelen) +{ + return name[0] == '.' && + (namelen == 1 || (namelen == 2 && name[1] == '.')); +} + +static inline bool lu_name_is_dot_or_dotdot(const struct lu_name *lname) +{ + return name_is_dot_or_dotdot(lname->ln_name, lname->ln_namelen); +} + +static inline bool lu_name_is_valid_len(const char *name, size_t name_len) +{ + return name != NULL && + name_len > 0 && + name_len < INT_MAX && + strlen(name) == name_len && + memchr(name, '/', name_len) == NULL; +} + +/** + * Validate names (path components) + * + * To be valid \a name must be non-empty, '\0' terminated of length \a + * name_len, and not contain '/'. The maximum length of a name (before + * say -ENAMETOOLONG will be returned) is really controlled by llite + * and the server. We only check for something insane coming from bad + * integer handling here. + */ +static inline bool lu_name_is_valid_2(const char *name, size_t name_len) +{ + return lu_name_is_valid_len(name, name_len) && name[name_len] == '\0'; +} + +static inline bool lu_name_is_valid(const struct lu_name *ln) +{ + return lu_name_is_valid_2(ln->ln_name, ln->ln_namelen); +} + +#define DNAME "%.*s" +#define PNAME(ln) \ + (lu_name_is_valid(ln) ? (ln)->ln_namelen : 0), \ + (lu_name_is_valid(ln) ? (ln)->ln_name : "") + /** * Common buffer structure to be passed around for various xattr_{s,g}et() * methods. */ struct lu_buf { - void *lb_buf; - ssize_t lb_len; + void *lb_buf; + size_t lb_len; }; -/** null buffer */ -extern struct lu_buf LU_BUF_NULL; - -#define DLUBUF "(%p %z)" +#define DLUBUF "(%p %zu)" #define PLUBUF(buf) (buf)->lb_buf, (buf)->lb_len + +/* read buffer params, should be filled out by out */ +struct lu_rdbuf { + /** number of buffers */ + unsigned int rb_nbufs; + /** pointers to buffers */ + struct lu_buf rb_bufs[]; +}; + /** * One-time initializers, called at obdclass module initialization, not * exported. 
@@ -1292,17 +1359,8 @@ int lu_global_init(void); */ void lu_global_fini(void); -enum { - LU_TIME_FIND_LOOKUP, - LU_TIME_FIND_ALLOC, - LU_TIME_FIND_INSERT, - LU_TIME_NR -}; - -extern const char *lu_time_names[LU_TIME_NR]; - struct lu_kmem_descr { - cfs_mem_cache_t **ckd_cache; + struct kmem_cache **ckd_cache; const char *ckd_name; const size_t ckd_size; }; @@ -1310,6 +1368,200 @@ struct lu_kmem_descr { int lu_kmem_init(struct lu_kmem_descr *caches); void lu_kmem_fini(struct lu_kmem_descr *caches); -/** @} lu */ +void lu_object_assign_fid(const struct lu_env *env, struct lu_object *o, + const struct lu_fid *fid); +struct lu_object *lu_object_anon(const struct lu_env *env, + struct lu_device *dev, + const struct lu_object_conf *conf); + +/** null buffer */ +extern struct lu_buf LU_BUF_NULL; + +void lu_buf_free(struct lu_buf *buf); +void lu_buf_alloc(struct lu_buf *buf, size_t size); +void lu_buf_realloc(struct lu_buf *buf, size_t size); + +int lu_buf_check_and_grow(struct lu_buf *buf, size_t len); +struct lu_buf *lu_buf_check_and_alloc(struct lu_buf *buf, size_t len); + +extern __u32 lu_context_tags_default; +extern __u32 lu_session_tags_default; +static inline bool lu_device_is_cl(const struct lu_device *d) +{ + return d->ld_type->ldt_tags & LU_DEVICE_CL; +} + +static inline bool lu_object_is_cl(const struct lu_object *o) +{ + return lu_device_is_cl(o->lo_dev); +} + +/* Generic subset of tgts */ +struct lu_tgt_pool { + __u32 *op_array; /* array of index of + * lov_obd->lov_tgts */ + unsigned int op_count; /* number of tgts in the array */ + unsigned int op_size; /* allocated size of op_array */ + struct rw_semaphore op_rw_sem; /* to protect lu_tgt_pool use */ +}; + +/* round-robin QoS data for LOD/LMV */ +struct lu_qos_rr { + spinlock_t lqr_alloc; /* protect allocation index */ + __u32 lqr_start_idx; /* start index of new inode */ + __u32 lqr_offset_idx;/* aliasing for start_idx */ + int lqr_start_count;/* reseed counter */ + struct lu_tgt_pool lqr_pool; /* round-robin optimized list */ + unsigned long lqr_dirty:1; /* recalc round-robin list */ +}; + +/* QoS data per MDS/OSS */ +struct lu_svr_qos { + struct obd_uuid lsq_uuid; /* ptlrpc's c_remote_uuid */ + struct list_head lsq_svr_list; /* link to lq_svr_list */ + __u64 lsq_bavail; /* total bytes avail on svr */ + __u64 lsq_iavail; /* tital inode avail on svr */ + __u64 lsq_penalty; /* current penalty */ + __u64 lsq_penalty_per_obj; /* penalty decrease + * every obj*/ + time64_t lsq_used; /* last used time, seconds */ + __u32 lsq_tgt_count; /* number of tgts on this svr */ + __u32 lsq_id; /* unique svr id */ +}; + +/* QoS data per MDT/OST */ +struct lu_tgt_qos { + struct lu_svr_qos *ltq_svr; /* svr info */ + __u64 ltq_penalty; /* current penalty */ + __u64 ltq_penalty_per_obj; /* penalty decrease + * every obj*/ + __u64 ltq_weight; /* net weighting */ + time64_t ltq_used; /* last used time, seconds */ + bool ltq_usable:1; /* usable for striping */ +}; + +/* target descriptor */ +struct lu_tgt_desc { + union { + struct dt_device *ltd_tgt; + struct obd_device *ltd_obd; + }; + struct obd_export *ltd_exp; + struct obd_uuid ltd_uuid; + __u32 ltd_index; + __u32 ltd_gen; + struct list_head ltd_kill; + struct ptlrpc_thread *ltd_recovery_thread; + struct mutex ltd_fid_mutex; + struct lu_tgt_qos ltd_qos; /* qos info per target */ + struct obd_statfs ltd_statfs; + time64_t ltd_statfs_age; + unsigned long ltd_active:1,/* is this target up for requests */ + ltd_activate:1,/* should target be activated */ + ltd_reap:1, /* should this target be deleted */ 
+ ltd_got_update_log:1, /* Already got update log */ + ltd_connecting:1; /* target is connecting */ +}; + +/* number of pointers at 1st level */ +#define TGT_PTRS (PAGE_SIZE / sizeof(void *)) +/* number of pointers at 2nd level */ +#define TGT_PTRS_PER_BLOCK (PAGE_SIZE / sizeof(void *)) + +struct lu_tgt_desc_idx { + struct lu_tgt_desc *ldi_tgt[TGT_PTRS_PER_BLOCK]; +}; + +/* QoS data for LOD/LMV */ +struct lu_qos { + struct list_head lq_svr_list; /* lu_svr_qos list */ + struct rw_semaphore lq_rw_sem; + __u32 lq_active_svr_count; + unsigned int lq_prio_free; /* priority for free space */ + unsigned int lq_threshold_rr;/* priority for rr */ + struct lu_qos_rr lq_rr; /* round robin qos data */ + unsigned long lq_dirty:1, /* recalc qos data */ + lq_same_space:1,/* the servers all have approx. + * the same space avail */ + lq_reset:1; /* zero current penalties */ +}; + +struct lu_tgt_descs { + union { + struct lov_desc ltd_lov_desc; + struct lmv_desc ltd_lmv_desc; + }; + /* list of known TGTs */ + struct lu_tgt_desc_idx *ltd_tgt_idx[TGT_PTRS]; + /* Size of the lu_tgts array, granted to be a power of 2 */ + __u32 ltd_tgts_size; + /* bitmap of TGTs available */ + struct cfs_bitmap *ltd_tgt_bitmap; + /* TGTs scheduled to be deleted */ + __u32 ltd_death_row; + /* Table refcount used for delayed deletion */ + int ltd_refcount; + /* mutex to serialize concurrent updates to the tgt table */ + struct mutex ltd_mutex; + /* read/write semaphore used for array relocation */ + struct rw_semaphore ltd_rw_sem; + /* QoS */ + struct lu_qos ltd_qos; + /* all tgts in a packed array */ + struct lu_tgt_pool ltd_tgt_pool; + /* true if tgt is MDT */ + bool ltd_is_mdt; +}; + +#define LTD_TGT(ltd, index) \ + (ltd)->ltd_tgt_idx[(index) / \ + TGT_PTRS_PER_BLOCK]->ldi_tgt[(index) % TGT_PTRS_PER_BLOCK] + +u64 lu_prandom_u64_max(u64 ep_ro); +void lu_qos_rr_init(struct lu_qos_rr *lqr); +int lu_qos_add_tgt(struct lu_qos *qos, struct lu_tgt_desc *ltd); +void lu_tgt_qos_weight_calc(struct lu_tgt_desc *tgt); + +int lu_tgt_descs_init(struct lu_tgt_descs *ltd, bool is_mdt); +void lu_tgt_descs_fini(struct lu_tgt_descs *ltd); +int ltd_add_tgt(struct lu_tgt_descs *ltd, struct lu_tgt_desc *tgt); +void ltd_del_tgt(struct lu_tgt_descs *ltd, struct lu_tgt_desc *tgt); +bool ltd_qos_is_usable(struct lu_tgt_descs *ltd); +int ltd_qos_penalties_calc(struct lu_tgt_descs *ltd); +int ltd_qos_update(struct lu_tgt_descs *ltd, struct lu_tgt_desc *tgt, + __u64 *total_wt); + +static inline struct lu_tgt_desc *ltd_first_tgt(struct lu_tgt_descs *ltd) +{ + int index; + + index = find_first_bit(ltd->ltd_tgt_bitmap->data, + ltd->ltd_tgt_bitmap->size); + return (index < ltd->ltd_tgt_bitmap->size) ? LTD_TGT(ltd, index) : NULL; +} + +static inline struct lu_tgt_desc *ltd_next_tgt(struct lu_tgt_descs *ltd, + struct lu_tgt_desc *tgt) +{ + int index; + + if (!tgt) + return NULL; + + index = tgt->ltd_index; + LASSERT(index < ltd->ltd_tgt_bitmap->size); + index = find_next_bit(ltd->ltd_tgt_bitmap->data, + ltd->ltd_tgt_bitmap->size, index + 1); + return (index < ltd->ltd_tgt_bitmap->size) ? LTD_TGT(ltd, index) : NULL; +} + +#define ltd_foreach_tgt(ltd, tgt) \ + for (tgt = ltd_first_tgt(ltd); tgt; tgt = ltd_next_tgt(ltd, tgt)) + +#define ltd_foreach_tgt_safe(ltd, tgt, tmp) \ + for (tgt = ltd_first_tgt(ltd), tmp = ltd_next_tgt(ltd, tgt); tgt; \ + tgt = tmp, tmp = ltd_next_tgt(ltd, tgt)) + +/** @} lu */ #endif /* __LUSTRE_LU_OBJECT_H */
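Usage notes (illustrative only, not part of the patch above).

The reworked lu_env_init()/lu_env_fini() pair now takes just a context tag mask, and a compound object is pinned through lu_object_get()/lu_object_put() on its header reference count. A minimal sketch of that pattern, assuming "obj" was obtained from one of the lu_object_find*() helpers and that LCT_DT_THREAD is the right tag for the calling thread:

/*
 * Sketch only: pin an object long enough to dump it from a
 * data-device thread context.  Error handling is reduced to the
 * bare minimum.
 */
static int example_dump_object(struct lu_object *obj)
{
        struct lu_env env;
        int rc;

        rc = lu_env_init(&env, LCT_DT_THREAD);
        if (rc != 0)
                return rc;

        lu_object_get(obj);             /* extra reference on loh_ref */
        if (lu_object_exists(obj))
                LU_OBJECT_DEBUG(D_INFO, &env, obj, "backed by storage");
        else
                LU_OBJECT_DEBUG(D_INFO, &env, obj, "not created yet");

        lu_object_put(&env, obj);       /* drop the extra reference */
        lu_env_fini(&env);

        return 0;
}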
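The lu_buf helpers declared above (lu_buf_alloc(), lu_buf_check_and_alloc(), lu_buf_check_and_grow(), lu_buf_free()) are meant to replace hand-rolled allocation around struct lu_buf. A hedged sketch of the flow; the failure convention assumed here (NULL lb_buf after a failed allocation) should be verified against the implementation in lu_object.c:

/*
 * Sketch only: copy "len" bytes into a lu_buf, growing it on demand.
 * The caller starts from LU_BUF_NULL and releases the buffer with
 * lu_buf_free() when done.
 */
static int example_copy_to_buf(struct lu_buf *buf, const void *src,
                               size_t len)
{
        /* make sure the buffer can hold at least "len" bytes */
        if (lu_buf_check_and_alloc(buf, len) == NULL || buf->lb_buf == NULL)
                return -ENOMEM;

        memcpy(buf->lb_buf, src, len);
        CDEBUG(D_INFO, "copied %zu bytes into " DLUBUF "\n",
               len, PLUBUF(buf));

        return 0;
}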
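The lu_name validation helpers and the DNAME/PNAME printing macros are intended to be used together whenever a client-supplied name is handled. A short sketch; rejecting "." and ".." with -EPERM is only an example policy, not something the header mandates:

/* Sketch only: sanity-check a name before passing it down the stack. */
static int example_check_name(const struct lu_name *lname)
{
        if (!lu_name_is_valid(lname))
                return -EINVAL;

        if (lu_name_is_dot_or_dotdot(lname))
                return -EPERM;          /* example policy only */

        CDEBUG(D_INODE, "lookup " DNAME "\n", PNAME(lname));

        return 0;
}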
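LU_KEY_INIT()/LU_KEY_FINI() generate the lct_init/lct_fini methods of a lu_context_key, which is then registered through the lu_context_key_register*() helpers (typically from LU_TYPE_INIT() together with the device type). A sketch of the usual per-thread scratch-area pattern; the structure and field names below are hypothetical, and the per-context value is later looked up with lu_context_key_get(), declared elsewhere in this header:

/* Hypothetical per-thread scratch area for an "example" module. */
struct example_thread_info {
        struct lu_fid   eti_fid;
        struct lu_buf   eti_buf;
};

/* generates example_key_init() and example_key_fini() */
LU_KEY_INIT(example, struct example_thread_info);
LU_KEY_FINI(example, struct example_thread_info);

static struct lu_context_key example_thread_key = {
        .lct_tags = LCT_DT_THREAD,
        .lct_init = example_key_init,
        .lct_fini = example_key_fini,
};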
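The lu_tgt_descs table at the end of the patch gives LOD and LMV a common target array; ltd_foreach_tgt() walks ltd_tgt_bitmap and resolves each index through LTD_TGT(). A sketch of a read-side walk; taking ltd_rw_sem for this is an assumption, so check the actual LOD/LMV callers for the locking convention they use:

/* Sketch only: count targets currently marked active. */
static unsigned int example_count_active(struct lu_tgt_descs *ltd)
{
        struct lu_tgt_desc *tgt;
        unsigned int active = 0;

        down_read(&ltd->ltd_rw_sem);    /* assumed read-side protection */
        ltd_foreach_tgt(ltd, tgt) {
                if (tgt->ltd_active)
                        active++;
        }
        up_read(&ltd->ltd_rw_sem);

        return active;
}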