*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2011, 2012, Intel Corporation.
+ * Copyright (c) 2011, 2017, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
#define __LUSTRE_LU_OBJECT_H
#include <stdarg.h>
-
-/*
- * struct lu_fid
- */
#include <libcfs/libcfs.h>
-
-#include <lustre/lustre_idl.h>
-
+#include <uapi/linux/lustre/lustre_idl.h>
#include <lu_ref.h>
+#include <linux/percpu_counter.h>
+#include <linux/rhashtable.h>
+#include <linux/ctype.h>
+#include <obd_target.h>
struct seq_file;
struct proc_dir_entry;
struct lustre_cfg;
struct lprocfs_stats;
+struct obd_type;
/** \defgroup lu lu
* lu_* data-types represent server-side entities shared by data and meta-data
struct lu_object_header;
struct lu_context;
struct lu_env;
+struct lu_name;
/**
* Operations common for data and meta-data devices.
struct lu_device *parent,
struct lu_device *dev);
+
+ /**
+ * Allocate new FID for file with @name under @parent
+ *
+ * \param[in] env execution environment for this thread
+ * \param[in] dev dt device
+ * \param[out] fid new FID allocated
+ * \param[in] parent parent object
+ * \param[in] name lu_name
+ *
+ * \retval 0 0 FID allocated successfully.
+ * \retval 1 1 FID allocated successfully and new sequence
+ * requested from seq meta server
+ * \retval negative negative errno if FID allocation failed.
+ */
+ int (*ldo_fid_alloc)(const struct lu_env *env,
+ struct lu_device *dev,
+ struct lu_fid *fid,
+ struct lu_object *parent,
+ const struct lu_name *name);
};
/**
*/
void (*loo_object_delete)(const struct lu_env *env,
struct lu_object *o);
- /**
- * Dual to lu_device_operations::ldo_object_alloc(). Called when
- * object is removed from memory.
- */
- void (*loo_object_free)(const struct lu_env *env,
- struct lu_object *o);
+ /**
+ * Dual to lu_device_operations::ldo_object_alloc(). Called when
+ * object is removed from memory. Must use call_rcu or kfree_rcu
+ * if the object contains an lu_object_header.
+ */
+ void (*loo_object_free)(const struct lu_env *env,
+ struct lu_object *o);
/**
* Called when last active reference to the object is released (and
* object returns to the cache). This method is optional.
* Device: a layer in the server side abstraction stacking.
*/
struct lu_device {
- /**
- * reference count. This is incremented, in particular, on each object
- * created at this layer.
- *
- * \todo XXX which means that atomic_t is probably too small.
- */
- cfs_atomic_t ld_ref;
- /**
- * Pointer to device type. Never modified once set.
- */
- struct lu_device_type *ld_type;
+ /**
+ * reference count. This is incremented, in particular, on each object
+ * created at this layer.
+ *
+ * \todo XXX which means that atomic_t is probably too small.
+ */
+ atomic_t ld_ref;
+ /**
+ * Pointer to device type. Never modified once set.
+ */
+ struct lu_device_type *ld_type;
/**
* Operation vector for this device.
*/
/**
* Link the device to the site.
**/
- cfs_list_t ld_linkage;
+ struct list_head ld_linkage;
};
struct lu_device_type_operations;
* device types.
*/
enum lu_device_tag {
- /** this is meta-data device */
- LU_DEVICE_MD = (1 << 0),
- /** this is data device */
- LU_DEVICE_DT = (1 << 1),
- /** data device in the client stack */
- LU_DEVICE_CL = (1 << 2)
+ /** this is meta-data device */
+ LU_DEVICE_MD = BIT(0),
+ /** this is data device */
+ LU_DEVICE_DT = BIT(1),
+ /** data device in the client stack */
+ LU_DEVICE_CL = BIT(2)
};
/**
*/
const struct lu_device_type_operations *ldt_ops;
/**
- * \todo XXX: temporary pointer to associated obd_type.
- */
- struct obd_type *ldt_obd_type;
- /**
* \todo XXX: temporary: context tags used by obd_*() calls.
*/
__u32 ldt_ctx_tags;
/**
* Number of existing device type instances.
*/
- unsigned ldt_device_nr;
- /**
- * Linkage into a global list of all device types.
- *
- * \see lu_device_types.
- */
- cfs_list_t ldt_linkage;
+ atomic_t ldt_device_nr;
};
/**
void (*ldto_stop)(struct lu_device_type *t);
};
-/**
- * Flags for the object layers.
- */
-enum lu_object_flags {
- /**
- * this flags is set if lu_object_operations::loo_object_init() has
- * been called for this layer. Used by lu_object_alloc().
- */
- LU_OBJECT_ALLOCATED = (1 << 0)
-};
+static inline int lu_device_is_md(const struct lu_device *d)
+{
+ return ergo(d != NULL, d->ld_type->ldt_tags & LU_DEVICE_MD);
+}
/**
* Common object attributes.
*/
struct lu_attr {
+ /**
+ * valid bits
+ *
+ * \see enum la_valid
+ */
+ __u64 la_valid;
/** size in bytes */
- __u64 la_size;
- /** modification time in seconds since Epoch */
- obd_time la_mtime;
- /** access time in seconds since Epoch */
- obd_time la_atime;
- /** change time in seconds since Epoch */
- obd_time la_ctime;
+ __u64 la_size;
+ /** modification time in seconds since Epoch */
+ s64 la_mtime;
+ /** access time in seconds since Epoch */
+ s64 la_atime;
+ /** change time in seconds since Epoch */
+ s64 la_ctime;
+ /** create time in seconds since Epoch */
+ s64 la_btime;
/** 512-byte blocks allocated to object */
- __u64 la_blocks;
+ __u64 la_blocks;
/** permission bits and file type */
- __u32 la_mode;
+ __u32 la_mode;
/** owner id */
- __u32 la_uid;
+ __u32 la_uid;
/** group id */
- __u32 la_gid;
+ __u32 la_gid;
/** object flags */
- __u32 la_flags;
+ __u32 la_flags;
/** number of persistent references to this object */
- __u32 la_nlink;
+ __u32 la_nlink;
/** blk bits of the object*/
- __u32 la_blkbits;
+ __u32 la_blkbits;
/** blk size of the object*/
- __u32 la_blksize;
+ __u32 la_blksize;
/** real device */
- __u32 la_rdev;
- /**
- * valid bits
- *
- * \see enum la_valid
- */
- __u64 la_valid;
+ __u32 la_rdev;
+ /** project id */
+ __u32 la_projid;
+ /** set layout version to OST objects. */
+ __u32 la_layout_version;
+ /** dirent count */
+ __u64 la_dirent_count;
};
-/** Bit-mask of valid attributes */
-enum la_valid {
- LA_ATIME = 1 << 0,
- LA_MTIME = 1 << 1,
- LA_CTIME = 1 << 2,
- LA_SIZE = 1 << 3,
- LA_MODE = 1 << 4,
- LA_UID = 1 << 5,
- LA_GID = 1 << 6,
- LA_BLOCKS = 1 << 7,
- LA_TYPE = 1 << 8,
- LA_FLAGS = 1 << 9,
- LA_NLINK = 1 << 10,
- LA_RDEV = 1 << 11,
- LA_BLKSIZE = 1 << 12,
- LA_KILL_SUID = 1 << 13,
- LA_KILL_SGID = 1 << 14,
-};
+#define LU_DIRENT_COUNT_UNSET ~0ULL
/**
* Layer in the layered object.
/**
* Linkage into list of all layers.
*/
- cfs_list_t lo_linkage;
- /**
- * Depth. Top level layer depth is 0.
- */
- int lo_depth;
+ struct list_head lo_linkage;
/**
- * Flags from enum lu_object_flags.
+ * Link to the device, for debugging.
*/
- __u32 lo_flags;
- /**
- * Link to the device, for debugging.
- */
- struct lu_ref_link *lo_dev_ref;
+ struct lu_ref_link lo_dev_ref;
};
enum lu_object_header_flags {
- /**
- * Don't keep this object in cache. Object will be destroyed as soon
- * as last reference to it is released. This flag cannot be cleared
- * once set.
- */
- LU_OBJECT_HEARD_BANSHEE = 0
+ /**
+ * Don't keep this object in cache. Object will be destroyed as soon
+ * as last reference to it is released. This flag cannot be cleared
+ * once set.
+ */
+ LU_OBJECT_HEARD_BANSHEE = 0,
+ /**
+ * Mark this object has already been taken out of cache.
+ */
+ LU_OBJECT_UNHASHED = 1,
+ /**
+ * Object is initialized, when object is found in cache, it may not be
 + * initialized yet, the object allocator will initialize it.
+ */
+ LU_OBJECT_INITED = 2,
};
enum lu_object_header_attr {
- LOHA_EXISTS = 1 << 0,
- LOHA_REMOTE = 1 << 1,
- /**
- * UNIX file type is stored in S_IFMT bits.
- */
- LOHA_FT_START = 001 << 12, /**< S_IFIFO */
- LOHA_FT_END = 017 << 12, /**< S_IFMT */
+ LOHA_EXISTS = BIT(0),
+ LOHA_REMOTE = BIT(1),
+ LOHA_HAS_AGENT_ENTRY = BIT(2),
+ /**
+ * UNIX file type is stored in S_IFMT bits.
+ */
+ LOHA_FT_START = 001 << 12, /**< S_IFIFO */
+ LOHA_FT_END = 017 << 12, /**< S_IFMT */
};
/**
* it is created for things like not-yet-existing child created by mkdir or
* create calls. lu_object_operations::loo_exists() can be used to check
* whether object is backed by persistent storage entity.
 + * Any object containing this structure which might be placed in an
 + * rhashtable via loh_hash MUST be freed using call_rcu() or kfree_rcu().
*/
struct lu_object_header {
- /**
- * Object flags from enum lu_object_header_flags. Set and checked
- * atomically.
- */
- unsigned long loh_flags;
- /**
- * Object reference count. Protected by lu_site::ls_guard.
- */
- cfs_atomic_t loh_ref;
- /**
- * Fid, uniquely identifying this object.
- */
- struct lu_fid loh_fid;
- /**
- * Common object attributes, cached for efficiency. From enum
- * lu_object_header_attr.
- */
- __u32 loh_attr;
- /**
- * Linkage into per-site hash table. Protected by lu_site::ls_guard.
- */
- cfs_hlist_node_t loh_hash;
- /**
- * Linkage into per-site LRU list. Protected by lu_site::ls_guard.
- */
- cfs_list_t loh_lru;
- /**
- * Linkage into list of layers. Never modified once set (except lately
- * during object destruction). No locking is necessary.
- */
- cfs_list_t loh_layers;
- /**
- * A list of references to this object, for debugging.
- */
- struct lu_ref loh_reference;
+ /**
+ * Fid, uniquely identifying this object.
+ */
+ struct lu_fid loh_fid;
+ /**
+ * Object flags from enum lu_object_header_flags. Set and checked
+ * atomically.
+ */
+ unsigned long loh_flags;
+ /**
+ * Object reference count. Protected by lu_site::ls_guard.
+ */
+ atomic_t loh_ref;
+ /**
+ * Common object attributes, cached for efficiency. From enum
+ * lu_object_header_attr.
+ */
+ __u32 loh_attr;
+ /**
+ * Linkage into per-site hash table.
+ */
+ struct rhash_head loh_hash;
+ /**
+ * Linkage into per-site LRU list. Protected by lu_site::ls_guard.
+ */
+ struct list_head loh_lru;
+ /**
+ * Linkage into list of layers. Never modified once set (except lately
+ * during object destruction). No locking is necessary.
+ */
+ struct list_head loh_layers;
+ /**
+ * A list of references to this object, for debugging.
+ */
+ struct lu_ref loh_reference;
+ /*
+ * Handle used for kfree_rcu() or similar.
+ */
+ struct rcu_head loh_rcu;
};
struct fld;
-struct lu_site_bkt_data {
- /**
- * number of busy object on this bucket
- */
- long lsb_busy;
- /**
- * LRU list, updated on each access to object. Protected by
- * bucket lock of lu_site::ls_obj_hash.
- *
- * "Cold" end of LRU is lu_site::ls_lru.next. Accessed object are
- * moved to the lu_site::ls_lru.prev (this is due to the non-existence
- * of list_for_each_entry_safe_reverse()).
- */
- cfs_list_t lsb_lru;
- /**
- * Wait-queue signaled when an object in this site is ultimately
- * destroyed (lu_object_free()). It is used by lu_object_find() to
- * wait before re-trying when object in the process of destruction is
- * found in the hash table.
- *
- * \see htable_lookup().
- */
- cfs_waitq_t lsb_marche_funebre;
-};
-
enum {
- LU_SS_CREATED = 0,
- LU_SS_CACHE_HIT,
- LU_SS_CACHE_MISS,
- LU_SS_CACHE_RACE,
- LU_SS_CACHE_DEATH_RACE,
- LU_SS_LRU_PURGED,
- LU_SS_LAST_STAT
+ LU_SS_CREATED = 0,
+ LU_SS_CACHE_HIT,
+ LU_SS_CACHE_MISS,
+ LU_SS_CACHE_RACE,
+ LU_SS_CACHE_DEATH_RACE,
+ LU_SS_LRU_PURGED,
+ LU_SS_LAST_STAT
};
/**
/**
* objects hash table
*/
- cfs_hash_t *ls_obj_hash;
+ struct rhashtable ls_obj_hash;
+ /*
+ * buckets for summary data
+ */
+ struct lu_site_bkt_data *ls_bkts;
+ int ls_bkt_cnt;
+ u32 ls_bkt_seed;
/**
* index of bucket on hash table while purging
*/
- int ls_purge_start;
- /**
- * Top-level device for this stack.
- */
- struct lu_device *ls_top_dev;
+ unsigned int ls_purge_start;
+ /**
+ * Top-level device for this stack.
+ */
+ struct lu_device *ls_top_dev;
/**
* Bottom-level device for this stack
*/
struct lu_device *ls_bottom_dev;
- /**
- * Linkage into global list of sites.
- */
- cfs_list_t ls_linkage;
- /**
- * List for lu device for this site, protected
- * by ls_ld_lock.
- **/
- cfs_list_t ls_ld_linkage;
+ /**
+ * Linkage into global list of sites.
+ */
+ struct list_head ls_linkage;
+ /**
+ * List for lu device for this site, protected
+ * by ls_ld_lock.
+ **/
+ struct list_head ls_ld_linkage;
spinlock_t ls_ld_lock;
-
+ /**
+ * Lock to serialize site purge.
+ */
+ struct mutex ls_purge_mutex;
/**
* lu_site stats
*/
* XXX: a hack! fld has to find md_site via site, remove when possible
*/
struct seq_server_site *ld_seq_site;
+ /**
+ * Pointer to the lu_target for this site.
+ */
+ struct lu_target *ls_tgt;
+
+ /**
+ * Number of objects in lsb_lru_lists - used for shrinking
+ */
+ struct percpu_counter ls_lru_len_counter;
};
-static inline struct lu_site_bkt_data *
-lu_site_bkt_from_fid(struct lu_site *site, struct lu_fid *fid)
-{
- cfs_hash_bd_t bd;
+wait_queue_head_t *
+lu_site_wq_from_fid(struct lu_site *site, struct lu_fid *fid);
- cfs_hash_bd_get(site->ls_obj_hash, fid, &bd);
- return cfs_hash_bd_extra_get(site->ls_obj_hash, &bd);
+static inline struct seq_server_site *lu_site2seq(const struct lu_site *s)
+{
+ return s->ld_seq_site;
}
/** \name ctors
void lu_object_fini (struct lu_object *o);
void lu_object_add_top (struct lu_object_header *h, struct lu_object *o);
void lu_object_add (struct lu_object *before, struct lu_object *o);
-
+struct lu_object *lu_object_get_first(struct lu_object_header *h,
+ struct lu_device *dev);
void lu_dev_add_linkage(struct lu_site *s, struct lu_device *d);
void lu_dev_del_linkage(struct lu_site *s, struct lu_device *d);
int lu_device_type_init(struct lu_device_type *ldt);
void lu_device_type_fini(struct lu_device_type *ldt);
-void lu_types_stop(void);
/** @} ctors */
*/
static inline void lu_object_get(struct lu_object *o)
{
- LASSERT(cfs_atomic_read(&o->lo_header->loh_ref) > 0);
- cfs_atomic_inc(&o->lo_header->loh_ref);
+ LASSERT(atomic_read(&o->lo_header->loh_ref) > 0);
+ atomic_inc(&o->lo_header->loh_ref);
}
/**
- * Return true of object will not be cached after last reference to it is
+ * Return true if object will not be cached after last reference to it is
* released.
*/
static inline int lu_object_is_dying(const struct lu_object_header *h)
return test_bit(LU_OBJECT_HEARD_BANSHEE, &h->loh_flags);
}
+/**
+ * Return true if object is initialized.
+ */
+static inline int lu_object_is_inited(const struct lu_object_header *h)
+{
+ return test_bit(LU_OBJECT_INITED, &h->loh_flags);
+}
+
void lu_object_put(const struct lu_env *env, struct lu_object *o);
void lu_object_put_nocache(const struct lu_env *env, struct lu_object *o);
+void lu_object_unhash(const struct lu_env *env, struct lu_object *o);
+int lu_site_purge_objects(const struct lu_env *env, struct lu_site *s, int nr,
+ int canblock);
-int lu_site_purge(const struct lu_env *env, struct lu_site *s, int nr);
+static inline int lu_site_purge(const struct lu_env *env, struct lu_site *s,
+ int nr)
+{
+ return lu_site_purge_objects(env, s, nr, 1);
+}
-void lu_site_print(const struct lu_env *env, struct lu_site *s, void *cookie,
- lu_printer_t printer);
+void lu_site_print(const struct lu_env *env, struct lu_site *s, atomic_t *ref,
+ int msg_flags, lu_printer_t printer);
struct lu_object *lu_object_find(const struct lu_env *env,
struct lu_device *dev, const struct lu_fid *f,
const struct lu_object_conf *conf);
*/
static inline struct lu_object *lu_object_top(struct lu_object_header *h)
{
- LASSERT(!cfs_list_empty(&h->loh_layers));
- return container_of0(h->loh_layers.next, struct lu_object, lo_linkage);
+ LASSERT(!list_empty(&h->loh_layers));
+ return container_of(h->loh_layers.next, struct lu_object, lo_linkage);
}
/**
*/
static inline struct lu_object *lu_object_next(const struct lu_object *o)
{
- return container_of0(o->lo_linkage.next, struct lu_object, lo_linkage);
+ return container_of(o->lo_linkage.next, struct lu_object, lo_linkage);
}
/**
*/
#define LU_OBJECT_DEBUG(mask, env, object, format, ...) \
do { \
- LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \
- \
if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \
+ LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \
lu_object_print(env, &msgdata, lu_cdebug_printer, object);\
- CDEBUG(mask, format , ## __VA_ARGS__); \
+ CDEBUG(mask, format "\n", ## __VA_ARGS__); \
} \
} while (0)
*/
#define LU_OBJECT_HEADER(mask, env, object, format, ...) \
do { \
- LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \
- \
if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \
+ LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \
lu_object_header_print(env, &msgdata, lu_cdebug_printer,\
(object)->lo_header); \
lu_cdebug_printer(env, &msgdata, "\n"); \
*/
#define lu_object_remote(o) unlikely((o)->lo_header->loh_attr & LOHA_REMOTE)
+/**
+ * Check whether the object as agent entry on current target
+ */
+#define lu_object_has_agent_entry(o) \
+ unlikely((o)->lo_header->loh_attr & LOHA_HAS_AGENT_ENTRY)
+
+static inline void lu_object_set_agent_entry(struct lu_object *o)
+{
+ o->lo_header->loh_attr |= LOHA_HAS_AGENT_ENTRY;
+}
+
+static inline void lu_object_clear_agent_entry(struct lu_object *o)
+{
+ o->lo_header->loh_attr &= ~LOHA_HAS_AGENT_ENTRY;
+}
+
static inline int lu_object_assert_exists(const struct lu_object *o)
{
return lu_object_exists(o);
static inline __u32 lu_object_attr(const struct lu_object *o)
{
LASSERT(lu_object_exists(o) != 0);
- return o->lo_header->loh_attr;
+
+ return o->lo_header->loh_attr & S_IFMT;
}
-static inline struct lu_ref_link *lu_object_ref_add(struct lu_object *o,
- const char *scope,
- const void *source)
+static inline void lu_object_ref_add(struct lu_object *o,
+ const char *scope,
+ const void *source)
{
- return lu_ref_add(&o->lo_header->loh_reference, scope, source);
+ lu_ref_add(&o->lo_header->loh_reference, scope, source);
+}
+
+static inline void lu_object_ref_add_at(struct lu_object *o,
+ struct lu_ref_link *link,
+ const char *scope,
+ const void *source)
+{
+ lu_ref_add_at(&o->lo_header->loh_reference, link, scope, source);
}
static inline void lu_object_ref_del(struct lu_object *o,
};
enum lu_xattr_flags {
- LU_XATTR_REPLACE = (1 << 0),
- LU_XATTR_CREATE = (1 << 1)
+ LU_XATTR_REPLACE = BIT(0),
+ LU_XATTR_CREATE = BIT(1),
+ LU_XATTR_MERGE = BIT(2),
+ LU_XATTR_SPLIT = BIT(3),
};
/** @} helpers */
enum lu_context_state {
LCS_INITIALIZED = 1,
LCS_ENTERED,
+ LCS_LEAVING,
LCS_LEFT,
LCS_FINALIZED
};
* Pointer to an array with key values. Internal implementation
* detail.
*/
- void **lc_value;
- /**
- * Linkage into a list of all remembered contexts. Only
- * `non-transient' contexts, i.e., ones created for service threads
- * are placed here.
- */
- cfs_list_t lc_remember;
- /**
- * Version counter used to skip calls to lu_context_refill() when no
- * keys were registered.
- */
- unsigned lc_version;
+ void **lc_value;
+ /**
+ * Linkage into a list of all remembered contexts. Only
+ * `non-transient' contexts, i.e., ones created for service threads
+ * are placed here.
+ */
+ struct list_head lc_remember;
+ /**
+ * Version counter used to skip calls to lu_context_refill() when no
+ * keys were registered.
+ */
+ unsigned lc_version;
/**
* Debugging cookie.
*/
- unsigned lc_cookie;
+ unsigned lc_cookie;
};
/**
*/
enum lu_context_tag {
- /**
- * Thread on md server
- */
- LCT_MD_THREAD = 1 << 0,
- /**
- * Thread on dt server
- */
- LCT_DT_THREAD = 1 << 1,
- /**
- * Context for transaction handle
- */
- LCT_TX_HANDLE = 1 << 2,
- /**
- * Thread on client
- */
- LCT_CL_THREAD = 1 << 3,
- /**
- * A per-request session on a server, and a per-system-call session on
- * a client.
- */
- LCT_SESSION = 1 << 4,
- /**
- * A per-request data on OSP device
- */
- LCT_OSP_THREAD = 1 << 5,
- /**
- * MGS device thread
- */
- LCT_MG_THREAD = 1 << 6,
- /**
- * Context for local operations
- */
- LCT_LOCAL = 1 << 7,
- /**
- * Set when at least one of keys, having values in this context has
- * non-NULL lu_context_key::lct_exit() method. This is used to
- * optimize lu_context_exit() call.
- */
- LCT_HAS_EXIT = 1 << 28,
- /**
- * Don't add references for modules creating key values in that context.
- * This is only for contexts used internally by lu_object framework.
- */
- LCT_NOREF = 1 << 29,
- /**
- * Key is being prepared for retiring, don't create new values for it.
- */
- LCT_QUIESCENT = 1 << 30,
- /**
- * Context should be remembered.
- */
- LCT_REMEMBER = 1 << 31,
- /**
- * Contexts usable in cache shrinker thread.
- */
- LCT_SHRINKER = LCT_MD_THREAD|LCT_DT_THREAD|LCT_CL_THREAD|LCT_NOREF
+ /**
+ * Thread on md server
+ */
+ LCT_MD_THREAD = BIT(0),
+ /**
+ * Thread on dt server
+ */
+ LCT_DT_THREAD = BIT(1),
+ /**
+ * Thread on client
+ */
+ LCT_CL_THREAD = BIT(3),
+ /**
+ * A per-request session on a server, and a per-system-call session on
+ * a client.
+ */
+ LCT_SESSION = BIT(4),
+ /**
+ * A per-request data on OSP device
+ */
+ LCT_OSP_THREAD = BIT(5),
+ /**
+ * MGS device thread
+ */
+ LCT_MG_THREAD = BIT(6),
+ /**
+ * Context for local operations
+ */
+ LCT_LOCAL = BIT(7),
+ /**
+ * session for server thread
+ **/
+ LCT_SERVER_SESSION = BIT(8),
+ /**
+ * Set when at least one of keys, having values in this context has
+ * non-NULL lu_context_key::lct_exit() method. This is used to
+ * optimize lu_context_exit() call.
+ */
+ LCT_HAS_EXIT = BIT(28),
+ /**
+ * Don't add references for modules creating key values in that context.
+ * This is only for contexts used internally by lu_object framework.
+ */
+ LCT_NOREF = BIT(29),
+ /**
+ * Key is being prepared for retiring, don't create new values for it.
+ */
+ LCT_QUIESCENT = BIT(30),
+ /**
+ * Context should be remembered.
+ */
+ LCT_REMEMBER = BIT(31),
+ /**
+ * Contexts usable in cache shrinker thread.
+ */
+ LCT_SHRINKER = LCT_MD_THREAD|LCT_DT_THREAD|LCT_CL_THREAD|LCT_NOREF,
};
/**
*/
void (*lct_exit)(const struct lu_context *ctx,
struct lu_context_key *key, void *data);
- /**
- * Internal implementation detail: index within lu_context::lc_value[]
- * reserved for this key.
- */
- int lct_index;
- /**
- * Internal implementation detail: number of values created for this
- * key.
- */
- cfs_atomic_t lct_used;
- /**
- * Internal implementation detail: module for this key.
- */
- cfs_module_t *lct_owner;
- /**
- * References to this key. For debugging.
- */
- struct lu_ref lct_reference;
+ /**
+ * Internal implementation detail: index within lu_context::lc_value[]
+ * reserved for this key.
+ */
+ int lct_index;
+ /**
+ * Internal implementation detail: number of values created for this
+ * key.
+ */
+ atomic_t lct_used;
+ /**
+ * Internal implementation detail: module for this key.
+ */
+ struct module *lct_owner;
+ /**
+ * References to this key. For debugging.
+ */
+ struct lu_ref lct_reference;
};
#define LU_KEY_INIT(mod, type) \
- static void* mod##_key_init(const struct lu_context *ctx, \
- struct lu_context_key *key) \
- { \
- type *value; \
- \
- CLASSERT(CFS_PAGE_SIZE >= sizeof (*value)); \
+ static void *mod##_key_init(const struct lu_context *ctx, \
+ struct lu_context_key *key) \
+ { \
+ type *value; \
\
- OBD_ALLOC_PTR(value); \
- if (value == NULL) \
- value = ERR_PTR(-ENOMEM); \
+ BUILD_BUG_ON(PAGE_SIZE < sizeof(*value)); \
\
- return value; \
- } \
- struct __##mod##__dummy_init {;} /* semicolon catcher */
+ OBD_ALLOC_PTR(value); \
+ if (value == NULL) \
+ value = ERR_PTR(-ENOMEM); \
+ \
+ return value; \
+ } \
+ struct __##mod##__dummy_init { ; } /* semicolon catcher */
#define LU_KEY_FINI(mod, type) \
static void mod##_key_fini(const struct lu_context *ctx, \
void lu_context_key_degister(struct lu_context_key *key);
void *lu_context_key_get (const struct lu_context *ctx,
const struct lu_context_key *key);
-void lu_context_key_quiesce (struct lu_context_key *key);
-void lu_context_key_revive (struct lu_context_key *key);
+void lu_context_key_quiesce(struct lu_device_type *t,
+ struct lu_context_key *key);
+void lu_context_key_revive(struct lu_context_key *key);
/*
} \
struct __##mod##_dummy_type_start {;}
-#define LU_TYPE_STOP(mod, ...) \
- static void mod##_type_stop(struct lu_device_type *t) \
- { \
- lu_context_key_quiesce_many(__VA_ARGS__, NULL); \
- } \
- struct __##mod##_dummy_type_stop {;}
+#define LU_TYPE_STOP(mod, ...) \
+ static void mod##_type_stop(struct lu_device_type *t) \
+ { \
+ lu_context_key_quiesce_many(t, __VA_ARGS__, NULL); \
+ } \
+ struct __##mod##_dummy_type_stop { }
int lu_context_key_register_many(struct lu_context_key *k, ...);
void lu_context_key_degister_many(struct lu_context_key *k, ...);
void lu_context_key_revive_many (struct lu_context_key *k, ...);
-void lu_context_key_quiesce_many (struct lu_context_key *k, ...);
+void lu_context_key_quiesce_many(struct lu_device_type *t,
+ struct lu_context_key *k, ...);
/*
* update/clear ctx/ses tags.
int lu_env_refill(struct lu_env *env);
int lu_env_refill_by_tags(struct lu_env *env, __u32 ctags, __u32 stags);
-/** @} lu_context */
+static inline void* lu_env_info(const struct lu_env *env,
+ const struct lu_context_key *key)
+{
+ void *info;
+ info = lu_context_key_get(&env->le_ctx, key);
+ if (!info) {
+ if (!lu_env_refill((struct lu_env *)env))
+ info = lu_context_key_get(&env->le_ctx, key);
+ }
+ LASSERT(info);
+ return info;
+}
-struct lu_ucred {
- __u32 uc_valid;
- __u32 uc_o_uid;
- __u32 uc_o_gid;
- __u32 uc_o_fsuid;
- __u32 uc_o_fsgid;
- __u32 uc_uid;
- __u32 uc_gid;
- __u32 uc_fsuid;
- __u32 uc_fsgid;
- __u32 uc_suppgids[2];
- cfs_cap_t uc_cap;
- __u32 uc_umask;
- cfs_group_info_t *uc_ginfo;
- struct md_identity *uc_identity;
-};
-struct lu_ucred *lu_ucred(const struct lu_env *env);
-struct lu_ucred *lu_ucred_check(const struct lu_env *env);
-struct lu_ucred *lu_ucred_assert(const struct lu_env *env);
+struct lu_env *lu_env_find(void);
+int lu_env_add(struct lu_env *env);
+int lu_env_add_task(struct lu_env *env, struct task_struct *task);
+void lu_env_remove(struct lu_env *env);
+
+/** @} lu_context */
/**
* Output site statistical counters into a buffer. Suitable for
* ll_rd_*()-style functions.
*/
-int lu_site_stats_print(const struct lu_site *s, char *page, int count);
+int lu_site_stats_seq_print(const struct lu_site *s, struct seq_file *m);
/**
* Common name structure to be passed around for various name related methods.
int ln_namelen;
};
+static inline bool name_is_dot_or_dotdot(const char *name, int namelen)
+{
+ return name[0] == '.' &&
+ (namelen == 1 || (namelen == 2 && name[1] == '.'));
+}
+
+static inline bool lu_name_is_dot_or_dotdot(const struct lu_name *lname)
+{
+ return name_is_dot_or_dotdot(lname->ln_name, lname->ln_namelen);
+}
+
+static inline bool lu_name_is_temp_file(const char *name, int namelen,
+ bool dot_prefix, int suffixlen)
+{
+ int lower = 0;
+ int upper = 0;
+ int digit = 0;
+ int len = suffixlen;
+
+ if (dot_prefix && name[0] != '.')
+ return false;
+
+ if (namelen < dot_prefix + suffixlen + 2 ||
+ name[namelen - suffixlen - 1] != '.')
+ return false;
+
+ while (len) {
+ lower += islower(name[namelen - len]);
+ upper += isupper(name[namelen - len]);
+ digit += isdigit(name[namelen - len]);
+ len--;
+ }
+ /* mktemp() filename suffixes will have a mix of upper- and lower-case
+ * letters and/or numbers, not all numbers, or all upper or lower-case.
+ * About 0.07% of randomly-generated names will slip through,
+ * but this avoids 99.93% of cross-MDT renames for those files.
+ */
+ if ((digit >= suffixlen - 1 && !isdigit(name[namelen - suffixlen])) ||
+ upper == suffixlen || lower == suffixlen)
+ return false;
+
+ return true;
+}
+
+static inline bool lu_name_is_backup_file(const char *name, int namelen,
+ int *suffixlen)
+{
+ if (namelen > 1 &&
+ name[namelen - 2] != '.' && name[namelen - 1] == '~') {
+ if (suffixlen)
+ *suffixlen = 1;
+ return true;
+ }
+
+ if (namelen > 4 && name[namelen - 4] == '.' &&
+ (!strncasecmp(name + namelen - 3, "bak", 3) ||
+ !strncasecmp(name + namelen - 3, "sav", 3))) {
+ if (suffixlen)
+ *suffixlen = 4;
+ return true;
+ }
+
+ if (namelen > 5 && name[namelen - 5] == '.' &&
+ !strncasecmp(name + namelen - 4, "orig", 4)) {
+ if (suffixlen)
+ *suffixlen = 5;
+ return true;
+ }
+
+ return false;
+}
+
+static inline bool lu_name_is_valid_len(const char *name, size_t name_len)
+{
+ return name != NULL &&
+ name_len > 0 &&
+ name_len < INT_MAX &&
+ strlen(name) == name_len &&
+ memchr(name, '/', name_len) == NULL;
+}
+
+/**
+ * Validate names (path components)
+ *
+ * To be valid \a name must be non-empty, '\0' terminated of length \a
+ * name_len, and not contain '/'. The maximum length of a name (before
+ * say -ENAMETOOLONG will be returned) is really controlled by llite
+ * and the server. We only check for something insane coming from bad
+ * integer handling here.
+ */
+static inline bool lu_name_is_valid_2(const char *name, size_t name_len)
+{
+ return lu_name_is_valid_len(name, name_len) && name[name_len] == '\0';
+}
+
+static inline bool lu_name_is_valid(const struct lu_name *ln)
+{
+ return lu_name_is_valid_2(ln->ln_name, ln->ln_namelen);
+}
+
+#define DNAME "%.*s"
+#define PNAME(ln) \
+ (lu_name_is_valid(ln) ? (ln)->ln_namelen : 0), \
+ (lu_name_is_valid(ln) ? (ln)->ln_name : "")
+
/**
* Common buffer structure to be passed around for various xattr_{s,g}et()
* methods.
*/
struct lu_buf {
- void *lb_buf;
- ssize_t lb_len;
+ void *lb_buf;
+ size_t lb_len;
};
-/** null buffer */
-extern struct lu_buf LU_BUF_NULL;
-
#define DLUBUF "(%p %zu)"
#define PLUBUF(buf) (buf)->lb_buf, (buf)->lb_len
+
+/* read buffer params, should be filled out by out */
+struct lu_rdbuf {
+ /** number of buffers */
+ unsigned int rb_nbufs;
+ /** pointers to buffers */
+ struct lu_buf rb_bufs[];
+};
+
/**
* One-time initializers, called at obdclass module initialization, not
* exported.
void lu_global_fini(void);
struct lu_kmem_descr {
- cfs_mem_cache_t **ckd_cache;
+ struct kmem_cache **ckd_cache;
const char *ckd_name;
const size_t ckd_size;
};
struct lu_device *dev,
const struct lu_object_conf *conf);
+/** null buffer */
+extern struct lu_buf LU_BUF_NULL;
+
+void lu_buf_free(struct lu_buf *buf);
+void lu_buf_alloc(struct lu_buf *buf, size_t size);
+void lu_buf_realloc(struct lu_buf *buf, size_t size);
+
+int lu_buf_check_and_grow(struct lu_buf *buf, size_t len);
+struct lu_buf *lu_buf_check_and_alloc(struct lu_buf *buf, size_t len);
+
+extern __u32 lu_context_tags_default;
+extern __u32 lu_session_tags_default;
+
+static inline bool lu_device_is_cl(const struct lu_device *d)
+{
+ return d->ld_type->ldt_tags & LU_DEVICE_CL;
+}
+
+static inline bool lu_object_is_cl(const struct lu_object *o)
+{
+ return lu_device_is_cl(o->lo_dev);
+}
+
+/* round-robin QoS data for LOD/LMV */
+struct lu_qos_rr {
+ spinlock_t lqr_alloc; /* protect allocation index */
+ __u32 lqr_start_idx; /* start index of new inode */
+ __u32 lqr_offset_idx;/* aliasing for start_idx */
+ int lqr_start_count;/* reseed counter */
+ struct lu_tgt_pool lqr_pool; /* round-robin optimized list */
+ unsigned long lqr_dirty:1; /* recalc round-robin list */
+};
+
+/* QoS data per MDS/OSS */
+struct lu_svr_qos {
+ struct obd_uuid lsq_uuid; /* ptlrpc's c_remote_uuid */
+ struct list_head lsq_svr_list; /* link to lq_svr_list */
+ __u64 lsq_bavail; /* total bytes avail on svr */
 +	__u64			 lsq_iavail;	/* total inode avail on svr */
+ __u64 lsq_penalty; /* current penalty */
+ __u64 lsq_penalty_per_obj; /* penalty decrease
+ * every obj*/
+ time64_t lsq_used; /* last used time, seconds */
+ __u32 lsq_tgt_count; /* number of tgts on this svr */
+ __u32 lsq_id; /* unique svr id */
+};
+
+/* QoS data per MDT/OST */
+struct lu_tgt_qos {
+ struct lu_svr_qos *ltq_svr; /* svr info */
+ __u64 ltq_penalty; /* current penalty */
+ __u64 ltq_penalty_per_obj; /* penalty decrease
+ * every obj*/
+ __u64 ltq_weight; /* net weighting */
+ time64_t ltq_used; /* last used time, seconds */
+ bool ltq_usable:1; /* usable for striping */
+};
+
+/* target descriptor */
+struct lu_tgt_desc {
+ union {
+ struct dt_device *ltd_tgt;
+ struct obd_device *ltd_obd;
+ };
+ struct obd_export *ltd_exp;
+ struct obd_uuid ltd_uuid;
+ __u32 ltd_index;
+ __u32 ltd_gen;
+ struct list_head ltd_kill;
+ struct task_struct *ltd_recovery_task;
+ struct mutex ltd_fid_mutex;
+ struct lu_tgt_qos ltd_qos; /* qos info per target */
+ struct obd_statfs ltd_statfs;
+ time64_t ltd_statfs_age;
+ unsigned long ltd_active:1,/* is this target up for requests */
+ ltd_activate:1,/* should target be activated */
+ ltd_reap:1, /* should this target be deleted */
+ ltd_got_update_log:1, /* Already got update log */
+ ltd_connecting:1; /* target is connecting */
+};
+
+/* number of pointers at 1st level */
+#define TGT_PTRS (PAGE_SIZE / sizeof(void *))
+/* number of pointers at 2nd level */
+#define TGT_PTRS_PER_BLOCK (PAGE_SIZE / sizeof(void *))
+
+struct lu_tgt_desc_idx {
+ struct lu_tgt_desc *ldi_tgt[TGT_PTRS_PER_BLOCK];
+};
+
+/* QoS data for LOD/LMV */
+struct lu_qos {
+ struct list_head lq_svr_list; /* lu_svr_qos list */
+ struct rw_semaphore lq_rw_sem;
+ __u32 lq_active_svr_count;
+ unsigned int lq_prio_free; /* priority for free space */
+ unsigned int lq_threshold_rr;/* priority for rr */
+ struct lu_qos_rr lq_rr; /* round robin qos data */
+ unsigned long lq_dirty:1, /* recalc qos data */
+ lq_same_space:1,/* the servers all have approx.
+ * the same space avail */
+ lq_reset:1; /* zero current penalties */
+};
+
+struct lu_tgt_descs {
+ union {
+ struct lov_desc ltd_lov_desc;
+ struct lmv_desc ltd_lmv_desc;
+ };
+ /* list of known TGTs */
+ struct lu_tgt_desc_idx *ltd_tgt_idx[TGT_PTRS];
+ /* Size of the lu_tgts array, granted to be a power of 2 */
+ __u32 ltd_tgts_size;
+ /* bitmap of TGTs available */
+ unsigned long *ltd_tgt_bitmap;
+ /* TGTs scheduled to be deleted */
+ __u32 ltd_death_row;
+ /* Table refcount used for delayed deletion */
+ int ltd_refcount;
+ /* mutex to serialize concurrent updates to the tgt table */
+ struct mutex ltd_mutex;
+ /* read/write semaphore used for array relocation */
+ struct rw_semaphore ltd_rw_sem;
+ /* QoS */
+ struct lu_qos ltd_qos;
+ /* all tgts in a packed array */
+ struct lu_tgt_pool ltd_tgt_pool;
+ /* true if tgt is MDT */
+ bool ltd_is_mdt;
+};
+
+#define LTD_TGT(ltd, index) \
+ (ltd)->ltd_tgt_idx[(index) / \
+ TGT_PTRS_PER_BLOCK]->ldi_tgt[(index) % TGT_PTRS_PER_BLOCK]
+
+u64 lu_prandom_u64_max(u64 ep_ro);
+void lu_qos_rr_init(struct lu_qos_rr *lqr);
+int lu_qos_add_tgt(struct lu_qos *qos, struct lu_tgt_desc *ltd);
+void lu_tgt_qos_weight_calc(struct lu_tgt_desc *tgt);
+
+int lu_tgt_descs_init(struct lu_tgt_descs *ltd, bool is_mdt);
+void lu_tgt_descs_fini(struct lu_tgt_descs *ltd);
+int ltd_add_tgt(struct lu_tgt_descs *ltd, struct lu_tgt_desc *tgt);
+void ltd_del_tgt(struct lu_tgt_descs *ltd, struct lu_tgt_desc *tgt);
+bool ltd_qos_is_usable(struct lu_tgt_descs *ltd);
+int ltd_qos_penalties_calc(struct lu_tgt_descs *ltd);
+int ltd_qos_update(struct lu_tgt_descs *ltd, struct lu_tgt_desc *tgt,
+ __u64 *total_wt);
+
+static inline struct lu_tgt_desc *ltd_first_tgt(struct lu_tgt_descs *ltd)
+{
+ int index;
+
+ index = find_first_bit(ltd->ltd_tgt_bitmap,
+ ltd->ltd_tgts_size);
+ return (index < ltd->ltd_tgts_size) ? LTD_TGT(ltd, index) : NULL;
+}
+
+static inline struct lu_tgt_desc *ltd_next_tgt(struct lu_tgt_descs *ltd,
+ struct lu_tgt_desc *tgt)
+{
+ int index;
+
+ if (!tgt)
+ return NULL;
+
+ index = tgt->ltd_index;
+ LASSERT(index < ltd->ltd_tgts_size);
+ index = find_next_bit(ltd->ltd_tgt_bitmap,
+ ltd->ltd_tgts_size, index + 1);
+ return (index < ltd->ltd_tgts_size) ? LTD_TGT(ltd, index) : NULL;
+}
+
+#define ltd_foreach_tgt(ltd, tgt) \
+ for (tgt = ltd_first_tgt(ltd); tgt; tgt = ltd_next_tgt(ltd, tgt))
+
+#define ltd_foreach_tgt_safe(ltd, tgt, tmp) \
+ for (tgt = ltd_first_tgt(ltd), tmp = ltd_next_tgt(ltd, tgt); tgt; \
+ tgt = tmp, tmp = ltd_next_tgt(ltd, tgt))
+
/** @} lu */
#endif /* __LUSTRE_LU_OBJECT_H */