*/
/*
* This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
*/
#ifndef __LUSTRE_LU_OBJECT_H
#include <uapi/linux/lustre/lustre_idl.h>
#include <lu_ref.h>
#include <linux/percpu_counter.h>
+#include <linux/rhashtable.h>
+#include <linux/ctype.h>
struct seq_file;
struct proc_dir_entry;
struct lu_object_header;
struct lu_context;
struct lu_env;
+struct lu_name;
/**
* Operations common for data and meta-data devices.
struct lu_device *parent,
struct lu_device *dev);
+
+ /**
+ * Allocate new FID for file with @name under @parent
+ *
+ * \param[in] env execution environment for this thread
+ * \param[in] dev dt device
+ * \param[out] fid new FID allocated
+ * \param[in] parent parent object
+ * \param[in] name lu_name
+ *
+ * \retval 0 0 FID allocated successfully.
+ * \retval 1 1 FID allocated successfully and new sequence
+ * requested from seq meta server
+ * \retval negative negative errno if FID allocation failed.
+ */
+ int (*ldo_fid_alloc)(const struct lu_env *env,
+ struct lu_device *dev,
+ struct lu_fid *fid,
+ struct lu_object *parent,
+ const struct lu_name *name);
};
/**
*/
void (*loo_object_delete)(const struct lu_env *env,
struct lu_object *o);
- /**
- * Dual to lu_device_operations::ldo_object_alloc(). Called when
- * object is removed from memory.
- */
- void (*loo_object_free)(const struct lu_env *env,
- struct lu_object *o);
+ /**
+ * Dual to lu_device_operations::ldo_object_alloc(). Called when
+ * object is removed from memory. Must use call_rcu or kfree_rcu
+ * if the object contains an lu_object_header.
+ */
+ void (*loo_object_free)(const struct lu_env *env,
+ struct lu_object *o);
/**
* Called when last active reference to the object is released (and
* object returns to the cache). This method is optional.
* device types.
*/
enum lu_device_tag {
- /** this is meta-data device */
- LU_DEVICE_MD = (1 << 0),
- /** this is data device */
- LU_DEVICE_DT = (1 << 1),
- /** data device in the client stack */
- LU_DEVICE_CL = (1 << 2)
+ /** this is meta-data device */
+ LU_DEVICE_MD = BIT(0),
+ /** this is data device */
+ LU_DEVICE_DT = BIT(1),
+ /** data device in the client stack */
+ LU_DEVICE_CL = BIT(2)
};
/**
*
* \see enum la_valid
*/
- __u64 la_valid;
+ __u64 la_valid;
/** size in bytes */
- __u64 la_size;
+ __u64 la_size;
/** modification time in seconds since Epoch */
s64 la_mtime;
/** access time in seconds since Epoch */
s64 la_atime;
/** change time in seconds since Epoch */
s64 la_ctime;
+ /** create time in seconds since Epoch */
+ s64 la_btime;
/** 512-byte blocks allocated to object */
- __u64 la_blocks;
+ __u64 la_blocks;
/** permission bits and file type */
- __u32 la_mode;
+ __u32 la_mode;
/** owner id */
- __u32 la_uid;
+ __u32 la_uid;
/** group id */
- __u32 la_gid;
+ __u32 la_gid;
/** object flags */
- __u32 la_flags;
+ __u32 la_flags;
/** number of persistent references to this object */
- __u32 la_nlink;
+ __u32 la_nlink;
/** blk bits of the object*/
- __u32 la_blkbits;
+ __u32 la_blkbits;
/** blk size of the object*/
- __u32 la_blksize;
+ __u32 la_blksize;
/** real device */
- __u32 la_rdev;
+ __u32 la_rdev;
/** project id */
- __u32 la_projid;
+ __u32 la_projid;
/** set layout version to OST objects. */
__u32 la_layout_version;
+ /** dirent count */
+ __u64 la_dirent_count;
};
+#define LU_DIRENT_COUNT_UNSET ~0ULL
+
/**
* Layer in the layered object.
*/
 * initialized yet, the object allocator will initialize it.
*/
LU_OBJECT_INITED = 2,
- /**
- * Object is being purged, so mustn't be returned by
- * htable_lookup()
- */
- LU_OBJECT_PURGING = 3,
};
enum lu_object_header_attr {
- LOHA_EXISTS = 1 << 0,
- LOHA_REMOTE = 1 << 1,
- LOHA_HAS_AGENT_ENTRY = 1 << 2,
+ LOHA_EXISTS = BIT(0),
+ LOHA_REMOTE = BIT(1),
+ LOHA_HAS_AGENT_ENTRY = BIT(2),
/**
* UNIX file type is stored in S_IFMT bits.
*/
* it is created for things like not-yet-existing child created by mkdir or
* create calls. lu_object_operations::loo_exists() can be used to check
* whether object is backed by persistent storage entity.
+ * Any object containing this structure which might be placed in an
+ * rhashtable via loh_hash MUST be freed using call_rcu() or kfree_rcu().
*/
struct lu_object_header {
/**
*/
__u32 loh_attr;
/**
- * Linkage into per-site hash table. Protected by lu_site::ls_guard.
+ * Linkage into per-site hash table.
*/
- struct hlist_node loh_hash;
+ struct rhash_head loh_hash;
/**
* Linkage into per-site LRU list. Protected by lu_site::ls_guard.
*/
* A list of references to this object, for debugging.
*/
struct lu_ref loh_reference;
+ /*
+ * Handle used for kfree_rcu() or similar.
+ */
+ struct rcu_head loh_rcu;
};
struct fld;
/**
* objects hash table
*/
- struct cfs_hash *ls_obj_hash;
+ struct rhashtable ls_obj_hash;
/*
* buckets for summary data
*/
void lu_device_fini (struct lu_device *d);
int lu_object_header_init(struct lu_object_header *h);
void lu_object_header_fini(struct lu_object_header *h);
+void lu_object_header_free(struct lu_object_header *h);
int lu_object_init (struct lu_object *o,
struct lu_object_header *h, struct lu_device *d);
void lu_object_fini (struct lu_object *o);
void lu_object_add_top (struct lu_object_header *h, struct lu_object *o);
void lu_object_add (struct lu_object *before, struct lu_object *o);
-
+struct lu_object *lu_object_get_first(struct lu_object_header *h,
+ struct lu_device *dev);
void lu_dev_add_linkage(struct lu_site *s, struct lu_device *d);
void lu_dev_del_linkage(struct lu_site *s, struct lu_device *d);
return lu_site_purge_objects(env, s, nr, 1);
}
-void lu_site_print(const struct lu_env *env, struct lu_site *s, void *cookie,
- lu_printer_t printer);
+void lu_site_print(const struct lu_env *env, struct lu_site *s, atomic_t *ref,
+ int msg_flags, lu_printer_t printer);
struct lu_object *lu_object_find(const struct lu_env *env,
struct lu_device *dev, const struct lu_fid *f,
const struct lu_object_conf *conf);
static inline struct lu_object *lu_object_top(struct lu_object_header *h)
{
LASSERT(!list_empty(&h->loh_layers));
- return container_of0(h->loh_layers.next, struct lu_object, lo_linkage);
+ return container_of(h->loh_layers.next, struct lu_object, lo_linkage);
}
/**
*/
static inline struct lu_object *lu_object_next(const struct lu_object *o)
{
- return container_of0(o->lo_linkage.next, struct lu_object, lo_linkage);
+ return container_of(o->lo_linkage.next, struct lu_object, lo_linkage);
}
/**
};
enum lu_xattr_flags {
- LU_XATTR_REPLACE = (1 << 0),
- LU_XATTR_CREATE = (1 << 1),
- LU_XATTR_MERGE = (1 << 2),
- LU_XATTR_SPLIT = (1 << 3),
+ LU_XATTR_REPLACE = BIT(0),
+ LU_XATTR_CREATE = BIT(1),
+ LU_XATTR_MERGE = BIT(2),
+ LU_XATTR_SPLIT = BIT(3),
+ LU_XATTR_PURGE = BIT(4),
};
/** @} helpers */
*/
enum lu_context_tag {
- /**
- * Thread on md server
- */
- LCT_MD_THREAD = 1 << 0,
- /**
- * Thread on dt server
- */
- LCT_DT_THREAD = 1 << 1,
- /**
- * Thread on client
- */
- LCT_CL_THREAD = 1 << 3,
- /**
- * A per-request session on a server, and a per-system-call session on
- * a client.
- */
- LCT_SESSION = 1 << 4,
- /**
- * A per-request data on OSP device
- */
- LCT_OSP_THREAD = 1 << 5,
- /**
- * MGS device thread
- */
- LCT_MG_THREAD = 1 << 6,
- /**
- * Context for local operations
- */
- LCT_LOCAL = 1 << 7,
+ /**
+ * Thread on md server
+ */
+ LCT_MD_THREAD = BIT(0),
+ /**
+ * Thread on dt server
+ */
+ LCT_DT_THREAD = BIT(1),
+ /**
+ * Thread on client
+ */
+ LCT_CL_THREAD = BIT(3),
+ /**
+ * A per-request session on a server, and a per-system-call session on
+ * a client.
+ */
+ LCT_SESSION = BIT(4),
+ /**
+ * A per-request data on OSP device
+ */
+ LCT_OSP_THREAD = BIT(5),
+ /**
+ * MGS device thread
+ */
+ LCT_MG_THREAD = BIT(6),
+ /**
+ * Context for local operations
+ */
+ LCT_LOCAL = BIT(7),
/**
* session for server thread
**/
- LCT_SERVER_SESSION = 1 << 8,
- /**
- * Set when at least one of keys, having values in this context has
- * non-NULL lu_context_key::lct_exit() method. This is used to
- * optimize lu_context_exit() call.
- */
- LCT_HAS_EXIT = 1 << 28,
- /**
- * Don't add references for modules creating key values in that context.
- * This is only for contexts used internally by lu_object framework.
- */
- LCT_NOREF = 1 << 29,
- /**
- * Key is being prepared for retiring, don't create new values for it.
- */
- LCT_QUIESCENT = 1 << 30,
- /**
- * Context should be remembered.
- */
- LCT_REMEMBER = 1 << 31,
- /**
- * Contexts usable in cache shrinker thread.
- */
- LCT_SHRINKER = LCT_MD_THREAD|LCT_DT_THREAD|LCT_CL_THREAD|LCT_NOREF
+ LCT_SERVER_SESSION = BIT(8),
+ /**
+ * Set when at least one of keys, having values in this context has
+ * non-NULL lu_context_key::lct_exit() method. This is used to
+ * optimize lu_context_exit() call.
+ */
+ LCT_HAS_EXIT = BIT(28),
+ /**
+ * Don't add references for modules creating key values in that context.
+ * This is only for contexts used internally by lu_object framework.
+ */
+ LCT_NOREF = BIT(29),
+ /**
+ * Key is being prepared for retiring, don't create new values for it.
+ */
+ LCT_QUIESCENT = BIT(30),
+ /**
+ * Context should be remembered.
+ */
+ LCT_REMEMBER = BIT(31),
+ /**
+ * Contexts usable in cache shrinker thread.
+ */
+ LCT_SHRINKER = LCT_MD_THREAD|LCT_DT_THREAD|LCT_CL_THREAD|LCT_NOREF,
};
/**
void lu_context_key_degister(struct lu_context_key *key);
void *lu_context_key_get (const struct lu_context *ctx,
const struct lu_context_key *key);
-void lu_context_key_quiesce (struct lu_context_key *key);
-void lu_context_key_revive (struct lu_context_key *key);
+void lu_context_key_quiesce(struct lu_device_type *t,
+ struct lu_context_key *key);
+void lu_context_key_revive(struct lu_context_key *key);
/*
} \
struct __##mod##_dummy_type_start {;}
-#define LU_TYPE_STOP(mod, ...) \
- static void mod##_type_stop(struct lu_device_type *t) \
- { \
- lu_context_key_quiesce_many(__VA_ARGS__, NULL); \
- } \
- struct __##mod##_dummy_type_stop {;}
+#define LU_TYPE_STOP(mod, ...) \
+ static void mod##_type_stop(struct lu_device_type *t) \
+ { \
+ lu_context_key_quiesce_many(t, __VA_ARGS__, NULL); \
+ } \
+ struct __##mod##_dummy_type_stop { }
int lu_context_key_register_many(struct lu_context_key *k, ...);
void lu_context_key_degister_many(struct lu_context_key *k, ...);
void lu_context_key_revive_many (struct lu_context_key *k, ...);
-void lu_context_key_quiesce_many (struct lu_context_key *k, ...);
+void lu_context_key_quiesce_many(struct lu_device_type *t,
+ struct lu_context_key *k, ...);
/*
* update/clear ctx/ses tags.
struct lu_env *lu_env_find(void);
int lu_env_add(struct lu_env *env);
+int lu_env_add_task(struct lu_env *env, struct task_struct *task);
void lu_env_remove(struct lu_env *env);
/** @} lu_context */
return name_is_dot_or_dotdot(lname->ln_name, lname->ln_namelen);
}
+/**
+ * Heuristically detect mktemp()-style temporary file names.
+ *
+ * A name is considered temporary when it ends with '.' followed by a
+ * @suffixlen-character random-looking suffix (and, when @dot_prefix is
+ * set, also starts with '.').  Used to avoid expensive cross-MDT
+ * renames for short-lived files.
+ *
+ * NOTE(review): relies on islower()/isupper()/isdigit() returning 0 or 1
+ * (true for kernel <linux/ctype.h>); the counting below would break if
+ * they returned arbitrary non-zero masks -- confirm before reusing in
+ * userspace.
+ *
+ * \param[in] name	candidate file name (not necessarily NUL-terminated)
+ * \param[in] namelen	length of \a name in bytes
+ * \param[in] dot_prefix	require a leading '.' (hidden temp file)
+ * \param[in] suffixlen	expected length of the random suffix
+ *
+ * \retval true	\a name looks like a temporary file
+ * \retval false	otherwise
+ */
+static inline bool lu_name_is_temp_file(const char *name, int namelen,
+ bool dot_prefix, int suffixlen)
+{
+ int lower = 0;
+ int upper = 0;
+ int digit = 0;
+ int len = suffixlen;
+
+ if (dot_prefix && name[0] != '.')
+ return false;
+
+ /* must leave at least one byte of basename before ".<suffix>";
+ * dot_prefix (bool) promotes to 0/1 in the arithmetic below
+ */
+ if (namelen < dot_prefix + suffixlen + 2 ||
+ name[namelen - suffixlen - 1] != '.')
+ return false;
+
+ /* classify each character of the trailing suffix */
+ while (len) {
+ lower += islower(name[namelen - len]);
+ upper += isupper(name[namelen - len]);
+ digit += isdigit(name[namelen - len]);
+ len--;
+ }
+ /* mktemp() filename suffixes will have a mix of upper- and lower-case
+ * letters and/or numbers, not all numbers, or all upper or lower-case.
+ * About 0.07% of randomly-generated names will slip through,
+ * but this avoids 99.93% of cross-MDT renames for those files.
+ */
+ if ((digit >= suffixlen - 1 && !isdigit(name[namelen - suffixlen])) ||
+ upper == suffixlen || lower == suffixlen)
+ return false;
+
+ return true;
+}
+
+/**
+ * Detect common editor/tool backup file names.
+ *
+ * Recognizes three conventions: a trailing '~' (but not ".~"), a
+ * ".bak"/".sav" suffix, and a ".orig" suffix (all case-insensitive).
+ *
+ * \param[in] name	candidate file name (not necessarily NUL-terminated)
+ * \param[in] namelen	length of \a name in bytes
+ * \param[out] suffixlen	if non-NULL and \a name matched, set to the
+ *			length of the recognized suffix (1, 4, or 5)
+ *
+ * \retval true	\a name looks like a backup file
+ * \retval false	otherwise (\a suffixlen left untouched)
+ */
+static inline bool lu_name_is_backup_file(const char *name, int namelen,
+ int *suffixlen)
+{
+ /* "foo~" style backups; require the '~' not be preceded by '.' */
+ if (namelen > 1 &&
+ name[namelen - 2] != '.' && name[namelen - 1] == '~') {
+ if (suffixlen)
+ *suffixlen = 1;
+ return true;
+ }
+
+ /* ".bak" / ".sav" suffixes, case-insensitive */
+ if (namelen > 4 && name[namelen - 4] == '.' &&
+ (!strncasecmp(name + namelen - 3, "bak", 3) ||
+ !strncasecmp(name + namelen - 3, "sav", 3))) {
+ if (suffixlen)
+ *suffixlen = 4;
+ return true;
+ }
+
+ /* ".orig" suffix, case-insensitive */
+ if (namelen > 5 && name[namelen - 5] == '.' &&
+ !strncasecmp(name + namelen - 4, "orig", 4)) {
+ if (suffixlen)
+ *suffixlen = 5;
+ return true;
+ }
+
+ return false;
+}
+
static inline bool lu_name_is_valid_len(const char *name, size_t name_len)
{
return name != NULL &&
/* Generic subset of tgts */
struct lu_tgt_pool {
__u32 *op_array; /* array of index of
- * lov_obd->lov_tgts */
+ * lov_obd->lov_tgts
+ */
unsigned int op_count; /* number of tgts in the array */
unsigned int op_size; /* allocated size of op_array */
struct rw_semaphore op_rw_sem; /* to protect lu_tgt_pool use */
};
+int tgt_pool_init(struct lu_tgt_pool *op, unsigned int count);
+int tgt_pool_add(struct lu_tgt_pool *op, __u32 idx, unsigned int min_count);
+int tgt_pool_remove(struct lu_tgt_pool *op, __u32 idx);
+int tgt_pool_free(struct lu_tgt_pool *op);
+int tgt_check_index(int idx, struct lu_tgt_pool *osts);
+int tgt_pool_extend(struct lu_tgt_pool *op, unsigned int min_count);
+
+/* bitflags used in rr / qos allocation */
+enum lq_flag {
+ LQ_DIRTY = 0, /* recalc qos data */
+ LQ_SAME_SPACE, /* the OSTs all have approx.
+ * the same space avail */
+ LQ_RESET, /* zero current penalties */
+};
+
+#ifdef HAVE_SERVER_SUPPORT
/* round-robin QoS data for LOD/LMV */
struct lu_qos_rr {
spinlock_t lqr_alloc; /* protect allocation index */
__u32 lqr_offset_idx;/* aliasing for start_idx */
int lqr_start_count;/* reseed counter */
struct lu_tgt_pool lqr_pool; /* round-robin optimized list */
- unsigned long lqr_dirty:1; /* recalc round-robin list */
+ unsigned long lqr_flags;
};
+/**
+ * Initialize round-robin QoS state: set up the allocation spinlock and
+ * mark the round-robin list dirty so it is (re)built on first use.
+ *
+ * \param[in] lqr	round-robin QoS data to initialize
+ */
+static inline void lu_qos_rr_init(struct lu_qos_rr *lqr)
+{
+ spin_lock_init(&lqr->lqr_alloc);
+ /* LQ_DIRTY is a bit number, hence set_bit() rather than |= */
+ set_bit(LQ_DIRTY, &lqr->lqr_flags);
+}
+
+#endif /* HAVE_SERVER_SUPPORT */
+
/* QoS data per MDS/OSS */
struct lu_svr_qos {
struct obd_uuid lsq_uuid; /* ptlrpc's c_remote_uuid */
ltd_connecting:1; /* target is connecting */
};
-/* number of pointers at 1st level */
-#define TGT_PTRS (PAGE_SIZE / sizeof(void *))
/* number of pointers at 2nd level */
#define TGT_PTRS_PER_BLOCK (PAGE_SIZE / sizeof(void *))
+/* number of pointers at 1st level - only need as many as max OST/MDT count */
+#define TGT_PTRS ((LOV_ALL_STRIPES + 1) / TGT_PTRS_PER_BLOCK)
struct lu_tgt_desc_idx {
struct lu_tgt_desc *ldi_tgt[TGT_PTRS_PER_BLOCK];
__u32 lq_active_svr_count;
unsigned int lq_prio_free; /* priority for free space */
unsigned int lq_threshold_rr;/* priority for rr */
+#ifdef HAVE_SERVER_SUPPORT
struct lu_qos_rr lq_rr; /* round robin qos data */
+#endif
+ unsigned long lq_flags;
+#if 0
unsigned long lq_dirty:1, /* recalc qos data */
lq_same_space:1,/* the servers all have approx.
* the same space avail */
lq_reset:1; /* zero current penalties */
+#endif
};
struct lu_tgt_descs {
/* Size of the lu_tgts array, granted to be a power of 2 */
__u32 ltd_tgts_size;
/* bitmap of TGTs available */
- struct cfs_bitmap *ltd_tgt_bitmap;
+ unsigned long *ltd_tgt_bitmap;
/* TGTs scheduled to be deleted */
__u32 ltd_death_row;
/* Table refcount used for delayed deletion */
};
#define LTD_TGT(ltd, index) \
- (ltd)->ltd_tgt_idx[(index) / \
- TGT_PTRS_PER_BLOCK]->ldi_tgt[(index) % TGT_PTRS_PER_BLOCK]
+ (ltd)->ltd_tgt_idx[(index) / TGT_PTRS_PER_BLOCK]-> \
+ ldi_tgt[(index) % TGT_PTRS_PER_BLOCK]
u64 lu_prandom_u64_max(u64 ep_ro);
-void lu_qos_rr_init(struct lu_qos_rr *lqr);
int lu_qos_add_tgt(struct lu_qos *qos, struct lu_tgt_desc *ltd);
void lu_tgt_qos_weight_calc(struct lu_tgt_desc *tgt);
{
int index;
- index = find_first_bit(ltd->ltd_tgt_bitmap->data,
- ltd->ltd_tgt_bitmap->size);
- return (index < ltd->ltd_tgt_bitmap->size) ? LTD_TGT(ltd, index) : NULL;
+ index = find_first_bit(ltd->ltd_tgt_bitmap,
+ ltd->ltd_tgts_size);
+ return (index < ltd->ltd_tgts_size) ? LTD_TGT(ltd, index) : NULL;
}
static inline struct lu_tgt_desc *ltd_next_tgt(struct lu_tgt_descs *ltd,
return NULL;
index = tgt->ltd_index;
- LASSERT(index < ltd->ltd_tgt_bitmap->size);
- index = find_next_bit(ltd->ltd_tgt_bitmap->data,
- ltd->ltd_tgt_bitmap->size, index + 1);
- return (index < ltd->ltd_tgt_bitmap->size) ? LTD_TGT(ltd, index) : NULL;
+ LASSERT(index < ltd->ltd_tgts_size);
+ index = find_next_bit(ltd->ltd_tgt_bitmap,
+ ltd->ltd_tgts_size, index + 1);
+ return (index < ltd->ltd_tgts_size) ? LTD_TGT(ltd, index) : NULL;
}
#define ltd_foreach_tgt(ltd, tgt) \