Whamcloud - gitweb
LU-3540 lod: update recovery thread
[fs/lustre-release.git] / lustre / include / lu_object.h
index c093d31..d67d8fc 100644 (file)
@@ -27,7 +27,7 @@
  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  *
- * Copyright (c) 2011, 2013, Intel Corporation.
+ * Copyright (c) 2011, 2014, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -169,6 +169,10 @@ typedef enum {
        /* This is a new object to be allocated, or the file
         * corresponding to the object does not exists. */
        LOC_F_NEW       = 0x00000001,
+
+       /* When a dying object is found, return -EAGAIN at once instead of
+        * blocking the thread. */
+       LOC_F_NOWAIT    = 0x00000002,
 } loc_flags_t;
 
 /**
@@ -258,17 +262,17 @@ struct lu_device_type;
  * Device: a layer in the server side abstraction stacking.
  */
 struct lu_device {
-        /**
-         * reference count. This is incremented, in particular, on each object
-         * created at this layer.
-         *
-         * \todo XXX which means that atomic_t is probably too small.
-         */
-        cfs_atomic_t                       ld_ref;
-        /**
-         * Pointer to device type. Never modified once set.
-         */
-        struct lu_device_type       *ld_type;
+       /**
+        * reference count. This is incremented, in particular, on each object
+        * created at this layer.
+        *
+        * \todo XXX which means that atomic_t is probably too small.
+        */
+       atomic_t                           ld_ref;
+       /**
+        * Pointer to device type. Never modified once set.
+        */
+       struct lu_device_type             *ld_type;
         /**
          * Operation vector for this device.
          */
@@ -288,7 +292,7 @@ struct lu_device {
         /**
          * Link the device to the site.
          **/
-        cfs_list_t                         ld_linkage;
+       struct list_head                   ld_linkage;
 };
 
 struct lu_device_type_operations;
@@ -333,13 +337,13 @@ struct lu_device_type {
         /**
          * Number of existing device type instances.
          */
-        unsigned                                ldt_device_nr;
-        /**
-         * Linkage into a global list of all device types.
-         *
-         * \see lu_device_types.
-         */
-        cfs_list_t                              ldt_linkage;
+       atomic_t                                ldt_device_nr;
+       /**
+        * Linkage into a global list of all device types.
+        *
+        * \see lu_device_types.
+        */
+       struct list_head                        ldt_linkage;
 };
 
 /**
@@ -398,28 +402,17 @@ static inline int lu_device_is_md(const struct lu_device *d)
 }
 
 /**
- * Flags for the object layers.
- */
-enum lu_object_flags {
-        /**
-         * this flags is set if lu_object_operations::loo_object_init() has
-         * been called for this layer. Used by lu_object_alloc().
-         */
-        LU_OBJECT_ALLOCATED = (1 << 0)
-};
-
-/**
  * Common object attributes.
  */
 struct lu_attr {
         /** size in bytes */
         __u64          la_size;
-        /** modification time in seconds since Epoch */
-        obd_time       la_mtime;
-        /** access time in seconds since Epoch */
-        obd_time       la_atime;
-        /** change time in seconds since Epoch */
-        obd_time       la_ctime;
+       /** modification time in seconds since Epoch */
+       s64             la_mtime;
+       /** access time in seconds since Epoch */
+       s64             la_atime;
+       /** change time in seconds since Epoch */
+       s64             la_ctime;
         /** 512-byte blocks allocated to object */
         __u64          la_blocks;
         /** permission bits and file type */
@@ -446,6 +439,64 @@ struct lu_attr {
         __u64          la_valid;
 };
 
+/**
+ * Convert a set of object attributes from CPU to little-endian byte order.
+ *
+ * Each field of \a src_attr is converted individually with cpu_to_le64() or
+ * cpu_to_le32() and stored in the same field of \a dst_attr. \a src_attr is
+ * only read, so it is taken through a pointer to const.
+ *
+ * \param[out] dst_attr        receives the little-endian attributes
+ * \param[in]  src_attr        attributes in CPU byte order
+ */
+static inline void lu_attr_cpu_to_le(struct lu_attr *dst_attr,
+                                    const struct lu_attr *src_attr)
+{
+       dst_attr->la_size = cpu_to_le64(src_attr->la_size);
+       dst_attr->la_mtime = cpu_to_le64(src_attr->la_mtime);
+       dst_attr->la_atime = cpu_to_le64(src_attr->la_atime);
+       dst_attr->la_ctime = cpu_to_le64(src_attr->la_ctime);
+       dst_attr->la_blocks = cpu_to_le64(src_attr->la_blocks);
+       dst_attr->la_mode = cpu_to_le32(src_attr->la_mode);
+       dst_attr->la_uid = cpu_to_le32(src_attr->la_uid);
+       dst_attr->la_gid = cpu_to_le32(src_attr->la_gid);
+       dst_attr->la_flags = cpu_to_le32(src_attr->la_flags);
+       dst_attr->la_nlink = cpu_to_le32(src_attr->la_nlink);
+       dst_attr->la_blkbits = cpu_to_le32(src_attr->la_blkbits);
+       dst_attr->la_blksize = cpu_to_le32(src_attr->la_blksize);
+       dst_attr->la_rdev = cpu_to_le32(src_attr->la_rdev);
+       dst_attr->la_valid = cpu_to_le64(src_attr->la_valid);
+}
+
+/**
+ * Convert a set of object attributes from little-endian to CPU byte order.
+ *
+ * Each field of \a src_attr is converted individually with le64_to_cpu() or
+ * le32_to_cpu() and stored in the same field of \a dst_attr. \a src_attr is
+ * only read, so it is taken through a pointer to const.
+ *
+ * \param[out] dst_attr        receives the attributes in CPU byte order
+ * \param[in]  src_attr        attributes in little-endian byte order
+ */
+static inline void lu_attr_le_to_cpu(struct lu_attr *dst_attr,
+                                    const struct lu_attr *src_attr)
+{
+       dst_attr->la_size = le64_to_cpu(src_attr->la_size);
+       dst_attr->la_mtime = le64_to_cpu(src_attr->la_mtime);
+       dst_attr->la_atime = le64_to_cpu(src_attr->la_atime);
+       dst_attr->la_ctime = le64_to_cpu(src_attr->la_ctime);
+       dst_attr->la_blocks = le64_to_cpu(src_attr->la_blocks);
+       dst_attr->la_mode = le32_to_cpu(src_attr->la_mode);
+       dst_attr->la_uid = le32_to_cpu(src_attr->la_uid);
+       dst_attr->la_gid = le32_to_cpu(src_attr->la_gid);
+       dst_attr->la_flags = le32_to_cpu(src_attr->la_flags);
+       dst_attr->la_nlink = le32_to_cpu(src_attr->la_nlink);
+       dst_attr->la_blkbits = le32_to_cpu(src_attr->la_blkbits);
+       dst_attr->la_blksize = le32_to_cpu(src_attr->la_blksize);
+       dst_attr->la_rdev = le32_to_cpu(src_attr->la_rdev);
+       dst_attr->la_valid = le64_to_cpu(src_attr->la_valid);
+}
+
 /** Bit-mask of valid attributes */
 enum la_valid {
         LA_ATIME = 1 << 0,
@@ -484,15 +515,7 @@ struct lu_object {
         /**
          * Linkage into list of all layers.
          */
-        cfs_list_t                         lo_linkage;
-        /**
-         * Depth. Top level layer depth is 0.
-         */
-        int                                lo_depth;
-       /**
-        * Flags from enum lu_object_flags.
-        */
-       __u32                                   lo_flags;
+       struct list_head                   lo_linkage;
        /**
         * Link to the device, for debugging.
         */
@@ -509,7 +532,7 @@ enum lu_object_header_flags {
        /**
         * Mark this object has already been taken out of cache.
         */
-       LU_OBJECT_UNHASHED = 1
+       LU_OBJECT_UNHASHED = 1,
 };
 
 enum lu_object_header_attr {
@@ -534,68 +557,68 @@ enum lu_object_header_attr {
  * whether object is backed by persistent storage entity.
  */
 struct lu_object_header {
-        /**
-         * Object flags from enum lu_object_header_flags. Set and checked
-         * atomically.
-         */
-        unsigned long          loh_flags;
-        /**
-         * Object reference count. Protected by lu_site::ls_guard.
-         */
-        cfs_atomic_t           loh_ref;
-        /**
-         * Fid, uniquely identifying this object.
-         */
-        struct lu_fid          loh_fid;
-        /**
-         * Common object attributes, cached for efficiency. From enum
-         * lu_object_header_attr.
-         */
-        __u32                  loh_attr;
-        /**
-         * Linkage into per-site hash table. Protected by lu_site::ls_guard.
-         */
-        cfs_hlist_node_t       loh_hash;
-        /**
-         * Linkage into per-site LRU list. Protected by lu_site::ls_guard.
-         */
-        cfs_list_t             loh_lru;
-        /**
-         * Linkage into list of layers. Never modified once set (except lately
-         * during object destruction). No locking is necessary.
-         */
-        cfs_list_t             loh_layers;
-        /**
-         * A list of references to this object, for debugging.
-         */
-        struct lu_ref          loh_reference;
+       /**
+        * Fid, uniquely identifying this object.
+        */
+       struct lu_fid           loh_fid;
+       /**
+        * Object flags from enum lu_object_header_flags. Set and checked
+        * atomically.
+        */
+       unsigned long           loh_flags;
+       /**
+        * Object reference count. Protected by lu_site::ls_guard.
+        */
+       atomic_t                loh_ref;
+       /**
+        * Common object attributes, cached for efficiency. From enum
+        * lu_object_header_attr.
+        */
+       __u32                   loh_attr;
+       /**
+        * Linkage into per-site hash table. Protected by lu_site::ls_guard.
+        */
+       struct hlist_node       loh_hash;
+       /**
+        * Linkage into per-site LRU list. Protected by lu_site::ls_guard.
+        */
+       struct list_head        loh_lru;
+       /**
+        * Linkage into list of layers. Never modified once set (except lately
+        * during object destruction). No locking is necessary.
+        */
+       struct list_head        loh_layers;
+       /**
+        * A list of references to this object, for debugging.
+        */
+       struct lu_ref           loh_reference;
 };
 
 struct fld;
 
 struct lu_site_bkt_data {
-        /**
-         * number of busy object on this bucket
-         */
-        long                      lsb_busy;
-        /**
-         * LRU list, updated on each access to object. Protected by
-         * bucket lock of lu_site::ls_obj_hash.
-         *
-         * "Cold" end of LRU is lu_site::ls_lru.next. Accessed object are
-         * moved to the lu_site::ls_lru.prev (this is due to the non-existence
-         * of list_for_each_entry_safe_reverse()).
-         */
-        cfs_list_t                lsb_lru;
-        /**
-         * Wait-queue signaled when an object in this site is ultimately
-         * destroyed (lu_object_free()). It is used by lu_object_find() to
-         * wait before re-trying when object in the process of destruction is
-         * found in the hash table.
-         *
-         * \see htable_lookup().
-         */
-        cfs_waitq_t               lsb_marche_funebre;
+       /**
+        * number of objects in this bucket on the lsb_lru list.
+        */
+       long                    lsb_lru_len;
+       /**
+        * LRU list, updated on each access to object. Protected by
+        * bucket lock of lu_site::ls_obj_hash.
+        *
+        * "Cold" end of LRU is lu_site::ls_lru.next. Accessed objects are
+        * moved to the lu_site::ls_lru.prev (this is due to the non-existence
+        * of list_for_each_entry_safe_reverse()).
+        */
+       struct list_head        lsb_lru;
+       /**
+        * Wait-queue signaled when an object in this site is ultimately
+        * destroyed (lu_object_free()). It is used by lu_object_find() to
+        * wait before re-trying when object in the process of destruction is
+        * found in the hash table.
+        *
+        * \see htable_lookup().
+        */
+       wait_queue_head_t       lsb_marche_funebre;
 };
 
 enum {
@@ -622,30 +645,33 @@ struct lu_site {
         /**
          * objects hash table
          */
-        cfs_hash_t               *ls_obj_hash;
+       cfs_hash_t              *ls_obj_hash;
         /**
          * index of bucket on hash table while purging
          */
-        int                       ls_purge_start;
-        /**
-         * Top-level device for this stack.
-         */
-        struct lu_device         *ls_top_dev;
+       unsigned int            ls_purge_start;
+       /**
+        * Top-level device for this stack.
+        */
+       struct lu_device        *ls_top_dev;
        /**
         * Bottom-level device for this stack
         */
        struct lu_device        *ls_bottom_dev;
-        /**
-         * Linkage into global list of sites.
-         */
-        cfs_list_t                ls_linkage;
-        /**
-         * List for lu device for this site, protected
-         * by ls_ld_lock.
-         **/
-        cfs_list_t                ls_ld_linkage;
+       /**
+        * Linkage into global list of sites.
+        */
+       struct list_head        ls_linkage;
+       /**
+        * List for lu device for this site, protected
+        * by ls_ld_lock.
+        **/
+       struct list_head        ls_ld_linkage;
        spinlock_t              ls_ld_lock;
-
+       /**
+        * Lock to serialize site purge.
+        */
+       struct mutex            ls_purge_mutex;
        /**
         * lu_site stats
         */
@@ -654,6 +680,10 @@ struct lu_site {
         * XXX: a hack! fld has to find md_site via site, remove when possible
         */
        struct seq_server_site  *ld_seq_site;
+       /**
+        * Pointer to the lu_target for this site.
+        */
+       struct lu_target        *ls_tgt;
 };
 
 static inline struct lu_site_bkt_data *
@@ -700,7 +730,6 @@ void lu_dev_del_linkage(struct lu_site *s, struct lu_device *d);
 
 int  lu_device_type_init(struct lu_device_type *ldt);
 void lu_device_type_fini(struct lu_device_type *ldt);
-void lu_types_stop(void);
 
 /** @} ctors */
 
@@ -716,8 +745,8 @@ void lu_types_stop(void);
  */
 static inline void lu_object_get(struct lu_object *o)
 {
-        LASSERT(cfs_atomic_read(&o->lo_header->loh_ref) > 0);
-        cfs_atomic_inc(&o->lo_header->loh_ref);
+       LASSERT(atomic_read(&o->lo_header->loh_ref) > 0);
+       atomic_inc(&o->lo_header->loh_ref);
 }
 
 /**
@@ -760,8 +789,8 @@ struct lu_object *lu_object_find_slice(const struct lu_env *env,
  */
 static inline struct lu_object *lu_object_top(struct lu_object_header *h)
 {
-        LASSERT(!cfs_list_empty(&h->loh_layers));
-        return container_of0(h->loh_layers.next, struct lu_object, lo_linkage);
+       LASSERT(!list_empty(&h->loh_layers));
+       return container_of0(h->loh_layers.next, struct lu_object, lo_linkage);
 }
 
 /**
@@ -807,9 +836,8 @@ int lu_cdebug_printer(const struct lu_env *env,
  */
 #define LU_OBJECT_DEBUG(mask, env, object, format, ...)                   \
 do {                                                                      \
-        LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL);                  \
-                                                                          \
         if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) {                     \
+                LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL);          \
                 lu_object_print(env, &msgdata, lu_cdebug_printer, object);\
                 CDEBUG(mask, format , ## __VA_ARGS__);                    \
         }                                                                 \
@@ -820,9 +848,8 @@ do {                                                                      \
  */
 #define LU_OBJECT_HEADER(mask, env, object, format, ...)                \
 do {                                                                    \
-        LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL);                \
-                                                                        \
         if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) {                   \
+                LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL);        \
                 lu_object_header_print(env, &msgdata, lu_cdebug_printer,\
                                        (object)->lo_header);            \
                 lu_cdebug_printer(env, &msgdata, "\n");                 \
@@ -916,8 +943,8 @@ struct lu_rdpg {
 };
 
 enum lu_xattr_flags {
-        LU_XATTR_REPLACE = (1 << 0),
-        LU_XATTR_CREATE  = (1 << 1)
+       LU_XATTR_REPLACE = (1 << 0),
+       LU_XATTR_CREATE  = (1 << 1)
 };
 
 /** @} helpers */
@@ -976,22 +1003,22 @@ struct lu_context {
          * Pointer to an array with key values. Internal implementation
          * detail.
          */
-        void                 **lc_value;
-        /**
-         * Linkage into a list of all remembered contexts. Only
-         * `non-transient' contexts, i.e., ones created for service threads
-         * are placed here.
-         */
-        cfs_list_t             lc_remember;
-        /**
-         * Version counter used to skip calls to lu_context_refill() when no
-         * keys were registered.
-         */
-        unsigned               lc_version;
+       void                  **lc_value;
+       /**
+        * Linkage into a list of all remembered contexts. Only
+        * `non-transient' contexts, i.e., ones created for service threads
+        * are placed here.
+        */
+       struct list_head        lc_remember;
+       /**
+        * Version counter used to skip calls to lu_context_refill() when no
+        * keys were registered.
+        */
+       unsigned                lc_version;
         /**
          * Debugging cookie.
          */
-        unsigned               lc_cookie;
+       unsigned                lc_cookie;
 };
 
 /**
@@ -1031,7 +1058,11 @@ enum lu_context_tag {
         /**
          * Context for local operations
          */
-        LCT_LOCAL = 1 << 7,
+       LCT_LOCAL = 1 << 7,
+       /**
+        * session for server thread
+        **/
+       LCT_SERVER_SESSION = 1 << 8,
         /**
          * Set when at least one of keys, having values in this context has
          * non-NULL lu_context_key::lct_exit() method. This is used to
@@ -1118,24 +1149,24 @@ struct lu_context_key {
          */
         void   (*lct_exit)(const struct lu_context *ctx,
                            struct lu_context_key *key, void *data);
-        /**
-         * Internal implementation detail: index within lu_context::lc_value[]
-         * reserved for this key.
-         */
-        int      lct_index;
-        /**
-         * Internal implementation detail: number of values created for this
-         * key.
-         */
-        cfs_atomic_t lct_used;
-        /**
-         * Internal implementation detail: module for this key.
-         */
-        cfs_module_t *lct_owner;
-        /**
-         * References to this key. For debugging.
-         */
-        struct lu_ref  lct_reference;
+       /**
+        * Internal implementation detail: index within lu_context::lc_value[]
+        * reserved for this key.
+        */
+       int             lct_index;
+       /**
+        * Internal implementation detail: number of values created for this
+        * key.
+        */
+       atomic_t        lct_used;
+       /**
+        * Internal implementation detail: module for this key.
+        */
+       struct module   *lct_owner;
+       /**
+        * References to this key. For debugging.
+        */
+       struct lu_ref   lct_reference;
 };
 
 #define LU_KEY_INIT(mod, type)                                    \
@@ -1144,7 +1175,7 @@ struct lu_context_key {
         {                                                         \
                 type *value;                                      \
                                                                   \
-                CLASSERT(CFS_PAGE_SIZE >= sizeof (*value));       \
+               CLASSERT(PAGE_CACHE_SIZE >= sizeof (*value));       \
                                                                   \
                 OBD_ALLOC_PTR(value);                             \
                 if (value == NULL)                                \
@@ -1294,6 +1325,7 @@ int  lu_env_refill_by_tags(struct lu_env *env, __u32 ctags, __u32 stags);
  * Output site statistical counters into a buffer. Suitable for
  * ll_rd_*()-style functions.
  */
+int lu_site_stats_seq_print(const struct lu_site *s, struct seq_file *m);
 int lu_site_stats_print(const struct lu_site *s, char *page, int count);
 
 /**
@@ -1305,12 +1337,41 @@ struct lu_name {
 };
 
 /**
+ * Validate names (path components)
+ *
+ * To be valid, \a name must be non-empty, '\0'-terminated, exactly \a
+ * name_len bytes long, and must not contain '/'. The maximum length of a
+ * name (beyond which -ENAMETOOLONG is returned) is really controlled by
+ * llite and the server; here we only reject insane lengths that come
+ * from bad integer handling.
+ */
+static inline bool lu_name_is_valid_2(const char *name, size_t name_len)
+{
+       return name != NULL &&
+              name_len > 0 &&
+              name_len < INT_MAX &&
+              name[name_len] == '\0' &&
+              strlen(name) == name_len &&
+              memchr(name, '/', name_len) == NULL;
+}
+
+static inline bool lu_name_is_valid(const struct lu_name *ln)
+{
+       return lu_name_is_valid_2(ln->ln_name, ln->ln_namelen);
+}
+
+#define DNAME "%.*s"
+#define PNAME(ln)                                      \
+       (lu_name_is_valid(ln) ? (ln)->ln_namelen : 0),  \
+       (lu_name_is_valid(ln) ? (ln)->ln_name : "")
+
+/**
  * Common buffer structure to be passed around for various xattr_{s,g}et()
  * methods.
  */
 struct lu_buf {
-        void   *lb_buf;
-        ssize_t lb_len;
+       void   *lb_buf;
+       size_t  lb_len;
 };
 
 #define DLUBUF "(%p %zu)"
@@ -1331,7 +1392,7 @@ int lu_global_init(void);
 void lu_global_fini(void);
 
 struct lu_kmem_descr {
-        cfs_mem_cache_t **ckd_cache;
+       struct kmem_cache **ckd_cache;
         const char       *ckd_name;
         const size_t      ckd_size;
 };
@@ -1349,11 +1410,24 @@ struct lu_object *lu_object_anon(const struct lu_env *env,
 extern struct lu_buf LU_BUF_NULL;
 
 void lu_buf_free(struct lu_buf *buf);
-void lu_buf_alloc(struct lu_buf *buf, int size);
-void lu_buf_realloc(struct lu_buf *buf, int size);
+void lu_buf_alloc(struct lu_buf *buf, size_t size);
+void lu_buf_realloc(struct lu_buf *buf, size_t size);
+
+int lu_buf_check_and_grow(struct lu_buf *buf, size_t len);
+struct lu_buf *lu_buf_check_and_alloc(struct lu_buf *buf, size_t len);
+
+extern __u32 lu_context_tags_default;
+extern __u32 lu_session_tags_default;
+
+static inline bool lu_device_is_cl(const struct lu_device *d)
+{
+       return d->ld_type->ldt_tags & LU_DEVICE_CL;
+}
 
-int lu_buf_check_and_grow(struct lu_buf *buf, int len);
-struct lu_buf *lu_buf_check_and_alloc(struct lu_buf *buf, int len);
+static inline bool lu_object_is_cl(const struct lu_object *o)
+{
+       return lu_device_is_cl(o->lo_dev);
+}
 
 /** @} lu */
 #endif /* __LUSTRE_LU_OBJECT_H */