LU-1683 agl: increase lock cll_holds for AGL upcall
diff --git a/lustre/include/cl_object.h b/lustre/include/cl_object.h
index 58ef32a..b0c11e3 100644
--- a/lustre/include/cl_object.h
+++ b/lustre/include/cl_object.h
@@ -1,6 +1,4 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
+/*
  * GPL HEADER START
  *
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  * GPL HEADER END
  */
 /*
- * Copyright  2008 Sun Microsystems, Inc. All rights reserved
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Whamcloud, Inc.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -276,6 +276,16 @@ struct cl_object_conf {
          * VFS inode. This is consumed by vvp.
          */
         struct inode             *coc_inode;
+       /**
+        * Validate the object conf. If the object is using an invalid
+        * conf, invalidate it and set the new layout.
+        */
+       bool                      coc_validate_only;
+       /**
+        * Invalidate the current stripe configuration because the
+        * layout lock was lost.
+        */
+       bool                      coc_invalidate;
 };
 
 /**
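
A hedged sketch of how the two new conf bits might be driven from above,
assuming the existing cl_conf_set() entry point; coc_validate_only would
instead ask the layers to check the conf without applying it. The helper
name and the minimal error handling here are illustrative only:

    /* Sketch: drop a cached stripe configuration after the layout lock
     * was lost.  example_layout_invalidate() is a hypothetical helper. */
    static int example_layout_invalidate(const struct lu_env *env,
                                         struct cl_object *obj)
    {
            struct cl_object_conf conf;

            memset(&conf, 0, sizeof(conf));
            conf.coc_invalidate = true;  /* current layout is stale */
            return cl_conf_set(env, obj, &conf);
    }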
@@ -1054,6 +1064,15 @@ struct cl_page_operations {
          */
         int (*cpo_cancel)(const struct lu_env *env,
                           const struct cl_page_slice *slice);
+       /**
+        * Write out a page by the kernel. Currently this is only called
+        * by ll_writepage.
+        *
+        * \see cl_page_flush()
+        */
+       int (*cpo_flush)(const struct lu_env *env,
+                        const struct cl_page_slice *slice,
+                        struct cl_io *io);
         /** @} transfer */
 };
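
For context, a hedged sketch of a writepage-style caller of the new method
via cl_page_flush(), which is declared later in this patch; cl_page_flush()
invokes each layer's cpo_flush(). The io setup is elided and
example_writepage() is a hypothetical helper:

    static int example_writepage(const struct lu_env *env, struct cl_io *io,
                                 struct cl_page *page)
    {
            int rc;

            rc = cl_page_flush(env, io, page);
            if (rc != 0)
                    CL_PAGE_DEBUG(D_ERROR, env, page,
                                  "flush failed: %d\n", rc);
            return rc;
    }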
 
@@ -1062,10 +1081,10 @@ struct cl_page_operations {
  */
 #define CL_PAGE_DEBUG(mask, env, page, format, ...)                     \
 do {                                                                    \
-        static DECLARE_LU_CDEBUG_PRINT_INFO(__info, mask);              \
+        LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL);                \
                                                                         \
         if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) {                   \
-                cl_page_print(env, &__info, lu_cdebug_printer, page);   \
+                cl_page_print(env, &msgdata, lu_cdebug_printer, page);  \
                 CDEBUG(mask, format , ## __VA_ARGS__);                  \
         }                                                               \
 } while (0)
@@ -1073,14 +1092,14 @@ do {                                                                    \
 /**
  * Helper macro, dumping shorter information about \a page into a log.
  */
-#define CL_PAGE_HEADER(mask, env, page, format, ...)                    \
-do {                                                                    \
-        static DECLARE_LU_CDEBUG_PRINT_INFO(__info, mask);              \
-                                                                        \
-        if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) {                   \
-                cl_page_header_print(env, &__info, lu_cdebug_printer, page); \
-                CDEBUG(mask, format , ## __VA_ARGS__);                  \
-        }                                                               \
+#define CL_PAGE_HEADER(mask, env, page, format, ...)                          \
+do {                                                                          \
+        LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL);                      \
+                                                                              \
+        if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) {                         \
+                cl_page_header_print(env, &msgdata, lu_cdebug_printer, page); \
+                CDEBUG(mask, format , ## __VA_ARGS__);                        \
+        }                                                                     \
 } while (0)
 
 /** @} cl_page */
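
Call sites of both macros are unchanged by this patch, since only the
message-data plumbing moved to LIBCFS_DEBUG_MSG_DATA_DECL(). Typical use:

    /* Dump abbreviated page state plus a custom message at trace level. */
    CL_PAGE_HEADER(D_TRACE, env, page, "%s\n", "queued for transfer");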
@@ -1440,7 +1459,9 @@ enum cl_lock_flags {
         /** cancellation is pending for this lock. */
         CLF_CANCELPEND = 1 << 1,
         /** destruction is pending for this lock. */
-        CLF_DOOMED     = 1 << 2
+        CLF_DOOMED     = 1 << 2,
+        /** from enqueue RPC reply upcall. */
+        CLF_FROM_UPCALL = 1 << 3,
 };
 
 /**
@@ -1613,9 +1634,11 @@ struct cl_lock_slice {
  */
 enum cl_lock_transition {
         /** operation cannot be completed immediately. Wait for state change. */
-        CLO_WAIT   = 1,
+        CLO_WAIT        = 1,
         /** operation had to release lock mutex, restart. */
-        CLO_REPEAT = 2
+        CLO_REPEAT      = 2,
+        /** lower layer re-enqueued. */
+        CLO_REENQUEUED  = 3,
 };
 
 /**
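
Callers drive the lock state machine by retrying on these codes. A hedged
sketch of the usual loop, treating the new CLO_REENQUEUED like CLO_REPEAT
(retry immediately, no wait); try_op() stands in for one of the cl_*_try()
methods declared further down:

    do {
            rc = try_op(env, lock);
            if (rc == CLO_WAIT) {
                    /* Sleep until the lock changes state, then retry. */
                    rc = cl_lock_state_wait(env, lock);
                    if (rc == 0)
                            rc = CLO_REPEAT;
            }
    } while (rc == CLO_REPEAT || rc == CLO_REENQUEUED);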
@@ -1786,14 +1809,22 @@ struct cl_lock_operations {
 
 #define CL_LOCK_DEBUG(mask, env, lock, format, ...)                     \
 do {                                                                    \
-        static DECLARE_LU_CDEBUG_PRINT_INFO(__info, mask);              \
+        LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL);                \
                                                                         \
         if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) {                   \
-                cl_lock_print(env, &__info, lu_cdebug_printer, lock);   \
+                cl_lock_print(env, &msgdata, lu_cdebug_printer, lock);  \
                 CDEBUG(mask, format , ## __VA_ARGS__);                  \
         }                                                               \
 } while (0)
 
+#define CL_LOCK_ASSERT(expr, env, lock) do {                            \
+       if (likely(expr))                                               \
+               break;                                                  \
+                                                                       \
+       CL_LOCK_DEBUG(D_ERROR, env, lock, "failed at %s.\n", #expr);    \
+       LBUG();                                                         \
+} while (0)
+
 /** @} cl_lock */
 
 /** \addtogroup cl_page_list cl_page_list
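
The CL_LOCK_ASSERT() macro added above fails hard (LBUG) but first dumps the
full lock state, so the crash log carries context. Typical use:

    /* Assert an invariant with the lock state attached to the log. */
    CL_LOCK_ASSERT(cl_lock_is_mutexed(lock), env, lock);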
@@ -1890,13 +1921,18 @@ enum cl_io_type {
         CIT_READ,
         /** write system call */
         CIT_WRITE,
-        /** truncate system call */
-        CIT_TRUNC,
+        /** truncate, utime system calls */
+        CIT_SETATTR,
         /**
          * page fault handling
          */
         CIT_FAULT,
         /**
+        * fsync system call handling:
+        * write out a range of a file.
+        */
+       CIT_FSYNC,
+       /**
          * Miscellaneous io. This is used for occasional io activity that
          * doesn't fit into other types. Currently this is used for:
          *
@@ -1943,11 +1979,6 @@ enum cl_io_state {
         CIS_FINI
 };
 
-enum cl_req_priority {
-        CRP_NORMAL,
-        CRP_CANCEL
-};
-
 /**
  * IO state private for a layer.
  *
@@ -2065,8 +2096,7 @@ struct cl_io_operations {
                 int  (*cio_submit)(const struct lu_env *env,
                                    const struct cl_io_slice *slice,
                                    enum cl_req_type crt,
-                                   struct cl_2queue *queue,
-                                   enum cl_req_priority priority);
+                                  struct cl_2queue *queue);
         } req_op[CRT_NR];
         /**
          * Read missing page.
@@ -2152,9 +2182,13 @@ enum cl_enq_flags {
          */
         CEF_NEVER        = 0x00000010,
         /**
+         * for async glimpse lock.
+         */
+        CEF_AGL          = 0x00000020,
+        /**
          * mask of enq_flags.
          */
-        CEF_MASK         = 0x0000001f
+        CEF_MASK         = 0x0000003f,
 };
 
 /**
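
A hedged sketch of requesting an async glimpse lock with the new flag,
assuming the cld_enq_flags field of cl_lock_descr carries CEF_* bits as in
contemporary releases; real callers OR in further flags such as CEF_ASYNC:

    /* Mark this enqueue as an async glimpse lock (AGL): the reply is
     * processed from the upcall, with no thread blocked waiting on it. */
    descr->cld_enq_flags |= CEF_AGL;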
@@ -2219,7 +2253,21 @@ enum cl_io_lock_dmd {
         /** Layers are free to decide between local and global locking. */
         CILR_MAYBE,
         /** Never lock: there is no cache (e.g., liblustre). */
-        CILR_NEVER
+        CILR_NEVER,
+        /** Peek lock: use existing locks, don't queue new ones */
+        CILR_PEEK
+};
+
+enum cl_fsync_mode {
+       /** start writeback of dirty pages, do not wait for them to finish */
+       CL_FSYNC_NONE  = 0,
+       /** start writeback and wait for it to finish */
+       CL_FSYNC_LOCAL = 1,
+       /** discard all dirty pages in a specific file range */
+       CL_FSYNC_DISCARD = 2,
+       /** start writeback and make sure the pages have reached storage
+        * before returning; an OST_SYNC RPC must be issued and completed */
+       CL_FSYNC_ALL   = 3
 };
 
 struct cl_io_rw_common {
@@ -2256,11 +2304,6 @@ struct cl_io {
         struct cl_lockset              ci_lockset;
         /** lock requirements, this is just a help info for sublayers. */
         enum cl_io_lock_dmd            ci_lockreq;
-        /**
-         * This io has held grouplock, to inform sublayers that
-         * don't do lockless i/o.
-         */
-        int                            ci_no_srvlock;
         union {
                 struct cl_rd_io {
                         struct cl_io_rw_common rd;
@@ -2268,30 +2311,65 @@ struct cl_io {
                 struct cl_wr_io {
                         struct cl_io_rw_common wr;
                         int                    wr_append;
+                       int                    wr_sync;
                 } ci_wr;
                 struct cl_io_rw_common ci_rw;
-                struct cl_truncate_io {
-                        /** new size to which file is truncated */
-                        loff_t           tr_size;
-                        struct obd_capa *tr_capa;
-                } ci_truncate;
+                struct cl_setattr_io {
+                        struct ost_lvb   sa_attr;
+                        unsigned int     sa_valid;
+                        struct obd_capa *sa_capa;
+                } ci_setattr;
                 struct cl_fault_io {
                         /** page index within file. */
                         pgoff_t         ft_index;
                         /** bytes valid byte on a faulted page. */
                         int             ft_nob;
-                        /** writable page? */
+                        /** writable page? for nopage() only */
                         int             ft_writable;
                         /** page of an executable? */
                         int             ft_executable;
+                        /** page_mkwrite() */
+                        int             ft_mkwrite;
                         /** resulting page */
                         struct cl_page *ft_page;
                 } ci_fault;
+               struct cl_fsync_io {
+                       loff_t             fi_start;
+                       loff_t             fi_end;
+                       struct obd_capa   *fi_capa;
+                       /** file system level fid */
+                       struct lu_fid     *fi_fid;
+                       enum cl_fsync_mode fi_mode;
+                       /* how many pages were written/discarded */
+                       unsigned int       fi_nr_written;
+               } ci_fsync;
         } u;
         struct cl_2queue     ci_queue;
         size_t               ci_nob;
         int                  ci_result;
-        int                  ci_continue;
+       unsigned int         ci_continue:1,
+       /**
+        * This io holds a grouplock, to tell sublayers not to do
+        * lockless i/o.
+        */
+                            ci_no_srvlock:1,
+       /**
+        * The whole IO needs to be restarted because the layout has changed.
+        */
+                            ci_need_restart:1,
+       /**
+        * Ignore layout change.
+        * Most CIT_MISC operations can ignore a layout change, because
+        * this kind of cl_io is created only to provide an environment
+        * for running clio methods, for example:
+        *   1. request a group lock;
+        *   2. flush caching pages by osc;
+        *   3. writepage;
+        *   4. echo client.
+        * So far, only direct IO and glimpse clio need to restart if the
+        * layout changes during IO.
+        */
+                            ci_ignore_layout:1;
         /**
          * Number of pages owned by this IO. For invariant checking.
          */
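
A hedged sketch of wiring up the new CIT_FSYNC io type, in the style of the
llite fsync path; cl_io_init()/cl_io_loop()/cl_io_fini() are the existing
cl_io life-cycle entry points, and the variable setup is elided:

    io->ci_obj = clob;
    io->ci_ignore_layout = 1;       /* flushing must survive layout change */
    io->u.ci_fsync.fi_start = start;
    io->u.ci_fsync.fi_end   = end;
    io->u.ci_fsync.fi_fid   = fid;
    io->u.ci_fsync.fi_mode  = CL_FSYNC_LOCAL;
    io->u.ci_fsync.fi_nr_written = 0;

    if (cl_io_init(env, io, CIT_FSYNC, clob) == 0)
            rc = cl_io_loop(env, io);
    cl_io_fini(env, io);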
@@ -2366,10 +2444,12 @@ struct cl_io {
  * Per-transfer attributes.
  */
 struct cl_req_attr {
-        /** Generic attributes for the server consumption. */
-        struct obdo     *cra_oa;
-        /** Capability. */
-        struct obd_capa *cra_capa;
+       /** Generic attributes for the server consumption. */
+       struct obdo     *cra_oa;
+       /** Capability. */
+       struct obd_capa *cra_capa;
+       /** Jobid */
+       char             cra_jobid[JOBSTATS_JOBID_SIZE];
 };
 
 /**
@@ -2654,15 +2734,23 @@ static inline int cl_object_same(struct cl_object *o0, struct cl_object *o1)
 
 /** \defgroup cl_page cl_page
  * @{ */
-struct cl_page       *cl_page_lookup(struct cl_object_header *hdr,
+enum {
+        CLP_GANG_OKAY = 0,
+        CLP_GANG_RESCHED,
+        CLP_GANG_AGAIN,
+        CLP_GANG_ABORT
+};
+
+/* callback of cl_page_gang_lookup() */
+typedef int   (*cl_page_gang_cb_t)  (const struct lu_env *, struct cl_io *,
+                                     struct cl_page *, void *);
+int             cl_page_gang_lookup (const struct lu_env *env,
+                                     struct cl_object *obj,
+                                     struct cl_io *io,
+                                     pgoff_t start, pgoff_t end,
+                                     cl_page_gang_cb_t cb, void *cbdata);
+struct cl_page *cl_page_lookup      (struct cl_object_header *hdr,
                                      pgoff_t index);
-void                  cl_page_gang_lookup(const struct lu_env *env,
-                                          struct cl_object *obj,
-                                          struct cl_io *io,
-                                          pgoff_t start, pgoff_t end,
-                                          struct cl_page_list *plist,
-                                          int nonblock,
-                                          int *resched);
 struct cl_page *cl_page_find        (const struct lu_env *env,
                                      struct cl_object *obj,
                                      pgoff_t idx, struct page *vmpage,
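
A hedged sketch of a cl_page_gang_cb_t callback and its invocation; the
counter callback is hypothetical, and the CLP_GANG_* codes above control
whether the scan continues:

    /* Count matching pages; cbdata carries a caller-private counter. */
    static int example_count_cb(const struct lu_env *env, struct cl_io *io,
                                struct cl_page *page, void *cbdata)
    {
            (*(unsigned int *)cbdata)++;
            return CLP_GANG_OKAY;   /* keep scanning */
    }

    unsigned int count = 0;
    rc = cl_page_gang_lookup(env, obj, io, start, end,
                             example_count_cb, &count);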
@@ -2684,7 +2772,6 @@ cfs_page_t     *cl_page_vmpage      (const struct lu_env *env,
                                      struct cl_page *page);
 struct cl_page *cl_vmpage_page      (cfs_page_t *vmpage, struct cl_object *obj);
 struct cl_page *cl_page_top         (struct cl_page *page);
-int             cl_is_page          (const void *addr);
 
 const struct cl_page_slice *cl_page_at(const struct cl_page *page,
                                        const struct lu_device_type *dtype);
@@ -2728,6 +2815,8 @@ int  cl_page_cache_add  (const struct lu_env *env, struct cl_io *io,
 void cl_page_clip       (const struct lu_env *env, struct cl_page *pg,
                          int from, int to);
 int  cl_page_cancel     (const struct lu_env *env, struct cl_page *page);
+int  cl_page_flush      (const struct lu_env *env, struct cl_io *io,
+                        struct cl_page *pg);
 
 /** @} transfer */
 
@@ -2774,9 +2863,19 @@ struct cl_lock *cl_lock_peek(const struct lu_env *env, const struct cl_io *io,
 struct cl_lock *cl_lock_request(const struct lu_env *env, struct cl_io *io,
                                 const struct cl_lock_descr *need,
                                 const char *scope, const void *source);
-struct cl_lock *cl_lock_at_page(const struct lu_env *env, struct cl_object *obj,
-                                struct cl_page *page, struct cl_lock *except,
-                                int pending, int canceld);
+struct cl_lock *cl_lock_at_pgoff(const struct lu_env *env,
+                                struct cl_object *obj, pgoff_t index,
+                                struct cl_lock *except, int pending,
+                                int canceld);
+static inline struct cl_lock *cl_lock_at_page(const struct lu_env *env,
+                                             struct cl_object *obj,
+                                             struct cl_page *page,
+                                             struct cl_lock *except,
+                                             int pending, int canceld)
+{
+       return cl_lock_at_pgoff(env, obj, page->cp_index, except,
+                               pending, canceld);
+}
 
 const struct cl_lock_slice *cl_lock_at(const struct cl_lock *lock,
                                        const struct lu_device_type *dtype);
@@ -2786,21 +2885,24 @@ void  cl_lock_get_trust (struct cl_lock *lock);
 void  cl_lock_put       (const struct lu_env *env, struct cl_lock *lock);
 void  cl_lock_hold_add  (const struct lu_env *env, struct cl_lock *lock,
                          const char *scope, const void *source);
+void cl_lock_hold_release(const struct lu_env *env, struct cl_lock *lock,
+                         const char *scope, const void *source);
 void  cl_lock_unhold    (const struct lu_env *env, struct cl_lock *lock,
                          const char *scope, const void *source);
 void  cl_lock_release   (const struct lu_env *env, struct cl_lock *lock,
                          const char *scope, const void *source);
 void  cl_lock_user_add  (const struct lu_env *env, struct cl_lock *lock);
-int   cl_lock_user_del  (const struct lu_env *env, struct cl_lock *lock);
+void  cl_lock_user_del  (const struct lu_env *env, struct cl_lock *lock);
 
 enum cl_lock_state cl_lock_intransit(const struct lu_env *env,
                                      struct cl_lock *lock);
-
 void cl_lock_extransit(const struct lu_env *env, struct cl_lock *lock,
                        enum cl_lock_state state);
-
 int cl_lock_is_intransit(struct cl_lock *lock);
 
+int cl_lock_enqueue_wait(const struct lu_env *env, struct cl_lock *lock,
+                         int keep_mutex);
+
 /** \name statemachine statemachine
  * Interface to lock state machine consists of 3 parts:
  *
@@ -2842,6 +2944,7 @@ int   cl_enqueue_try(const struct lu_env *env, struct cl_lock *lock,
 int   cl_unuse_try  (const struct lu_env *env, struct cl_lock *lock);
 int   cl_wait_try   (const struct lu_env *env, struct cl_lock *lock);
 int   cl_use_try    (const struct lu_env *env, struct cl_lock *lock, int atomic);
+
 /** @} statemachine */
 
 void cl_lock_signal      (const struct lu_env *env, struct cl_lock *lock);
@@ -2856,8 +2959,7 @@ int  cl_lock_mutex_try  (const struct lu_env *env, struct cl_lock *lock);
 void cl_lock_mutex_put  (const struct lu_env *env, struct cl_lock *lock);
 int  cl_lock_is_mutexed (struct cl_lock *lock);
 int  cl_lock_nr_mutexed (const struct lu_env *env);
-int  cl_lock_page_out   (const struct lu_env *env, struct cl_lock *lock,
-                         int discard);
+int  cl_lock_discard_pages(const struct lu_env *env, struct cl_lock *lock);
 int  cl_lock_ext_match  (const struct cl_lock_descr *has,
                          const struct cl_lock_descr *need);
 int  cl_lock_descr_match(const struct cl_lock_descr *has,
@@ -2881,7 +2983,6 @@ void cl_lock_cancel(const struct lu_env *env, struct cl_lock *lock);
 void cl_lock_delete(const struct lu_env *env, struct cl_lock *lock);
 void cl_lock_error (const struct lu_env *env, struct cl_lock *lock, int error);
 void cl_locks_prune(const struct lu_env *env, struct cl_object *obj, int wait);
-int  cl_is_lock    (const void *addr);
 
 unsigned long cl_lock_weigh(const struct lu_env *env, struct cl_lock *lock);
 
@@ -2916,11 +3017,10 @@ int   cl_io_prepare_write(const struct lu_env *env, struct cl_io *io,
 int   cl_io_commit_write (const struct lu_env *env, struct cl_io *io,
                           struct cl_page *page, unsigned from, unsigned to);
 int   cl_io_submit_rw    (const struct lu_env *env, struct cl_io *io,
-                          enum cl_req_type iot, struct cl_2queue *queue,
-                          enum cl_req_priority priority);
+                         enum cl_req_type iot, struct cl_2queue *queue);
 int   cl_io_submit_sync  (const struct lu_env *env, struct cl_io *io,
-                          enum cl_req_type iot, struct cl_2queue *queue,
-                          enum cl_req_priority priority, long timeout);
+                         enum cl_req_type iot, struct cl_2queue *queue,
+                         long timeout);
 void  cl_io_rw_advance   (const struct lu_env *env, struct cl_io *io,
                           size_t nob);
 int   cl_io_cancel       (const struct lu_env *env, struct cl_io *io,
@@ -2935,6 +3035,25 @@ static inline int cl_io_is_append(const struct cl_io *io)
         return io->ci_type == CIT_WRITE && io->u.ci_wr.wr_append;
 }
 
+static inline int cl_io_is_sync_write(const struct cl_io *io)
+{
+       return io->ci_type == CIT_WRITE && io->u.ci_wr.wr_sync;
+}
+
+static inline int cl_io_is_mkwrite(const struct cl_io *io)
+{
+       return io->ci_type == CIT_FAULT && io->u.ci_fault.ft_mkwrite;
+}
+
+/**
+ * True, iff \a io is a truncate(2).
+ */
+static inline int cl_io_is_trunc(const struct cl_io *io)
+{
+        return io->ci_type == CIT_SETATTR &&
+                (io->u.ci_setattr.sa_valid & ATTR_SIZE);
+}
+
 struct cl_io *cl_io_top(struct cl_io *io);
 
 void cl_io_print(const struct lu_env *env, void *cookie,
@@ -2955,6 +3074,15 @@ do {                                                                    \
  * @{ */
 
 /**
+ * Last page in the page list.
+ */
+static inline struct cl_page *cl_page_list_last(struct cl_page_list *plist)
+{
+        LASSERT(plist->pl_nr > 0);
+        return cfs_list_entry(plist->pl_pages.prev, struct cl_page, cp_batch);
+}
+
+/**
  * Iterate over pages in a page list.
  */
 #define cl_page_list_for_each(page, list)                               \