X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Finclude%2Fcl_object.h;h=ad3512c0bea196fe760a62c0aa6f48724190365e;hp=f88eb8b5b9d9e1572ebf319cdc6ad8f555b87662;hb=cda353e6efae5013a26aedbe49d8aa6fb8fe456e;hpb=c2791674260b120c596885c0356f33852cd7a685

diff --git a/lustre/include/cl_object.h b/lustre/include/cl_object.h
index f88eb8b..ad3512c 100644
--- a/lustre/include/cl_object.h
+++ b/lustre/include/cl_object.h
@@ -179,15 +179,15 @@ struct cl_attr {
  * Fields in cl_attr that are being set.
  */
 enum cl_attr_valid {
-	CAT_SIZE   = 1 << 0,
-	CAT_KMS    = 1 << 1,
-	CAT_MTIME  = 1 << 3,
-	CAT_ATIME  = 1 << 4,
-	CAT_CTIME  = 1 << 5,
-	CAT_BLOCKS = 1 << 6,
-	CAT_UID    = 1 << 7,
-	CAT_GID    = 1 << 8,
-	CAT_PROJID = 1 << 9
+	CAT_SIZE	= BIT(0),
+	CAT_KMS		= BIT(1),
+	CAT_MTIME	= BIT(3),	/* bit 2 is not assigned to any flag here */
+	CAT_ATIME	= BIT(4),
+	CAT_CTIME	= BIT(5),
+	CAT_BLOCKS	= BIT(6),
+	CAT_UID		= BIT(7),
+	CAT_GID		= BIT(8),
+	CAT_PROJID	= BIT(9),
 };
 
 /**
@@ -290,8 +290,6 @@ struct cl_layout {
 	struct lu_buf	cl_buf;
 	/** size of layout in lov_mds_md format. */
 	size_t		cl_size;
-	/** size of DoM component if exists or zero otherwise */
-	u64		cl_dom_comp_size;
 	/** Layout generation. */
 	u32		cl_layout_gen;
 	/** whether layout is a composite one */
@@ -420,6 +418,13 @@ struct cl_object_operations {
 	void (*coo_req_attr_set)(const struct lu_env *env,
 				 struct cl_object *obj,
 				 struct cl_req_attr *attr);
+	/**
+	 * Flush \a obj data corresponding to \a lock. Used for DoM
+	 * locks in llite's cancelling blocking ast callback.
+	 */
+	int (*coo_object_flush)(const struct lu_env *env,
+				struct cl_object *obj,
+				struct ldlm_lock *lock);
 };
 
 /**
@@ -618,7 +623,7 @@ enum cl_page_state {
          *
          * \invariant cl_page::cp_owner == NULL && cl_page::cp_req == NULL
          */
-        CPS_CACHED,
+	CPS_CACHED = 1,
         /**
          * Page is exclusively owned by some cl_io. Page may end up in this
          * state as a result of
@@ -710,8 +715,13 @@ enum cl_page_type {
          *  to vmpage which is not belonging to the same object of cl_page.
          *  it is used in DirectIO and lockless IO. */
         CPT_TRANSIENT,
+	CPT_NR
 };
 
+#define	CP_STATE_BITS	4	/* bit-field width of cl_page::cp_state */
+#define	CP_TYPE_BITS	2	/* bit-field width of cl_page::cp_type */
+#define	CP_MAX_LAYER	3	/* entries in cl_page::cp_layer_offset[] */
+
 /**
  * Fields are protected by the lock on struct page, except for atomics and
  * immutables.
@@ -723,39 +733,47 @@ enum cl_page_type {
  */
 struct cl_page {
 	/** Reference counter. */
-	atomic_t		 cp_ref;
+	atomic_t		cp_ref;
+	/** layout_entry + stripe index, composed using lov_comp_index() */
+	unsigned int		cp_lov_index;
+	pgoff_t			cp_osc_index;	/* NOTE(review): presumably the page index used by the osc layer - confirm */
 	/** An object this page is a part of. Immutable after creation. */
 	struct cl_object	*cp_obj;
 	/** vmpage */
 	struct page		*cp_vmpage;
 	/** Linkage of pages within group. Pages must be owned */
-	struct list_head	 cp_batch;
-	/** List of slices. Immutable after creation. */
-	struct list_head	 cp_layers;
+	struct list_head	cp_batch;
+	/** Array of slice offsets. Immutable after creation. */
+	unsigned char		cp_layer_offset[CP_MAX_LAYER]; /* 24 bits; these counts are running totals */
+	/** slice count; presumably also the index of the next slice to add - TODO confirm */
+	unsigned char		cp_layer_count:2; /* 26 bits */
 	/**
 	 * Page state. This field is const to avoid accidental update, it is
 	 * modified only internally within cl_page.c. Protected by a VM lock.
 	 */
-	const enum cl_page_state cp_state;
+	enum cl_page_state	 cp_state:CP_STATE_BITS; /* 30 bits; no longer const-qualified, despite the comment above */
         /**
          * Page type. Only CPT_TRANSIENT is used so far. Immutable after
          * creation.
          */
-        enum cl_page_type        cp_type;
+	enum cl_page_type	cp_type:CP_TYPE_BITS; /* 32 bits */
+	/* index identifying which kmem slab this memory was allocated from */
+	short int		cp_kmem_index; /* 48 bits */
+	unsigned int		cp_unused1:16;	/* 64 bits */

-        /**
-         * Owning IO in cl_page_state::CPS_OWNED state. Sub-page can be owned
-         * by sub-io. Protected by a VM lock.
-         */
+	/**
+	 * Owning IO in cl_page_state::CPS_OWNED state. Sub-page can be owned
+	 * by sub-io. Protected by a VM lock.
+	 */
         struct cl_io            *cp_owner;
-        /** List of references to this page, for debugging. */
-        struct lu_ref            cp_reference;
+	/** List of references to this page, for debugging. */
+        struct lu_ref		cp_reference;
 	/** Link to an object, for debugging. */
-	struct lu_ref_link       cp_obj_ref;
+	struct lu_ref_link	cp_obj_ref;
 	/** Link to a queue, for debugging. */
-	struct lu_ref_link       cp_queue_ref;
+	struct lu_ref_link	cp_queue_ref;
 	/** Assigned if doing a sync_io */
-	struct cl_sync_io       *cp_sync_io;
+	struct cl_sync_io	*cp_sync_io;
 };
 
 /**
@@ -765,15 +783,12 @@ struct cl_page {
  */
 struct cl_page_slice {
         struct cl_page                  *cpl_page;
-	pgoff_t				 cpl_index;
         /**
          * Object slice corresponding to this page slice. Immutable after
          * creation.
          */
         struct cl_object                *cpl_obj;
         const struct cl_page_operations *cpl_ops;
-        /** Linkage into cl_page::cp_layers. Immutable after creation. */
-	struct list_head		 cpl_linkage;
 };
 
 /**
@@ -986,23 +1001,6 @@ struct cl_page_operations {
         void (*cpo_clip)(const struct lu_env *env,
                          const struct cl_page_slice *slice,
                          int from, int to);
-        /**
-         * \pre  the page was queued for transferring.
-         * \post page is removed from client's pending list, or -EBUSY
-         *       is returned if it has already been in transferring.
-         *
-         * This is one of seldom page operation which is:
-         * 0. called from top level;
-         * 1. don't have vmpage locked;
-         * 2. every layer should synchronize execution of its ->cpo_cancel()
-         *    with completion handlers. Osc uses client obd lock for this
-         *    purpose. Based on there is no vvp_page_cancel and
-         *    lov_page_cancel(), cpo_cancel is defacto protected by client lock.
-         *
-         * \see osc_page_cancel().
-         */
-        int (*cpo_cancel)(const struct lu_env *env,
-                          const struct cl_page_slice *slice);
 	/**
 	 * Write out a page by kernel. This is only called by ll_writepage
 	 * right now.
@@ -1303,7 +1301,6 @@ do {                                                                    \
 struct cl_page_list {
 	unsigned		 pl_nr;
 	struct list_head	 pl_pages;
-	struct task_struct	*pl_owner;
 };
 
 /**
@@ -1414,6 +1411,11 @@ enum cl_io_type {
 	 * To give advice about access of a file
 	 */
 	CIT_LADVISE,
+	/**
+	 * SEEK_HOLE/SEEK_DATA handling to search holes or data
+	 * across all file objects
+	 */
+	CIT_LSEEK,
         CIT_OP_NR
 };
 
@@ -1463,20 +1465,22 @@ struct cl_io_slice {
 };
 
 typedef void (*cl_commit_cbt)(const struct lu_env *, struct cl_io *,
-			      struct cl_page *);
+			      struct pagevec *);	/* pages arrive as a pagevec */
 
 struct cl_read_ahead {
 	/* Maximum page index the readahead window will end.
 	 * This is determined DLM lock coverage, RPC and stripe boundary.
 	 * cra_end is included. */
-	pgoff_t cra_end;
+	pgoff_t		cra_end_idx;	/* inclusive; "cra_end" in the comment above means this field */
 	/* optimal RPC size for this read, by pages */
-	unsigned long cra_rpc_size;
+	unsigned long	cra_rpc_pages;
 	/* Release callback. If readahead holds resources underneath, this
 	 * function should be called to release it. */
-	void    (*cra_release)(const struct lu_env *env, void *cbdata);
+	void		(*cra_release)(const struct lu_env *env, void *cbdata);
 	/* Callback data for cra_release routine */
-	void	*cra_cbdata;
+	void		*cra_cbdata;
+	/* whether the lock is in contention */
+	bool		cra_contention;
 };
 
 static inline void cl_read_ahead_release(const struct lu_env *env,
@@ -1763,6 +1767,31 @@ struct cl_io_rw_common {
 	size_t	crw_count;
 	int	crw_nonblock;
 };
+enum cl_setattr_subtype {	/* value of ci_setattr.sa_subtype in a CIT_SETATTR io */
+	/** regular setattr; numbering starts at 1, so 0 names no subtype */
+	CL_SETATTR_REG = 1,
+	/** truncate(2) */
+	CL_SETATTR_TRUNC,
+	/** fallocate(2) - mode preallocate */
+	CL_SETATTR_FALLOCATE
+};
+
+struct cl_io_range {
+	loff_t cir_pos;		/* NOTE(review): presumably the start offset of the range - confirm */
+	size_t cir_count;	/* NOTE(review): presumably the length of the range in bytes - confirm */
+};
+
+struct cl_io_pt {
+	struct cl_io_pt *cip_next;	/* link to another cl_io_pt */
+	struct kiocb cip_iocb;		/* embedded by value, not a pointer */
+	struct iov_iter cip_iter;	/* embedded by value, not a pointer */
+	struct file *cip_file;
+	enum cl_io_type cip_iot;	/* one of the CIT_* io types */
+	unsigned int cip_need_restart:1;
+	loff_t cip_pos;		/* NOTE(review): presumably the file offset of the io - confirm */
+	size_t cip_count;	/* NOTE(review): presumably the requested byte count - confirm */
+	ssize_t cip_result;	/* NOTE(review): presumably bytes done or a negative errno - confirm */
+};
 
 /**
  * State for io.
@@ -1780,6 +1809,8 @@ struct cl_io {
         enum cl_io_state               ci_state;
         /** main object this io is against. Immutable after creation. */
         struct cl_object              *ci_obj;
+	/** one AIO request might be split in cl_io_loop */
+	struct cl_dio_aio	      *ci_aio;
         /**
          * Upper layer io, of which this io is a part of. Immutable after
          * creation.
@@ -1811,6 +1842,14 @@ struct cl_io {
 			int			 sa_stripe_index;
 			struct ost_layout	 sa_layout;
 			const struct lu_fid	*sa_parent_fid;
+			/* SETATTR interface is used for regular setattr, */
+			/* truncate(2) and fallocate(2) subtypes */
+			enum cl_setattr_subtype	 sa_subtype;
+			/* The following are used for fallocate(2) */
+			int			 sa_falloc_mode;
+			loff_t			 sa_falloc_offset;
+			loff_t			 sa_falloc_len;
+			loff_t			 sa_falloc_end;
 		} ci_setattr;
 		struct cl_data_version_io {
 			u64 dv_data_version;
@@ -1848,6 +1887,11 @@ struct cl_io {
 			enum lu_ladvise_type	 li_advice;
 			__u64			 li_flags;
 		} ci_ladvise;
+		struct cl_lseek_io {
+			loff_t			 ls_start;
+			loff_t			 ls_result;
+			int			 ls_whence;
+		} ci_lseek;
         } u;
         struct cl_2queue     ci_queue;
         size_t               ci_nob;
@@ -1904,7 +1948,21 @@ struct cl_io {
 	/**
 	 * Set if IO is triggered by async workqueue readahead.
 	 */
-			     ci_async_readahead:1;
+			     ci_async_readahead:1,
+	/**
+	 * Ignore lockless and do normal locking for this io.
+	 */
+			     ci_ignore_lockless:1,
+	/**
+	 * Set if we've tried all mirrors for this read IO. If it's not set,
+	 * the read IO will check to-be-read OSCs' status, and fast-switch to
+	 * another mirror if some of the OSTs are not healthy.
+	 */
+			     ci_tried_all_mirrors:1;
+	/**
+	 * Bypass quota check
+	 */
+	unsigned	     ci_noquota:1;
 	/**
 	 * How many times the read has retried before this one.
 	 * Set by the top level and consumed by the LOV.
@@ -2011,8 +2069,8 @@ static inline struct cl_site *lu2cl_site(const struct lu_site *site)
 
 static inline struct cl_device *lu2cl_dev(const struct lu_device *d)
 {
-        LASSERT(d == NULL || IS_ERR(d) || lu_device_is_cl(d));
-        return container_of0(d, struct cl_device, cd_lu_dev);
+	LASSERT(d == NULL || IS_ERR(d) || lu_device_is_cl(d)); /* NULL and IS_ERR() values pass */
+	return container_of_safe(d, struct cl_device, cd_lu_dev); /* d is the cd_lu_dev member */
 }
 
 static inline struct lu_device *cl2lu_dev(struct cl_device *d)
@@ -2022,49 +2080,49 @@ static inline struct lu_device *cl2lu_dev(struct cl_device *d)
 
 static inline struct cl_object *lu2cl(const struct lu_object *o)
 {
-        LASSERT(o == NULL || IS_ERR(o) || lu_device_is_cl(o->lo_dev));
-        return container_of0(o, struct cl_object, co_lu);
+	LASSERT(o == NULL || IS_ERR(o) || lu_device_is_cl(o->lo_dev)); /* NULL and IS_ERR() values pass */
+	return container_of_safe(o, struct cl_object, co_lu); /* o is the co_lu member */
 }
 
 static inline const struct cl_object_conf *
 lu2cl_conf(const struct lu_object_conf *conf)
 {
-        return container_of0(conf, struct cl_object_conf, coc_lu);
+	return container_of_safe(conf, struct cl_object_conf, coc_lu); /* conf is the coc_lu member */
 }
 
 static inline struct cl_object *cl_object_next(const struct cl_object *obj)
 {
-        return obj ? lu2cl(lu_object_next(&obj->co_lu)) : NULL;
+	return obj ? lu2cl(lu_object_next(&obj->co_lu)) : NULL; /* NULL obj yields NULL */
 }
 
 static inline struct cl_object_header *luh2coh(const struct lu_object_header *h)
 {
-        return container_of0(h, struct cl_object_header, coh_lu);
+	return container_of_safe(h, struct cl_object_header, coh_lu); /* h is the coh_lu member */
 }
 
 static inline struct cl_site *cl_object_site(const struct cl_object *obj)
 {
-        return lu2cl_site(obj->co_lu.lo_dev->ld_site);
+	return lu2cl_site(obj->co_lu.lo_dev->ld_site); /* obj and lo_dev are dereferenced unchecked */
 }
 
 static inline
 struct cl_object_header *cl_object_header(const struct cl_object *obj)
 {
-        return luh2coh(obj->co_lu.lo_header);
+	return luh2coh(obj->co_lu.lo_header); /* obj is dereferenced unchecked */
 }
 
 static inline int cl_device_init(struct cl_device *d, struct lu_device_type *t)
 {
-        return lu_device_init(&d->cd_lu_dev, t);
+	return lu_device_init(&d->cd_lu_dev, t); /* inits the embedded lu_device; result passed through */
 }
 
 static inline void cl_device_fini(struct cl_device *d)
 {
-        lu_device_fini(&d->cd_lu_dev);
+	lu_device_fini(&d->cd_lu_dev); /* finalizes the embedded lu_device */
 }
 
 void cl_page_slice_add(struct cl_page *page, struct cl_page_slice *slice,
-		       struct cl_object *obj, pgoff_t index,
+		       struct cl_object *obj,
 		       const struct cl_page_operations *ops);
 void cl_lock_slice_add(struct cl_lock *lock, struct cl_lock_slice *slice,
                        struct cl_object *obj,
@@ -2104,6 +2162,9 @@ int cl_object_fiemap(const struct lu_env *env, struct cl_object *obj,
 int cl_object_layout_get(const struct lu_env *env, struct cl_object *obj,
 			 struct cl_layout *cl);
 loff_t cl_object_maxbytes(struct cl_object *obj);
+int cl_object_flush(const struct lu_env *env, struct cl_object *obj,
+		    struct ldlm_lock *lock);
+
 
 /**
  * Returns true, iff \a o0 and \a o1 are slices of the same object.
@@ -2139,14 +2200,6 @@ static inline int cl_object_refc(struct cl_object *clob)
 
 /** \defgroup cl_page cl_page
  * @{ */
-enum {
-        CLP_GANG_OKAY = 0,
-        CLP_GANG_RESCHED,
-        CLP_GANG_AGAIN,
-        CLP_GANG_ABORT
-};
-/* callback of cl_page_gang_lookup() */
-
 struct cl_page *cl_page_find        (const struct lu_env *env,
                                      struct cl_object *obj,
                                      pgoff_t idx, struct page *vmpage,
@@ -2211,7 +2264,6 @@ int  cl_page_cache_add  (const struct lu_env *env, struct cl_io *io,
                          struct cl_page *pg, enum cl_req_type crt);
 void cl_page_clip       (const struct lu_env *env, struct cl_page *pg,
                          int from, int to);
-int  cl_page_cancel     (const struct lu_env *env, struct cl_page *page);
 int  cl_page_flush      (const struct lu_env *env, struct cl_io *io,
 			 struct cl_page *pg);
 
@@ -2288,6 +2340,10 @@ struct cl_client_cache {
 	 * Used at umounting time and signaled on BRW commit
 	 */
 	wait_queue_head_t	ccc_unstable_waitq;
+	/**
+	 * Serialize max_cache_mb write operation
+	 */
+	struct mutex		ccc_max_cache_mb_lock;
 };
 /**
  * cl_cache functions
@@ -2349,8 +2405,6 @@ int   cl_io_read_ahead   (const struct lu_env *env, struct cl_io *io,
 			  pgoff_t start, struct cl_read_ahead *ra);
 void  cl_io_rw_advance   (const struct lu_env *env, struct cl_io *io,
                           size_t nob);
-int   cl_io_cancel       (const struct lu_env *env, struct cl_io *io,
-                          struct cl_page_list *queue);
 
 /**
  * True, iff \a io is an O_APPEND write(2).
@@ -2376,7 +2430,14 @@ static inline int cl_io_is_mkwrite(const struct cl_io *io)
 static inline int cl_io_is_trunc(const struct cl_io *io)
 {
 	return io->ci_type == CIT_SETATTR &&
-		(io->u.ci_setattr.sa_avalid & ATTR_SIZE);
+		(io->u.ci_setattr.sa_avalid & ATTR_SIZE) &&
+		(io->u.ci_setattr.sa_subtype != CL_SETATTR_FALLOCATE); /* fallocate is also CIT_SETATTR */
+}
+
+static inline int cl_io_is_fallocate(const struct cl_io *io)
+{	/* true iff \a io is a CIT_SETATTR io whose subtype is CL_SETATTR_FALLOCATE */
+	return (io->ci_type == CIT_SETATTR) &&
+	       (io->u.ci_setattr.sa_subtype == CL_SETATTR_FALLOCATE);
 }
 
 struct cl_io *cl_io_top(struct cl_io *io);
@@ -2461,6 +2522,25 @@ void cl_req_attr_set(const struct lu_env *env, struct cl_object *obj,
 /** \defgroup cl_sync_io cl_sync_io
  * @{ */
 
+struct cl_sync_io;
+struct cl_dio_aio;
+
+typedef void (cl_sync_io_end_t)(const struct lu_env *, struct cl_sync_io *);
+
+void cl_sync_io_init_notify(struct cl_sync_io *anchor, int nr,
+			    struct cl_dio_aio *aio, cl_sync_io_end_t *end);
+
+int  cl_sync_io_wait(const struct lu_env *env, struct cl_sync_io *anchor,
+		     long timeout);
+void cl_sync_io_note(const struct lu_env *env, struct cl_sync_io *anchor,
+		     int ioret);
+struct cl_dio_aio *cl_aio_alloc(struct kiocb *iocb);
+void cl_aio_free(struct cl_dio_aio *aio);
+static inline void cl_sync_io_init(struct cl_sync_io *anchor, int nr)
+{	/* shorthand for cl_sync_io_init_notify() with no aio and no end callback */
+	cl_sync_io_init_notify(anchor, nr, NULL, NULL);
+}
+
 /**
  * Anchor for synchronous transfer. This is allocated on a stack by thread
  * doing synchronous transfer, and a pointer to this structure is set up in
@@ -2472,22 +2552,22 @@ struct cl_sync_io {
 	atomic_t		csi_sync_nr;
 	/** error code. */
 	int			csi_sync_rc;
-	/** barrier of destroy this structure */
-	atomic_t		csi_barrier;
 	/** completion to be signaled when transfer is complete. */
 	wait_queue_head_t	csi_waitq;
 	/** callback to invoke when this IO is finished */
-	void			(*csi_end_io)(const struct lu_env *,
-					      struct cl_sync_io *);
+	cl_sync_io_end_t       *csi_end_io;
+	/** aio private data */
+	struct cl_dio_aio      *csi_aio;
 };
 
-void cl_sync_io_init(struct cl_sync_io *anchor, int nr,
-		     void (*end)(const struct lu_env *, struct cl_sync_io *));
-int  cl_sync_io_wait(const struct lu_env *env, struct cl_sync_io *anchor,
-		     long timeout);
-void cl_sync_io_note(const struct lu_env *env, struct cl_sync_io *anchor,
-		     int ioret);
-void cl_sync_io_end(const struct lu_env *env, struct cl_sync_io *anchor);
+/** State used to support Direct AIO; embeds a cl_sync_io anchor. */
+struct cl_dio_aio {
+	struct cl_sync_io	cda_sync;	/* embedded by value */
+	struct cl_page_list	cda_pages;
+	struct kiocb		*cda_iocb;	/* NOTE(review): presumably the kiocb passed to cl_aio_alloc() - confirm */
+	ssize_t			cda_bytes;	/* NOTE(review): presumably a byte count for the request - confirm */
+	unsigned		cda_no_aio_complete:1;
+};
 
 /** @} cl_sync_io */