* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2011, 2016, Intel Corporation.
+ * Copyright (c) 2011, 2017, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
*/
#ifndef _LUSTRE_CL_OBJECT_H
#define _LUSTRE_CL_OBJECT_H
/*
* super-class definitions.
*/
+#include <linux/aio.h>
+#include <linux/fs.h>
+
#include <libcfs/libcfs.h>
#include <lu_object.h>
#include <linux/atomic.h>
#include <linux/radix-tree.h>
#include <linux/spinlock.h>
#include <linux/wait.h>
+#include <linux/pagevec.h>
#include <lustre_dlm.h>
struct obd_info;
* Fields in cl_attr that are being set.
*/
enum cl_attr_valid {
- CAT_SIZE = 1 << 0,
- CAT_KMS = 1 << 1,
- CAT_MTIME = 1 << 3,
- CAT_ATIME = 1 << 4,
- CAT_CTIME = 1 << 5,
- CAT_BLOCKS = 1 << 6,
- CAT_UID = 1 << 7,
- CAT_GID = 1 << 8,
- CAT_PROJID = 1 << 9
+ CAT_SIZE = BIT(0),
+ CAT_KMS = BIT(1),
+ CAT_MTIME = BIT(3),
+ CAT_ATIME = BIT(4),
+ CAT_CTIME = BIT(5),
+ CAT_BLOCKS = BIT(6),
+ CAT_UID = BIT(7),
+ CAT_GID = BIT(8),
+ CAT_PROJID = BIT(9),
};
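+/*
+ * Illustrative sketch, not part of the patch: a layer that learned a new
+ * size and mtime would combine these bits when pushing attributes, e.g.
+ * via an attribute-update helper such as cl_object_attr_update():
+ *
+ *	attr->cat_size  = new_size;
+ *	attr->cat_mtime = ktime_get_real_seconds();
+ *	rc = cl_object_attr_update(env, obj, attr, CAT_SIZE | CAT_MTIME);
+ */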
/**
size_t cl_size;
/** Layout generation. */
u32 cl_layout_gen;
+ /** Whether layout is a composite one */
+ bool cl_is_composite;
+ /** Whether layout is an HSM released one */
+ bool cl_is_released;
};
/**
* Object getstripe method.
*/
int (*coo_getstripe)(const struct lu_env *env, struct cl_object *obj,
- struct lov_user_md __user *lum);
+ struct lov_user_md __user *lum, size_t size);
/**
* Get FIEMAP mapping from the object.
*/
void (*coo_req_attr_set)(const struct lu_env *env,
struct cl_object *obj,
struct cl_req_attr *attr);
+ /**
+ * Flush \a obj data corresponding to \a lock. Used for DoM
+ * locks in llite's cancelling blocking ast callback.
+ */
+ int (*coo_object_flush)(const struct lu_env *env,
+ struct cl_object *obj,
+ struct ldlm_lock *lock);
};
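+/*
+ * Illustrative sketch, not part of the patch: coo_object_flush() is reached
+ * through cl_object_flush(), declared later in this header. A cancelling
+ * blocking AST for a Data-on-MDT lock would do, roughly:
+ *
+ *	rc = cl_object_flush(env, obj, lock);
+ *	if (rc != 0)
+ *		CERROR("flush of DoM data failed: rc = %d\n", rc);
+ *
+ * where obj is the cl_object of the inode covered by the lock.
+ */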
/**
*
* \invariant cl_page::cp_owner == NULL && cl_page::cp_req == NULL
*/
- CPS_CACHED,
+ CPS_CACHED = 1,
/**
* Page is exclusively owned by some cl_io. Page may end up in this
* state as a result of
/** Transient page, the transient cl_page is used to bind a cl_page
* to vmpage which is not belonging to the same object of cl_page.
- * it is used in DirectIO, lockless IO and liblustre. */
+ * It is used in DirectIO and lockless IO. */
CPT_TRANSIENT,
+ CPT_NR
};
+#define CP_STATE_BITS 4
+#define CP_TYPE_BITS 2
+#define CP_MAX_LAYER 3
+
/**
* Fields are protected by the lock on struct page, except for atomics and
* immutables.
*/
struct cl_page {
/** Reference counter. */
- atomic_t cp_ref;
+ atomic_t cp_ref;
+ /** layout_entry + stripe index, composed using lov_comp_index() */
+ unsigned int cp_lov_index;
+ pgoff_t cp_osc_index;
/** An object this page is a part of. Immutable after creation. */
struct cl_object *cp_obj;
/** vmpage */
struct page *cp_vmpage;
+ /**
+ * Assigned if doing direct IO, because in this case cp_vmpage is not
+ * a valid page cache page, hence the inode cannot be inferred from
+ * cp_vmpage->mapping->host.
+ */
+ struct inode *cp_inode;
/** Linkage of pages within group. Pages must be owned */
- struct list_head cp_batch;
- /** List of slices. Immutable after creation. */
- struct list_head cp_layers;
+ struct list_head cp_batch;
+ /** Array of slice offsets. Immutable after creation. */
+ unsigned char cp_layer_offset[CP_MAX_LAYER]; /* 24 bits */
+ /** Number of slices attached to this page */
+ unsigned char cp_layer_count:2; /* 26 bits */
/**
* Page state. This field is const to avoid accidental update, it is
* modified only internally within cl_page.c. Protected by a VM lock.
*/
- const enum cl_page_state cp_state;
+ enum cl_page_state cp_state:CP_STATE_BITS; /* 30 bits */
/**
* Page type. Only CPT_TRANSIENT is used so far. Immutable after
* creation.
*/
- enum cl_page_type cp_type;
+ enum cl_page_type cp_type:CP_TYPE_BITS; /* 32 bits */
+ /* index of the slab kmem cache this cl_page was allocated from */
+ short int cp_kmem_index; /* 48 bits */
+ unsigned int cp_unused1:16; /* 64 bits */
- /**
- * Owning IO in cl_page_state::CPS_OWNED state. Sub-page can be owned
- * by sub-io. Protected by a VM lock.
- */
+ /**
+ * Owning IO in cl_page_state::CPS_OWNED state. Sub-page can be owned
+ * by sub-io. Protected by a VM lock.
+ */
struct cl_io *cp_owner;
- /** List of references to this page, for debugging. */
- struct lu_ref cp_reference;
+ /** List of references to this page, for debugging. */
+ struct lu_ref cp_reference;
/** Link to an object, for debugging. */
- struct lu_ref_link cp_obj_ref;
+ struct lu_ref_link cp_obj_ref;
/** Link to a queue, for debugging. */
- struct lu_ref_link cp_queue_ref;
+ struct lu_ref_link cp_queue_ref;
/** Assigned if doing a sync_io */
- struct cl_sync_io *cp_sync_io;
+ struct cl_sync_io *cp_sync_io;
};
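+/*
+ * Illustrative sketch, not part of the patch: with the cp_layers list gone,
+ * a slice is reached by adding its recorded byte offset to the cl_page
+ * itself, along the lines of (helper name is hypothetical):
+ *
+ *	static inline struct cl_page_slice *
+ *	cl_page_slice_get(const struct cl_page *cp, int i)
+ *	{
+ *		return (struct cl_page_slice *)((char *)cp +
+ *						cp->cp_layer_offset[i]);
+ *	}
+ *
+ * with i ranging over [0, cp_layer_count).
+ */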
/**
*/
struct cl_page_slice {
struct cl_page *cpl_page;
- pgoff_t cpl_index;
/**
* Object slice corresponding to this page slice. Immutable after
* creation.
*/
struct cl_object *cpl_obj;
const struct cl_page_operations *cpl_ops;
- /** Linkage into cl_page::cp_layers. Immutable after creation. */
- struct list_head cpl_linkage;
};
/**
*/
int (*cpo_is_vmlocked)(const struct lu_env *env,
const struct cl_page_slice *slice);
+
+ /**
+ * Update file attributes when all we have is this page. Used for tiny
+ * writes to update attributes when we don't have a full cl_io.
+ */
+ void (*cpo_page_touch)(const struct lu_env *env,
+ const struct cl_page_slice *slice, size_t to);
/**
* Page destruction.
*/
const struct cl_page_slice *slice);
/** Destructor. Frees resources and slice itself. */
void (*cpo_fini)(const struct lu_env *env,
- struct cl_page_slice *slice);
+ struct cl_page_slice *slice,
+ struct pagevec *pvec);
/**
* Optional debugging helper. Prints given page slice.
*
void (*cpo_clip)(const struct lu_env *env,
const struct cl_page_slice *slice,
int from, int to);
- /**
- * \pre the page was queued for transferring.
- * \post page is removed from client's pending list, or -EBUSY
- * is returned if it has already been in transferring.
- *
- * This is one of seldom page operation which is:
- * 0. called from top level;
- * 1. don't have vmpage locked;
- * 2. every layer should synchronize execution of its ->cpo_cancel()
- * with completion handlers. Osc uses client obd lock for this
- * purpose. Based on there is no vvp_page_cancel and
- * lov_page_cancel(), cpo_cancel is defacto protected by client lock.
- *
- * \see osc_page_cancel().
- */
- int (*cpo_cancel)(const struct lu_env *env,
- const struct cl_page_slice *slice);
/**
* Write out a page by kernel. This is only called by ll_writepage
* right now.
* (struct cl_lock) and a list of layers (struct cl_lock_slice), linked to
* cl_lock::cll_layers list through cl_lock_slice::cls_linkage.
*
- * Typical cl_lock consists of the two layers:
+ * Typical cl_lock consists of one layer:
*
- * - vvp_lock (vvp specific data), and
* - lov_lock (lov specific data).
*
* lov_lock contains an array of sub-locks. Each of these sub-locks is a
* normal cl_lock: it has a header (struct cl_lock) and a list of layers:
*
- * - lovsub_lock, and
* - osc_lock
*
* Each sub-lock is associated with a cl_object (representing stripe
/**
* Per-layer part of cl_lock
*
- * \see vvp_lock, lov_lock, lovsub_lock, osc_lock
+ * \see lov_lock, osc_lock
*/
struct cl_lock_slice {
struct cl_lock *cls_lock;
/**
*
- * \see vvp_lock_ops, lov_lock_ops, lovsub_lock_ops, osc_lock_ops
+ * \see lov_lock_ops, osc_lock_ops
*/
struct cl_lock_operations {
/** @{ */
* @anchor for resources
* \retval -ve failure
*
- * \see vvp_lock_enqueue(), lov_lock_enqueue(), lovsub_lock_enqueue(),
- * \see osc_lock_enqueue()
+ * \see lov_lock_enqueue(), osc_lock_enqueue()
*/
int (*clo_enqueue)(const struct lu_env *env,
const struct cl_lock_slice *slice,
/**
* Destructor. Frees resources and the slice.
*
- * \see vvp_lock_fini(), lov_lock_fini(), lovsub_lock_fini(),
- * \see osc_lock_fini()
+ * \see lov_lock_fini(), osc_lock_fini()
*/
void (*clo_fini)(const struct lu_env *env, struct cl_lock_slice *slice);
/**
struct cl_page_list {
unsigned pl_nr;
struct list_head pl_pages;
- struct task_struct *pl_owner;
};
-/**
+/**
* A 2-queue of pages. A convenience data-type for common use case, 2-queue
* contains an incoming page list and an outgoing page list.
*/
*/
CIT_FSYNC,
/**
+ * glimpse. An io context to acquire glimpse lock.
+ */
+ CIT_GLIMPSE,
+ /**
* Miscellaneous io. This is used for occasional io activity that
* doesn't fit into other types. Currently this is used for:
*
* - VM induced page write-out. An io context for writing page out
* for memory cleansing;
*
- * - glimpse. An io context to acquire glimpse lock.
- *
* - grouplock. An io context to acquire group lock.
*
* CIT_MISC io is used simply as a context in which locks and pages
* To give advice about access of a file
*/
CIT_LADVISE,
+ /**
+ * SEEK_HOLE/SEEK_DATA handling to search holes or data
+ * across all file objects
+ */
+ CIT_LSEEK,
CIT_OP_NR
};
};
typedef void (*cl_commit_cbt)(const struct lu_env *, struct cl_io *,
- struct cl_page *);
+ struct pagevec *);
struct cl_read_ahead {
- /* Maximum page index the readahead window will end.
- * This is determined DLM lock coverage, RPC and stripe boundary.
- * cra_end is included. */
- pgoff_t cra_end;
+ /* Maximum page index at which the readahead window will end.
+ * This is determined by DLM lock coverage, RPC and stripe boundary.
+ * cra_end_idx is included. */
+ pgoff_t cra_end_idx;
/* optimal RPC size for this read, by pages */
- unsigned long cra_rpc_size;
+ unsigned long cra_rpc_pages;
/* Release callback. If readahead holds resources underneath, this
* function should be called to release it. */
- void (*cra_release)(const struct lu_env *env, void *cbdata);
+ void (*cra_release)(const struct lu_env *env,
+ struct cl_read_ahead *ra);
+
/* Callback data for cra_release routine */
- void *cra_cbdata;
+ void *cra_dlmlock;
+ void *cra_oio;
+
+ /* whether lock is in contention */
+ bool cra_contention;
};
static inline void cl_read_ahead_release(const struct lu_env *env,
struct cl_read_ahead *ra)
{
if (ra->cra_release != NULL)
- ra->cra_release(env, ra->cra_cbdata);
+ ra->cra_release(env, ra);
memset(ra, 0, sizeof(*ra));
}
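+/*
+ * Illustrative sketch, not part of the patch: a layer implementing
+ * cio_read_ahead() (see cl_io_operations below) fills this descriptor and,
+ * if it pins a DLM lock underneath, points cra_release at a routine that
+ * drops the reference. Names on the right are placeholders:
+ *
+ *	ra->cra_end_idx   = last_index_covered_by_lock;
+ *	ra->cra_rpc_pages = optimal_pages_per_rpc;
+ *	ra->cra_dlmlock   = dlmlock;
+ *	ra->cra_release   = foo_read_ahead_release;
+ *
+ * The caller then invokes cl_read_ahead_release() exactly once when the
+ * window has been consumed.
+ */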
struct cl_page_list *queue, int from, int to,
cl_commit_cbt cb);
/**
+ * Release active extent.
+ */
+ void (*cio_extent_release)(const struct lu_env *env,
+ const struct cl_io_slice *slice);
+ /**
* Decide maximum read ahead extent
*
* \pre io->ci_type == CIT_READ
int (*cio_read_ahead)(const struct lu_env *env,
const struct cl_io_slice *slice,
pgoff_t start, struct cl_read_ahead *ra);
+ /**
+ * Reserve LRU slots before IO.
+ */
+ int (*cio_lru_reserve)(const struct lu_env *env,
+ const struct cl_io_slice *slice,
+ loff_t pos, size_t bytes);
/**
* Optional debugging helper. Print given io slice.
*/
* \ingroup cl_lock
*/
enum cl_enq_flags {
- /**
- * instruct server to not block, if conflicting lock is found. Instead
- * -EWOULDBLOCK is returned immediately.
- */
- CEF_NONBLOCK = 0x00000001,
- /**
- * take lock asynchronously (out of order), as it cannot
- * deadlock. This is for LDLM_FL_HAS_INTENT locks used for glimpsing.
- */
- CEF_ASYNC = 0x00000002,
+ /**
+ * instruct server to not block, if conflicting lock is found. Instead
+ * -EAGAIN is returned immediately.
+ */
+ CEF_NONBLOCK = 0x00000001,
+ /**
+ * Tell lower layers this is a glimpse request, translated to
+ * LDLM_FL_HAS_INTENT at LDLM layer.
+ *
+ * Also, because glimpse locks never block other locks, we count this
+ * as automatically compatible with other osc locks.
+ * (see osc_lock_compatible)
+ */
+ CEF_GLIMPSE = 0x00000002,
/**
* tell the server to instruct (though a flag in the blocking ast) an
* owner of the conflicting lock, that it can drop dirty pages
* protected by this lock, without sending them to the server.
*/
CEF_DISCARD_DATA = 0x00000004,
- /**
- * tell the sub layers that it must be a `real' lock. This is used for
- * mmapped-buffer locks and glimpse locks that must be never converted
- * into lockless mode.
- *
- * \see vvp_mmap_locks(), cl_glimpse_lock().
- */
- CEF_MUST = 0x00000008,
+ /**
+ * tell the sub layers that it must be a `real' lock. This is used for
+ * mmapped-buffer locks, glimpse locks, manually requested locks
+ * (LU_LADVISE_LOCKAHEAD) that must never be converted into lockless
+ * mode.
+ *
+ * \see vvp_mmap_locks(), cl_glimpse_lock, cl_request_lock().
+ */
+ CEF_MUST = 0x00000008,
/**
* tell the sub layers that never request a `real' lock. This flag is
* not used currently.
*/
CEF_NEVER = 0x00000010,
/**
- * for async glimpse lock.
+ * tell the DLM layer this is a speculative lock request.
+ * Speculative lock requests are locks which are not requested as part
+ * of an I/O operation. Instead, they are requested because we expect
+ * to use them in the future. They are requested asynchronously at the
+ * ptlrpc layer.
+ *
+ * Currently used for asynchronous glimpse locks and manually requested
+ * locks (LU_LADVISE_LOCKAHEAD).
*/
- CEF_AGL = 0x00000020,
+ CEF_SPECULATIVE = 0x00000020,
/**
* enqueue a lock to test DLM lock existence.
*/
*/
CEF_LOCK_MATCH = 0x00000080,
/**
+ * tell the DLM layer to lock only the requested range
+ */
+ CEF_LOCK_NO_EXPAND = 0x00000100,
+ /**
* mask of enq_flags.
*/
- CEF_MASK = 0x000000ff,
+ CEF_MASK = 0x000001ff,
};
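+/*
+ * Illustrative sketch, not part of the patch: based on the comments above,
+ * a glimpse enqueue would pass something like
+ *
+ *	__u32 enqflags = CEF_GLIMPSE | CEF_MUST;
+ *
+ * while a lockahead request issued ahead of I/O (LU_LADVISE_LOCKAHEAD) would
+ * use
+ *
+ *	__u32 enqflags = CEF_SPECULATIVE | CEF_MUST | CEF_LOCK_NO_EXPAND;
+ *
+ * The exact combinations are chosen by the vvp/lov/osc callers.
+ */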
/**
};
struct cl_io_rw_common {
- loff_t crw_pos;
- size_t crw_count;
- int crw_nonblock;
+ loff_t crw_pos;
+ size_t crw_count;
+ int crw_nonblock;
+};
+enum cl_setattr_subtype {
+ /** regular setattr **/
+ CL_SETATTR_REG = 1,
+ /** truncate(2) **/
+ CL_SETATTR_TRUNC,
+ /** fallocate(2) - mode preallocate **/
+ CL_SETATTR_FALLOCATE
+};
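+
+/*
+ * Illustrative sketch, not part of the patch: a preallocating fallocate(2)
+ * request is expected to reach cl_io as a CIT_SETATTR io carrying roughly
+ *
+ *	io->u.ci_setattr.sa_subtype	  = CL_SETATTR_FALLOCATE;
+ *	io->u.ci_setattr.sa_falloc_mode	  = mode;
+ *	io->u.ci_setattr.sa_falloc_offset = offset;
+ *	io->u.ci_setattr.sa_falloc_end	  = offset + len;
+ *
+ * while truncate(2) uses CL_SETATTR_TRUNC with ATTR_SIZE set in sa_avalid;
+ * see the ci_setattr member of struct cl_io and cl_io_is_fallocate() below.
+ */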
+
+struct cl_io_range {
+ loff_t cir_pos;
+ size_t cir_count;
+};
+
+struct cl_io_pt {
+ struct cl_io_pt *cip_next;
+ struct kiocb cip_iocb;
+ struct iov_iter cip_iter;
+ struct file *cip_file;
+ enum cl_io_type cip_iot;
+ unsigned int cip_need_restart:1;
+ loff_t cip_pos;
+ size_t cip_count;
+ ssize_t cip_result;
};
/**
enum cl_io_state ci_state;
/** main object this io is against. Immutable after creation. */
struct cl_object *ci_obj;
+ /** one AIO request might be split in cl_io_loop */
+ struct cl_dio_aio *ci_aio;
/**
* Upper layer io, of which this io is a part of. Immutable after
* creation.
struct cl_lockset ci_lockset;
/** lock requirements, this is just a help info for sublayers. */
enum cl_io_lock_dmd ci_lockreq;
- union {
- struct cl_rd_io {
- struct cl_io_rw_common rd;
- } ci_rd;
+ /** layout version when this IO occurs */
+ __u32 ci_layout_version;
+ union {
+ struct cl_rd_io {
+ struct cl_io_rw_common rd;
+ } ci_rd;
struct cl_wr_io {
struct cl_io_rw_common wr;
int wr_append;
struct cl_setattr_io {
struct ost_lvb sa_attr;
unsigned int sa_attr_flags;
- unsigned int sa_valid;
+ unsigned int sa_avalid; /* ATTR_* */
+ unsigned int sa_xvalid; /* OP_XVALID */
int sa_stripe_index;
struct ost_layout sa_layout;
const struct lu_fid *sa_parent_fid;
+ /* SETATTR interface is used for regular setattr,
+ * truncate(2) and fallocate(2) subtypes */
+ enum cl_setattr_subtype sa_subtype;
+ /* The following are used for fallocate(2) */
+ int sa_falloc_mode;
+ loff_t sa_falloc_offset;
+ loff_t sa_falloc_end;
} ci_setattr;
struct cl_data_version_io {
u64 dv_data_version;
+ u32 dv_layout_version;
int dv_flags;
} ci_data_version;
struct cl_fault_io {
enum lu_ladvise_type li_advice;
__u64 li_flags;
} ci_ladvise;
+ struct cl_lseek_io {
+ loff_t ls_start;
+ loff_t ls_result;
+ int ls_whence;
+ } ci_lseek;
+ struct cl_misc_io {
+ time64_t lm_next_rpc_time;
+ } ci_misc;
} u;
struct cl_2queue ci_queue;
size_t ci_nob;
*/
ci_ignore_layout:1,
/**
- * Need MDS intervention to complete a write. This usually means the
- * corresponding component is not initialized for the writing extent.
+ * Need MDS intervention to complete a write.
+ * Write intent is required for the following cases:
+ * 1. component being written is not initialized, or
+ * 2. the mirrored files are NOT in WRITE_PENDING state.
*/
ci_need_write_intent:1,
/**
/**
* O_NOATIME
*/
- ci_noatime:1;
+ ci_noatime:1,
+ /* Tell sublayers not to expand LDLM locks requested for this IO */
+ ci_lock_no_expand:1,
+ /**
+ * Set if non-delay RPC should be used for this IO.
+ *
+ * If this file has multiple mirrors, and if the OSTs of the current
+ * mirror is inaccessible, non-delay RPC would error out quickly so
+ * that the upper layer can try to access the next mirror.
+ */
+ ci_ndelay:1,
+ /**
+ * Set if IO is triggered by async workqueue readahead.
+ */
+ ci_async_readahead:1,
+ /**
+ * Ignore lockless and do normal locking for this io.
+ */
+ ci_dio_lock:1,
+ /**
+ * Set if we've tried all mirrors for this read IO. If it is not set,
+ * the read IO will check the status of the to-be-read OSCs and quickly
+ * switch to another mirror if some of the OSTs are not healthy.
+ */
+ ci_tried_all_mirrors:1,
+ /**
+ * Random read hints; readahead will be disabled.
+ */
+ ci_rand_read:1,
+ /**
+ * Sequential read hints.
+ */
+ ci_seq_read:1,
+ /**
+ * Do parallel (async) submission of DIO RPCs. Note DIO is still sync
+ * to userspace; only the RPCs are submitted async and are then waited
+ * for at the llite layer before returning.
+ */
+ ci_parallel_dio:1;
+ /**
+ * Bypass quota check
+ */
+ unsigned ci_noquota:1;
+ /**
+ * How many times the read has been retried before this one.
+ * Set by the top level and consumed by the LOV.
+ */
+ unsigned ci_ndelay_tried;
+ /**
+ * Designated mirror index for this I/O.
+ */
+ unsigned ci_designated_mirror;
/**
* Number of pages owned by this IO. For invariant checking.
*/
unsigned ci_owned_nr;
+ /**
+ * Range of write intent. Valid if ci_need_write_intent is set.
+ */
+ struct lu_extent ci_write_intent;
};
/** @} cl_io */
static inline struct cl_device *lu2cl_dev(const struct lu_device *d)
{
- LASSERT(d == NULL || IS_ERR(d) || lu_device_is_cl(d));
- return container_of0(d, struct cl_device, cd_lu_dev);
+ LASSERT(d == NULL || IS_ERR(d) || lu_device_is_cl(d));
+ return container_of_safe(d, struct cl_device, cd_lu_dev);
}
static inline struct lu_device *cl2lu_dev(struct cl_device *d)
static inline struct cl_object *lu2cl(const struct lu_object *o)
{
- LASSERT(o == NULL || IS_ERR(o) || lu_device_is_cl(o->lo_dev));
- return container_of0(o, struct cl_object, co_lu);
+ LASSERT(o == NULL || IS_ERR(o) || lu_device_is_cl(o->lo_dev));
+ return container_of_safe(o, struct cl_object, co_lu);
}
static inline const struct cl_object_conf *
lu2cl_conf(const struct lu_object_conf *conf)
{
- return container_of0(conf, struct cl_object_conf, coc_lu);
+ return container_of_safe(conf, struct cl_object_conf, coc_lu);
}
static inline struct cl_object *cl_object_next(const struct cl_object *obj)
{
- return obj ? lu2cl(lu_object_next(&obj->co_lu)) : NULL;
+ return obj ? lu2cl(lu_object_next(&obj->co_lu)) : NULL;
}
static inline struct cl_object_header *luh2coh(const struct lu_object_header *h)
{
- return container_of0(h, struct cl_object_header, coh_lu);
+ return container_of_safe(h, struct cl_object_header, coh_lu);
}
static inline struct cl_site *cl_object_site(const struct cl_object *obj)
{
- return lu2cl_site(obj->co_lu.lo_dev->ld_site);
+ return lu2cl_site(obj->co_lu.lo_dev->ld_site);
}
static inline
struct cl_object_header *cl_object_header(const struct cl_object *obj)
{
- return luh2coh(obj->co_lu.lo_header);
+ return luh2coh(obj->co_lu.lo_header);
}
static inline int cl_device_init(struct cl_device *d, struct lu_device_type *t)
{
- return lu_device_init(&d->cd_lu_dev, t);
+ return lu_device_init(&d->cd_lu_dev, t);
}
static inline void cl_device_fini(struct cl_device *d)
{
- lu_device_fini(&d->cd_lu_dev);
+ lu_device_fini(&d->cd_lu_dev);
}
void cl_page_slice_add(struct cl_page *page, struct cl_page_slice *slice,
- struct cl_object *obj, pgoff_t index,
+ struct cl_object *obj,
const struct cl_page_operations *ops);
void cl_lock_slice_add(struct cl_lock *lock, struct cl_lock_slice *slice,
struct cl_object *obj,
int cl_object_prune (const struct lu_env *env, struct cl_object *obj);
void cl_object_kill (const struct lu_env *env, struct cl_object *obj);
int cl_object_getstripe(const struct lu_env *env, struct cl_object *obj,
- struct lov_user_md __user *lum);
+ struct lov_user_md __user *lum, size_t size);
int cl_object_fiemap(const struct lu_env *env, struct cl_object *obj,
struct ll_fiemap_info_key *fmkey, struct fiemap *fiemap,
size_t *buflen);
int cl_object_layout_get(const struct lu_env *env, struct cl_object *obj,
struct cl_layout *cl);
loff_t cl_object_maxbytes(struct cl_object *obj);
+int cl_object_flush(const struct lu_env *env, struct cl_object *obj,
+ struct ldlm_lock *lock);
+
/**
* Returns true, iff \a o0 and \a o1 are slices of the same object.
/** \defgroup cl_page cl_page
* @{ */
-enum {
- CLP_GANG_OKAY = 0,
- CLP_GANG_RESCHED,
- CLP_GANG_AGAIN,
- CLP_GANG_ABORT
-};
-/* callback of cl_page_gang_lookup() */
-
struct cl_page *cl_page_find (const struct lu_env *env,
struct cl_object *obj,
pgoff_t idx, struct page *vmpage,
void cl_page_get (struct cl_page *page);
void cl_page_put (const struct lu_env *env,
struct cl_page *page);
+void cl_pagevec_put (const struct lu_env *env,
+ struct cl_page *page,
+ struct pagevec *pvec);
void cl_page_print (const struct lu_env *env, void *cookie,
lu_printer_t printer,
const struct cl_page *pg);
struct cl_page *pg, enum cl_req_type crt);
void cl_page_clip (const struct lu_env *env, struct cl_page *pg,
int from, int to);
-int cl_page_cancel (const struct lu_env *env, struct cl_page *page);
int cl_page_flush (const struct lu_env *env, struct cl_io *io,
struct cl_page *pg);
void cl_page_delete(const struct lu_env *env, struct cl_page *pg);
int cl_page_is_vmlocked(const struct lu_env *env,
const struct cl_page *pg);
+void cl_page_touch(const struct lu_env *env, const struct cl_page *pg,
+ size_t to);
void cl_page_export(const struct lu_env *env,
struct cl_page *pg, int uptodate);
loff_t cl_offset(const struct cl_object *obj, pgoff_t idx);
* Used at umounting time and signaled on BRW commit
*/
wait_queue_head_t ccc_unstable_waitq;
+ /**
+ * Serialize max_cache_mb write operation
+ */
+ struct mutex ccc_max_cache_mb_lock;
};
/**
* cl_cache functions
int cl_io_commit_async (const struct lu_env *env, struct cl_io *io,
struct cl_page_list *queue, int from, int to,
cl_commit_cbt cb);
+void cl_io_extent_release (const struct lu_env *env, struct cl_io *io);
+int cl_io_lru_reserve(const struct lu_env *env, struct cl_io *io,
+ loff_t pos, size_t bytes);
int cl_io_read_ahead (const struct lu_env *env, struct cl_io *io,
pgoff_t start, struct cl_read_ahead *ra);
void cl_io_rw_advance (const struct lu_env *env, struct cl_io *io,
size_t nob);
-int cl_io_cancel (const struct lu_env *env, struct cl_io *io,
- struct cl_page_list *queue);
/**
* True, iff \a io is an O_APPEND write(2).
*/
static inline int cl_io_is_append(const struct cl_io *io)
{
- return io->ci_type == CIT_WRITE && io->u.ci_wr.wr_append;
+ return io->ci_type == CIT_WRITE && io->u.ci_wr.wr_append;
}
static inline int cl_io_is_sync_write(const struct cl_io *io)
return io->ci_type == CIT_FAULT && io->u.ci_fault.ft_mkwrite;
}
+static inline int cl_io_is_fault_writable(const struct cl_io *io)
+{
+ return io->ci_type == CIT_FAULT && io->u.ci_fault.ft_writable;
+}
+
/**
* True, iff \a io is a truncate(2).
*/
static inline int cl_io_is_trunc(const struct cl_io *io)
{
- return io->ci_type == CIT_SETATTR &&
- (io->u.ci_setattr.sa_valid & ATTR_SIZE);
+ return io->ci_type == CIT_SETATTR &&
+ (io->u.ci_setattr.sa_avalid & ATTR_SIZE) &&
+ (io->u.ci_setattr.sa_subtype != CL_SETATTR_FALLOCATE);
+}
+
+static inline int cl_io_is_fallocate(const struct cl_io *io)
+{
+ return (io->ci_type == CIT_SETATTR) &&
+ (io->u.ci_setattr.sa_subtype == CL_SETATTR_FALLOCATE);
}
struct cl_io *cl_io_top(struct cl_io *io);
void cl_io_print(const struct lu_env *env, void *cookie,
lu_printer_t printer, const struct cl_io *io);
-#define CL_IO_SLICE_CLEAN(foo_io, base) \
-do { \
- typeof(foo_io) __foo_io = (foo_io); \
- \
- CLASSERT(offsetof(typeof(*__foo_io), base) == 0); \
- memset(&__foo_io->base + 1, 0, \
- (sizeof *__foo_io) - sizeof __foo_io->base); \
+#define CL_IO_SLICE_CLEAN(foo_io, base) \
+do { \
+ typeof(foo_io) __foo_io = (foo_io); \
+ \
+ memset(&__foo_io->base, 0, \
+ sizeof(*__foo_io) - offsetof(typeof(*__foo_io), base)); \
} while (0)
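+/*
+ * Illustrative sketch, not part of the patch: given a layer private io
+ * structure embedding its cl_io_slice (struct foo_io and its members are
+ * hypothetical):
+ *
+ *	struct foo_io {
+ *		struct cl_io_slice	foo_cl;
+ *		int			foo_private_state;
+ *	};
+ *
+ * CL_IO_SLICE_CLEAN(fio, foo_cl) zeroes everything from foo_cl to the end
+ * of *fio, i.e. the slice itself and every layer private field after it.
+ */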
/** @} cl_io */
#define cl_page_list_for_each_safe(page, temp, list) \
list_for_each_entry_safe((page), (temp), &(list)->pl_pages, cp_batch)
-void cl_page_list_init (struct cl_page_list *plist);
-void cl_page_list_add (struct cl_page_list *plist, struct cl_page *page);
-void cl_page_list_move (struct cl_page_list *dst, struct cl_page_list *src,
- struct cl_page *page);
+void cl_page_list_init(struct cl_page_list *plist);
+void cl_page_list_add(struct cl_page_list *plist, struct cl_page *page,
+ bool get_ref);
+void cl_page_list_move(struct cl_page_list *dst, struct cl_page_list *src,
+ struct cl_page *page);
void cl_page_list_move_head(struct cl_page_list *dst, struct cl_page_list *src,
- struct cl_page *page);
-void cl_page_list_splice (struct cl_page_list *list,
- struct cl_page_list *head);
-void cl_page_list_del (const struct lu_env *env,
- struct cl_page_list *plist, struct cl_page *page);
-void cl_page_list_disown (const struct lu_env *env,
- struct cl_io *io, struct cl_page_list *plist);
-void cl_page_list_assume (const struct lu_env *env,
- struct cl_io *io, struct cl_page_list *plist);
+ struct cl_page *page);
+void cl_page_list_splice(struct cl_page_list *list,
+ struct cl_page_list *head);
+void cl_page_list_del(const struct lu_env *env,
+ struct cl_page_list *plist, struct cl_page *page);
+void cl_page_list_disown(const struct lu_env *env,
+ struct cl_io *io, struct cl_page_list *plist);
+void cl_page_list_assume(const struct lu_env *env,
+ struct cl_io *io, struct cl_page_list *plist);
void cl_page_list_discard(const struct lu_env *env,
- struct cl_io *io, struct cl_page_list *plist);
-void cl_page_list_fini (const struct lu_env *env, struct cl_page_list *plist);
-
-void cl_2queue_init (struct cl_2queue *queue);
-void cl_2queue_add (struct cl_2queue *queue, struct cl_page *page);
-void cl_2queue_disown (const struct lu_env *env,
- struct cl_io *io, struct cl_2queue *queue);
-void cl_2queue_assume (const struct lu_env *env,
- struct cl_io *io, struct cl_2queue *queue);
-void cl_2queue_discard (const struct lu_env *env,
- struct cl_io *io, struct cl_2queue *queue);
-void cl_2queue_fini (const struct lu_env *env, struct cl_2queue *queue);
+ struct cl_io *io, struct cl_page_list *plist);
+void cl_page_list_fini(const struct lu_env *env, struct cl_page_list *plist);
+
+void cl_2queue_init(struct cl_2queue *queue);
+void cl_2queue_add(struct cl_2queue *queue, struct cl_page *page,
+ bool get_ref);
+void cl_2queue_disown(const struct lu_env *env, struct cl_io *io,
+ struct cl_2queue *queue);
+void cl_2queue_assume(const struct lu_env *env, struct cl_io *io,
+ struct cl_2queue *queue);
+void cl_2queue_discard(const struct lu_env *env, struct cl_io *io,
+ struct cl_2queue *queue);
+void cl_2queue_fini(const struct lu_env *env, struct cl_2queue *queue);
void cl_2queue_init_page(struct cl_2queue *queue, struct cl_page *page);
/** @} cl_page_list */
/** \defgroup cl_sync_io cl_sync_io
* @{ */
+struct cl_sync_io;
+struct cl_dio_aio;
+
+typedef void (cl_sync_io_end_t)(const struct lu_env *, struct cl_sync_io *);
+
+void cl_sync_io_init_notify(struct cl_sync_io *anchor, int nr,
+ struct cl_dio_aio *aio, cl_sync_io_end_t *end);
+
+int cl_sync_io_wait(const struct lu_env *env, struct cl_sync_io *anchor,
+ long timeout);
+void cl_sync_io_note(const struct lu_env *env, struct cl_sync_io *anchor,
+ int ioret);
+int cl_sync_io_wait_recycle(const struct lu_env *env, struct cl_sync_io *anchor,
+ long timeout, int ioret);
+struct cl_dio_aio *cl_aio_alloc(struct kiocb *iocb, struct cl_object *obj);
+void cl_aio_free(const struct lu_env *env, struct cl_dio_aio *aio);
+static inline void cl_sync_io_init(struct cl_sync_io *anchor, int nr)
+{
+ cl_sync_io_init_notify(anchor, nr, NULL, NULL);
+}
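+
+/*
+ * Illustrative sketch, not part of the patch: a synchronous transfer of
+ * nr_pages pages typically follows this pattern, each page completion
+ * calling cl_sync_io_note() and the submitter blocking in cl_sync_io_wait():
+ *
+ *	struct cl_sync_io anchor;
+ *
+ *	cl_sync_io_init(&anchor, nr_pages);
+ *	submit the pages; each completion calls
+ *		cl_sync_io_note(env, &anchor, page_rc);
+ *	rc = cl_sync_io_wait(env, &anchor, timeout);
+ *
+ * For AIO/DIO the anchor is embedded in a struct cl_dio_aio (see below)
+ * obtained from cl_aio_alloc(), and csi_end_io runs once the pending count
+ * drops to zero.
+ */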
+
/**
* Anchor for synchronous transfer. This is allocated on a stack by thread
* doing synchronous transfer, and a pointer to this structure is set up in
atomic_t csi_sync_nr;
/** error code. */
int csi_sync_rc;
- /** barrier of destroy this structure */
- atomic_t csi_barrier;
/** completion to be signaled when transfer is complete. */
wait_queue_head_t csi_waitq;
/** callback to invoke when this IO is finished */
- void (*csi_end_io)(const struct lu_env *,
- struct cl_sync_io *);
+ cl_sync_io_end_t *csi_end_io;
+ /** aio private data */
+ struct cl_dio_aio *csi_aio;
};
-void cl_sync_io_init(struct cl_sync_io *anchor, int nr,
- void (*end)(const struct lu_env *, struct cl_sync_io *));
-int cl_sync_io_wait(const struct lu_env *env, struct cl_sync_io *anchor,
- long timeout);
-void cl_sync_io_note(const struct lu_env *env, struct cl_sync_io *anchor,
- int ioret);
-void cl_sync_io_end(const struct lu_env *env, struct cl_sync_io *anchor);
+/** To support Direct AIO */
+struct cl_dio_aio {
+ struct cl_sync_io cda_sync;
+ struct cl_page_list cda_pages;
+ struct cl_object *cda_obj;
+ struct kiocb *cda_iocb;
+ ssize_t cda_bytes;
+ unsigned cda_no_aio_complete:1;
+};
/** @} cl_sync_io */