* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2011, 2016, Intel Corporation.
+ * Copyright (c) 2011, 2017, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
/*
* super-class definitions.
*/
+#include <linux/aio.h>
+#include <linux/fs.h>
+
#include <libcfs/libcfs.h>
-#include <libcfs/libcfs_ptask.h>
#include <lu_object.h>
#include <linux/atomic.h>
#include <linux/mutex.h>
#include <linux/radix-tree.h>
#include <linux/spinlock.h>
#include <linux/wait.h>
+#include <linux/pagevec.h>
#include <lustre_dlm.h>
struct obd_info;
struct cl_req_attr;
-extern struct cfs_ptask_engine *cl_io_engine;
-
/**
* Device in the client stack.
*
u32 cl_layout_gen;
/** whether layout is a composite one */
bool cl_is_composite;
+ /** Whether layout is an HSM released one */
+ bool cl_is_released;
};
/**
void (*coo_req_attr_set)(const struct lu_env *env,
struct cl_object *obj,
struct cl_req_attr *attr);
+ /**
+ * Flush \a obj data corresponding to \a lock. Used for DoM
+ * locks in llite's cancelling blocking ast callback.
+ */
+ int (*coo_object_flush)(const struct lu_env *env,
+ struct cl_object *obj,
+ struct ldlm_lock *lock);
};
/**
struct lu_ref_link cp_queue_ref;
/** Assigned if doing a sync_io */
struct cl_sync_io *cp_sync_io;
+ /** layout_entry + stripe index, composed using lov_comp_index() */
+ unsigned int cp_lov_index;
};
/**
*/
int (*cpo_is_vmlocked)(const struct lu_env *env,
const struct cl_page_slice *slice);
+
+ /**
+ * Update file attributes when all we have is this page. Used for tiny
+ * writes to update attributes when we don't have a full cl_io.
+ */
+ void (*cpo_page_touch)(const struct lu_env *env,
+ const struct cl_page_slice *slice, size_t to);
/**
* Page destruction.
*/
const struct cl_page_slice *slice);
/** Destructor. Frees resources and slice itself. */
void (*cpo_fini)(const struct lu_env *env,
- struct cl_page_slice *slice);
+ struct cl_page_slice *slice,
+ struct pagevec *pvec);
/**
* Optional debugging helper. Prints given page slice.
*
* (struct cl_lock) and a list of layers (struct cl_lock_slice), linked to
* cl_lock::cll_layers list through cl_lock_slice::cls_linkage.
*
- * Typical cl_lock consists of the two layers:
+ * Typical cl_lock consists of one layer:
*
- * - vvp_lock (vvp specific data), and
* - lov_lock (lov specific data).
*
* lov_lock contains an array of sub-locks. Each of these sub-locks is a
* normal cl_lock: it has a header (struct cl_lock) and a list of layers:
*
- * - lovsub_lock, and
* - osc_lock
*
* Each sub-lock is associated with a cl_object (representing stripe
/**
* Per-layer part of cl_lock
*
- * \see vvp_lock, lov_lock, lovsub_lock, osc_lock
+ * \see lov_lock, osc_lock
*/
struct cl_lock_slice {
struct cl_lock *cls_lock;
/**
*
- * \see vvp_lock_ops, lov_lock_ops, lovsub_lock_ops, osc_lock_ops
+ * \see lov_lock_ops, osc_lock_ops
*/
struct cl_lock_operations {
/** @{ */
* @anchor for resources
* \retval -ve failure
*
- * \see vvp_lock_enqueue(), lov_lock_enqueue(), lovsub_lock_enqueue(),
- * \see osc_lock_enqueue()
+ * \see lov_lock_enqueue(), osc_lock_enqueue()
*/
int (*clo_enqueue)(const struct lu_env *env,
const struct cl_lock_slice *slice,
/**
* Destructor. Frees resources and the slice.
*
- * \see vvp_lock_fini(), lov_lock_fini(), lovsub_lock_fini(),
- * \see osc_lock_fini()
+ * \see lov_lock_fini(), osc_lock_fini()
*/
void (*clo_fini)(const struct lu_env *env, struct cl_lock_slice *slice);
/**
struct task_struct *pl_owner;
};
-/**
+/**
* A 2-queue of pages. A convenience data-type for common use case, 2-queue
* contains an incoming page list and an outgoing page list.
*/
};
typedef void (*cl_commit_cbt)(const struct lu_env *, struct cl_io *,
- struct cl_page *);
+ struct pagevec *);
struct cl_read_ahead {
/* Maximum page index the readahead window will end.
void (*cra_release)(const struct lu_env *env, void *cbdata);
/* Callback data for cra_release routine */
void *cra_cbdata;
+ /* whether lock is in contention */
+ bool cra_contention;
};
static inline void cl_read_ahead_release(const struct lu_env *env,
CL_FSYNC_ALL = 3
};
-struct cl_io_range {
- loff_t cir_pos;
- size_t cir_count;
-};
-
-struct cl_io_pt {
- struct cl_io_pt *cip_next;
- struct cfs_ptask cip_task;
- struct kiocb cip_iocb;
- struct iov_iter cip_iter;
- struct file *cip_file;
- enum cl_io_type cip_iot;
- loff_t cip_pos;
- size_t cip_count;
- ssize_t cip_result;
+struct cl_io_rw_common { /* state shared by read and write I/O: embedded in cl_rd_io, cl_wr_io and as ci_rw */
+ loff_t crw_pos; /* NOTE(review): presumably the file offset of the I/O - confirm */
+ size_t crw_count; /* NOTE(review): presumably the byte count of the I/O - confirm */
+ int crw_nonblock; /* NOTE(review): presumably non-zero for non-blocking I/O - confirm */
};
/**
struct cl_lockset ci_lockset;
/** lock requirements, this is just a help info for sublayers. */
enum cl_io_lock_dmd ci_lockreq;
- union {
- struct cl_rw_io {
- struct iov_iter rw_iter;
- struct kiocb rw_iocb;
- struct cl_io_range rw_range;
- struct file *rw_file;
- unsigned int rw_nonblock:1,
- rw_append:1,
- rw_sync:1;
- int (*rw_ptask)(struct cfs_ptask *ptask);
- } ci_rw;
+ /** layout version when this IO occurs */
+ __u32 ci_layout_version;
+ union {
+ struct cl_rd_io {
+ struct cl_io_rw_common rd;
+ } ci_rd;
+ struct cl_wr_io {
+ struct cl_io_rw_common wr;
+ int wr_append;
+ int wr_sync;
+ } ci_wr;
+ struct cl_io_rw_common ci_rw;
struct cl_setattr_io {
struct ost_lvb sa_attr;
unsigned int sa_attr_flags;
- unsigned int sa_valid;
+ unsigned int sa_avalid; /* ATTR_* */
+ unsigned int sa_xvalid; /* OP_XVALID */
int sa_stripe_index;
struct ost_layout sa_layout;
const struct lu_fid *sa_parent_fid;
} ci_setattr;
struct cl_data_version_io {
u64 dv_data_version;
+ u32 dv_layout_version;
int dv_flags;
} ci_data_version;
struct cl_fault_io {
*/
ci_ignore_layout:1,
/**
- * Need MDS intervention to complete a write. This usually means the
- * corresponding component is not initialized for the writing extent.
+ * Need MDS intervention to complete a write.
+ * Write intent is required for the following cases:
+ * 1. component being written is not initialized, or
+ * 2. the mirrored files are NOT in WRITE_PENDING state.
*/
ci_need_write_intent:1,
/**
* O_NOATIME
*/
ci_noatime:1,
- /** Set to 1 if parallel execution is allowed for current I/O? */
- ci_pio:1,
/* Tell sublayers not to expand LDLM locks requested for this IO */
- ci_lock_no_expand:1;
+ ci_lock_no_expand:1,
+ /**
+ * Set if non-delay RPC should be used for this IO.
+ *
+ * If this file has multiple mirrors, and if the OSTs of the current
+ * mirror are inaccessible, non-delay RPC would error out quickly so
+ * that the upper layer can try to access the next mirror.
+ */
+ ci_ndelay:1,
+ /**
+ * Set if IO is triggered by async workqueue readahead.
+ */
+ ci_async_readahead:1,
+ /**
+ * Set if we've tried all mirrors for this read IO. If it's not set,
+ * the read IO will check the status of the to-be-read OSCs and
+ * fast-switch to another mirror if some of the OSTs are not healthy.
+ */
+ ci_tried_all_mirrors:1;
+ /**
+ * How many times the read has retried before this one.
+ * Set by the top level and consumed by the LOV.
+ */
+ unsigned ci_ndelay_tried;
+ /**
+ * Designated mirror index for this I/O.
+ */
+ unsigned ci_designated_mirror;
/**
* Number of pages owned by this IO. For invariant checking.
*/
unsigned ci_owned_nr;
+ /**
+ * Range of write intent. Valid if ci_need_write_intent is set.
+ */
+ struct lu_extent ci_write_intent;
};
/** @} cl_io */
int cl_object_layout_get(const struct lu_env *env, struct cl_object *obj,
struct cl_layout *cl);
loff_t cl_object_maxbytes(struct cl_object *obj);
+int cl_object_flush(const struct lu_env *env, struct cl_object *obj,
+ struct ldlm_lock *lock);
+
/**
* Returns true, iff \a o0 and \a o1 are slices of the same object.
void cl_page_get (struct cl_page *page);
void cl_page_put (const struct lu_env *env,
struct cl_page *page);
+void cl_pagevec_put (const struct lu_env *env,
+ struct cl_page *page,
+ struct pagevec *pvec);
void cl_page_print (const struct lu_env *env, void *cookie,
lu_printer_t printer,
const struct cl_page *pg);
void cl_page_delete(const struct lu_env *env, struct cl_page *pg);
int cl_page_is_vmlocked(const struct lu_env *env,
const struct cl_page *pg);
+void cl_page_touch(const struct lu_env *env, const struct cl_page *pg,
+ size_t to);
void cl_page_export(const struct lu_env *env,
struct cl_page *pg, int uptodate);
loff_t cl_offset(const struct cl_object *obj, pgoff_t idx);
*/
static inline int cl_io_is_append(const struct cl_io *io)
{
- return io->ci_type == CIT_WRITE && io->u.ci_rw.rw_append;
+ return io->ci_type == CIT_WRITE && io->u.ci_wr.wr_append; /* ci_wr is read only when ci_type is CIT_WRITE (&& short-circuits) */
}
static inline int cl_io_is_sync_write(const struct cl_io *io)
{
- return io->ci_type == CIT_WRITE && io->u.ci_rw.rw_sync;
+ return io->ci_type == CIT_WRITE && io->u.ci_wr.wr_sync; /* non-zero only for a CIT_WRITE io whose wr_sync is set */
}
static inline int cl_io_is_mkwrite(const struct cl_io *io)
*/
static inline int cl_io_is_trunc(const struct cl_io *io)
{
- return io->ci_type == CIT_SETATTR &&
- (io->u.ci_setattr.sa_valid & ATTR_SIZE);
+ return io->ci_type == CIT_SETATTR &&
+ (io->u.ci_setattr.sa_avalid & ATTR_SIZE); /* ATTR_* bits live in sa_avalid; sa_xvalid is not consulted here */
}
struct cl_io *cl_io_top(struct cl_io *io);
void cl_io_print(const struct lu_env *env, void *cookie,
lu_printer_t printer, const struct cl_io *io);
-#define CL_IO_SLICE_CLEAN(foo_io, base) \
-do { \
- typeof(foo_io) __foo_io = (foo_io); \
- \
- CLASSERT(offsetof(typeof(*__foo_io), base) == 0); \
- memset(&__foo_io->base + 1, 0, \
- (sizeof *__foo_io) - sizeof __foo_io->base); \
+#define CL_IO_SLICE_CLEAN(foo_io, base) \
+do { \
+ typeof(foo_io) __foo_io = (foo_io); \
+ /* Zero *__foo_io from member 'base' (inclusive) through its end. */ \
+ memset(&__foo_io->base, 0, \
+ sizeof(*__foo_io) - offsetof(typeof(*__foo_io), base)); \
} while (0)
/** @} cl_io */
/** \defgroup cl_sync_io cl_sync_io
* @{ */
+struct cl_sync_io;
+
+typedef void (cl_sync_io_end_t)(const struct lu_env *, struct cl_sync_io *);
+
+void cl_sync_io_init_notify(struct cl_sync_io *anchor, int nr,
+ cl_sync_io_end_t *end);
+
+int cl_sync_io_wait(const struct lu_env *env, struct cl_sync_io *anchor,
+ long timeout);
+void cl_sync_io_note(const struct lu_env *env, struct cl_sync_io *anchor,
+ int ioret);
+static inline void cl_sync_io_init(struct cl_sync_io *anchor, int nr)
+{
+ cl_sync_io_init_notify(anchor, nr, NULL); /* NULL 'end': no cl_sync_io_end_t callback is supplied */
+}
+
/**
* Anchor for synchronous transfer. This is allocated on a stack by thread
* doing synchronous transfer, and a pointer to this structure is set up in
atomic_t csi_sync_nr;
/** error code. */
int csi_sync_rc;
- /** barrier of destroy this structure */
- atomic_t csi_barrier;
/** completion to be signaled when transfer is complete. */
wait_queue_head_t csi_waitq;
/** callback to invoke when this IO is finished */
- void (*csi_end_io)(const struct lu_env *,
- struct cl_sync_io *);
+ cl_sync_io_end_t *csi_end_io;
};
-void cl_sync_io_init(struct cl_sync_io *anchor, int nr,
- void (*end)(const struct lu_env *, struct cl_sync_io *));
-int cl_sync_io_wait(const struct lu_env *env, struct cl_sync_io *anchor,
- long timeout);
-void cl_sync_io_note(const struct lu_env *env, struct cl_sync_io *anchor,
- int ioret);
-void cl_sync_io_end(const struct lu_env *env, struct cl_sync_io *anchor);
-
/** @} cl_sync_io */
/** \defgroup cl_env cl_env