*/
/*
* This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
*/
#ifndef _LUSTRE_CL_OBJECT_H
#define _LUSTRE_CL_OBJECT_H
*
* \invariant cl_page::cp_owner == NULL && cl_page::cp_req == NULL
*/
- CPS_CACHED,
+ CPS_CACHED = 1,
/**
* Page is exclusively owned by some cl_io. Page may end up in this
* state as a result of
to a vmpage which does not belong to the same object as the cl_page.
It is used in DirectIO and lockless IO. */
CPT_TRANSIENT,
+ CPT_NR
};
+#define CP_STATE_BITS 4
+#define CP_TYPE_BITS 2
+#define CP_MAX_LAYER 3
+
/**
* Fields are protected by the lock on struct page, except for atomics and
* immutables.
*/
struct cl_page {
/** Reference counter. */
- atomic_t cp_ref;
- /* which slab kmem index this memory allocated from */
- int cp_kmem_index;
+ atomic_t cp_ref;
+ /** layout_entry + stripe index, composed using lov_comp_index() */
+ unsigned int cp_lov_index;
+ pgoff_t cp_osc_index;
/** An object this page is a part of. Immutable after creation. */
struct cl_object *cp_obj;
/** vmpage */
struct page *cp_vmpage;
+ /**
+ * Assigned if doing direct IO, because in this case cp_vmpage is not
+ * a valid page cache page, hence the inode cannot be inferred from
+ * cp_vmpage->mapping->host.
+ */
+ struct inode *cp_inode;
/** Linkage of pages within group. Pages must be owned */
- struct list_head cp_batch;
+ struct list_head cp_batch;
/** array of slices offset. Immutable after creation. */
- unsigned char cp_layer_offset[3];
+ unsigned char cp_layer_offset[CP_MAX_LAYER]; /* 24 bits */
/** current slice index */
- unsigned char cp_layer_count:2;
+ unsigned char cp_layer_count:2; /* 26 bits */
/**
* Page state. This field is const to avoid accidental update, it is
* modified only internally within cl_page.c. Protected by a VM lock.
*/
- const enum cl_page_state cp_state;
+ enum cl_page_state cp_state:CP_STATE_BITS; /* 30 bits */
/**
* Page type. Only CPT_TRANSIENT is used so far. Immutable after
* creation.
*/
- enum cl_page_type cp_type;
+ enum cl_page_type cp_type:CP_TYPE_BITS; /* 32 bits */
+ /* which slab kmem index this memory was allocated from */
+ short int cp_kmem_index; /* 48 bits */
+ unsigned int cp_unused1:16; /* 64 bits */
- /**
- * Owning IO in cl_page_state::CPS_OWNED state. Sub-page can be owned
- * by sub-io. Protected by a VM lock.
- */
+ /**
+ * Owning IO in cl_page_state::CPS_OWNED state. Sub-page can be owned
+ * by sub-io. Protected by a VM lock.
+ */
struct cl_io *cp_owner;
- /** List of references to this page, for debugging. */
- struct lu_ref cp_reference;
+ /** List of references to this page, for debugging. */
+ struct lu_ref cp_reference;
/** Link to an object, for debugging. */
- struct lu_ref_link cp_obj_ref;
+ struct lu_ref_link cp_obj_ref;
/** Link to a queue, for debugging. */
- struct lu_ref_link cp_queue_ref;
+ struct lu_ref_link cp_queue_ref;
/** Assigned if doing a sync_io */
- struct cl_sync_io *cp_sync_io;
- /** layout_entry + stripe index, composed using lov_comp_index() */
- unsigned int cp_lov_index;
- pgoff_t cp_osc_index;
+ struct cl_sync_io *cp_sync_io;
};
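/*
 * A minimal sketch, not part of this patch: with cp_state and cp_type now
 * packed into CP_STATE_BITS- and CP_TYPE_BITS-wide bit-fields, a compile-time
 * check can assert that every enum value still fits.  It assumes the usual
 * kernel BUILD_BUG_ON() and the CPS_NR/CPT_NR terminators of the two enums,
 * and would be called once from init code so the checks are evaluated.
 */
static inline void cl_page_bitfields_check(void)
{
	BUILD_BUG_ON(CPS_NR > (1 << CP_STATE_BITS));
	BUILD_BUG_ON(CPT_NR > (1 << CP_TYPE_BITS));
}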
/**
* To give advice about access of a file
*/
CIT_LADVISE,
+ /**
+ * SEEK_HOLE/SEEK_DATA handling to search holes or data
+ * across all file objects
+ */
+ CIT_LSEEK,
CIT_OP_NR
};
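/*
 * Hypothetical sketch, not part of this patch: driving SEEK_HOLE/SEEK_DATA
 * through the new CIT_LSEEK io type.  The ls_* fields come from the
 * cl_lseek_io member added to the cl_io union further down; error handling
 * and restart logic are omitted, and the function name is illustrative.
 */
static loff_t example_cl_lseek(const struct lu_env *env, struct cl_io *io,
			       struct cl_object *obj, loff_t start, int whence)
{
	loff_t result = -ENXIO;

	io->ci_obj = obj;
	io->u.ci_lseek.ls_start = start;
	io->u.ci_lseek.ls_whence = whence;
	io->u.ci_lseek.ls_result = -ENXIO;

	if (cl_io_init(env, io, CIT_LSEEK, obj) == 0 &&
	    cl_io_loop(env, io) == 0)
		result = io->u.ci_lseek.ls_result;
	cl_io_fini(env, io);

	return result;
}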
unsigned long cra_rpc_pages;
/* Release callback. If readahead holds resources underneath, this
 * function should be called to release them. */
- void (*cra_release)(const struct lu_env *env, void *cbdata);
+ void (*cra_release)(const struct lu_env *env,
+ struct cl_read_ahead *ra);
+
/* Callback data for cra_release routine */
- void *cra_cbdata;
+ void *cra_dlmlock;
+ void *cra_oio;
+
/* whether lock is in contention */
bool cra_contention;
};
struct cl_read_ahead *ra)
{
if (ra->cra_release != NULL)
- ra->cra_release(env, ra->cra_cbdata);
+ ra->cra_release(env, ra);
memset(ra, 0, sizeof(*ra));
}
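/*
 * Hypothetical sketch, not part of this patch: under the new signature the
 * release callback receives the whole cl_read_ahead descriptor instead of an
 * opaque cbdata pointer, so a layer can drop whatever it stashed in
 * cra_dlmlock and cra_oio when readahead was set up.  cl_read_ahead_release()
 * zeroes the descriptor afterwards, so the callback need not clear the fields.
 */
static void example_cra_release(const struct lu_env *env,
				struct cl_read_ahead *ra)
{
	if (ra->cra_dlmlock != NULL) {
		/* put the DLM lock reference taken at setup (layer-specific) */
	}
	if (ra->cra_oio != NULL) {
		/* release the per-IO readahead state (layer-specific) */
	}
}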
struct cl_page_list *queue, int from, int to,
cl_commit_cbt cb);
/**
+ * Release active extent.
+ */
+ void (*cio_extent_release)(const struct lu_env *env,
+ const struct cl_io_slice *slice);
+ /**
* Decide maximum read ahead extent
*
* \pre io->ci_type == CIT_READ
int (*cio_read_ahead)(const struct lu_env *env,
const struct cl_io_slice *slice,
pgoff_t start, struct cl_read_ahead *ra);
+ /**
+ * Reserve LRU slots before IO.
+ */
+ int (*cio_lru_reserve)(const struct lu_env *env,
+ const struct cl_io_slice *slice,
+ loff_t pos, size_t bytes);
/**
* Optional debugging helper. Print given io slice.
*/
* \ingroup cl_lock
*/
enum cl_enq_flags {
- /**
- * instruct server to not block, if conflicting lock is found. Instead
- * -EWOULDBLOCK is returned immediately.
- */
- CEF_NONBLOCK = 0x00000001,
+ /**
+ * instruct server not to block if a conflicting lock is found.
+ * Instead, -EAGAIN is returned immediately.
+ */
+ CEF_NONBLOCK = 0x00000001,
/**
* Tell lower layers this is a glimpse request, translated to
* LDLM_FL_HAS_INTENT at LDLM layer.
enum cl_io_state ci_state;
/** main object this io is against. Immutable after creation. */
struct cl_object *ci_obj;
+ /** one AIO request might be split in cl_io_loop */
+ struct cl_dio_aio *ci_aio;
/**
* Upper layer io, of which this io is a part of. Immutable after
* creation.
/* The following are used for fallocate(2) */
int sa_falloc_mode;
loff_t sa_falloc_offset;
- loff_t sa_falloc_len;
loff_t sa_falloc_end;
} ci_setattr;
struct cl_data_version_io {
enum lu_ladvise_type li_advice;
__u64 li_flags;
} ci_ladvise;
+ struct cl_lseek_io {
+ loff_t ls_start;
+ loff_t ls_result;
+ int ls_whence;
+ } ci_lseek;
+ struct cl_misc_io {
+ time64_t lm_next_rpc_time;
+ } ci_misc;
} u;
struct cl_2queue ci_queue;
size_t ci_nob;
/**
* Ignore lockless and do normal locking for this io.
*/
- ci_ignore_lockless:1,
+ ci_dio_lock:1,
/**
* Set if we've tried all mirrors for this read IO, if it's not set,
* the read IO will check to-be-read OSCs' status, and make fast-switch
* another mirror if some of the OSTs are not healthy.
*/
- ci_tried_all_mirrors:1;
+ ci_tried_all_mirrors:1,
+ /**
+ * Random read hints: readahead will be disabled.
+ */
+ ci_rand_read:1,
+ /**
+ * Sequential read hints.
+ */
+ ci_seq_read:1,
+ /**
+ * Do parallel (async) submission of DIO RPCs. Note DIO is still sync
+ * to userspace, only the RPCs are submitted async, then waited for at
+ * the llite layer before returning.
+ */
+ ci_parallel_dio:1;
/**
* Bypass quota check
*/
int cl_io_commit_async (const struct lu_env *env, struct cl_io *io,
struct cl_page_list *queue, int from, int to,
cl_commit_cbt cb);
+void cl_io_extent_release (const struct lu_env *env, struct cl_io *io);
+int cl_io_lru_reserve(const struct lu_env *env, struct cl_io *io,
+ loff_t pos, size_t bytes);
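/*
 * Hypothetical sketch, not part of this patch: cl_io_lru_reserve() is meant
 * to be called once before the IO loop so LRU page slots for the whole
 * [pos, pos + bytes) range are reserved up front, rather than throttling in
 * the middle of the transfer.  The surrounding function name is illustrative.
 */
static int example_io_start(const struct lu_env *env, struct cl_io *io,
			    loff_t pos, size_t bytes)
{
	int rc;

	rc = cl_io_lru_reserve(env, io, pos, bytes);
	if (rc != 0)
		return rc;

	return cl_io_loop(env, io);
}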
int cl_io_read_ahead (const struct lu_env *env, struct cl_io *io,
pgoff_t start, struct cl_read_ahead *ra);
void cl_io_rw_advance (const struct lu_env *env, struct cl_io *io,
return io->ci_type == CIT_FAULT && io->u.ci_fault.ft_mkwrite;
}
+static inline int cl_io_is_fault_writable(const struct cl_io *io)
+{
+ return io->ci_type == CIT_FAULT && io->u.ci_fault.ft_writable;
+}
+
/**
* True, iff \a io is a truncate(2).
*/
#define cl_page_list_for_each_safe(page, temp, list) \
list_for_each_entry_safe((page), (temp), &(list)->pl_pages, cp_batch)
-void cl_page_list_init (struct cl_page_list *plist);
-void cl_page_list_add (struct cl_page_list *plist, struct cl_page *page);
-void cl_page_list_move (struct cl_page_list *dst, struct cl_page_list *src,
- struct cl_page *page);
+void cl_page_list_init(struct cl_page_list *plist);
+void cl_page_list_add(struct cl_page_list *plist, struct cl_page *page,
+ bool get_ref);
+void cl_page_list_move(struct cl_page_list *dst, struct cl_page_list *src,
+ struct cl_page *page);
void cl_page_list_move_head(struct cl_page_list *dst, struct cl_page_list *src,
- struct cl_page *page);
-void cl_page_list_splice (struct cl_page_list *list,
- struct cl_page_list *head);
-void cl_page_list_del (const struct lu_env *env,
- struct cl_page_list *plist, struct cl_page *page);
-void cl_page_list_disown (const struct lu_env *env,
- struct cl_io *io, struct cl_page_list *plist);
-void cl_page_list_assume (const struct lu_env *env,
- struct cl_io *io, struct cl_page_list *plist);
+ struct cl_page *page);
+void cl_page_list_splice(struct cl_page_list *list,
+ struct cl_page_list *head);
+void cl_page_list_del(const struct lu_env *env,
+ struct cl_page_list *plist, struct cl_page *page);
+void cl_page_list_disown(const struct lu_env *env,
+ struct cl_io *io, struct cl_page_list *plist);
+void cl_page_list_assume(const struct lu_env *env,
+ struct cl_io *io, struct cl_page_list *plist);
void cl_page_list_discard(const struct lu_env *env,
- struct cl_io *io, struct cl_page_list *plist);
-void cl_page_list_fini (const struct lu_env *env, struct cl_page_list *plist);
-
-void cl_2queue_init (struct cl_2queue *queue);
-void cl_2queue_add (struct cl_2queue *queue, struct cl_page *page);
-void cl_2queue_disown (const struct lu_env *env,
- struct cl_io *io, struct cl_2queue *queue);
-void cl_2queue_assume (const struct lu_env *env,
- struct cl_io *io, struct cl_2queue *queue);
-void cl_2queue_discard (const struct lu_env *env,
- struct cl_io *io, struct cl_2queue *queue);
-void cl_2queue_fini (const struct lu_env *env, struct cl_2queue *queue);
+ struct cl_io *io, struct cl_page_list *plist);
+void cl_page_list_fini(const struct lu_env *env, struct cl_page_list *plist);
+
+void cl_2queue_init(struct cl_2queue *queue);
+void cl_2queue_add(struct cl_2queue *queue, struct cl_page *page,
+ bool get_ref);
+void cl_2queue_disown(const struct lu_env *env, struct cl_io *io,
+ struct cl_2queue *queue);
+void cl_2queue_assume(const struct lu_env *env, struct cl_io *io,
+ struct cl_2queue *queue);
+void cl_2queue_discard(const struct lu_env *env, struct cl_io *io,
+ struct cl_2queue *queue);
+void cl_2queue_fini(const struct lu_env *env, struct cl_2queue *queue);
void cl_2queue_init_page(struct cl_2queue *queue, struct cl_page *page);
/** @} cl_page_list */
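/*
 * Hypothetical sketch, not part of this patch: cl_page_list_add() and
 * cl_2queue_add() now take a get_ref flag, so the caller chooses whether the
 * list takes its own page reference or adopts one the caller already holds.
 */
static void example_queue_page(struct cl_2queue *queue, struct cl_page *page)
{
	/* take an extra reference for the queue; it is dropped when the page
	 * is removed from the list or the queue is finalized */
	cl_2queue_add(queue, page, true);
}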
void cl_sync_io_init_notify(struct cl_sync_io *anchor, int nr,
struct cl_dio_aio *aio, cl_sync_io_end_t *end);
-int cl_sync_io_wait(const struct lu_env *env, struct cl_sync_io *anchor,
- long timeout);
+int cl_sync_io_wait(const struct lu_env *env, struct cl_sync_io *anchor,
+ long timeout);
void cl_sync_io_note(const struct lu_env *env, struct cl_sync_io *anchor,
int ioret);
-struct cl_dio_aio *cl_aio_alloc(struct kiocb *iocb);
+int cl_sync_io_wait_recycle(const struct lu_env *env, struct cl_sync_io *anchor,
+ long timeout, int ioret);
+struct cl_dio_aio *cl_aio_alloc(struct kiocb *iocb, struct cl_object *obj);
+void cl_aio_free(const struct lu_env *env, struct cl_dio_aio *aio);
static inline void cl_sync_io_init(struct cl_sync_io *anchor, int nr)
{
cl_sync_io_init_notify(anchor, nr, NULL, NULL);
struct cl_dio_aio {
struct cl_sync_io cda_sync;
struct cl_page_list cda_pages;
+ struct cl_object *cda_obj;
struct kiocb *cda_iocb;
ssize_t cda_bytes;
+ unsigned cda_no_aio_complete:1;
};
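/*
 * Hypothetical sketch, not part of this patch: allocating and freeing the
 * DIO anchor with the extended interface.  cl_aio_alloc() now also takes the
 * cl_object (stored in cda_obj), and cda_no_aio_complete lets a caller that
 * waits synchronously suppress the AIO completion path.  is_aio and the page
 * submission step are illustrative.
 */
static int example_dio_setup(const struct lu_env *env, struct kiocb *iocb,
			     struct cl_object *obj, bool is_aio)
{
	struct cl_dio_aio *aio;

	aio = cl_aio_alloc(iocb, obj);
	if (aio == NULL)
		return -ENOMEM;

	/* sync DIO: the caller waits on cda_sync itself, so do not run the
	 * asynchronous completion when the last page IO finishes */
	aio->cda_no_aio_complete = !is_aio;

	/* ... add pages to aio->cda_pages and submit them ... */

	cl_aio_free(env, aio);
	return 0;
}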
/** @} cl_sync_io */