-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
+/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2011 Whamcloud, Inc.
- *
+ * Copyright (c) 2011, 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
struct lu_object co_lu;
/** per-object-layer operations */
const struct cl_object_operations *co_ops;
+ /** offset of page slice in cl_page buffer */
+ int co_slice_off;
};
/**
* VFS inode. This is consumed by vvp.
*/
struct inode *coc_inode;
+ /**
+ * Layout lock handle.
+ */
+ struct ldlm_lock *coc_lock;
+ /**
+ * Operation to handle layout, OBJECT_CONF_XYZ.
+ */
+ int coc_opc;
+};
+
+enum {
+ /** configure layout, set up a new stripe, must be called while
+ * holding layout lock. */
+ OBJECT_CONF_SET = 0,
+ /** invalidate the current stripe configuration due to losing
+ * layout lock. */
+ OBJECT_CONF_INVALIDATE = 1,
+ /** wait for old layout to go away so that new layout can be
+ * set up. */
+ OBJECT_CONF_WAIT = 2
};
/**
* \retval valid-pointer pointer to already existing referenced page
* to be used instead of newly created.
*/
- struct cl_page *(*coo_page_init)(const struct lu_env *env,
- struct cl_object *obj,
- struct cl_page *page,
- cfs_page_t *vmpage);
+ int (*coo_page_init)(const struct lu_env *env, struct cl_object *obj,
+ struct cl_page *page, cfs_page_t *vmpage);
/**
* Initialize lock slice for this layer. Called top-to-bottom through
* every object layer when a new cl_lock is instantiated. Layer
*/
/** @{ */
/** Lock protecting page tree. */
- cfs_spinlock_t coh_page_guard;
- /** Lock protecting lock list. */
- cfs_spinlock_t coh_lock_guard;
+ spinlock_t coh_page_guard;
+ /** Lock protecting lock list. */
+ spinlock_t coh_lock_guard;
/** @} locks */
/** Radix tree of cl_page's, cached for this object. */
struct radix_tree_root coh_tree;
*
* \todo XXX this can be read/write lock if needed.
*/
- cfs_spinlock_t coh_attr_guard;
- /**
- * Number of objects above this one: 0 for a top-object, 1 for its
- * sub-object, etc.
- */
- unsigned coh_nesting;
+ spinlock_t coh_attr_guard;
+ /**
+ * Size of cl_page + page slices
+ */
+ unsigned short coh_page_bufsize;
+ /**
+ * Number of objects above this one: 0 for a top-object, 1 for its
+ * sub-object, etc.
+ */
+ unsigned char coh_nesting;
};
/**
* modified only internally within cl_page.c. Protected by a VM lock.
*/
const enum cl_page_state cp_state;
- /**
- * Linkage of pages within some group. Protected by
- * cl_page::cp_mutex. */
- cfs_list_t cp_batch;
- /** Mutex serializing membership of a page in a batch. */
- cfs_mutex_t cp_mutex;
+ /** Linkage of pages within group. Protected by cl_page::cp_mutex. */
+ cfs_list_t cp_batch;
+ /** Mutex serializing membership of a page in a batch. */
+ struct mutex cp_mutex;
/** Linkage of pages within cl_req. */
cfs_list_t cp_flight;
/** Transfer error. */
*/
int (*cpo_cancel)(const struct lu_env *env,
const struct cl_page_slice *slice);
+ /**
+ * Write out a page by kernel. This is only called by ll_writepage
+ * right now.
+ *
+ * \see cl_page_flush()
+ */
+ int (*cpo_flush)(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *io);
/** @} transfer */
};
*/
#define CL_PAGE_DEBUG(mask, env, page, format, ...) \
do { \
- static DECLARE_LU_CDEBUG_PRINT_INFO(__info, mask); \
+ LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \
\
if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \
- cl_page_print(env, &__info, lu_cdebug_printer, page); \
+ cl_page_print(env, &msgdata, lu_cdebug_printer, page); \
CDEBUG(mask, format , ## __VA_ARGS__); \
} \
} while (0)
/**
* Helper macro, dumping shorter information about \a page into a log.
*/
-#define CL_PAGE_HEADER(mask, env, page, format, ...) \
-do { \
- static DECLARE_LU_CDEBUG_PRINT_INFO(__info, mask); \
- \
- if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \
- cl_page_header_print(env, &__info, lu_cdebug_printer, page); \
- CDEBUG(mask, format , ## __VA_ARGS__); \
- } \
+#define CL_PAGE_HEADER(mask, env, page, format, ...) \
+do { \
+ LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \
+ \
+ if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \
+ cl_page_header_print(env, &msgdata, lu_cdebug_printer, page); \
+ CDEBUG(mask, format , ## __VA_ARGS__); \
+ } \
} while (0)
+static inline int __page_in_use(const struct cl_page *page, int refc)
+{
+ if (page->cp_type == CPT_CACHEABLE)
+ ++refc;
+ LASSERT(cfs_atomic_read(&page->cp_ref) > 0);
+ return (cfs_atomic_read(&page->cp_ref) > refc);
+}
+#define cl_page_in_use(pg) __page_in_use(pg, 1)
+#define cl_page_in_use_noref(pg) __page_in_use(pg, 0)
+
/** @} cl_page */
/** \addtogroup cl_lock cl_lock
/** cancellation is pending for this lock. */
CLF_CANCELPEND = 1 << 1,
/** destruction is pending for this lock. */
- CLF_DOOMED = 1 << 2
+ CLF_DOOMED = 1 << 2,
+ /** from enqueue RPC reply upcall. */
+ CLF_FROM_UPCALL= 1 << 3,
};
/**
*
* \see osc_lock_enqueue_wait(), lov_lock_cancel(), lov_sublock_wait().
*/
- cfs_mutex_t cll_guard;
+ struct mutex cll_guard;
cfs_task_t *cll_guarder;
int cll_depth;
*/
enum cl_lock_transition {
/** operation cannot be completed immediately. Wait for state change. */
- CLO_WAIT = 1,
+ CLO_WAIT = 1,
/** operation had to release lock mutex, restart. */
- CLO_REPEAT = 2
+ CLO_REPEAT = 2,
+ /** lower layer re-enqueued. */
+ CLO_REENQUEUED = 3,
};
/**
#define CL_LOCK_DEBUG(mask, env, lock, format, ...) \
do { \
- static DECLARE_LU_CDEBUG_PRINT_INFO(__info, mask); \
+ LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \
\
if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \
- cl_lock_print(env, &__info, lu_cdebug_printer, lock); \
+ cl_lock_print(env, &msgdata, lu_cdebug_printer, lock); \
CDEBUG(mask, format , ## __VA_ARGS__); \
} \
} while (0)
+#define CL_LOCK_ASSERT(expr, env, lock) do { \
+ if (likely(expr)) \
+ break; \
+ \
+ CL_LOCK_DEBUG(D_ERROR, env, lock, "failed at %s.\n", #expr); \
+ LBUG(); \
+} while (0)
+
/** @} cl_lock */
/** \addtogroup cl_page_list cl_page_list
*/
CIT_FAULT,
/**
+ * fsync system call handling
+ * To write out a range of file
+ */
+ CIT_FSYNC,
+ /**
* Miscellaneous io. This is used for occasional io activity that
* doesn't fit into other types. Currently this is used for:
*
CIS_FINI
};
-enum cl_req_priority {
- CRP_NORMAL,
- CRP_CANCEL
-};
-
/**
* IO state private for a layer.
*
int (*cio_submit)(const struct lu_env *env,
const struct cl_io_slice *slice,
enum cl_req_type crt,
- struct cl_2queue *queue,
- enum cl_req_priority priority);
+ struct cl_2queue *queue);
} req_op[CRT_NR];
/**
* Read missing page.
*/
CEF_NEVER = 0x00000010,
/**
+ * for async glimpse lock.
+ */
+ CEF_AGL = 0x00000020,
+ /**
* mask of enq_flags.
*/
- CEF_MASK = 0x0000001f
+ CEF_MASK = 0x0000003f,
};
/**
CILR_PEEK
};
+enum cl_fsync_mode {
+ /** start writeback, do not wait for them to finish */
+ CL_FSYNC_NONE = 0,
+ /** start writeback and wait for them to finish */
+ CL_FSYNC_LOCAL = 1,
+ /** discard all of dirty pages in a specific file range */
+ CL_FSYNC_DISCARD = 2,
+ /** start writeback and make sure they have reached storage before
+ * return. OST_SYNC RPC must be issued and finished */
+ CL_FSYNC_ALL = 3
+};
+
struct cl_io_rw_common {
loff_t crw_pos;
size_t crw_count;
struct cl_lockset ci_lockset;
/** lock requirements, this is just a help info for sublayers. */
enum cl_io_lock_dmd ci_lockreq;
- /**
- * This io has held grouplock, to inform sublayers that
- * don't do lockless i/o.
- */
- int ci_no_srvlock;
union {
struct cl_rd_io {
struct cl_io_rw_common rd;
struct cl_wr_io {
struct cl_io_rw_common wr;
int wr_append;
+ int wr_sync;
} ci_wr;
struct cl_io_rw_common ci_rw;
struct cl_setattr_io {
/** resulting page */
struct cl_page *ft_page;
} ci_fault;
+ struct cl_fsync_io {
+ loff_t fi_start;
+ loff_t fi_end;
+ struct obd_capa *fi_capa;
+ /** file system level fid */
+ struct lu_fid *fi_fid;
+ enum cl_fsync_mode fi_mode;
+ /* how many pages were written/discarded */
+ unsigned int fi_nr_written;
+ } ci_fsync;
} u;
struct cl_2queue ci_queue;
size_t ci_nob;
int ci_result;
- int ci_continue;
+ unsigned int ci_continue:1,
+ /**
+ * This io has held grouplock, to inform sublayers that
+ * don't do lockless i/o.
+ */
+ ci_no_srvlock:1,
+ /**
+ * The whole IO need to be restarted because layout has been changed
+ */
+ ci_need_restart:1,
+ /**
+ * to not refresh layout - the IO issuer knows that the layout won't
+ * change(page operations, layout change causes all page to be
+ * discarded), or it doesn't matter if it changes(sync).
+ */
+ ci_ignore_layout:1,
+ /**
+ * Check if layout changed after the IO finishes. Mainly for HSM
+ * requirement. If IO occurs to openning files, it doesn't need to
+ * verify layout because HSM won't release openning files.
+ * Right now, only two opertaions need to verify layout: glimpse
+ * and setattr.
+ */
+ ci_verify_layout:1;
/**
* Number of pages owned by this IO. For invariant checking.
*/
* Per-transfer attributes.
*/
struct cl_req_attr {
- /** Generic attributes for the server consumption. */
- struct obdo *cra_oa;
- /** Capability. */
- struct obd_capa *cra_capa;
+ /** Generic attributes for the server consumption. */
+ struct obdo *cra_oa;
+ /** Capability. */
+ struct obd_capa *cra_capa;
+ /** Jobid */
+ char cra_jobid[JOBSTATS_JOBID_SIZE];
};
/**
/* @} cl_req */
+enum cache_stats_item {
+ /** how many cache lookups were performed */
+ CS_lookup = 0,
+ /** how many times cache lookup resulted in a hit */
+ CS_hit,
+ /** how many entities are in the cache right now */
+ CS_total,
+ /** how many entities in the cache are actively used (and cannot be
+ * evicted) right now */
+ CS_busy,
+ /** how many entities were created at all */
+ CS_create,
+ CS_NR
+};
+
+#define CS_NAMES { "lookup", "hit", "total", "busy", "create" }
+
/**
* Stats for a generic cache (similar to inode, lu_object, etc. caches).
*/
struct cache_stats {
const char *cs_name;
- /** how many entities were created at all */
- cfs_atomic_t cs_created;
- /** how many cache lookups were performed */
- cfs_atomic_t cs_lookup;
- /** how many times cache lookup resulted in a hit */
- cfs_atomic_t cs_hit;
- /** how many entities are in the cache right now */
- cfs_atomic_t cs_total;
- /** how many entities in the cache are actively used (and cannot be
- * evicted) right now */
- cfs_atomic_t cs_busy;
+ cfs_atomic_t cs_stats[CS_NR];
};
/** These are not exported so far */
return cl_object_header(o0) == cl_object_header(o1);
}
+static inline void cl_object_page_init(struct cl_object *clob, int size)
+{
+ clob->co_slice_off = cl_object_header(clob)->coh_page_bufsize;
+ cl_object_header(clob)->coh_page_bufsize += ALIGN(size, 8);
+}
+
+static inline void *cl_object_page_slice(struct cl_object *clob,
+ struct cl_page *page)
+{
+ return (void *)((char *)page + clob->co_slice_off);
+}
+
/** @} cl_object */
/** \defgroup cl_page cl_page
* @{ */
enum {
CLP_GANG_OKAY = 0,
+ CLP_GANG_RESCHED,
CLP_GANG_AGAIN,
- CLP_GANG_RESCHED
+ CLP_GANG_ABORT
};
+/* callback of cl_page_gang_lookup() */
+typedef int (*cl_page_gang_cb_t) (const struct lu_env *, struct cl_io *,
+ struct cl_page *, void *);
int cl_page_gang_lookup (const struct lu_env *env,
struct cl_object *obj,
struct cl_io *io,
pgoff_t start, pgoff_t end,
- struct cl_page_list *plist);
+ cl_page_gang_cb_t cb, void *cbdata);
struct cl_page *cl_page_lookup (struct cl_object_header *hdr,
pgoff_t index);
struct cl_page *cl_page_find (const struct lu_env *env,
void cl_page_clip (const struct lu_env *env, struct cl_page *pg,
int from, int to);
int cl_page_cancel (const struct lu_env *env, struct cl_page *page);
+int cl_page_flush (const struct lu_env *env, struct cl_io *io,
+ struct cl_page *pg);
/** @} transfer */
struct cl_lock *cl_lock_request(const struct lu_env *env, struct cl_io *io,
const struct cl_lock_descr *need,
const char *scope, const void *source);
-struct cl_lock *cl_lock_at_page(const struct lu_env *env, struct cl_object *obj,
- struct cl_page *page, struct cl_lock *except,
- int pending, int canceld);
+struct cl_lock *cl_lock_at_pgoff(const struct lu_env *env,
+ struct cl_object *obj, pgoff_t index,
+ struct cl_lock *except, int pending,
+ int canceld);
+static inline struct cl_lock *cl_lock_at_page(const struct lu_env *env,
+ struct cl_object *obj,
+ struct cl_page *page,
+ struct cl_lock *except,
+ int pending, int canceld)
+{
+ LASSERT(cl_object_header(obj) == cl_object_header(page->cp_obj));
+ return cl_lock_at_pgoff(env, obj, page->cp_index, except,
+ pending, canceld);
+}
const struct cl_lock_slice *cl_lock_at(const struct cl_lock *lock,
const struct lu_device_type *dtype);
void cl_lock_put (const struct lu_env *env, struct cl_lock *lock);
void cl_lock_hold_add (const struct lu_env *env, struct cl_lock *lock,
const char *scope, const void *source);
+void cl_lock_hold_release(const struct lu_env *env, struct cl_lock *lock,
+ const char *scope, const void *source);
void cl_lock_unhold (const struct lu_env *env, struct cl_lock *lock,
const char *scope, const void *source);
void cl_lock_release (const struct lu_env *env, struct cl_lock *lock,
const char *scope, const void *source);
void cl_lock_user_add (const struct lu_env *env, struct cl_lock *lock);
-int cl_lock_user_del (const struct lu_env *env, struct cl_lock *lock);
+void cl_lock_user_del (const struct lu_env *env, struct cl_lock *lock);
enum cl_lock_state cl_lock_intransit(const struct lu_env *env,
struct cl_lock *lock);
void cl_lock_mutex_put (const struct lu_env *env, struct cl_lock *lock);
int cl_lock_is_mutexed (struct cl_lock *lock);
int cl_lock_nr_mutexed (const struct lu_env *env);
-int cl_lock_page_out (const struct lu_env *env, struct cl_lock *lock,
- int discard);
+int cl_lock_discard_pages(const struct lu_env *env, struct cl_lock *lock);
int cl_lock_ext_match (const struct cl_lock_descr *has,
const struct cl_lock_descr *need);
int cl_lock_descr_match(const struct cl_lock_descr *has,
int cl_io_commit_write (const struct lu_env *env, struct cl_io *io,
struct cl_page *page, unsigned from, unsigned to);
int cl_io_submit_rw (const struct lu_env *env, struct cl_io *io,
- enum cl_req_type iot, struct cl_2queue *queue,
- enum cl_req_priority priority);
+ enum cl_req_type iot, struct cl_2queue *queue);
int cl_io_submit_sync (const struct lu_env *env, struct cl_io *io,
- enum cl_req_type iot, struct cl_2queue *queue,
- enum cl_req_priority priority, long timeout);
+ enum cl_req_type iot, struct cl_2queue *queue,
+ long timeout);
void cl_io_rw_advance (const struct lu_env *env, struct cl_io *io,
size_t nob);
int cl_io_cancel (const struct lu_env *env, struct cl_io *io,
return io->ci_type == CIT_WRITE && io->u.ci_wr.wr_append;
}
+static inline int cl_io_is_sync_write(const struct cl_io *io)
+{
+ return io->ci_type == CIT_WRITE && io->u.ci_wr.wr_sync;
+}
+
+static inline int cl_io_is_mkwrite(const struct cl_io *io)
+{
+ return io->ci_type == CIT_FAULT && io->u.ci_fault.ft_mkwrite;
+}
+
/**
* True, iff \a io is a truncate(2).
*/
* anchor and wakes up waiting thread when transfer is complete.
*/
struct cl_sync_io {
- /** number of pages yet to be transferred. */
- cfs_atomic_t csi_sync_nr;
- /** completion to be signaled when transfer is complete. */
- cfs_waitq_t csi_waitq;
- /** error code. */
- int csi_sync_rc;
+ /** number of pages yet to be transferred. */
+ cfs_atomic_t csi_sync_nr;
+ /** error code. */
+ int csi_sync_rc;
+ /** barrier of destroy this structure */
+ cfs_atomic_t csi_barrier;
+ /** completion to be signaled when transfer is complete. */
+ cfs_waitq_t csi_waitq;
};
void cl_sync_io_init(struct cl_sync_io *anchor, int nrpages);