X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fosc%2Fosc_cl_internal.h;h=39419109d90cec20f30d0c934e745a9fd8b77cd7;hb=df497dc560062a0a0c7178498cba8853843d39f1;hp=ac26f4f7ae23d38167c91ba417a11b758efc947b;hpb=5bc62396670e499af519149739d6ede4e7bfbe68;p=fs%2Flustre-release.git

diff --git a/lustre/osc/osc_cl_internal.h b/lustre/osc/osc_cl_internal.h
index ac26f4f..3941910 100644
--- a/lustre/osc/osc_cl_internal.h
+++ b/lustre/osc/osc_cl_internal.h
@@ -27,7 +27,7 @@
  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  *
- * Copyright (c) 2012, Whamcloud, Inc.
+ * Copyright (c) 2012, 2013, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -54,7 +54,6 @@
 
 #include <obd.h>
 /* osc_build_res_name() */
-#include <obd_ost.h>
 #include <cl_object.h>
 #include <lclient.h>
 #include "osc_internal.h"
@@ -80,12 +79,14 @@ struct osc_io {
	 * page writeback from happening. */
	struct osc_extent *oi_trunc;
+	int oi_lru_reserved;
+
	struct obd_info    oi_info;
	struct obdo        oi_oa;
	struct osc_async_cbargs {
		bool              opc_rpc_sent;
		int               opc_rc;
-		cfs_completion_t  opc_sync;
+		struct completion opc_sync;
	} oi_cbarg;
 };
@@ -103,7 +104,7 @@ struct osc_session {
	struct osc_io os_io;
 };
 
-#define OTI_PVEC_SIZE 64
+#define OTI_PVEC_SIZE 256
 struct osc_thread_info {
	struct ldlm_res_id      oti_resname;
	ldlm_policy_data_t      oti_policy;
@@ -112,7 +113,12 @@ struct osc_thread_info {
	struct lustre_handle    oti_handle;
	struct cl_page_list     oti_plist;
	struct cl_io            oti_io;
-	struct cl_page         *oti_pvec[OTI_PVEC_SIZE];
+	void                   *oti_pvec[OTI_PVEC_SIZE];
+	/**
+	 * Fields used by cl_lock_discard_pages().
+	 */
+	pgoff_t                 oti_next_index;
+	pgoff_t                 oti_fn_index; /* first non-overlapped index */
 };
 
 struct osc_object {
@@ -123,32 +129,32 @@ struct osc_object {
	 */
	int                oo_contended;
	cfs_time_t         oo_contention_time;
-#ifdef INVARIANT_CHECK
+#ifdef CONFIG_LUSTRE_DEBUG_EXPENSIVE_CHECK
	/**
	 * IO context used for invariant checks in osc_lock_has_pages().
	 */
	struct cl_io       oo_debug_io;
	/** Serialization object for osc_object::oo_debug_io. */
-	cfs_mutex_t        oo_debug_mutex;
+	struct mutex       oo_debug_mutex;
 #endif
	/**
	 * List of pages in transfer.
	 */
-	cfs_list_t         oo_inflight[CRT_NR];
+	struct list_head   oo_inflight[CRT_NR];
	/**
	 * Lock, protecting ccc_object::cob_inflight, because a seat-belt is
	 * locked during take-off and landing.
	 */
-	cfs_spinlock_t     oo_seatbelt;
+	spinlock_t         oo_seatbelt;
	/**
	 * used by the osc to keep track of what objects to build into rpcs.
	 * Protected by client_obd->cli_loi_list_lock.
	 */
-	cfs_list_t         oo_ready_item;
-	cfs_list_t         oo_hp_ready_item;
-	cfs_list_t         oo_write_item;
-	cfs_list_t         oo_read_item;
+	struct list_head   oo_ready_item;
+	struct list_head   oo_hp_ready_item;
+	struct list_head   oo_write_item;
+	struct list_head   oo_read_item;
 
	/**
	 * extent is a red black tree to manage (async) dirty pages.
@@ -157,38 +163,54 @@ struct osc_object {
	/**
	 * Manage write(dirty) extents.
	 */
-	cfs_list_t         oo_hp_exts; /* list of hp extents */
-	cfs_list_t         oo_urgent_exts; /* list of writeback extents */
-	cfs_list_t         oo_rpc_exts;
+	struct list_head   oo_hp_exts;     /* list of hp extents */
+	struct list_head   oo_urgent_exts; /* list of writeback extents */
+	struct list_head   oo_rpc_exts;
 
-	cfs_list_t         oo_reading_exts;
+	struct list_head   oo_reading_exts;
 
-	cfs_atomic_t       oo_nr_reads;
-	cfs_atomic_t       oo_nr_writes;
+	atomic_t           oo_nr_reads;
+	atomic_t           oo_nr_writes;
 
	/** Protect extent tree. Will be used to protect
	 * oo_{read|write}_pages soon. */
-	cfs_spinlock_t     oo_lock;
+	spinlock_t         oo_lock;
+
+	/**
+	 * Radix tree for caching pages
+	 */
+	struct radix_tree_root oo_tree;
+	spinlock_t             oo_tree_lock;
+	unsigned long          oo_npages;
 };
 
 static inline void osc_object_lock(struct osc_object *obj)
 {
-	cfs_spin_lock(&obj->oo_lock);
+	spin_lock(&obj->oo_lock);
 }
 
 static inline int osc_object_trylock(struct osc_object *obj)
 {
-	return cfs_spin_trylock(&obj->oo_lock);
+	return spin_trylock(&obj->oo_lock);
 }
 
 static inline void osc_object_unlock(struct osc_object *obj)
 {
-	cfs_spin_unlock(&obj->oo_lock);
+	spin_unlock(&obj->oo_lock);
 }
 
 static inline int osc_object_is_locked(struct osc_object *obj)
 {
-	return cfs_spin_is_locked(&obj->oo_lock);
+#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
+	return spin_is_locked(&obj->oo_lock);
+#else
+	/*
+	 * It is not perfect to return true all the time.
+	 * But since this function is only used for assertion
+	 * and checking, it seems OK.
+	 */
+	return 1;
+#endif
 }
 
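A note on the osc_object_is_locked() change in the hunk above: on uniprocessor kernels built without CONFIG_DEBUG_SPINLOCK, spinlocks carry no state and spin_is_locked() always returns 0, so any assertion built on it would fire on every call; returning 1 there keeps LASSERT()-style checks harmless. The stand-alone sketch below shows the same assert-only pattern in isolation; the foo_* names are invented for illustration and are not symbols from this patch.

    #include <linux/spinlock.h>
    #include <linux/bug.h>

    struct foo {
            spinlock_t f_lock;      /* initialize with spin_lock_init() */
            int        f_count;
    };

    /* Usable only in assertions: on !SMP && !DEBUG_SPINLOCK builds there
     * is no lock state to query, so report "locked" unconditionally
     * rather than trip every caller's check. */
    static inline int foo_is_locked(struct foo *foo)
    {
    #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
            return spin_is_locked(&foo->f_lock);
    #else
            return 1;
    #endif
    }

    static void foo_inc(struct foo *foo)
    {
            BUG_ON(!foo_is_locked(foo));    /* caller must hold f_lock */
            foo->f_count++;
    }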
 /*
@@ -255,23 +277,13 @@ struct osc_lock {
	/** lock value block */
	struct ost_lvb           ols_lvb;
	/** DLM flags with which osc_lock::ols_lock was enqueued */
-	int                      ols_flags;
+	__u64                    ols_flags;
	/** osc_lock::ols_lock handle */
	struct lustre_handle     ols_handle;
	struct ldlm_enqueue_info ols_einfo;
	enum osc_lock_state      ols_state;
 
	/**
-	 * How many pages are using this lock for io, currently only used by
-	 * read-ahead. If non-zero, the underlying dlm lock won't be cancelled
-	 * during recovery to avoid deadlock. see bz16774.
-	 *
-	 * \see osc_page::ops_lock
-	 * \see osc_page_addref_lock(), osc_page_putref_lock()
-	 */
-	cfs_atomic_t             ols_pageref;
-
-	/**
	 * true, if ldlm_lock_addref() was called against
	 * osc_lock::ols_lock. This is used for sanity checking.
	 *
@@ -368,48 +380,34 @@ struct osc_page {
	 */
	ops_in_lru:1,
	/**
-	 * Set if the page must be transferred with OBD_BRW_SRVLOCK.
-	 */
-	ops_srvlock:1;
-	union {
-		/**
-		 * lru page list. ops_inflight and ops_lru are exclusive so
-		 * that they can share the same data.
-		 */
-		cfs_list_t ops_lru;
-		/**
-		 * Linkage into a per-osc_object list of pages in flight. For
-		 * debugging.
-		 */
-		cfs_list_t ops_inflight;
-	};
-	/**
-	 * Thread that submitted this page for transfer. For debugging.
-	 */
-	cfs_task_t *ops_submitter;
-	/**
-	 * Submit time - the time when the page is starting RPC. For debugging.
-	 */
-	cfs_time_t ops_submit_time;
-
-	/**
-	 * A lock of which we hold a reference covers this page. Only used by
-	 * read-ahead: for a readahead page, we hold it's covering lock to
-	 * prevent it from being canceled during recovery.
-	 *
-	 * \see osc_lock::ols_pageref
-	 * \see osc_page_addref_lock(), osc_page_putref_lock().
-	 */
-	struct cl_lock *ops_lock;
+	 * Set if the page must be transferred with OBD_BRW_SRVLOCK.
+	 */
+	ops_srvlock:1;
+	/**
+	 * lru page list. See osc_lru_{del|use}() in osc_page.c for usage.
+	 */
+	struct list_head ops_lru;
+	/**
+	 * Linkage into a per-osc_object list of pages in flight. For
+	 * debugging.
+	 */
+	struct list_head ops_inflight;
+	/**
+	 * Thread that submitted this page for transfer. For debugging.
+	 */
+	struct task_struct *ops_submitter;
+	/**
+	 * Submit time - the time when the page is starting RPC. For debugging.
+	 */
+	cfs_time_t ops_submit_time;
 };
 
-extern cfs_mem_cache_t *osc_page_kmem;
-extern cfs_mem_cache_t *osc_lock_kmem;
-extern cfs_mem_cache_t *osc_object_kmem;
-extern cfs_mem_cache_t *osc_thread_kmem;
-extern cfs_mem_cache_t *osc_session_kmem;
-extern cfs_mem_cache_t *osc_req_kmem;
-extern cfs_mem_cache_t *osc_extent_kmem;
+extern struct kmem_cache *osc_lock_kmem;
+extern struct kmem_cache *osc_object_kmem;
+extern struct kmem_cache *osc_thread_kmem;
+extern struct kmem_cache *osc_session_kmem;
+extern struct kmem_cache *osc_req_kmem;
+extern struct kmem_cache *osc_extent_kmem;
 
 extern struct lu_device_type osc_device_type;
 extern struct lu_context_key osc_key;
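The extern block above likewise drops the libcfs cfs_mem_cache_t typedef for the kernel's struct kmem_cache. For reference, this is the slab-cache lifecycle those declarations rely on, reduced to a minimal self-contained sketch; the osc_demo_* names are hypothetical, not symbols from this file.

    #include <linux/slab.h>
    #include <linux/init.h>
    #include <linux/errno.h>

    struct osc_demo_item {
            int od_val;
    };

    static struct kmem_cache *osc_demo_kmem;

    static int __init osc_demo_init(void)
    {
            /* One slab cache per object type, as in the externs above. */
            osc_demo_kmem = kmem_cache_create("osc_demo_kmem",
                                              sizeof(struct osc_demo_item),
                                              0, 0, NULL);
            return osc_demo_kmem != NULL ? 0 : -ENOMEM;
    }

    static void osc_demo_use(void)
    {
            struct osc_demo_item *item;

            item = kmem_cache_alloc(osc_demo_kmem, GFP_KERNEL);
            if (item != NULL) {
                    item->od_val = 1;
                    kmem_cache_free(osc_demo_kmem, item);
            }
    }

    static void __exit osc_demo_exit(void)
    {
            kmem_cache_destroy(osc_demo_kmem);
    }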
@@ -427,32 +425,32 @@ int osc_req_init (const struct lu_env *env, struct cl_device *dev,
 struct lu_object *osc_object_alloc(const struct lu_env *env,
                                    const struct lu_object_header *hdr,
                                    struct lu_device *dev);
-struct cl_page *osc_page_init (const struct lu_env *env,
-                               struct cl_object *obj,
-                               struct cl_page *page, cfs_page_t *vmpage);
+int osc_page_init(const struct lu_env *env, struct cl_object *obj,
+                  struct cl_page *page, pgoff_t ind);
 
-void osc_lock_build_res(const struct lu_env *env, const struct osc_object *obj,
-                        struct ldlm_res_id *resname);
 void osc_index2policy (ldlm_policy_data_t *policy, const struct cl_object *obj,
                        pgoff_t start, pgoff_t end);
 int osc_lvb_print (const struct lu_env *env, void *cookie,
                    lu_printer_t p, const struct ost_lvb *lvb);
 
+void osc_lru_add_batch(struct client_obd *cli, struct list_head *list);
 void osc_page_submit(const struct lu_env *env, struct osc_page *opg,
                      enum cl_req_type crt, int brw_flags);
 int osc_cancel_async_page(const struct lu_env *env, struct osc_page *ops);
 int osc_set_async_flags(struct osc_object *obj, struct osc_page *opg,
                         obd_flag async_flags);
 int osc_prep_async_page(struct osc_object *osc, struct osc_page *ops,
-                        cfs_page_t *page, loff_t offset);
+                        struct page *page, loff_t offset);
 int osc_queue_async_io(const struct lu_env *env, struct cl_io *io,
                        struct osc_page *ops);
+int osc_page_cache_add(const struct lu_env *env,
+                       const struct cl_page_slice *slice, struct cl_io *io);
 int osc_teardown_async_page(const struct lu_env *env, struct osc_object *obj,
                             struct osc_page *ops);
 int osc_flush_async_page(const struct lu_env *env, struct cl_io *io,
                          struct osc_page *ops);
 int osc_queue_sync_pages(const struct lu_env *env, struct osc_object *obj,
-                         cfs_list_t *list, int cmd, int brw_flags);
+                         struct list_head *list, int cmd, int brw_flags);
 int osc_cache_truncate_start(const struct lu_env *env, struct osc_io *oio,
                              struct osc_object *obj, __u64 size);
 void osc_cache_truncate_end(const struct lu_env *env, struct osc_io *oio,
@@ -463,6 +461,7 @@ int osc_cache_wait_range(const struct lu_env *env, struct osc_object *obj,
                          pgoff_t start, pgoff_t end);
 void osc_io_unplug(const struct lu_env *env, struct client_obd *cli,
                    struct osc_object *osc, pdl_policy_t pol);
+int lru_queue_work(const struct lu_env *env, void *data);
 
 void osc_object_set_contended (struct osc_object *obj);
 void osc_object_clear_contended(struct osc_object *obj);
@@ -564,6 +563,11 @@ static inline struct osc_page *oap2osc(struct osc_async_page *oap)
 {
	return container_of0(oap, struct osc_page, ops_oap);
 }
 
+static inline pgoff_t osc_index(struct osc_page *opg)
+{
+	return opg->ops_cl.cpl_index;
+}
+
 static inline struct cl_page *oap2cl_page(struct osc_async_page *oap)
 {
	return oap2osc(oap)->ops_cl.cpl_page;
 }
@@ -600,8 +604,6 @@ enum osc_extent_state {
	OES_TRUNC     = 6, /** being truncated */
	OES_STATE_MAX
 };
-#define OES_STRINGS { "inv", "active", "cache", "locking", "lockdone", "rpc", \
-                      "trunc", NULL }
 
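The following hunk re-types struct osc_extent, the per-object unit of dirty-page tracking that osc_object indexes by start offset in its red-black tree (oe_start is the key, oe_node the linkage). As a reminder of how such an interval-keyed tree is driven with the kernel rbtree API, here is a hedged stand-alone sketch; demo_extent and its helpers are invented, and the real code in osc_cache.c additionally handles overlap splitting and reference counting.

    #include <linux/rbtree.h>
    #include <linux/stddef.h>

    struct demo_extent {
            struct rb_node de_node;
            unsigned long  de_start;        /* tree key, like oe_start */
            unsigned long  de_end;
    };

    /* Find the extent containing @index, or NULL. */
    static struct demo_extent *demo_extent_lookup(struct rb_root *root,
                                                  unsigned long index)
    {
            struct rb_node *n = root->rb_node;

            while (n != NULL) {
                    struct demo_extent *ext =
                            rb_entry(n, struct demo_extent, de_node);

                    if (index < ext->de_start)
                            n = n->rb_left;
                    else if (index > ext->de_end)
                            n = n->rb_right;
                    else
                            return ext;
            }
            return NULL;
    }

    /* Insert @new, assuming the caller already ruled out overlaps. */
    static void demo_extent_insert(struct rb_root *root,
                                   struct demo_extent *new)
    {
            struct rb_node **link = &root->rb_node;
            struct rb_node *parent = NULL;

            while (*link != NULL) {
                    struct demo_extent *tmp;

                    parent = *link;
                    tmp = rb_entry(parent, struct demo_extent, de_node);
                    if (new->de_start < tmp->de_start)
                            link = &(*link)->rb_left;
                    else
                            link = &(*link)->rb_right;
            }
            rb_link_node(&new->de_node, parent, link);
            rb_insert_color(&new->de_node, root);
    }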
"inv", "active", "cache", "locking", "lockdone", "rpc", \ - "trunc", NULL } /** * osc_extent data to manage dirty pages. @@ -620,76 +622,84 @@ enum osc_extent_state { */ struct osc_extent { /** red-black tree node */ - struct rb_node oe_node; + struct rb_node oe_node; /** osc_object of this extent */ - struct osc_object *oe_obj; + struct osc_object *oe_obj; /** refcount, removed from red-black tree if reaches zero. */ - cfs_atomic_t oe_refc; + atomic_t oe_refc; /** busy if non-zero */ - cfs_atomic_t oe_users; + atomic_t oe_users; /** link list of osc_object's oo_{hp|urgent|locking}_exts. */ - cfs_list_t oe_link; + struct list_head oe_link; /** state of this extent */ - unsigned int oe_state; + unsigned int oe_state; /** flags for this extent. */ - unsigned int oe_intree:1, + unsigned int oe_intree:1, /** 0 is write, 1 is read */ - oe_rw:1, - oe_srvlock:1, - oe_memalloc:1, + oe_rw:1, + oe_srvlock:1, + oe_memalloc:1, /** an ACTIVE extent is going to be truncated, so when this extent * is released, it will turn into TRUNC state instead of CACHE. */ - oe_trunc_pending:1, + oe_trunc_pending:1, /** this extent should be written asap and someone may wait for the * write to finish. This bit is usually set along with urgent if * the extent was CACHE state. * fsync_wait extent can't be merged because new extent region may * exceed fsync range. */ - oe_fsync_wait:1, + oe_fsync_wait:1, /** covering lock is being canceled */ - oe_hp:1, + oe_hp:1, /** this extent should be written back asap. set if one of pages is * called by page WB daemon, or sync write or reading requests. */ - oe_urgent:1; + oe_urgent:1; /** how many grants allocated for this extent. * Grant allocated for this extent. There is no grant allocated * for reading extents and sync write extents. */ - unsigned int oe_grants; + unsigned int oe_grants; /** # of dirty pages in this extent */ - unsigned int oe_nr_pages; + unsigned int oe_nr_pages; /** list of pending oap pages. Pages in this list are NOT sorted. */ - cfs_list_t oe_pages; + struct list_head oe_pages; /** Since an extent has to be written out in atomic, this is used to * remember the next page need to be locked to write this extent out. * Not used right now. */ - struct osc_page *oe_next_page; + struct osc_page *oe_next_page; /** start and end index of this extent, include start and end * themselves. Page offset here is the page index of osc_pages. * oe_start is used as keyword for red-black tree. */ - pgoff_t oe_start; - pgoff_t oe_end; + pgoff_t oe_start; + pgoff_t oe_end; /** maximum ending index of this extent, this is limited by * max_pages_per_rpc, lock extent and chunk size. */ - pgoff_t oe_max_end; + pgoff_t oe_max_end; /** waitqueue - for those who want to be notified if this extent's * state has changed. */ - cfs_waitq_t oe_waitq; + wait_queue_head_t oe_waitq; /** lock covering this extent */ - struct cl_lock *oe_osclock; + struct cl_lock *oe_osclock; /** terminator of this extent. Must be true if this extent is in IO. */ - cfs_task_t *oe_owner; + struct task_struct *oe_owner; /** return value of writeback. If somebody is waiting for this extent, * this value can be known by outside world. 
-	int oe_rc;
+	int                     oe_rc;
	/** max pages per rpc when this extent was created */
-	unsigned int oe_mppr;
+	unsigned int            oe_mppr;
 };
 
 int osc_extent_finish(const struct lu_env *env, struct osc_extent *ext,
                       int sent, int rc);
 int osc_extent_release(const struct lu_env *env, struct osc_extent *ext);
+int osc_lock_discard_pages(const struct lu_env *env, struct osc_lock *lock);
+
+typedef int (*osc_page_gang_cbt)(const struct lu_env *, struct cl_io *,
+                                 struct osc_page *, void *);
+int osc_page_gang_lookup(const struct lu_env *env, struct cl_io *io,
+                         struct osc_object *osc, pgoff_t start, pgoff_t end,
+                         osc_page_gang_cbt cb, void *cbdata);
+
 /** @} osc */
 
 #endif /* OSC_CL_INTERNAL_H */
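Two of the conversions in this patch are easier to see in isolation. First, opc_sync in struct osc_io moved from cfs_completion_t to the kernel's struct completion; the synchronous-RPC handshake that field supports looks roughly like the sketch below. The demo_* names are invented; in the real code the completion is fired from the OSC RPC completion path.

    #include <linux/completion.h>

    struct demo_cbargs {
            int               dc_rc;        /* like opc_rc */
            struct completion dc_sync;      /* like opc_sync */
    };

    static void demo_cbargs_init(struct demo_cbargs *args)
    {
            args->dc_rc = 0;
            init_completion(&args->dc_sync);
    }

    /* Called from the RPC interpret callback when the reply arrives. */
    static void demo_rpc_done(struct demo_cbargs *args, int rc)
    {
            args->dc_rc = rc;
            complete(&args->dc_sync);
    }

    /* Called by the issuing thread to wait for the RPC synchronously. */
    static int demo_rpc_wait(struct demo_cbargs *args)
    {
            wait_for_completion(&args->dc_sync);
            return args->dc_rc;
    }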
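Second, the new osc_page_gang_lookup()/osc_page_gang_cbt pair goes hand in hand with the radix tree (oo_tree) added to struct osc_object: cached pages are fetched in index order in batches, and a callback decides whether the walk continues. A rough shape of such a walk over the classic radix-tree API is sketched below; the demo_* names, batch size, and callback contract are illustrative only, and the real implementation additionally takes oo_tree_lock and resumes via osc_index().

    #include <linux/radix-tree.h>

    #define DEMO_GANG_BATCH 16

    /* Each cached object records the index it was inserted under, the way
     * osc_page keeps its page index reachable via osc_index(). */
    struct demo_item {
            unsigned long di_index;
    };

    typedef int (*demo_gang_cbt)(struct demo_item *item, void *cbdata);

    /* Visit every item with index in [start, end], batch by batch; a
     * non-zero return from the callback stops the walk early. */
    static void demo_gang_lookup(struct radix_tree_root *root,
                                 unsigned long start, unsigned long end,
                                 demo_gang_cbt cb, void *cbdata)
    {
            struct demo_item *batch[DEMO_GANG_BATCH];
            unsigned long idx = start;

            while (idx <= end) {
                    unsigned int found, i;

                    found = radix_tree_gang_lookup(root, (void **)batch, idx,
                                                   DEMO_GANG_BATCH);
                    if (found == 0)
                            break;

                    for (i = 0; i < found; i++) {
                            if (batch[i]->di_index > end)
                                    return;
                            if (cb(batch[i], cbdata) != 0)
                                    return;
                    }
                    /* resume just past the last index seen in this batch */
                    idx = batch[found - 1]->di_index + 1;
                    if (idx == 0)   /* wrapped past ULONG_MAX */
                            break;
            }
    }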