* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2012, 2016, Intel Corporation.
+ * Copyright (c) 2012, 2017, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
} od_stats;
/* configuration item(s) */
- int od_contention_time;
+ time64_t od_contention_time;
int od_lockless_truncate;
};
/** true if this io is lockless. */
unsigned int oi_lockless:1,
/** true if this io is counted as active IO */
- oi_is_active:1;
+ oi_is_active:1,
+ /** true if this io has CAP_SYS_RESOURCE */
+ oi_cap_sys_resource:1;
/** how many LRU pages are reserved for this IO */
unsigned long oi_lru_reserved;
struct osc_thread_info {
struct ldlm_res_id oti_resname;
union ldlm_policy_data oti_policy;
- struct cl_lock_descr oti_descr;
struct cl_attr oti_attr;
- struct lustre_handle oti_handle;
- struct cl_page_list oti_plist;
struct cl_io oti_io;
+ struct pagevec oti_pagevec;
void *oti_pvec[OTI_PVEC_SIZE];
/**
* Fields used by cl_lock_discard_pages().
struct lu_buf oti_ladvise_buf;
};
+static inline __u64 osc_enq2ldlm_flags(__u32 enqflags)
+{
+ __u64 result = 0;
+
+ CDEBUG(D_DLMTRACE, "flags: %x\n", enqflags);
+
+ LASSERT((enqflags & ~CEF_MASK) == 0);
+
+ if (enqflags & CEF_NONBLOCK)
+ result |= LDLM_FL_BLOCK_NOWAIT;
+ if (enqflags & CEF_GLIMPSE)
+ result |= LDLM_FL_HAS_INTENT;
+ if (enqflags & CEF_DISCARD_DATA)
+ result |= LDLM_FL_AST_DISCARD_DATA;
+ if (enqflags & CEF_PEEK)
+ result |= LDLM_FL_TEST_LOCK;
+ if (enqflags & CEF_LOCK_MATCH)
+ result |= LDLM_FL_MATCH_LOCK;
+ if (enqflags & CEF_LOCK_NO_EXPAND)
+ result |= LDLM_FL_NO_EXPANSION;
+ if (enqflags & CEF_SPECULATIVE)
+ result |= LDLM_FL_SPECULATIVE;
+ return result;
+}
+
/**
 * Completion upcall for an asynchronous ldlm enqueue: invoked with the
 * caller's @cookie, the lock handle and the enqueue result @rc.
 */
typedef int (*osc_enqueue_upcall_f)(void *cookie, struct lustre_handle *lockh,
				    int rc);

/*
 * State carried across an asynchronous enqueue so the reply handler can
 * invoke oa_upcall(oa_cookie, &oa_lockh, rc) when the server answers.
 * NOTE(review): remaining field roles are inferred from their names —
 * confirm against the enqueue/interpret code in osc_request.c.
 */
struct osc_enqueue_args {
	struct obd_export	*oa_exp;
	enum ldlm_type		oa_type;
	enum ldlm_mode		oa_mode;
	__u64			*oa_flags;
	osc_enqueue_upcall_f	oa_upcall;	/* completion callback */
	void			*oa_cookie;	/* opaque arg for oa_upcall */
	struct ost_lvb		*oa_lvb;
	struct lustre_handle	oa_lockh;
	bool			oa_speculative;
};
+
+/**
+ * Bit flags for osc_dlm_lock_at_pageoff().
+ */
enum osc_dap_flags {
	/**
	 * Just check whether the desired lock exists; do not take a
	 * reference count on the lock that is found.
	 */
	OSC_DAP_FL_TEST_LOCK = 1 << 0,
	/**
	 * Return the lock even if it is being canceled.
	 */
	OSC_DAP_FL_CANCELING = 1 << 1
};
+
+/*
+ * The set of operations which are different for MDC and OSC objects
+ */
struct osc_object_operations {
	/* build the ldlm resource name identifying @osc into @resname */
	void (*oto_build_res_name)(struct osc_object *osc,
				   struct ldlm_res_id *resname);
	/*
	 * find the dlm lock covering page @index of @obj; lookup behaviour
	 * is controlled by the OSC_DAP_FL_* bits in @dap_flags
	 */
	struct ldlm_lock* (*oto_dlmlock_at_pgoff)(const struct lu_env *env,
						  struct osc_object *obj,
						  pgoff_t index,
						  enum osc_dap_flags dap_flags);
};
+
struct osc_object {
struct cl_object oo_cl;
struct lov_oinfo *oo_oinfo;
* True if locking against this stripe got -EUSERS.
*/
int oo_contended;
- cfs_time_t oo_contention_time;
+ ktime_t oo_contention_time;
#ifdef CONFIG_LUSTRE_DEBUG_EXPENSIVE_CHECK
/**
* IO context used for invariant checks in osc_lock_has_pages().
atomic_t oo_nr_ios;
wait_queue_head_t oo_io_waitq;
+ const struct osc_object_operations *oo_obj_ops;
bool oo_initialized;
};
+static inline void osc_build_res_name(struct osc_object *osc,
+ struct ldlm_res_id *resname)
+{
+ return osc->oo_obj_ops->oto_build_res_name(osc, resname);
+}
+
+static inline struct ldlm_lock *osc_dlmlock_at_pgoff(const struct lu_env *env,
+ struct osc_object *obj,
+ pgoff_t index,
+ enum osc_dap_flags flags)
+{
+ return obj->oo_obj_ops->oto_dlmlock_at_pgoff(env, obj, index, flags);
+}
+
static inline void osc_object_lock(struct osc_object *obj)
{
spin_lock(&obj->oo_lock);
#endif
}
/* Mark @obj contended and record when the contention was observed. */
static inline void osc_object_set_contended(struct osc_object *obj)
{
	/* timestamp is stored before the flag is raised */
	obj->oo_contention_time = ktime_get();
	/* mb(); -- NOTE(review): no barrier here, so other CPUs may observe
	 * oo_contended == 1 before the new timestamp; confirm readers
	 * tolerate a stale oo_contention_time */
	obj->oo_contended = 1;
}
+
/* Clear the contended flag; the old timestamp is left in place. */
static inline void osc_object_clear_contended(struct osc_object *obj)
{
	obj->oo_contended = 0;
}
+
/*
* Lock "micro-states" for osc layer.
*/
enum osc_lock_state ols_state;
/** lock value block */
struct ost_lvb ols_lvb;
-
+ /** Lockless operations to be used by lockless lock */
+ const struct cl_lock_operations *ols_lockless_ops;
/**
* true, if ldlm_lock_addref() was called against
* osc_lock::ols_lock. This is used for sanity checking.
/**
* For async glimpse lock.
*/
- ols_agl:1;
+ ols_agl:1,
+ /**
+ * for speculative locks - asynchronous glimpse locks and ladvise
+ * lockahead manual lock requests
+ *
+ * Used to tell osc layer to not wait for the ldlm reply from the
+ * server, so the osc lock will be short lived - It only exists to
+ * create the ldlm request and is not updated on request completion.
+ */
+ ols_speculative:1;
};
+static inline int osc_lock_is_lockless(const struct osc_lock *ols)
+{
+ return (ols->ols_cl.cls_ops == ols->ols_lockless_ops);
+}
/**
* Page state private for osc layer.
/**
* Set if the page must be transferred with OBD_BRW_SRVLOCK.
*/
- ops_srvlock:1;
+ ops_srvlock:1,
+ /**
+ * If the page is in osc_object::oo_tree.
+ */
+ ops_intree:1;
/**
* lru page list. See osc_lru_{del|use}() in osc_page.c for usage.
*/
/**
* Submit time - the time at which the page's RPC was started. For debugging.
*/
- cfs_time_t ops_submit_time;
+ ktime_t ops_submit_time;
+};
+
+struct osc_brw_async_args {
+ struct obdo *aa_oa;
+ int aa_requested_nob;
+ int aa_nio_count;
+ u32 aa_page_count;
+ int aa_resends;
+ struct brw_page **aa_ppga;
+ struct client_obd *aa_cli;
+ struct list_head aa_oaps;
+ struct list_head aa_exts;
};
extern struct kmem_cache *osc_lock_kmem;
extern struct kmem_cache *osc_session_kmem;
extern struct kmem_cache *osc_extent_kmem;
extern struct kmem_cache *osc_quota_kmem;
+extern struct kmem_cache *osc_obdo_kmem;
extern struct lu_context_key osc_key;
extern struct lu_context_key osc_session_key;
#define OSC_FLAGS (ASYNC_URGENT|ASYNC_READY)
+/* osc_page.c */
int osc_page_init(const struct lu_env *env, struct cl_object *obj,
struct cl_page *page, pgoff_t ind);
void osc_index2policy(union ldlm_policy_data *policy, const struct cl_object *obj,
pgoff_t start, pgoff_t end);
-int osc_lvb_print(const struct lu_env *env, void *cookie,
- lu_printer_t p, const struct ost_lvb *lvb);
-
void osc_lru_add_batch(struct client_obd *cli, struct list_head *list);
void osc_page_submit(const struct lu_env *env, struct osc_page *opg,
enum cl_req_type crt, int brw_flags);
+int lru_queue_work(const struct lu_env *env, void *data);
+long osc_lru_shrink(const struct lu_env *env, struct client_obd *cli,
+ long target, bool force);
+
+/* osc_cache.c */
int osc_cancel_async_page(const struct lu_env *env, struct osc_page *ops);
int osc_set_async_flags(struct osc_object *obj, struct osc_page *opg,
u32 async_flags);
int osc_prep_async_page(struct osc_object *osc, struct osc_page *ops,
struct page *page, loff_t offset);
int osc_queue_async_io(const struct lu_env *env, struct cl_io *io,
- struct osc_page *ops);
-int osc_page_cache_add(const struct lu_env *env,
- const struct cl_page_slice *slice, struct cl_io *io);
+ struct osc_page *ops, cl_commit_cbt cb);
+int osc_page_cache_add(const struct lu_env *env, struct osc_page *opg,
+ struct cl_io *io, cl_commit_cbt cb);
int osc_teardown_async_page(const struct lu_env *env, struct osc_object *obj,
struct osc_page *ops);
int osc_flush_async_page(const struct lu_env *env, struct cl_io *io,
struct osc_page *ops);
-int osc_queue_sync_pages(const struct lu_env *env, struct osc_object *obj,
- struct list_head *list, int cmd, int brw_flags);
+int osc_queue_sync_pages(const struct lu_env *env, const struct cl_io *io,
+ struct osc_object *obj, struct list_head *list,
+ int brw_flags);
int osc_cache_truncate_start(const struct lu_env *env, struct osc_object *obj,
__u64 size, struct osc_extent **extp);
void osc_cache_truncate_end(const struct lu_env *env, struct osc_extent *ext);
pgoff_t start, pgoff_t end, int hp, int discard);
int osc_cache_wait_range(const struct lu_env *env, struct osc_object *obj,
pgoff_t start, pgoff_t end);
-void osc_io_unplug(const struct lu_env *env, struct client_obd *cli,
- struct osc_object *osc);
-int lru_queue_work(const struct lu_env *env, void *data);
+int osc_io_unplug0(const struct lu_env *env, struct client_obd *cli,
+ struct osc_object *osc, int async);
+void osc_wake_cache_waiters(struct client_obd *cli);
-void osc_object_set_contended(struct osc_object *obj);
-void osc_object_clear_contended(struct osc_object *obj);
/*
 * Unplug @osc with async == 1 and propagate osc_io_unplug0()'s result.
 * NOTE(review): async presumably means the flush is queued rather than
 * issued inline — confirm against osc_io_unplug0() in osc_cache.c.
 */
static inline int osc_io_unplug_async(const struct lu_env *env,
				      struct client_obd *cli,
				      struct osc_object *osc)
{
	return osc_io_unplug0(env, cli, osc, 1);
}
+
/*
 * Unplug @osc with async == 0; the return code of osc_io_unplug0() is
 * deliberately discarded (the cast to void makes that explicit).
 */
static inline void osc_io_unplug(const struct lu_env *env,
				 struct client_obd *cli,
				 struct osc_object *osc)
{
	(void)osc_io_unplug0(env, cli, osc, 0);
}
+
+typedef int (*osc_page_gang_cbt)(const struct lu_env *, struct cl_io *,
+ struct osc_page *, void *);
+int osc_page_gang_lookup(const struct lu_env *env, struct cl_io *io,
+ struct osc_object *osc, pgoff_t start, pgoff_t end,
+ osc_page_gang_cbt cb, void *cbdata);
+int osc_discard_cb(const struct lu_env *env, struct cl_io *io,
+ struct osc_page *ops, void *cbdata);
+
+/* osc_dev.c */
+int osc_device_init(const struct lu_env *env, struct lu_device *d,
+ const char *name, struct lu_device *next);
+struct lu_device *osc_device_fini(const struct lu_env *env,
+ struct lu_device *d);
+struct lu_device *osc_device_free(const struct lu_env *env,
+ struct lu_device *d);
+
+/* osc_object.c */
+int osc_object_init(const struct lu_env *env, struct lu_object *obj,
+ const struct lu_object_conf *conf);
+void osc_object_free(const struct lu_env *env, struct lu_object *obj);
+int osc_lvb_print(const struct lu_env *env, void *cookie,
+ lu_printer_t p, const struct ost_lvb *lvb);
+int osc_object_print(const struct lu_env *env, void *cookie,
+ lu_printer_t p, const struct lu_object *obj);
+int osc_attr_get(const struct lu_env *env, struct cl_object *obj,
+ struct cl_attr *attr);
+int osc_attr_update(const struct lu_env *env, struct cl_object *obj,
+ const struct cl_attr *attr, unsigned valid);
+int osc_object_glimpse(const struct lu_env *env, const struct cl_object *obj,
+ struct ost_lvb *lvb);
+int osc_object_invalidate(const struct lu_env *env, struct osc_object *osc);
int osc_object_is_contended(struct osc_object *obj);
-int osc_lock_is_lockless(const struct osc_lock *olck);
+int osc_object_find_cbdata(const struct lu_env *env, struct cl_object *obj,
+ ldlm_iterator_t iter, void *data);
+int osc_object_prune(const struct lu_env *env, struct cl_object *obj);
+
+/* osc_request.c */
+void osc_init_grant(struct client_obd *cli, struct obd_connect_data *ocd);
+int osc_setup_common(struct obd_device *obd, struct lustre_cfg *lcfg);
+int osc_precleanup_common(struct obd_device *obd);
+int osc_cleanup_common(struct obd_device *obd);
+int osc_set_info_async(const struct lu_env *env, struct obd_export *exp,
+ u32 keylen, void *key, u32 vallen, void *val,
+ struct ptlrpc_request_set *set);
+int osc_ldlm_resource_invalidate(struct cfs_hash *hs, struct cfs_hash_bd *bd,
+ struct hlist_node *hnode, void *arg);
+int osc_reconnect(const struct lu_env *env, struct obd_export *exp,
+ struct obd_device *obd, struct obd_uuid *cluuid,
+ struct obd_connect_data *data, void *localdata);
+int osc_disconnect(struct obd_export *exp);
+int osc_punch_send(struct obd_export *exp, struct obdo *oa,
+ obd_enqueue_update_f upcall, void *cookie);
+
+/* osc_io.c */
+int osc_io_submit(const struct lu_env *env, const struct cl_io_slice *ios,
+ enum cl_req_type crt, struct cl_2queue *queue);
+int osc_io_commit_async(const struct lu_env *env,
+ const struct cl_io_slice *ios,
+ struct cl_page_list *qin, int from, int to,
+ cl_commit_cbt cb);
+int osc_io_iter_init(const struct lu_env *env, const struct cl_io_slice *ios);
+void osc_io_iter_fini(const struct lu_env *env,
+ const struct cl_io_slice *ios);
+int osc_io_rw_iter_init(const struct lu_env *env,
+ const struct cl_io_slice *ios);
+void osc_io_rw_iter_fini(const struct lu_env *env,
+ const struct cl_io_slice *ios);
+int osc_io_fault_start(const struct lu_env *env, const struct cl_io_slice *ios);
+void osc_io_setattr_end(const struct lu_env *env,
+ const struct cl_io_slice *slice);
+int osc_io_read_start(const struct lu_env *env,
+ const struct cl_io_slice *slice);
+int osc_io_write_start(const struct lu_env *env,
+ const struct cl_io_slice *slice);
+void osc_io_end(const struct lu_env *env, const struct cl_io_slice *slice);
+int osc_fsync_ost(const struct lu_env *env, struct osc_object *obj,
+ struct cl_fsync_io *fio);
+void osc_io_fsync_end(const struct lu_env *env,
+ const struct cl_io_slice *slice);
+void osc_read_ahead_release(const struct lu_env *env, void *cbdata);
+
+/* osc_lock.c */
+void osc_lock_to_lockless(const struct lu_env *env, struct osc_lock *ols,
+ int force);
+void osc_lock_wake_waiters(const struct lu_env *env, struct osc_object *osc,
+ struct osc_lock *oscl);
+int osc_lock_enqueue_wait(const struct lu_env *env, struct osc_object *obj,
+ struct osc_lock *oscl);
+void osc_lock_set_writer(const struct lu_env *env, const struct cl_io *io,
+ struct cl_object *obj, struct osc_lock *oscl);
+int osc_lock_print(const struct lu_env *env, void *cookie,
+ lu_printer_t p, const struct cl_lock_slice *slice);
+void osc_lock_cancel(const struct lu_env *env,
+ const struct cl_lock_slice *slice);
+void osc_lock_fini(const struct lu_env *env, struct cl_lock_slice *slice);
+int osc_ldlm_glimpse_ast(struct ldlm_lock *dlmlock, void *data);
+unsigned long osc_ldlm_weigh_ast(struct ldlm_lock *dlmlock);
/*****************************************************************************
*
oe_hp:1,
/** this extent should be written back asap. set if one of its pages is
* picked up by the page writeback daemon, or by sync write or read requests. */
- oe_urgent:1;
+ oe_urgent:1,
+ /** Non-delay RPC should be used for this extent. */
+ oe_ndelay:1;
/** how many grants allocated for this extent.
* Grant allocated for this extent. There is no grant allocated
* for reading extents and sync write extents. */
unsigned int oe_nr_pages;
/** list of pending oap pages. Pages in this list are NOT sorted. */
struct list_head oe_pages;
- /** Since an extent has to be written out in atomic, this is used to
- * remember the next page need to be locked to write this extent out.
- * Not used right now.
- */
- struct osc_page *oe_next_page;
/** start and end index of this extent, include start and end
* themselves. Page offset here is the page index of osc_pages.
* oe_start is used as keyword for red-black tree. */
int oe_rc;
/** max pages per rpc when this extent was created */
unsigned int oe_mppr;
+ /** FLR: layout version when this osc_extent is published */
+ __u32 oe_layout_version;
};
-int osc_extent_finish(const struct lu_env *env, struct osc_extent *ext,
- int sent, int rc);
-int osc_extent_release(const struct lu_env *env, struct osc_extent *ext);
-
-int osc_lock_discard_pages(const struct lu_env *env, struct osc_object *osc,
- pgoff_t start, pgoff_t end, bool discard_pages);
-
-typedef int (*osc_page_gang_cbt)(const struct lu_env *, struct cl_io *,
- struct osc_page *, void *);
-int osc_page_gang_lookup(const struct lu_env *env, struct cl_io *io,
- struct osc_object *osc, pgoff_t start, pgoff_t end,
- osc_page_gang_cbt cb, void *cbdata);
/** @} osc */
#endif /* LUSTRE_OSC_H */