X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fosc%2Fosc_internal.h;h=6fea87041739c3b9fdeafb54785a12bfe4115d09;hp=9928169e7b132c8baf82fb95486a86ea78460e76;hb=3cce65712d94cffe8f1626545845b95b88aef672;hpb=433204f0d3ea8b831c07d552300eef242fa3308a diff --git a/lustre/osc/osc_internal.h b/lustre/osc/osc_internal.h index 9928169..6fea870 100644 --- a/lustre/osc/osc_internal.h +++ b/lustre/osc/osc_internal.h @@ -1,6 +1,4 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * +/* * GPL HEADER START * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -17,17 +15,15 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ /* * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. + * + * Copyright (c) 2011, 2017, Intel Corporation. 
*/ /* * This file is part of Lustre, http://www.lustre.org/ @@ -39,166 +35,106 @@ #define OAP_MAGIC 8675309 -struct lu_env; - -enum async_flags { - ASYNC_READY = 0x1, /* ap_make_ready will not be called before this - page is added to an rpc */ - ASYNC_URGENT = 0x2, /* page must be put into an RPC before return */ - ASYNC_COUNT_STABLE = 0x4, /* ap_refresh_count will not be called - to give the caller a chance to update - or cancel the size of the io */ - ASYNC_HP = 0x10, -}; - -struct obd_async_page_ops { - int (*ap_make_ready)(const struct lu_env *env, void *data, int cmd); - int (*ap_refresh_count)(const struct lu_env *env, void *data, int cmd); - int (*ap_completion)(const struct lu_env *env, - void *data, int cmd, struct obdo *oa, int rc); -}; - -struct osc_async_page { - int oap_magic; - unsigned short oap_cmd; - unsigned short oap_interrupted:1; - - cfs_list_t oap_pending_item; - cfs_list_t oap_urgent_item; - cfs_list_t oap_rpc_item; +#include +#include - obd_off oap_obj_off; - unsigned oap_page_off; - enum async_flags oap_async_flags; +extern atomic_t osc_pool_req_count; +extern unsigned int osc_reqpool_maxreqcount; +extern struct ptlrpc_request_pool *osc_rq_pool; - struct brw_page oap_brw_page; - - struct ptlrpc_request *oap_request; - struct client_obd *oap_cli; - struct lov_oinfo *oap_loi; - - const struct obd_async_page_ops *oap_caller_ops; - void *oap_caller_data; - cfs_list_t oap_page_list; - struct ldlm_lock *oap_ldlm_lock; - cfs_spinlock_t oap_lock; -}; - -#define oap_page oap_brw_page.pg -#define oap_count oap_brw_page.count -#define oap_brw_flags oap_brw_page.flag - -struct osc_cache_waiter { - cfs_list_t ocw_entry; - cfs_waitq_t ocw_waitq; - struct osc_async_page *ocw_oap; - int ocw_rc; -}; - -#define OSCC_FLAG_RECOVERING 0x01 -#define OSCC_FLAG_CREATING 0x02 -#define OSCC_FLAG_NOSPC 0x04 /* can't create more objects on OST */ -#define OSCC_FLAG_SYNC_IN_PROGRESS 0x08 /* only allow one thread to sync */ -#define OSCC_FLAG_LOW 0x10 -#define 
OSCC_FLAG_EXITING 0x20 -#define OSCC_FLAG_DEGRADED 0x40 -#define OSCC_FLAG_RDONLY 0x80 - -int osc_precreate(struct obd_export *exp); -int osc_create(struct obd_export *exp, struct obdo *oa, - struct lov_stripe_md **ea, struct obd_trans_info *oti); -int osc_create_async(struct obd_export *exp, struct obd_info *oinfo, - struct lov_stripe_md **ea, struct obd_trans_info *oti); -int osc_real_create(struct obd_export *exp, struct obdo *oa, - struct lov_stripe_md **ea, struct obd_trans_info *oti); -void oscc_init(struct obd_device *obd); void osc_wake_cache_waiters(struct client_obd *cli); -int osc_shrink_grant_to_target(struct client_obd *cli, long target); - -/* - * cl integration. - */ -#include +int osc_shrink_grant_to_target(struct client_obd *cli, __u64 target_bytes); +void osc_schedule_grant_work(void); +void osc_update_next_shrink(struct client_obd *cli); +int lru_queue_work(const struct lu_env *env, void *data); +int osc_extent_finish(const struct lu_env *env, struct osc_extent *ext, + int sent, int rc); +int osc_extent_release(const struct lu_env *env, struct osc_extent *ext); +int osc_lock_discard_pages(const struct lu_env *env, struct osc_object *osc, + pgoff_t start, pgoff_t end, bool discard); extern struct ptlrpc_request_set *PTLRPCD_SET; int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id, - int *flags, ldlm_policy_data_t *policy, - struct ost_lvb *lvb, int kms_valid, - obd_enqueue_update_f upcall, - void *cookie, struct ldlm_enqueue_info *einfo, - struct lustre_handle *lockh, - struct ptlrpc_request_set *rqset, int async); -int osc_cancel_base(struct lustre_handle *lockh, __u32 mode); + __u64 *flags, union ldlm_policy_data *policy, + struct ost_lvb *lvb, int kms_valid, + osc_enqueue_upcall_f upcall, + void *cookie, struct ldlm_enqueue_info *einfo, + struct ptlrpc_request_set *rqset, int async, + bool speculative); int osc_match_base(struct obd_export *exp, struct ldlm_res_id *res_id, - __u32 type, ldlm_policy_data_t *policy, __u32 
mode, - int *flags, void *data, struct lustre_handle *lockh, - int unref); - -int osc_setattr_async_base(struct obd_export *exp, struct obd_info *oinfo, - struct obd_trans_info *oti, - obd_enqueue_update_f upcall, void *cookie, - struct ptlrpc_request_set *rqset); -int osc_punch_base(struct obd_export *exp, struct obd_info *oinfo, - obd_enqueue_update_f upcall, void *cookie, - struct ptlrpc_request_set *rqset); - -int osc_prep_async_page(struct obd_export *exp, struct lov_stripe_md *lsm, - struct lov_oinfo *loi, cfs_page_t *page, - obd_off offset, const struct obd_async_page_ops *ops, - void *data, void **res, int nocache, - struct lustre_handle *lockh); -void osc_oap_to_pending(struct osc_async_page *oap); -int osc_oap_interrupted(const struct lu_env *env, struct osc_async_page *oap); -void loi_list_maint(struct client_obd *cli, struct lov_oinfo *loi); -void osc_check_rpcs(const struct lu_env *env, struct client_obd *cli); - -int osc_queue_async_io(const struct lu_env *env, - struct obd_export *exp, struct lov_stripe_md *lsm, - struct lov_oinfo *loi, void *cookie, - int cmd, obd_off off, int count, - obd_flag brw_flags, enum async_flags async_flags); -int osc_teardown_async_page(struct obd_export *exp, - struct lov_stripe_md *lsm, - struct lov_oinfo *loi, void *cookie); + enum ldlm_type type, union ldlm_policy_data *policy, + enum ldlm_mode mode, __u64 *flags, void *data, + struct lustre_handle *lockh, int unref); + +int osc_setattr_async(struct obd_export *exp, struct obdo *oa, + obd_enqueue_update_f upcall, void *cookie, + struct ptlrpc_request_set *rqset); +int osc_sync_base(struct osc_object *obj, struct obdo *oa, + obd_enqueue_update_f upcall, void *cookie, + struct ptlrpc_request_set *rqset); +int osc_ladvise_base(struct obd_export *exp, struct obdo *oa, + struct ladvise_hdr *ladvise_hdr, + obd_enqueue_update_f upcall, void *cookie, + struct ptlrpc_request_set *rqset); int osc_process_config_base(struct obd_device *obd, struct lustre_cfg *cfg); -int 
osc_set_async_flags_base(struct client_obd *cli, - struct lov_oinfo *loi, struct osc_async_page *oap, - obd_flag async_flags); -int osc_enter_cache_try(const struct lu_env *env, - struct client_obd *cli, struct lov_oinfo *loi, - struct osc_async_page *oap, int transient); +int osc_build_rpc(const struct lu_env *env, struct client_obd *cli, + struct list_head *ext_list, int cmd); +unsigned long osc_lru_reserve(struct client_obd *cli, unsigned long npages); +void osc_lru_unreserve(struct client_obd *cli, unsigned long npages); -struct cl_page *osc_oap2cl_page(struct osc_async_page *oap); -extern cfs_spinlock_t osc_ast_guard; +extern struct lu_kmem_descr osc_caches[]; + +unsigned long osc_ldlm_weigh_ast(struct ldlm_lock *dlmlock); int osc_cleanup(struct obd_device *obd); int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg); -#ifdef LPROCFS -int lproc_osc_attach_seqstat(struct obd_device *dev); -void lprocfs_osc_init_vars(struct lprocfs_static_vars *lvars); -#else -static inline int lproc_osc_attach_seqstat(struct obd_device *dev) {return 0;} -static inline void lprocfs_osc_init_vars(struct lprocfs_static_vars *lvars) +int osc_tunables_init(struct obd_device *obd); + +extern struct lu_device_type osc_device_type; + +static inline struct cl_io *osc_env_thread_io(const struct lu_env *env) +{ + struct cl_io *io = &osc_env_info(env)->oti_io; + + memset(io, 0, sizeof(*io)); + return io; +} + +static inline int osc_is_object(const struct lu_object *obj) { - memset(lvars, 0, sizeof(*lvars)); + return obj->lo_dev->ld_type == &osc_device_type; } -#endif -extern struct lu_device_type osc_device_type; +static inline struct osc_lock *osc_lock_at(const struct cl_lock *lock) +{ + return cl2osc_lock(cl_lock_at(lock, &osc_device_type)); +} + +int osc_lock_init(const struct lu_env *env, struct cl_object *obj, + struct cl_lock *lock, const struct cl_io *io); +int osc_io_init(const struct lu_env *env, struct cl_object *obj, + struct cl_io *io); +struct lu_object 
*osc_object_alloc(const struct lu_env *env, + const struct lu_object_header *hdr, + struct lu_device *dev); static inline int osc_recoverable_error(int rc) { - return (rc == -EIO || rc == -EROFS || rc == -ENOMEM || rc == -EAGAIN); + return (rc == -EIO || rc == -EROFS || rc == -ENOMEM || + rc == -EAGAIN || rc == -EINPROGRESS); +} + +static inline unsigned long rpcs_in_flight(struct client_obd *cli) +{ + return cli->cl_r_in_flight + cli->cl_w_in_flight; } -/* return 1 if osc should be resend request */ -static inline int osc_should_resend(int resend, struct client_obd *cli) +static inline char *cli_name(struct client_obd *cli) { - return cfs_atomic_read(&cli->cl_resends) ? - cfs_atomic_read(&cli->cl_resends) > resend : 1; + return cli->cl_import->imp_obd->obd_name; } #ifndef min_t @@ -206,27 +142,76 @@ static inline int osc_should_resend(int resend, struct client_obd *cli) ({ type __x = (x); type __y = (y); __x < __y ? __x: __y; }) #endif -struct osc_device { - struct cl_device od_cl; - struct obd_export *od_exp; - - /* Write stats is actually protected by client_obd's lock. 
*/ - struct osc_stats { - uint64_t os_lockless_writes; /* by bytes */ - uint64_t os_lockless_reads; /* by bytes */ - uint64_t os_lockless_truncates; /* by times */ - } od_stats; - - /* configuration item(s) */ - int od_contention_time; - int od_lockless_truncate; +struct osc_async_args { + struct obd_info *aa_oi; }; -static inline struct osc_device *obd2osc_dev(const struct obd_device *d) +int osc_quota_setup(struct obd_device *obd); +int osc_quota_cleanup(struct obd_device *obd); +int osc_quota_setdq(struct client_obd *cli, __u64 xid, const unsigned int qid[], + u64 valid, u32 flags); +int osc_quota_chkdq(struct client_obd *cli, const unsigned int qid[]); +int osc_quotactl(struct obd_device *unused, struct obd_export *exp, + struct obd_quotactl *oqctl); +void osc_inc_unstable_pages(struct ptlrpc_request *req); +void osc_dec_unstable_pages(struct ptlrpc_request *req); +bool osc_over_unstable_soft_limit(struct client_obd *cli); +void osc_page_touch_at(const struct lu_env *env, struct cl_object *obj, + pgoff_t idx, size_t to); + +struct ldlm_lock *osc_obj_dlmlock_at_pgoff(const struct lu_env *env, + struct osc_object *obj, + pgoff_t index, + enum osc_dap_flags flags); + +int osc_object_invalidate(const struct lu_env *env, struct osc_object *osc); + +/** osc shrink list to link all osc client obd */ +extern struct list_head osc_shrink_list; +/** spin lock to protect osc_shrink_list */ +extern spinlock_t osc_shrink_lock; +extern unsigned long osc_cache_shrink_count(struct shrinker *sk, + struct shrink_control *sc); +extern unsigned long osc_cache_shrink_scan(struct shrinker *sk, + struct shrink_control *sc); +static inline unsigned int osc_max_write_chunks(const struct client_obd *cli) { - return container_of0(d->obd_lu_dev, struct osc_device, od_cl.cd_lu_dev); + /* + * LU-8135: + * + * The maximum size of a single transaction is about 64MB in ZFS. 
+ * #define DMU_MAX_ACCESS (64 * 1024 * 1024) + * + * Since ZFS is a copy-on-write file system, a single dirty page in + * a chunk will result in the rewrite of the whole chunk, therefore + * an RPC shouldn't be allowed to contain too many chunks otherwise + * it will make transaction size much bigger than 64MB, especially + * with big block size for ZFS. + * + * This piece of code is to make sure that OSC won't send write RPCs + * with too many chunks. The maximum chunk size that an RPC can cover + * is set to PTLRPC_MAX_BRW_SIZE, which is defined as 16MB. Ideally + * OST should tell the client what the biggest transaction size is, + * but it's good enough for now. + * + * This limitation doesn't apply to ldiskfs, which allows as many + * chunks in one RPC as we want. However, there is no benefit + * in having too many discontiguous pages in one RPC. + * + * An osc_extent won't cover more than an RPC size, so the number + * of chunks in it won't exceed PTLRPC_MAX_BRW_SIZE >> chunkbits. + */ + return PTLRPC_MAX_BRW_SIZE >> cli->cl_chunkbits; } -int osc_dlm_lock_pageref(struct ldlm_lock *dlm); +static inline void osc_set_io_portal(struct ptlrpc_request *req) +{ + struct obd_import *imp = req->rq_import; + /* Distinguish OSC from MDC here to use OST or MDS portal */ + if (OCD_HAS_FLAG(&imp->imp_connect_data, IBITS)) + req->rq_request_portal = MDS_IO_PORTAL; + else + req->rq_request_portal = OST_IO_PORTAL; +} #endif /* OSC_INTERNAL_H */