*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2011, 2012, Intel Corporation.
+ * Copyright (c) 2011, 2017, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
#define OAP_MAGIC 8675309
-struct lu_env;
-
-enum async_flags {
- ASYNC_READY = 0x1, /* ap_make_ready will not be called before this
- page is added to an rpc */
- ASYNC_URGENT = 0x2, /* page must be put into an RPC before return */
- ASYNC_COUNT_STABLE = 0x4, /* ap_refresh_count will not be called
- to give the caller a chance to update
- or cancel the size of the io */
- ASYNC_HP = 0x10,
-};
-
-struct osc_async_page {
- int oap_magic;
- unsigned short oap_cmd;
- unsigned short oap_interrupted:1;
-
- cfs_list_t oap_pending_item;
- cfs_list_t oap_rpc_item;
-
- obd_off oap_obj_off;
- unsigned oap_page_off;
- enum async_flags oap_async_flags;
+#include <libcfs/linux/linux-mem.h>
+#include <lustre_osc.h>
- struct brw_page oap_brw_page;
-
- struct ptlrpc_request *oap_request;
- struct client_obd *oap_cli;
- struct osc_object *oap_obj;
-
- struct ldlm_lock *oap_ldlm_lock;
- spinlock_t oap_lock;
-};
-
-#define oap_page oap_brw_page.pg
-#define oap_count oap_brw_page.count
-#define oap_brw_flags oap_brw_page.flag
-
-struct osc_cache_waiter {
- cfs_list_t ocw_entry;
- cfs_waitq_t ocw_waitq;
- struct osc_async_page *ocw_oap;
- int ocw_grant;
- int ocw_rc;
-};
+extern atomic_t osc_pool_req_count;
+extern unsigned int osc_reqpool_maxreqcount;
+extern struct ptlrpc_request_pool *osc_rq_pool;
-int osc_create(const struct lu_env *env, struct obd_export *exp,
- struct obdo *oa, struct lov_stripe_md **ea,
- struct obd_trans_info *oti);
-int osc_real_create(struct obd_export *exp, struct obdo *oa,
- struct lov_stripe_md **ea, struct obd_trans_info *oti);
void osc_wake_cache_waiters(struct client_obd *cli);
-int osc_shrink_grant_to_target(struct client_obd *cli, long target);
+int osc_shrink_grant_to_target(struct client_obd *cli, __u64 target_bytes);
+void osc_schedule_grant_work(void);
void osc_update_next_shrink(struct client_obd *cli);
-
-/*
- * cl integration.
- */
-#include <cl_object.h>
+int lru_queue_work(const struct lu_env *env, void *data);
+int osc_extent_finish(const struct lu_env *env, struct osc_extent *ext,
+ int sent, int rc);
+int osc_extent_release(const struct lu_env *env, struct osc_extent *ext);
+int osc_lock_discard_pages(const struct lu_env *env, struct osc_object *osc,
+ pgoff_t start, pgoff_t end, bool discard);
extern struct ptlrpc_request_set *PTLRPCD_SET;
+void osc_lock_lvb_update(const struct lu_env *env,
+ struct osc_object *osc,
+ struct ldlm_lock *dlmlock,
+ struct ost_lvb *lvb);
+
int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id,
- __u64 *flags, ldlm_policy_data_t *policy,
- struct ost_lvb *lvb, int kms_valid,
- obd_enqueue_update_f upcall,
- void *cookie, struct ldlm_enqueue_info *einfo,
- struct lustre_handle *lockh,
- struct ptlrpc_request_set *rqset, int async, int agl);
-int osc_cancel_base(struct lustre_handle *lockh, __u32 mode);
-
-int osc_match_base(struct obd_export *exp, struct ldlm_res_id *res_id,
- __u32 type, ldlm_policy_data_t *policy, __u32 mode,
- int *flags, void *data, struct lustre_handle *lockh,
- int unref);
-
-int osc_setattr_async_base(struct obd_export *exp, struct obd_info *oinfo,
- struct obd_trans_info *oti,
- obd_enqueue_update_f upcall, void *cookie,
- struct ptlrpc_request_set *rqset);
-int osc_punch_base(struct obd_export *exp, struct obd_info *oinfo,
- obd_enqueue_update_f upcall, void *cookie,
- struct ptlrpc_request_set *rqset);
-int osc_sync_base(struct obd_export *exp, struct obd_info *oinfo,
+ __u64 *flags, union ldlm_policy_data *policy,
+ struct ost_lvb *lvb, osc_enqueue_upcall_f upcall,
+ void *cookie, struct ldlm_enqueue_info *einfo,
+ struct ptlrpc_request_set *rqset, int async,
+ bool speculative);
+
+int osc_match_base(const struct lu_env *env, struct obd_export *exp,
+ struct ldlm_res_id *res_id, enum ldlm_type type,
+ union ldlm_policy_data *policy, enum ldlm_mode mode,
+ __u64 *flags, struct osc_object *obj,
+ struct lustre_handle *lockh, int unref);
+
+int osc_setattr_async(struct obd_export *exp, struct obdo *oa,
+ obd_enqueue_update_f upcall, void *cookie,
+ struct ptlrpc_request_set *rqset);
+int osc_sync_base(struct osc_object *obj, struct obdo *oa,
obd_enqueue_update_f upcall, void *cookie,
struct ptlrpc_request_set *rqset);
-
+int osc_ladvise_base(struct obd_export *exp, struct obdo *oa,
+ struct ladvise_hdr *ladvise_hdr,
+ obd_enqueue_update_f upcall, void *cookie,
+ struct ptlrpc_request_set *rqset);
int osc_process_config_base(struct obd_device *obd, struct lustre_cfg *cfg);
int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
- cfs_list_t *ext_list, int cmd, pdl_policy_t p);
-int osc_lru_shrink(struct client_obd *cli, int target);
+ struct list_head *ext_list, int cmd);
+unsigned long osc_lru_reserve(struct client_obd *cli, unsigned long npages);
+void osc_lru_unreserve(struct client_obd *cli, unsigned long npages);
+
+extern struct lu_kmem_descr osc_caches[];
-extern spinlock_t osc_ast_guard;
+unsigned long osc_ldlm_weigh_ast(struct ldlm_lock *dlmlock);
int osc_cleanup(struct obd_device *obd);
int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg);
-#ifdef LPROCFS
-int lproc_osc_attach_seqstat(struct obd_device *dev);
-void lprocfs_osc_init_vars(struct lprocfs_static_vars *lvars);
-#else
-static inline int lproc_osc_attach_seqstat(struct obd_device *dev) {return 0;}
-static inline void lprocfs_osc_init_vars(struct lprocfs_static_vars *lvars)
+int osc_tunables_init(struct obd_device *obd);
+
+extern struct lu_device_type osc_device_type;
+
+/*
+ * Return the cl_io stored at osc_env_info(env)->oti_io after clearing it.
+ *
+ * The structure is zeroed with memset() on every call, so the caller always
+ * receives an all-zero cl_io. The returned pointer refers to storage inside
+ * the object returned by osc_env_info(env); nothing is allocated here.
+ * NOTE(review): presumably a second call with the same env wipes the cl_io
+ * handed out earlier -- confirm against osc_env_info().
+ */
+static inline struct cl_io *osc_env_thread_io(const struct lu_env *env)
{
- memset(lvars, 0, sizeof(*lvars));
+ struct cl_io *io = &osc_env_info(env)->oti_io;
+
+ memset(io, 0, sizeof(*io));
+ return io;
}
-#endif
-extern struct lu_device_type osc_device_type;
+/*
+ * Tell whether @obj belongs to the OSC device type.
+ *
+ * Returns non-zero when obj->lo_dev->ld_type points at osc_device_type and
+ * zero otherwise. @obj and obj->lo_dev are dereferenced without a NULL check.
+ */
+static inline int osc_is_object(const struct lu_object *obj)
+{
+ return obj->lo_dev->ld_type == &osc_device_type;
+}
+
+/*
+ * Return the osc_lock obtained by passing the result of
+ * cl_lock_at(lock, &osc_device_type) through cl2osc_lock().
+ *
+ * No NULL handling is done here: whatever cl2osc_lock() yields for
+ * cl_lock_at()'s result is returned unchanged.
+ * NOTE(review): presumably this is the OSC-layer slice of @lock -- confirm
+ * against cl_lock_at().
+ */
+static inline struct osc_lock *osc_lock_at(const struct cl_lock *lock)
+{
+ return cl2osc_lock(cl_lock_at(lock, &osc_device_type));
+}
+
+int osc_lock_init(const struct lu_env *env, struct cl_object *obj,
+ struct cl_lock *lock, const struct cl_io *io);
+int osc_io_init(const struct lu_env *env, struct cl_object *obj,
+ struct cl_io *io);
+struct lu_object *osc_object_alloc(const struct lu_env *env,
+ const struct lu_object_header *hdr,
+ struct lu_device *dev);
static inline int osc_recoverable_error(int rc)
{
return cli->cl_r_in_flight + cli->cl_w_in_flight;
}
+/*
+ * Return the obd_name of the obd device behind @cli's import, i.e.
+ * cli->cl_import->imp_obd->obd_name.
+ *
+ * The name is returned directly, not copied, and none of the pointers in
+ * the chain is checked for NULL before being dereferenced.
+ */
+static inline char *cli_name(struct client_obd *cli)
+{
+ return cli->cl_import->imp_obd->obd_name;
+}
+
+/*
+ * Fallback min_t(), defined only when no min_t macro exists already.
+ * Each argument is evaluated exactly once and converted to @type by
+ * initializing a local of that type; the smaller of the two converted
+ * values is the result of the statement expression.
+ */
#ifndef min_t
#define min_t(type,x,y) \
({ type __x = (x); type __y = (y); __x < __y ? __x: __y; })
#endif
-struct osc_device {
- struct cl_device od_cl;
- struct obd_export *od_exp;
-
- /* Write stats is actually protected by client_obd's lock. */
- struct osc_stats {
- uint64_t os_lockless_writes; /* by bytes */
- uint64_t os_lockless_reads; /* by bytes */
- uint64_t os_lockless_truncates; /* by times */
- } od_stats;
-
- /* configuration item(s) */
- int od_contention_time;
- int od_lockless_truncate;
+/*
+ * Argument bundle holding a single pointer to a struct obd_info.
+ * NOTE(review): the name suggests it is handed to asynchronous request
+ * callbacks -- confirm against the users of this structure.
+ */
+struct osc_async_args {
+ struct obd_info *aa_oi;
};
-static inline struct osc_device *obd2osc_dev(const struct obd_device *d)
-{
- return container_of0(d->obd_lu_dev, struct osc_device, od_cl.cd_lu_dev);
-}
-
-int osc_dlm_lock_pageref(struct ldlm_lock *dlm);
-
-extern cfs_mem_cache_t *osc_quota_kmem;
-struct osc_quota_info {
- /** linkage for quota hash table */
- cfs_hlist_node_t oqi_hash;
- obd_uid oqi_id;
-};
int osc_quota_setup(struct obd_device *obd);
int osc_quota_cleanup(struct obd_device *obd);
-int osc_quota_setdq(struct client_obd *cli, const unsigned int qid[],
- obd_flag valid, obd_flag flags);
+int osc_quota_setdq(struct client_obd *cli, __u64 xid, const unsigned int qid[],
+ u64 valid, u32 flags);
int osc_quota_chkdq(struct client_obd *cli, const unsigned int qid[]);
int osc_quotactl(struct obd_device *unused, struct obd_export *exp,
struct obd_quotactl *oqctl);
-int osc_quotacheck(struct obd_device *unused, struct obd_export *exp,
- struct obd_quotactl *oqctl);
-int osc_quota_poll_check(struct obd_export *exp, struct if_quotacheck *qchk);
+void osc_inc_unstable_pages(struct ptlrpc_request *req);
+void osc_dec_unstable_pages(struct ptlrpc_request *req);
+bool osc_over_unstable_soft_limit(struct client_obd *cli);
+void osc_page_touch_at(const struct lu_env *env, struct cl_object *obj,
+ pgoff_t idx, size_t to);
+
+struct ldlm_lock *osc_obj_dlmlock_at_pgoff(const struct lu_env *env,
+ struct osc_object *obj,
+ pgoff_t index,
+ enum osc_dap_flags flags);
+
+int osc_object_invalidate(const struct lu_env *env, struct osc_object *osc);
+
+/** osc shrink list that links all osc client obds */
+extern struct list_head osc_shrink_list;
+/** spin lock to protect osc_shrink_list */
+extern spinlock_t osc_shrink_lock;
+extern unsigned long osc_cache_shrink_count(struct shrinker *sk,
+ struct shrink_control *sc);
+extern unsigned long osc_cache_shrink_scan(struct shrinker *sk,
+ struct shrink_control *sc);
+static inline unsigned int osc_max_write_chunks(const struct client_obd *cli)
+{
+ /*
+ * Return the maximum number of chunks one write RPC may cover for
+ * @cli, computed as PTLRPC_MAX_BRW_SIZE >> cli->cl_chunkbits.
+ *
+ * LU-8135:
+ *
+ * The maximum size of a single transaction is about 64MB in ZFS.
+ * #define DMU_MAX_ACCESS (64 * 1024 * 1024)
+ *
+ * Since ZFS is a copy-on-write file system, a single dirty page in
+ * a chunk will result in the rewrite of the whole chunk. An RPC
+ * therefore must not be allowed to contain too many chunks, otherwise
+ * the transaction size would grow much bigger than 64MB, especially
+ * with a big ZFS block size.
+ *
+ * This piece of code makes sure that OSC won't send write RPCs
+ * with too many chunks. The maximum chunk size that an RPC can cover
+ * is set to PTLRPC_MAX_BRW_SIZE, which is defined to 16MB. Ideally
+ * the OST should tell the client what the biggest transaction size
+ * is, but this is good enough for now.
+ *
+ * This limitation doesn't apply to ldiskfs, which allows as many
+ * chunks in one RPC as we want. However, there is no benefit in
+ * having too many discontiguous pages in one RPC.
+ *
+ * An osc_extent never covers more than one RPC's worth of data, so
+ * the number of chunks in an osc_extent is never bigger than
+ * PTLRPC_MAX_BRW_SIZE >> chunkbits.
+ */
+ return PTLRPC_MAX_BRW_SIZE >> cli->cl_chunkbits;
+}
+
+/*
+ * Choose the request portal for the I/O request @req.
+ *
+ * req->rq_request_portal is set to MDS_IO_PORTAL when the connect data of
+ * the request's import (req->rq_import) has the IBITS flag, and to
+ * OST_IO_PORTAL otherwise. Nothing else in @req is modified.
+ */
+static inline void osc_set_io_portal(struct ptlrpc_request *req)
+{
+ struct obd_import *imp = req->rq_import;
+
+ /* Distinguish OSC from MDC here to use OST or MDS portal */
+ if (OCD_HAS_FLAG(&imp->imp_connect_data, IBITS))
+ req->rq_request_portal = MDS_IO_PORTAL;
+ else
+ req->rq_request_portal = OST_IO_PORTAL;
+}
#endif /* OSC_INTERNAL_H */