X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fosp%2Fosp_internal.h;h=7a4418e7639e53467ce67a940365ca7b1630c0af;hb=82c6e42d6137f39a1f2394b7bc6e8d600eb36181;hp=49ea455181612034fb5e9b659445e14fe2c82efa;hpb=9230561f268f9c3d7f84ac7824d7a1d3769a3dfe;p=fs%2Flustre-release.git diff --git a/lustre/osp/osp_internal.h b/lustre/osp/osp_internal.h index 49ea455..7a4418e 100644 --- a/lustre/osp/osp_internal.h +++ b/lustre/osp/osp_internal.h @@ -23,7 +23,7 @@ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2012, 2016, Intel Corporation. + * Copyright (c) 2012, 2017, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -44,7 +44,6 @@ #include #include #include -#include /* * Infrastructure to support tracking of last committed llog record @@ -68,7 +67,6 @@ struct osp_precreate { /* * Precreation pool */ - spinlock_t osp_pre_lock; /* last fid to assign in creation */ struct lu_fid osp_pre_used_fid; @@ -144,6 +142,17 @@ struct osp_updates { * those stale RPC(with older generation) will not be sent, otherwise it * will cause update lllog corruption */ __u64 ou_generation; + + /* dedicate update thread */ + struct task_struct *ou_update_task; + struct lu_env ou_env; +}; + +struct osp_rpc_lock { + /** Lock protecting in-flight RPC concurrency. */ + struct mutex rpcl_mutex; + /** Used for MDS/RPC load testing purposes. */ + unsigned int rpcl_fakes; }; struct osp_device { @@ -164,16 +173,16 @@ struct osp_device { * and required le64_to_cpu() conversion before use. 
* Protected by opd_pre_lock */ struct lu_fid opd_last_used_fid; + /* on disk copy last_used_fid.f_oid or idif */ + u64 opd_last_id; struct lu_fid opd_gap_start_fid; int opd_gap_count; /* connection to OST */ + struct osp_rpc_lock opd_rpc_lock; struct obd_device *opd_obd; struct obd_export *opd_exp; - struct obd_uuid opd_cluuid; struct obd_connect_data *opd_connect_data; int opd_connects; - struct proc_dir_entry *opd_proc_entry; - struct lprocfs_stats *opd_stats; /* connection status. */ unsigned int opd_new_connection:1, opd_got_disconnected:1, @@ -189,14 +198,13 @@ struct osp_device { /* precreate structure for OSP */ struct osp_precreate *opd_pre; /* dedicate precreate thread */ - struct ptlrpc_thread opd_pre_thread; + struct task_struct *opd_pre_task; + spinlock_t opd_pre_lock; /* thread waits for signals about pool going empty */ wait_queue_head_t opd_pre_waitq; /* send update thread */ struct osp_updates *opd_update; - /* dedicate update thread */ - struct ptlrpc_thread opd_update_thread; /* * OST synchronization thread @@ -209,7 +217,7 @@ struct osp_device { /* processing of changes from previous mount is done? 
*/ int opd_sync_prev_done; /* found records */ - struct ptlrpc_thread opd_sync_thread; + struct task_struct *opd_sync_task; wait_queue_head_t opd_sync_waitq; /* list of in flight rpcs */ struct list_head opd_sync_in_flight_list; @@ -224,29 +232,32 @@ struct osp_device { /* osd api's commit cb control structure */ struct dt_txn_callback opd_sync_txn_cb; /* last used change number -- semantically similar to transno */ - __u64 opd_sync_last_used_id; + unsigned long opd_sync_last_used_id; /* last committed change number -- semantically similar to * last_committed */ __u64 opd_sync_last_committed_id; - /* last processed (taken from llog) id */ - volatile __u64 opd_sync_last_processed_id; - struct osp_id_tracker *opd_sync_tracker; - struct list_head opd_sync_ontrack; + /* last processed catalog index */ + int opd_sync_last_catalog_idx; + /* number of processed records */ + atomic64_t opd_sync_processed_recs; /* stop processing new requests until barrier=0 */ atomic_t opd_sync_barrier; wait_queue_head_t opd_sync_barrier_waitq; + /* last generated id */ + ktime_t opd_sync_next_commit_cb; + atomic_t opd_commits_registered; /* * statfs related fields: OSP maintains it on its own */ struct obd_statfs opd_statfs; - cfs_time_t opd_statfs_fresh_till; - struct timer_list opd_statfs_timer; + ktime_t opd_statfs_fresh_till; + struct timer_list opd_statfs_timer; int opd_statfs_update_in_progress; /* how often to update statfs data */ - int opd_statfs_maxage; + time64_t opd_statfs_maxage; - struct proc_dir_entry *opd_symlink; + struct dentry *opd_debugfs; /* If the caller wants to do some idempotent async operations on * remote server, it can append the async remote requests on the @@ -267,7 +278,6 @@ struct osp_device { int opd_reserved_mb_low; }; -#define opd_pre_lock opd_pre->osp_pre_lock #define opd_pre_used_fid opd_pre->osp_pre_used_fid #define opd_pre_last_created_fid opd_pre->osp_pre_last_created_fid #define opd_pre_reserved opd_pre->osp_pre_reserved @@ -311,6 +321,9 @@ struct 
osp_object { struct list_head opo_invalidate_cb_list; /* Protect opo_ooa. */ spinlock_t opo_lock; + /* to implement in-flight invalidation */ + atomic_t opo_invalidate_seq; + struct rw_semaphore opo_invalidate_sem; }; extern struct lu_object_operations osp_lu_obj_ops; @@ -325,7 +338,6 @@ struct osp_thread_info { struct lu_attr osi_attr; struct ost_id osi_oi; struct ost_id osi_oi2; - u64 osi_id; loff_t osi_off; union { struct llog_rec_hdr osi_hdr; @@ -394,7 +406,7 @@ static inline bool is_only_remote_trans(struct thandle *th) } static inline void osp_objid_buf_prep(struct lu_buf *buf, loff_t *off, - __u32 *id, int index) + __u64 *id, int index) { /* Note: through id is only 32 bits, it will also write 64 bits * for oid to keep compatibility with the previous version. */ @@ -421,15 +433,7 @@ extern struct lu_context_key osp_thread_key; static inline struct osp_thread_info *osp_env_info(const struct lu_env *env) { - struct osp_thread_info *info; - - info = lu_context_key_get(&env->le_ctx, &osp_thread_key); - if (info == NULL) { - lu_env_refill((struct lu_env *)env); - info = lu_context_key_get(&env->le_ctx, &osp_thread_key); - } - LASSERT(info); - return info; + return lu_env_info(env, &osp_thread_key); } struct osp_txn_info { @@ -456,7 +460,7 @@ static inline int lu_device_is_osp(struct lu_device *d) static inline struct osp_device *lu2osp_dev(struct lu_device *d) { LASSERT(lu_device_is_osp(d)); - return container_of0(d, struct osp_device, opd_dt_dev.dd_lu_dev); + return container_of_safe(d, struct osp_device, opd_dt_dev.dd_lu_dev); } static inline struct lu_device *osp2lu_dev(struct osp_device *d) @@ -467,13 +471,13 @@ static inline struct lu_device *osp2lu_dev(struct osp_device *d) static inline struct osp_device *dt2osp_dev(struct dt_device *d) { LASSERT(lu_device_is_osp(&d->dd_lu_dev)); - return container_of0(d, struct osp_device, opd_dt_dev); + return container_of_safe(d, struct osp_device, opd_dt_dev); } static inline struct osp_object *lu2osp_obj(struct 
lu_object *o) { LASSERT(ergo(o != NULL, lu_device_is_osp(o->lo_dev))); - return container_of0(o, struct osp_object, opo_obj.do_lu); + return container_of_safe(o, struct osp_object, opo_obj.do_lu); } static inline struct lu_object *osp2lu_obj(struct osp_object *obj) @@ -484,7 +488,7 @@ static inline struct lu_object *osp2lu_obj(struct osp_object *obj) static inline struct osp_object *osp_obj(const struct lu_object *o) { LASSERT(lu_device_is_osp(o->lo_dev)); - return container_of0(o, struct osp_object, opo_obj.do_lu); + return container_of_safe(o, struct osp_object, opo_obj.do_lu); } static inline struct osp_object *dt2osp_obj(const struct dt_object *d) @@ -494,8 +498,8 @@ static inline struct osp_object *dt2osp_obj(const struct dt_object *d) static inline struct dt_object *osp_object_child(struct osp_object *o) { - return container_of0(lu_object_next(osp2lu_obj(o)), - struct dt_object, do_lu); + return container_of(lu_object_next(osp2lu_obj(o)), + struct dt_object, do_lu); } static inline struct seq_server_site *osp_seq_site(struct osp_device *osp) @@ -503,20 +507,79 @@ static inline struct seq_server_site *osp_seq_site(struct osp_device *osp) return osp->opd_dt_dev.dd_lu_dev.ld_site->ld_seq_site; } -#define osp_init_rpc_lock(lck) mdc_init_rpc_lock(lck) +/** + * Serializes in-flight MDT-modifying RPC requests to preserve idempotency. + * + * This mutex is used to implement execute-once semantics on the MDT. + * The MDT stores the last transaction ID and result for every client in + * its last_rcvd file. If the client doesn't get a reply, it can safely + * resend the request and the MDT will reconstruct the reply being aware + * that the request has already been executed. Without this lock, + * execution status of concurrent in-flight requests would be + * overwritten. + * + * This implementation limits the extent to which we can keep a full pipeline + * of in-flight requests from a single client.
This limitation can be + * overcome by allowing multiple slots per client in the last_rcvd file, + * see LU-6864. + */ +#define OSP_FAKE_RPCL_IT ((void *)0x2c0012bfUL) + +static inline void osp_init_rpc_lock(struct osp_device *osp) +{ + struct osp_rpc_lock *lck = &osp->opd_rpc_lock; + + mutex_init(&lck->rpcl_mutex); + lck->rpcl_fakes = 0; +} static inline void osp_get_rpc_lock(struct osp_device *osp) { - struct mdc_rpc_lock *rpc_lock = osp->opd_obd->u.cli.cl_rpc_lock; + struct osp_rpc_lock *lck = &osp->opd_rpc_lock; + + /* This would normally block until the existing request finishes. + * If fail_loc is set it will block until the regular request is + * done, then increment rpcl_fakes. Once that is non-zero it + * will only be cleared when all fake requests are finished. + * Only when all fake requests are finished can normal requests + * be sent, to ensure they are recoverable again. + */ + again: + mutex_lock(&lck->rpcl_mutex); + + if (CFS_FAIL_CHECK_QUIET(OBD_FAIL_MDC_RPCS_SEM) || + CFS_FAIL_CHECK_QUIET(OBD_FAIL_OSP_RPCS_SEM)) { + lck->rpcl_fakes++; + mutex_unlock(&lck->rpcl_mutex); - mdc_get_rpc_lock(rpc_lock, NULL); + return; + } + + /* This will only happen when the CFS_FAIL_CHECK() was just turned + * off but there are still requests in progress. Wait until they + * finish. It doesn't need to be efficient in this extremely rare + * case, just have low overhead in the common case when it isn't true. 
+ */ + if (unlikely(lck->rpcl_fakes)) { + mutex_unlock(&lck->rpcl_mutex); + schedule_timeout_uninterruptible(cfs_time_seconds(1) / 4); + + goto again; + } } static inline void osp_put_rpc_lock(struct osp_device *osp) { - struct mdc_rpc_lock *rpc_lock = osp->opd_obd->u.cli.cl_rpc_lock; + struct osp_rpc_lock *lck = &osp->opd_rpc_lock; - mdc_put_rpc_lock(rpc_lock, NULL); + if (lck->rpcl_fakes) { /* OBD_FAIL_OSP_RPCS_SEM */ + mutex_lock(&lck->rpcl_mutex); + + if (lck->rpcl_fakes) /* check again under lock */ + lck->rpcl_fakes--; + } + + mutex_unlock(&lck->rpcl_mutex); } static inline int osp_fid_diff(const struct lu_fid *fid1, @@ -537,16 +600,26 @@ static inline int osp_fid_diff(const struct lu_fid *fid1, fid_idif_id(fid2->f_seq, fid2->f_oid, 0); } - LASSERTF(fid_seq(fid1) == fid_seq(fid2), "fid1:"DFID - ", fid2:"DFID"\n", PFID(fid1), PFID(fid2)); + LASSERTF(fid_seq(fid1) == fid_seq(fid2), "fid1:"DFID", fid2:"DFID"\n", + PFID(fid1), PFID(fid2)); return fid_oid(fid1) - fid_oid(fid2); } +static inline void osp_fid_to_obdid(struct lu_fid *last_fid, u64 *osi_id) +{ + if (fid_is_idif((last_fid))) + *osi_id = fid_idif_id(fid_seq(last_fid), fid_oid(last_fid), + fid_ver(last_fid)); + else + *osi_id = fid_oid(last_fid); +} static inline void osp_update_last_fid(struct osp_device *d, struct lu_fid *fid) { int diff = osp_fid_diff(fid, &d->opd_last_used_fid); + struct lu_fid *gap_start = &d->opd_gap_start_fid; + /* * we might have lost precreated objects due to VBR and precreate * orphans, the gap in objid can be calculated properly only here @@ -554,12 +627,19 @@ static inline void osp_update_last_fid(struct osp_device *d, struct lu_fid *fid) if (diff > 0) { if (diff > 1) { d->opd_gap_start_fid = d->opd_last_used_fid; - d->opd_gap_start_fid.f_oid++; + if (fid_oid(gap_start) == LUSTRE_DATA_SEQ_MAX_WIDTH) { + gap_start->f_seq++; + gap_start->f_oid = fid_is_idif(gap_start) ? 
+ 0 : 1; + } else { + gap_start->f_oid++; + } d->opd_gap_count = diff - 1; CDEBUG(D_HA, "Gap in objids: start="DFID", count =%d\n", PFID(&d->opd_gap_start_fid), d->opd_gap_count); } d->opd_last_used_fid = *fid; + osp_fid_to_obdid(fid, &d->opd_last_id); } } @@ -615,10 +695,10 @@ osp_current_object_update_request(struct osp_update_request *our); int osp_object_update_request_create(struct osp_update_request *our, size_t size); -#define osp_update_rpc_pack(env, name, our, op, ...) \ +#define OSP_UPDATE_RPC_PACK(env, out_something_pack, our, ...) \ ({ \ - struct object_update *object_update; \ - size_t max_update_length; \ + struct object_update *object_update; \ + size_t max_update_length; \ struct osp_update_request_sub *ours; \ int ret; \ \ @@ -630,9 +710,9 @@ int osp_object_update_request_create(struct osp_update_request *our, \ object_update = update_buffer_get_update(ours->ours_req,\ ours->ours_req->ourq_count); \ - ret = out_##name##_pack(env, object_update, \ - &max_update_length, \ - __VA_ARGS__); \ + ret = out_something_pack(env, object_update, \ + &max_update_length, \ + __VA_ARGS__); \ if (ret == -E2BIG) { \ int rc1; \ /* Create new object update request */ \ @@ -660,16 +740,6 @@ int osp_object_update_request_create(struct osp_update_request *our, ret; \ }) -static inline bool osp_send_update_thread_running(struct osp_device *osp) -{ - return osp->opd_update_thread.t_flags & SVC_RUNNING; -} - -static inline bool osp_send_update_thread_stopped(struct osp_device *osp) -{ - return osp->opd_update_thread.t_flags & SVC_STOPPED; -} - typedef int (*osp_update_interpreter_t)(const struct lu_env *env, struct object_update_reply *rep, struct ptlrpc_request *req, @@ -678,7 +748,6 @@ typedef int (*osp_update_interpreter_t)(const struct lu_env *env, /* osp_dev.c */ void osp_update_last_id(struct osp_device *d, u64 objid); -extern struct llog_operations osp_mds_ost_orig_logops; /* osp_trans.c */ int osp_insert_async_request(const struct lu_env *env, enum update_type op, 
@@ -749,6 +818,7 @@ int osp_declare_xattr_del(const struct lu_env *env, struct dt_object *dt, int osp_xattr_del(const struct lu_env *env, struct dt_object *dt, const char *name, struct thandle *th); int osp_invalidate(const struct lu_env *env, struct dt_object *dt); +bool osp_check_stale(struct dt_object *dt); void osp_obj_invalidate_cache(struct osp_object *obj); int osp_trans_stop(const struct lu_env *env, struct dt_device *dt, @@ -792,22 +862,29 @@ int osp_reset_last_used(const struct lu_env *env, struct osp_device *osp); int osp_write_last_oid_seq_files(struct lu_env *env, struct osp_device *osp, struct lu_fid *fid, int sync); int osp_init_pre_fid(struct osp_device *osp); +int osp_init_statfs(struct osp_device *osp); +void osp_fini_statfs(struct osp_device *osp); +void osp_statfs_fini(struct osp_device *d); /* lproc_osp.c */ -void osp_lprocfs_init(struct osp_device *osp); +void osp_tunables_init(struct osp_device *osp); +void osp_tunables_fini(struct osp_device *osp); /* osp_sync.c */ int osp_sync_declare_add(const struct lu_env *env, struct osp_object *o, - llog_op_type type, struct thandle *th); + enum llog_op_type type, struct thandle *th); int osp_sync_add(const struct lu_env *env, struct osp_object *o, - llog_op_type type, struct thandle *th, + enum llog_op_type type, struct thandle *th, const struct lu_attr *attr); int osp_sync_init(const struct lu_env *env, struct osp_device *d); int osp_sync_fini(struct osp_device *d); void osp_sync_check_for_work(struct osp_device *osp); +void osp_sync_force(const struct lu_env *env, struct osp_device *d); +int osp_sync_add_commit_cb_1s(const struct lu_env *env, struct osp_device *d, + struct thandle *th); /* lwp_dev.c */ -extern struct obd_ops lwp_obd_device_ops; +extern const struct obd_ops lwp_obd_device_ops; extern struct lu_device_type lwp_device_type; static inline struct lu_device *osp2top(const struct osp_device *osp)