From: Hongchao Zhang Date: Sun, 21 Jul 2013 21:40:37 +0000 (+0800) Subject: LU-2613 recovery: free open/close request promptly X-Git-Tag: 2.5.51~9 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=4322e0f99c87bc0412d315d0674d70cc1ffc0bb4 LU-2613 recovery: free open/close request promptly - For a non-create open or a committed open, the open request should be freed along with the close request as soon as the close is done, regardless of whether the transno of the open/close is greater than the last committed transno known by the client. - Move committed open requests into a dedicated list, which avoids scanning a huge replay list on receiving each reply (when there are many open files). Signed-off-by: Niu Yawei Signed-off-by: Hongchao Zhang Change-Id: I1a25a35fe7a16681368d92d16964680b6209b3ee Reviewed-on: http://review.whamcloud.com/6665 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Alex Zhuravlev Reviewed-by: Oleg Drokin --- diff --git a/lustre/include/lustre/lustre_idl.h b/lustre/include/lustre/lustre_idl.h index adf3095..f99ef62 100644 --- a/lustre/include/lustre/lustre_idl.h +++ b/lustre/include/lustre/lustre_idl.h @@ -1298,6 +1298,7 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb); #define OBD_CONNECT_SHORTIO 0x2000000000000ULL/* short io */ #define OBD_CONNECT_PINGLESS 0x4000000000000ULL/* pings not required */ #define OBD_CONNECT_FLOCK_DEAD 0x8000000000000ULL/* improved flock deadlock detection */ +#define OBD_CONNECT_DISP_STRIPE 0x10000000000000ULL/* create stripe disposition*/ /* XXX README XXX: * Please DO NOT add flag values here before first ensuring that this same @@ -1341,7 +1342,9 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb); OBD_CONNECT_LIGHTWEIGHT | OBD_CONNECT_UMASK | \ OBD_CONNECT_LVB_TYPE | OBD_CONNECT_LAYOUTLOCK |\ OBD_CONNECT_PINGLESS | OBD_CONNECT_MAX_EASIZE |\ - OBD_CONNECT_FLOCK_DEAD) + OBD_CONNECT_FLOCK_DEAD | \ + OBD_CONNECT_DISP_STRIPE) + #define OST_CONNECT_SUPPORTED 
(OBD_CONNECT_SRVLOCK | OBD_CONNECT_GRANT | \ OBD_CONNECT_REQPORTAL | OBD_CONNECT_VERSION | \ OBD_CONNECT_TRUNCLOCK | OBD_CONNECT_INDEX | \ @@ -2114,6 +2117,7 @@ extern void lustre_swab_generic_32s (__u32 *val); #define DISP_ENQ_CREATE_REF 0x01000000 #define DISP_OPEN_LOCK 0x02000000 #define DISP_OPEN_LEASE 0x04000000 +#define DISP_OPEN_STRIPE 0x08000000 /* INODE LOCK PARTS */ #define MDS_INODELOCK_LOOKUP 0x000001 /* dentry, mode, owner, group */ diff --git a/lustre/include/lustre_export.h b/lustre/include/lustre_export.h index 649aa23..2a01b60 100644 --- a/lustre/include/lustre_export.h +++ b/lustre/include/lustre_export.h @@ -377,6 +377,15 @@ static inline bool imp_connect_lvb_type(struct obd_import *imp) return false; } +static inline bool imp_connect_disp_stripe(struct obd_import *imp) +{ + struct obd_connect_data *ocd; + + LASSERT(imp != NULL); + ocd = &imp->imp_connect_data; + return ocd->ocd_connect_flags & OBD_CONNECT_DISP_STRIPE; +} + extern struct obd_export *class_conn2export(struct lustre_handle *conn); extern struct obd_device *class_conn2obd(struct lustre_handle *conn); diff --git a/lustre/include/lustre_import.h b/lustre/include/lustre_import.h index d86f0e7..3beccba 100644 --- a/lustre/include/lustre_import.h +++ b/lustre/include/lustre_import.h @@ -180,6 +180,17 @@ struct obd_import { cfs_list_t imp_delayed_list; /** @} */ + /** + * List of requests that are retained for committed open replay. Once + * open is committed, open replay request will be moved from the + * imp_replay_list into the imp_committed_list. + * The imp_replay_cursor is for accelerating searching during replay. 
+ * @{ + */ + cfs_list_t imp_committed_list; + cfs_list_t *imp_replay_cursor; + /** @} */ + /** obd device for this import */ struct obd_device *imp_obd; diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h index df95d8a..3c9a2d7 100644 --- a/lustre/include/lustre_net.h +++ b/lustre/include/lustre_net.h @@ -2911,6 +2911,8 @@ int ptlrpc_register_rqbd(struct ptlrpc_request_buffer_desc *rqbd); * request queues, request management, etc. * @{ */ +void ptlrpc_request_committed(struct ptlrpc_request *req, int force); + void ptlrpc_init_client(int req_portal, int rep_portal, char *name, struct ptlrpc_client *); void ptlrpc_cleanup_client(struct obd_import *imp); diff --git a/lustre/include/obd.h b/lustre/include/obd.h index 121063a..b1115d4f 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -1347,10 +1347,11 @@ struct lustre_md { }; struct md_open_data { - struct obd_client_handle *mod_och; - struct ptlrpc_request *mod_open_req; - struct ptlrpc_request *mod_close_req; - cfs_atomic_t mod_refcount; + struct obd_client_handle *mod_och; + struct ptlrpc_request *mod_open_req; + struct ptlrpc_request *mod_close_req; + cfs_atomic_t mod_refcount; + bool mod_is_create; }; struct lookup_intent; @@ -1454,7 +1455,7 @@ struct md_ops { int (*m_set_open_replay_data)(struct obd_export *, struct obd_client_handle *, - struct ptlrpc_request *); + struct lookup_intent *); int (*m_clear_open_replay_data)(struct obd_export *, struct obd_client_handle *); diff --git a/lustre/include/obd_class.h b/lustre/include/obd_class.h index fe1eccc..39c0dd8 100644 --- a/lustre/include/obd_class.h +++ b/lustre/include/obd_class.h @@ -2080,13 +2080,13 @@ static inline int md_getxattr(struct obd_export *exp, } static inline int md_set_open_replay_data(struct obd_export *exp, - struct obd_client_handle *och, - struct ptlrpc_request *open_req) + struct obd_client_handle *och, + struct lookup_intent *it) { - ENTRY; - EXP_CHECK_MD_OP(exp, set_open_replay_data); - 
EXP_MD_COUNTER_INCREMENT(exp, set_open_replay_data); - RETURN(MDP(exp->exp_obd, set_open_replay_data)(exp, och, open_req)); + ENTRY; + EXP_CHECK_MD_OP(exp, set_open_replay_data); + EXP_MD_COUNTER_INCREMENT(exp, set_open_replay_data); + RETURN(MDP(exp->exp_obd, set_open_replay_data)(exp, och, it)); } static inline int md_clear_open_replay_data(struct obd_export *exp, diff --git a/lustre/liblustre/file.c b/lustre/liblustre/file.c index fac14db..be3bdd5 100644 --- a/lustre/liblustre/file.c +++ b/lustre/liblustre/file.c @@ -173,8 +173,7 @@ int llu_local_open(struct llu_inode_info *lli, struct lookup_intent *it) fd->fd_mds_och.och_fid = lli->lli_fid; lli->lli_file_data = fd; llu_ioepoch_open(lli, body->ioepoch); - md_set_open_replay_data(lli->lli_sbi->ll_md_exp, - &fd->fd_mds_och, it->d.lustre.it_data); + md_set_open_replay_data(lli->lli_sbi->ll_md_exp, &fd->fd_mds_och, it); RETURN(0); } diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 58c1d73..edcdda4 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -494,7 +494,7 @@ static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it, och->och_magic = OBD_CLIENT_HANDLE_MAGIC; och->och_flags = it->it_flags; - return md_set_open_replay_data(md_exp, och, req); + return md_set_open_replay_data(md_exp, och, it); } int ll_local_open(struct file *file, struct lookup_intent *it, diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index c6b85f1..a06abd3 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -207,7 +207,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, OBD_CONNECT_JOBSTATS | OBD_CONNECT_LVB_TYPE | OBD_CONNECT_LAYOUTLOCK | OBD_CONNECT_PINGLESS | OBD_CONNECT_MAX_EASIZE | - OBD_CONNECT_FLOCK_DEAD; + OBD_CONNECT_FLOCK_DEAD | + OBD_CONNECT_DISP_STRIPE; if (sbi->ll_flags & LL_SBI_SOM_PREVIEW) data->ocd_connect_flags |= OBD_CONNECT_SOM; diff --git a/lustre/lmv/lmv_obd.c b/lustre/lmv/lmv_obd.c index 20fb834..c89dc69 100644 
--- a/lustre/lmv/lmv_obd.c +++ b/lustre/lmv/lmv_obd.c @@ -2719,19 +2719,19 @@ int lmv_free_lustre_md(struct obd_export *exp, struct lustre_md *md) } int lmv_set_open_replay_data(struct obd_export *exp, - struct obd_client_handle *och, - struct ptlrpc_request *open_req) + struct obd_client_handle *och, + struct lookup_intent *it) { - struct obd_device *obd = exp->exp_obd; - struct lmv_obd *lmv = &obd->u.lmv; - struct lmv_tgt_desc *tgt; - ENTRY; + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + struct lmv_tgt_desc *tgt; + ENTRY; - tgt = lmv_find_target(lmv, &och->och_fid); - if (IS_ERR(tgt)) - RETURN(PTR_ERR(tgt)); + tgt = lmv_find_target(lmv, &och->och_fid); + if (IS_ERR(tgt)) + RETURN(PTR_ERR(tgt)); - RETURN(md_set_open_replay_data(tgt->ltd_exp, och, open_req)); + RETURN(md_set_open_replay_data(tgt->ltd_exp, och, it)); } int lmv_clear_open_replay_data(struct obd_export *exp, diff --git a/lustre/mdc/mdc_internal.h b/lustre/mdc/mdc_internal.h index 7a807d7..adacb30 100644 --- a/lustre/mdc/mdc_internal.h +++ b/lustre/mdc/mdc_internal.h @@ -121,8 +121,8 @@ int mdc_get_lustre_md(struct obd_export *md_exp, struct ptlrpc_request *req, int mdc_free_lustre_md(struct obd_export *exp, struct lustre_md *md); int mdc_set_open_replay_data(struct obd_export *exp, - struct obd_client_handle *och, - struct ptlrpc_request *open_req); + struct obd_client_handle *och, + struct lookup_intent *it); int mdc_clear_open_replay_data(struct obd_export *exp, struct obd_client_handle *och); diff --git a/lustre/mdc/mdc_locks.c b/lustre/mdc/mdc_locks.c index 5d43c47..eabfbbd 100644 --- a/lustre/mdc/mdc_locks.c +++ b/lustre/mdc/mdc_locks.c @@ -657,7 +657,7 @@ static int mdc_finish_enqueue(struct obd_export *exp, * happens immediately after swabbing below, new reply * is swabbed by that handler correctly. 
*/ - mdc_set_open_replay_data(NULL, NULL, req); + mdc_set_open_replay_data(NULL, NULL, it); } if ((body->valid & (OBD_MD_FLDIREA | OBD_MD_FLEASIZE)) != 0) { diff --git a/lustre/mdc/mdc_reint.c b/lustre/mdc/mdc_reint.c index b28b332..56e2d74 100644 --- a/lustre/mdc/mdc_reint.c +++ b/lustre/mdc/mdc_reint.c @@ -171,6 +171,7 @@ int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data, req->rq_cb_data = *mod; (*mod)->mod_open_req = req; req->rq_commit_cb = mdc_commit_open; + (*mod)->mod_is_create = true; /** * Take an extra reference on \var mod, it protects \var * mod from being freed on eviction (commit callback is diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index ab9a375..aa8e39d 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -741,14 +741,15 @@ void mdc_commit_open(struct ptlrpc_request *req) } int mdc_set_open_replay_data(struct obd_export *exp, - struct obd_client_handle *och, - struct ptlrpc_request *open_req) -{ - struct md_open_data *mod; - struct mdt_rec_create *rec; - struct mdt_body *body; - struct obd_import *imp = open_req->rq_import; - ENTRY; + struct obd_client_handle *och, + struct lookup_intent *it) +{ + struct md_open_data *mod; + struct mdt_rec_create *rec; + struct mdt_body *body; + struct ptlrpc_request *open_req = it->d.lustre.it_data; + struct obd_import *imp = open_req->rq_import; + ENTRY; if (!open_req->rq_replay) RETURN(0); @@ -781,6 +782,8 @@ int mdc_set_open_replay_data(struct obd_export *exp, spin_lock(&open_req->rq_lock); och->och_mod = mod; mod->mod_och = och; + mod->mod_is_create = it_disposition(it, DISP_OPEN_CREATE) || + it_disposition(it, DISP_OPEN_STRIPE); mod->mod_open_req = open_req; open_req->rq_cb_data = mod; open_req->rq_commit_cb = mdc_commit_open; @@ -801,6 +804,23 @@ int mdc_set_open_replay_data(struct obd_export *exp, RETURN(0); } +static void mdc_free_open(struct md_open_data *mod) +{ + int committed = 0; + + if (mod->mod_is_create == 0 && + 
imp_connect_disp_stripe(mod->mod_open_req->rq_import)) + committed = 1; + + LASSERT(mod->mod_open_req->rq_replay == 0); + + DEBUG_REQ(D_RPCTRACE, mod->mod_open_req, "free open request\n"); + + ptlrpc_request_committed(mod->mod_open_req, committed); + if (mod->mod_close_req) + ptlrpc_request_committed(mod->mod_close_req, committed); +} + int mdc_clear_open_replay_data(struct obd_export *exp, struct obd_client_handle *och) { @@ -815,6 +835,8 @@ int mdc_clear_open_replay_data(struct obd_export *exp, RETURN(0); LASSERT(mod != LP_POISON); + LASSERT(mod->mod_open_req != NULL); + mdc_free_open(mod); mod->mod_och = NULL; och->och_mod = NULL; @@ -1014,6 +1036,9 @@ int mdc_done_writing(struct obd_export *exp, struct md_op_data *op_data, if (mod) { if (rc != 0) mod->mod_close_req = NULL; + LASSERT(mod->mod_open_req != NULL); + mdc_free_open(mod); + /* Since now, mod is accessed through setattr req only, * thus DW req does not keep a reference on mod anymore. */ obd_mod_put(mod); diff --git a/lustre/mdt/mdt_open.c b/lustre/mdt/mdt_open.c index 3712ae3..27e8fd8 100644 --- a/lustre/mdt/mdt_open.c +++ b/lustre/mdt/mdt_open.c @@ -664,7 +664,8 @@ void mdt_mfd_set_mode(struct mdt_file_data *mfd, __u64 mode) } static int mdt_mfd_open(struct mdt_thread_info *info, struct mdt_object *p, - struct mdt_object *o, __u64 flags, int created) + struct mdt_object *o, __u64 flags, int created, + struct ldlm_reply *rep) { struct ptlrpc_request *req = mdt_info_req(info); struct mdt_export_data *med = &req->rq_export->exp_mdt_data; @@ -692,6 +693,9 @@ static int mdt_mfd_open(struct mdt_thread_info *info, struct mdt_object *p, rc = mdt_create_data(info, p, o); if (rc) RETURN(rc); + + if (exp_connect_flags(req->rq_export) & OBD_CONNECT_DISP_STRIPE) + mdt_set_disposition(info, rep, DISP_OPEN_STRIPE); } CDEBUG(D_INODE, "after open, ma_valid bit = "LPX64" lmm_size = %d\n", @@ -979,15 +983,15 @@ int mdt_finish_open(struct mdt_thread_info *info, repbody->valid |= OBD_MD_FLEASIZE; } 
mdt_set_disposition(info, rep, DISP_OPEN_OPEN); - RETURN(0); - } - } + RETURN(0); + } + } - rc = mdt_mfd_open(info, p, o, flags, created); + rc = mdt_mfd_open(info, p, o, flags, created, rep); if (!rc) mdt_set_disposition(info, rep, DISP_OPEN_OPEN); - RETURN(rc); + RETURN(rc); } extern void mdt_req_from_lcd(struct ptlrpc_request *req, diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index 9213b20..e9f95c0 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -1035,6 +1035,8 @@ struct obd_import *class_new_import(struct obd_device *obd) CFS_INIT_LIST_HEAD(&imp->imp_replay_list); CFS_INIT_LIST_HEAD(&imp->imp_sending_list); CFS_INIT_LIST_HEAD(&imp->imp_delayed_list); + CFS_INIT_LIST_HEAD(&imp->imp_committed_list); + imp->imp_replay_cursor = &imp->imp_committed_list; spin_lock_init(&imp->imp_lock); imp->imp_last_success_conn = 0; imp->imp_state = LUSTRE_IMP_NEW; diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c index 6356f2a..207c724 100644 --- a/lustre/obdclass/lprocfs_status.c +++ b/lustre/obdclass/lprocfs_status.c @@ -879,6 +879,7 @@ static const char *obd_connect_names[] = { "short_io", "pingless", "flock_deadlock", + "disp_stripe", "unknown", NULL }; diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 9c5fc00..7018182 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -2416,6 +2416,39 @@ int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async) } EXPORT_SYMBOL(ptlrpc_unregister_reply); +static void ptlrpc_free_request(struct ptlrpc_request *req) +{ + spin_lock(&req->rq_lock); + req->rq_replay = 0; + spin_unlock(&req->rq_lock); + + if (req->rq_commit_cb != NULL) + req->rq_commit_cb(req); + cfs_list_del_init(&req->rq_replay_list); + + __ptlrpc_req_finished(req, 1); +} + +/** + * the request is committed and dropped from the replay list of its import + */ +void ptlrpc_request_committed(struct ptlrpc_request *req, int force) +{ + struct obd_import *imp = 
req->rq_import; + + spin_lock(&imp->imp_lock); + if (cfs_list_empty(&req->rq_replay_list)) { + spin_unlock(&imp->imp_lock); + return; + } + + if (force || req->rq_transno <= imp->imp_peer_committed_transno) + ptlrpc_free_request(req); + + spin_unlock(&imp->imp_lock); +} +EXPORT_SYMBOL(ptlrpc_request_committed); + /** * Iterates through replay_list on import and prunes * all requests have transno smaller than last_committed for the @@ -2426,10 +2459,10 @@ EXPORT_SYMBOL(ptlrpc_unregister_reply); */ void ptlrpc_free_committed(struct obd_import *imp) { - cfs_list_t *tmp, *saved; - struct ptlrpc_request *req; - struct ptlrpc_request *last_req = NULL; /* temporary fire escape */ - ENTRY; + struct ptlrpc_request *req, *saved; + struct ptlrpc_request *last_req = NULL; /* temporary fire escape */ + bool skip_committed_list = true; + ENTRY; LASSERT(imp != NULL); LASSERT(spin_is_locked(&imp->imp_lock)); @@ -2445,13 +2478,15 @@ void ptlrpc_free_committed(struct obd_import *imp) CDEBUG(D_RPCTRACE, "%s: committing for last_committed "LPU64" gen %d\n", imp->imp_obd->obd_name, imp->imp_peer_committed_transno, imp->imp_generation); + + if (imp->imp_generation != imp->imp_last_generation_checked) + skip_committed_list = false; + imp->imp_last_transno_checked = imp->imp_peer_committed_transno; imp->imp_last_generation_checked = imp->imp_generation; - cfs_list_for_each_safe(tmp, saved, &imp->imp_replay_list) { - req = cfs_list_entry(tmp, struct ptlrpc_request, - rq_replay_list); - + cfs_list_for_each_entry_safe(req, saved, &imp->imp_replay_list, + rq_replay_list) { /* XXX ok to remove when 1357 resolved - rread 05/29/03 */ LASSERT(req != last_req); last_req = req; @@ -2465,29 +2500,37 @@ void ptlrpc_free_committed(struct obd_import *imp) GOTO(free_req, 0); } - if (req->rq_replay) { - DEBUG_REQ(D_RPCTRACE, req, "keeping (FL_REPLAY)"); - continue; - } - /* not yet committed */ if (req->rq_transno > imp->imp_peer_committed_transno) { DEBUG_REQ(D_RPCTRACE, req, "stopping search"); break; 
} + if (req->rq_replay) { + DEBUG_REQ(D_RPCTRACE, req, "keeping (FL_REPLAY)"); + cfs_list_move_tail(&req->rq_replay_list, + &imp->imp_committed_list); + continue; + } + DEBUG_REQ(D_INFO, req, "commit (last_committed "LPU64")", imp->imp_peer_committed_transno); free_req: - spin_lock(&req->rq_lock); - req->rq_replay = 0; - spin_unlock(&req->rq_lock); - if (req->rq_commit_cb != NULL) - req->rq_commit_cb(req); - cfs_list_del_init(&req->rq_replay_list); - __ptlrpc_req_finished(req, 1); + ptlrpc_free_request(req); } + if (skip_committed_list) + GOTO(out, 0); + + cfs_list_for_each_entry_safe(req, saved, &imp->imp_committed_list, + rq_replay_list) { + LASSERT(req->rq_transno != 0); + if (req->rq_import_generation < imp->imp_generation) { + DEBUG_REQ(D_RPCTRACE, req, "free stale open request"); + ptlrpc_free_request(req); + } + } +out: EXIT; return; } diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c index 30e8471..db2c547 100644 --- a/lustre/ptlrpc/import.c +++ b/lustre/ptlrpc/import.c @@ -567,20 +567,32 @@ static int import_select_connection(struct obd_import *imp) */ static int ptlrpc_first_transno(struct obd_import *imp, __u64 *transno) { - struct ptlrpc_request *req; - cfs_list_t *tmp; - - if (cfs_list_empty(&imp->imp_replay_list)) - return 0; - tmp = imp->imp_replay_list.next; - req = cfs_list_entry(tmp, struct ptlrpc_request, rq_replay_list); - *transno = req->rq_transno; - if (req->rq_transno == 0) { - DEBUG_REQ(D_ERROR, req, "zero transno in replay"); - LBUG(); - } - - return 1; + struct ptlrpc_request *req; + cfs_list_t *tmp; + + /* The requests in committed_list always have smaller transnos than + * the requests in replay_list */ + if (!cfs_list_empty(&imp->imp_committed_list)) { + tmp = imp->imp_committed_list.next; + req = cfs_list_entry(tmp, struct ptlrpc_request, rq_replay_list); + *transno = req->rq_transno; + if (req->rq_transno == 0) { + DEBUG_REQ(D_ERROR, req, "zero transno in committed_list"); + LBUG(); + } + return 1; + } + if 
(!cfs_list_empty(&imp->imp_replay_list)) { + tmp = imp->imp_replay_list.next; + req = cfs_list_entry(tmp, struct ptlrpc_request, rq_replay_list); + *transno = req->rq_transno; + if (req->rq_transno == 0) { + DEBUG_REQ(D_ERROR, req, "zero transno in replay_list"); + LBUG(); + } + return 1; + } + return 0; } /** diff --git a/lustre/ptlrpc/recover.c b/lustre/ptlrpc/recover.c index 1dd4533..266e826 100644 --- a/lustre/ptlrpc/recover.c +++ b/lustre/ptlrpc/recover.c @@ -114,23 +114,58 @@ int ptlrpc_replay_next(struct obd_import *imp, int *inflight) * imp_lock is being held by ptlrpc_replay, but it's not. it's * just a little race... */ - cfs_list_for_each_safe(tmp, pos, &imp->imp_replay_list) { - req = cfs_list_entry(tmp, struct ptlrpc_request, - rq_replay_list); - - /* If need to resend the last sent transno (because a - reconnect has occurred), then stop on the matching - req and send it again. If, however, the last sent - transno has been committed then we continue replay - from the next request. 
*/ - if (req->rq_transno > last_transno) { - if (imp->imp_resend_replay) - lustre_msg_add_flags(req->rq_reqmsg, - MSG_RESENT); - break; - } - req = NULL; - } + + /* Replay all the committed open requests on committed_list first */ + if (!cfs_list_empty(&imp->imp_committed_list)) { + tmp = imp->imp_committed_list.prev; + req = cfs_list_entry(tmp, struct ptlrpc_request, + rq_replay_list); + + /* The last request on committed_list hasn't been replayed */ + if (req->rq_transno > last_transno) { + /* Since the imp_committed_list is immutable before + * all of it's requests being replayed, it's safe to + * use a cursor to accelerate the search */ + imp->imp_replay_cursor = imp->imp_replay_cursor->next; + + while (imp->imp_replay_cursor != + &imp->imp_committed_list) { + req = cfs_list_entry(imp->imp_replay_cursor, + struct ptlrpc_request, + rq_replay_list); + if (req->rq_transno > last_transno) + break; + + req = NULL; + imp->imp_replay_cursor = + imp->imp_replay_cursor->next; + } + } else { + /* All requests on committed_list have been replayed */ + imp->imp_replay_cursor = &imp->imp_committed_list; + req = NULL; + } + } + + /* All the requests in committed list have been replayed, let's replay + * the imp_replay_list */ + if (req == NULL) { + cfs_list_for_each_safe(tmp, pos, &imp->imp_replay_list) { + req = cfs_list_entry(tmp, struct ptlrpc_request, + rq_replay_list); + + if (req->rq_transno > last_transno) + break; + req = NULL; + } + } + + /* If need to resend the last sent transno (because a reconnect + * has occurred), then stop on the matching req and send it again. + * If, however, the last sent transno has been committed then we + * continue replay from the next request. 
*/ + if (req != NULL && imp->imp_resend_replay) + lustre_msg_add_flags(req->rq_reqmsg, MSG_RESENT); spin_lock(&imp->imp_lock); imp->imp_resend_replay = 0;
diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh
index b012490..c78359f 100644
--- a/lustre/tests/sanity.sh
+++ b/lustre/tests/sanity.sh
@@ -10841,6 +10841,35 @@ test_208() {
 }
 run_test 208 "Exclusive open"
 
+# Open and close the same file 500 times and compare the ptlrpc_cache slab
+# count from /proc/slabinfo before and after.  The test fails if the count
+# grew by 300 or more, i.e. the open/close requests were not freed.
+test_209() {
+	# skip when the MDC connect flags do not include disp_stripe
+	[ -z "$($LCTL get_param -n mdc.*.connect_flags | grep disp_stripe)" ] &&
+		skip_env "must have disp_stripe" && return
+
+	touch $DIR/$tfile
+	sync; sleep 5; sync;
+
+	echo 3 > /proc/sys/vm/drop_caches
+	req_before=$(awk '/ptlrpc_cache / { print $2 }' /proc/slabinfo)
+
+	# open/close 500 times
+	for i in $(seq 500); do
+		cat $DIR/$tfile
+	done
+
+	echo 3 > /proc/sys/vm/drop_caches
+	req_after=$(awk '/ptlrpc_cache / { print $2 }' /proc/slabinfo)
+
+	echo "before: $req_before, after: $req_after"
+	[ $((req_after - req_before)) -ge 300 ] &&
+		error "open/close requests are not freed"
+	return 0
+}
+run_test 209 "read-only open/close requests should be freed promptly"
+
 test_212() {
 	size=`date +%s`
 	size=$((size % 8192 + 1))