From 4f3d5c73fdb2891826d4e7789cbaec39cf4bbf3d Mon Sep 17 00:00:00 2001 From: pravin Date: Wed, 12 May 2010 19:55:24 +0530 Subject: [PATCH] b=18857 enhance seq allocation scalability by updating seq data asynchronously. this patch also removes seq replay. ref bug for details. a=pravin,tappro i=tappro i=alexander.zarochentsev i=pravin --- lustre/fid/fid_handler.c | 338 ++++++++++++++++--------------------- lustre/fid/fid_internal.h | 12 +- lustre/fid/fid_request.c | 34 ++-- lustre/fid/fid_store.c | 38 ++++- lustre/include/lustre/lustre_idl.h | 12 -- lustre/include/lustre_fid.h | 19 ++- lustre/include/obd_support.h | 3 +- lustre/lmv/lmv_fld.c | 2 +- lustre/mdt/mdt_handler.c | 25 +-- lustre/tests/replay-single.sh | 30 ++-- 10 files changed, 229 insertions(+), 284 deletions(-) diff --git a/lustre/fid/fid_handler.c b/lustre/fid/fid_handler.c index e85d0ef..b19ba65 100644 --- a/lustre/fid/fid_handler.c +++ b/lustre/fid/fid_handler.c @@ -54,6 +54,7 @@ #include #include +#include #include #include #include @@ -100,6 +101,18 @@ out_up: return rc; } EXPORT_SYMBOL(seq_server_set_cli); +/* + * allocate \a w units of sequence from range \a from. + */ +static inline void range_alloc(struct lu_seq_range *to, + struct lu_seq_range *from, + __u64 width) +{ + width = min(range_space(from), width); + to->lsr_start = from->lsr_start; + to->lsr_end = from->lsr_start + width; + from->lsr_start += width; +} /** * On controller node, allocate new super sequence for regular sequence server. @@ -109,67 +122,24 @@ EXPORT_SYMBOL(seq_server_set_cli); */ static int __seq_server_alloc_super(struct lu_server_seq *seq, - struct lu_seq_range *in, struct lu_seq_range *out, const struct lu_env *env) { struct lu_seq_range *space = &seq->lss_space; - struct thandle *th; - __u64 mdt = out->lsr_mdt; - int rc, credit; + int rc; ENTRY; LASSERT(range_is_sane(space)); - if (in != NULL) { - CDEBUG(D_INFO, "%s: Input seq range: " - DRANGE"\n", seq->lss_name, PRANGE(in)); - - if (in->lsr_end > space->lsr_start) - space->lsr_start = in->lsr_end; - *out = *in; - - CDEBUG(D_INFO, "%s: Recovered space: "DRANGE"\n", - seq->lss_name, PRANGE(space)); + if (range_is_exhausted(space)) { + CERROR("%s: Sequences space is exhausted\n", + seq->lss_name); + RETURN(-ENOSPC); } else { - if (range_space(space) < seq->lss_width) { - CWARN("%s: Sequences space to be exhausted soon. 
" - "Only "LPU64" sequences left\n", seq->lss_name, - range_space(space)); - *out = *space; - space->lsr_start = space->lsr_end; - } else if (range_is_exhausted(space)) { - CERROR("%s: Sequences space is exhausted\n", - seq->lss_name); - RETURN(-ENOSPC); - } else { - range_alloc(out, space, seq->lss_width); - } - } - out->lsr_mdt = mdt; - - credit = SEQ_TXN_STORE_CREDITS + FLD_TXN_INDEX_INSERT_CREDITS; - - th = seq_store_trans_start(seq, env, credit); - if (IS_ERR(th)) - RETURN(PTR_ERR(th)); - - rc = seq_store_write(seq, env, th); - if (rc) { - CERROR("%s: Can't write space data, rc %d\n", - seq->lss_name, rc); - goto out; - } - - rc = fld_server_create(seq->lss_site->ms_server_fld, - env, out, th); - if (rc) { - CERROR("%s: Can't Update fld database, rc %d\n", - seq->lss_name, rc); + range_alloc(out, space, seq->lss_width); } -out: - seq_store_trans_stop(seq, env, th); + rc = seq_store_update(env, seq, out, 1 /* sync */); CDEBUG(D_INFO, "%s: super-sequence allocation rc = %d " DRANGE"\n", seq->lss_name, rc, PRANGE(out)); @@ -178,7 +148,6 @@ out: } int seq_server_alloc_super(struct lu_server_seq *seq, - struct lu_seq_range *in, struct lu_seq_range *out, const struct lu_env *env) { @@ -186,145 +155,137 @@ int seq_server_alloc_super(struct lu_server_seq *seq, ENTRY; cfs_down(&seq->lss_sem); - rc = __seq_server_alloc_super(seq, in, out, env); + rc = __seq_server_alloc_super(seq, out, env); cfs_up(&seq->lss_sem); RETURN(rc); } +static int __seq_set_init(const struct lu_env *env, + struct lu_server_seq *seq) +{ + struct lu_seq_range *space = &seq->lss_space; + int rc; + + range_alloc(&seq->lss_lowater_set, space, seq->lss_set_width); + range_alloc(&seq->lss_hiwater_set, space, seq->lss_set_width); + + rc = seq_store_update(env, seq, NULL, 1); + seq->lss_set_transno = 0; + + return rc; +} + +/* + * This function implements new seq allocation algorithm using async + * updates to seq file on disk. ref bug 18857 for details. + * there are four variable to keep track of this process + * + * lss_space; - available lss_space + * lss_lowater_set; - lu_seq_range for all seqs before barrier, i.e. safe to use + * lss_hiwater_set; - lu_seq_range after barrier, i.e. allocated but may be + * not yet committed + * + * when lss_lowater_set reaches the end it is replaced with hiwater one and + * a write operation is initiated to allocate new hiwater range. + * if last seq write opearion is still not commited, current operation is + * flaged as sync write op. + */ +static int range_alloc_set(const struct lu_env *env, + struct lu_seq_range *out, + struct lu_server_seq *seq) +{ + struct lu_seq_range *space = &seq->lss_space; + struct lu_seq_range *loset = &seq->lss_lowater_set; + struct lu_seq_range *hiset = &seq->lss_hiwater_set; + int rc = 0; + + if (range_is_zero(loset)) + __seq_set_init(env, seq); + + if (OBD_FAIL_CHECK(OBD_FAIL_SEQ_ALLOC)) /* exhaust set */ + loset->lsr_start = loset->lsr_end; + + if (range_is_exhausted(loset)) { + /* reached high water mark. 
*/ + struct lu_device *dev = seq->lss_site->ms_lu.ls_top_dev; + struct lu_target *tg = dev->ld_obd->u.obt.obt_lut; + int obd_num_clients = dev->ld_obd->obd_num_exports; + __u64 set_sz; + int sync = 0; + + /* calculate new seq width based on number of clients */ + set_sz = max(seq->lss_set_width, + obd_num_clients * seq->lss_width); + set_sz = min(range_space(space), set_sz); + + /* Switch to hiwater range now */ + loset = hiset; + /* allocate new hiwater range */ + range_alloc(hiset, space, set_sz); + + if (seq->lss_set_transno > dev->ld_obd->obd_last_committed) + sync = 1; + + /* update ondisk seq with new *space */ + rc = seq_store_update(env, seq, NULL, sync); + + /* set new hiwater transno */ + cfs_spin_lock(&tg->lut_translock); + seq->lss_set_transno = tg->lut_last_transno; + cfs_spin_unlock(&tg->lut_translock); + } + + LASSERTF(!range_is_exhausted(loset) || range_is_sane(loset), + DRANGE"\n", PRANGE(loset)); + + if (rc == 0) + range_alloc(out, loset, seq->lss_width); + + RETURN(rc); +} + static int __seq_server_alloc_meta(struct lu_server_seq *seq, - struct lu_seq_range *in, struct lu_seq_range *out, const struct lu_env *env) { struct lu_seq_range *space = &seq->lss_space; - struct thandle *th; int rc = 0; ENTRY; LASSERT(range_is_sane(space)); - /* - * This is recovery case. Adjust super range if input range looks like - * it is allocated from new super. - */ - if (in != NULL) { - CDEBUG(D_INFO, "%s: Input seq range: " - DRANGE"\n", seq->lss_name, PRANGE(in)); - - if (in->lsr_end <= space->lsr_start) { - /* - * Client is replaying a fairly old range, server - * don't need to do any allocation. - */ - } else if (range_is_exhausted(space)) { - /* - * Start is set to end of last allocated, because it - * *is* already allocated so we take that into account - * and do not use for other allocations. - */ - space->lsr_start = in->lsr_end; - - /* - * End is set to in->lsr_start + super sequence - * allocation unit. That is because in->lsr_start is - * first seq in new allocated range from controller - * before failure. - */ - space->lsr_end = in->lsr_start + LUSTRE_SEQ_SUPER_WIDTH; - - if (!seq->lss_cli) { - CERROR("%s: No sequence controller " - "is attached.\n", seq->lss_name); - RETURN(-ENODEV); - } - - /* - * Let controller know that this is recovery and last - * obtained range from it was @space. - */ - rc = seq_client_replay_super(seq->lss_cli, space, env); - - if (rc) { - CERROR("%s: Can't replay super-sequence, " - "rc %d\n", seq->lss_name, rc); - RETURN(rc); - } - } else { - /* - * Update super start by end from client's range. Super - * end should not be changed if range was not exhausted. - */ - space->lsr_start = in->lsr_end; + /* Check if available space ends and allocate new super seq */ + if (range_is_exhausted(space)) { + if (!seq->lss_cli) { + CERROR("%s: No sequence controller is attached.\n", + seq->lss_name); + RETURN(-ENODEV); } - /* sending replay_super to update fld as only super sequence - * server can update fld. - * we are sending meta sequence to fld rather than super - * sequence, but fld server can handle range merging. 
*/ - - in->lsr_mdt = space->lsr_mdt; - rc = seq_client_replay_super(seq->lss_cli, in, env); - + rc = seq_client_alloc_super(seq->lss_cli, env); if (rc) { - CERROR("%s: Can't replay super-sequence, " - "rc %d\n", seq->lss_name, rc); + CERROR("%s: Can't allocate super-sequence, rc %d\n", + seq->lss_name, rc); RETURN(rc); } - *out = *in; - - CDEBUG(D_INFO, "%s: Recovered space: "DRANGE"\n", - seq->lss_name, PRANGE(space)); - } else { - /* - * XXX: Avoid cascading RPCs using kind of async preallocation - * when meta-sequence is close to exhausting. - */ - if (range_is_exhausted(space)) { - if (!seq->lss_cli) { - CERROR("%s: No sequence controller " - "is attached.\n", seq->lss_name); - RETURN(-ENODEV); - } - - rc = seq_client_alloc_super(seq->lss_cli, env); - if (rc) { - CERROR("%s: Can't allocate super-sequence, " - "rc %d\n", seq->lss_name, rc); - RETURN(rc); - } - - /* Saving new range to allocation space. */ - *space = seq->lss_cli->lcs_space; - LASSERT(range_is_sane(space)); - } - - range_alloc(out, space, seq->lss_width); - } - - th = seq_store_trans_start(seq, env, SEQ_TXN_STORE_CREDITS); - if (IS_ERR(th)) - RETURN(PTR_ERR(th)); - - rc = seq_store_write(seq, env, th); - if (rc) { - CERROR("%s: Can't write space data, rc %d\n", - seq->lss_name, rc); + /* Saving new range to allocation space. */ + *space = seq->lss_cli->lcs_space; + LASSERT(range_is_sane(space)); } + rc = range_alloc_set(env, out, seq); if (rc == 0) { CDEBUG(D_INFO, "%s: Allocated meta-sequence " DRANGE"\n", seq->lss_name, PRANGE(out)); } - seq_store_trans_stop(seq, env, th); RETURN(rc); } int seq_server_alloc_meta(struct lu_server_seq *seq, - struct lu_seq_range *in, struct lu_seq_range *out, const struct lu_env *env) { @@ -332,7 +293,7 @@ int seq_server_alloc_meta(struct lu_server_seq *seq, ENTRY; cfs_down(&seq->lss_sem); - rc = __seq_server_alloc_meta(seq, in, out, env); + rc = __seq_server_alloc_meta(seq, out, env); cfs_up(&seq->lss_sem); RETURN(rc); @@ -341,8 +302,7 @@ EXPORT_SYMBOL(seq_server_alloc_meta); static int seq_server_handle(struct lu_site *site, const struct lu_env *env, - __u32 opc, struct lu_seq_range *in, - struct lu_seq_range *out) + __u32 opc, struct lu_seq_range *out) { int rc; struct md_site *mite; @@ -356,8 +316,7 @@ static int seq_server_handle(struct lu_site *site, "initialized\n"); RETURN(-EINVAL); } - rc = seq_server_alloc_meta(mite->ms_server_seq, - in, out, env); + rc = seq_server_alloc_meta(mite->ms_server_seq, out, env); break; case SEQ_ALLOC_SUPER: if (!mite->ms_control_seq) { @@ -365,8 +324,7 @@ static int seq_server_handle(struct lu_site *site, "initialized\n"); RETURN(-EINVAL); } - rc = seq_server_alloc_super(mite->ms_control_seq, - in, out, env); + rc = seq_server_alloc_super(mite->ms_control_seq, out, env); break; default: rc = -EINVAL; @@ -380,12 +338,13 @@ static int seq_req_handle(struct ptlrpc_request *req, const struct lu_env *env, struct seq_thread_info *info) { - struct lu_seq_range *out, *in = NULL, *tmp; + struct lu_seq_range *out, *tmp; struct lu_site *site; int rc = -EPROTO; __u32 *opc; ENTRY; + LASSERT(!(lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY)); site = req->rq_export->exp_obd->obd_lu_dev->ld_site; LASSERT(site != NULL); @@ -401,20 +360,11 @@ static int seq_req_handle(struct ptlrpc_request *req, tmp = req_capsule_client_get(info->sti_pill, &RMF_SEQ_RANGE); - if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY) { - in = tmp; - - if (range_is_zero(in) || !range_is_sane(in)) { - CERROR("Replayed seq range is invalid: " - DRANGE"\n", PRANGE(in)); - 
RETURN(err_serious(-EINVAL)); - } - } /* seq client passed mdt id, we need to pass that using out * range parameter */ out->lsr_mdt = tmp->lsr_mdt; - rc = seq_server_handle(site, env, *opc, in, out); + rc = seq_server_handle(site, env, *opc, out); } else rc = err_serious(-EPROTO); @@ -455,6 +405,9 @@ static int seq_handle(struct ptlrpc_request *req) seq_thread_info_init(req, info); rc = seq_req_handle(req, env, info); + /* XXX: we don't need replay but MDT assign transno in any case, + * remove it manually before reply*/ + lustre_msg_set_transno(req->rq_repmsg, 0); seq_thread_info_fini(info); return rc; @@ -522,6 +475,7 @@ static void seq_server_proc_fini(struct lu_server_seq *seq) } #endif + int seq_server_init(struct lu_server_seq *seq, struct dt_device *dev, const char *prefix, @@ -529,17 +483,21 @@ int seq_server_init(struct lu_server_seq *seq, struct md_site *ms, const struct lu_env *env) { - struct thandle *th; int rc, is_srv = (type == LUSTRE_SEQ_SERVER); ENTRY; - LASSERT(dev != NULL); + LASSERT(dev != NULL); LASSERT(prefix != NULL); seq->lss_cli = NULL; seq->lss_type = type; seq->lss_site = ms; range_init(&seq->lss_space); + + range_init(&seq->lss_lowater_set); + range_init(&seq->lss_hiwater_set); + seq->lss_set_width = LUSTRE_SEQ_BATCH_WIDTH; + cfs_sema_init(&seq->lss_sem, 1); seq->lss_width = is_srv ? @@ -565,22 +523,16 @@ int seq_server_init(struct lu_server_seq *seq, "on store. Initialize space\n", seq->lss_name); - th = seq_store_trans_start(seq, env, SEQ_TXN_STORE_CREDITS); - if (IS_ERR(th)) - RETURN(PTR_ERR(th)); - - /* Save default controller value to store. */ - rc = seq_store_write(seq, env, th); + rc = seq_store_update(env, seq, NULL, 0); if (rc) { CERROR("%s: Can't write space data, " "rc %d\n", seq->lss_name, rc); } - seq_store_trans_stop(seq, env, th); } else if (rc) { - CERROR("%s: Can't read space data, rc %d\n", - seq->lss_name, rc); - GOTO(out, rc); - } + CERROR("%s: Can't read space data, rc %d\n", + seq->lss_name, rc); + GOTO(out, rc); + } if (is_srv) { LASSERT(range_is_sane(&seq->lss_space)); @@ -591,13 +543,13 @@ int seq_server_init(struct lu_server_seq *seq, rc = seq_server_proc_init(seq); if (rc) - GOTO(out, rc); + GOTO(out, rc); - EXIT; + EXIT; out: - if (rc) - seq_server_fini(seq, env); - return rc; + if (rc) + seq_server_fini(seq, env); + return rc; } EXPORT_SYMBOL(seq_server_init); diff --git a/lustre/fid/fid_internal.h b/lustre/fid/fid_internal.h index 03c5227..428044c 100644 --- a/lustre/fid/fid_internal.h +++ b/lustre/fid/fid_internal.h @@ -75,19 +75,11 @@ int seq_store_init(struct lu_server_seq *seq, void seq_store_fini(struct lu_server_seq *seq, const struct lu_env *env); -int seq_store_write(struct lu_server_seq *seq, - const struct lu_env *env, - struct thandle *th); - int seq_store_read(struct lu_server_seq *seq, const struct lu_env *env); -struct thandle * seq_store_trans_start(struct lu_server_seq *seq, - const struct lu_env *env, - int credits); -void seq_store_trans_stop(struct lu_server_seq *seq, - const struct lu_env *env, - struct thandle *th); +int seq_store_update(const struct lu_env *env, struct lu_server_seq *seq, + struct lu_seq_range *out, int sync); #ifdef LPROCFS extern struct lprocfs_vars seq_server_proc_list[]; diff --git a/lustre/fid/fid_request.c b/lustre/fid/fid_request.c index 62d7349..7b5d545 100644 --- a/lustre/fid/fid_request.c +++ b/lustre/fid/fid_request.c @@ -63,13 +63,13 @@ #include #include "fid_internal.h" -static int seq_client_rpc(struct lu_client_seq *seq, struct lu_seq_range *input, +static int 
seq_client_rpc(struct lu_client_seq *seq, struct lu_seq_range *output, __u32 opc, const char *opcname) { struct obd_export *exp = seq->lcs_exp; struct ptlrpc_request *req; - struct lu_seq_range *out, *in; + struct lu_seq_range *out, *in; __u32 *op; int rc; ENTRY; @@ -85,10 +85,7 @@ static int seq_client_rpc(struct lu_client_seq *seq, struct lu_seq_range *input, /* Zero out input range, this is not recovery yet. */ in = req_capsule_client_get(&req->rq_pill, &RMF_SEQ_RANGE); - if (input != NULL) - *in = *input; - else - range_init(in); + range_init(in); ptlrpc_request_set_replen(req); @@ -126,7 +123,6 @@ static int seq_client_rpc(struct lu_client_seq *seq, struct lu_seq_range *input, DRANGE"]\n", seq->lcs_name, PRANGE(output)); GOTO(out_req, rc = -EINVAL); } - *in = *out; CDEBUG(D_INFO, "%s: Allocated %s-sequence "DRANGE"]\n", seq->lcs_name, opcname, PRANGE(output)); @@ -138,9 +134,8 @@ out_req: } /* Request sequence-controller node to allocate new super-sequence. */ -int seq_client_replay_super(struct lu_client_seq *seq, - struct lu_seq_range *range, - const struct lu_env *env) +int seq_client_alloc_super(struct lu_client_seq *seq, + const struct lu_env *env) { int rc; ENTRY; @@ -150,11 +145,11 @@ int seq_client_replay_super(struct lu_client_seq *seq, #ifdef __KERNEL__ if (seq->lcs_srv) { LASSERT(env != NULL); - rc = seq_server_alloc_super(seq->lcs_srv, range, - &seq->lcs_space, env); + rc = seq_server_alloc_super(seq->lcs_srv, &seq->lcs_space, + env); } else { #endif - rc = seq_client_rpc(seq, range, &seq->lcs_space, + rc = seq_client_rpc(seq, &seq->lcs_space, SEQ_ALLOC_SUPER, "super"); #ifdef __KERNEL__ } @@ -163,14 +158,6 @@ int seq_client_replay_super(struct lu_client_seq *seq, RETURN(rc); } -/* Request sequence-controller node to allocate new super-sequence. */ -int seq_client_alloc_super(struct lu_client_seq *seq, - const struct lu_env *env) -{ - ENTRY; - RETURN(seq_client_replay_super(seq, NULL, env)); -} - /* Request sequence-controller node to allocate new meta-sequence. 
*/ static int seq_client_alloc_meta(struct lu_client_seq *seq, const struct lu_env *env) @@ -181,11 +168,10 @@ static int seq_client_alloc_meta(struct lu_client_seq *seq, #ifdef __KERNEL__ if (seq->lcs_srv) { LASSERT(env != NULL); - rc = seq_server_alloc_meta(seq->lcs_srv, NULL, - &seq->lcs_space, env); + rc = seq_server_alloc_meta(seq->lcs_srv, &seq->lcs_space, env); } else { #endif - rc = seq_client_rpc(seq, NULL, &seq->lcs_space, + rc = seq_client_rpc(seq, &seq->lcs_space, SEQ_ALLOC_META, "meta"); #ifdef __KERNEL__ } diff --git a/lustre/fid/fid_store.c b/lustre/fid/fid_store.c index aea967b..feb34ef 100644 --- a/lustre/fid/fid_store.c +++ b/lustre/fid/fid_store.c @@ -73,8 +73,9 @@ static struct lu_buf *seq_store_buf(struct seq_thread_info *info) return buf; } -struct thandle * seq_store_trans_start(struct lu_server_seq *seq, - const struct lu_env *env, int credit) +struct thandle *seq_store_trans_start(struct lu_server_seq *seq, + const struct lu_env *env, int credit, + int sync) { struct seq_thread_info *info; struct dt_device *dt_dev; @@ -86,6 +87,8 @@ struct thandle * seq_store_trans_start(struct lu_server_seq *seq, LASSERT(info != NULL); txn_param_init(&info->sti_txn, credit); + if (sync) + txn_param_sync(&info->sti_txn); th = dt_dev->dd_ops->dt_trans_start(env, dt_dev, &info->sti_txn); return th; @@ -137,6 +140,37 @@ int seq_store_write(struct lu_server_seq *seq, RETURN(rc); } +int seq_store_update(const struct lu_env *env, struct lu_server_seq *seq, + struct lu_seq_range *out, int sync) +{ + struct thandle *th; + int rc; + int credits = SEQ_TXN_STORE_CREDITS; + + if (out != NULL) + credits += FLD_TXN_INDEX_INSERT_CREDITS; + + th = seq_store_trans_start(seq, env, credits, sync); + if (IS_ERR(th)) + RETURN(PTR_ERR(th)); + + rc = seq_store_write(seq, env, th); + if (rc) { + CERROR("%s: Can't write space data, rc %d\n", + seq->lss_name, rc); + } else if (out != NULL) { + rc = fld_server_create(seq->lss_site->ms_server_fld, + env, out, th); + if (rc) { + CERROR("%s: Can't Update fld database, rc %d\n", + seq->lss_name, rc); + } + } + + seq_store_trans_stop(seq, env, th); + return rc; +} + /* * This function implies that caller takes care about locking or locking is not * needed (init time). diff --git a/lustre/include/lustre/lustre_idl.h b/lustre/include/lustre/lustre_idl.h index f331ec5..3a1581f 100644 --- a/lustre/include/lustre/lustre_idl.h +++ b/lustre/include/lustre/lustre_idl.h @@ -216,18 +216,6 @@ static inline int range_within(const struct lu_seq_range *range, return s >= range->lsr_start && s < range->lsr_end; } -/** - * allocate \a w units of sequence from range \a from. - */ -static inline void range_alloc(struct lu_seq_range *to, - struct lu_seq_range *from, - __u64 width) -{ - to->lsr_start = from->lsr_start; - to->lsr_end = from->lsr_start + width; - from->lsr_start += width; -} - static inline int range_is_sane(const struct lu_seq_range *range) { return (range->lsr_end >= range->lsr_start); diff --git a/lustre/include/lustre_fid.h b/lustre/include/lustre_fid.h index 6f60da5..d00b9af 100644 --- a/lustre/include/lustre_fid.h +++ b/lustre/include/lustre_fid.h @@ -79,6 +79,11 @@ enum { /* changed to 16 to avoid overflow in test11 */ LUSTRE_SEQ_META_WIDTH = 0x0000000000000010ULL, + /* + * seq allocation pool size. + */ + LUSTRE_SEQ_BATCH_WIDTH = LUSTRE_SEQ_META_WIDTH * 1000, + /* * This is how many sequences may be in one super-sequence allocated to * MDTs. 
@@ -183,6 +188,10 @@ struct lu_server_seq { /* Available sequences space */ struct lu_seq_range lss_space; + /* keeps highwater in lsr_end for seq allocation algorithm */ + struct lu_seq_range lss_lowater_set; + struct lu_seq_range lss_hiwater_set; + /* * Device for server side seq manager needs (saving sequences to backing * store). @@ -216,6 +225,14 @@ struct lu_server_seq { */ __u64 lss_width; + /* + * minimum lss_alloc_set size that should be allocated from + * lss_space + */ + __u64 lss_set_width; + + /* transaction no of seq update write operation */ + __u64 lss_set_transno; /** * Pointer to site object, required to access site fld. */ @@ -236,12 +253,10 @@ void seq_server_fini(struct lu_server_seq *seq, const struct lu_env *env); int seq_server_alloc_super(struct lu_server_seq *seq, - struct lu_seq_range *in, struct lu_seq_range *out, const struct lu_env *env); int seq_server_alloc_meta(struct lu_server_seq *seq, - struct lu_seq_range *in, struct lu_seq_range *out, const struct lu_env *env); diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 591e098..fc51ca0 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -407,7 +407,8 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type, #define OBD_FAIL_LLOG_ORIGIN_HANDLE_WRITE_REC_NET 0x1307 #define OBD_FAIL_LLOG_ORIGIN_HANDLE_CLOSE_NET 0x1308 #define OBD_FAIL_LLOG_CATINFO_NET 0x1309 -#define OBD_FAIL_MDS_SYNC_CAPA_SL 0x1310 +#define OBD_FAIL_MDS_SYNC_CAPA_SL 0x1310 +#define OBD_FAIL_SEQ_ALLOC 0x1311 /* Failure injection control */ #define OBD_FAIL_MASK_SYS 0x0000FF00 diff --git a/lustre/lmv/lmv_fld.c b/lustre/lmv/lmv_fld.c index 8f4f94c..064031c 100644 --- a/lustre/lmv/lmv_fld.c +++ b/lustre/lmv/lmv_fld.c @@ -70,7 +70,7 @@ int lmv_fld_lookup(struct lmv_obd *lmv, LASSERT(fid_is_sane(fid)); rc = fld_client_lookup(&lmv->lmv_fld, fid_seq(fid), mds, NULL); if (rc) { - CERROR("Error while looking for mds number. Seq "LPU64 + CERROR("Error while looking for mds number. Seq "LPX64 ", err = %d\n", fid_seq(fid), rc); RETURN(rc); } diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index bce519f..6c63afd 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -3548,25 +3548,6 @@ static int mdt_intent_policy(struct ldlm_namespace *ns, RETURN(rc); } -/* - * Seq wrappers - */ -static void mdt_seq_adjust(const struct lu_env *env, - struct mdt_device *m, int lost) -{ - struct md_site *ms = mdt_md_site(m); - struct lu_seq_range out; - ENTRY; - - LASSERT(ms && ms->ms_server_seq); - LASSERT(lost >= 0); - /* get extra seq from seq_server, moving it's range up */ - while (lost-- > 0) { - seq_server_alloc_meta(ms->ms_server_seq, NULL, &out, env); - } - EXIT; -} - static int mdt_seq_fini(const struct lu_env *env, struct mdt_device *m) { @@ -5543,12 +5524,8 @@ int mdt_postrecov(const struct lu_env *env, struct mdt_device *mdt) #ifdef HAVE_QUOTA_SUPPORT struct md_device *next = mdt->mdt_child; #endif - int rc, lost; + int rc; ENTRY; - /* if some clients didn't participate in recovery then we can possibly - * lost sequence. 
Now we should increase sequence for safe value */ - lost = obd->obd_max_recoverable_clients - obd->obd_connected_clients; - mdt_seq_adjust(env, mdt, lost); rc = ld->ld_ops->ldo_recovery_complete(env, ld); #ifdef HAVE_QUOTA_SUPPORT diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh index b9310f1..8ca4206 100755 --- a/lustre/tests/replay-single.sh +++ b/lustre/tests/replay-single.sh @@ -718,30 +718,30 @@ test_32() { } run_test 32 "close() notices client eviction; close() after client eviction" -# Abort recovery before client complete -test_33a() { # was test_33 - replay_barrier $SINGLEMDS - createmany -o $DIR/$tfile-%d 100 +test_33a() { + createmany -o $DIR/$tfile-%d 10 + replay_barrier_nosync $SINGLEMDS fail_abort $SINGLEMDS - # this file should be gone, because the replay was aborted - $CHECKSTAT -t file $DIR/$tfile-* && return 3 - unlinkmany $DIR/$tfile-%d 0 100 + # recreate shouldn't fail + createmany -o $DIR/$tfile--%d 10 || return 1 + rm $DIR/$tfile-* -f return 0 } -run_test 33a "abort recovery before client does replay" +run_test 33a "fid seq shouldn't be reused after abort recovery" + +test_33b() { + #define OBD_FAIL_SEQ_ALLOC 0x1311 + do_facet $SINGLEMDS "lctl set_param fail_loc=0x1311" -# Stale FID sequence bug 15962 -test_33b() { # was test_33a - replay_barrier $SINGLEMDS createmany -o $DIR/$tfile-%d 10 + replay_barrier_nosync $SINGLEMDS fail_abort $SINGLEMDS - unlinkmany $DIR/$tfile-%d 0 10 # recreate shouldn't fail - createmany -o $DIR/$tfile-%d 10 || return 3 - unlinkmany $DIR/$tfile-%d 0 10 + createmany -o $DIR/$tfile--%d 10 || return 1 + rm $DIR/$tfile-* -f return 0 } -run_test 33b "fid shouldn't be reused after abort recovery" +run_test 33b "test fid seq allocation" test_34() { multiop_bg_pause $DIR/$tfile O_c || return 2 -- 1.8.3.1
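
The comment above range_alloc_set() describes the new allocation scheme in terms of
lss_space, lss_lowater_set, lss_hiwater_set and lss_set_transno. The following is a
minimal, self-contained C sketch of that idea only, not of the patch itself:
struct seq_state, disk_update() and alloc_meta() are invented stand-ins for
struct lu_server_seq, seq_store_update() and __seq_server_alloc_meta(), and the
transaction numbers are reduced to plain counters. Sequences are handed out from the
low-water set; when it runs dry the server switches to the already-reserved high-water
set, reserves the next set from the remaining space, and writes that reservation to
disk asynchronously unless the previous reservation's transno has not committed yet,
in which case the write is forced synchronous.

/*
 * Simplified model of the low-water/high-water allocation in
 * range_alloc_set().  All names here (seq_range, seq_state,
 * disk_update, alloc_meta) are illustrative only and are not part
 * of the Lustre API; the real code operates on struct lu_server_seq
 * and persists ranges through seq_store_update().
 */
#include <stdio.h>
#include <stdint.h>

struct seq_range {
        uint64_t start;
        uint64_t end;                    /* exclusive */
};

struct seq_state {
        struct seq_range space;          /* whole space granted by the controller */
        struct seq_range lowater;        /* set that is safe to hand out */
        struct seq_range hiwater;        /* set reserved ahead, maybe uncommitted */
        uint64_t         set_transno;    /* transno of the last reservation write */
        uint64_t         last_committed; /* last transno known to be on disk */
        uint64_t         width;          /* sequences handed out per client request */
        uint64_t         set_width;      /* size of each pre-allocated set */
};

static uint64_t range_space(const struct seq_range *r)
{
        return r->end - r->start;
}

static int range_exhausted(const struct seq_range *r)
{
        return range_space(r) == 0;
}

/* carve "width" sequences off the front of "from" into "to" */
static void range_alloc(struct seq_range *to, struct seq_range *from,
                        uint64_t width)
{
        if (width > range_space(from))
                width = range_space(from);
        to->start = from->start;
        to->end = from->start + width;
        from->start += width;
}

/* stand-in for seq_store_update(); returns the transno of the write */
static uint64_t disk_update(struct seq_state *s, int sync)
{
        static uint64_t transno;

        printf("  disk update (%s)\n", sync ? "sync" : "async");
        if (sync)
                s->last_committed = transno + 1;   /* a sync write commits at once */
        return ++transno;
}

/* hand one meta-sequence range to a client */
static void alloc_meta(struct seq_state *s, struct seq_range *out)
{
        if (range_exhausted(&s->lowater)) {
                int sync;

                /* switch to the set that was reserved last time ... */
                s->lowater = s->hiwater;
                /* ... and reserve the next set from the remaining space */
                range_alloc(&s->hiwater, &s->space, s->set_width);

                /* force a sync write only if the previous reservation
                 * has not committed yet */
                sync = s->set_transno > s->last_committed;
                s->set_transno = disk_update(s, sync);
        }
        range_alloc(out, &s->lowater, s->width);
}

int main(void)
{
        struct seq_state s = {
                .space     = { 0x400, 0x1000 },
                .width     = 0x10,   /* LUSTRE_SEQ_META_WIDTH in the patch */
                .set_width = 0x40,   /* far smaller than the real batch width */
        };
        struct seq_range out;
        int i;

        /* bootstrap the two sets, as __seq_set_init() does */
        range_alloc(&s.lowater, &s.space, s.set_width);
        range_alloc(&s.hiwater, &s.space, s.set_width);
        s.set_transno = disk_update(&s, 1);

        for (i = 0; i < 10; i++) {
                alloc_meta(&s, &out);
                printf("client %d gets [%#lx, %#lx)\n", i,
                       (unsigned long)out.start, (unsigned long)out.end);
        }
        return 0;
}

In steady state this turns one on-disk seq update per client request into one,
usually asynchronous, update per pre-allocated batch, which is where the
scalability gain described in bug 18857 comes from.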