X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ffid%2Ffid_handler.c;h=df3a99927a84e16c81fa0f0752257af7bbae049a;hp=5af9343fe2132c1c6c722cf447db50a9812b0e3c;hb=155e4b6cf45cc0ab21f72d94e5cccbd7a0939c58;hpb=4201416b775b14d6e4cd89b7c68bb1c1bc950144 diff --git a/lustre/fid/fid_handler.c b/lustre/fid/fid_handler.c index 5af9343..df3a999 100644 --- a/lustre/fid/fid_handler.c +++ b/lustre/fid/fid_handler.c @@ -1,6 +1,4 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * +/* * GPL HEADER START * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -26,8 +24,10 @@ * GPL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. + * + * Copyright (c) 2011, Whamcloud, Inc. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -40,9 +40,6 @@ * Author: Yury Umanets */ -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif #define DEBUG_SUBSYSTEM S_FID #ifdef __KERNEL__ @@ -74,7 +71,7 @@ int seq_server_set_cli(struct lu_server_seq *seq, * Ask client for new range, assign that range to ->seq_space and write * seq state to backing store should be atomic. */ - down(&seq->lss_sem); + cfs_mutex_lock(&seq->lss_mutex); if (cli == NULL) { CDEBUG(D_INFO, "%s: Detached sequence client %s\n", @@ -93,13 +90,25 @@ int seq_server_set_cli(struct lu_server_seq *seq, seq->lss_name, cli->lcs_name); seq->lss_cli = cli; - cli->lcs_space.lsr_mdt = seq->lss_site->ms_node_id; + cli->lcs_space.lsr_index = seq->lss_site->ms_node_id; EXIT; out_up: - up(&seq->lss_sem); + cfs_mutex_unlock(&seq->lss_mutex); return rc; } EXPORT_SYMBOL(seq_server_set_cli); +/* + * allocate \a w units of sequence from range \a from. + */ +static inline void range_alloc(struct lu_seq_range *to, + struct lu_seq_range *from, + __u64 width) +{ + width = min(range_space(from), width); + to->lsr_start = from->lsr_start; + to->lsr_end = from->lsr_start + width; + from->lsr_start += width; +} /** * On controller node, allocate new super sequence for regular sequence server. @@ -109,220 +118,172 @@ EXPORT_SYMBOL(seq_server_set_cli); */ static int __seq_server_alloc_super(struct lu_server_seq *seq, - struct lu_seq_range *in, struct lu_seq_range *out, const struct lu_env *env) { - struct lu_seq_range *space = &seq->lss_space; - struct thandle *th; - __u64 mdt = out->lsr_mdt; - int rc, credit; - ENTRY; - - LASSERT(range_is_sane(space)); - - if (in != NULL) { - CDEBUG(D_INFO, "%s: Input seq range: " - DRANGE"\n", seq->lss_name, PRANGE(in)); - - if (in->lsr_end > space->lsr_start) - space->lsr_start = in->lsr_end; - *out = *in; - - CDEBUG(D_INFO, "%s: Recovered space: "DRANGE"\n", - seq->lss_name, PRANGE(space)); - } else { - if (range_space(space) < seq->lss_width) { - CWARN("%s: Sequences space to be exhausted soon. " - "Only "LPU64" sequences left\n", seq->lss_name, - range_space(space)); - *out = *space; - space->lsr_start = space->lsr_end; - } else if (range_is_exhausted(space)) { - CERROR("%s: Sequences space is exhausted\n", - seq->lss_name); - RETURN(-ENOSPC); - } else { - range_alloc(out, space, seq->lss_width); - } - } - out->lsr_mdt = mdt; - - credit = SEQ_TXN_STORE_CREDITS + FLD_TXN_INDEX_INSERT_CREDITS; - - th = seq_store_trans_start(seq, env, credit); - if (IS_ERR(th)) - RETURN(PTR_ERR(th)); - - rc = seq_store_write(seq, env, th); - if (rc) { - CERROR("%s: Can't write space data, rc %d\n", - seq->lss_name, rc); - goto out; - } + struct lu_seq_range *space = &seq->lss_space; + int rc; + ENTRY; + + LASSERT(range_is_sane(space)); + + if (range_is_exhausted(space)) { + CERROR("%s: Sequences space is exhausted\n", + seq->lss_name); + RETURN(-ENOSPC); + } else { + range_alloc(out, space, seq->lss_width); + } - rc = fld_server_create(seq->lss_site->ms_server_fld, - env, out, th); - if (rc) { - CERROR("%s: Can't Update fld database, rc %d\n", - seq->lss_name, rc); - } + rc = seq_store_update(env, seq, out, 1 /* sync */); -out: - seq_store_trans_stop(seq, env, th); + LCONSOLE_INFO("%s: super-sequence allocation rc = %d " DRANGE"\n", + seq->lss_name, rc, PRANGE(out)); - CDEBUG(D_INFO, "%s: super-sequence allocation rc = %d " - DRANGE"\n", seq->lss_name, rc, PRANGE(out)); - - RETURN(rc); + RETURN(rc); } int seq_server_alloc_super(struct lu_server_seq *seq, - struct lu_seq_range *in, struct lu_seq_range *out, const struct lu_env *env) { int rc; ENTRY; - down(&seq->lss_sem); - rc = __seq_server_alloc_super(seq, in, out, env); - up(&seq->lss_sem); + cfs_mutex_lock(&seq->lss_mutex); + rc = __seq_server_alloc_super(seq, out, env); + cfs_mutex_unlock(&seq->lss_mutex); RETURN(rc); } -static int __seq_server_alloc_meta(struct lu_server_seq *seq, - struct lu_seq_range *in, - struct lu_seq_range *out, - const struct lu_env *env) +static int __seq_set_init(const struct lu_env *env, + struct lu_server_seq *seq) { struct lu_seq_range *space = &seq->lss_space; - struct thandle *th; - int rc = 0; + int rc; - ENTRY; + range_alloc(&seq->lss_lowater_set, space, seq->lss_set_width); + range_alloc(&seq->lss_hiwater_set, space, seq->lss_set_width); - LASSERT(range_is_sane(space)); + rc = seq_store_update(env, seq, NULL, 1); - /* - * This is recovery case. Adjust super range if input range looks like - * it is allocated from new super. - */ - if (in != NULL) { - CDEBUG(D_INFO, "%s: Input seq range: " - DRANGE"\n", seq->lss_name, PRANGE(in)); - - if (range_is_exhausted(space)) { - /* - * Server cannot send empty range to client, this is why - * we check here that range from client is "newer" than - * exhausted super. - */ - LASSERT(in->lsr_end > space->lsr_start); - - /* - * Start is set to end of last allocated, because it - * *is* already allocated so we take that into account - * and do not use for other allocations. - */ - space->lsr_start = in->lsr_end; - - /* - * End is set to in->lsr_start + super sequence - * allocation unit. That is because in->lsr_start is - * first seq in new allocated range from controller - * before failure. - */ - space->lsr_end = in->lsr_start + LUSTRE_SEQ_SUPER_WIDTH; - - if (!seq->lss_cli) { - CERROR("%s: No sequence controller " - "is attached.\n", seq->lss_name); - RETURN(-ENODEV); - } - - /* - * Let controller know that this is recovery and last - * obtained range from it was @space. - */ - rc = seq_client_replay_super(seq->lss_cli, space, env); - - if (rc) { - CERROR("%s: Can't replay super-sequence, " - "rc %d\n", seq->lss_name, rc); - RETURN(rc); - } - } else { - /* - * Update super start by end from client's range. Super - * end should not be changed if range was not exhausted. - */ - if (in->lsr_end > space->lsr_start) - space->lsr_start = in->lsr_end; - } + return rc; +} - *out = *in; +/* + * This function implements new seq allocation algorithm using async + * updates to seq file on disk. ref bug 18857 for details. + * there are four variable to keep track of this process + * + * lss_space; - available lss_space + * lss_lowater_set; - lu_seq_range for all seqs before barrier, i.e. safe to use + * lss_hiwater_set; - lu_seq_range after barrier, i.e. allocated but may be + * not yet committed + * + * when lss_lowater_set reaches the end it is replaced with hiwater one and + * a write operation is initiated to allocate new hiwater range. + * if last seq write opearion is still not commited, current operation is + * flaged as sync write op. + */ +static int range_alloc_set(const struct lu_env *env, + struct lu_seq_range *out, + struct lu_server_seq *seq) +{ + struct lu_seq_range *space = &seq->lss_space; + struct lu_seq_range *loset = &seq->lss_lowater_set; + struct lu_seq_range *hiset = &seq->lss_hiwater_set; + int rc = 0; - CDEBUG(D_INFO, "%s: Recovered space: "DRANGE"\n", - seq->lss_name, PRANGE(space)); - } else { - /* - * XXX: Avoid cascading RPCs using kind of async preallocation - * when meta-sequence is close to exhausting. - */ - if (range_is_exhausted(space)) { - if (!seq->lss_cli) { - CERROR("%s: No sequence controller " - "is attached.\n", seq->lss_name); - RETURN(-ENODEV); - } - - rc = seq_client_alloc_super(seq->lss_cli, env); - if (rc) { - CERROR("%s: Can't allocate super-sequence, " - "rc %d\n", seq->lss_name, rc); - RETURN(rc); - } - - /* Saving new range to allocation space. */ - *space = seq->lss_cli->lcs_space; - LASSERT(range_is_sane(space)); - } + if (range_is_zero(loset)) + __seq_set_init(env, seq); - range_alloc(out, space, seq->lss_width); - } + if (OBD_FAIL_CHECK(OBD_FAIL_SEQ_ALLOC)) /* exhaust set */ + loset->lsr_start = loset->lsr_end; - th = seq_store_trans_start(seq, env, SEQ_TXN_STORE_CREDITS); - if (IS_ERR(th)) - RETURN(PTR_ERR(th)); + if (range_is_exhausted(loset)) { + /* reached high water mark. */ + struct lu_device *dev = seq->lss_site->ms_lu->ls_top_dev; + int obd_num_clients = dev->ld_obd->obd_num_exports; + __u64 set_sz; - rc = seq_store_write(seq, env, th); - if (rc) { - CERROR("%s: Can't write space data, rc %d\n", - seq->lss_name, rc); - } + /* calculate new seq width based on number of clients */ + set_sz = max(seq->lss_set_width, + obd_num_clients * seq->lss_width); + set_sz = min(range_space(space), set_sz); + + /* Switch to hiwater range now */ + *loset = *hiset; + /* allocate new hiwater range */ + range_alloc(hiset, space, set_sz); - if (rc == 0) { - CDEBUG(D_INFO, "%s: Allocated meta-sequence " - DRANGE"\n", seq->lss_name, PRANGE(out)); + /* update ondisk seq with new *space */ + rc = seq_store_update(env, seq, NULL, seq->lss_need_sync); } - seq_store_trans_stop(seq, env, th); + LASSERTF(!range_is_exhausted(loset) || range_is_sane(loset), + DRANGE"\n", PRANGE(loset)); + + if (rc == 0) + range_alloc(out, loset, seq->lss_width); + RETURN(rc); } +static int __seq_server_alloc_meta(struct lu_server_seq *seq, + struct lu_seq_range *out, + const struct lu_env *env) +{ + struct lu_seq_range *space = &seq->lss_space; + int rc = 0; + + ENTRY; + + LASSERT(range_is_sane(space)); + + /* Check if available space ends and allocate new super seq */ + if (range_is_exhausted(space)) { + if (!seq->lss_cli) { + CERROR("%s: No sequence controller is attached.\n", + seq->lss_name); + RETURN(-ENODEV); + } + + rc = seq_client_alloc_super(seq->lss_cli, env); + if (rc) { + CERROR("%s: Can't allocate super-sequence, rc %d\n", + seq->lss_name, rc); + RETURN(rc); + } + + /* Saving new range to allocation space. */ + *space = seq->lss_cli->lcs_space; + LASSERT(range_is_sane(space)); + } + + rc = range_alloc_set(env, out, seq); + if (rc != 0) { + CERROR("%s: Allocated meta-sequence failed: rc = %d\n", + seq->lss_name, rc); + RETURN(rc); + } + + CDEBUG(D_INFO, "%s: Allocated meta-sequence " DRANGE"\n", + seq->lss_name, PRANGE(out)); + + RETURN(rc); +} + int seq_server_alloc_meta(struct lu_server_seq *seq, - struct lu_seq_range *in, struct lu_seq_range *out, const struct lu_env *env) { int rc; ENTRY; - down(&seq->lss_sem); - rc = __seq_server_alloc_meta(seq, in, out, env); - up(&seq->lss_sem); + cfs_mutex_lock(&seq->lss_mutex); + rc = __seq_server_alloc_meta(seq, out, env); + cfs_mutex_unlock(&seq->lss_mutex); RETURN(rc); } @@ -330,8 +291,7 @@ EXPORT_SYMBOL(seq_server_alloc_meta); static int seq_server_handle(struct lu_site *site, const struct lu_env *env, - __u32 opc, struct lu_seq_range *in, - struct lu_seq_range *out) + __u32 opc, struct lu_seq_range *out) { int rc; struct md_site *mite; @@ -345,8 +305,7 @@ static int seq_server_handle(struct lu_site *site, "initialized\n"); RETURN(-EINVAL); } - rc = seq_server_alloc_meta(mite->ms_server_seq, - in, out, env); + rc = seq_server_alloc_meta(mite->ms_server_seq, out, env); break; case SEQ_ALLOC_SUPER: if (!mite->ms_control_seq) { @@ -354,8 +313,7 @@ static int seq_server_handle(struct lu_site *site, "initialized\n"); RETURN(-EINVAL); } - rc = seq_server_alloc_super(mite->ms_control_seq, - in, out, env); + rc = seq_server_alloc_super(mite->ms_control_seq, out, env); break; default: rc = -EINVAL; @@ -369,15 +327,16 @@ static int seq_req_handle(struct ptlrpc_request *req, const struct lu_env *env, struct seq_thread_info *info) { - struct lu_seq_range *out, *in = NULL, *tmp; + struct lu_seq_range *out, *tmp; struct lu_site *site; int rc = -EPROTO; __u32 *opc; ENTRY; + LASSERT(!(lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY)); site = req->rq_export->exp_obd->obd_lu_dev->ld_site; LASSERT(site != NULL); - + rc = req_capsule_server_pack(info->sti_pill); if (rc) RETURN(err_serious(rc)); @@ -390,15 +349,12 @@ static int seq_req_handle(struct ptlrpc_request *req, tmp = req_capsule_client_get(info->sti_pill, &RMF_SEQ_RANGE); - if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY) { - in = tmp; - LASSERT(!range_is_zero(in) && range_is_sane(in)); - } /* seq client passed mdt id, we need to pass that using out * range parameter */ - out->lsr_mdt = tmp->lsr_mdt; - rc = seq_server_handle(site, env, *opc, in, out); + out->lsr_index = tmp->lsr_index; + out->lsr_flags = tmp->lsr_flags; + rc = seq_server_handle(site, env, *opc, out); } else rc = err_serious(-EPROTO); @@ -439,6 +395,9 @@ static int seq_handle(struct ptlrpc_request *req) seq_thread_info_init(req, info); rc = seq_req_handle(req, env, info); + /* XXX: we don't need replay but MDT assign transno in any case, + * remove it manually before reply*/ + lustre_msg_set_transno(req->rq_repmsg, 0); seq_thread_info_fini(info); return rc; @@ -506,6 +465,7 @@ static void seq_server_proc_fini(struct lu_server_seq *seq) } #endif + int seq_server_init(struct lu_server_seq *seq, struct dt_device *dev, const char *prefix, @@ -513,18 +473,22 @@ int seq_server_init(struct lu_server_seq *seq, struct md_site *ms, const struct lu_env *env) { - struct thandle *th; int rc, is_srv = (type == LUSTRE_SEQ_SERVER); ENTRY; - LASSERT(dev != NULL); + LASSERT(dev != NULL); LASSERT(prefix != NULL); seq->lss_cli = NULL; seq->lss_type = type; seq->lss_site = ms; range_init(&seq->lss_space); - sema_init(&seq->lss_sem, 1); + + range_init(&seq->lss_lowater_set); + range_init(&seq->lss_hiwater_set); + seq->lss_set_width = LUSTRE_SEQ_BATCH_WIDTH; + + cfs_mutex_init(&seq->lss_mutex); seq->lss_width = is_srv ? LUSTRE_SEQ_META_WIDTH : LUSTRE_SEQ_SUPER_WIDTH; @@ -544,27 +508,21 @@ int seq_server_init(struct lu_server_seq *seq, LUSTRE_SEQ_ZERO_RANGE: LUSTRE_SEQ_SPACE_RANGE; - seq->lss_space.lsr_mdt = ms->ms_node_id; - CDEBUG(D_INFO, "%s: No data found " - "on store. Initialize space\n", - seq->lss_name); - - th = seq_store_trans_start(seq, env, SEQ_TXN_STORE_CREDITS); - if (IS_ERR(th)) - RETURN(PTR_ERR(th)); + seq->lss_space.lsr_index = ms->ms_node_id; + LCONSOLE_INFO("%s: No data found " + "on store. Initialize space\n", + seq->lss_name); - /* Save default controller value to store. */ - rc = seq_store_write(seq, env, th); + rc = seq_store_update(env, seq, NULL, 0); if (rc) { CERROR("%s: Can't write space data, " "rc %d\n", seq->lss_name, rc); } - seq_store_trans_stop(seq, env, th); } else if (rc) { - CERROR("%s: Can't read space data, rc %d\n", - seq->lss_name, rc); - GOTO(out, rc); - } + CERROR("%s: Can't read space data, rc %d\n", + seq->lss_name, rc); + GOTO(out, rc); + } if (is_srv) { LASSERT(range_is_sane(&seq->lss_space)); @@ -575,13 +533,13 @@ int seq_server_init(struct lu_server_seq *seq, rc = seq_server_proc_init(seq); if (rc) - GOTO(out, rc); + GOTO(out, rc); - EXIT; + EXIT; out: - if (rc) - seq_server_fini(seq, env); - return rc; + if (rc) + seq_server_fini(seq, env); + return rc; } EXPORT_SYMBOL(seq_server_init); @@ -599,18 +557,6 @@ EXPORT_SYMBOL(seq_server_fini); cfs_proc_dir_entry_t *seq_type_proc_dir = NULL; -static struct lu_local_obj_desc llod_seq_srv = { - .llod_name = LUSTRE_SEQ_SRV_NAME, - .llod_oid = FID_SEQ_SRV_OID, - .llod_is_index = 0, -}; - -static struct lu_local_obj_desc llod_seq_ctl = { - .llod_name = LUSTRE_SEQ_CTL_NAME, - .llod_oid = FID_SEQ_CTL_OID, - .llod_is_index = 0, -}; - static int __init fid_mod_init(void) { seq_type_proc_dir = lprocfs_register(LUSTRE_SEQ_NAME, @@ -619,9 +565,6 @@ static int __init fid_mod_init(void) if (IS_ERR(seq_type_proc_dir)) return PTR_ERR(seq_type_proc_dir); - llo_local_obj_register(&llod_seq_srv); - llo_local_obj_register(&llod_seq_ctl); - LU_CONTEXT_KEY_INIT(&seq_thread_key); lu_context_key_register(&seq_thread_key); return 0; @@ -629,9 +572,6 @@ static int __init fid_mod_init(void) static void __exit fid_mod_exit(void) { - llo_local_obj_unregister(&llod_seq_srv); - llo_local_obj_unregister(&llod_seq_ctl); - lu_context_key_degister(&seq_thread_key); if (seq_type_proc_dir != NULL && !IS_ERR(seq_type_proc_dir)) { lprocfs_remove(&seq_type_proc_dir);