X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ffid%2Ffid_handler.c;h=72159e8967279f476fde4525bf3d3ddd2418c240;hp=45e1ebb21f20bc46205c2c4aaad2716240d059f1;hb=7446b87524ba992b96b90a508a51d48d86b90865;hpb=f89a61cada93accbd02beee89f0610756ca595e9 diff --git a/lustre/fid/fid_handler.c b/lustre/fid/fid_handler.c index 45e1ebb..72159e8 100644 --- a/lustre/fid/fid_handler.c +++ b/lustre/fid/fid_handler.c @@ -1,29 +1,45 @@ /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: * - * lustre/fid/fid_handler.c - * Lustre Sequence Manager + * GPL HEADER START * - * Copyright (c) 2006 Cluster File Systems, Inc. - * Author: Yury Umanets + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. * - * You may have signed or agreed to another license before downloading - * this software. If so, you are bound by the terms and conditions - * of that agreement, and the following does not apply to you. See the - * LICENSE file included with this distribution for more information. + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). * - * If you did not agree to a different license, then this copy of Lustre - * is open source software; you can redistribute it and/or modify it - * under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf * - * In either case, Lustre is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * license text for more details. + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, Whamcloud, Inc. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * lustre/fid/fid_handler.c + * + * Lustre Sequence Manager + * + * Author: Yury Umanets */ #ifndef EXPORT_SYMTAB @@ -60,7 +76,7 @@ int seq_server_set_cli(struct lu_server_seq *seq, * Ask client for new range, assign that range to ->seq_space and write * seq state to backing store should be atomic. */ - down(&seq->lss_sem); + cfs_mutex_lock(&seq->lss_mutex); if (cli == NULL) { CDEBUG(D_INFO, "%s: Detached sequence client %s\n", @@ -79,185 +95,178 @@ int seq_server_set_cli(struct lu_server_seq *seq, seq->lss_name, cli->lcs_name); seq->lss_cli = cli; + cli->lcs_space.lsr_index = seq->lss_site->ms_node_id; EXIT; out_up: - up(&seq->lss_sem); + cfs_mutex_unlock(&seq->lss_mutex); return rc; } EXPORT_SYMBOL(seq_server_set_cli); - /* + * allocate \a w units of sequence from range \a from. + */ +static inline void range_alloc(struct lu_seq_range *to, + struct lu_seq_range *from, + __u64 width) +{ + width = min(range_space(from), width); + to->lsr_start = from->lsr_start; + to->lsr_end = from->lsr_start + width; + from->lsr_start += width; +} + +/** * On controller node, allocate new super sequence for regular sequence server. + * As this super sequence controller, this node suppose to maintain fld + * and update index. + * \a out range always has currect mds node number of requester. */ + static int __seq_server_alloc_super(struct lu_server_seq *seq, - struct lu_range *in, - struct lu_range *out, + struct lu_seq_range *out, const struct lu_env *env) { - struct lu_range *space = &seq->lss_space; + struct lu_seq_range *space = &seq->lss_space; int rc; ENTRY; LASSERT(range_is_sane(space)); - if (in != NULL) { - CDEBUG(D_INFO, "%s: Input seq range: " - DRANGE"\n", seq->lss_name, PRANGE(in)); - - if (in->lr_end > space->lr_start) - space->lr_start = in->lr_end; - *out = *in; - - CDEBUG(D_INFO, "%s: Recovered space: "DRANGE"\n", - seq->lss_name, PRANGE(space)); + if (range_is_exhausted(space)) { + CERROR("%s: Sequences space is exhausted\n", + seq->lss_name); + RETURN(-ENOSPC); } else { - if (range_space(space) < seq->lss_width) { - CWARN("%s: Sequences space to be exhausted soon. " - "Only "LPU64" sequences left\n", seq->lss_name, - range_space(space)); - *out = *space; - space->lr_start = space->lr_end; - } else if (range_is_exhausted(space)) { - CERROR("%s: Sequences space is exhausted\n", - seq->lss_name); - RETURN(-ENOSPC); - } else { - range_alloc(out, space, seq->lss_width); - } + range_alloc(out, space, seq->lss_width); } - rc = seq_store_write(seq, env); - if (rc) { - CERROR("%s: Can't write space data, rc %d\n", - seq->lss_name, rc); - RETURN(rc); - } + rc = seq_store_update(env, seq, out, 1 /* sync */); - CDEBUG(D_INFO, "%s: Allocated super-sequence " - DRANGE"\n", seq->lss_name, PRANGE(out)); + CDEBUG(D_INFO, "%s: super-sequence allocation rc = %d " + DRANGE"\n", seq->lss_name, rc, PRANGE(out)); RETURN(rc); } int seq_server_alloc_super(struct lu_server_seq *seq, - struct lu_range *in, - struct lu_range *out, + struct lu_seq_range *out, const struct lu_env *env) { int rc; ENTRY; - down(&seq->lss_sem); - rc = __seq_server_alloc_super(seq, in, out, env); - up(&seq->lss_sem); + cfs_mutex_lock(&seq->lss_mutex); + rc = __seq_server_alloc_super(seq, out, env); + cfs_mutex_unlock(&seq->lss_mutex); + + RETURN(rc); +} + +static int __seq_set_init(const struct lu_env *env, + struct lu_server_seq *seq) +{ + struct lu_seq_range *space = &seq->lss_space; + int rc; + + range_alloc(&seq->lss_lowater_set, space, seq->lss_set_width); + range_alloc(&seq->lss_hiwater_set, space, seq->lss_set_width); + + rc = seq_store_update(env, seq, NULL, 1); + + return rc; +} + +/* + * This function implements new seq allocation algorithm using async + * updates to seq file on disk. ref bug 18857 for details. + * there are four variable to keep track of this process + * + * lss_space; - available lss_space + * lss_lowater_set; - lu_seq_range for all seqs before barrier, i.e. safe to use + * lss_hiwater_set; - lu_seq_range after barrier, i.e. allocated but may be + * not yet committed + * + * when lss_lowater_set reaches the end it is replaced with hiwater one and + * a write operation is initiated to allocate new hiwater range. + * if last seq write opearion is still not commited, current operation is + * flaged as sync write op. + */ +static int range_alloc_set(const struct lu_env *env, + struct lu_seq_range *out, + struct lu_server_seq *seq) +{ + struct lu_seq_range *space = &seq->lss_space; + struct lu_seq_range *loset = &seq->lss_lowater_set; + struct lu_seq_range *hiset = &seq->lss_hiwater_set; + int rc = 0; + + if (range_is_zero(loset)) + __seq_set_init(env, seq); + + if (OBD_FAIL_CHECK(OBD_FAIL_SEQ_ALLOC)) /* exhaust set */ + loset->lsr_start = loset->lsr_end; + + if (range_is_exhausted(loset)) { + /* reached high water mark. */ + struct lu_device *dev = seq->lss_site->ms_lu.ls_top_dev; + int obd_num_clients = dev->ld_obd->obd_num_exports; + __u64 set_sz; + + /* calculate new seq width based on number of clients */ + set_sz = max(seq->lss_set_width, + obd_num_clients * seq->lss_width); + set_sz = min(range_space(space), set_sz); + + /* Switch to hiwater range now */ + *loset = *hiset; + /* allocate new hiwater range */ + range_alloc(hiset, space, set_sz); + + /* update ondisk seq with new *space */ + rc = seq_store_update(env, seq, NULL, seq->lss_need_sync); + } + + LASSERTF(!range_is_exhausted(loset) || range_is_sane(loset), + DRANGE"\n", PRANGE(loset)); + + if (rc == 0) + range_alloc(out, loset, seq->lss_width); RETURN(rc); } static int __seq_server_alloc_meta(struct lu_server_seq *seq, - struct lu_range *in, - struct lu_range *out, + struct lu_seq_range *out, const struct lu_env *env) { - struct lu_range *space = &seq->lss_space; + struct lu_seq_range *space = &seq->lss_space; int rc = 0; + ENTRY; LASSERT(range_is_sane(space)); - /* - * This is recovery case. Adjust super range if input range looks like - * it is allocated from new super. - */ - if (in != NULL) { - CDEBUG(D_INFO, "%s: Input seq range: " - DRANGE"\n", seq->lss_name, PRANGE(in)); - - if (range_is_exhausted(space)) { - /* - * Server cannot send empty range to client, this is why - * we check here that range from client is "newer" than - * exhausted super. - */ - LASSERT(in->lr_end > space->lr_start); - - /* - * Start is set to end of last allocated, because it - * *is* already allocated so we take that into account - * and do not use for other allocations. - */ - space->lr_start = in->lr_end; - - /* - * End is set to in->lr_start + super sequence - * allocation unit. That is because in->lr_start is - * first seq in new allocated range from controller - * before failure. - */ - space->lr_end = in->lr_start + LUSTRE_SEQ_SUPER_WIDTH; - - if (!seq->lss_cli) { - CERROR("%s: No sequence controller " - "is attached.\n", seq->lss_name); - RETURN(-ENODEV); - } - - /* - * Let controller know that this is recovery and last - * obtained range from it was @space. - */ - rc = seq_client_replay_super(seq->lss_cli, space, env); - if (rc) { - CERROR("%s: Can't replay super-sequence, " - "rc %d\n", seq->lss_name, rc); - RETURN(rc); - } - } else { - /* - * Update super start by end from client's range. Super - * end should not be changed if range was not exhausted. - */ - if (in->lr_end > space->lr_start) - space->lr_start = in->lr_end; + /* Check if available space ends and allocate new super seq */ + if (range_is_exhausted(space)) { + if (!seq->lss_cli) { + CERROR("%s: No sequence controller is attached.\n", + seq->lss_name); + RETURN(-ENODEV); } - *out = *in; - - CDEBUG(D_INFO, "%s: Recovered space: "DRANGE"\n", - seq->lss_name, PRANGE(space)); - } else { - /* - * XXX: Avoid cascading RPCs using kind of async preallocation - * when meta-sequence is close to exhausting. - */ - if (range_is_exhausted(space)) { - if (!seq->lss_cli) { - CERROR("%s: No sequence controller " - "is attached.\n", seq->lss_name); - RETURN(-ENODEV); - } - - rc = seq_client_alloc_super(seq->lss_cli, env); - if (rc) { - CERROR("%s: Can't allocate super-sequence, " - "rc %d\n", seq->lss_name, rc); - RETURN(rc); - } - - /* Saving new range to allocation space. */ - *space = seq->lss_cli->lcs_space; - LASSERT(range_is_sane(space)); + rc = seq_client_alloc_super(seq->lss_cli, env); + if (rc) { + CERROR("%s: Can't allocate super-sequence, rc %d\n", + seq->lss_name, rc); + RETURN(rc); } - range_alloc(out, space, seq->lss_width); - } - - rc = seq_store_write(seq, env); - if (rc) { - CERROR("%s: Can't write space data, rc %d\n", - seq->lss_name, rc); + /* Saving new range to allocation space. */ + *space = seq->lss_cli->lcs_space; + LASSERT(range_is_sane(space)); } + rc = range_alloc_set(env, out, seq); if (rc == 0) { CDEBUG(D_INFO, "%s: Allocated meta-sequence " DRANGE"\n", seq->lss_name, PRANGE(out)); @@ -267,16 +276,15 @@ static int __seq_server_alloc_meta(struct lu_server_seq *seq, } int seq_server_alloc_meta(struct lu_server_seq *seq, - struct lu_range *in, - struct lu_range *out, + struct lu_seq_range *out, const struct lu_env *env) { int rc; ENTRY; - down(&seq->lss_sem); - rc = __seq_server_alloc_meta(seq, in, out, env); - up(&seq->lss_sem); + cfs_mutex_lock(&seq->lss_mutex); + rc = __seq_server_alloc_meta(seq, out, env); + cfs_mutex_unlock(&seq->lss_mutex); RETURN(rc); } @@ -284,30 +292,29 @@ EXPORT_SYMBOL(seq_server_alloc_meta); static int seq_server_handle(struct lu_site *site, const struct lu_env *env, - __u32 opc, struct lu_range *in, - struct lu_range *out) + __u32 opc, struct lu_seq_range *out) { int rc; + struct md_site *mite; ENTRY; + mite = lu_site2md(site); switch (opc) { case SEQ_ALLOC_META: - if (!site->ls_server_seq) { + if (!mite->ms_server_seq) { CERROR("Sequence server is not " "initialized\n"); RETURN(-EINVAL); } - rc = seq_server_alloc_meta(site->ls_server_seq, - in, out, env); + rc = seq_server_alloc_meta(mite->ms_server_seq, out, env); break; case SEQ_ALLOC_SUPER: - if (!site->ls_control_seq) { + if (!mite->ms_control_seq) { CERROR("Sequence controller is not " "initialized\n"); RETURN(-EINVAL); } - rc = seq_server_alloc_super(site->ls_control_seq, - in, out, env); + rc = seq_server_alloc_super(mite->ms_control_seq, out, env); break; default: rc = -EINVAL; @@ -321,68 +328,58 @@ static int seq_req_handle(struct ptlrpc_request *req, const struct lu_env *env, struct seq_thread_info *info) { - struct lu_range *out, *in = NULL; + struct lu_seq_range *out, *tmp; struct lu_site *site; int rc = -EPROTO; __u32 *opc; ENTRY; + LASSERT(!(lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY)); site = req->rq_export->exp_obd->obd_lu_dev->ld_site; LASSERT(site != NULL); - - rc = req_capsule_pack(&info->sti_pill); + + rc = req_capsule_server_pack(info->sti_pill); if (rc) RETURN(err_serious(rc)); - opc = req_capsule_client_get(&info->sti_pill, - &RMF_SEQ_OPC); + opc = req_capsule_client_get(info->sti_pill, &RMF_SEQ_OPC); if (opc != NULL) { - out = req_capsule_server_get(&info->sti_pill, - &RMF_SEQ_RANGE); + out = req_capsule_server_get(info->sti_pill, &RMF_SEQ_RANGE); if (out == NULL) RETURN(err_serious(-EPROTO)); - if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY) { - in = req_capsule_client_get(&info->sti_pill, - &RMF_SEQ_RANGE); + tmp = req_capsule_client_get(info->sti_pill, &RMF_SEQ_RANGE); - LASSERT(!range_is_zero(in) && range_is_sane(in)); - } + /* seq client passed mdt id, we need to pass that using out + * range parameter */ - rc = seq_server_handle(site, env, *opc, in, out); + out->lsr_index = tmp->lsr_index; + out->lsr_flags = tmp->lsr_flags; + rc = seq_server_handle(site, env, *opc, out); } else rc = err_serious(-EPROTO); RETURN(rc); } +/* context key constructor/destructor: seq_key_init, seq_key_fini */ LU_KEY_INIT_FINI(seq, struct seq_thread_info); -struct lu_context_key seq_thread_key = { - .lct_tags = LCT_MD_THREAD, - .lct_init = seq_key_init, - .lct_fini = seq_key_fini -}; +/* context key: seq_thread_key */ +LU_CONTEXT_KEY_DEFINE(seq, LCT_MD_THREAD); static void seq_thread_info_init(struct ptlrpc_request *req, struct seq_thread_info *info) { - int i; - - /* Mark rep buffer as req-layout stuff expects */ - for (i = 0; i < ARRAY_SIZE(info->sti_rep_buf_size); i++) - info->sti_rep_buf_size[i] = -1; - + info->sti_pill = &req->rq_pill; /* Init request capsule */ - req_capsule_init(&info->sti_pill, req, RCL_SERVER, - info->sti_rep_buf_size); - - req_capsule_set(&info->sti_pill, &RQF_SEQ_QUERY); + req_capsule_init(info->sti_pill, req, RCL_SERVER); + req_capsule_set(info->sti_pill, &RQF_SEQ_QUERY); } static void seq_thread_info_fini(struct seq_thread_info *info) { - req_capsule_fini(&info->sti_pill); + req_capsule_fini(info->sti_pill); } static int seq_handle(struct ptlrpc_request *req) @@ -399,6 +396,9 @@ static int seq_handle(struct ptlrpc_request *req) seq_thread_info_init(req, info); rc = seq_req_handle(req, env, info); + /* XXX: we don't need replay but MDT assign transno in any case, + * remove it manually before reply*/ + lustre_msg_set_transno(req->rq_repmsg, 0); seq_thread_info_fini(info); return rc; @@ -409,7 +409,7 @@ static int seq_handle(struct ptlrpc_request *req) */ int seq_query(struct com_thread_info *info) { - return seq_handle(info->cti_pill.rc_req); + return seq_handle(info->cti_pill->rc_req); } EXPORT_SYMBOL(seq_query); @@ -466,22 +466,30 @@ static void seq_server_proc_fini(struct lu_server_seq *seq) } #endif + int seq_server_init(struct lu_server_seq *seq, struct dt_device *dev, const char *prefix, enum lu_mgr_type type, + struct md_site *ms, const struct lu_env *env) { int rc, is_srv = (type == LUSTRE_SEQ_SERVER); ENTRY; - LASSERT(dev != NULL); + LASSERT(dev != NULL); LASSERT(prefix != NULL); seq->lss_cli = NULL; seq->lss_type = type; - range_zero(&seq->lss_space); - sema_init(&seq->lss_sem, 1); + seq->lss_site = ms; + range_init(&seq->lss_space); + + range_init(&seq->lss_lowater_set); + range_init(&seq->lss_hiwater_set); + seq->lss_set_width = LUSTRE_SEQ_BATCH_WIDTH; + + cfs_mutex_init(&seq->lss_mutex); seq->lss_width = is_srv ? LUSTRE_SEQ_META_WIDTH : LUSTRE_SEQ_SUPER_WIDTH; @@ -492,7 +500,6 @@ int seq_server_init(struct lu_server_seq *seq, rc = seq_store_init(seq, env, dev); if (rc) GOTO(out, rc); - /* Request backing store for saved sequence info. */ rc = seq_store_read(seq, env); if (rc == -ENODATA) { @@ -502,21 +509,21 @@ int seq_server_init(struct lu_server_seq *seq, LUSTRE_SEQ_ZERO_RANGE: LUSTRE_SEQ_SPACE_RANGE; + seq->lss_space.lsr_index = ms->ms_node_id; CDEBUG(D_INFO, "%s: No data found " "on store. Initialize space\n", seq->lss_name); - /* Save default controller value to store. */ - rc = seq_store_write(seq, env); + rc = seq_store_update(env, seq, NULL, 0); if (rc) { CERROR("%s: Can't write space data, " "rc %d\n", seq->lss_name, rc); } } else if (rc) { - CERROR("%s: Can't read space data, rc %d\n", - seq->lss_name, rc); - GOTO(out, rc); - } + CERROR("%s: Can't read space data, rc %d\n", + seq->lss_name, rc); + GOTO(out, rc); + } if (is_srv) { LASSERT(range_is_sane(&seq->lss_space)); @@ -527,13 +534,13 @@ int seq_server_init(struct lu_server_seq *seq, rc = seq_server_proc_init(seq); if (rc) - GOTO(out, rc); + GOTO(out, rc); - EXIT; + EXIT; out: - if (rc) - seq_server_fini(seq, env); - return rc; + if (rc) + seq_server_fini(seq, env); + return rc; } EXPORT_SYMBOL(seq_server_init); @@ -551,6 +558,18 @@ EXPORT_SYMBOL(seq_server_fini); cfs_proc_dir_entry_t *seq_type_proc_dir = NULL; +static struct lu_local_obj_desc llod_seq_srv = { + .llod_name = LUSTRE_SEQ_SRV_NAME, + .llod_oid = FID_SEQ_SRV_OID, + .llod_is_index = 0, +}; + +static struct lu_local_obj_desc llod_seq_ctl = { + .llod_name = LUSTRE_SEQ_CTL_NAME, + .llod_oid = FID_SEQ_CTL_OID, + .llod_is_index = 0, +}; + static int __init fid_mod_init(void) { seq_type_proc_dir = lprocfs_register(LUSTRE_SEQ_NAME, @@ -559,6 +578,9 @@ static int __init fid_mod_init(void) if (IS_ERR(seq_type_proc_dir)) return PTR_ERR(seq_type_proc_dir); + llo_local_obj_register(&llod_seq_srv); + llo_local_obj_register(&llod_seq_ctl); + LU_CONTEXT_KEY_INIT(&seq_thread_key); lu_context_key_register(&seq_thread_key); return 0; @@ -566,6 +588,9 @@ static int __init fid_mod_init(void) static void __exit fid_mod_exit(void) { + llo_local_obj_unregister(&llod_seq_srv); + llo_local_obj_unregister(&llod_seq_ctl); + lu_context_key_degister(&seq_thread_key); if (seq_type_proc_dir != NULL && !IS_ERR(seq_type_proc_dir)) { lprocfs_remove(&seq_type_proc_dir); @@ -573,7 +598,7 @@ static void __exit fid_mod_exit(void) } } -MODULE_AUTHOR("Cluster File Systems, Inc. "); +MODULE_AUTHOR("Sun Microsystems, Inc. "); MODULE_DESCRIPTION("Lustre FID Module"); MODULE_LICENSE("GPL");