X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Ffid%2Ffid_handler.c;h=a007feff577326c3291bb46c3779c96edac11faa;hb=7136981bfaad8b95e15e9d2969b94c933491eb98;hp=2b28571a94b62f6b6bc3618e4d54f91c691f70e8;hpb=90d8e7fd28746a572c8de488222f5312fe927fc3;p=fs%2Flustre-release.git diff --git a/lustre/fid/fid_handler.c b/lustre/fid/fid_handler.c index 2b28571..a007fef 100644 --- a/lustre/fid/fid_handler.c +++ b/lustre/fid/fid_handler.c @@ -1,6 +1,4 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * +/* * GPL HEADER START * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -26,8 +24,10 @@ * GPL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -40,9 +40,6 @@ * Author: Yury Umanets */ -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif #define DEBUG_SUBSYSTEM S_FID #ifdef __KERNEL__ @@ -61,7 +58,56 @@ #include #include "fid_internal.h" +int client_fid_init(struct obd_device *obd, + struct obd_export *exp, enum lu_cli_type type) +{ + struct client_obd *cli = &obd->u.cli; + char *prefix; + int rc; + ENTRY; + + OBD_ALLOC_PTR(cli->cl_seq); + if (cli->cl_seq == NULL) + RETURN(-ENOMEM); + + OBD_ALLOC(prefix, MAX_OBD_NAME + 5); + if (prefix == NULL) + GOTO(out_free_seq, rc = -ENOMEM); + + snprintf(prefix, MAX_OBD_NAME + 5, "cli-%s", obd->obd_name); + + /* Init client side sequence-manager */ + rc = seq_client_init(cli->cl_seq, exp, type, prefix, NULL); + OBD_FREE(prefix, MAX_OBD_NAME + 5); + if (rc) + GOTO(out_free_seq, rc); + + RETURN(rc); +out_free_seq: + OBD_FREE_PTR(cli->cl_seq); + cli->cl_seq = NULL; + return rc; +} +EXPORT_SYMBOL(client_fid_init); + +int client_fid_fini(struct obd_device *obd) +{ + struct client_obd *cli = &obd->u.cli; + ENTRY; + + if (cli->cl_seq != NULL) { + seq_client_fini(cli->cl_seq); + OBD_FREE_PTR(cli->cl_seq); + cli->cl_seq = NULL; + } + + RETURN(0); +} +EXPORT_SYMBOL(client_fid_fini); + #ifdef __KERNEL__ +static void seq_server_proc_fini(struct lu_server_seq *seq); + /* Assigns client to sequence controller node. */ int seq_server_set_cli(struct lu_server_seq *seq, struct lu_client_seq *cli, @@ -74,7 +120,7 @@ int seq_server_set_cli(struct lu_server_seq *seq, * Ask client for new range, assign that range to ->seq_space and write * seq state to backing store should be atomic. */ - down(&seq->lss_sem); + mutex_lock(&seq->lss_mutex); if (cli == NULL) { CDEBUG(D_INFO, "%s: Detached sequence client %s\n", @@ -83,214 +129,210 @@ int seq_server_set_cli(struct lu_server_seq *seq, GOTO(out_up, rc = 0); } - if (seq->lss_cli != NULL) { - CERROR("%s: Sequence controller is already " - "assigned\n", seq->lss_name); - GOTO(out_up, rc = -EINVAL); - } + if (seq->lss_cli != NULL) { + CDEBUG(D_HA, "%s: Sequence controller is already " + "assigned\n", seq->lss_name); + GOTO(out_up, rc = -EEXIST); + } CDEBUG(D_INFO, "%s: Attached sequence controller %s\n", seq->lss_name, cli->lcs_name); - seq->lss_cli = cli; - EXIT; + seq->lss_cli = cli; + cli->lcs_space.lsr_index = seq->lss_site->ss_node_id; + EXIT; out_up: - up(&seq->lss_sem); + mutex_unlock(&seq->lss_mutex); return rc; } EXPORT_SYMBOL(seq_server_set_cli); - /* + * allocate \a w units of sequence from range \a from. + */ +static inline void range_alloc(struct lu_seq_range *to, + struct lu_seq_range *from, + __u64 width) +{ + width = min(range_space(from), width); + to->lsr_start = from->lsr_start; + to->lsr_end = from->lsr_start + width; + from->lsr_start += width; +} + +/** * On controller node, allocate new super sequence for regular sequence server. + * As this super sequence controller, this node suppose to maintain fld + * and update index. + * \a out range always has currect mds node number of requester. */ + static int __seq_server_alloc_super(struct lu_server_seq *seq, - struct lu_range *in, - struct lu_range *out, + struct lu_seq_range *out, const struct lu_env *env) { - struct lu_range *space = &seq->lss_space; - int rc; - ENTRY; - - LASSERT(range_is_sane(space)); - - if (in != NULL) { - CDEBUG(D_INFO, "%s: Input seq range: " - DRANGE"\n", seq->lss_name, PRANGE(in)); + struct lu_seq_range *space = &seq->lss_space; + int rc; + ENTRY; + + LASSERT(range_is_sane(space)); + + if (range_is_exhausted(space)) { + CERROR("%s: Sequences space is exhausted\n", + seq->lss_name); + RETURN(-ENOSPC); + } else { + range_alloc(out, space, seq->lss_width); + } - if (in->lr_end > space->lr_start) - space->lr_start = in->lr_end; - *out = *in; + rc = seq_store_update(env, seq, out, 1 /* sync */); - CDEBUG(D_INFO, "%s: Recovered space: "DRANGE"\n", - seq->lss_name, PRANGE(space)); - } else { - if (range_space(space) < seq->lss_width) { - CWARN("%s: Sequences space to be exhausted soon. " - "Only "LPU64" sequences left\n", seq->lss_name, - range_space(space)); - *out = *space; - space->lr_start = space->lr_end; - } else if (range_is_exhausted(space)) { - CERROR("%s: Sequences space is exhausted\n", - seq->lss_name); - RETURN(-ENOSPC); - } else { - range_alloc(out, space, seq->lss_width); - } - } - - rc = seq_store_write(seq, env); - if (rc) { - CERROR("%s: Can't write space data, rc %d\n", - seq->lss_name, rc); - RETURN(rc); - } + LCONSOLE_INFO("%s: super-sequence allocation rc = %d " DRANGE"\n", + seq->lss_name, rc, PRANGE(out)); - CDEBUG(D_INFO, "%s: Allocated super-sequence " - DRANGE"\n", seq->lss_name, PRANGE(out)); - - RETURN(rc); + RETURN(rc); } int seq_server_alloc_super(struct lu_server_seq *seq, - struct lu_range *in, - struct lu_range *out, + struct lu_seq_range *out, const struct lu_env *env) { int rc; ENTRY; - down(&seq->lss_sem); - rc = __seq_server_alloc_super(seq, in, out, env); - up(&seq->lss_sem); + mutex_lock(&seq->lss_mutex); + rc = __seq_server_alloc_super(seq, out, env); + mutex_unlock(&seq->lss_mutex); RETURN(rc); } -static int __seq_server_alloc_meta(struct lu_server_seq *seq, - struct lu_range *in, - struct lu_range *out, - const struct lu_env *env) +static int __seq_set_init(const struct lu_env *env, + struct lu_server_seq *seq) +{ + struct lu_seq_range *space = &seq->lss_space; + int rc; + + range_alloc(&seq->lss_lowater_set, space, seq->lss_set_width); + range_alloc(&seq->lss_hiwater_set, space, seq->lss_set_width); + + rc = seq_store_update(env, seq, NULL, 1); + + return rc; +} + +/* + * This function implements new seq allocation algorithm using async + * updates to seq file on disk. ref bug 18857 for details. + * there are four variable to keep track of this process + * + * lss_space; - available lss_space + * lss_lowater_set; - lu_seq_range for all seqs before barrier, i.e. safe to use + * lss_hiwater_set; - lu_seq_range after barrier, i.e. allocated but may be + * not yet committed + * + * when lss_lowater_set reaches the end it is replaced with hiwater one and + * a write operation is initiated to allocate new hiwater range. + * if last seq write opearion is still not commited, current operation is + * flaged as sync write op. + */ +static int range_alloc_set(const struct lu_env *env, + struct lu_seq_range *out, + struct lu_server_seq *seq) { - struct lu_range *space = &seq->lss_space; + struct lu_seq_range *space = &seq->lss_space; + struct lu_seq_range *loset = &seq->lss_lowater_set; + struct lu_seq_range *hiset = &seq->lss_hiwater_set; int rc = 0; - ENTRY; - LASSERT(range_is_sane(space)); + if (range_is_zero(loset)) + __seq_set_init(env, seq); - /* - * This is recovery case. Adjust super range if input range looks like - * it is allocated from new super. - */ - if (in != NULL) { - CDEBUG(D_INFO, "%s: Input seq range: " - DRANGE"\n", seq->lss_name, PRANGE(in)); - - if (range_is_exhausted(space)) { - /* - * Server cannot send empty range to client, this is why - * we check here that range from client is "newer" than - * exhausted super. - */ - LASSERT(in->lr_end > space->lr_start); - - /* - * Start is set to end of last allocated, because it - * *is* already allocated so we take that into account - * and do not use for other allocations. - */ - space->lr_start = in->lr_end; - - /* - * End is set to in->lr_start + super sequence - * allocation unit. That is because in->lr_start is - * first seq in new allocated range from controller - * before failure. - */ - space->lr_end = in->lr_start + LUSTRE_SEQ_SUPER_WIDTH; - - if (!seq->lss_cli) { - CERROR("%s: No sequence controller " - "is attached.\n", seq->lss_name); - RETURN(-ENODEV); - } - - /* - * Let controller know that this is recovery and last - * obtained range from it was @space. - */ - rc = seq_client_replay_super(seq->lss_cli, space, env); - if (rc) { - CERROR("%s: Can't replay super-sequence, " - "rc %d\n", seq->lss_name, rc); - RETURN(rc); - } - } else { - /* - * Update super start by end from client's range. Super - * end should not be changed if range was not exhausted. - */ - if (in->lr_end > space->lr_start) - space->lr_start = in->lr_end; - } + if (OBD_FAIL_CHECK(OBD_FAIL_SEQ_ALLOC)) /* exhaust set */ + loset->lsr_start = loset->lsr_end; - *out = *in; + if (range_is_exhausted(loset)) { + /* reached high water mark. */ + struct lu_device *dev = seq->lss_site->ss_lu->ls_top_dev; + int obd_num_clients = dev->ld_obd->obd_num_exports; + __u64 set_sz; - CDEBUG(D_INFO, "%s: Recovered space: "DRANGE"\n", - seq->lss_name, PRANGE(space)); - } else { - /* - * XXX: Avoid cascading RPCs using kind of async preallocation - * when meta-sequence is close to exhausting. - */ - if (range_is_exhausted(space)) { - if (!seq->lss_cli) { - CERROR("%s: No sequence controller " - "is attached.\n", seq->lss_name); - RETURN(-ENODEV); - } - - rc = seq_client_alloc_super(seq->lss_cli, env); - if (rc) { - CERROR("%s: Can't allocate super-sequence, " - "rc %d\n", seq->lss_name, rc); - RETURN(rc); - } - - /* Saving new range to allocation space. */ - *space = seq->lss_cli->lcs_space; - LASSERT(range_is_sane(space)); - } + /* calculate new seq width based on number of clients */ + set_sz = max(seq->lss_set_width, + obd_num_clients * seq->lss_width); + set_sz = min(range_space(space), set_sz); - range_alloc(out, space, seq->lss_width); - } + /* Switch to hiwater range now */ + *loset = *hiset; + /* allocate new hiwater range */ + range_alloc(hiset, space, set_sz); - rc = seq_store_write(seq, env); - if (rc) { - CERROR("%s: Can't write space data, rc %d\n", - seq->lss_name, rc); + /* update ondisk seq with new *space */ + rc = seq_store_update(env, seq, NULL, seq->lss_need_sync); } - if (rc == 0) { - CDEBUG(D_INFO, "%s: Allocated meta-sequence " - DRANGE"\n", seq->lss_name, PRANGE(out)); - } + LASSERTF(!range_is_exhausted(loset) || range_is_sane(loset), + DRANGE"\n", PRANGE(loset)); + + if (rc == 0) + range_alloc(out, loset, seq->lss_width); RETURN(rc); } +static int __seq_server_alloc_meta(struct lu_server_seq *seq, + struct lu_seq_range *out, + const struct lu_env *env) +{ + struct lu_seq_range *space = &seq->lss_space; + int rc = 0; + + ENTRY; + + LASSERT(range_is_sane(space)); + + /* Check if available space ends and allocate new super seq */ + if (range_is_exhausted(space)) { + if (!seq->lss_cli) { + CERROR("%s: No sequence controller is attached.\n", + seq->lss_name); + RETURN(-ENODEV); + } + + rc = seq_client_alloc_super(seq->lss_cli, env); + if (rc) { + CERROR("%s: Can't allocate super-sequence, rc %d\n", + seq->lss_name, rc); + RETURN(rc); + } + + /* Saving new range to allocation space. */ + *space = seq->lss_cli->lcs_space; + LASSERT(range_is_sane(space)); + } + + rc = range_alloc_set(env, out, seq); + if (rc != 0) { + CERROR("%s: Allocated meta-sequence failed: rc = %d\n", + seq->lss_name, rc); + RETURN(rc); + } + + CDEBUG(D_INFO, "%s: Allocated meta-sequence " DRANGE"\n", + seq->lss_name, PRANGE(out)); + + RETURN(rc); +} + int seq_server_alloc_meta(struct lu_server_seq *seq, - struct lu_range *in, - struct lu_range *out, + struct lu_seq_range *out, const struct lu_env *env) { int rc; ENTRY; - down(&seq->lss_sem); - rc = __seq_server_alloc_meta(seq, in, out, env); - up(&seq->lss_sem); + mutex_lock(&seq->lss_mutex); + rc = __seq_server_alloc_meta(seq, out, env); + mutex_unlock(&seq->lss_mutex); RETURN(rc); } @@ -298,54 +340,53 @@ EXPORT_SYMBOL(seq_server_alloc_meta); static int seq_server_handle(struct lu_site *site, const struct lu_env *env, - __u32 opc, struct lu_range *in, - struct lu_range *out) + __u32 opc, struct lu_seq_range *out) { - int rc; - struct md_site *mite; - ENTRY; - - mite = lu_site2md(site); - switch (opc) { - case SEQ_ALLOC_META: - if (!mite->ms_server_seq) { - CERROR("Sequence server is not " - "initialized\n"); - RETURN(-EINVAL); - } - rc = seq_server_alloc_meta(mite->ms_server_seq, - in, out, env); - break; - case SEQ_ALLOC_SUPER: - if (!mite->ms_control_seq) { - CERROR("Sequence controller is not " - "initialized\n"); - RETURN(-EINVAL); - } - rc = seq_server_alloc_super(mite->ms_control_seq, - in, out, env); - break; - default: - rc = -EINVAL; - break; - } + int rc; + struct seq_server_site *ss_site; + ENTRY; + + ss_site = lu_site2seq(site); + + switch (opc) { + case SEQ_ALLOC_META: + if (!ss_site->ss_server_seq) { + CERROR("Sequence server is not " + "initialized\n"); + RETURN(-EINVAL); + } + rc = seq_server_alloc_meta(ss_site->ss_server_seq, out, env); + break; + case SEQ_ALLOC_SUPER: + if (!ss_site->ss_control_seq) { + CERROR("Sequence controller is not " + "initialized\n"); + RETURN(-EINVAL); + } + rc = seq_server_alloc_super(ss_site->ss_control_seq, out, env); + break; + default: + rc = -EINVAL; + break; + } - RETURN(rc); + RETURN(rc); } static int seq_req_handle(struct ptlrpc_request *req, const struct lu_env *env, struct seq_thread_info *info) { - struct lu_range *out, *in = NULL; + struct lu_seq_range *out, *tmp; struct lu_site *site; int rc = -EPROTO; __u32 *opc; ENTRY; + LASSERT(!(lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY)); site = req->rq_export->exp_obd->obd_lu_dev->ld_site; LASSERT(site != NULL); - + rc = req_capsule_server_pack(info->sti_pill); if (rc) RETURN(err_serious(rc)); @@ -356,14 +397,14 @@ static int seq_req_handle(struct ptlrpc_request *req, if (out == NULL) RETURN(err_serious(-EPROTO)); - if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY) { - in = req_capsule_client_get(info->sti_pill, - &RMF_SEQ_RANGE); + tmp = req_capsule_client_get(info->sti_pill, &RMF_SEQ_RANGE); - LASSERT(!range_is_zero(in) && range_is_sane(in)); - } + /* seq client passed mdt id, we need to pass that using out + * range parameter */ - rc = seq_server_handle(site, env, *opc, in, out); + out->lsr_index = tmp->lsr_index; + out->lsr_flags = tmp->lsr_flags; + rc = seq_server_handle(site, env, *opc, out); } else rc = err_serious(-EPROTO); @@ -374,7 +415,7 @@ static int seq_req_handle(struct ptlrpc_request *req, LU_KEY_INIT_FINI(seq, struct seq_thread_info); /* context key: seq_thread_key */ -LU_CONTEXT_KEY_DEFINE(seq, LCT_MD_THREAD); +LU_CONTEXT_KEY_DEFINE(seq, LCT_MD_THREAD | LCT_DT_THREAD); static void seq_thread_info_init(struct ptlrpc_request *req, struct seq_thread_info *info) @@ -390,7 +431,7 @@ static void seq_thread_info_fini(struct seq_thread_info *info) req_capsule_fini(info->sti_pill); } -static int seq_handle(struct ptlrpc_request *req) +int seq_handle(struct ptlrpc_request *req) { const struct lu_env *env; struct seq_thread_info *info; @@ -404,10 +445,14 @@ static int seq_handle(struct ptlrpc_request *req) seq_thread_info_init(req, info); rc = seq_req_handle(req, env, info); + /* XXX: we don't need replay but MDT assign transno in any case, + * remove it manually before reply*/ + lustre_msg_set_transno(req->rq_repmsg, 0); seq_thread_info_fini(info); return rc; } +EXPORT_SYMBOL(seq_handle); /* * Entry point for handling FLD RPCs called from MDT. @@ -418,7 +463,6 @@ int seq_query(struct com_thread_info *info) } EXPORT_SYMBOL(seq_query); -static void seq_server_proc_fini(struct lu_server_seq *seq); #ifdef LPROCFS static int seq_server_proc_init(struct lu_server_seq *seq) @@ -471,22 +515,32 @@ static void seq_server_proc_fini(struct lu_server_seq *seq) } #endif + int seq_server_init(struct lu_server_seq *seq, - struct dt_device *dev, - const char *prefix, - enum lu_mgr_type type, - const struct lu_env *env) + struct dt_device *dev, + const char *prefix, + enum lu_mgr_type type, + struct seq_server_site *ss, + const struct lu_env *env) { int rc, is_srv = (type == LUSTRE_SEQ_SERVER); ENTRY; - LASSERT(dev != NULL); + LASSERT(dev != NULL); LASSERT(prefix != NULL); + LASSERT(ss != NULL); + LASSERT(ss->ss_lu != NULL); - seq->lss_cli = NULL; - seq->lss_type = type; - range_init(&seq->lss_space); - sema_init(&seq->lss_sem, 1); + seq->lss_cli = NULL; + seq->lss_type = type; + seq->lss_site = ss; + range_init(&seq->lss_space); + + range_init(&seq->lss_lowater_set); + range_init(&seq->lss_hiwater_set); + seq->lss_set_width = LUSTRE_SEQ_BATCH_WIDTH; + + mutex_init(&seq->lss_mutex); seq->lss_width = is_srv ? LUSTRE_SEQ_META_WIDTH : LUSTRE_SEQ_SUPER_WIDTH; @@ -497,7 +551,6 @@ int seq_server_init(struct lu_server_seq *seq, rc = seq_store_init(seq, env, dev); if (rc) GOTO(out, rc); - /* Request backing store for saved sequence info. */ rc = seq_store_read(seq, env); if (rc == -ENODATA) { @@ -507,21 +560,22 @@ int seq_server_init(struct lu_server_seq *seq, LUSTRE_SEQ_ZERO_RANGE: LUSTRE_SEQ_SPACE_RANGE; - CDEBUG(D_INFO, "%s: No data found " - "on store. Initialize space\n", - seq->lss_name); + LASSERT(ss != NULL); + seq->lss_space.lsr_index = ss->ss_node_id; + LCONSOLE_INFO("%s: No data found " + "on store. Initialize space\n", + seq->lss_name); - /* Save default controller value to store. */ - rc = seq_store_write(seq, env); + rc = seq_store_update(env, seq, NULL, 0); if (rc) { CERROR("%s: Can't write space data, " "rc %d\n", seq->lss_name, rc); } } else if (rc) { - CERROR("%s: Can't read space data, rc %d\n", - seq->lss_name, rc); - GOTO(out, rc); - } + CERROR("%s: Can't read space data, rc %d\n", + seq->lss_name, rc); + GOTO(out, rc); + } if (is_srv) { LASSERT(range_is_sane(&seq->lss_space)); @@ -532,13 +586,13 @@ int seq_server_init(struct lu_server_seq *seq, rc = seq_server_proc_init(seq); if (rc) - GOTO(out, rc); + GOTO(out, rc); - EXIT; + EXIT; out: - if (rc) - seq_server_fini(seq, env); - return rc; + if (rc) + seq_server_fini(seq, env); + return rc; } EXPORT_SYMBOL(seq_server_init); @@ -554,19 +608,34 @@ void seq_server_fini(struct lu_server_seq *seq, } EXPORT_SYMBOL(seq_server_fini); -cfs_proc_dir_entry_t *seq_type_proc_dir = NULL; +int seq_site_fini(const struct lu_env *env, struct seq_server_site *ss) +{ + if (ss == NULL) + RETURN(0); + + if (ss->ss_server_seq) { + seq_server_fini(ss->ss_server_seq, env); + OBD_FREE_PTR(ss->ss_server_seq); + ss->ss_server_seq = NULL; + } -static struct lu_local_obj_desc llod_seq_srv = { - .llod_name = LUSTRE_SEQ_SRV_NAME, - .llod_oid = FID_SEQ_SRV_OID, - .llod_is_index = 0, -}; + if (ss->ss_control_seq) { + seq_server_fini(ss->ss_control_seq, env); + OBD_FREE_PTR(ss->ss_control_seq); + ss->ss_control_seq = NULL; + } + + if (ss->ss_client_seq) { + seq_client_fini(ss->ss_client_seq); + OBD_FREE_PTR(ss->ss_client_seq); + ss->ss_client_seq = NULL; + } -static struct lu_local_obj_desc llod_seq_ctl = { - .llod_name = LUSTRE_SEQ_CTL_NAME, - .llod_oid = FID_SEQ_CTL_OID, - .llod_is_index = 0, -}; + RETURN(0); +} +EXPORT_SYMBOL(seq_site_fini); + +cfs_proc_dir_entry_t *seq_type_proc_dir = NULL; static int __init fid_mod_init(void) { @@ -576,9 +645,6 @@ static int __init fid_mod_init(void) if (IS_ERR(seq_type_proc_dir)) return PTR_ERR(seq_type_proc_dir); - llo_local_obj_register(&llod_seq_srv); - llo_local_obj_register(&llod_seq_ctl); - LU_CONTEXT_KEY_INIT(&seq_thread_key); lu_context_key_register(&seq_thread_key); return 0;