1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lustre/fid/fid_handler.c
38 * Lustre Sequence Manager
40 * Author: Yury Umanets <umka@clusterfs.com>
44 # define EXPORT_SYMTAB
46 #define DEBUG_SUBSYSTEM S_FID
49 # include <libcfs/libcfs.h>
50 # include <linux/module.h>
51 #else /* __KERNEL__ */
52 # include <liblustre.h>
56 #include <obd_class.h>
57 #include <lu_target.h>
58 #include <dt_object.h>
59 #include <md_object.h>
60 #include <obd_support.h>
61 #include <lustre_req_layout.h>
62 #include <lustre_fid.h>
63 #include "fid_internal.h"
66 /* Assigns client to sequence controller node. */
/*
 * seq_server_set_cli - attach or detach the sequence-controller client of
 * a sequence server.
 *
 * \param seq  server being updated; seq->lss_cli is tested below
 * \param cli  client handle; dereferenced in both debug messages
 * \param env  execution environment (not used on the lines visible here)
 *
 * Everything between cfs_down() and cfs_up() runs with seq->lss_sem held.
 * When a controller is already assigned (seq->lss_cli != NULL) the call
 * logs an error and leaves through out_up with rc = -EINVAL.  On the
 * attach path the client's space is tagged with this site's node id
 * (lcs_space.lsr_mdt = lss_site->ms_node_id).
 *
 * NOTE(review): local declarations, branch conditions, the out_up label
 * and the return are missing from this listing.  If the "Detached"
 * message is emitted on a cli == NULL path, its cli->lcs_name argument
 * dereferences NULL -- confirm against the full source.
 */
67 int seq_server_set_cli(struct lu_server_seq *seq,
68 struct lu_client_seq *cli,
69 const struct lu_env *env)
75 * Ask client for new range, assign that range to ->seq_space and write
76 * seq state to backing store should be atomic.
78 cfs_down(&seq->lss_sem);
81 CDEBUG(D_INFO, "%s: Detached sequence client %s\n",
82 seq->lss_name, cli->lcs_name);
87 if (seq->lss_cli != NULL) {
88 CERROR("%s: Sequence controller is already "
89 "assigned\n", seq->lss_name);
90 GOTO(out_up, rc = -EINVAL);
93 CDEBUG(D_INFO, "%s: Attached sequence controller %s\n",
94 seq->lss_name, cli->lcs_name);
97 cli->lcs_space.lsr_mdt = seq->lss_site->ms_node_id;
100 cfs_up(&seq->lss_sem);
103 EXPORT_SYMBOL(seq_server_set_cli);
105 * Allocate \a width units of sequence from range \a from into \a to.
/*
 * range_alloc - move the leading part of \a from into \a to.
 *
 * The requested width is first clamped to range_space(from), so no more
 * than what is left in \a from is handed out.  \a to then covers
 * [from->lsr_start, from->lsr_start + width) and \a from is advanced by
 * the same amount.  Only lsr_start and lsr_end are written on the lines
 * visible here.
 *
 * NOTE(review): the third parameter line (presumably the width argument)
 * is missing from this listing.
 */
107 static inline void range_alloc(struct lu_seq_range *to,
108 struct lu_seq_range *from,
111 width = min(range_space(from), width);
112 to->lsr_start = from->lsr_start;
113 to->lsr_end = from->lsr_start + width;
114 from->lsr_start += width;
118 * On controller node, allocate new super sequence for regular sequence server.
119 * As the super sequence controller, this node is supposed to maintain fld
121 * \a out range always has the correct mds node number of the requester.
/*
 * __seq_server_alloc_super - take one super-sequence range out of
 * seq->lss_space.  The caller seq_server_alloc_super() wraps this call in
 * cfs_down()/cfs_up() on seq->lss_sem.
 *
 * \param seq  controller whose lss_space is consumed
 * \param out  receives the allocated range
 * \param env  passed through to seq_store_update()
 *
 * Visible steps: assert that the space is sane, log an error when it is
 * exhausted, take up to seq->lss_width units with range_alloc(), persist
 * through seq_store_update() with the sync argument set to 1, then log
 * the result code together with the range.
 *
 * NOTE(review): the statements following the "exhausted" CERROR and the
 * return are missing from this listing.
 */
124 static int __seq_server_alloc_super(struct lu_server_seq *seq,
125 struct lu_seq_range *out,
126 const struct lu_env *env)
128 struct lu_seq_range *space = &seq->lss_space;
132 LASSERT(range_is_sane(space));
134 if (range_is_exhausted(space)) {
135 CERROR("%s: Sequences space is exhausted\n",
139 range_alloc(out, space, seq->lss_width);
142 rc = seq_store_update(env, seq, out, 1 /* sync */);
144 CDEBUG(D_INFO, "%s: super-sequence allocation rc = %d "
145 DRANGE"\n", seq->lss_name, rc, PRANGE(out));
/*
 * seq_server_alloc_super - locked wrapper around
 * __seq_server_alloc_super().
 *
 * Takes seq->lss_sem, performs the allocation into \a out, and releases
 * the semaphore again.  rc holds the result of the inner call; the return
 * statement itself is missing from this listing.
 */
150 int seq_server_alloc_super(struct lu_server_seq *seq,
151 struct lu_seq_range *out,
152 const struct lu_env *env)
157 cfs_down(&seq->lss_sem);
158 rc = __seq_server_alloc_super(seq, out, env);
159 cfs_up(&seq->lss_sem);
/*
 * __seq_set_init - seed the low-water and high-water sets.
 *
 * Two consecutive chunks of up to seq->lss_set_width units each are taken
 * from seq->lss_space, the first into lss_lowater_set and the second into
 * lss_hiwater_set.  The state is then written with seq_store_update()
 * (range argument NULL, sync argument 1) and lss_set_transno is reset
 * to 0.
 *
 * NOTE(review): the return statement is missing from this listing;
 * presumably rc from seq_store_update() is returned -- confirm.
 */
164 static int __seq_set_init(const struct lu_env *env,
165 struct lu_server_seq *seq)
167 struct lu_seq_range *space = &seq->lss_space;
170 range_alloc(&seq->lss_lowater_set, space, seq->lss_set_width);
171 range_alloc(&seq->lss_hiwater_set, space, seq->lss_set_width);
173 rc = seq_store_update(env, seq, NULL, 1);
174 seq->lss_set_transno = 0;
180 * This function implements new seq allocation algorithm using async
181 * updates to seq file on disk. ref bug 18857 for details.
182 * there are four variables to keep track of this process
184 * lss_space; - available lss_space
185 * lss_lowater_set; - lu_seq_range for all seqs before barrier, i.e. safe to use
186 * lss_hiwater_set; - lu_seq_range after barrier, i.e. allocated but may be
189 * when lss_lowater_set reaches the end it is replaced with hiwater one and
190 * a write operation is initiated to allocate new hiwater range.
191 * if last seq write operation is still not committed, current operation is
192 * flagged as sync write op.
/*
 * range_alloc_set - hand out one meta-sequence range of seq->lss_width
 * units from the low-water set, refilling the sets when needed.
 *
 * \param env  passed to __seq_set_init() and seq_store_update()
 * \param out  receives the allocated range
 * \param seq  server owning lss_space, lss_lowater_set, lss_hiwater_set
 *
 * NOTE(review): several lines (local declarations, the statement under
 * "Switch to hiwater range now", the statement guarded by the transno
 * comparison, and the return) are missing from this listing.
 */
194 static int range_alloc_set(const struct lu_env *env,
195 struct lu_seq_range *out,
196 struct lu_server_seq *seq)
198 struct lu_seq_range *space = &seq->lss_space;
199 struct lu_seq_range *loset = &seq->lss_lowater_set;
200 struct lu_seq_range *hiset = &seq->lss_hiwater_set;
/*
 * Low-water set still zero: seed both sets.  The value returned by
 * __seq_set_init() is not captured on this line.
 */
203 if (range_is_zero(loset))
204 __seq_set_init(env, seq);
/* Fault injection: force the low-water set to look used up. */
206 if (OBD_FAIL_CHECK(OBD_FAIL_SEQ_ALLOC)) /* exhaust set */
207 loset->lsr_start = loset->lsr_end;
209 if (range_is_exhausted(loset)) {
210 /* reached high water mark. */
211 struct lu_device *dev = seq->lss_site->ms_lu.ls_top_dev;
212 struct lu_target *tg = dev->ld_obd->u.obt.obt_lut;
213 int obd_num_clients = dev->ld_obd->obd_num_exports;
/*
 * New set size: at least lss_set_width, scaled up to one lss_width per
 * export, and never more than what remains in lss_space.
 */
217 /* calculate new seq width based on number of clients */
218 set_sz = max(seq->lss_set_width,
219 obd_num_clients * seq->lss_width);
220 set_sz = min(range_space(space), set_sz);
222 /* Switch to hiwater range now */
224 /* allocate new hiwater range */
225 range_alloc(hiset, space, set_sz);
/*
 * Compares the transno recorded for the previous set with the obd's last
 * committed transno; the guarded statement is not visible (presumably it
 * requests a synchronous store update -- confirm).
 */
227 if (seq->lss_set_transno > dev->ld_obd->obd_last_committed)
230 /* update ondisk seq with new *space */
231 rc = seq_store_update(env, seq, NULL, sync);
/* lut_last_transno is sampled under lut_translock. */
233 /* set new hiwater transno */
234 cfs_spin_lock(&tg->lut_translock);
235 seq->lss_set_transno = tg->lut_last_transno;
236 cfs_spin_unlock(&tg->lut_translock);
/* Passes when loset is not exhausted, or when it is exhausted but sane. */
239 LASSERTF(!range_is_exhausted(loset) || range_is_sane(loset),
240 DRANGE"\n", PRANGE(loset));
243 range_alloc(out, loset, seq->lss_width);
/*
 * __seq_server_alloc_meta - allocate one meta-sequence range into \a out.
 * The caller seq_server_alloc_meta() wraps this call in
 * cfs_down()/cfs_up() on seq->lss_sem.
 *
 * When seq->lss_space is exhausted a new super-sequence is requested
 * through seq_client_alloc_super(seq->lss_cli, env) and the client's
 * lcs_space is copied into lss_space.  Two error messages are visible on
 * that path: one for a missing controller and one for a failed
 * super-sequence request.  The range itself is produced by
 * range_alloc_set() and then logged.
 *
 * NOTE(review): the conditions guarding both CERROR calls, the error
 * exits and the return are missing from this listing.
 */
248 static int __seq_server_alloc_meta(struct lu_server_seq *seq,
249 struct lu_seq_range *out,
250 const struct lu_env *env)
252 struct lu_seq_range *space = &seq->lss_space;
257 LASSERT(range_is_sane(space));
259 /* Check if available space ends and allocate new super seq */
260 if (range_is_exhausted(space)) {
262 CERROR("%s: No sequence controller is attached.\n",
267 rc = seq_client_alloc_super(seq->lss_cli, env);
269 CERROR("%s: Can't allocate super-sequence, rc %d\n",
274 /* Saving new range to allocation space. */
275 *space = seq->lss_cli->lcs_space;
276 LASSERT(range_is_sane(space));
279 rc = range_alloc_set(env, out, seq);
281 CDEBUG(D_INFO, "%s: Allocated meta-sequence "
282 DRANGE"\n", seq->lss_name, PRANGE(out));
/*
 * seq_server_alloc_meta - locked wrapper around
 * __seq_server_alloc_meta().
 *
 * Takes seq->lss_sem, performs the allocation into \a out, and releases
 * the semaphore again.  rc holds the result of the inner call; the return
 * statement itself is missing from this listing.
 */
288 int seq_server_alloc_meta(struct lu_server_seq *seq,
289 struct lu_seq_range *out,
290 const struct lu_env *env)
295 cfs_down(&seq->lss_sem);
296 rc = __seq_server_alloc_meta(seq, out, env);
297 cfs_up(&seq->lss_sem);
301 EXPORT_SYMBOL(seq_server_alloc_meta);
/*
 * seq_server_handle - route a sequence request to the matching allocator.
 *
 * \param site  lu_site, converted to its md_site with lu_site2md()
 * \param env   execution environment handed to the allocator
 * \param opc   request opcode; SEQ_ALLOC_SUPER is the one case label
 *              visible here
 * \param out   range filled in by the allocator
 *
 * A meta allocation goes to mite->ms_server_seq through
 * seq_server_alloc_meta(); a super allocation goes to
 * mite->ms_control_seq through seq_server_alloc_super().  Each branch
 * logs an error when its server pointer is NULL.
 *
 * NOTE(review): the switch statement, the first case label, the error
 * codes set after each CERROR and the return are missing from this
 * listing.
 */
303 static int seq_server_handle(struct lu_site *site,
304 const struct lu_env *env,
305 __u32 opc, struct lu_seq_range *out)
308 struct md_site *mite;
311 mite = lu_site2md(site);
314 if (!mite->ms_server_seq) {
315 CERROR("Sequence server is not "
319 rc = seq_server_alloc_meta(mite->ms_server_seq, out, env);
321 case SEQ_ALLOC_SUPER:
322 if (!mite->ms_control_seq) {
323 CERROR("Sequence controller is not "
327 rc = seq_server_alloc_super(mite->ms_control_seq, out, env);
/*
 * seq_req_handle - unpack one sequence request and run it.
 *
 * \param req   incoming request; asserted not to carry MSG_REPLAY
 * \param env   execution environment handed to seq_server_handle()
 * \param info  per-thread state whose sti_pill capsule is read and packed
 *
 * Visible steps: find the lu_site through the export's obd device, pack
 * the reply with req_capsule_server_pack(), read the opcode
 * (RMF_SEQ_OPC) and the client-side range (RMF_SEQ_RANGE) from the
 * request, get the reply-side range, copy lsr_mdt from the client range
 * into the reply range, then call seq_server_handle() with *opc.  The
 * error returns that are visible use err_serious(), with -EPROTO on two
 * of them.
 *
 * NOTE(review): the conditions guarding the error returns are missing
 * from this listing, and the in-body comment beginning "seq client
 * passed mdt id" has lost its closing line.
 */
337 static int seq_req_handle(struct ptlrpc_request *req,
338 const struct lu_env *env,
339 struct seq_thread_info *info)
341 struct lu_seq_range *out, *tmp;
342 struct lu_site *site;
347 LASSERT(!(lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY));
348 site = req->rq_export->exp_obd->obd_lu_dev->ld_site;
349 LASSERT(site != NULL);
351 rc = req_capsule_server_pack(info->sti_pill);
353 RETURN(err_serious(rc));
355 opc = req_capsule_client_get(info->sti_pill, &RMF_SEQ_OPC);
357 out = req_capsule_server_get(info->sti_pill, &RMF_SEQ_RANGE);
359 RETURN(err_serious(-EPROTO));
361 tmp = req_capsule_client_get(info->sti_pill, &RMF_SEQ_RANGE);
363 /* seq client passed mdt id, we need to pass that using out
366 out->lsr_mdt = tmp->lsr_mdt;
367 rc = seq_server_handle(site, env, *opc, out);
369 rc = err_serious(-EPROTO);
/*
 * Per-thread context key for struct seq_thread_info.  seq_handle() looks
 * the value up with lu_context_key_get(&env->le_ctx, &seq_thread_key);
 * the key is initialised and registered in fid_mod_init().
 */
374 /* context key constructor/destructor: seq_key_init, seq_key_fini */
375 LU_KEY_INIT_FINI(seq, struct seq_thread_info);
377 /* context key: seq_thread_key */
378 LU_CONTEXT_KEY_DEFINE(seq, LCT_MD_THREAD);
/*
 * seq_thread_info_init - bind the per-thread info to \a req.
 *
 * Points info->sti_pill at the request's own capsule (req->rq_pill),
 * initialises that capsule for the server side (RCL_SERVER) and selects
 * the RQF_SEQ_QUERY format.
 */
380 static void seq_thread_info_init(struct ptlrpc_request *req,
381 struct seq_thread_info *info)
383 info->sti_pill = &req->rq_pill;
384 /* Init request capsule */
385 req_capsule_init(info->sti_pill, req, RCL_SERVER);
386 req_capsule_set(info->sti_pill, &RQF_SEQ_QUERY);
/*
 * seq_thread_info_fini - counterpart of seq_thread_info_init(); finalises
 * the capsule referenced by info->sti_pill.
 */
389 static void seq_thread_info_fini(struct seq_thread_info *info)
391 req_capsule_fini(info->sti_pill);
/*
 * seq_handle - service one sequence request on the current service
 * thread.
 *
 * The environment comes from req->rq_svc_thread->t_env and the per-thread
 * seq_thread_info from the seq_thread_key context key; both are asserted
 * non-NULL.  The info is initialised for \a req, the request is processed
 * by seq_req_handle(), the reply's transno is forced to 0, and the info
 * is finalised.
 *
 * NOTE(review): the return statement is missing from this listing;
 * presumably rc from seq_req_handle() is returned -- confirm.
 */
394 static int seq_handle(struct ptlrpc_request *req)
396 const struct lu_env *env;
397 struct seq_thread_info *info;
400 env = req->rq_svc_thread->t_env;
401 LASSERT(env != NULL);
403 info = lu_context_key_get(&env->le_ctx, &seq_thread_key);
404 LASSERT(info != NULL);
406 seq_thread_info_init(req, info);
407 rc = seq_req_handle(req, env, info);
408 /* XXX: we don't need replay but MDT assign transno in any case,
409 * remove it manually before reply*/
410 lustre_msg_set_transno(req->rq_repmsg, 0);
411 seq_thread_info_fini(info);
417 * Entry point for handling SEQ RPCs called from MDT.
/*
 * seq_query - exported entry point; forwards the request held in
 * info->cti_pill->rc_req to seq_handle() and returns its result.
 */
419 int seq_query(struct com_thread_info *info)
421 return seq_handle(info->cti_pill->rc_req);
423 EXPORT_SYMBOL(seq_query);
/*
 * Forward declaration: seq_server_proc_init() calls the fini routine,
 * which is defined after it.
 */
425 static void seq_server_proc_fini(struct lu_server_seq *seq);
/*
 * seq_server_proc_init - create the proc directory for \a seq and
 * populate it.
 *
 * The directory is registered under the name seq->lss_name; an ERR_PTR
 * result is converted to rc with PTR_ERR().  The entries in
 * seq_server_proc_list are then added with lprocfs_add_vars(); the
 * visible failure path logs an error and jumps to out_cleanup.  A call
 * to seq_server_proc_fini() is also visible, presumably under that
 * label.
 *
 * NOTE(review): the remaining lprocfs_register() arguments, the labels
 * and the returns are missing from this listing.
 */
428 static int seq_server_proc_init(struct lu_server_seq *seq)
433 seq->lss_proc_dir = lprocfs_register(seq->lss_name,
436 if (IS_ERR(seq->lss_proc_dir)) {
437 rc = PTR_ERR(seq->lss_proc_dir);
441 rc = lprocfs_add_vars(seq->lss_proc_dir,
442 seq_server_proc_list, seq);
444 CERROR("%s: Can't init sequence manager "
445 "proc, rc %d\n", seq->lss_name, rc);
446 GOTO(out_cleanup, rc);
452 seq_server_proc_fini(seq);
/*
 * seq_server_proc_fini - remove the proc directory of \a seq.
 *
 * Does nothing when lss_proc_dir is NULL.  A directory pointer that is
 * not an ERR_PTR value is removed with lprocfs_remove(); in both cases
 * the field is reset to NULL afterwards.
 */
456 static void seq_server_proc_fini(struct lu_server_seq *seq)
459 if (seq->lss_proc_dir != NULL) {
460 if (!IS_ERR(seq->lss_proc_dir))
461 lprocfs_remove(&seq->lss_proc_dir);
462 seq->lss_proc_dir = NULL;
/*
 * Second pair of definitions with the same signatures as the proc
 * init/fini routines defined earlier in this file.
 *
 * NOTE(review): the bodies and any preprocessor guards selecting between
 * the two pairs are missing from this listing; presumably these are the
 * variants used when proc support is compiled out -- confirm.
 */
467 static int seq_server_proc_init(struct lu_server_seq *seq)
472 static void seq_server_proc_fini(struct lu_server_seq *seq)
/*
 * seq_server_init - set up a sequence server or controller instance.
 *
 * \param seq   instance to initialise
 * \param dev   backing dt_device, asserted non-NULL and handed to
 *              seq_store_init()
 * \param type  LUSTRE_SEQ_SERVER selects server behaviour (is_srv); any
 *              other value is treated as a controller
 * \param env   execution environment
 *
 * Visible sequence:
 *  - reset lss_space and both water-mark sets with range_init();
 *  - lss_set_width = LUSTRE_SEQ_BATCH_WIDTH; lss_width is
 *    LUSTRE_SEQ_META_WIDTH for a server and LUSTRE_SEQ_SUPER_WIDTH
 *    otherwise;
 *  - initialise lss_sem with a count of 1;
 *  - build lss_name as "srv-<prefix>" or "ctl-<prefix>";
 *  - initialise the backing store, then read the saved state; on -ENODATA
 *    the space defaults to LUSTRE_SEQ_ZERO_RANGE (server) or
 *    LUSTRE_SEQ_SPACE_RANGE (controller), is tagged with ms->ms_node_id
 *    and written with seq_store_update() (sync argument 0);
 *  - assert the resulting space, then create the proc entries.
 *
 * NOTE(review): the parameter lines declaring "prefix" and "ms", the
 * conditions guarding the two CERROR calls and the two LASSERT variants,
 * the labels and the return are missing from this listing.  The
 * seq_server_fini() call near the end is presumably the error path --
 * confirm.
 */
479 int seq_server_init(struct lu_server_seq *seq,
480 struct dt_device *dev,
482 enum lu_mgr_type type,
484 const struct lu_env *env)
486 int rc, is_srv = (type == LUSTRE_SEQ_SERVER);
489 LASSERT(dev != NULL);
490 LASSERT(prefix != NULL);
493 seq->lss_type = type;
495 range_init(&seq->lss_space);
497 range_init(&seq->lss_lowater_set);
498 range_init(&seq->lss_hiwater_set);
499 seq->lss_set_width = LUSTRE_SEQ_BATCH_WIDTH;
501 cfs_sema_init(&seq->lss_sem, 1);
503 seq->lss_width = is_srv ?
504 LUSTRE_SEQ_META_WIDTH : LUSTRE_SEQ_SUPER_WIDTH;
506 snprintf(seq->lss_name, sizeof(seq->lss_name),
507 "%s-%s", (is_srv ? "srv" : "ctl"), prefix);
509 rc = seq_store_init(seq, env, dev);
512 /* Request backing store for saved sequence info. */
513 rc = seq_store_read(seq, env);
514 if (rc == -ENODATA) {
516 /* Nothing is read, init by default value. */
517 seq->lss_space = is_srv ?
518 LUSTRE_SEQ_ZERO_RANGE:
519 LUSTRE_SEQ_SPACE_RANGE;
521 seq->lss_space.lsr_mdt = ms->ms_node_id;
522 CDEBUG(D_INFO, "%s: No data found "
523 "on store. Initialize space\n",
526 rc = seq_store_update(env, seq, NULL, 0);
528 CERROR("%s: Can't write space data, "
529 "rc %d\n", seq->lss_name, rc);
532 CERROR("%s: Can't read space data, rc %d\n",
538 LASSERT(range_is_sane(&seq->lss_space));
540 LASSERT(!range_is_zero(&seq->lss_space) &&
541 range_is_sane(&seq->lss_space));
544 rc = seq_server_proc_init(seq);
551 seq_server_fini(seq, env);
554 EXPORT_SYMBOL(seq_server_init);
/*
 * seq_server_fini - tear down what seq_server_init() set up: the proc
 * entries first, then the backing store via seq_store_fini().
 */
556 void seq_server_fini(struct lu_server_seq *seq,
557 const struct lu_env *env)
561 seq_server_proc_fini(seq);
562 seq_store_fini(seq, env);
566 EXPORT_SYMBOL(seq_server_fini);
/*
 * Module-level proc directory; assigned in fid_mod_init() and removed in
 * fid_mod_exit().
 */
568 cfs_proc_dir_entry_t *seq_type_proc_dir = NULL;
/*
 * Local-object descriptors for the server and controller sequence
 * objects; registered in fid_mod_init() and unregistered in
 * fid_mod_exit().
 * NOTE(review): further initialiser fields and the closing braces are
 * missing from this listing.
 */
570 static struct lu_local_obj_desc llod_seq_srv = {
571 .llod_name = LUSTRE_SEQ_SRV_NAME,
572 .llod_oid = FID_SEQ_SRV_OID,
576 static struct lu_local_obj_desc llod_seq_ctl = {
577 .llod_name = LUSTRE_SEQ_CTL_NAME,
578 .llod_oid = FID_SEQ_CTL_OID,
/*
 * fid_mod_init - module initialisation.
 *
 * Registers the proc directory named LUSTRE_SEQ_NAME and returns the
 * PTR_ERR() code if that fails.  Otherwise it registers both
 * local-object descriptors, then initialises and registers
 * seq_thread_key.
 *
 * NOTE(review): the remaining lprocfs_register() arguments and the final
 * return are missing from this listing; the result of
 * lu_context_key_register() is not captured on the visible line.
 */
582 static int __init fid_mod_init(void)
584 seq_type_proc_dir = lprocfs_register(LUSTRE_SEQ_NAME,
587 if (IS_ERR(seq_type_proc_dir))
588 return PTR_ERR(seq_type_proc_dir);
590 llo_local_obj_register(&llod_seq_srv);
591 llo_local_obj_register(&llod_seq_ctl);
593 LU_CONTEXT_KEY_INIT(&seq_thread_key);
594 lu_context_key_register(&seq_thread_key);
/*
 * fid_mod_exit - module teardown.
 *
 * Unregisters both local-object descriptors and the context key, then
 * removes the module proc directory when the pointer is neither NULL nor
 * an ERR_PTR value, resetting it to NULL afterwards.
 */
598 static void __exit fid_mod_exit(void)
600 llo_local_obj_unregister(&llod_seq_srv);
601 llo_local_obj_unregister(&llod_seq_ctl);
603 lu_context_key_degister(&seq_thread_key);
604 if (seq_type_proc_dir != NULL && !IS_ERR(seq_type_proc_dir)) {
605 lprocfs_remove(&seq_type_proc_dir);
606 seq_type_proc_dir = NULL;
/*
 * Module metadata, followed by the cfs_module() declaration that names
 * fid_mod_init and fid_mod_exit as the init and exit routines of module
 * "fid", version string "0.1.0".
 */
610 MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
611 MODULE_DESCRIPTION("Lustre FID Module");
612 MODULE_LICENSE("GPL");
614 cfs_module(fid, "0.1.0", fid_mod_init, fid_mod_exit);