4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2013, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lustre/fid/fid_handler.c
38 * Lustre Sequence Manager
40 * Author: Yury Umanets <umka@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_FID
46 # include <libcfs/libcfs.h>
47 # include <linux/module.h>
48 #else /* __KERNEL__ */
49 # include <liblustre.h>
53 #include <obd_class.h>
54 #include <dt_object.h>
55 #include <md_object.h>
56 #include <obd_support.h>
57 #include <lustre_req_layout.h>
58 #include <lustre_fid.h>
59 #include "fid_internal.h"
/*
 * Set up the client-side FID/sequence state for \a obd.
 *
 * Allocates cli->cl_seq, builds a "cli-<obd_name>" prefix string and
 * hands it to seq_client_init().  On failure the cl_seq allocation is
 * released again via the out_free_seq label.
 *
 * NOTE(review): this listing has gaps (e.g. ENTRY/RETURN, closing
 * braces, and the -ENOMEM return after the cl_seq check are missing) —
 * comments describe only what the visible lines establish.
 */
61 int client_fid_init(struct obd_device *obd,
62 struct obd_export *exp, enum lu_cli_type type)
64 struct client_obd *cli = &obd->u.cli;
69 OBD_ALLOC_PTR(cli->cl_seq);
70 if (cli->cl_seq == NULL)
/* temporary name buffer: "cli-" (4 chars) + obd_name + NUL => +5 */
73 OBD_ALLOC(prefix, MAX_OBD_NAME + 5);
75 GOTO(out_free_seq, rc = -ENOMEM);
77 snprintf(prefix, MAX_OBD_NAME + 5, "cli-%s", obd->obd_name);
79 /* Init client side sequence-manager */
80 rc = seq_client_init(cli->cl_seq, exp, type, prefix, NULL);
/* prefix was only needed for naming; free it regardless of rc */
81 OBD_FREE(prefix, MAX_OBD_NAME + 5);
83 GOTO(out_free_seq, rc);
/* error path: undo the cl_seq allocation */
87 OBD_FREE_PTR(cli->cl_seq);
91 EXPORT_SYMBOL(client_fid_init);
/*
 * Tear down the client-side FID/sequence state created by
 * client_fid_init(): finalize and free cli->cl_seq if present.
 * NOTE(review): the line resetting cli->cl_seq to NULL after the free
 * is presumably in the gap following line 100 — confirm in full source.
 */
93 int client_fid_fini(struct obd_device *obd)
95 struct client_obd *cli = &obd->u.cli;
98 if (cli->cl_seq != NULL) {
99 seq_client_fini(cli->cl_seq);
100 OBD_FREE_PTR(cli->cl_seq);
106 EXPORT_SYMBOL(client_fid_fini);
109 static void seq_server_proc_fini(struct lu_server_seq *seq);
111 /* Assigns client to sequence controller node. */
/*
 * \a cli == NULL detaches the current sequence client; a non-NULL
 * \a cli attaches it, failing with -EEXIST if one is already set.
 * All transitions are serialized by seq->lss_mutex.
 */
112 int seq_server_set_cli(struct lu_server_seq *seq,
113 struct lu_client_seq *cli,
114 const struct lu_env *env)
/*
120 * Ask client for new range, assign that range to ->seq_space and write
121 * seq state to backing store should be atomic.
 */
123 mutex_lock(&seq->lss_mutex);
/* detach path (cli == NULL in the gap above) */
126 CDEBUG(D_INFO, "%s: Detached sequence client %s\n",
127 seq->lss_name, cli->lcs_name);
129 GOTO(out_up, rc = 0);
/* attach path: refuse a second controller */
132 if (seq->lss_cli != NULL) {
133 CDEBUG(D_HA, "%s: Sequence controller is already "
134 "assigned\n", seq->lss_name);
135 GOTO(out_up, rc = -EEXIST);
138 CDEBUG(D_INFO, "%s: Attached sequence controller %s\n",
139 seq->lss_name, cli->lcs_name);
/* tag the client's space with this server's node id */
142 cli->lcs_space.lsr_index = seq->lss_site->ss_node_id;
145 mutex_unlock(&seq->lss_mutex);
148 EXPORT_SYMBOL(seq_server_set_cli);
/*
150 * allocate \a w units of sequence from range \a from.
 *
 * Carves the first min(width, space-left) sequences out of \a from
 * into \a to and advances \a from past them.  \a to ends up as the
 * half-open range [old from->lsr_start, old from->lsr_start + width).
 */
152 static inline void range_alloc(struct lu_seq_range *to,
153 struct lu_seq_range *from,
/* clamp the request to what is actually left in *from */
156 width = min(range_space(from), width);
157 to->lsr_start = from->lsr_start;
158 to->lsr_end = from->lsr_start + width;
159 from->lsr_start += width;
/*
163 * On controller node, allocate new super sequence for regular sequence server.
164 * As this super sequence controller, this node is supposed to maintain fld
 *
166 * \a out range always has the correct mds node number of the requester.
 */
169 static int __seq_server_alloc_super(struct lu_server_seq *seq,
170 struct lu_seq_range *out,
171 const struct lu_env *env)
173 struct lu_seq_range *space = &seq->lss_space;
177 LASSERT(range_is_sane(space));
/* nothing left to hand out — error branch body is in the listing gap */
179 if (range_is_exhausted(space)) {
180 CERROR("%s: Sequences space is exhausted\n",
/* carve lss_width sequences for the requesting server */
184 range_alloc(out, space, seq->lss_width);
/* persist the shrunken *space synchronously before replying */
187 rc = seq_store_update(env, seq, out, 1 /* sync */);
189 LCONSOLE_INFO("%s: super-sequence allocation rc = %d " DRANGE"\n",
190 seq->lss_name, rc, PRANGE(out));
/*
 * Locked wrapper: allocate a super-sequence range under lss_mutex.
 */
195 int seq_server_alloc_super(struct lu_server_seq *seq,
196 struct lu_seq_range *out,
197 const struct lu_env *env)
202 mutex_lock(&seq->lss_mutex);
203 rc = __seq_server_alloc_super(seq, out, env);
204 mutex_unlock(&seq->lss_mutex);
/*
 * First-use initialization of the low/high water sets: carve both
 * from lss_space and persist the updated space synchronously.
 */
209 static int __seq_set_init(const struct lu_env *env,
210 struct lu_server_seq *seq)
212 struct lu_seq_range *space = &seq->lss_space;
215 range_alloc(&seq->lss_lowater_set, space, seq->lss_set_width);
216 range_alloc(&seq->lss_hiwater_set, space, seq->lss_set_width);
218 rc = seq_store_update(env, seq, NULL, 1);
/*
224 * This function implements new seq allocation algorithm using async
225 * updates to seq file on disk. ref bug 18857 for details.
226 * there are four variables to keep track of this process
 *
228 * lss_space; - available lss_space
229 * lss_lowater_set; - lu_seq_range for all seqs before barrier, i.e. safe to use
230 * lss_hiwater_set; - lu_seq_range after barrier, i.e. allocated but may be
 *
233 * when lss_lowater_set reaches the end it is replaced with hiwater one and
234 * a write operation is initiated to allocate new hiwater range.
235 * if last seq write operation is still not committed, current operation is
236 * flagged as sync write op.
 */
238 static int range_alloc_set(const struct lu_env *env,
239 struct lu_seq_range *out,
240 struct lu_server_seq *seq)
242 struct lu_seq_range *space = &seq->lss_space;
243 struct lu_seq_range *loset = &seq->lss_lowater_set;
244 struct lu_seq_range *hiset = &seq->lss_hiwater_set;
/* lazily set up the water-mark sets on first allocation */
247 if (range_is_zero(loset))
248 __seq_set_init(env, seq);
/* fault-injection hook for testing the refill path */
250 if (OBD_FAIL_CHECK(OBD_FAIL_SEQ_ALLOC)) /* exhaust set */
251 loset->lsr_start = loset->lsr_end;
253 if (range_is_exhausted(loset)) {
254 /* reached high water mark. */
255 struct lu_device *dev = seq->lss_site->ss_lu->ls_top_dev;
256 int obd_num_exports = dev->ld_obd->obd_num_exports;
259 /* calculate new seq width based on number of clients */
260 set_sz = max(seq->lss_set_width,
261 obd_num_clients * seq->lss_width);
262 set_sz = min(range_space(space), set_sz);
264 /* Switch to hiwater range now */
266 /* allocate new hiwater range */
267 range_alloc(hiset, space, set_sz);
269 /* update ondisk seq with new *space */
270 rc = seq_store_update(env, seq, NULL, seq->lss_need_sync);
273 LASSERTF(!range_is_exhausted(loset) || range_is_sane(loset),
274 DRANGE"\n", PRANGE(loset));
/* hand out one lss_width slice from the safe (low-water) set */
277 range_alloc(out, loset, seq->lss_width);
/*
 * Allocate a meta-sequence range for a client.  If the local space is
 * exhausted, first pull a fresh super-sequence from the attached
 * controller (seq->lss_cli) and adopt its range as the new space.
 */
282 static int __seq_server_alloc_meta(struct lu_server_seq *seq,
283 struct lu_seq_range *out,
284 const struct lu_env *env)
286 struct lu_seq_range *space = &seq->lss_space;
291 LASSERT(range_is_sane(space));
293 /* Check if available space ends and allocate new super seq */
294 if (range_is_exhausted(space)) {
/* no controller attached — cannot refill; error body in listing gap */
296 CERROR("%s: No sequence controller is attached.\n",
301 rc = seq_client_alloc_super(seq->lss_cli, env);
303 CERROR("%s: Can't allocate super-sequence, rc %d\n",
308 /* Saving new range to allocation space. */
309 *space = seq->lss_cli->lcs_space;
310 LASSERT(range_is_sane(space));
/* carve the per-client slice out of the (possibly refilled) space */
313 rc = range_alloc_set(env, out, seq);
315 CERROR("%s: Allocated meta-sequence failed: rc = %d\n",
320 CDEBUG(D_INFO, "%s: Allocated meta-sequence " DRANGE"\n",
321 seq->lss_name, PRANGE(out));
/*
 * Locked wrapper: allocate a meta-sequence range under lss_mutex.
 */
326 int seq_server_alloc_meta(struct lu_server_seq *seq,
327 struct lu_seq_range *out,
328 const struct lu_env *env)
333 mutex_lock(&seq->lss_mutex);
334 rc = __seq_server_alloc_meta(seq, out, env);
335 mutex_unlock(&seq->lss_mutex);
339 EXPORT_SYMBOL(seq_server_alloc_meta);
/*
 * Dispatch a sequence-allocation opcode for \a site: meta allocation
 * via ss_server_seq, super allocation via ss_control_seq.
 * NOTE(review): the switch(opc) header and the SEQ_ALLOC_META case
 * label are in the listing gaps; only the case bodies are visible.
 */
341 static int seq_server_handle(struct lu_site *site,
342 const struct lu_env *env,
343 __u32 opc, struct lu_seq_range *out)
346 struct seq_server_site *ss_site;
349 ss_site = lu_site2seq(site);
353 if (!ss_site->ss_server_seq) {
354 CERROR("Sequence server is not "
358 rc = seq_server_alloc_meta(ss_site->ss_server_seq, out, env);
360 case SEQ_ALLOC_SUPER:
361 if (!ss_site->ss_control_seq) {
362 CERROR("Sequence controller is not "
366 rc = seq_server_alloc_super(ss_site->ss_control_seq, out, env);
/*
 * Unpack a SEQ RPC: read the opcode and the client-supplied range
 * (carrying the requester's mdt index/flags), copy index/flags into
 * the reply buffer and dispatch to seq_server_handle().
 */
376 static int seq_req_handle(struct ptlrpc_request *req,
377 const struct lu_env *env,
378 struct seq_thread_info *info)
380 struct lu_seq_range *out, *tmp;
381 struct lu_site *site;
/* SEQ requests are never replayed (see seq_handle transno reset) */
386 LASSERT(!(lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY));
387 site = req->rq_export->exp_obd->obd_lu_dev->ld_site;
388 LASSERT(site != NULL);
390 rc = req_capsule_server_pack(info->sti_pill);
392 RETURN(err_serious(rc));
394 opc = req_capsule_client_get(info->sti_pill, &RMF_SEQ_OPC);
396 out = req_capsule_server_get(info->sti_pill, &RMF_SEQ_RANGE);
/* missing reply buffer is a protocol error */
398 RETURN(err_serious(-EPROTO));
400 tmp = req_capsule_client_get(info->sti_pill, &RMF_SEQ_RANGE);
402 /* seq client passed mdt id, we need to pass that using out
 */
405 out->lsr_index = tmp->lsr_index;
406 out->lsr_flags = tmp->lsr_flags;
407 rc = seq_server_handle(site, env, *opc, out);
/* NULL opc (gap above) => protocol error */
409 rc = err_serious(-EPROTO);
414 /* context key constructor/destructor: seq_key_init, seq_key_fini */
415 LU_KEY_INIT_FINI(seq, struct seq_thread_info);
417 /* context key: seq_thread_key */
418 LU_CONTEXT_KEY_DEFINE(seq, LCT_MD_THREAD | LCT_DT_THREAD);
/*
 * Bind the per-thread info to this request's capsule and select the
 * SEQ_QUERY request format.
 */
420 static void seq_thread_info_init(struct ptlrpc_request *req,
421 struct seq_thread_info *info)
423 info->sti_pill = &req->rq_pill;
424 /* Init request capsule */
425 req_capsule_init(info->sti_pill, req, RCL_SERVER);
426 req_capsule_set(info->sti_pill, &RQF_SEQ_QUERY);
/* Release the request capsule set up by seq_thread_info_init(). */
429 static void seq_thread_info_fini(struct seq_thread_info *info)
431 req_capsule_fini(info->sti_pill);
/*
 * Top-level SEQ RPC handler: fetch per-thread state from the env
 * context, run seq_req_handle() and clear the transno so the request
 * is never replayed.
 */
434 int seq_handle(struct ptlrpc_request *req)
436 const struct lu_env *env;
437 struct seq_thread_info *info;
440 env = req->rq_svc_thread->t_env;
441 LASSERT(env != NULL);
443 info = lu_context_key_get(&env->le_ctx, &seq_thread_key);
444 LASSERT(info != NULL);
446 seq_thread_info_init(req, info);
447 rc = seq_req_handle(req, env, info);
448 /* XXX: we don't need replay but MDT assign transno in any case,
449 * remove it manually before reply*/
450 lustre_msg_set_transno(req->rq_repmsg, 0);
451 seq_thread_info_fini(info);
455 EXPORT_SYMBOL(seq_handle);
/*
458 * Entry point for handling SEQ RPCs called from MDT.
 * (original comment said "FLD"; this forwards to seq_handle())
 */
460 int seq_query(struct com_thread_info *info)
462 return seq_handle(info->cti_pill->rc_req);
464 EXPORT_SYMBOL(seq_query);
/*
 * Register the per-server procfs directory (named after lss_name) and
 * populate it with seq_server_proc_list variables; on failure, undo
 * via seq_server_proc_fini().
 */
468 static int seq_server_proc_init(struct lu_server_seq *seq)
473 seq->lss_proc_dir = lprocfs_register(seq->lss_name,
476 if (IS_ERR(seq->lss_proc_dir)) {
477 rc = PTR_ERR(seq->lss_proc_dir);
481 rc = lprocfs_add_vars(seq->lss_proc_dir,
482 seq_server_proc_list, seq);
484 CERROR("%s: Can't init sequence manager "
485 "proc, rc %d\n", seq->lss_name, rc);
486 GOTO(out_cleanup, rc);
492 seq_server_proc_fini(seq);
/*
 * Remove the procfs directory registered by seq_server_proc_init().
 * lss_proc_dir may hold an ERR_PTR from a failed register, hence the
 * IS_ERR check before lprocfs_remove().
 */
496 static void seq_server_proc_fini(struct lu_server_seq *seq)
499 if (seq->lss_proc_dir != NULL) {
500 if (!IS_ERR(seq->lss_proc_dir))
501 lprocfs_remove(&seq->lss_proc_dir);
502 seq->lss_proc_dir = NULL;
/*
 * No-op stubs — presumably the #else branch used when procfs support
 * is compiled out (the #ifdef lines are in the listing gap; confirm).
 */
507 static int seq_server_proc_init(struct lu_server_seq *seq)
512 static void seq_server_proc_fini(struct lu_server_seq *seq)
/*
 * Initialize a server-side sequence manager.
 *
 * Sets up ranges, mutex and name ("srv-" or "ctl-" + prefix depending
 * on \a type), attaches the backing store on \a dev, loads persisted
 * space (or initializes defaults on -ENODATA and writes them back),
 * and registers the proc entries.  On error, seq_server_fini() undoes
 * the partial setup.
 */
519 int seq_server_init(struct lu_server_seq *seq,
520 struct dt_device *dev,
522 enum lu_mgr_type type,
523 struct seq_server_site *ss,
524 const struct lu_env *env)
526 int rc, is_srv = (type == LUSTRE_SEQ_SERVER);
529 LASSERT(dev != NULL);
530 LASSERT(prefix != NULL);
532 LASSERT(ss->ss_lu != NULL);
535 seq->lss_type = type;
537 range_init(&seq->lss_space);
539 range_init(&seq->lss_lowater_set);
540 range_init(&seq->lss_hiwater_set);
541 seq->lss_set_width = LUSTRE_SEQ_BATCH_WIDTH;
543 mutex_init(&seq->lss_mutex);
/* servers hand out meta-width slices; controllers super-width ones */
545 seq->lss_width = is_srv ?
546 LUSTRE_SEQ_META_WIDTH : LUSTRE_SEQ_SUPER_WIDTH;
548 snprintf(seq->lss_name, sizeof(seq->lss_name),
549 "%s-%s", (is_srv ? "srv" : "ctl"), prefix);
551 rc = seq_store_init(seq, env, dev);
554 /* Request backing store for saved sequence info. */
555 rc = seq_store_read(seq, env);
556 if (rc == -ENODATA) {
558 /* Nothing is read, init by default value. */
559 seq->lss_space = is_srv ?
560 LUSTRE_SEQ_ZERO_RANGE:
561 LUSTRE_SEQ_SPACE_RANGE;
564 seq->lss_space.lsr_index = ss->ss_node_id;
565 LCONSOLE_INFO("%s: No data found "
566 "on store. Initialize space\n",
/* persist the freshly-initialized default space */
569 rc = seq_store_update(env, seq, NULL, 0);
571 CERROR("%s: Can't write space data, "
572 "rc %d\n", seq->lss_name, rc);
575 CERROR("%s: Can't read space data, rc %d\n",
/* sanity: server space may be zero, controller space must not be */
581 LASSERT(range_is_sane(&seq->lss_space));
583 LASSERT(!range_is_zero(&seq->lss_space) &&
584 range_is_sane(&seq->lss_space));
587 rc = seq_server_proc_init(seq);
/* error path: roll back everything initialized above */
594 seq_server_fini(seq, env);
/*
 * Tear down a server-side sequence manager: remove proc entries and
 * release the backing store (reverse order of seq_server_init()).
 */
599 void seq_server_fini(struct lu_server_seq *seq,
600 const struct lu_env *env)
604 seq_server_proc_fini(seq);
605 seq_store_fini(seq, env);
609 EXPORT_SYMBOL(seq_server_fini);
/*
 * Release all sequence managers owned by \a ss: the regular server
 * sequence, the controller sequence, and the controller's client
 * sequence.  Each pointer is NULLed after freeing to make the
 * function idempotent.
 */
611 int seq_site_fini(const struct lu_env *env, struct seq_server_site *ss)
616 if (ss->ss_server_seq) {
617 seq_server_fini(ss->ss_server_seq, env);
618 OBD_FREE_PTR(ss->ss_server_seq);
619 ss->ss_server_seq = NULL;
622 if (ss->ss_control_seq) {
623 seq_server_fini(ss->ss_control_seq, env);
624 OBD_FREE_PTR(ss->ss_control_seq);
625 ss->ss_control_seq = NULL;
628 if (ss->ss_client_seq) {
629 seq_client_fini(ss->ss_client_seq);
630 OBD_FREE_PTR(ss->ss_client_seq);
631 ss->ss_client_seq = NULL;
636 EXPORT_SYMBOL(seq_site_fini);
638 cfs_proc_dir_entry_t *seq_type_proc_dir = NULL;
/*
 * Module init: register the top-level "seq" proc directory and the
 * per-thread context key used by seq_handle().
 */
640 static int __init fid_mod_init(void)
642 seq_type_proc_dir = lprocfs_register(LUSTRE_SEQ_NAME,
645 if (IS_ERR(seq_type_proc_dir))
646 return PTR_ERR(seq_type_proc_dir);
648 LU_CONTEXT_KEY_INIT(&seq_thread_key);
649 lu_context_key_register(&seq_thread_key);
/*
 * Module exit: deregister the thread-context key and remove the proc
 * directory created in fid_mod_init() (guarding against a failed or
 * never-completed registration).
 */
653 static void __exit fid_mod_exit(void)
655 lu_context_key_degister(&seq_thread_key);
656 if (seq_type_proc_dir != NULL && !IS_ERR(seq_type_proc_dir)) {
657 lprocfs_remove(&seq_type_proc_dir);
658 seq_type_proc_dir = NULL;
662 MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
663 MODULE_DESCRIPTION("Lustre FID Module");
664 MODULE_LICENSE("GPL");
666 cfs_module(fid, "0.1.0", fid_mod_init, fid_mod_exit);