4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2013, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lustre/fid/fid_handler.c
38 * Lustre Sequence Manager
40 * Author: Yury Umanets <umka@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_FID
45 #include <libcfs/libcfs.h>
46 #include <linux/module.h>
48 #include <obd_class.h>
49 #include <dt_object.h>
50 #include <md_object.h>
51 #include <obd_support.h>
52 #include <lustre_req_layout.h>
53 #include <lustre_fid.h>
54 #include "fid_internal.h"
/*
 * Forward declaration: seq_server_proc_fini() is defined further down in
 * this file, but it is already called from the proc setup code that
 * precedes that definition.
 */
56 static void seq_server_proc_fini(struct lu_server_seq *seq);
58 /* Assigns client to sequence controller node. */
/*
 * \param seq  sequence server; its lss_mutex is held around the update
 * \param cli  sequence client to attach
 * \param env  execution environment (not referenced by the lines visible
 *             here)
 *
 * On the attach path the index of the client's range (lcs_space.lsr_index)
 * is set to the node id of the server's site. If a client is already
 * assigned (seq->lss_cli != NULL), rc is set to -EEXIST and control jumps
 * to the out_up label.
 *
 * NOTE(review): the condition guarding the "Detached sequence client"
 * message is not visible here. If that branch is taken when cli is NULL,
 * the cli->lcs_name argument dereferences a NULL pointer -- confirm.
 */
59 int seq_server_set_cli(struct lu_server_seq *seq,
60 struct lu_client_seq *cli,
61 const struct lu_env *env)
67 * Ask client for new range, assign that range to ->seq_space and write
68 * seq state to backing store should be atomic.
70 mutex_lock(&seq->lss_mutex);
73 CDEBUG(D_INFO, "%s: Detached sequence client %s\n",
74 seq->lss_name, cli->lcs_name);
79 if (seq->lss_cli != NULL) {
80 CDEBUG(D_HA, "%s: Sequence controller is already "
81 "assigned\n", seq->lss_name);
82 GOTO(out_up, rc = -EEXIST);
85 CDEBUG(D_INFO, "%s: Attached sequence controller %s\n",
86 seq->lss_name, cli->lcs_name);
89 cli->lcs_space.lsr_index = seq->lss_site->ss_node_id;
92 mutex_unlock(&seq->lss_mutex);
95 EXPORT_SYMBOL(seq_server_set_cli);
97 * Allocate up to \a width units of sequence from the front of range \a from.
/*
 * Carve a sub-range off the front of \a from and store it in \a to.
 *
 * The requested width is clamped to range_space(from), so the resulting
 * range can be shorter than requested. \a from->lsr_start is advanced past
 * the carved-off part. Of \a to, only lsr_start and lsr_end are written by
 * the lines visible here.
 */
99 static inline void range_alloc(struct lu_seq_range *to,
100 struct lu_seq_range *from,
103 width = min(range_space(from), width);
104 to->lsr_start = from->lsr_start;
105 to->lsr_end = from->lsr_start + width;
106 from->lsr_start += width;
110 * On controller node, allocate new super sequence for regular sequence server.
111 * As this super sequence controller, this node is supposed to maintain fld
113 * \a out range always has correct mds node number of requester.
/*
 * Unlocked worker for super-sequence allocation; the wrapper
 * seq_server_alloc_super() calls it with seq->lss_mutex held.
 *
 * Asserts that seq->lss_space is sane and logs an error when it is
 * exhausted. Otherwise up to seq->lss_width sequences are carved off the
 * front of the space into \a out and the state is written with a
 * synchronous seq_store_update(). The outcome (rc and the range) is
 * reported through LCONSOLE_INFO.
 */
116 static int __seq_server_alloc_super(struct lu_server_seq *seq,
117 struct lu_seq_range *out,
118 const struct lu_env *env)
120 struct lu_seq_range *space = &seq->lss_space;
124 LASSERT(range_is_sane(space));
126 if (range_is_exhausted(space)) {
127 CERROR("%s: Sequences space is exhausted\n",
131 range_alloc(out, space, seq->lss_width);
134 rc = seq_store_update(env, seq, out, 1 /* sync */);
136 LCONSOLE_INFO("%s: super-sequence allocation rc = %d " DRANGE"\n",
137 seq->lss_name, rc, PRANGE(out));
/*
 * Locked entry point for super-sequence allocation: runs
 * __seq_server_alloc_super() with seq->lss_mutex held and keeps its
 * result in rc.
 */
142 int seq_server_alloc_super(struct lu_server_seq *seq,
143 struct lu_seq_range *out,
144 const struct lu_env *env)
149 mutex_lock(&seq->lss_mutex);
150 rc = __seq_server_alloc_super(seq, out, env);
151 mutex_unlock(&seq->lss_mutex);
/*
 * Manually allocate the caller-specified range \a spec.
 *
 * Two checks are made before the lock is taken: one for an empty or
 * inverted range (lsr_start >= lsr_end) and one for lsr_flags being
 * neither LU_SEQ_RANGE_MDT nor LU_SEQ_RANGE_OST. What is returned in those
 * cases is not visible here.
 *
 * Under seq->lss_mutex: when spec->lsr_start is at or beyond the start of
 * the free space, the start of the free space is moved to spec->lsr_end
 * (skipping anything in between) and the state is written with a
 * synchronous seq_store_update().
 */
156 int seq_server_alloc_spec(struct lu_server_seq *seq,
157 struct lu_seq_range *spec,
158 const struct lu_env *env)
160 struct lu_seq_range *space = &seq->lss_space;
165 * In some cases (like recovery after a disaster)
166 * we may need to allocate sequences manually
167 * Notice some sequences can be lost if requested
168 * range doesn't start at the beginning of current
169 * free space. Also notice it's not possible now
170 * to allocate sequences out of natural order.
172 if (spec->lsr_start >= spec->lsr_end)
174 if (spec->lsr_flags != LU_SEQ_RANGE_MDT &&
175 spec->lsr_flags != LU_SEQ_RANGE_OST)
178 mutex_lock(&seq->lss_mutex);
179 if (spec->lsr_start >= space->lsr_start) {
180 space->lsr_start = spec->lsr_end;
181 rc = seq_store_update(env, seq, spec, 1 /* sync */);
183 LCONSOLE_INFO("%s: "DRANGE" sequences allocated: rc = %d \n",
184 seq->lss_name, PRANGE(spec), rc);
186 mutex_unlock(&seq->lss_mutex);
/*
 * Carve the initial low-water and high-water sets, each up to
 * seq->lss_set_width wide, off the front of seq->lss_space (low-water
 * first), then write the state with seq_store_update() using sync
 * argument 1.
 */
191 static int __seq_set_init(const struct lu_env *env,
192 struct lu_server_seq *seq)
194 struct lu_seq_range *space = &seq->lss_space;
197 range_alloc(&seq->lss_lowater_set, space, seq->lss_set_width);
198 range_alloc(&seq->lss_hiwater_set, space, seq->lss_set_width);
200 rc = seq_store_update(env, seq, NULL, 1);
206 * This function implements new seq allocation algorithm using async
207 * updates to seq file on disk. ref bug 18857 for details.
208 * there are four variables to keep track of this process
210 * lss_space; - available lss_space
211 * lss_lowater_set; - lu_seq_range for all seqs before barrier, i.e. safe to use
212 * lss_hiwater_set; - lu_seq_range after barrier, i.e. allocated but may be
215 * when lss_lowater_set reaches the end it is replaced with hiwater one and
216 * a write operation is initiated to allocate new hiwater range.
217 * if last seq write operation is still not committed, current operation is
218 * flagged as sync write op.
/*
 * Hand out the next range of up to seq->lss_width sequences from the
 * low-water set into \a out.
 *
 * Steps visible here:
 *  - if the low-water set is zero, both sets are first set up through
 *    __seq_set_init();
 *  - the OBD_FAIL_SEQ_ALLOC fail check forces the low-water set to look
 *    exhausted;
 *  - when the low-water set is exhausted, a new set size is computed as
 *    the larger of lss_set_width and (obd_num_exports * lss_width), capped
 *    by the space that is left; a new high-water range of that size is
 *    carved from lss_space and the state is written with
 *    seq->lss_need_sync as the sync argument.
 *
 * NOTE(review): the return value of __seq_set_init() is not checked.
 */
220 static int range_alloc_set(const struct lu_env *env,
221 struct lu_seq_range *out,
222 struct lu_server_seq *seq)
224 struct lu_seq_range *space = &seq->lss_space;
225 struct lu_seq_range *loset = &seq->lss_lowater_set;
226 struct lu_seq_range *hiset = &seq->lss_hiwater_set;
229 if (range_is_zero(loset))
230 __seq_set_init(env, seq);
232 if (OBD_FAIL_CHECK(OBD_FAIL_SEQ_ALLOC)) /* exhaust set */
233 loset->lsr_start = loset->lsr_end;
235 if (range_is_exhausted(loset)) {
236 /* reached high water mark. */
237 struct lu_device *dev = seq->lss_site->ss_lu->ls_top_dev;
238 int obd_num_clients = dev->ld_obd->obd_num_exports;
241 /* calculate new seq width based on number of clients */
242 set_sz = max(seq->lss_set_width,
243 obd_num_clients * seq->lss_width);
244 set_sz = min(range_space(space), set_sz);
246 /* Switch to hiwater range now */
248 /* allocate new hiwater range */
249 range_alloc(hiset, space, set_sz);
251 /* update ondisk seq with new *space */
252 rc = seq_store_update(env, seq, NULL, seq->lss_need_sync);
255 LASSERTF(!range_is_exhausted(loset) || range_is_sane(loset),
256 DRANGE"\n", PRANGE(loset));
259 range_alloc(out, loset, seq->lss_width);
/*
 * Unlocked worker for meta-sequence allocation; the wrapper
 * seq_server_alloc_meta() calls it with seq->lss_mutex held.
 *
 * When seq->lss_space is exhausted, a new super-sequence is requested
 * through the attached client (seq->lss_cli) and the client's lcs_space is
 * copied into seq->lss_space. An error is logged when no controller is
 * attached; the check that triggers it is not visible here. The range
 * returned in \a out comes from range_alloc_set(). Failures are logged
 * with CERROR, success with CDEBUG(D_INFO).
 */
264 static int __seq_server_alloc_meta(struct lu_server_seq *seq,
265 struct lu_seq_range *out,
266 const struct lu_env *env)
268 struct lu_seq_range *space = &seq->lss_space;
273 LASSERT(range_is_sane(space));
275 /* Check if available space ends and allocate new super seq */
276 if (range_is_exhausted(space)) {
278 CERROR("%s: No sequence controller is attached.\n",
283 rc = seq_client_alloc_super(seq->lss_cli, env);
285 CERROR("%s: Can't allocate super-sequence, rc %d\n",
290 /* Saving new range to allocation space. */
291 *space = seq->lss_cli->lcs_space;
292 LASSERT(range_is_sane(space));
295 rc = range_alloc_set(env, out, seq);
297 CERROR("%s: Allocated meta-sequence failed: rc = %d\n",
302 CDEBUG(D_INFO, "%s: Allocated meta-sequence " DRANGE"\n",
303 seq->lss_name, PRANGE(out));
/*
 * Locked entry point for meta-sequence allocation: runs
 * __seq_server_alloc_meta() with seq->lss_mutex held and keeps its result
 * in rc. Exported for use by other modules.
 */
308 int seq_server_alloc_meta(struct lu_server_seq *seq,
309 struct lu_seq_range *out,
310 const struct lu_env *env)
315 mutex_lock(&seq->lss_mutex);
316 rc = __seq_server_alloc_meta(seq, out, env);
317 mutex_unlock(&seq->lss_mutex);
321 EXPORT_SYMBOL(seq_server_alloc_meta);
/*
 * Dispatch a sequence request by opcode \a opc, using the seq_server_site
 * obtained from \a site via lu_site2seq().
 *
 * Two branches are visible:
 *  - a meta allocation on ss_site->ss_server_seq through
 *    seq_server_alloc_meta() (its case label is not visible here);
 *  - SEQ_ALLOC_SUPER, a super allocation on ss_site->ss_control_seq
 *    through seq_server_alloc_super().
 * Each branch logs an error when the server object it needs is not set.
 */
323 static int seq_server_handle(struct lu_site *site,
324 const struct lu_env *env,
325 __u32 opc, struct lu_seq_range *out)
328 struct seq_server_site *ss_site;
331 ss_site = lu_site2seq(site);
335 if (!ss_site->ss_server_seq) {
336 CERROR("Sequence server is not "
340 rc = seq_server_alloc_meta(ss_site->ss_server_seq, out, env);
342 case SEQ_ALLOC_SUPER:
343 if (!ss_site->ss_control_seq) {
344 CERROR("Sequence controller is not "
348 rc = seq_server_alloc_super(ss_site->ss_control_seq, out, env);
/*
 * Request handler for sequence queries; it is the handler listed for
 * SEQ_QUERY in seq_handlers[].
 *
 * Asserts that the request message does not carry MSG_REPLAY and that the
 * lu_site reached through the export's obd device is non-NULL. It then
 * reads the opcode and the client's range from the request capsule, copies
 * lsr_index and lsr_flags from the client's range into the reply range,
 * and passes the opcode and reply range to seq_server_handle().
 * -EPROTO wrapped in err_serious() is used on two paths whose conditions
 * are not visible here.
 */
358 static int seq_handler(struct tgt_session_info *tsi)
360 struct lu_seq_range *out, *tmp;
361 struct lu_site *site;
367 LASSERT(!(lustre_msg_get_flags(tgt_ses_req(tsi)->rq_reqmsg) & MSG_REPLAY));
368 site = tsi->tsi_exp->exp_obd->obd_lu_dev->ld_site;
369 LASSERT(site != NULL);
371 opc = req_capsule_client_get(tsi->tsi_pill, &RMF_SEQ_OPC);
373 out = req_capsule_server_get(tsi->tsi_pill, &RMF_SEQ_RANGE);
375 RETURN(err_serious(-EPROTO));
377 tmp = req_capsule_client_get(tsi->tsi_pill, &RMF_SEQ_RANGE);
379 /* seq client passed mdt id, we need to pass that using out
382 out->lsr_index = tmp->lsr_index;
383 out->lsr_flags = tmp->lsr_flags;
384 rc = seq_server_handle(site, tsi->tsi_env, *opc, out);
386 rc = err_serious(-EPROTO);
/*
 * Target handler table for sequence requests: the one entry visible here
 * maps SEQ_QUERY to seq_handler(). The table is exported for use by other
 * modules.
 */
392 struct tgt_handler seq_handlers[] = {
393 TGT_SEQ_HDL(HABEO_REFERO, SEQ_QUERY, seq_handler),
395 EXPORT_SYMBOL(seq_handlers);
397 /* context key constructor/destructor: seq_key_init, seq_key_fini */
398 LU_KEY_INIT_FINI(seq, struct seq_thread_info);
400 /* context key: seq_thread_key */
401 LU_CONTEXT_KEY_DEFINE(seq, LCT_MD_THREAD | LCT_DT_THREAD);
/*
 * File operations for the "fldb" proc entry created in
 * seq_server_proc_init(); the object itself is defined outside the part of
 * the file visible here.
 */
403 extern const struct file_operations seq_fld_proc_seq_fops;
/*
 * Create the proc entries for \a seq.
 *
 * Registers a proc directory named seq->lss_name and stores the result in
 * seq->lss_proc_dir; an error pointer there is turned into rc via
 * PTR_ERR(). Then adds the variables from seq_server_proc_list and, for a
 * LUSTRE_SEQ_CONTROLLER, creates the "fldb" entry (mode 0644) backed by
 * seq_fld_proc_seq_fops. Failure of either of the last two steps is logged
 * and jumps to out_cleanup. The seq_server_proc_fini() call at the end is
 * presumably the body of that label -- confirm.
 */
405 static int seq_server_proc_init(struct lu_server_seq *seq)
412 seq->lss_proc_dir = lprocfs_register(seq->lss_name,
415 if (IS_ERR(seq->lss_proc_dir)) {
416 rc = PTR_ERR(seq->lss_proc_dir);
420 rc = lprocfs_add_vars(seq->lss_proc_dir,
421 seq_server_proc_list, seq);
423 CERROR("%s: Can't init sequence manager "
424 "proc, rc %d\n", seq->lss_name, rc);
425 GOTO(out_cleanup, rc);
428 if (seq->lss_type == LUSTRE_SEQ_CONTROLLER) {
429 rc = lprocfs_seq_create(seq->lss_proc_dir, "fldb", 0644,
430 &seq_fld_proc_seq_fops, seq);
432 CERROR("%s: Can't create fldb for sequence manager "
433 "proc: rc = %d\n", seq->lss_name, rc);
434 GOTO(out_cleanup, rc);
441 seq_server_proc_fini(seq);
/*
 * Tear down the proc directory of \a seq.
 *
 * Nothing is done when seq->lss_proc_dir is NULL. Otherwise the directory
 * is removed unless the field holds an error pointer, and the field is
 * reset to NULL. The IS_ERR() check matters because
 * seq_server_proc_init() stores the result of lprocfs_register() in the
 * field before testing it for an error.
 */
448 static void seq_server_proc_fini(struct lu_server_seq *seq)
452 if (seq->lss_proc_dir != NULL) {
453 if (!IS_ERR(seq->lss_proc_dir))
454 lprocfs_remove(&seq->lss_proc_dir);
455 seq->lss_proc_dir = NULL;
/*
 * Initialise the sequence manager \a seq of the given \a type.
 *
 * \param seq     manager to set up
 * \param dev     backing dt_device, passed to seq_store_init(); must not
 *                be NULL
 * \param prefix  name suffix; lss_name becomes "srv-<prefix>" for
 *                LUSTRE_SEQ_SERVER and "ctl-<prefix>" otherwise
 * \param type    manager type; also selects lss_width
 *                (LUSTRE_SEQ_META_WIDTH for a server,
 *                LUSTRE_SEQ_SUPER_WIDTH otherwise)
 * \param ss      site; ss->ss_lu must not be NULL, ss->ss_node_id is used
 *                as the index of a freshly initialised space
 * \param env     execution environment passed to the store helpers
 *
 * After the fields are reset, the saved state is read from the backing
 * store. When the read yields -ENODATA, the space is set to a default
 * (LUSTRE_SEQ_ZERO_RANGE for a server, LUSTRE_SEQ_SPACE_RANGE otherwise)
 * and written back with seq_store_update() using sync argument 0. The
 * space is then asserted to be sane; one of the two visible assertions
 * also requires it to be non-zero (which applies to which type is not
 * visible here). Finally the proc entries are created. The
 * seq_server_fini() call at the end is presumably the error path --
 * confirm.
 */
461 int seq_server_init(struct lu_server_seq *seq,
462 struct dt_device *dev,
464 enum lu_mgr_type type,
465 struct seq_server_site *ss,
466 const struct lu_env *env)
468 int rc, is_srv = (type == LUSTRE_SEQ_SERVER);
471 LASSERT(dev != NULL);
472 LASSERT(prefix != NULL);
474 LASSERT(ss->ss_lu != NULL);
476 /* A compile-time check for FIDs that used to be in lustre_idl.h
477 * but is moved here to remove CLASSERT/LASSERT in that header.
478 * Check all lu_fid fields are converted in fid_cpu_to_le() and friends
479 * and that there is no padding added by compiler to the struct. */
483 CLASSERT(sizeof(tst) == sizeof(tst.f_seq) +
484 sizeof(tst.f_oid) + sizeof(tst.f_ver));
488 seq->lss_type = type;
490 range_init(&seq->lss_space);
492 range_init(&seq->lss_lowater_set);
493 range_init(&seq->lss_hiwater_set);
494 seq->lss_set_width = LUSTRE_SEQ_BATCH_WIDTH;
496 mutex_init(&seq->lss_mutex);
498 seq->lss_width = is_srv ?
499 LUSTRE_SEQ_META_WIDTH : LUSTRE_SEQ_SUPER_WIDTH;
501 snprintf(seq->lss_name, sizeof(seq->lss_name),
502 "%s-%s", (is_srv ? "srv" : "ctl"), prefix);
504 rc = seq_store_init(seq, env, dev);
507 /* Request backing store for saved sequence info. */
508 rc = seq_store_read(seq, env);
509 if (rc == -ENODATA) {
511 /* Nothing is read, init by default value. */
512 seq->lss_space = is_srv ?
513 LUSTRE_SEQ_ZERO_RANGE:
514 LUSTRE_SEQ_SPACE_RANGE;
516 seq->lss_space.lsr_index = ss->ss_node_id;
517 LCONSOLE_INFO("%s: No data found "
518 "on store. Initialize space\n",
521 rc = seq_store_update(env, seq, NULL, 0);
523 CERROR("%s: Can't write space data, "
524 "rc %d\n", seq->lss_name, rc);
527 CERROR("%s: Can't read space data, rc %d\n",
533 LASSERT(range_is_sane(&seq->lss_space));
535 LASSERT(!range_is_zero(&seq->lss_space) &&
536 range_is_sane(&seq->lss_space));
539 rc = seq_server_proc_init(seq);
546 seq_server_fini(seq, env);
549 EXPORT_SYMBOL(seq_server_init);
/*
 * Finalise \a seq: removes its proc entries first, then releases the
 * backing store via seq_store_fini(). Exported for use by other modules.
 */
551 void seq_server_fini(struct lu_server_seq *seq,
552 const struct lu_env *env)
556 seq_server_proc_fini(seq);
557 seq_store_fini(seq, env);
561 EXPORT_SYMBOL(seq_server_fini);
/*
 * Release the sequence objects owned by site \a ss.
 *
 * For each of ss_server_seq, ss_control_seq and ss_client_seq that is set:
 * finalise it (seq_server_fini() for the two servers, seq_client_fini()
 * for the client), free it with OBD_FREE_PTR() and reset the pointer to
 * NULL. The value returned is not visible here.
 */
563 int seq_site_fini(const struct lu_env *env, struct seq_server_site *ss)
568 if (ss->ss_server_seq) {
569 seq_server_fini(ss->ss_server_seq, env);
570 OBD_FREE_PTR(ss->ss_server_seq);
571 ss->ss_server_seq = NULL;
574 if (ss->ss_control_seq) {
575 seq_server_fini(ss->ss_control_seq, env);
576 OBD_FREE_PTR(ss->ss_control_seq);
577 ss->ss_control_seq = NULL;
580 if (ss->ss_client_seq) {
581 seq_client_fini(ss->ss_client_seq);
582 OBD_FREE_PTR(ss->ss_client_seq);
583 ss->ss_client_seq = NULL;
588 EXPORT_SYMBOL(seq_site_fini);
/*
 * Module init hook for the server side: initialises seq_thread_key and
 * registers it, returning the result of lu_context_key_register().
 */
590 int fid_server_mod_init(void)
592 LU_CONTEXT_KEY_INIT(&seq_thread_key);
593 return lu_context_key_register(&seq_thread_key);
/*
 * Module exit hook for the server side: unregisters seq_thread_key.
 *
 * NOTE(review): "degister" is the spelling used at the call site and
 * presumably matches the declared API name -- do not "correct" it without
 * checking the declaration.
 */
596 void fid_server_mod_exit(void)
598 lu_context_key_degister(&seq_thread_key);