1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see [sun.com URL with a
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright 2008 Sun Microsystems, Inc. All rights reserved
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lustre/lov/lov_pool.c
40 * Author: Jacques-Charles LAFOUCRIERE <jc.lafoucriere@cea.fr>
43 #define DEBUG_SUBSYSTEM S_LOV
46 #include <libcfs/libcfs.h>
48 #include <liblustre.h>
52 #include "lov_internal.h"
// Take a reference on a pool: atomically increments pool_refcount.
54 static void lov_pool_getref(struct pool_desc *pool) {
55 atomic_inc(&pool->pool_refcount);
// Drop a reference on a pool. When atomic_dec_and_test() reports that the
// count reached zero, the two OST index arrays of the pool
// (pool_rr.lqr_pool and pool_obds) are released with lov_ost_pool_free().
// NOTE(review): any further teardown is not visible in this extract.
58 static void lov_pool_putref(struct pool_desc *pool) {
59 if (atomic_dec_and_test(&pool->pool_refcount)) {
60 lov_ost_pool_free(&(pool->pool_rr.lqr_pool));
61 lov_ost_pool_free(&(pool->pool_obds));
67 * hash function using a Rotating Hash algorithm
68 * Knuth, D. The Art of Computer Programming,
69 * Volume 3: Sorting and Searching,
71 * Addison Wesley, 1973
// Hash callback of the pools hash table; key is a pool name string.
// At most LOV_MAXPOOLNAME characters are folded into result, the scan
// stopping at the first NUL, and the result is reduced modulo mask.
// NOTE(review): a mask of 0 would divide by zero here; presumably callers
// never pass 0 - confirm.
73 static __u32 pool_hashfn(lustre_hash_t *hash_body, void *key, unsigned mask)
80 poolname = (char *)key;
81 for (i = 0; i < LOV_MAXPOOLNAME; i++) {
82 if (poolname[i] == '\0')
// mix in the next character: shift left by 4 and fold the top bits back in
84 result = (result << 4)^(result >> 28) ^ poolname[i];
86 return (result % mask);
// Key callback: returns the pool_name of the pool_desc that embeds hnode
// through its pool_hash member.
89 static void *pool_key(struct hlist_node *hnode)
91 struct pool_desc *pool;
93 pool = hlist_entry(hnode, struct pool_desc, pool_hash);
94 return (pool->pool_name);
// Compare callback: key is a pool name. It is compared with strncmp(), over
// at most LOV_MAXPOOLNAME characters, against the name of the pool that
// embeds compared_hnode.
// NOTE(review): how rc is turned into the return value is not visible in
// this extract.
97 static int pool_hashkey_compare(void *key, struct hlist_node *compared_hnode)
100 struct pool_desc *pool;
103 pool_name = (char *)key;
104 pool = hlist_entry(compared_hnode, struct pool_desc, pool_hash);
105 rc = strncmp(pool_name, pool->pool_name, LOV_MAXPOOLNAME);
// Reference "get" callback of the pools hash: takes a reference on the pool
// that embeds hnode.
// NOTE(review): the returned value is not visible in this extract.
109 static void *pool_hashrefcount_get(struct hlist_node *hnode)
111 struct pool_desc *pool;
113 pool = hlist_entry(hnode, struct pool_desc, pool_hash);
114 lov_pool_getref(pool);
// Reference "put" callback of the pools hash: drops a reference on the pool
// that embeds hnode. Dropping the last reference releases the pool arrays
// (see lov_pool_putref()).
// NOTE(review): the returned value is not visible in this extract.
118 static void *pool_hashrefcount_put(struct hlist_node *hnode)
120 struct pool_desc *pool;
122 pool = hlist_entry(hnode, struct pool_desc, pool_hash);
123 lov_pool_putref(pool);
// Callback table for the pools hash: hashing of a pool name, comparison of
// a name with a hashed pool, and reference get/put on a hashed pool.
127 lustre_hash_ops_t pool_hash_operations = {
128 .lh_hash = pool_hashfn,
130 .lh_compare = pool_hashkey_compare,
131 .lh_get = pool_hashrefcount_get,
132 .lh_put = pool_hashrefcount_put,
136 /* ifdef needed for liblustre support */
138 * pool /proc seq_file methods
141 * iterator is used to go through the target pool entries
142 * index is the current entry index in the lp_array[] array
143 * index >= pos returned to the seq_file interface
144 * pos is from 0 to (pool->pool_obds.op_count - 1)
// Tag stored in pool_iterator.magic. pool_proc_stop() checks it to tell an
// iterator apart from a plain pool_desc in the seq_file private field.
146 #define POOL_IT_MAGIC 0xB001CEA0
// Cursor kept in the seq_file private field while a pool /proc file is
// being read: the pool being listed and the current index into its targets.
147 struct pool_iterator {
149 struct pool_desc *pool;
150 int idx; /* from 0 to pool_tgt_size - 1 */
// seq_file next() callback.
// The iterator is read from s->private and its magic is asserted. *pos is
// tested against the number of targets in the pool to detect end of file.
// The index is then moved forward under the read side of the target rwsem;
// if it lands on pool_tgt_count() it is put back to its previous value so
// the iterator stays on the last entry.
// NOTE(review): the statement that advances iter->idx and the returned
// values are not visible in this extract.
153 static void *pool_proc_next(struct seq_file *s, void *v, loff_t *pos)
155 struct pool_iterator *iter = (struct pool_iterator *)s->private;
158 LASSERT(iter->magic == POOL_IT_MAGIC);
160 /* test if end of file */
161 if (*pos >= pool_tgt_count(iter->pool))
164 /* iterate to find a non empty entry */
165 prev_idx = iter->idx;
166 down_read(&pool_tgt_rw_sem(iter->pool));
168 if (iter->idx == pool_tgt_count(iter->pool)) {
169 iter->idx = prev_idx; /* we stay on the last entry */
170 up_read(&pool_tgt_rw_sem(iter->pool));
173 up_read(&pool_tgt_rw_sem(iter->pool));
175 /* return != NULL to continue */
// seq_file start() callback.
// On entry s->private is the pool_desc. A pool reference is taken first.
// When the pool has no targets, or *pos is at or past the target count, the
// reference is dropped again at once because no iterator will exist for
// stop() to find the pool through.
// Otherwise an iterator tagged with POOL_IT_MAGIC is set up and
// pool_proc_next() is called repeatedly until position *pos is reached or
// it returns NULL.
// NOTE(review): the allocation whose failure yields ERR_PTR(-ENOMEM), the
// store of the iterator into s->private and the final return are not
// visible in this extract.
179 static void *pool_proc_start(struct seq_file *s, loff_t *pos)
181 struct pool_desc *pool = (struct pool_desc *)s->private;
182 struct pool_iterator *iter;
184 lov_pool_getref(pool);
185 if ((pool_tgt_count(pool) == 0) ||
186 (*pos >= pool_tgt_count(pool))) {
187 /* iter is not created, so stop() has no way to
188 * find pool to dec ref */
189 lov_pool_putref(pool);
195 return ERR_PTR(-ENOMEM);
196 iter->magic = POOL_IT_MAGIC;
200 /* we use seq_file private field to memorize the iterator so
201 * we can free it at stop() */
202 /* /!\ do not forget to restore it to pool before freeing it */
210 ptr = pool_proc_next(s, &iter, &i);
211 } while ((i < *pos) && (ptr != NULL));
// seq_file stop() callback.
// s->private may hold either an iterator or the bare pool. Only when it is
// a non-NULL iterator carrying POOL_IT_MAGIC is s->private pointed back at
// the pool and a pool reference dropped.
// NOTE(review): the release of the iterator memory is not visible in this
// extract.
217 static void pool_proc_stop(struct seq_file *s, void *v)
219 struct pool_iterator *iter = (struct pool_iterator *)s->private;
221 /* in some cases stop() method is called 2 times, without
222 * calling start() method (see seq_read() from fs/seq_file.c)
223 * we have to free only if s->private is an iterator */
224 if ((iter) && (iter->magic == POOL_IT_MAGIC)) {
225 /* we restore s->private so next call to pool_proc_start()
227 s->private = iter->pool;
228 lov_pool_putref(iter->pool);
// seq_file show() callback; v is the pool_iterator.
// Asserts the iterator magic, a non-NULL pool and idx <= target count, then
// fetches the target at iter->idx under the read side of the target rwsem
// and prints its UUID followed by a newline.
// NOTE(review): the assertion admits idx == target count, tgt is used after
// the rwsem is released, and any NULL check on tgt is not visible in this
// extract - confirm.
234 static int pool_proc_show(struct seq_file *s, void *v)
236 struct pool_iterator *iter = (struct pool_iterator *)v;
237 struct lov_tgt_desc *tgt;
239 LASSERT(iter->magic == POOL_IT_MAGIC);
240 LASSERT(iter->pool != NULL);
241 LASSERT(iter->idx <= pool_tgt_count(iter->pool));
243 down_read(&pool_tgt_rw_sem(iter->pool));
244 tgt = pool_tgt(iter->pool, iter->idx);
245 up_read(&pool_tgt_rw_sem(iter->pool));
247 seq_printf(s, "%s\n", obd_uuid2str(&(tgt->ltd_uuid)));
// seq_file iteration callbacks used for a pool /proc file.
252 static struct seq_operations pool_proc_ops = {
253 .start = pool_proc_start,
254 .next = pool_proc_next,
255 .stop = pool_proc_stop,
256 .show = pool_proc_show,
// open() handler of a pool /proc file: opens the seq_file with
// pool_proc_ops and stores the data pointer of the proc entry in the
// seq_file private field (pool_proc_start() reads it back as a pool_desc).
// NOTE(review): the test guarding the assignment is not visible here;
// presumably it runs only when seq_open() succeeded - confirm.
259 static int pool_proc_open(struct inode *inode, struct file *file)
263 rc = seq_open(file, &pool_proc_ops);
265 struct seq_file *s = file->private_data;
266 s->private = PROC_I(inode)->pde->data;
// File operations of a pool /proc file: opened through pool_proc_open() and
// released with the stock seq_release().
// NOTE(review): the remaining members are not visible in this extract.
271 static struct file_operations pool_proc_operations = {
272 .open = pool_proc_open,
275 .release = seq_release,
// Log the content of a pool with CDEBUG at debug mask level.
// A pool reference is held for the duration of the call. The pool name and
// member count are logged first, then the members are walked under the read
// side of the target rwsem and the UUID of each one is logged.
// Entries with no target descriptor or no export (ltd_exp) are tested for
// before logging.
// NOTE(review): the action taken for such entries is not visible in this
// extract (presumably they are skipped). The member count in the first
// message is read before the rwsem is taken.
279 void lov_dump_pool(int level, struct pool_desc *pool)
283 lov_pool_getref(pool);
285 CDEBUG(level, "pool "LOV_POOLNAMEF" has %d members\n",
286 pool->pool_name, pool->pool_obds.op_count);
287 down_read(&pool_tgt_rw_sem(pool));
289 for (i = 0; i < pool_tgt_count(pool) ; i++) {
290 if (!pool_tgt(pool, i) || !(pool_tgt(pool, i))->ltd_exp)
292 CDEBUG(level, "pool "LOV_POOLNAMEF"[%d] = %s\n",
294 obd_uuid2str(&((pool_tgt(pool, i))->ltd_uuid)));
297 up_read(&pool_tgt_rw_sem(pool));
298 lov_pool_putref(pool);
// Default number of entries that lov_ost_pool_init() substitutes for the
// count passed by the caller.
301 #define LOV_POOL_INIT_COUNT 2
// Set up an ost_pool: initialises op_rw_sem and allocates op_array with
// room for op_size entries, with a failure branch when the allocation
// returns NULL.
// NOTE(review): the condition under which count is replaced by
// LOV_POOL_INIT_COUNT (presumably count == 0, which is what lov_pool_new()
// passes), the assignment of op_size and the return values are not visible
// in this extract.
302 int lov_ost_pool_init(struct ost_pool *op, unsigned int count)
305 count = LOV_POOL_INIT_COUNT;
308 init_rwsem(&op->op_rw_sem);
310 OBD_ALLOC(op->op_array, op->op_size * sizeof(op->op_array[0]));
311 if (op->op_array == NULL) {
// Make room in op_array for at least one more entry.
// max_count must be non-zero (asserted). Nothing needs to grow while
// op_count < op_size. Otherwise a new array of min(max_count, 2 * op_size)
// entries is allocated, the op_size old entries are copied into it, the old
// array is freed and op_size is updated.
// NOTE(review): if max_count were smaller than op_size, the memcpy would
// copy more entries than the new array holds - confirm callers guarantee
// max_count >= op_size. The allocation failure path and the return values
// are not visible in this extract.
318 /* Caller must hold op_rw_sem for writing */
319 int lov_ost_pool_extend(struct ost_pool *op, unsigned int max_count)
324 LASSERT(max_count != 0);
326 if (op->op_count < op->op_size)
329 new_size = min(max_count, 2 * op->op_size);
330 OBD_ALLOC(new, new_size * sizeof(op->op_array[0]));
334 /* copy old array to new one */
335 memcpy(new, op->op_array, op->op_size * sizeof(op->op_array[0]));
336 OBD_FREE(op->op_array, op->op_size * sizeof(op->op_array[0]));
338 op->op_size = new_size;
// Add OST index idx to an ost_pool.
// Runs with op_rw_sem held for writing. The array is first extended through
// lov_ost_pool_extend() with max_count as the size limit. The existing
// entries are then scanned; if idx is already present the function leaves
// through the out label with -EEXIST. Otherwise idx is stored at position
// op_count.
// NOTE(review): the check of the lov_ost_pool_extend() result, the
// increment of op_count and the final return are not visible in this
// extract.
342 int lov_ost_pool_add(struct ost_pool *op, __u32 idx, unsigned int max_count)
347 down_write(&op->op_rw_sem);
349 rc = lov_ost_pool_extend(op, max_count);
353 /* search ost in pool array */
354 for (i = 0; i < op->op_count; i++) {
355 if (op->op_array[i] == idx)
356 GOTO(out, rc = -EEXIST);
358 /* ost not found we add it */
359 op->op_array[op->op_count] = idx;
362 up_write(&op->op_rw_sem);
// Remove OST index idx from an ost_pool.
// Runs with op_rw_sem held for writing. The entries are scanned for idx; on
// a match the entries after it are moved down by one slot with memmove()
// and the lock is released. The lock is also released when the scan ends
// without a match.
// NOTE(review): the decrement of op_count and the values returned on the
// two paths are not visible in this extract.
366 int lov_ost_pool_remove(struct ost_pool *op, __u32 idx)
370 down_write(&op->op_rw_sem);
371 for (i = 0; i < op->op_count; i++) {
372 if (op->op_array[i] == idx) {
373 memmove(&op->op_array[i], &op->op_array[i + 1],
374 (op->op_count - i - 1) * sizeof(op->op_array[0]));
376 up_write(&op->op_rw_sem);
380 up_write(&op->op_rw_sem);
// Release the array of an ost_pool.
// A pool whose op_size is 0 is handled by an early exit before any locking.
// Otherwise op_array (op_size entries) is freed with op_rw_sem held for
// writing.
// NOTE(review): the reset of the op fields after the free and the return
// values are not visible in this extract.
384 int lov_ost_pool_free(struct ost_pool *op)
386 if (op->op_size == 0)
389 down_write(&op->op_rw_sem);
390 OBD_FREE(op->op_array, op->op_size * sizeof(op->op_array[0]));
394 up_write(&op->op_rw_sem);
// Create a new, empty pool named poolname on the LOV device obd.
//
// Steps visible here, in order:
//  - a name longer than LOV_MAXPOOLNAME is rejected with -ENAMETOOLONG;
//  - the descriptor is allocated, the name copied and NUL-terminated at
//    index LOV_MAXPOOLNAME, and the reference count set to 1;
//  - the member array (pool_obds) and the round-robin array
//    (pool_rr.lqr_pool) are initialised with the default size; pool_obds is
//    freed again on the failure path of the second initialisation;
//  - the pool is inserted in the pools hash with lustre_hash_add_unique();
//    on the failure path both arrays are freed and the function leaves
//    through out_err with -EEXIST;
//  - the pool is appended to lov_pool_list and lov_pool_count incremented,
//    both under obd_dev_lock;
//  - an extra reference is taken for the /proc file and the entry is
//    registered with pool_proc_operations; if registration returns an error
//    pointer a warning is logged, pool_proc_entry is set to NULL and the
//    extra reference is dropped.
// The descriptor itself is released with OBD_FREE_PTR() on the error exit.
// NOTE(review): the tests guarding each failure path, the out_err label and
// the return statements are not visible in this extract.
399 int lov_pool_new(struct obd_device *obd, char *poolname)
402 struct pool_desc *new_pool;
408 if (strlen(poolname) > LOV_MAXPOOLNAME)
409 RETURN(-ENAMETOOLONG);
411 OBD_ALLOC_PTR(new_pool);
412 if (new_pool == NULL)
415 strncpy(new_pool->pool_name, poolname, LOV_MAXPOOLNAME);
416 new_pool->pool_name[LOV_MAXPOOLNAME] = '\0';
417 new_pool->pool_lov = lov;
418 /* ref count init to 1 because when created a pool is always used
421 atomic_set(&new_pool->pool_refcount, 1);
422 rc = lov_ost_pool_init(&new_pool->pool_obds, 0);
426 memset(&(new_pool->pool_rr), 0, sizeof(struct lov_qos_rr));
427 rc = lov_ost_pool_init(&new_pool->pool_rr.lqr_pool, 0);
429 lov_ost_pool_free(&new_pool->pool_obds);
433 INIT_HLIST_NODE(&new_pool->pool_hash);
434 rc = lustre_hash_add_unique(lov->lov_pools_hash_body, poolname,
435 &new_pool->pool_hash);
437 lov_ost_pool_free(&new_pool->pool_rr.lqr_pool);
438 lov_ost_pool_free(&new_pool->pool_obds);
439 GOTO(out_err, rc = -EEXIST);
442 spin_lock(&obd->obd_dev_lock);
443 list_add_tail(&new_pool->pool_list, &lov->lov_pool_list);
444 lov->lov_pool_count++;
446 spin_unlock(&obd->obd_dev_lock);
448 CDEBUG(D_CONFIG, LOV_POOLNAMEF" is pool #%d\n",
449 poolname, lov->lov_pool_count);
452 /* ifdef needed for liblustre */
453 /* get ref for /proc file */
454 lov_pool_getref(new_pool);
455 new_pool->pool_proc_entry = lprocfs_add_simple(lov->lov_pool_proc_entry,
456 poolname, NULL, NULL,
458 &pool_proc_operations);
461 if (IS_ERR(new_pool->pool_proc_entry)) {
462 CWARN("Cannot add proc pool entry "LOV_POOLNAMEF"\n", poolname);
463 new_pool->pool_proc_entry = NULL;
464 lov_pool_putref(new_pool);
470 OBD_FREE_PTR(new_pool);
// Delete the pool named poolname from the LOV device obd.
//
// The pool is looked up in the pools hash with obd_dev_lock held. If it has
// a /proc entry, the entry is removed and the reference held for it is
// dropped. The pool is then removed from the hash and from the pool list,
// lov_pool_count is decremented and lh_put() is called on the pool
// (presumably releasing the reference taken by the lookup), after which
// obd_dev_lock is released. Finally the reference held since creation is
// dropped; the pool arrays are released once the count reaches zero.
// NOTE(review): the handling of a failed lookup is not visible in this
// extract. remove_proc_entry() appears to be called while obd_dev_lock, a
// spinlock, is held - confirm that it cannot sleep on this path.
474 int lov_pool_del(struct obd_device *obd, char *poolname)
477 struct pool_desc *pool;
482 spin_lock(&obd->obd_dev_lock);
483 pool = lustre_hash_lookup(lov->lov_pools_hash_body, poolname);
485 spin_unlock(&obd->obd_dev_lock);
490 if (pool->pool_proc_entry != NULL) {
491 remove_proc_entry(pool->pool_proc_entry->name,
492 pool->pool_proc_entry->parent);
493 lov_pool_putref(pool);
497 lustre_hash_del_key(lov->lov_pools_hash_body, poolname);
498 list_del_init(&pool->pool_list);
500 lov->lov_pool_count--;
501 lh_put(lov->lov_pools_hash_body, &pool->pool_hash);
502 spin_unlock(&obd->obd_dev_lock);
504 /* remove ref got when pool was created in memory
505 * pool will be freed when refount will reach 0
507 lov_pool_putref(pool);
// Add the OST named ostname to the pool named poolname.
//
// The pool is looked up in the pools hash and ostname is converted to a
// UUID. The LOV target array is then scanned, with lov_lock held, for a
// target with that UUID, empty slots being tested for first. If the scan
// reaches ld_tgt_count without a match the function leaves through the out
// label with -EINVAL. Otherwise the target index is added to pool_obds
// (size limit lov_tgt_size), the round-robin state is marked dirty and the
// addition is logged. lh_put() is called on the pool before returning.
// NOTE(review): the handling of a failed lookup, the assignment of lov_idx,
// the check of the lov_ost_pool_add() result and the return statement are
// not visible in this extract.
513 int lov_pool_add(struct obd_device *obd, char *poolname, char *ostname)
515 struct obd_uuid ost_uuid;
517 struct pool_desc *pool;
518 unsigned int i, lov_idx;
524 pool = lustre_hash_lookup(lov->lov_pools_hash_body, poolname);
528 obd_str2uuid(&ost_uuid, ostname);
530 /* search ost in lov array */
531 mutex_down(&lov->lov_lock);
532 for (i = 0; i < lov->desc.ld_tgt_count; i++) {
533 if (!lov->lov_tgts[i])
535 if (obd_uuid_equals(&ost_uuid, &(lov->lov_tgts[i]->ltd_uuid)))
539 /* test if ost found in lov */
540 if (i == lov->desc.ld_tgt_count) {
541 mutex_up(&lov->lov_lock);
542 GOTO(out, rc = -EINVAL);
544 mutex_up(&lov->lov_lock);
548 rc = lov_ost_pool_add(&pool->pool_obds, lov_idx, lov->lov_tgt_size);
552 pool->pool_rr.lqr_dirty = 1;
554 CDEBUG(D_CONFIG, "Added %s to "LOV_POOLNAMEF" as member %d\n",
555 ostname, poolname, pool_tgt_count(pool));
559 lh_put(lov->lov_pools_hash_body, &pool->pool_hash);
// Remove the OST named ostname from the pool named poolname.
//
// The pool is looked up in the pools hash with obd_dev_lock held and
// ostname is converted to a UUID. The LOV target array is scanned for a
// target with that UUID, empty slots being tested for first. If the scan
// reaches ld_tgt_count without a match the lock is released and the
// function leaves through the out label with -EINVAL. Otherwise the lock is
// released, the target index is removed from pool_obds, the round-robin
// state is marked dirty and the removal is logged. lh_put() is called on
// the pool before returning.
// NOTE(review): lov_pool_add() scans lov_tgts under lov_lock whereas this
// function does so under obd_dev_lock - confirm which lock is intended.
// The result of lov_ost_pool_remove() is not stored on the visible line.
// The handling of a failed lookup, the assignment of lov_idx and the return
// statement are not visible in this extract.
563 int lov_pool_remove(struct obd_device *obd, char *poolname, char *ostname)
565 struct obd_uuid ost_uuid;
567 struct pool_desc *pool;
568 unsigned int i, lov_idx;
574 spin_lock(&obd->obd_dev_lock);
575 pool = lustre_hash_lookup(lov->lov_pools_hash_body, poolname);
577 spin_unlock(&obd->obd_dev_lock);
581 obd_str2uuid(&ost_uuid, ostname);
583 /* search ost in lov array, to get index */
584 for (i = 0; i < lov->desc.ld_tgt_count; i++) {
585 if (!lov->lov_tgts[i])
588 if (obd_uuid_equals(&ost_uuid, &(lov->lov_tgts[i]->ltd_uuid)))
592 /* test if ost found in lov */
593 if (i == lov->desc.ld_tgt_count) {
594 spin_unlock(&obd->obd_dev_lock);
595 GOTO(out, rc = -EINVAL);
598 spin_unlock(&obd->obd_dev_lock);
602 lov_ost_pool_remove(&pool->pool_obds, lov_idx);
604 pool->pool_rr.lqr_dirty = 1;
606 CDEBUG(D_CONFIG, "%s removed from "LOV_POOLNAMEF"\n", ostname,
611 lh_put(lov->lov_pools_hash_body, &pool->pool_hash);
// Test whether OST index idx is a member of pool.
// A pool reference is taken for the duration of the call, since the caller
// may not hold one. The target array of the pool is scanned for idx under
// the read side of the target rwsem, then the lock and the reference are
// released.
// NOTE(review): the values returned for "found" and "not found" are not
// visible in this extract.
615 int lov_check_index_in_pool(__u32 idx, struct pool_desc *pool)
620 /* caller may no have a ref on pool if it got the pool
621 * without calling lov_find_pool() (e.g. go through the lov pool
624 lov_pool_getref(pool);
626 down_read(&pool_tgt_rw_sem(pool));
627 for (i = 0; i < pool_tgt_count(pool); i++) {
628 if (pool_tgt_array(pool)[i] == idx)
634 up_read(&pool_tgt_rw_sem(pool));
636 lov_pool_putref(pool);
640 struct pool_desc *lov_find_pool(struct lov_obd *lov, char *poolname)
642 struct pool_desc *pool;
645 if (poolname[0] != '\0') {
646 pool = lustre_hash_lookup(lov->lov_pools_hash_body, poolname);
648 CWARN("Request for an unknown pool ("LOV_POOLNAMEF")\n",
650 if ((pool != NULL) && (pool_tgt_count(pool) == 0)) {
651 CWARN("Request for an empty pool ("LOV_POOLNAMEF")\n",
653 /* pool is ignored, so we remove ref on it */
654 lh_put(lov->lov_pools_hash_body, &pool->pool_hash);