Whamcloud - gitweb
Mass conversion of all copyright messages to Oracle.
[fs/lustre-release.git] / lustre / lov / lov_pool.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see [sun.com URL with a
20  * copy of GPLv2].
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/lov/lov_pool.c
37  *
38  * OST pool methods
39  *
40  * Author: Jacques-Charles LAFOUCRIERE <jc.lafoucriere@cea.fr>
41  * Author: Alex Lyashkov <Alexey.Lyashkov@Sun.COM>
42  * Author: Nathaniel Rutman <Nathan.Rutman@Sun.COM>
43  */
44
45 #define DEBUG_SUBSYSTEM S_LOV
46
47 #ifdef __KERNEL__
48 #include <libcfs/libcfs.h>
49 #else
50 #include <liblustre.h>
51 #endif
52
53 #include <obd.h>
54 #include "lov_internal.h"
55
56 static void lov_pool_getref(struct pool_desc *pool)
57 {
58         CDEBUG(D_INFO, "pool %p\n", pool);
59         cfs_atomic_inc(&pool->pool_refcount);
60 }
61
62 void lov_pool_putref(struct pool_desc *pool) 
63 {
64         CDEBUG(D_INFO, "pool %p\n", pool);
65         if (cfs_atomic_dec_and_test(&pool->pool_refcount)) {
66                 LASSERT(cfs_hlist_unhashed(&pool->pool_hash));
67                 LASSERT(cfs_list_empty(&pool->pool_list));
68                 LASSERT(pool->pool_proc_entry == NULL);
69                 lov_ost_pool_free(&(pool->pool_rr.lqr_pool));
70                 lov_ost_pool_free(&(pool->pool_obds));
71                 OBD_FREE_PTR(pool);
72                 EXIT;
73         }
74 }
75
76
77 /*
78  * hash function using a Rotating Hash algorithm
79  * Knuth, D. The Art of Computer Programming,
80  * Volume 3: Sorting and Searching,
81  * Chapter 6.4.
82  * Addison Wesley, 1973
83  */
84 static __u32 pool_hashfn(cfs_hash_t *hash_body, void *key, unsigned mask)
85 {
86         int i;
87         __u32 result;
88         char *poolname;
89
90         result = 0;
91         poolname = (char *)key;
92         for (i = 0; i < LOV_MAXPOOLNAME; i++) {
93                 if (poolname[i] == '\0')
94                         break;
95                 result = (result << 4)^(result >> 28) ^  poolname[i];
96         }
97         return (result % mask);
98 }
99
100 static void *pool_key(cfs_hlist_node_t *hnode)
101 {
102         struct pool_desc *pool;
103
104         pool = cfs_hlist_entry(hnode, struct pool_desc, pool_hash);
105         return (pool->pool_name);
106 }
107
108 static int pool_hashkey_compare(void *key, cfs_hlist_node_t *compared_hnode)
109 {
110         char *pool_name;
111         struct pool_desc *pool;
112         int rc;
113
114         pool_name = (char *)key;
115         pool = cfs_hlist_entry(compared_hnode, struct pool_desc, pool_hash);
116         rc = strncmp(pool_name, pool->pool_name, LOV_MAXPOOLNAME);
117         return (!rc);
118 }
119
120 static void *pool_hashrefcount_get(cfs_hlist_node_t *hnode)
121 {
122         struct pool_desc *pool;
123
124         pool = cfs_hlist_entry(hnode, struct pool_desc, pool_hash);
125         lov_pool_getref(pool);
126         return (pool);
127 }
128
129 static void *pool_hashrefcount_put(cfs_hlist_node_t *hnode)
130 {
131         struct pool_desc *pool;
132
133         pool = cfs_hlist_entry(hnode, struct pool_desc, pool_hash);
134         lov_pool_putref(pool);
135         return (pool);
136 }
137
138 cfs_hash_ops_t pool_hash_operations = {
139         .hs_hash        = pool_hashfn,
140         .hs_key         = pool_key,
141         .hs_compare     = pool_hashkey_compare,
142         .hs_get         = pool_hashrefcount_get,
143         .hs_put         = pool_hashrefcount_put,
144 };
145
146 #ifdef LPROCFS
147 /* ifdef needed for liblustre support */
148 /*
149  * pool /proc seq_file methods
150  */
/*
 * The iterator walks the members of one pool on behalf of the seq_file
 * interface.  It is stored in the seq_file private field while an iteration
 * is in progress; the magic value lets the stop() method recognise that the
 * private field holds an iterator.
 */
#define POOL_IT_MAGIC 0xB001CEA0
struct pool_iterator {
        /* set to POOL_IT_MAGIC when the iterator is created */
        int magic;
        /* pool being walked */
        struct pool_desc *pool;
        /* current member index, from 0 to pool_tgt_size - 1 */
        int idx;
};
163
164 static void *pool_proc_next(struct seq_file *s, void *v, loff_t *pos)
165 {
166         struct pool_iterator *iter = (struct pool_iterator *)s->private;
167         int prev_idx;
168
169         LASSERTF(iter->magic == POOL_IT_MAGIC, "%08X", iter->magic);
170
171         /* test if end of file */
172         if (*pos >= pool_tgt_count(iter->pool))
173                 return NULL;
174
175         /* iterate to find a non empty entry */
176         prev_idx = iter->idx;
177         cfs_down_read(&pool_tgt_rw_sem(iter->pool));
178         iter->idx++;
179         if (iter->idx == pool_tgt_count(iter->pool)) {
180                 iter->idx = prev_idx; /* we stay on the last entry */
181                 cfs_up_read(&pool_tgt_rw_sem(iter->pool));
182                 return NULL;
183         }
184         cfs_up_read(&pool_tgt_rw_sem(iter->pool));
185         (*pos)++;
186         /* return != NULL to continue */
187         return iter;
188 }
189
190 static void *pool_proc_start(struct seq_file *s, loff_t *pos)
191 {
192         struct pool_desc *pool = (struct pool_desc *)s->private;
193         struct pool_iterator *iter;
194
195         lov_pool_getref(pool);
196         if ((pool_tgt_count(pool) == 0) ||
197             (*pos >= pool_tgt_count(pool))) {
198                 /* iter is not created, so stop() has no way to
199                  * find pool to dec ref */
200                 lov_pool_putref(pool);
201                 return NULL;
202         }
203
204         OBD_ALLOC_PTR(iter);
205         if (!iter)
206                 return ERR_PTR(-ENOMEM);
207         iter->magic = POOL_IT_MAGIC;
208         iter->pool = pool;
209         iter->idx = 0;
210
211         /* we use seq_file private field to memorized iterator so
212          * we can free it at stop() */
213         /* /!\ do not forget to restore it to pool before freeing it */
214         s->private = iter;
215         if (*pos > 0) {
216                 loff_t i;
217                 void *ptr;
218
219                 i = 0;
220                 do {
221                      ptr = pool_proc_next(s, &iter, &i);
222                 } while ((i < *pos) && (ptr != NULL));
223                 return ptr;
224         }
225         return iter;
226 }
227
228 static void pool_proc_stop(struct seq_file *s, void *v)
229 {
230         struct pool_iterator *iter = (struct pool_iterator *)s->private;
231
232         /* in some cases stop() method is called 2 times, without
233          * calling start() method (see seq_read() from fs/seq_file.c)
234          * we have to free only if s->private is an iterator */
235         if ((iter) && (iter->magic == POOL_IT_MAGIC)) {
236                 /* we restore s->private so next call to pool_proc_start()
237                  * will work */
238                 s->private = iter->pool;
239                 lov_pool_putref(iter->pool);
240                 OBD_FREE_PTR(iter);
241         }
242         return;
243 }
244
245 static int pool_proc_show(struct seq_file *s, void *v)
246 {
247         struct pool_iterator *iter = (struct pool_iterator *)v;
248         struct lov_tgt_desc *tgt;
249
250         LASSERTF(iter->magic == POOL_IT_MAGIC, "%08X", iter->magic);
251         LASSERT(iter->pool != NULL);
252         LASSERT(iter->idx <= pool_tgt_count(iter->pool));
253
254         cfs_down_read(&pool_tgt_rw_sem(iter->pool));
255         tgt = pool_tgt(iter->pool, iter->idx);
256         cfs_up_read(&pool_tgt_rw_sem(iter->pool));
257         if (tgt)
258                 seq_printf(s, "%s\n", obd_uuid2str(&(tgt->ltd_uuid)));
259
260         return 0;
261 }
262
263 static struct seq_operations pool_proc_ops = {
264         .start          = pool_proc_start,
265         .next           = pool_proc_next,
266         .stop           = pool_proc_stop,
267         .show           = pool_proc_show,
268 };
269
270 static int pool_proc_open(struct inode *inode, struct file *file)
271 {
272         int rc;
273
274         rc = seq_open(file, &pool_proc_ops);
275         if (!rc) {
276                 struct seq_file *s = file->private_data;
277                 s->private = PROC_I(inode)->pde->data;
278         }
279         return rc;
280 }
281
282 static struct file_operations pool_proc_operations = {
283         .open           = pool_proc_open,
284         .read           = seq_read,
285         .llseek         = seq_lseek,
286         .release        = seq_release,
287 };
288 #endif /* LPROCFS */
289
290 void lov_dump_pool(int level, struct pool_desc *pool)
291 {
292         int i;
293
294         lov_pool_getref(pool);
295
296         CDEBUG(level, "pool "LOV_POOLNAMEF" has %d members\n",
297                pool->pool_name, pool->pool_obds.op_count);
298         cfs_down_read(&pool_tgt_rw_sem(pool));
299
300         for (i = 0; i < pool_tgt_count(pool) ; i++) {
301                 if (!pool_tgt(pool, i) || !(pool_tgt(pool, i))->ltd_exp)
302                         continue;
303                 CDEBUG(level, "pool "LOV_POOLNAMEF"[%d] = %s\n",
304                        pool->pool_name, i,
305                        obd_uuid2str(&((pool_tgt(pool, i))->ltd_uuid)));
306         }
307
308         cfs_up_read(&pool_tgt_rw_sem(pool));
309         lov_pool_putref(pool);
310 }
311
312 #define LOV_POOL_INIT_COUNT 2
313 int lov_ost_pool_init(struct ost_pool *op, unsigned int count)
314 {
315         ENTRY;
316
317         if (count == 0)
318                 count = LOV_POOL_INIT_COUNT;
319         op->op_array = NULL;
320         op->op_count = 0;
321         cfs_init_rwsem(&op->op_rw_sem);
322         op->op_size = count;
323         OBD_ALLOC(op->op_array, op->op_size * sizeof(op->op_array[0]));
324         if (op->op_array == NULL) {
325                 op->op_size = 0;
326                 RETURN(-ENOMEM);
327         }
328         EXIT;
329         return 0;
330 }
331
332 /* Caller must hold write op_rwlock */
333 int lov_ost_pool_extend(struct ost_pool *op, unsigned int min_count)
334 {
335         __u32 *new;
336         int new_size;
337
338         LASSERT(min_count != 0);
339
340         if (op->op_count < op->op_size)
341                 return 0;
342
343         new_size = max(min_count, 2 * op->op_size);
344         OBD_ALLOC(new, new_size * sizeof(op->op_array[0]));
345         if (new == NULL)
346                 return -ENOMEM;
347
348         /* copy old array to new one */
349         memcpy(new, op->op_array, op->op_size * sizeof(op->op_array[0]));
350         OBD_FREE(op->op_array, op->op_size * sizeof(op->op_array[0]));
351         op->op_array = new;
352         op->op_size = new_size;
353         return 0;
354 }
355
356 int lov_ost_pool_add(struct ost_pool *op, __u32 idx, unsigned int min_count)
357 {
358         int rc = 0, i;
359         ENTRY;
360
361         cfs_down_write(&op->op_rw_sem);
362
363         rc = lov_ost_pool_extend(op, min_count);
364         if (rc)
365                 GOTO(out, rc);
366
367         /* search ost in pool array */
368         for (i = 0; i < op->op_count; i++) {
369                 if (op->op_array[i] == idx)
370                         GOTO(out, rc = -EEXIST);
371         }
372         /* ost not found we add it */
373         op->op_array[op->op_count] = idx;
374         op->op_count++;
375         EXIT;
376 out:
377         cfs_up_write(&op->op_rw_sem);
378         return rc;
379 }
380
381 int lov_ost_pool_remove(struct ost_pool *op, __u32 idx)
382 {
383         int i;
384         ENTRY;
385
386         cfs_down_write(&op->op_rw_sem);
387
388         for (i = 0; i < op->op_count; i++) {
389                 if (op->op_array[i] == idx) {
390                         memmove(&op->op_array[i], &op->op_array[i + 1],
391                                 (op->op_count - i - 1) * sizeof(op->op_array[0]));
392                         op->op_count--;
393                         cfs_up_write(&op->op_rw_sem);
394                         EXIT;
395                         return 0;
396                 }
397         }
398
399         cfs_up_write(&op->op_rw_sem);
400         RETURN(-EINVAL);
401 }
402
403 int lov_ost_pool_free(struct ost_pool *op)
404 {
405         ENTRY;
406
407         if (op->op_size == 0)
408                 RETURN(0);
409
410         cfs_down_write(&op->op_rw_sem);
411
412         OBD_FREE(op->op_array, op->op_size * sizeof(op->op_array[0]));
413         op->op_array = NULL;
414         op->op_count = 0;
415         op->op_size = 0;
416
417         cfs_up_write(&op->op_rw_sem);
418         RETURN(0);
419 }
420
421
422 int lov_pool_new(struct obd_device *obd, char *poolname)
423 {
424         struct lov_obd *lov;
425         struct pool_desc *new_pool;
426         int rc;
427         ENTRY;
428
429         lov = &(obd->u.lov);
430
431         if (strlen(poolname) > LOV_MAXPOOLNAME)
432                 RETURN(-ENAMETOOLONG);
433
434         OBD_ALLOC_PTR(new_pool);
435         if (new_pool == NULL)
436                 RETURN(-ENOMEM);
437
438         strncpy(new_pool->pool_name, poolname, LOV_MAXPOOLNAME);
439         new_pool->pool_name[LOV_MAXPOOLNAME] = '\0';
440         new_pool->pool_lov = lov;
441         /* ref count init to 1 because when created a pool is always used
442          * up to deletion
443          */
444         cfs_atomic_set(&new_pool->pool_refcount, 1);
445         rc = lov_ost_pool_init(&new_pool->pool_obds, 0);
446         if (rc)
447                GOTO(out_err, rc);
448
449         memset(&(new_pool->pool_rr), 0, sizeof(struct lov_qos_rr));
450         rc = lov_ost_pool_init(&new_pool->pool_rr.lqr_pool, 0);
451         if (rc)
452                 GOTO(out_free_pool_obds, rc);
453
454         CFS_INIT_HLIST_NODE(&new_pool->pool_hash);
455
456 #ifdef LPROCFS
457         /* we need this assert seq_file is not implementated for liblustre */
458         /* get ref for /proc file */
459         lov_pool_getref(new_pool);
460         new_pool->pool_proc_entry = lprocfs_add_simple(lov->lov_pool_proc_entry,
461                                                        poolname, NULL, NULL,
462                                                        new_pool,
463                                                        &pool_proc_operations);
464         if (IS_ERR(new_pool->pool_proc_entry)) {
465                 CWARN("Cannot add proc pool entry "LOV_POOLNAMEF"\n", poolname);
466                 new_pool->pool_proc_entry = NULL;
467                 lov_pool_putref(new_pool);
468         }
469         CDEBUG(D_INFO, "pool %p - proc %p\n", new_pool, new_pool->pool_proc_entry);
470 #endif
471
472         cfs_spin_lock(&obd->obd_dev_lock);
473         cfs_list_add_tail(&new_pool->pool_list, &lov->lov_pool_list);
474         lov->lov_pool_count++;
475         cfs_spin_unlock(&obd->obd_dev_lock);
476
477         /* add to find only when it fully ready  */
478         rc = cfs_hash_add_unique(lov->lov_pools_hash_body, poolname,
479                                  &new_pool->pool_hash);
480         if (rc)
481                 GOTO(out_err, rc = -EEXIST);
482
483         CDEBUG(D_CONFIG, LOV_POOLNAMEF" is pool #%d\n",
484                poolname, lov->lov_pool_count);
485
486         RETURN(0);
487
488 out_err:
489         cfs_spin_lock(&obd->obd_dev_lock);
490         cfs_list_del_init(&new_pool->pool_list);
491         lov->lov_pool_count--;
492         cfs_spin_unlock(&obd->obd_dev_lock);
493
494         lprocfs_remove(&new_pool->pool_proc_entry);
495
496         lov_ost_pool_free(&new_pool->pool_rr.lqr_pool);
497 out_free_pool_obds:
498         lov_ost_pool_free(&new_pool->pool_obds);
499         OBD_FREE_PTR(new_pool);
500         return rc;
501 }
502
503 int lov_pool_del(struct obd_device *obd, char *poolname)
504 {
505         struct lov_obd *lov;
506         struct pool_desc *pool;
507         ENTRY;
508
509         lov = &(obd->u.lov);
510
511         /* lookup and kill hash reference */
512         pool = cfs_hash_del_key(lov->lov_pools_hash_body, poolname);
513         if (pool == NULL)
514                 RETURN(-ENOENT);
515
516         if (pool->pool_proc_entry != NULL) {
517                 CDEBUG(D_INFO, "proc entry %p\n", pool->pool_proc_entry);
518                 lprocfs_remove(&pool->pool_proc_entry);
519                 lov_pool_putref(pool);
520         }
521
522         cfs_spin_lock(&obd->obd_dev_lock);
523         cfs_list_del_init(&pool->pool_list);
524         lov->lov_pool_count--;
525         cfs_spin_unlock(&obd->obd_dev_lock);
526
527         /* release last reference */
528         lov_pool_putref(pool);
529
530         RETURN(0);
531 }
532
533
534 int lov_pool_add(struct obd_device *obd, char *poolname, char *ostname)
535 {
536         struct obd_uuid ost_uuid;
537         struct lov_obd *lov;
538         struct pool_desc *pool;
539         unsigned int lov_idx;
540         int rc;
541         ENTRY;
542
543         lov = &(obd->u.lov);
544
545         pool = cfs_hash_lookup(lov->lov_pools_hash_body, poolname);
546         if (pool == NULL)
547                 RETURN(-ENOENT);
548
549         obd_str2uuid(&ost_uuid, ostname);
550
551
552         /* search ost in lov array */
553         obd_getref(obd);
554         for (lov_idx = 0; lov_idx < lov->desc.ld_tgt_count; lov_idx++) {
555                 if (!lov->lov_tgts[lov_idx])
556                         continue;
557                 if (obd_uuid_equals(&ost_uuid,
558                                     &(lov->lov_tgts[lov_idx]->ltd_uuid)))
559                         break;
560         }
561         /* test if ost found in lov */
562         if (lov_idx == lov->desc.ld_tgt_count)
563                 GOTO(out, rc = -EINVAL);
564
565         rc = lov_ost_pool_add(&pool->pool_obds, lov_idx, lov->lov_tgt_size);
566         if (rc)
567                 GOTO(out, rc);
568
569         pool->pool_rr.lqr_dirty = 1;
570
571         CDEBUG(D_CONFIG, "Added %s to "LOV_POOLNAMEF" as member %d\n",
572                ostname, poolname,  pool_tgt_count(pool));
573
574         EXIT;
575 out:
576         obd_putref(obd);
577         lov_pool_putref(pool);
578         return rc;
579 }
580
581 int lov_pool_remove(struct obd_device *obd, char *poolname, char *ostname)
582 {
583         struct obd_uuid ost_uuid;
584         struct lov_obd *lov;
585         struct pool_desc *pool;
586         unsigned int lov_idx;
587         int rc = 0;
588         ENTRY;
589
590         lov = &(obd->u.lov);
591
592         pool = cfs_hash_lookup(lov->lov_pools_hash_body, poolname);
593         if (pool == NULL)
594                 RETURN(-ENOENT);
595
596         obd_str2uuid(&ost_uuid, ostname);
597
598         obd_getref(obd);
599         /* search ost in lov array, to get index */
600         for (lov_idx = 0; lov_idx < lov->desc.ld_tgt_count; lov_idx++) {
601                 if (!lov->lov_tgts[lov_idx])
602                         continue;
603
604                 if (obd_uuid_equals(&ost_uuid,
605                                     &(lov->lov_tgts[lov_idx]->ltd_uuid)))
606                         break;
607         }
608
609         /* test if ost found in lov */
610         if (lov_idx == lov->desc.ld_tgt_count)
611                 GOTO(out, rc = -EINVAL);
612
613         lov_ost_pool_remove(&pool->pool_obds, lov_idx);
614
615         pool->pool_rr.lqr_dirty = 1;
616
617         CDEBUG(D_CONFIG, "%s removed from "LOV_POOLNAMEF"\n", ostname,
618                poolname);
619
620         EXIT;
621 out:
622         obd_putref(obd);
623         lov_pool_putref(pool);
624         return rc;
625 }
626
627 int lov_check_index_in_pool(__u32 idx, struct pool_desc *pool)
628 {
629         int i, rc;
630         ENTRY;
631
632         /* caller may no have a ref on pool if it got the pool
633          * without calling lov_find_pool() (e.g. go through the lov pool
634          * list)
635          */
636         lov_pool_getref(pool);
637
638         cfs_down_read(&pool_tgt_rw_sem(pool));
639
640         for (i = 0; i < pool_tgt_count(pool); i++) {
641                 if (pool_tgt_array(pool)[i] == idx)
642                         GOTO(out, rc = 0);
643         }
644         rc = -ENOENT;
645         EXIT;
646 out:
647         cfs_up_read(&pool_tgt_rw_sem(pool));
648
649         lov_pool_putref(pool);
650         return rc;
651 }
652
653 struct pool_desc *lov_find_pool(struct lov_obd *lov, char *poolname)
654 {
655         struct pool_desc *pool;
656
657         pool = NULL;
658         if (poolname[0] != '\0') {
659                 pool = cfs_hash_lookup(lov->lov_pools_hash_body, poolname);
660                 if (pool == NULL)
661                         CWARN("Request for an unknown pool ("LOV_POOLNAMEF")\n",
662                               poolname);
663                 if ((pool != NULL) && (pool_tgt_count(pool) == 0)) {
664                         CWARN("Request for an empty pool ("LOV_POOLNAMEF")\n",
665                                poolname);
666                         /* pool is ignored, so we remove ref on it */
667                         lov_pool_putref(pool);
668                         pool = NULL;
669                 }
670         }
671         return pool;
672 }
673