Whamcloud - gitweb
Land b1_8_gate onto b1_8 (20081218_1708)
[fs/lustre-release.git] / lustre / lov / lov_pool.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see [sun.com URL with a
20  * copy of GPLv2].
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright  2008 Sun Microsystems, Inc. All rights reserved
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/lov/lov_pool.c
37  *
38  * OST pool methods
39  *
40  * Author: Jacques-Charles LAFOUCRIERE <jc.lafoucriere@cea.fr>
41  */
42
43 #define DEBUG_SUBSYSTEM S_LOV
44
45 #ifdef __KERNEL__
46 #include <libcfs/libcfs.h>
47 #else
48 #include <liblustre.h>
49 #endif
50
51 #include <obd.h>
52 #include "lov_internal.h"
53
54 static void lov_pool_getref(struct pool_desc *pool) {
55         atomic_inc(&pool->pool_refcount);
56 }
57
58 static void lov_pool_putref(struct pool_desc *pool) {
59         if (atomic_dec_and_test(&pool->pool_refcount)) {
60                 lov_ost_pool_free(&(pool->pool_rr.lqr_pool));
61                 lov_ost_pool_free(&(pool->pool_obds));
62                 OBD_FREE_PTR(pool);
63         }
64 }
65
66 /*
67  * hash function using a Rotating Hash algorithm
68  * Knuth, D. The Art of Computer Programming,
69  * Volume 3: Sorting and Searching,
70  * Chapter 6.4.
71  * Addison Wesley, 1973
72  */
73 static __u32 pool_hashfn(lustre_hash_t *hash_body, void *key, unsigned mask)
74 {
75         int i;
76         __u32 result;
77         char *poolname;
78
79         result = 0;
80         poolname = (char *)key;
81         for (i = 0; i < LOV_MAXPOOLNAME; i++) {
82                 if (poolname[i] == '\0')
83                         break;
84                 result = (result << 4)^(result >> 28) ^  poolname[i];
85         }
86         return (result % mask);
87 }
88
89 static void *pool_key(struct hlist_node *hnode)
90 {
91         struct pool_desc *pool;
92
93         pool = hlist_entry(hnode, struct pool_desc, pool_hash);
94         return (pool->pool_name);
95 }
96
97 static int pool_hashkey_compare(void *key, struct hlist_node *compared_hnode)
98 {
99         char *pool_name;
100         struct pool_desc *pool;
101         int rc;
102
103         pool_name = (char *)key;
104         pool = hlist_entry(compared_hnode, struct pool_desc, pool_hash);
105         rc = strncmp(pool_name, pool->pool_name, LOV_MAXPOOLNAME);
106         return (!rc);
107 }
108
109 static void *pool_hashrefcount_get(struct hlist_node *hnode)
110 {
111         struct pool_desc *pool;
112
113         pool = hlist_entry(hnode, struct pool_desc, pool_hash);
114         lov_pool_getref(pool);
115         return (pool);
116 }
117
118 static void *pool_hashrefcount_put(struct hlist_node *hnode)
119 {
120         struct pool_desc *pool;
121
122         pool = hlist_entry(hnode, struct pool_desc, pool_hash);
123         lov_pool_putref(pool);
124         return (pool);
125 }
126
127 lustre_hash_ops_t pool_hash_operations = {
128         .lh_hash        = pool_hashfn,
129         .lh_key         = pool_key,
130         .lh_compare     = pool_hashkey_compare,
131         .lh_get         = pool_hashrefcount_get,
132         .lh_put         = pool_hashrefcount_put,
133 };
134
135 #ifdef LPROCFS
136 /* ifdef needed for liblustre support */
137 /*
138  * pool /proc seq_file methods
139  */
/*
 * The iterator walks the target entries of one pool on behalf of the
 * seq_file methods below.
 * idx is the index of the current entry in the pool target array.
 * pos, the position exchanged with the seq_file interface, goes
 * from 0 to (pool->pool_obds.op_count - 1).
 *
 * While a read is in progress the iterator replaces the pool pointer in
 * seq_file->private; magic is what pool_proc_stop() checks to tell the
 * two apart.
 */
#define POOL_IT_MAGIC 0xB001CEA0
struct pool_iterator {
        int magic;              /* POOL_IT_MAGIC for a live iterator */
        struct pool_desc *pool; /* pool being listed, holds one ref */
        int idx;        /* from 0 to pool_tgt_size - 1 */
};
152
153 static void *pool_proc_next(struct seq_file *s, void *v, loff_t *pos)
154 {
155         struct pool_iterator *iter = (struct pool_iterator *)s->private;
156         int prev_idx;
157
158         LASSERT(iter->magic == POOL_IT_MAGIC);
159
160         /* test if end of file */
161         if (*pos >= pool_tgt_count(iter->pool))
162                 return NULL;
163
164         /* iterate to find a non empty entry */
165         prev_idx = iter->idx;
166         down_read(&pool_tgt_rw_sem(iter->pool));
167         iter->idx++;
168         if (iter->idx == pool_tgt_count(iter->pool)) {
169                 iter->idx = prev_idx; /* we stay on the last entry */
170                 up_read(&pool_tgt_rw_sem(iter->pool));
171                 return NULL;
172         }
173         up_read(&pool_tgt_rw_sem(iter->pool));
174         (*pos)++;
175         /* return != NULL to continue */
176         return iter;
177 }
178
179 static void *pool_proc_start(struct seq_file *s, loff_t *pos)
180 {
181         struct pool_desc *pool = (struct pool_desc *)s->private;
182         struct pool_iterator *iter;
183
184         lov_pool_getref(pool);
185         if ((pool_tgt_count(pool) == 0) ||
186             (*pos >= pool_tgt_count(pool))) {
187                 /* iter is not created, so stop() has no way to
188                  * find pool to dec ref */
189                 lov_pool_putref(pool);
190                 return NULL;
191         }
192
193         OBD_ALLOC_PTR(iter);
194         if (!iter)
195                 return ERR_PTR(-ENOMEM);
196         iter->magic = POOL_IT_MAGIC;
197         iter->pool = pool;
198         iter->idx = 0;
199
200         /* we use seq_file private field to memorized iterator so
201          * we can free it at stop() */
202         /* /!\ do not forget to restore it to pool before freeing it */
203         s->private = iter;
204         if (*pos > 0) {
205                 loff_t i;
206                 void *ptr;
207
208                 i = 0;
209                 do {
210                      ptr = pool_proc_next(s, &iter, &i);
211                 } while ((i < *pos) && (ptr != NULL));
212                 return ptr;
213         }
214         return iter;
215 }
216
217 static void pool_proc_stop(struct seq_file *s, void *v)
218 {
219         struct pool_iterator *iter = (struct pool_iterator *)s->private;
220
221         /* in some cases stop() method is called 2 times, without
222          * calling start() method (see seq_read() from fs/seq_file.c)
223          * we have to free only if s->private is an iterator */
224         if ((iter) && (iter->magic == POOL_IT_MAGIC)) {
225                 /* we restore s->private so next call to pool_proc_start()
226                  * will work */
227                 s->private = iter->pool;
228                 lov_pool_putref(iter->pool);
229                 OBD_FREE_PTR(iter);
230         }
231         return;
232 }
233
234 static int pool_proc_show(struct seq_file *s, void *v)
235 {
236         struct pool_iterator *iter = (struct pool_iterator *)v;
237         struct lov_tgt_desc *tgt;
238
239         LASSERT(iter->magic == POOL_IT_MAGIC);
240         LASSERT(iter->pool != NULL);
241         LASSERT(iter->idx <= pool_tgt_count(iter->pool));
242
243         down_read(&pool_tgt_rw_sem(iter->pool));
244         tgt = pool_tgt(iter->pool, iter->idx);
245         up_read(&pool_tgt_rw_sem(iter->pool));
246         if (tgt)
247                 seq_printf(s, "%s\n", obd_uuid2str(&(tgt->ltd_uuid)));
248
249         return 0;
250 }
251
252 static struct seq_operations pool_proc_ops = {
253         .start          = pool_proc_start,
254         .next           = pool_proc_next,
255         .stop           = pool_proc_stop,
256         .show           = pool_proc_show,
257 };
258
259 static int pool_proc_open(struct inode *inode, struct file *file)
260 {
261         int rc;
262
263         rc = seq_open(file, &pool_proc_ops);
264         if (!rc) {
265                 struct seq_file *s = file->private_data;
266                 s->private = PROC_I(inode)->pde->data;
267         }
268         return rc;
269 }
270
271 static struct file_operations pool_proc_operations = {
272         .open           = pool_proc_open,
273         .read           = seq_read,
274         .llseek         = seq_lseek,
275         .release        = seq_release,
276 };
277 #endif /* LPROCFS */
278
279 void lov_dump_pool(int level, struct pool_desc *pool)
280 {
281         int i;
282
283         lov_pool_getref(pool);
284
285         CDEBUG(level, "pool "LOV_POOLNAMEF" has %d members\n",
286                pool->pool_name, pool->pool_obds.op_count);
287         down_read(&pool_tgt_rw_sem(pool));
288
289         for (i = 0; i < pool_tgt_count(pool) ; i++) {
290                 if (!pool_tgt(pool, i) || !(pool_tgt(pool, i))->ltd_exp)
291                         continue;
292                 CDEBUG(level, "pool "LOV_POOLNAMEF"[%d] = %s\n",
293                        pool->pool_name, i,
294                        obd_uuid2str(&((pool_tgt(pool, i))->ltd_uuid)));
295         }
296
297         up_read(&pool_tgt_rw_sem(pool));
298         lov_pool_putref(pool);
299 }
300
301 #define LOV_POOL_INIT_COUNT 2
302 int lov_ost_pool_init(struct ost_pool *op, unsigned int count)
303 {
304         if (count == 0)
305                 count = LOV_POOL_INIT_COUNT;
306         op->op_array = NULL;
307         op->op_count = 0;
308         init_rwsem(&op->op_rw_sem);
309         op->op_size = count;
310         OBD_ALLOC(op->op_array, op->op_size * sizeof(op->op_array[0]));
311         if (op->op_array == NULL) {
312                 op->op_size = 0;
313                 return -ENOMEM;
314         }
315         return 0;
316 }
317
318 /* Caller must hold write op_rwlock */
319 int lov_ost_pool_extend(struct ost_pool *op, unsigned int max_count)
320 {
321         __u32 *new;
322         int new_size;
323
324         LASSERT(max_count != 0);
325
326         if (op->op_count < op->op_size)
327                 return 0;
328
329         new_size = min(max_count, 2 * op->op_size);
330         OBD_ALLOC(new, new_size * sizeof(op->op_array[0]));
331         if (new == NULL)
332                 return -ENOMEM;
333
334         /* copy old array to new one */
335         memcpy(new, op->op_array, op->op_size * sizeof(op->op_array[0]));
336         OBD_FREE(op->op_array, op->op_size * sizeof(op->op_array[0]));
337         op->op_array = new;
338         op->op_size = new_size;
339         return 0;
340 }
341
342 int lov_ost_pool_add(struct ost_pool *op, __u32 idx, unsigned int max_count)
343 {
344         int rc = 0, i;
345         ENTRY;
346
347         down_write(&op->op_rw_sem);
348
349         rc = lov_ost_pool_extend(op, max_count);
350         if (rc)
351                 GOTO(out, rc);
352
353         /* search ost in pool array */
354         for (i = 0; i < op->op_count; i++) {
355                 if (op->op_array[i] == idx)
356                         GOTO(out, rc = -EEXIST);
357         }
358         /* ost not found we add it */
359         op->op_array[op->op_count] = idx;
360         op->op_count++;
361 out:
362         up_write(&op->op_rw_sem);
363         return rc;
364 }
365
366 int lov_ost_pool_remove(struct ost_pool *op, __u32 idx)
367 {
368         int i;
369
370         down_write(&op->op_rw_sem);
371         for (i = 0; i < op->op_count; i++) {
372                 if (op->op_array[i] == idx) {
373                         memmove(&op->op_array[i], &op->op_array[i + 1],
374                                 (op->op_count - i - 1) * sizeof(op->op_array[0]));
375                         op->op_count--;
376                         up_write(&op->op_rw_sem);
377                         return 0;
378                 }
379         }
380         up_write(&op->op_rw_sem);
381         return -EINVAL;
382 }
383
384 int lov_ost_pool_free(struct ost_pool *op)
385 {
386         if (op->op_size == 0)
387                 return 0;
388
389         down_write(&op->op_rw_sem);
390         OBD_FREE(op->op_array, op->op_size * sizeof(op->op_array[0]));
391         op->op_array = NULL;
392         op->op_count = 0;
393         op->op_size = 0;
394         up_write(&op->op_rw_sem);
395         return 0;
396 }
397
398
399 int lov_pool_new(struct obd_device *obd, char *poolname)
400 {
401         struct lov_obd *lov;
402         struct pool_desc *new_pool;
403         int rc;
404         ENTRY;
405
406         lov = &(obd->u.lov);
407
408         if (strlen(poolname) > LOV_MAXPOOLNAME)
409                 RETURN(-ENAMETOOLONG);
410
411         OBD_ALLOC_PTR(new_pool);
412         if (new_pool == NULL)
413                 RETURN(-ENOMEM);
414
415         strncpy(new_pool->pool_name, poolname, LOV_MAXPOOLNAME);
416         new_pool->pool_name[LOV_MAXPOOLNAME] = '\0';
417         new_pool->pool_lov = lov;
418         /* ref count init to 1 because when created a pool is always used
419          * up to deletion
420          */
421         atomic_set(&new_pool->pool_refcount, 1);
422         rc = lov_ost_pool_init(&new_pool->pool_obds, 0);
423         if (rc)
424                GOTO(out_err, rc);
425
426         memset(&(new_pool->pool_rr), 0, sizeof(struct lov_qos_rr));
427         rc = lov_ost_pool_init(&new_pool->pool_rr.lqr_pool, 0);
428         if (rc) {
429                 lov_ost_pool_free(&new_pool->pool_obds);
430                 GOTO(out_err, rc);
431         }
432
433         INIT_HLIST_NODE(&new_pool->pool_hash);
434         rc = lustre_hash_add_unique(lov->lov_pools_hash_body, poolname,
435                                     &new_pool->pool_hash);
436         if (rc) {
437                 lov_ost_pool_free(&new_pool->pool_rr.lqr_pool);
438                 lov_ost_pool_free(&new_pool->pool_obds);
439                 GOTO(out_err, rc = -EEXIST);
440         }
441
442         spin_lock(&obd->obd_dev_lock);
443         list_add_tail(&new_pool->pool_list, &lov->lov_pool_list);
444         lov->lov_pool_count++;
445
446         spin_unlock(&obd->obd_dev_lock);
447
448         CDEBUG(D_CONFIG, LOV_POOLNAMEF" is pool #%d\n",
449                poolname, lov->lov_pool_count);
450
451 #ifdef LPROCFS
452         /* ifdef needed for liblustre */
453         /* get ref for /proc file */
454         lov_pool_getref(new_pool);
455         new_pool->pool_proc_entry = lprocfs_add_simple(lov->lov_pool_proc_entry,
456                                                        poolname, NULL, NULL,
457                                                        new_pool,
458                                                        &pool_proc_operations);
459 #endif
460
461         if (IS_ERR(new_pool->pool_proc_entry)) {
462                 CWARN("Cannot add proc pool entry "LOV_POOLNAMEF"\n", poolname);
463                 new_pool->pool_proc_entry = NULL;
464                 lov_pool_putref(new_pool);
465         }
466
467         RETURN(0);
468
469 out_err:
470         OBD_FREE_PTR(new_pool);
471         return rc;
472 }
473
474 int lov_pool_del(struct obd_device *obd, char *poolname)
475 {
476         struct lov_obd *lov;
477         struct pool_desc *pool;
478         ENTRY;
479
480         lov = &(obd->u.lov);
481
482         spin_lock(&obd->obd_dev_lock);
483         pool = lustre_hash_lookup(lov->lov_pools_hash_body, poolname);
484         if (pool == NULL) {
485                 spin_unlock(&obd->obd_dev_lock);
486                 RETURN(-ENOENT);
487         }
488
489 #ifdef LPROCFS
490         if (pool->pool_proc_entry != NULL) {
491                 remove_proc_entry(pool->pool_proc_entry->name,
492                                   pool->pool_proc_entry->parent);
493                 lov_pool_putref(pool);
494         }
495 #endif
496
497         lustre_hash_del_key(lov->lov_pools_hash_body, poolname);
498         list_del_init(&pool->pool_list);
499
500         lov->lov_pool_count--;
501         lh_put(lov->lov_pools_hash_body, &pool->pool_hash);
502         spin_unlock(&obd->obd_dev_lock);
503
504         /* remove ref got when pool was created in memory
505          * pool will be freed when refount will reach 0
506          */
507         lov_pool_putref(pool);
508
509         RETURN(0);
510 }
511
512
513 int lov_pool_add(struct obd_device *obd, char *poolname, char *ostname)
514 {
515         struct obd_uuid ost_uuid;
516         struct lov_obd *lov;
517         struct pool_desc *pool;
518         unsigned int i, lov_idx;
519         int rc;
520         ENTRY;
521
522         lov = &(obd->u.lov);
523
524         pool = lustre_hash_lookup(lov->lov_pools_hash_body, poolname);
525         if (pool == NULL)
526                 RETURN(-ENOENT);
527
528         obd_str2uuid(&ost_uuid, ostname);
529
530         /* search ost in lov array */
531         mutex_down(&lov->lov_lock);
532         for (i = 0; i < lov->desc.ld_tgt_count; i++) {
533                 if (!lov->lov_tgts[i])
534                         continue;
535                 if (obd_uuid_equals(&ost_uuid, &(lov->lov_tgts[i]->ltd_uuid)))
536                         break;
537         }
538
539         /* test if ost found in lov */
540         if (i == lov->desc.ld_tgt_count) {
541                 mutex_up(&lov->lov_lock);
542                 GOTO(out, rc = -EINVAL);
543         }
544         mutex_up(&lov->lov_lock);
545
546         lov_idx = i;
547
548         rc = lov_ost_pool_add(&pool->pool_obds, lov_idx, lov->lov_tgt_size);
549         if (rc)
550                 GOTO(out, rc);
551
552         pool->pool_rr.lqr_dirty = 1;
553
554         CDEBUG(D_CONFIG, "Added %s to "LOV_POOLNAMEF" as member %d\n",
555                ostname, poolname,  pool_tgt_count(pool));
556         rc = 0;
557         EXIT;
558 out:
559         lh_put(lov->lov_pools_hash_body, &pool->pool_hash);
560         return rc;
561 }
562
563 int lov_pool_remove(struct obd_device *obd, char *poolname, char *ostname)
564 {
565         struct obd_uuid ost_uuid;
566         struct lov_obd *lov;
567         struct pool_desc *pool;
568         unsigned int i, lov_idx;
569         int rc;
570         ENTRY;
571
572         lov = &(obd->u.lov);
573
574         spin_lock(&obd->obd_dev_lock);
575         pool = lustre_hash_lookup(lov->lov_pools_hash_body, poolname);
576         if (pool == NULL) {
577                 spin_unlock(&obd->obd_dev_lock);
578                 RETURN(-ENOENT);
579         }
580
581         obd_str2uuid(&ost_uuid, ostname);
582
583         /* search ost in lov array, to get index */
584         for (i = 0; i < lov->desc.ld_tgt_count; i++) {
585                 if (!lov->lov_tgts[i])
586                         continue;
587
588                 if (obd_uuid_equals(&ost_uuid, &(lov->lov_tgts[i]->ltd_uuid)))
589                         break;
590         }
591
592         /* test if ost found in lov */
593         if (i == lov->desc.ld_tgt_count) {
594                 spin_unlock(&obd->obd_dev_lock);
595                 GOTO(out, rc = -EINVAL);
596         }
597
598         spin_unlock(&obd->obd_dev_lock);
599
600         lov_idx = i;
601
602         lov_ost_pool_remove(&pool->pool_obds, lov_idx);
603
604         pool->pool_rr.lqr_dirty = 1;
605
606         CDEBUG(D_CONFIG, "%s removed from "LOV_POOLNAMEF"\n", ostname,
607                poolname);
608         rc = 0;
609         EXIT;
610 out:
611         lh_put(lov->lov_pools_hash_body, &pool->pool_hash);
612         return rc;
613 }
614
615 int lov_check_index_in_pool(__u32 idx, struct pool_desc *pool)
616 {
617         int i, rc;
618         ENTRY;
619
620         /* caller may no have a ref on pool if it got the pool
621          * without calling lov_find_pool() (e.g. go through the lov pool
622          * list)
623          */
624         lov_pool_getref(pool);
625
626         down_read(&pool_tgt_rw_sem(pool));
627         for (i = 0; i < pool_tgt_count(pool); i++) {
628                 if (pool_tgt_array(pool)[i] == idx)
629                         GOTO(out, rc = 0);
630         }
631         rc = -ENOENT;
632         EXIT;
633 out:
634         up_read(&pool_tgt_rw_sem(pool));
635
636         lov_pool_putref(pool);
637         return rc;
638 }
639
640 struct pool_desc *lov_find_pool(struct lov_obd *lov, char *poolname)
641 {
642         struct pool_desc *pool;
643
644         pool = NULL;
645         if (poolname[0] != '\0') {
646                 pool = lustre_hash_lookup(lov->lov_pools_hash_body, poolname);
647                 if (pool == NULL)
648                         CWARN("Request for an unknown pool ("LOV_POOLNAMEF")\n",
649                               poolname);
650                 if ((pool != NULL) && (pool_tgt_count(pool) == 0)) {
651                         CWARN("Request for an empty pool ("LOV_POOLNAMEF")\n",
652                                poolname);
653                         /* pool is ignored, so we remove ref on it */
654                         lh_put(lov->lov_pools_hash_body, &pool->pool_hash);
655                         pool = NULL;
656                 }
657         }
658         return pool;
659 }
660