1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * Implementation of cl_object for LOV layer.
38 * Author: Nikita Danilov <nikita.danilov@sun.com>
41 /** \addtogroup lov lov @{ */
43 #define DEBUG_SUBSYSTEM S_LOV
45 #include "lov_cl_internal.h"
47 /*****************************************************************************
/*
 * Per-layout method table.  lov_dispatch[] holds one instance per layout
 * type and the LOV_2DISPATCH*() macros call through it, indexed by
 * lov_object::lo_type.
 *
 * How the members are used in this file:
 *  - llo_init:      builds a layout state (lov_object_init(),
 *                   lov_layout_change());
 *  - llo_install:   called after a successful llo_init;
 *  - llo_fini:      tears a layout state down (lov_object_free(), and the
 *                   failure/old-layout paths of the two callers above);
 *  - llo_delete:    called from lov_object_delete();
 *  - llo_print:     called from lov_object_print();
 *  - llo_page_init, llo_lock_init, llo_io_init, llo_getattr: back ends of
 *                   lov_page_init(), lov_lock_init(), lov_io_init() and
 *                   lov_attr_get() respectively.
 *
 * NOTE(review): this listing skips original lines 68-69 (presumably the rest
 * of the llo_page_init parameter list) and the closing brace of the struct;
 * restore them from the upstream file before building.
 */
53 struct lov_layout_operations {
54 int (*llo_init)(const struct lu_env *env, struct lov_device *dev,
55 struct lov_object *lov,
56 const struct cl_object_conf *conf,
57 union lov_layout_state *state);
58 void (*llo_delete)(const struct lu_env *env, struct lov_object *lov,
59 union lov_layout_state *state);
60 void (*llo_fini)(const struct lu_env *env, struct lov_object *lov,
61 union lov_layout_state *state);
62 void (*llo_install)(const struct lu_env *env, struct lov_object *lov,
63 union lov_layout_state *state);
64 int (*llo_print)(const struct lu_env *env, void *cookie,
65 lu_printer_t p, const struct lu_object *o);
66 struct cl_page *(*llo_page_init)(const struct lu_env *env,
67 struct cl_object *obj,
70 int (*llo_lock_init)(const struct lu_env *env,
71 struct cl_object *obj, struct cl_lock *lock,
72 const struct cl_io *io);
73 int (*llo_io_init)(const struct lu_env *env,
74 struct cl_object *obj, struct cl_io *io);
75 int (*llo_getattr)(const struct lu_env *env, struct cl_object *obj,
76 struct cl_attr *attr);
79 /*****************************************************************************
81 * Lov object layout operations.
/*
 * llo_install method listed next to the other *_empty methods in
 * lov_dispatch[].
 * NOTE(review): only the signature is visible in this listing (the embedded
 * numbering jumps from 87 to 90); confirm the body against the upstream file.
 */
85 static void lov_install_empty(const struct lu_env *env,
86 struct lov_object *lov,
87 union lov_layout_state *state)
90 * File without objects.
/*
 * llo_init method listed next to the other *_empty methods in
 * lov_dispatch[].
 * NOTE(review): only the signature is visible in this listing (the embedded
 * numbering jumps from 97 to 102); confirm the body and its return value
 * against the upstream file.
 */
94 static int lov_init_empty(const struct lu_env *env,
95 struct lov_device *dev, struct lov_object *lov,
96 const struct cl_object_conf *conf,
97 union lov_layout_state *state)
/*
 * llo_install method listed next to the other *_raid0 methods in
 * lov_dispatch[].
 * NOTE(review): only the signature is visible in this listing (the embedded
 * numbering jumps from 104 to 109); confirm the body against the upstream
 * file.
 */
102 static void lov_install_raid0(const struct lu_env *env,
103 struct lov_object *lov,
104 union lov_layout_state *state)
/*
 * Builds in @fid the identifier that lov_init_raid0() uses to look up the
 * sub-object of one stripe, from that stripe's lov_oinfo.
 *
 * f_seq is the bitwise OR of loi_ost_idx shifted left by 32, loi_gr shifted
 * left by 16, and the bits of loi_id above bit 31.  f_oid receives loi_id
 * truncated to 32 bits.  The LASSERTs require loi_gr < 2^16 and
 * loi_id < 2^49.
 *
 * NOTE(review): with loi_id allowed up to 2^49, `idx >> 32` can reach bit 16,
 * which is also the lowest bit of `loi_gr << 16` -- confirm the intended
 * bound.
 * NOTE(review): lines after the f_oid assignment (original 127-130) are not
 * visible in this listing; any further fid fields set there are unknown.
 */
109 static void oinfo_get_fid(const struct lov_oinfo *oinfo, struct lu_fid *fid)
111 __u64 idx = oinfo->loi_id;
113 /* See idif definition in wiki:CMD3_interoperability_architecture */
115 LASSERT(oinfo->loi_gr < 1ULL << 16);
116 LASSERT(oinfo->loi_id < 1ULL << 49);
120 * Now that the fid of stripe is not unique now, ost_idx have to
121 * be used to make it unique. This is ok because the stripe fids
122 * are just used in client side(to locate the objects). -jay
124 fid->f_seq = ((__u64)oinfo->loi_ost_idx) << 32 |
125 oinfo->loi_gr << 16 | idx >> 32;
126 fid->f_oid = idx; /* truncated to 32 bits by assignment */
/*
 * Looks up the object identified by @fid on sub-device @dev through
 * lu_object_find_at(), passing &conf->coc_lu as the object configuration.
 * The LASSERT checks that a non-error result belongs to a device whose type
 * is lovsub_device_type.
 * NOTE(review): the declaration of `o` and the return statement are not
 * visible in this listing; lov_init_raid0() treats the result as a
 * cl_object pointer that may be an error pointer.
 */
131 static struct cl_object *lov_sub_find(const struct lu_env *env,
132 struct cl_device *dev,
133 const struct lu_fid *fid,
134 const struct cl_object_conf *conf)
139 o = lu_object_find_at(env, cl2lu_dev(dev), fid, &conf->coc_lu);
140 LASSERT(ergo(!IS_ERR(o), o->lo_dev->ld_type == &lovsub_device_type));
/*
 * Attaches @stripe to @lov as sub-object number @idx of the raid0 state @r0.
 *
 * When the stripe's header has no parent yet, it is linked under the lov
 * object's header one nesting level deeper, a "lov-parent" reference is
 * recorded on the stripe, and the lovsub object is stored in r0->lo_sub[idx]
 * with back pointers to @lov and @idx.
 *
 * The remaining visible statements log that the stripe is already owned by
 * another file, dump the objects involved and drop the stripe with
 * cl_object_put().
 * NOTE(review): the line separating the two paths (presumably `} else {`),
 * the `result` assignments and the return statement are missing from this
 * listing (the embedded numbering skips 173-174, 178 and 181-185).
 */
144 static int lov_init_sub(const struct lu_env *env, struct lov_object *lov,
145 struct cl_object *stripe,
146 struct lov_layout_raid0 *r0, int idx)
148 struct cl_object_header *hdr;
149 struct cl_object_header *subhdr;
150 struct cl_object_header *parent;
151 struct lov_oinfo *oinfo;
154 hdr = cl_object_header(lov2cl(lov));
155 subhdr = cl_object_header(stripe);
156 parent = subhdr->coh_parent;
158 oinfo = r0->lo_lsm->lsm_oinfo[idx];
159 CDEBUG(D_INODE, DFID"@%p[%d] -> "DFID"@%p: id: "LPU64" gr: "LPU64
160 " idx: %d gen: %d\n",
161 PFID(&subhdr->coh_lu.loh_fid), subhdr, idx,
162 PFID(&hdr->coh_lu.loh_fid), hdr,
163 oinfo->loi_id, oinfo->loi_gr,
164 oinfo->loi_ost_idx, oinfo->loi_ost_gen);
/* A stripe without a parent is free to be adopted by this lov object. */
166 if (parent == NULL) {
167 subhdr->coh_parent = hdr;
168 subhdr->coh_nesting = hdr->coh_nesting + 1;
169 lu_object_ref_add(&stripe->co_lu, "lov-parent", lov);
170 r0->lo_sub[idx] = cl2lovsub(stripe);
171 r0->lo_sub[idx]->lso_super = lov;
172 r0->lo_sub[idx]->lso_index = idx;
175 CERROR("Stripe is already owned by other file (%i).\n", idx);
176 LU_OBJECT_DEBUG(D_ERROR, env, &stripe->co_lu, "\n");
177 LU_OBJECT_DEBUG(D_ERROR, env, lu_object_top(&parent->coh_lu),
179 LU_OBJECT_HEADER(D_ERROR, env, lov2lu(lov), "new\n");
180 cl_object_put(env, stripe);
/*
 * llo_init method for the RAID0 layout: fills state->raid0 from the stripe
 * metadata carried by @conf and creates one sub-object per stripe.
 *
 * lo_nr and lo_lsm are taken from conf->u.coc_md->lsm and lo_sub[] is
 * allocated with lo_nr slots.  For every stripe the fid produced by
 * oinfo_get_fid() is looked up with lov_sub_find() on the target device
 * selected by loi_ost_idx, and the object found is attached with
 * lov_init_sub().  The loop stops at the first non-zero result.
 *
 * Scratch storage (the sub-object configuration and the fid) comes from the
 * lov_thread_info returned by lov_env_info(env).
 *
 * NOTE(review): the declarations of `result` and `i`, the test that chooses
 * between lov_init_sub() and PTR_ERR(stripe), the allocation-failure path
 * and the return statement are not visible in this listing; restore them
 * from the upstream file.
 */
186 static int lov_init_raid0(const struct lu_env *env,
187 struct lov_device *dev, struct lov_object *lov,
188 const struct cl_object_conf *conf,
189 union lov_layout_state *state)
194 struct cl_object *stripe;
195 struct lov_thread_info *lti = lov_env_info(env);
196 struct cl_object_conf *subconf = &lti->lti_stripe_conf;
197 struct lov_stripe_md *lsm = conf->u.coc_md->lsm;
198 struct lu_fid *ofid = &lti->lti_fid;
199 struct lov_layout_raid0 *r0 = &state->raid0;
202 r0->lo_nr = conf->u.coc_md->lsm->lsm_stripe_count;
203 r0->lo_lsm = conf->u.coc_md->lsm;
204 LASSERT(r0->lo_nr <= lov_targets_nr(dev));
206 OBD_ALLOC(r0->lo_sub, r0->lo_nr * sizeof r0->lo_sub[0]);
207 if (r0->lo_sub != NULL) {
209 subconf->coc_inode = conf->coc_inode;
211 * Create stripe cl_objects.
213 for (i = 0; i < r0->lo_nr && result == 0; ++i) {
214 struct cl_device *subdev;
215 struct lov_oinfo *oinfo = lsm->lsm_oinfo[i];
216 int ost_idx = oinfo->loi_ost_idx;
218 oinfo_get_fid(oinfo, ofid);
219 subdev = lovsub2cl_dev(dev->ld_target[ost_idx]);
220 subconf->u.coc_oinfo = oinfo;
221 stripe = lov_sub_find(env, subdev, ofid, subconf);
223 result = lov_init_sub(env, lov, stripe, r0, i);
225 result = PTR_ERR(stripe);
/*
 * llo_delete method for an object without stripes.  The only statement
 * visible in this listing asserts that the object's layout type is
 * LLT_EMPTY.
 */
232 static void lov_delete_empty(const struct lu_env *env, struct lov_object *lov,
233 union lov_layout_state *state)
235 LASSERT(lov->lo_type == LLT_EMPTY);
/*
 * llo_delete method for the RAID0 layout.  When lo_sub[] has been allocated
 * and the header of the top-level object is dying, the stripes are walked
 * and cl_object_kill() is called on the sub-object.
 * NOTE(review): the lines between the `sub` declaration and the
 * cl_object_kill() call are missing from this listing (the embedded
 * numbering skips 249-251 and 254), so any guard on `sub` is not visible.
 */
238 static void lov_delete_raid0(const struct lu_env *env, struct lov_object *lov,
239 union lov_layout_state *state)
241 struct lov_layout_raid0 *r0 = &state->raid0;
245 if (r0->lo_sub != NULL &&
246 lu_object_is_dying(lov->lo_cl.co_lu.lo_header)) {
247 for (i = 0; i < r0->lo_nr; ++i) {
248 struct lovsub_object *sub = r0->lo_sub[i];
252 * If top-level object is to be evicted from
253 * the cache, so are its sub-objects.
255 cl_object_kill(env, lovsub2cl(sub));
/*
 * llo_fini method for an object without stripes.  The only statement
 * visible in this listing asserts that the object's layout type is
 * LLT_EMPTY.
 */
261 static void lov_fini_empty(const struct lu_env *env, struct lov_object *lov,
262 union lov_layout_state *state)
264 LASSERT(lov->lo_type == LLT_EMPTY);
/*
 * llo_fini method for the RAID0 layout: detaches the sub-objects recorded
 * in lo_sub[] and frees the array.
 *
 * For each populated slot the sub-object's coh_parent is cleared, the
 * "lov-parent" reference is removed, the object is released with
 * cl_object_put() and the slot is reset to NULL.  The array is then freed
 * with the same size expression that lov_init_raid0() used to allocate it.
 * NOTE(review): the statement controlled by the `== NULL` test (original
 * line 280, presumably `continue`) is not visible in this listing.
 */
267 static void lov_fini_raid0(const struct lu_env *env, struct lov_object *lov,
268 union lov_layout_state *state)
270 struct lov_layout_raid0 *r0 = &state->raid0;
273 if (r0->lo_sub != NULL) {
276 for (i = 0; i < r0->lo_nr; ++i) {
277 struct cl_object *sub;
279 if (r0->lo_sub[i] == NULL)
281 sub = lovsub2cl(r0->lo_sub[i]);
282 cl_object_header(sub)->coh_parent = NULL;
283 lu_object_ref_del(&sub->co_lu, "lov-parent", lov);
284 cl_object_put(env, sub);
285 r0->lo_sub[i] = NULL;
287 OBD_FREE(r0->lo_sub, r0->lo_nr * sizeof r0->lo_sub[0]);
/*
 * llo_print method for an object without stripes: emits the text "empty"
 * through the printer callback @p.
 * NOTE(review): the return statement is not visible in this listing.
 */
293 static int lov_print_empty(const struct lu_env *env, void *cookie,
294 lu_printer_t p, const struct lu_object *o)
296 (*p)(env, cookie, "empty\n");
/*
 * llo_print method for the RAID0 layout: prints the stripe count, then for
 * each slot of lo_sub[] either delegates to lu_object_print() on the
 * sub-object or prints "sub <i> absent".
 * NOTE(review): the declaration of `i`, the `else` line between the two
 * alternatives and the return statement are not visible in this listing.
 */
300 static int lov_print_raid0(const struct lu_env *env, void *cookie,
301 lu_printer_t p, const struct lu_object *o)
303 struct lov_object *lov = lu2lov(o);
304 struct lov_layout_raid0 *r0 = lov_r0(lov);
307 (*p)(env, cookie, "stripes: %d:\n", r0->lo_nr);
308 for (i = 0; i < r0->lo_nr; ++i) {
309 struct lu_object *sub;
311 if (r0->lo_sub[i] != NULL) {
312 sub = lovsub2lu(r0->lo_sub[i]);
313 lu_object_print(env, cookie, p, sub);
315 (*p)(env, cookie, "sub %d absent\n", i);
321 * Implements cl_object_operations::coo_attr_get() method for an object
322 * without stripes (LLT_EMPTY layout type).
324 * The only attribute this layer is authoritative for in this case is
325 * cl_attr::cat_blocks---it is 0.
/*
 * llo_getattr method for an object without stripes.  The only visible
 * statement sets attr->cat_blocks to 0; @env and @obj are not used by it.
 * NOTE(review): the return statement is not visible in this listing.
 */
327 static int lov_attr_get_empty(const struct lu_env *env, struct cl_object *obj,
328 struct cl_attr *attr)
330 attr->cat_blocks = 0;
/*
 * llo_getattr method for the RAID0 layout.
 *
 * While r0->lo_attr_valid is unset, the incoming @attr is copied into the
 * LVB kept in lov_env_info(env), lov_merge_lvb_kms() merges into that LVB
 * under the lsm stripe lock, the LVB is converted back into @attr and
 * lo_attr_valid is set to 1.
 * NOTE(review): the declarations of `result` and `kms`, the handling of the
 * merge result and of `kms`, and the path taken when lo_attr_valid is
 * already set are not visible in this listing.
 */
334 static int lov_attr_get_raid0(const struct lu_env *env, struct cl_object *obj,
335 struct cl_attr *attr)
337 struct lov_object *lov = cl2lov(obj);
338 struct lov_layout_raid0 *r0 = lov_r0(lov);
339 struct lov_stripe_md *lsm = lov->u.raid0.lo_lsm;
340 struct ost_lvb *lvb = &lov_env_info(env)->lti_lvb;
345 if (!r0->lo_attr_valid) {
347 * Fill LVB with attributes already initialized by the upper
350 cl_attr2lvb(lvb, attr);
354 * XXX that should be replaced with a loop over sub-objects,
355 * doing cl_object_attr_get() on them. But for now, let's
356 * reuse old lov code.
360 * XXX take lsm spin-lock to keep lov_merge_lvb_kms()
361 * happy. It's not needed, because new code uses
362 * ->coh_attr_guard spin-lock to protect consistency of
363 * sub-object attributes.
365 lov_stripe_lock(lsm);
366 result = lov_merge_lvb_kms(lsm, lvb, &kms);
367 lov_stripe_unlock(lsm);
369 cl_lvb2attr(attr, lvb);
371 r0->lo_attr_valid = 1;
/*
 * Table of layout methods, indexed by lov_object::lo_type.  The first group
 * of initializers wires up the *_empty methods and the second group the
 * *_raid0 methods.  The empty layout has no lock-init method
 * (llo_lock_init is NULL).
 *
 * The storage-class specifier is written first ("static const"), matching
 * lov_ops and lov_lu_obj_ops in this file.
 *
 * NOTE(review): the lines that open and close each entry (presumably the
 * `[LLT_...] = {` designators and the closing braces) are missing from this
 * listing; restore them from the upstream file.
 */
379 static const struct lov_layout_operations lov_dispatch[] = {
381 .llo_init = lov_init_empty,
382 .llo_delete = lov_delete_empty,
383 .llo_fini = lov_fini_empty,
384 .llo_install = lov_install_empty,
385 .llo_print = lov_print_empty,
386 .llo_page_init = lov_page_init_empty,
387 .llo_lock_init = NULL,
388 .llo_io_init = lov_io_init_empty,
389 .llo_getattr = lov_attr_get_empty
392 .llo_init = lov_init_raid0,
393 .llo_delete = lov_delete_raid0,
394 .llo_fini = lov_fini_raid0,
395 .llo_install = lov_install_raid0,
396 .llo_print = lov_print_raid0,
397 .llo_page_init = lov_page_init_raid0,
398 .llo_lock_init = lov_lock_init_raid0,
399 .llo_io_init = lov_io_init_raid0,
400 .llo_getattr = lov_attr_get_raid0
406 * Performs a double-dispatch based on the layout type of an object.
/*
 * Calls lov_dispatch[obj->lo_type].op(...) without touching lo_type_guard.
 * The layout type is range-checked with LASSERT before it is used as an
 * index.  Callers in this file use the macro as an expression yielding the
 * method's result (see lov_attr_get()).
 * NOTE(review): the lines that open and close the macro body (original 409
 * and 416) are missing from this listing.
 */
408 #define LOV_2DISPATCH_NOLOCK(obj, op, ...) \
410 struct lov_object *__obj = (obj); \
411 enum lov_layout_type __llt; \
413 __llt = __obj->lo_type; \
414 LASSERT(0 <= __llt && __llt < ARRAY_SIZE(lov_dispatch)); \
415 lov_dispatch[__llt].op(__VA_ARGS__); \
/*
 * Dispatches like LOV_2DISPATCH_NOLOCK(), with down_read()/up_read() on
 * lo_type_guard around the call.  __lock is non-zero only when @lock is
 * non-zero and the current thread is not lo_owner; lov_conf_set() records
 * itself as lo_owner while it holds the semaphore for writing.
 *
 * @obj is expanded twice (here and inside LOV_2DISPATCH_NOLOCK()), so the
 * argument should be free of side effects.
 * NOTE(review): the lines that presumably make down_read()/up_read()
 * conditional on __lock, and the final expression yielding __result, are
 * missing from this listing (the embedded numbering skips 419, 423, 425,
 * 428 and 430-433).
 */
418 #define LOV_2DISPATCH_MAYLOCK(obj, op, lock, ...) \
420 struct lov_object *__obj = (obj); \
421 int __lock = !!(lock); \
422 typeof(lov_dispatch[0].op(__VA_ARGS__)) __result; \
424 __lock &= __obj->lo_owner != cfs_current(); \
426 down_read(&__obj->lo_type_guard); \
427 __result = LOV_2DISPATCH_NOLOCK(obj, op, __VA_ARGS__); \
429 up_read(&__obj->lo_type_guard); \
434 * Performs a locked double-dispatch based on the layout type of an object.
/*
 * Shorthand for LOV_2DISPATCH_MAYLOCK() with the lock argument fixed to 1.
 */
436 #define LOV_2DISPATCH(obj, op, ...) \
437 LOV_2DISPATCH_MAYLOCK(obj, op, 1, __VA_ARGS__)
/*
 * Dispatch variant for methods that return nothing (used for llo_delete
 * and llo_fini in this file).  lo_type_guard is taken for reading around
 * the call unless the current thread is lo_owner.
 *
 * The owner test is evaluated separately before down_read() and before
 * up_read(); the two must agree for the lock to stay balanced.
 * NOTE(review): the lines that open and close the macro body (original 440
 * and 451) are missing from this listing.
 */
439 #define LOV_2DISPATCH_VOID(obj, op, ...) \
441 struct lov_object *__obj = (obj); \
442 enum lov_layout_type __llt; \
444 if (__obj->lo_owner != cfs_current()) \
445 down_read(&__obj->lo_type_guard); \
446 __llt = __obj->lo_type; \
447 LASSERT(0 <= __llt && __llt < ARRAY_SIZE(lov_dispatch)); \
448 lov_dispatch[__llt].op(__VA_ARGS__); \
449 if (__obj->lo_owner != cfs_current()) \
450 up_read(&__obj->lo_type_guard); \
/*
 * Switches @obj from its current layout to layout type @llt.
 *
 * Visible steps: both layout types are range-checked; the new layout's
 * llo_init builds a state in the lov_thread_info scratch area; a nested
 * environment is obtained between cl_env_reenter()/cl_env_reexit() and used
 * for cl_object_prune(); the old layout's llo_fini runs on obj->u; the
 * object header is asserted to have no locks, no radix-tree root and no
 * pages; and the new layout's llo_install is called.  A call to the new
 * layout's llo_fini is also present, presumably as the failure path.
 *
 * Called from lov_conf_set() with lo_type_guard held for writing.
 *
 * NOTE(review): the conditionals, the remaining llo_init arguments, the
 * declarations of `result`, `cookie` and `refcheck`, any update of
 * obj->lo_type and the return statement are missing from this listing.
 * NOTE(review): cl_env_put(nested, ...) directly follows the line storing
 * PTR_ERR(nested); if it is not confined to the success branch it would be
 * handed an error pointer -- confirm against the full source.
 */
453 static int lov_layout_change(const struct lu_env *env,
454 struct lov_object *obj, enum lov_layout_type llt,
455 const struct cl_object_conf *conf)
458 union lov_layout_state *state = &lov_env_info(env)->lti_state;
459 const struct lov_layout_operations *old_ops;
460 const struct lov_layout_operations *new_ops;
462 LASSERT(0 <= obj->lo_type && obj->lo_type < ARRAY_SIZE(lov_dispatch));
463 LASSERT(0 <= llt && llt < ARRAY_SIZE(lov_dispatch));
466 old_ops = &lov_dispatch[obj->lo_type];
467 new_ops = &lov_dispatch[llt];
469 result = new_ops->llo_init(env, lu2lov_dev(obj->lo_cl.co_lu.lo_dev),
472 struct cl_object_header *hdr = cl_object_header(&obj->lo_cl);
474 struct lu_env *nested;
477 cookie = cl_env_reenter();
478 nested = cl_env_get(&refcheck);
480 cl_object_prune(nested, &obj->lo_cl);
482 result = PTR_ERR(nested);
483 cl_env_put(nested, &refcheck);
484 cl_env_reexit(cookie);
486 old_ops->llo_fini(env, obj, &obj->u);
487 LASSERT(list_empty(&hdr->coh_locks));
488 LASSERT(hdr->coh_tree.rnode == NULL);
489 LASSERT(hdr->coh_pages == 0);
491 new_ops->llo_install(env, obj, state);
494 new_ops->llo_fini(env, obj, state);
498 /*****************************************************************************
500 * Lov object operations.
/*
 * loo_object_init method of the lov layer (see lov_lu_obj_ops).
 *
 * Initializes lo_type_guard, chooses the layout type -- LLT_RAID0 when the
 * configuration carries a stripe md (cconf->u.coc_md->lsm != NULL),
 * LLT_EMPTY otherwise -- and runs that layout's llo_init on the scratch
 * state from lov_env_info(env).  llo_install and llo_fini calls follow,
 * presumably for the success and failure outcomes respectively.
 * NOTE(review): the declaration of `result`, the conditional choosing
 * between llo_install and llo_fini, and the return statement are not visible
 * in this listing.
 */
504 int lov_object_init(const struct lu_env *env, struct lu_object *obj,
505 const struct lu_object_conf *conf)
507 struct lov_device *dev = lu2lov_dev(obj->lo_dev);
508 struct lov_object *lov = lu2lov(obj);
509 const struct cl_object_conf *cconf = lu2cl_conf(conf);
510 union lov_layout_state *set = &lov_env_info(env)->lti_state;
511 const struct lov_layout_operations *ops;
515 init_rwsem(&lov->lo_type_guard);
517 /* no locking is necessary, as object is being created */
518 lov->lo_type = cconf->u.coc_md->lsm != NULL ? LLT_RAID0 : LLT_EMPTY;
519 ops = &lov_dispatch[lov->lo_type];
520 result = ops->llo_init(env, dev, lov, cconf, set);
522 ops->llo_install(env, lov, set);
524 ops->llo_fini(env, lov, set);
/*
 * coo_conf_set method of the lov layer (see lov_ops): applies a new
 * configuration to @obj, which may change its layout.
 *
 * The caller must not already be lo_owner.  lo_type_guard is taken for
 * writing and the current thread is recorded in lo_owner for the duration;
 * the LOV_2DISPATCH* macros compare lo_owner with the current thread before
 * taking the read lock.  An LLT_EMPTY object given a non-NULL stripe md is
 * switched to LLT_RAID0 through lov_layout_change(); -EOPNOTSUPP is the
 * other visible result.
 * NOTE(review): the declaration of `result`, the `else` line before the
 * -EOPNOTSUPP assignment and the return statement are not visible in this
 * listing.
 */
528 static int lov_conf_set(const struct lu_env *env, struct cl_object *obj,
529 const struct cl_object_conf *conf)
531 struct lov_object *lov = cl2lov(obj);
536 * Currently only LLT_EMPTY -> LLT_RAID0 transition is supported.
538 LASSERT(lov->lo_owner != cfs_current());
539 down_write(&lov->lo_type_guard);
540 LASSERT(lov->lo_owner == NULL);
541 lov->lo_owner = cfs_current();
542 if (lov->lo_type == LLT_EMPTY && conf->u.coc_md->lsm != NULL)
543 result = lov_layout_change(env, lov, LLT_RAID0, conf);
545 result = -EOPNOTSUPP;
546 lov->lo_owner = NULL;
547 up_write(&lov->lo_type_guard);
/*
 * loo_object_delete method of the lov layer (see lov_lu_obj_ops): forwards
 * to the current layout's llo_delete with the object's own layout state.
 */
551 static void lov_object_delete(const struct lu_env *env, struct lu_object *obj)
553 struct lov_object *lov = lu2lov(obj);
556 LOV_2DISPATCH_VOID(lov, llo_delete, env, lov, &lov->u);
/*
 * loo_object_free method of the lov layer (see lov_lu_obj_ops): runs the
 * current layout's llo_fini on the object's layout state, then returns the
 * lov_object to the lov_object_kmem slab.
 * NOTE(review): original line 566, between the two visible statements, is
 * missing from this listing.
 */
560 static void lov_object_free(const struct lu_env *env, struct lu_object *obj)
562 struct lov_object *lov = lu2lov(obj);
565 LOV_2DISPATCH_VOID(lov, llo_fini, env, lov, &lov->u);
567 OBD_SLAB_FREE_PTR(lov, lov_object_kmem);
/*
 * loo_object_print method of the lov layer (see lov_lu_obj_ops): returns
 * the result of the current layout's llo_print, dispatched through
 * LOV_2DISPATCH().
 */
571 static int lov_object_print(const struct lu_env *env, void *cookie,
572 lu_printer_t p, const struct lu_object *o)
574 return LOV_2DISPATCH(lu2lov(o), llo_print, env, cookie, p, o);
/*
 * coo_page_init method of the lov layer (see lov_ops): returns the result
 * of the current layout's llo_page_init, dispatched through
 * LOV_2DISPATCH().
 */
577 struct cl_page *lov_page_init(const struct lu_env *env, struct cl_object *obj,
578 struct cl_page *page, cfs_page_t *vmpage)
580 return LOV_2DISPATCH(cl2lov(obj),
581 llo_page_init, env, obj, page, vmpage);
585 * Implements cl_object_operations::coo_io_init() method for lov
586 * layer. Dispatches to the appropriate layout io initialization method.
/*
 * Cleans the lov io slice kept in the environment, then dispatches to the
 * current layout's llo_io_init.  The lock argument given to
 * LOV_2DISPATCH_MAYLOCK() is `io->ci_type != CIT_MISC`, i.e. 0 for
 * CIT_MISC io.
 * NOTE(review): the end of the parameter list (the `io` parameter used
 * below) and parts of the explanatory comment are missing from this
 * listing.
 */
588 int lov_io_init(const struct lu_env *env, struct cl_object *obj,
591 CL_IO_SLICE_CLEAN(lov_env_io(env), lis_cl);
593 * Do not take lock in case of CIT_MISC io, because
595 * - if this is an io for a glimpse, then we don't care;
597 * - if this not a glimpse (writepage or lock cancellation), then
598 * layout change cannot happen because a page or a lock
601 * - lock ordering (lock mutex nests within layout rw-semaphore)
602 * is obeyed in case of lock cancellation.
604 return LOV_2DISPATCH_MAYLOCK(cl2lov(obj), llo_io_init,
605 io->ci_type != CIT_MISC, env, obj, io);
609 * An implementation of cl_object_operations::coo_attr_get() method for lov
610 * layer. For raid0 layout this collects and merges attributes of all
/*
 * Returns the result of the current layout's llo_getattr.  The dispatch
 * uses LOV_2DISPATCH_NOLOCK(), so lo_type_guard is not taken here.
 */
613 static int lov_attr_get(const struct lu_env *env, struct cl_object *obj,
614 struct cl_attr *attr)
616 /* do not take lock, as this function is called under a
617 * spin-lock. Layout is protected from changing by ongoing IO. */
618 return LOV_2DISPATCH_NOLOCK(cl2lov(obj), llo_getattr, env, obj, attr);
/*
 * coo_attr_set method of the lov layer (see lov_ops).  It does not go
 * through lov_dispatch[], and struct lov_layout_operations has no
 * attribute-setting member.
 * NOTE(review): the body, including the value returned, is not visible in
 * this listing.
 */
621 static int lov_attr_set(const struct lu_env *env, struct cl_object *obj,
622 const struct cl_attr *attr, unsigned valid)
625 * No dispatch is required here, as no layout implements this.
/*
 * coo_lock_init method of the lov layer (see lov_ops): returns the result
 * of the current layout's llo_lock_init, dispatched through
 * LOV_2DISPATCH().
 * NOTE(review): lov_dispatch[] sets llo_lock_init to NULL in the group of
 * *_empty methods, so this call presumably must not be reached for an
 * object without stripes -- confirm with the callers.
 */
630 int lov_lock_init(const struct lu_env *env, struct cl_object *obj,
631 struct cl_lock *lock, const struct cl_io *io)
633 return LOV_2DISPATCH(cl2lov(obj), llo_lock_init, env, obj, lock, io);
/*
 * cl_object method table of the lov layer; lov_object_alloc() installs it
 * as lov->lo_cl.co_ops.  Every entry is a function defined in this file.
 */
636 static const struct cl_object_operations lov_ops = {
637 .coo_page_init = lov_page_init,
638 .coo_lock_init = lov_lock_init,
639 .coo_io_init = lov_io_init,
640 .coo_attr_get = lov_attr_get,
641 .coo_attr_set = lov_attr_set,
642 .coo_conf_set = lov_conf_set
/*
 * lu_object method table of the lov layer; lov_object_alloc() installs it
 * as obj->lo_ops.  The release and invariant hooks are left NULL.
 */
645 static const struct lu_object_operations lov_lu_obj_ops = {
646 .loo_object_init = lov_object_init,
647 .loo_object_delete = lov_object_delete,
648 .loo_object_release = NULL,
649 .loo_object_free = lov_object_free,
650 .loo_object_print = lov_object_print,
651 .loo_object_invariant = NULL
654 struct lu_object *lov_object_alloc(const struct lu_env *env,
655 const struct lu_object_header *_,
656 struct lu_device *dev)
658 struct lov_object *lov;
659 struct lu_object *obj;
662 OBD_SLAB_ALLOC_PTR(lov, lov_object_kmem);
665 lu_object_init(obj, NULL, dev);
666 lov->lo_cl.co_ops = &lov_ops;
667 lov->lo_type = -1; /* invalid, to catch uninitialized type */
669 * object io operation vector (cl_object::co_iop) is installed
670 * later in lov_object_init(), as different vectors are used
671 * for object with different layouts.
673 obj->lo_ops = &lov_lu_obj_ops;