4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License version 2 for more details. A copy is
14 * included in the COPYING file that accompanied this code.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved
24 * Use is subject to license terms.
26 * Copyright (c) 2012, 2013, Intel Corporation.
29 * lustre/lod/lod_object.c
31 * Author: Alex Zhuravlev <alexey.zhuravlev@intel.com>
34 #define DEBUG_SUBSYSTEM S_MDS
37 #include <obd_class.h>
38 #include <lustre_ver.h>
39 #include <obd_support.h>
40 #include <lprocfs_status.h>
42 #include <lustre_fid.h>
43 #include <lustre_param.h>
44 #include <lustre_fid.h>
45 #include <lustre_lmv.h>
47 #include <md_object.h>
49 #include "lod_internal.h"
/* names of the self and parent entries; used below as index keys when
 * "." and ".." are inserted into stripe directories */
51 static const char dot[] = ".";
52 static const char dotdot[] = "..";
/* kmem cache defined elsewhere; presumably backs lod_object allocation */
54 extern struct kmem_cache *lod_object_kmem;
/* forward declaration; the definition is not part of this section */
55 static const struct dt_body_operations lod_body_lnk_ops;
57 static int lod_index_lookup(const struct lu_env *env, struct dt_object *dt,
58 struct dt_rec *rec, const struct dt_key *key,
59 struct lustre_capa *capa)
61 struct dt_object *next = dt_object_child(dt);
62 return next->do_index_ops->dio_lookup(env, next, rec, key, capa);
/* Declare an index insertion: forwards rec/key/handle to
 * dt_declare_insert() on the child object of dt. */
65 static int lod_declare_index_insert(const struct lu_env *env,
67 const struct dt_rec *rec,
68 const struct dt_key *key,
69 struct thandle *handle)
71 return dt_declare_insert(env, dt_object_child(dt), rec, key, handle);
/* Insert a record into the index: forwards all arguments to dt_insert()
 * on the child object of dt and returns its result. */
74 static int lod_index_insert(const struct lu_env *env,
76 const struct dt_rec *rec,
77 const struct dt_key *key,
79 struct lustre_capa *capa,
82 return dt_insert(env, dt_object_child(dt), rec, key, th, capa, ign);
/* Declare an index deletion: forwards key/th to dt_declare_delete() on
 * the child object of dt. */
85 static int lod_declare_index_delete(const struct lu_env *env,
87 const struct dt_key *key,
90 return dt_declare_delete(env, dt_object_child(dt), key, th);
/* Delete a key from the index: forwards key/th/capa to dt_delete() on
 * the child object of dt and returns its result. */
93 static int lod_index_delete(const struct lu_env *env,
95 const struct dt_key *key,
97 struct lustre_capa *capa)
99 return dt_delete(env, dt_object_child(dt), key, th, capa);
/* Create an iterator on the child object through its dio_it.init() and
 * record it in the per-thread lod_it (lod_env_info(env)->lti_it), which
 * is asserted to be unused (lit_obj == NULL). The lod_it itself is
 * returned, cast to struct dt_it *. */
102 static struct dt_it *lod_it_init(const struct lu_env *env,
103 struct dt_object *dt, __u32 attr,
104 struct lustre_capa *capa)
106 struct dt_object *next = dt_object_child(dt);
107 struct lod_it *it = &lod_env_info(env)->lti_it;
108 struct dt_it *it_next;
111 it_next = next->do_index_ops->dio_it.init(env, next, attr, capa);
115 /* currently we do not use more than one iterator per thread
116 * so we store it in thread info. if at some point we need
117 * more active iterators in a single thread, we can allocate
119 LASSERT(it->lit_obj == NULL);
121 it->lit_it = it_next;
124 return (struct dt_it *)it;
/* Sanity check used by the iterator methods: asserts that a lod_it has
 * both its object (lit_obj) and its lower iterator (lit_it) set. */
127 #define LOD_CHECK_IT(env, it) \
129 LASSERT((it)->lit_obj != NULL); \
130 LASSERT((it)->lit_it != NULL); \
/* Finish an iteration: validates the lod_it and calls the lower layer's
 * dio_it.fini() on the wrapped iterator. */
133 void lod_it_fini(const struct lu_env *env, struct dt_it *di)
135 struct lod_it *it = (struct lod_it *)di;
137 LOD_CHECK_IT(env, it);
138 it->lit_obj->do_index_ops->dio_it.fini(env, it->lit_it);
140 /* the iterator not in use any more */
145 int lod_it_get(const struct lu_env *env, struct dt_it *di,
146 const struct dt_key *key)
148 const struct lod_it *it = (const struct lod_it *)di;
150 LOD_CHECK_IT(env, it);
151 return it->lit_obj->do_index_ops->dio_it.get(env, it->lit_it, key);
154 void lod_it_put(const struct lu_env *env, struct dt_it *di)
156 struct lod_it *it = (struct lod_it *)di;
158 LOD_CHECK_IT(env, it);
159 return it->lit_obj->do_index_ops->dio_it.put(env, it->lit_it);
162 int lod_it_next(const struct lu_env *env, struct dt_it *di)
164 struct lod_it *it = (struct lod_it *)di;
166 LOD_CHECK_IT(env, it);
167 return it->lit_obj->do_index_ops->dio_it.next(env, it->lit_it);
170 struct dt_key *lod_it_key(const struct lu_env *env, const struct dt_it *di)
172 const struct lod_it *it = (const struct lod_it *)di;
174 LOD_CHECK_IT(env, it);
175 return it->lit_obj->do_index_ops->dio_it.key(env, it->lit_it);
178 int lod_it_key_size(const struct lu_env *env, const struct dt_it *di)
180 struct lod_it *it = (struct lod_it *)di;
182 LOD_CHECK_IT(env, it);
183 return it->lit_obj->do_index_ops->dio_it.key_size(env, it->lit_it);
186 int lod_it_rec(const struct lu_env *env, const struct dt_it *di,
187 struct dt_rec *rec, __u32 attr)
189 const struct lod_it *it = (const struct lod_it *)di;
191 LOD_CHECK_IT(env, it);
192 return it->lit_obj->do_index_ops->dio_it.rec(env, it->lit_it, rec, attr);
195 __u64 lod_it_store(const struct lu_env *env, const struct dt_it *di)
197 const struct lod_it *it = (const struct lod_it *)di;
199 LOD_CHECK_IT(env, it);
200 return it->lit_obj->do_index_ops->dio_it.store(env, it->lit_it);
203 int lod_it_load(const struct lu_env *env, const struct dt_it *di, __u64 hash)
205 const struct lod_it *it = (const struct lod_it *)di;
207 LOD_CHECK_IT(env, it);
208 return it->lit_obj->do_index_ops->dio_it.load(env, it->lit_it, hash);
/* Delegates to dio_it.key_rec() of the underlying object for the wrapped
 * iterator, passing key_rec through, and returns its result. */
211 int lod_it_key_rec(const struct lu_env *env, const struct dt_it *di,
214 const struct lod_it *it = (const struct lod_it *)di;
216 LOD_CHECK_IT(env, it);
217 return it->lit_obj->do_index_ops->dio_it.key_rec(env, it->lit_it, key_rec);
/* Index operations table of LOD: wires the lookup/insert/delete entry
 * points and the lod_it_* iterator callbacks defined above. */
220 static struct dt_index_operations lod_index_ops = {
221 .dio_lookup = lod_index_lookup,
222 .dio_declare_insert = lod_declare_index_insert,
223 .dio_insert = lod_index_insert,
224 .dio_declare_delete = lod_declare_index_delete,
225 .dio_delete = lod_index_delete,
233 .key_size = lod_it_key_size,
235 .store = lod_it_store,
237 .key_rec = lod_it_key_rec,
/*
 * Take the read lock of the object below LOD; \a role is passed through
 * to dt_read_lock().
 */
static void lod_object_read_lock(const struct lu_env *env,
				 struct dt_object *dt, unsigned role)
{
	struct dt_object *lower = dt_object_child(dt);

	dt_read_lock(env, lower, role);
}
/*
 * Take the write lock of the object below LOD; \a role is passed through
 * to dt_write_lock().
 */
static void lod_object_write_lock(const struct lu_env *env,
				  struct dt_object *dt, unsigned role)
{
	struct dt_object *lower = dt_object_child(dt);

	dt_write_lock(env, lower, role);
}
/*
 * Drop the read lock of the object below LOD via dt_read_unlock().
 */
static void lod_object_read_unlock(const struct lu_env *env,
				   struct dt_object *dt)
{
	struct dt_object *lower = dt_object_child(dt);

	dt_read_unlock(env, lower);
}
/*
 * Drop the write lock of the object below LOD via dt_write_unlock().
 */
static void lod_object_write_unlock(const struct lu_env *env,
				    struct dt_object *dt)
{
	struct dt_object *lower = dt_object_child(dt);

	dt_write_unlock(env, lower);
}
/*
 * Report the result of dt_write_locked() for the object below LOD.
 */
static int lod_object_write_locked(const struct lu_env *env,
				   struct dt_object *dt)
{
	struct dt_object *lower = dt_object_child(dt);

	return dt_write_locked(env, lower);
}
/*
 * Read the attributes of the object below LOD into \a attr.
 *
 * Returns the result of dt_attr_get() on the child object.
 */
static int lod_attr_get(const struct lu_env *env,
			struct dt_object *dt,
			struct lu_attr *attr,
			struct lustre_capa *capa)
{
	struct dt_object *lower = dt_object_child(dt);

	return dt_attr_get(env, lower, attr, capa);
}
/* Declare an attribute update. The update is first declared on the local
 * (child) object with dt_declare_attr_set(). Which attribute bits matter
 * further depends on whether the object is a directory (see the la_valid
 * masks below). Striping is then loaded with lod_load_striping() and the
 * update is declared on every stripe in lo->ldo_stripe[]. For updates that
 * touch only atime/mtime/ctime the current attributes are read and each
 * requested time is compared with the stored one. */
279 static int lod_declare_attr_set(const struct lu_env *env,
280 struct dt_object *dt,
281 const struct lu_attr *attr,
282 struct thandle *handle)
284 struct dt_object *next = dt_object_child(dt);
285 struct lod_object *lo = lod_dt_obj(dt);
290 * declare setattr on the local object
292 rc = dt_declare_attr_set(env, next, attr, handle);
296 /* osp_declare_attr_set() ignores all attributes other than
297 * UID, GID, and size, and osp_attr_set() ignores all but UID
298 * and GID. Declaration of size attr setting happens through
299 * lod_declare_init_size(), and not through this function.
300 * Therefore we need not load striping unless ownership is
301 * changing. This should save memory and (we hope) speed up
303 if (!S_ISDIR(dt->do_lu.lo_header->loh_attr)) {
304 if (!(attr->la_valid & (LA_UID | LA_GID)))
307 if (!(attr->la_valid & (LA_UID | LA_GID | LA_MODE |
308 LA_ATIME | LA_MTIME | LA_CTIME)))
312 * load striping information, notice we don't do this when object
313 * is being initialized as we don't need this information till
314 * few specific cases like destroy, chown
316 rc = lod_load_striping(env, lo);
320 if (lo->ldo_stripenr == 0)
323 if (!(attr->la_valid & ~(LA_ATIME | LA_MTIME | LA_CTIME))) {
324 struct lu_attr *la = &lod_env_info(env)->lti_attr;
325 bool setattr_time = false;
327 rc = dt_attr_get(env, dt_object_child(dt), la,
332 /* If it will only setattr time, it will only set
333 * time < current_time */
334 if ((attr->la_valid & LA_ATIME &&
335 attr->la_atime < la->la_atime) ||
336 (attr->la_valid & LA_CTIME &&
337 attr->la_ctime < la->la_ctime) ||
338 (attr->la_valid & LA_MTIME &&
339 attr->la_mtime < la->la_mtime))
346 * if object is striped declare changes on the stripes
348 LASSERT(lo->ldo_stripe);
349 for (i = 0; i < lo->ldo_stripenr; i++) {
350 LASSERT(lo->ldo_stripe[i]);
352 rc = dt_declare_attr_set(env, lo->ldo_stripe[i], attr, handle);
354 CERROR("failed declaration: %d\n", rc);
362 static int lod_attr_set(const struct lu_env *env,
363 struct dt_object *dt,
364 const struct lu_attr *attr,
365 struct thandle *handle,
366 struct lustre_capa *capa)
368 struct dt_object *next = dt_object_child(dt);
369 struct lod_object *lo = lod_dt_obj(dt);
374 * apply changes to the local object
376 rc = dt_attr_set(env, next, attr, handle, capa);
380 if (!S_ISDIR(dt->do_lu.lo_header->loh_attr)) {
381 if (!(attr->la_valid & (LA_UID | LA_GID)))
384 if (!(attr->la_valid & (LA_UID | LA_GID | LA_MODE |
385 LA_ATIME | LA_MTIME | LA_CTIME)))
389 if (lo->ldo_stripenr == 0)
392 if (!(attr->la_valid & ~(LA_ATIME | LA_MTIME | LA_CTIME))) {
393 struct lu_attr *la = &lod_env_info(env)->lti_attr;
394 bool setattr_time = false;
396 rc = dt_attr_get(env, dt_object_child(dt), la,
401 /* If it will only setattr time, it will only set
402 * time < current_time */
403 if ((attr->la_valid & LA_ATIME &&
404 attr->la_atime < la->la_atime) ||
405 (attr->la_valid & LA_CTIME &&
406 attr->la_atime < la->la_ctime) ||
407 (attr->la_valid & LA_MTIME &&
408 attr->la_atime < la->la_mtime))
416 * if object is striped, apply changes to all the stripes
418 LASSERT(lo->ldo_stripe);
419 for (i = 0; i < lo->ldo_stripenr; i++) {
420 LASSERT(lo->ldo_stripe[i]);
421 rc = dt_attr_set(env, lo->ldo_stripe[i], attr, handle, capa);
423 CERROR("failed declaration: %d\n", rc);
/* Read an extended attribute from the child object. The extra handling
 * below applies when that read yields -ENODATA on a directory: if the
 * object is the device root (its FID equals the one from dt_root_get())
 * and the name is XATTR_NAME_LOV, a lov_user_md is filled in from the
 * defaults in the LOD descriptor (pattern, stripe size, count, offset),
 * provided the caller's buffer is at least sizeof(*lum). */
431 static int lod_xattr_get(const struct lu_env *env, struct dt_object *dt,
432 struct lu_buf *buf, const char *name,
433 struct lustre_capa *capa)
435 struct lod_thread_info *info = lod_env_info(env);
436 struct lod_device *dev = lu2lod_dev(dt->do_lu.lo_dev);
440 rc = dt_xattr_get(env, dt_object_child(dt), buf, name, capa);
441 if (rc != -ENODATA || !S_ISDIR(dt->do_lu.lo_header->loh_attr & S_IFMT))
445 * lod returns default striping on the real root of the device
446 * this is like the root stores default striping for the whole
447 * filesystem. historically we've been using a different approach
448 * and store it in the config.
450 dt_root_get(env, dev->lod_child, &info->lti_fid);
451 is_root = lu_fid_eq(&info->lti_fid, lu_object_fid(&dt->do_lu));
453 if (is_root && strcmp(XATTR_NAME_LOV, name) == 0) {
454 struct lov_user_md *lum = buf->lb_buf;
455 struct lov_desc *desc = &dev->lod_desc;
457 if (buf->lb_buf == NULL) {
459 } else if (buf->lb_len >= sizeof(*lum)) {
460 lum->lmm_magic = cpu_to_le32(LOV_USER_MAGIC_V1);
461 lmm_oi_set_seq(&lum->lmm_oi, FID_SEQ_LOV_DEFAULT);
462 lmm_oi_set_id(&lum->lmm_oi, 0);
463 lmm_oi_cpu_to_le(&lum->lmm_oi, &lum->lmm_oi);
464 lum->lmm_pattern = cpu_to_le32(desc->ld_pattern);
465 lum->lmm_stripe_size = cpu_to_le32(
466 desc->ld_default_stripe_size);
467 lum->lmm_stripe_count = cpu_to_le16(
468 desc->ld_default_stripe_count);
469 lum->lmm_stripe_offset = cpu_to_le16(
470 desc->ld_default_stripe_offset);
/* Validate a user-supplied LMV descriptor (fields are little-endian).
 * -EINVAL is selected when the magic is not LMV_USER_MAGIC, when the
 * stripe count is 0, or when the stripe count exceeds
 * lod_remote_mdt_count + 1. The CERROR below reports an invalid
 * descriptor together with the device name and rc. */
480 static int lod_verify_md_striping(struct lod_device *lod,
481 const struct lmv_user_md_v1 *lum)
486 if (unlikely(le32_to_cpu(lum->lum_magic) != LMV_USER_MAGIC))
487 GOTO(out, rc = -EINVAL);
489 if (unlikely(le32_to_cpu(lum->lum_stripe_count) == 0))
490 GOTO(out, rc = -EINVAL);
492 if (unlikely(le32_to_cpu(lum->lum_stripe_count) >
493 lod->lod_remote_mdt_count + 1))
494 GOTO(out, rc = -EINVAL);
497 CERROR("%s: invalid lmv_user_md: magic = %x, "
498 "stripe_offset = %d, stripe_count = %u: rc = %d\n",
499 lod2obd(lod)->obd_name, le32_to_cpu(lum->lum_magic),
500 (int)le32_to_cpu(lum->lum_stripe_offset),
501 le32_to_cpu(lum->lum_stripe_count), rc);
/* Build the on-disk LMV EA of a striped directory in the per-thread
 * buffer lti_ea_store (resized first if it is smaller than needed).
 * The EA describes ldo_stripenr + 1 stripes: slot 0 holds the FID of dt
 * itself and slots 1.. hold the FIDs of lo->ldo_stripe[]. The master MDT
 * index is obtained with lod_fld_lookup(). On completion lmv_buf points
 * at the thread buffer (so it is only valid until that buffer is reused)
 * and ldo_dir_striping_cached is set. */
505 int lod_prep_lmv_md(const struct lu_env *env, struct dt_object *dt,
506 struct lu_buf *lmv_buf)
508 struct lod_thread_info *info = lod_env_info(env);
509 struct lod_device *lod = lu2lod_dev(dt->do_lu.lo_dev);
510 struct lod_object *lo = lod_dt_obj(dt);
511 struct lmv_mds_md_v1 *lmm1;
519 LASSERT(lo->ldo_dir_striped != 0);
520 LASSERT(lo->ldo_stripenr > 0);
521 stripe_count = lo->ldo_stripenr + 1;
522 lmm_size = lmv_mds_md_size(stripe_count, LMV_MAGIC);
523 if (info->lti_ea_store_size < lmm_size) {
524 rc = lod_ea_store_resize(info, lmm_size);
529 lmm1 = (struct lmv_mds_md_v1 *)info->lti_ea_store;
530 lmm1->lmv_magic = cpu_to_le32(LMV_MAGIC);
531 lmm1->lmv_stripe_count = cpu_to_le32(stripe_count);
532 lmm1->lmv_hash_type = cpu_to_le32(lo->ldo_dir_hash_type);
533 rc = lod_fld_lookup(env, lod, lu_object_fid(&dt->do_lu),
534 &mdtidx, LU_SEQ_RANGE_MDT);
538 lmm1->lmv_master_mdt_index = cpu_to_le32(mdtidx);
539 fid_cpu_to_le(&lmm1->lmv_stripe_fids[0], lu_object_fid(&dt->do_lu));
540 for (i = 0; i < lo->ldo_stripenr; i++) {
541 struct dt_object *dto;
543 dto = lo->ldo_stripe[i];
544 LASSERT(dto != NULL);
545 fid_cpu_to_le(&lmm1->lmv_stripe_fids[i + 1],
546 lu_object_fid(&dto->do_lu));
549 lmv_buf->lb_buf = info->lti_ea_store;
550 lmv_buf->lb_len = lmm_size;
551 lo->ldo_dir_striping_cached = 1;
/* Parse an on-disk LMV EA (buf) and instantiate the stripe objects of a
 * striped directory. The EA magic and stripe count are checked first; if
 * the first FID in the EA is not this object's FID the object is marked
 * as a slave stripe (ldo_dir_slave_stripe). Otherwise an array of
 * stripe_count - 1 object pointers is allocated and, skipping the master
 * entry, each stripe FID is mapped to its MDT with lod_fld_lookup() and
 * located with dt_locate_at(). The array and counts are then stored in
 * lo; lod_object_free_striping() appears on the exit path (presumably
 * for the error case - the guarding condition is not visible here). */
556 int lod_parse_dir_striping(const struct lu_env *env, struct lod_object *lo,
557 const struct lu_buf *buf)
559 struct lod_thread_info *info = lod_env_info(env);
560 struct lod_device *lod = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
561 struct lod_tgt_descs *ltd = &lod->lod_mdt_descs;
562 struct dt_object **stripe;
563 union lmv_mds_md *lmm = buf->lb_buf;
564 struct lmv_mds_md_v1 *lmv1 = &lmm->lmv_md_v1;
565 struct lu_fid *fid = &info->lti_fid;
570 if (le32_to_cpu(lmv1->lmv_magic) != LMV_MAGIC_V1)
573 if (le32_to_cpu(lmv1->lmv_stripe_count) <= 1)
576 fid_le_to_cpu(fid, &lmv1->lmv_stripe_fids[0]);
577 /* Do not load striping information for slave inode */
578 if (!lu_fid_eq(fid, lu_object_fid(&lo->ldo_obj.do_lu))) {
579 lo->ldo_dir_slave_stripe = 1;
583 LASSERT(lo->ldo_stripe == NULL);
584 OBD_ALLOC(stripe, sizeof(stripe[0]) *
585 (le32_to_cpu(lmv1->lmv_stripe_count) - 1));
589 /* skip master stripe */
590 for (i = 1; i < le32_to_cpu(lmv1->lmv_stripe_count); i++) {
591 struct lod_tgt_desc *tgt;
593 struct dt_object *dto;
595 fid_le_to_cpu(fid, &lmv1->lmv_stripe_fids[i]);
596 rc = lod_fld_lookup(env, lod, fid,
597 &idx, LU_SEQ_RANGE_MDT);
601 tgt = LTD_TGT(ltd, idx);
603 GOTO(out, rc = -ESTALE);
605 dto = dt_locate_at(env, tgt->ltd_tgt, fid,
606 lo->ldo_obj.do_lu.lo_dev->ld_site->ls_top_dev,
609 GOTO(out, rc = PTR_ERR(dto));
614 lo->ldo_stripe = stripe;
615 lo->ldo_stripenr = le32_to_cpu(lmv1->lmv_stripe_count) - 1;
616 lo->ldo_stripes_allocated = le32_to_cpu(lmv1->lmv_stripe_count) - 1;
618 lod_object_free_striping(env, lo);
/* Prepare and declare the creation of the slave stripes of a striped
 * directory described by lum (already validated, see the assertions).
 *
 * Steps visible below:
 *  - allocate the stripe pointer array (stripe_count - 1 entries, the
 *    master needs none) and idx_array (stripe_count MDT indices, slot 0
 *    being lum_stripe_offset);
 *  - for each slave, scan MDT indices round-robin starting after the
 *    previously chosen one, skipping indices not set in ltd_tgt_bitmap or
 *    already present in idx_array, then allocate a FID on the chosen
 *    target and locate a new object there;
 *  - record the stripes in lo (ldo_stripenr may be smaller than
 *    requested; zero stripes yields -ENOSPC) and build the LMV EA with
 *    lod_prep_lmv_md();
 *  - on every slave declare: create, insertion of "." and "..", an
 *    optional default LOV EA inherited from lo, and the LMV EA; finally
 *    declare the LMV EA on dt itself.
 * The cleanup code at the end puts the stripe objects and frees both
 * arrays. */
623 static int lod_prep_md_striped_create(const struct lu_env *env,
624 struct dt_object *dt,
625 struct lu_attr *attr,
626 const struct lmv_user_md_v1 *lum,
629 struct lod_device *lod = lu2lod_dev(dt->do_lu.lo_dev);
630 struct lod_tgt_descs *ltd = &lod->lod_mdt_descs;
631 struct lod_object *lo = lod_dt_obj(dt);
632 struct dt_object **stripe;
633 struct lu_buf lmv_buf;
641 /* The lum has been verified in lod_verify_md_striping */
642 LASSERT(le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC);
643 LASSERT(le32_to_cpu(lum->lum_stripe_count) > 0);
645 /* Do not need allocated master stripe */
646 stripe_count = le32_to_cpu(lum->lum_stripe_count);
647 OBD_ALLOC(stripe, sizeof(stripe[0]) * (stripe_count - 1));
651 OBD_ALLOC(idx_array, sizeof(idx_array[0]) * stripe_count);
652 if (idx_array == NULL)
653 GOTO(out_free, rc = -ENOMEM);
655 idx_array[0] = le32_to_cpu(lum->lum_stripe_offset);
656 for (i = 1; i < stripe_count; i++) {
657 struct lod_tgt_desc *tgt;
658 struct dt_object *dto;
661 struct lu_object_conf conf = { 0 };
663 idx = (idx_array[i - 1] + 1) % (lod->lod_remote_mdt_count + 1);
665 for (j = 0; j < lod->lod_remote_mdt_count;
666 j++, idx = (idx + 1) % (lod->lod_remote_mdt_count + 1)) {
667 bool already_allocated = false;
670 CDEBUG(D_INFO, "try idx %d, mdt cnt %d,"
671 " allocated %d, last allocated %d\n", idx,
672 lod->lod_remote_mdt_count, i, idx_array[i - 1]);
674 /* Find next available target */
675 if (!cfs_bitmap_check(ltd->ltd_tgt_bitmap, idx))
678 /* check whether the idx already exists
679 * in current allocated array */
680 for (k = 0; k < i; k++) {
681 if (idx_array[k] == idx) {
682 already_allocated = true;
687 if (already_allocated)
693 /* Can not allocate more stripes */
694 if (j == lod->lod_remote_mdt_count) {
695 CDEBUG(D_INFO, "%s: require stripes %d only get %d\n",
696 lod2obd(lod)->obd_name, stripe_count, i - 1);
700 CDEBUG(D_INFO, "idx %d, mdt cnt %d,"
701 " allocated %d, last allocated %d\n", idx,
702 lod->lod_remote_mdt_count, i, idx_array[i - 1]);
704 tgt = LTD_TGT(ltd, idx);
705 LASSERT(tgt != NULL);
707 rc = obd_fid_alloc(tgt->ltd_exp, &fid, NULL);
712 conf.loc_flags = LOC_F_NEW;
713 dto = dt_locate_at(env, tgt->ltd_tgt, &fid,
714 dt->do_lu.lo_dev->ld_site->ls_top_dev, &conf);
716 GOTO(out_put, rc = PTR_ERR(dto));
721 lo->ldo_dir_striped = 1;
722 lo->ldo_stripe = stripe;
723 lo->ldo_stripenr = i - 1;
724 lo->ldo_stripes_allocated = stripe_count - 1;
726 if (lo->ldo_stripenr == 0)
727 GOTO(out_put, rc = -ENOSPC);
729 rc = lod_prep_lmv_md(env, dt, &lmv_buf);
733 for (i = 0; i < lo->ldo_stripenr; i++) {
734 struct dt_object *dto;
737 /* only create slave striped object */
738 rc = dt_declare_create(env, dto, attr, NULL, NULL, th);
742 if (!dt_try_as_dir(env, dto))
743 GOTO(out_put, rc = -EINVAL);
745 rc = dt_declare_insert(env, dto,
746 (const struct dt_rec *)lu_object_fid(&dto->do_lu),
747 (const struct dt_key *)dot, th);
751 /* master stripe FID will be put to .. */
752 rc = dt_declare_insert(env, dto,
753 (const struct dt_rec *)lu_object_fid(&dt->do_lu),
754 (const struct dt_key *)dotdot, th);
758 /* probably nothing to inherit */
759 if (lo->ldo_striping_cached &&
760 !LOVEA_DELETE_VALUES(lo->ldo_def_stripe_size,
761 lo->ldo_def_stripenr,
762 lo->ldo_def_stripe_offset)) {
763 struct lod_thread_info *info;
764 struct lov_user_md_v3 *v3;
766 /* sigh, lti_ea_store has been used for lmv_buf,
767 * so we have to allocate buffer for default
771 GOTO(out_put, rc = -ENOMEM);
773 memset(v3, 0, sizeof(*v3));
774 v3->lmm_magic = cpu_to_le32(LOV_USER_MAGIC_V3);
775 v3->lmm_stripe_count =
776 cpu_to_le32(lo->ldo_def_stripenr);
777 v3->lmm_stripe_offset =
778 cpu_to_le32(lo->ldo_def_stripe_offset);
779 v3->lmm_stripe_size =
780 cpu_to_le32(lo->ldo_def_stripe_size);
782 strncpy(v3->lmm_pool_name, lo->ldo_pool,
785 info = lod_env_info(env);
786 info->lti_buf.lb_buf = v3;
787 info->lti_buf.lb_len = sizeof(*v3);
788 rc = dt_declare_xattr_set(env, dto,
796 rc = dt_declare_xattr_set(env, dto, &lmv_buf, XATTR_NAME_LMV, 0,
802 rc = dt_declare_xattr_set(env, dt, &lmv_buf, XATTR_NAME_LMV, 0, th);
808 for (i = 0; i < stripe_count - 1; i++)
809 if (stripe[i] != NULL)
810 lu_object_put(env, &stripe[i]->do_lu);
811 OBD_FREE(stripe, sizeof(stripe[0]) * (stripe_count - 1));
815 if (idx_array != NULL)
816 OBD_FREE(idx_array, sizeof(idx_array[0]) * stripe_count);
822 * Declare the creation of a striped md object.
/* Declare setting of the LMV EA carried in lum_buf on directory dt.
 * The descriptor is logged, checked for a stripe count <= 1, validated
 * with lod_verify_md_striping() and then handed to
 * lod_prep_md_striped_create(); if preparing the striping fails, the
 * striping state of the object is released again with
 * lod_object_free_striping(). */
824 static int lod_declare_xattr_set_lmv(const struct lu_env *env,
825 struct dt_object *dt,
826 struct lu_attr *attr,
827 const struct lu_buf *lum_buf,
830 struct lod_object *lo = lod_dt_obj(dt);
831 struct lod_device *lod = lu2lod_dev(dt->do_lu.lo_dev);
832 struct lmv_user_md_v1 *lum;
836 lum = lum_buf->lb_buf;
837 LASSERT(lum != NULL);
839 CDEBUG(D_INFO, "lum magic = %x count = %u offset = %d\n",
840 le32_to_cpu(lum->lum_magic), le32_to_cpu(lum->lum_stripe_count),
841 (int)le32_to_cpu(lum->lum_stripe_offset));
843 if (le32_to_cpu(lum->lum_stripe_count) <= 1)
846 rc = lod_verify_md_striping(lod, lum);
850 /* prepare dir striped objects */
851 rc = lod_prep_md_striped_create(env, dt, attr, lum, th);
853 /* failed to create striping, let's reset
854 * config so that others don't get confused */
855 lod_object_free_striping(env, lo);
863 * LOV xattr is a storage for striping, and LOD owns this xattr.
864 * but LOD allows others to control striping to some extent
866 * - to set new defined striping
867 * - to set new semi-defined striping
868 * - number of stripes is defined
869 * - number of stripes + osts are defined
/* Declare setting of an extended attribute.
 *  - XATTR_NAME_LOV on a regular file or a not-yet-typed object, without
 *    LU_XATTR_REPLACE: treated as a striping request and passed to
 *    lod_declare_striped_object(), using the object's current attributes
 *    if it exists or a minimal S_IFREG attribute set otherwise.
 *  - On a directory: a XATTR_NAME_DEFAULT_LMV value is validated with
 *    lod_verify_md_striping(); the xattr is declared on the child object
 *    and, after lod_load_striping(), on each stripe.
 *  - Anything else is declared on the child object only. */
872 static int lod_declare_xattr_set(const struct lu_env *env,
873 struct dt_object *dt,
874 const struct lu_buf *buf,
875 const char *name, int fl,
878 struct dt_object *next = dt_object_child(dt);
879 struct lu_attr *attr = &lod_env_info(env)->lti_attr;
885 * allow to declare predefined striping on a new (!mode) object
886 * which is supposed to be replay of regular file creation
887 * (when LOV setting is declared)
888 * LU_XATTR_REPLACE is set to indicate a layout swap
890 mode = dt->do_lu.lo_header->loh_attr & S_IFMT;
891 if ((S_ISREG(mode) || mode == 0) && strcmp(name, XATTR_NAME_LOV) == 0 &&
892 !(fl & LU_XATTR_REPLACE)) {
894 * this is a request to manipulate object's striping
896 if (dt_object_exists(dt)) {
897 rc = dt_attr_get(env, next, attr, BYPASS_CAPA);
901 memset(attr, 0, sizeof(*attr));
902 attr->la_valid = LA_TYPE | LA_MODE;
903 attr->la_mode = S_IFREG;
905 rc = lod_declare_striped_object(env, dt, attr, buf, th);
906 } else if (S_ISDIR(mode)) {
907 struct lod_device *d = lu2lod_dev(dt->do_lu.lo_dev);
908 struct lod_object *lo = lod_dt_obj(dt);
911 if (strcmp(name, XATTR_NAME_DEFAULT_LMV) == 0) {
912 struct lmv_user_md_v1 *lum;
914 LASSERT(buf != NULL && buf->lb_buf != NULL);
916 rc = lod_verify_md_striping(d, lum);
921 rc = dt_declare_xattr_set(env, next, buf, name, fl, th);
925 /* set xattr to each stripes, if needed */
926 rc = lod_load_striping(env, lo);
930 if (lo->ldo_stripenr == 0)
933 for (i = 0; i < lo->ldo_stripenr; i++) {
934 LASSERT(lo->ldo_stripe[i]);
935 rc = dt_declare_xattr_set(env, lo->ldo_stripe[i], buf,
941 rc = dt_declare_xattr_set(env, next, buf, name, fl, th);
947 static void lod_lov_stripe_cache_clear(struct lod_object *lo)
949 lo->ldo_striping_cached = 0;
950 lo->ldo_def_striping_set = 0;
951 lod_object_set_pool(lo, NULL);
952 lo->ldo_def_stripe_size = 0;
953 lo->ldo_def_stripenr = 0;
/* Set (or delete) the default LOV striping EA of a directory. The cached
 * default striping of the object is cleared first, the supplied layout is
 * checked with lod_verify_striping(), and then: if the descriptor is a V1
 * one whose size/count/offset are all "delete" values
 * (LOVEA_DELETE_VALUES), the xattr is removed from the child object with
 * dt_xattr_del(); otherwise it is stored with dt_xattr_set(). */
956 static int lod_xattr_set_lov_on_dir(const struct lu_env *env,
957 struct dt_object *dt,
958 const struct lu_buf *buf,
959 const char *name, int fl,
961 struct lustre_capa *capa)
963 struct lod_device *d = lu2lod_dev(dt->do_lu.lo_dev);
964 struct dt_object *next = dt_object_child(dt);
965 struct lod_object *l = lod_dt_obj(dt);
966 struct lov_user_md_v1 *lum;
967 struct lov_user_md_v3 *v3 = NULL;
971 /* If it is striped dir, we should clear the stripe cache for
972 * slave stripe as well, but there are no effective way to
973 * notify the LOD on the slave MDT, so we do not cache stripe
974 * information for slave stripe for now. XXX*/
975 lod_lov_stripe_cache_clear(l);
976 LASSERT(buf != NULL && buf->lb_buf != NULL);
979 rc = lod_verify_striping(d, buf, 0);
983 if (lum->lmm_magic == LOV_USER_MAGIC_V3)
986 /* if { size, offset, count } = { 0, -1, 0 } and no pool
987 * (i.e. all default values specified) then delete default
988 * striping from dir. */
990 "set default striping: sz %u # %u offset %d %s %s\n",
991 (unsigned)lum->lmm_stripe_size,
992 (unsigned)lum->lmm_stripe_count,
993 (int)lum->lmm_stripe_offset,
994 v3 ? "from" : "", v3 ? v3->lmm_pool_name : "");
996 if (LOVEA_DELETE_VALUES((lum->lmm_stripe_size),
997 (lum->lmm_stripe_count),
998 (lum->lmm_stripe_offset)) &&
999 lum->lmm_magic == LOV_USER_MAGIC_V1) {
1000 rc = dt_xattr_del(env, next, name, th, capa);
1004 rc = dt_xattr_set(env, next, buf, name, fl, th, capa);
/* Set (or delete) the default LMV EA of a directory. When the descriptor
 * has magic LMV_USER_MAGIC and its stripe count/offset are "delete"
 * values (LMVEA_DELETE_VALUES) the xattr is removed from the child
 * object; otherwise it is stored there. The in-memory default is then
 * updated: ldo_dir_stripe is allocated if absent, the default-striping
 * flag is set and the default stripe count becomes lum_stripe_count - 1. */
1010 static int lod_xattr_set_default_lmv_on_dir(const struct lu_env *env,
1011 struct dt_object *dt,
1012 const struct lu_buf *buf,
1013 const char *name, int fl,
1015 struct lustre_capa *capa)
1017 struct dt_object *next = dt_object_child(dt);
1018 struct lod_object *l = lod_dt_obj(dt);
1019 struct lmv_user_md_v1 *lum;
1023 LASSERT(buf != NULL && buf->lb_buf != NULL);
1026 CDEBUG(D_OTHER, "set default stripe_count # %u stripe_offset %d\n",
1027 le32_to_cpu(lum->lum_stripe_count),
1028 (int)le32_to_cpu(lum->lum_stripe_offset));
1030 if (LMVEA_DELETE_VALUES((le32_to_cpu(lum->lum_stripe_count)),
1031 le32_to_cpu(lum->lum_stripe_offset)) &&
1032 le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC) {
1033 rc = dt_xattr_del(env, next, name, th, capa);
1037 rc = dt_xattr_set(env, next, buf, name, fl, th, capa);
1041 /* Update default stripe cache */
1042 if (l->ldo_dir_stripe == NULL) {
1043 OBD_ALLOC_PTR(l->ldo_dir_stripe);
1044 if (l->ldo_dir_stripe == NULL)
1048 l->ldo_dir_striping_cached = 0;
1049 l->ldo_dir_def_striping_set = 1;
1050 l->ldo_dir_def_stripenr =
1051 le32_to_cpu(lum->lum_stripe_count) - 1;
/* Execute-phase counterpart of the striped-directory declaration: for a
 * directory with stripes (ldo_stripenr != 0) the LMV EA is rebuilt with
 * lod_prep_lmv_md() and every slave stripe is created as a directory
 * (S_IFDIR), gets "." (its own FID) and ".." (the master FID) inserted,
 * optionally receives a default LOV EA built from the cached defaults of
 * lo, and has the LMV EA set. Finally the LMV EA is set on dt itself. */
1057 static int lod_xattr_set_lmv(const struct lu_env *env, struct dt_object *dt,
1058 const struct lu_buf *buf, const char *name,
1059 int fl, struct thandle *th,
1060 struct lustre_capa *capa)
1062 struct lod_object *lo = lod_dt_obj(dt);
1063 struct lu_buf lmv_buf;
1068 if (!S_ISDIR(dt->do_lu.lo_header->loh_attr))
1071 /* The stripes are supposed to be allocated in declare phase,
1072 * if there are no stripes being allocated, it will skip */
1073 if (lo->ldo_stripenr == 0)
1076 rc = lod_prep_lmv_md(env, dt, &lmv_buf);
1080 for (i = 0; i < lo->ldo_stripenr; i++) {
1081 struct dt_object *dto;
1082 struct lu_attr *attr = &lod_env_info(env)->lti_attr;
1084 dto = lo->ldo_stripe[i];
1085 memset(attr, 0, sizeof(*attr));
1086 attr->la_valid = LA_TYPE | LA_MODE;
1087 attr->la_mode = S_IFDIR;
1088 rc = dt_create(env, dto, attr, NULL, NULL, th);
1092 rc = dt_insert(env, dto,
1093 (const struct dt_rec *)lu_object_fid(&dto->do_lu),
1094 (const struct dt_key *)dot, th, capa, 0);
1098 rc = dt_insert(env, dto,
1099 (struct dt_rec *)lu_object_fid(&dt->do_lu),
1100 (const struct dt_key *)dotdot, th, capa, 0);
1104 if (lo->ldo_striping_cached &&
1105 !LOVEA_DELETE_VALUES(lo->ldo_def_stripe_size,
1106 lo->ldo_def_stripenr,
1107 lo->ldo_def_stripe_offset)) {
1108 struct lod_thread_info *info;
1109 struct lov_user_md_v3 *v3;
1111 /* sigh, lti_ea_store has been used for lmv_buf,
1112 * so we have to allocate buffer for default
1118 memset(v3, 0, sizeof(*v3));
1119 v3->lmm_magic = cpu_to_le32(LOV_USER_MAGIC_V3);
1120 v3->lmm_stripe_count =
1121 cpu_to_le32(lo->ldo_def_stripenr);
1122 v3->lmm_stripe_offset =
1123 cpu_to_le32(lo->ldo_def_stripe_offset);
1124 v3->lmm_stripe_size =
1125 cpu_to_le32(lo->ldo_def_stripe_size);
1127 strncpy(v3->lmm_pool_name, lo->ldo_pool,
1130 info = lod_env_info(env);
1131 info->lti_buf.lb_buf = v3;
1132 info->lti_buf.lb_len = sizeof(*v3);
1133 rc = dt_xattr_set(env, dto, &info->lti_buf,
1134 XATTR_NAME_LOV, 0, th, capa);
1140 rc = dt_xattr_set(env, dto, &lmv_buf, XATTR_NAME_LMV, fl, th,
1144 rc = dt_xattr_set(env, dt, &lmv_buf, XATTR_NAME_LMV, fl, th, capa);
/* Set an extended attribute, dispatching on object type and name:
 *  - XATTR_NAME_LOV on a directory -> lod_xattr_set_lov_on_dir();
 *  - XATTR_NAME_LOV on a regular file -> with LU_XATTR_REPLACE the
 *    in-memory striping is freed and the EA is written to the child
 *    object, otherwise lod_striping_create() is called;
 *  - XATTR_NAME_DEFAULT_LMV -> lod_xattr_set_default_lmv_on_dir();
 *  - any other name -> dt_xattr_set() on the child object.
 * For directories with stripes the xattr is afterwards also set on each
 * stripe in lo->ldo_stripe[]. */
1149 static int lod_xattr_set(const struct lu_env *env,
1150 struct dt_object *dt, const struct lu_buf *buf,
1151 const char *name, int fl, struct thandle *th,
1152 struct lustre_capa *capa)
1154 struct lod_object *lo = lod_dt_obj(dt);
1155 struct dt_object *next = dt_object_child(dt);
1161 attr = dt->do_lu.lo_header->loh_attr & S_IFMT;
1162 if (S_ISDIR(attr) && strcmp(name, XATTR_NAME_LOV) == 0) {
1163 rc = lod_xattr_set_lov_on_dir(env, dt, buf, name, fl, th, capa);
1164 } else if (S_ISREG(attr) && !strcmp(name, XATTR_NAME_LOV)) {
1165 /* in case of lov EA swap, just set it
1166 * if not, it is a replay so check striping match what we
1167 * already have during req replay, declare_xattr_set()
1168 * defines striping, then create() does the work
1170 if (fl & LU_XATTR_REPLACE) {
1171 /* free stripes, then update disk */
1172 lod_object_free_striping(env, lod_dt_obj(dt));
1173 rc = dt_xattr_set(env, next, buf, name, fl, th, capa);
1175 rc = lod_striping_create(env, dt, NULL, NULL, th);
1177 } else if (strcmp(name, XATTR_NAME_DEFAULT_LMV) == 0) {
1180 rc = lod_xattr_set_default_lmv_on_dir(env, dt, buf, name, fl,
1184 * behave transparantly for all other EAs
1186 rc = dt_xattr_set(env, next, buf, name, fl, th, capa);
1189 if (rc != 0 || !S_ISDIR(attr))
1192 if (lo->ldo_stripenr == 0)
1195 for (i = 0; i < lo->ldo_stripenr; i++) {
1196 LASSERT(lo->ldo_stripe[i]);
1197 rc = dt_xattr_set(env, lo->ldo_stripe[i], buf, name, fl, th,
/* Declare removal of an extended attribute: forwards name/th to
 * dt_declare_xattr_del() on the child object of dt. */
1206 static int lod_declare_xattr_del(const struct lu_env *env,
1207 struct dt_object *dt, const char *name,
1210 return dt_declare_xattr_del(env, dt_object_child(dt), name, th);
1213 static int lod_xattr_del(const struct lu_env *env, struct dt_object *dt,
1214 const char *name, struct thandle *th,
1215 struct lustre_capa *capa)
1217 if (!strcmp(name, XATTR_NAME_LOV))
1218 lod_object_free_striping(env, lod_dt_obj(dt));
1219 return dt_xattr_del(env, dt_object_child(dt), name, th, capa);
/*
 * List the extended attributes of the object below LOD into \a buf.
 *
 * Returns the result of dt_xattr_list() on the child object.
 */
static int lod_xattr_list(const struct lu_env *env,
			  struct dt_object *dt, struct lu_buf *buf,
			  struct lustre_capa *capa)
{
	struct dt_object *lower = dt_object_child(dt);

	return dt_xattr_list(env, lower, buf, capa);
}
/* Replace the pool name stored in o->ldo_pool. The visible code frees an
 * existing name (strlen + 1 bytes) and stores a freshly allocated copy of
 * pool; callers in this file pass NULL to drop the name (the guarding
 * conditions and return statements are not visible here). */
1229 int lod_object_set_pool(struct lod_object *o, char *pool)
1234 len = strlen(o->ldo_pool);
1235 OBD_FREE(o->ldo_pool, len + 1);
1240 OBD_ALLOC(o->ldo_pool, len + 1);
1241 if (o->ldo_pool == NULL)
1243 strcpy(o->ldo_pool, pool);
1248 static inline int lod_object_will_be_striped(int is_reg, const struct lu_fid *fid)
1250 return (is_reg && fid_seq(fid) != FID_SEQ_LOCAL_FILE);
1254 static int lod_cache_parent_lov_striping(const struct lu_env *env,
1255 struct lod_object *lp)
1257 struct lod_thread_info *info = lod_env_info(env);
1258 struct lov_user_md_v1 *v1 = NULL;
1259 struct lov_user_md_v3 *v3 = NULL;
1263 /* called from MDD without parent being write locked,
1265 dt_write_lock(env, dt_object_child(&lp->ldo_obj), 0);
1266 rc = lod_get_lov_ea(env, lp);
1270 if (rc < sizeof(struct lov_user_md)) {
1271 /* don't lookup for non-existing or invalid striping */
1272 lp->ldo_def_striping_set = 0;
1273 lp->ldo_striping_cached = 1;
1274 lp->ldo_def_stripe_size = 0;
1275 lp->ldo_def_stripenr = 0;
1276 lp->ldo_def_stripe_offset = (typeof(v1->lmm_stripe_offset))(-1);
1277 GOTO(unlock, rc = 0);
1281 v1 = info->lti_ea_store;
1282 if (v1->lmm_magic == __swab32(LOV_USER_MAGIC_V1))
1283 lustre_swab_lov_user_md_v1(v1);
1284 else if (v1->lmm_magic == __swab32(LOV_USER_MAGIC_V3))
1285 lustre_swab_lov_user_md_v3(v3);
1287 if (v1->lmm_magic != LOV_MAGIC_V3 && v1->lmm_magic != LOV_MAGIC_V1)
1288 GOTO(unlock, rc = 0);
1290 if (v1->lmm_pattern != LOV_PATTERN_RAID0 && v1->lmm_pattern != 0)
1291 GOTO(unlock, rc = 0);
1293 lp->ldo_def_stripenr = v1->lmm_stripe_count;
1294 lp->ldo_def_stripe_size = v1->lmm_stripe_size;
1295 lp->ldo_def_stripe_offset = v1->lmm_stripe_offset;
1296 lp->ldo_striping_cached = 1;
1297 lp->ldo_def_striping_set = 1;
1298 if (v1->lmm_magic == LOV_USER_MAGIC_V3) {
1299 /* XXX: sanity check here */
1300 v3 = (struct lov_user_md_v3 *) v1;
1301 if (v3->lmm_pool_name[0])
1302 lod_object_set_pool(lp, v3->lmm_pool_name);
1306 dt_write_unlock(env, dt_object_child(&lp->ldo_obj));
/* Cache the default LMV (directory) striping of a parent in its
 * lod_object. The default LMV EA is read with lod_get_default_lmv_ea()
 * while the child object is write-locked. If the EA is shorter than
 * struct lmv_user_md, "no default" is cached (count 0, offset -1, hash
 * LMV_HASH_TYPE_FNV_1A_64); otherwise the stripe count (minus one),
 * stripe offset and hash type are taken from the little-endian EA in
 * lti_ea_store. */
1311 static int lod_cache_parent_lmv_striping(const struct lu_env *env,
1312 struct lod_object *lp)
1314 struct lod_thread_info *info = lod_env_info(env);
1315 struct lmv_user_md_v1 *v1 = NULL;
1319 /* called from MDD without parent being write locked,
1321 dt_write_lock(env, dt_object_child(&lp->ldo_obj), 0);
1322 rc = lod_get_default_lmv_ea(env, lp);
1326 if (rc < sizeof(struct lmv_user_md)) {
1327 /* don't lookup for non-existing or invalid striping */
1328 lp->ldo_dir_def_striping_set = 0;
1329 lp->ldo_dir_striping_cached = 1;
1330 lp->ldo_dir_def_stripenr = 0;
1331 lp->ldo_dir_def_stripe_offset =
1332 (typeof(v1->lum_stripe_offset))(-1);
1333 lp->ldo_dir_def_hash_type = LMV_HASH_TYPE_FNV_1A_64;
1334 GOTO(unlock, rc = 0);
1338 v1 = info->lti_ea_store;
1340 lp->ldo_dir_def_stripenr = le32_to_cpu(v1->lum_stripe_count) - 1;
1341 lp->ldo_dir_def_stripe_offset = le32_to_cpu(v1->lum_stripe_offset);
1342 lp->ldo_dir_def_hash_type = le32_to_cpu(v1->lum_hash_type);
1343 lp->ldo_dir_def_striping_set = 1;
1344 lp->ldo_dir_striping_cached = 1;
1348 dt_write_unlock(env, dt_object_child(&lp->ldo_obj));
1352 static int lod_cache_parent_striping(const struct lu_env *env,
1353 struct lod_object *lp,
1359 rc = lod_load_striping(env, lp);
1363 if (!lp->ldo_striping_cached) {
1364 /* we haven't tried to get default striping for
1365 * the directory yet, let's cache it in the object */
1366 rc = lod_cache_parent_lov_striping(env, lp);
1371 if (S_ISDIR(child_mode) && !lp->ldo_dir_striping_cached)
1372 rc = lod_cache_parent_lmv_striping(env, lp);
1378 * used to transfer default striping data to the object being created
1380 static void lod_ah_init(const struct lu_env *env,
1381 struct dt_allocation_hint *ah,
1382 struct dt_object *parent,
1383 struct dt_object *child,
1386 struct lod_device *d = lu2lod_dev(child->do_lu.lo_dev);
1387 struct dt_object *nextp = NULL;
1388 struct dt_object *nextc;
1389 struct lod_object *lp = NULL;
1390 struct lod_object *lc;
1391 struct lov_desc *desc;
1396 if (likely(parent)) {
1397 nextp = dt_object_child(parent);
1398 lp = lod_dt_obj(parent);
1401 nextc = dt_object_child(child);
1402 lc = lod_dt_obj(child);
1404 LASSERT(lc->ldo_stripenr == 0);
1405 LASSERT(lc->ldo_stripe == NULL);
1408 * local object may want some hints
1409 * in case of late striping creation, ->ah_init()
1410 * can be called with local object existing
1412 if (!dt_object_exists(nextc) || dt_object_remote(nextc))
1413 nextc->do_ops->do_ah_init(env, ah, dt_object_remote(nextp) ?
1414 NULL : nextp, nextc, child_mode);
1416 if (S_ISDIR(child_mode)) {
1419 if (lc->ldo_dir_stripe == NULL) {
1420 OBD_ALLOC_PTR(lc->ldo_dir_stripe);
1421 if (lc->ldo_dir_stripe == NULL)
1425 if (lp->ldo_dir_stripe == NULL) {
1426 OBD_ALLOC_PTR(lp->ldo_dir_stripe);
1427 if (lp->ldo_dir_stripe == NULL)
1431 rc = lod_cache_parent_striping(env, lp, child_mode);
1435 /* transfer defaults to new directory */
1436 if (lp->ldo_striping_cached) {
1438 lod_object_set_pool(lc, lp->ldo_pool);
1439 lc->ldo_def_stripenr = lp->ldo_def_stripenr;
1440 lc->ldo_def_stripe_size = lp->ldo_def_stripe_size;
1441 lc->ldo_def_stripe_offset = lp->ldo_def_stripe_offset;
1442 lc->ldo_striping_cached = 1;
1443 lc->ldo_def_striping_set = 1;
1444 CDEBUG(D_OTHER, "inherite EA sz:%d off:%d nr:%d\n",
1445 (int)lc->ldo_def_stripe_size,
1446 (int)lc->ldo_def_stripe_offset,
1447 (int)lc->ldo_def_stripenr);
1450 /* transfer dir defaults to new directory */
1451 if (lp->ldo_dir_striping_cached) {
1452 lc->ldo_dir_def_stripenr = lp->ldo_dir_def_stripenr;
1453 lc->ldo_dir_def_stripe_offset =
1454 lp->ldo_dir_def_stripe_offset;
1455 lc->ldo_dir_def_hash_type =
1456 lp->ldo_dir_def_hash_type;
1457 lc->ldo_dir_striping_cached = 1;
1458 lc->ldo_dir_def_striping_set = 1;
1459 CDEBUG(D_INFO, "inherit default EA nr:%d off:%d t%u\n",
1460 (int)lc->ldo_dir_def_stripenr,
1461 (int)lc->ldo_dir_def_stripe_offset,
1462 lc->ldo_dir_def_hash_type);
1465 /* If the directory is specified with certain stripes */
1466 if (ah->dah_eadata != NULL && ah->dah_eadata_len != 0) {
1467 const struct lmv_user_md_v1 *lum1 = ah->dah_eadata;
1470 rc = lod_verify_md_striping(d, lum1);
1472 le32_to_cpu(lum1->lum_stripe_count) > 1) {
1473 /* Directory will be striped only if
1474 * stripe_count > 1 */
1476 le32_to_cpu(lum1->lum_stripe_count) - 1;
1477 lc->ldo_dir_stripe_offset =
1478 le32_to_cpu(lum1->lum_stripe_offset);
1479 lc->ldo_dir_hash_type =
1480 le32_to_cpu(lum1->lum_hash_type);
1481 CDEBUG(D_INFO, "set stripe EA nr:%hu off:%d\n",
1483 (int)lc->ldo_dir_stripe_offset);
1485 } else if (lp->ldo_dir_def_striping_set) {
1486 /* If there are default dir stripe from parent */
1487 lc->ldo_stripenr = lp->ldo_dir_def_stripenr;
1488 lc->ldo_dir_stripe_offset =
1489 lp->ldo_dir_def_stripe_offset;
1490 lc->ldo_dir_hash_type =
1491 lp->ldo_dir_def_hash_type;
1492 CDEBUG(D_INFO, "inherit EA nr:%hu off:%d\n",
1494 (int)lc->ldo_dir_stripe_offset);
1496 /* set default stripe for this directory */
1497 lc->ldo_stripenr = 0;
1498 lc->ldo_dir_stripe_offset = -1;
1501 CDEBUG(D_INFO, "final striping count:%hu, offset:%d\n",
1502 lc->ldo_stripenr, (int)lc->ldo_dir_stripe_offset);
1508 * if object is going to be striped over OSTs, transfer default
1509 * striping information to the child, so that we can use it
1510 * during declaration and creation
1512 if (!lod_object_will_be_striped(S_ISREG(child_mode),
1513 lu_object_fid(&child->do_lu)))
1516 * try from the parent
1518 if (likely(parent)) {
1519 lod_cache_parent_striping(env, lp, child_mode);
1521 lc->ldo_def_stripe_offset = (__u16) -1;
1523 if (lp->ldo_def_striping_set) {
1525 lod_object_set_pool(lc, lp->ldo_pool);
1526 lc->ldo_stripenr = lp->ldo_def_stripenr;
1527 lc->ldo_stripe_size = lp->ldo_def_stripe_size;
1528 lc->ldo_def_stripe_offset = lp->ldo_def_stripe_offset;
1529 CDEBUG(D_OTHER, "striping from parent: #%d, sz %d %s\n",
1530 lc->ldo_stripenr, lc->ldo_stripe_size,
1531 lp->ldo_pool ? lp->ldo_pool : "");
1536 * if the parent doesn't provide with specific pattern, grab fs-wide one
1538 desc = &d->lod_desc;
1539 if (lc->ldo_stripenr == 0)
1540 lc->ldo_stripenr = desc->ld_default_stripe_count;
1541 if (lc->ldo_stripe_size == 0)
1542 lc->ldo_stripe_size = desc->ld_default_stripe_size;
1543 CDEBUG(D_OTHER, "final striping: # %d stripes, sz %d from %s\n",
1544 lc->ldo_stripenr, lc->ldo_stripe_size,
1545 lc->ldo_pool ? lc->ldo_pool : "");
1548 /* we do not cache stripe information for slave stripe, see
1549 * lod_xattr_set_lov_on_dir */
1550 if (lp != NULL && lp->ldo_dir_slave_stripe)
1551 lod_lov_stripe_cache_clear(lp);
1556 #define ll_do_div64(aaa,bbb) do_div((aaa), (bbb))
1558 * this function handles a special case when truncate was done
1559 * on a stripeless object and now striping is being created
1560 * we can't lose that size, so we have to propagate it to newly
1563 static int lod_declare_init_size(const struct lu_env *env,
1564 struct dt_object *dt, struct thandle *th)
1566 struct dt_object *next = dt_object_child(dt);
1567 struct lod_object *lo = lod_dt_obj(dt);
1568 struct lu_attr *attr = &lod_env_info(env)->lti_attr;
1569 uint64_t size, offs;
1573 /* XXX: we support the simplest (RAID0) striping so far */
1574 LASSERT(lo->ldo_stripe || lo->ldo_stripenr == 0);
1575 LASSERT(lo->ldo_stripe_size > 0);
1577 rc = dt_attr_get(env, next, attr, BYPASS_CAPA);
1578 LASSERT(attr->la_valid & LA_SIZE);
1582 size = attr->la_size;
1586 /* ll_do_div64(a, b) returns a % b, and a = a / b */
1587 ll_do_div64(size, (__u64) lo->ldo_stripe_size);
1588 stripe = ll_do_div64(size, (__u64) lo->ldo_stripenr);
1590 size = size * lo->ldo_stripe_size;
1591 offs = attr->la_size;
1592 size += ll_do_div64(offs, lo->ldo_stripe_size);
1594 attr->la_valid = LA_SIZE;
1595 attr->la_size = size;
1597 rc = dt_declare_attr_set(env, lo->ldo_stripe[stripe], attr, th);
1603 * Create declaration of striped object
1605 int lod_declare_striped_object(const struct lu_env *env, struct dt_object *dt,
1606 struct lu_attr *attr,
1607 const struct lu_buf *lovea, struct thandle *th)
1609 struct lod_thread_info *info = lod_env_info(env);
1610 struct dt_object *next = dt_object_child(dt);
1611 struct lod_object *lo = lod_dt_obj(dt);
1615 if (OBD_FAIL_CHECK(OBD_FAIL_MDS_ALLOC_OBDO)) {
1616 /* failed to create striping, let's reset
1617 * config so that others don't get confused */
1618 lod_object_free_striping(env, lo);
1619 GOTO(out, rc = -ENOMEM);
1622 /* choose OST and generate appropriate objects */
1623 rc = lod_qos_prep_create(env, lo, attr, lovea, th);
1625 /* failed to create striping, let's reset
1626 * config so that others don't get confused */
1627 lod_object_free_striping(env, lo);
1632 * declare storage for striping data
1634 info->lti_buf.lb_len = lov_mds_md_size(lo->ldo_stripenr,
1635 lo->ldo_pool ? LOV_MAGIC_V3 : LOV_MAGIC_V1);
1636 rc = dt_declare_xattr_set(env, next, &info->lti_buf, XATTR_NAME_LOV,
1642 * if striping is created with local object's size > 0,
1643 * we have to propagate this size to specific object
1644 * the case is possible only when local object was created previously
1646 if (dt_object_exists(next))
1647 rc = lod_declare_init_size(env, dt, th);
1653 int lod_dir_striping_create_internal(const struct lu_env *env,
1654 struct dt_object *dt,
1655 struct lu_attr *attr,
1656 const struct dt_object_format *dof,
1660 struct lod_thread_info *info = lod_env_info(env);
1661 struct dt_object *next = dt_object_child(dt);
1662 struct lod_object *lo = lod_dt_obj(dt);
1666 if (lo->ldo_dir_def_striping_set &&
1667 !LMVEA_DELETE_VALUES(lo->ldo_stripenr,
1668 lo->ldo_dir_stripe_offset)) {
1669 struct lmv_user_md_v1 *v1 = info->lti_ea_store;
1670 int stripe_count = lo->ldo_stripenr + 1;
1672 if (info->lti_ea_store_size < sizeof(*v1)) {
1673 rc = lod_ea_store_resize(info, sizeof(*v1));
1676 v1 = info->lti_ea_store;
1679 memset(v1, 0, sizeof(*v1));
1680 v1->lum_magic = cpu_to_le32(LMV_USER_MAGIC);
1681 v1->lum_stripe_count = cpu_to_le32(stripe_count);
1682 v1->lum_stripe_offset =
1683 cpu_to_le32(lo->ldo_dir_stripe_offset);
1685 info->lti_buf.lb_buf = v1;
1686 info->lti_buf.lb_len = sizeof(*v1);
1689 rc = lod_declare_xattr_set_lmv(env, dt, attr,
1690 &info->lti_buf, th);
1692 rc = lod_xattr_set_lmv(env, dt, &info->lti_buf,
1693 XATTR_NAME_LMV, 0, th,
1699 /* Transfer default LMV striping from the parent */
1700 if (lo->ldo_dir_striping_cached &&
1701 !LMVEA_DELETE_VALUES(lo->ldo_dir_def_stripenr,
1702 lo->ldo_dir_def_stripe_offset)) {
1703 struct lmv_user_md_v1 *v1 = info->lti_ea_store;
1704 int def_stripe_count = lo->ldo_dir_def_stripenr + 1;
1706 if (info->lti_ea_store_size < sizeof(*v1)) {
1707 rc = lod_ea_store_resize(info, sizeof(*v1));
1710 v1 = info->lti_ea_store;
1713 memset(v1, 0, sizeof(*v1));
1714 v1->lum_magic = cpu_to_le32(LMV_USER_MAGIC);
1715 v1->lum_stripe_count = cpu_to_le32(def_stripe_count);
1716 v1->lum_stripe_offset =
1717 cpu_to_le32(lo->ldo_dir_def_stripe_offset);
1719 cpu_to_le32(lo->ldo_dir_def_hash_type);
1721 info->lti_buf.lb_buf = v1;
1722 info->lti_buf.lb_len = sizeof(*v1);
1724 rc = dt_declare_xattr_set(env, next, &info->lti_buf,
1725 XATTR_NAME_DEFAULT_LMV, 0,
1728 rc = dt_xattr_set(env, next, &info->lti_buf,
1729 XATTR_NAME_DEFAULT_LMV, 0, th,
1735 /* Transfer default LOV striping from the parent */
1736 if (lo->ldo_striping_cached &&
1737 !LOVEA_DELETE_VALUES(lo->ldo_def_stripe_size,
1738 lo->ldo_def_stripenr,
1739 lo->ldo_def_stripe_offset)) {
1740 struct lov_user_md_v3 *v3 = info->lti_ea_store;
1742 if (info->lti_ea_store_size < sizeof(*v3)) {
1743 rc = lod_ea_store_resize(info, sizeof(*v3));
1746 v3 = info->lti_ea_store;
1749 memset(v3, 0, sizeof(*v3));
1750 v3->lmm_magic = cpu_to_le32(LOV_USER_MAGIC_V3);
1751 v3->lmm_stripe_count = cpu_to_le16(lo->ldo_def_stripenr);
1752 v3->lmm_stripe_offset = cpu_to_le16(lo->ldo_def_stripe_offset);
1753 v3->lmm_stripe_size = cpu_to_le32(lo->ldo_def_stripe_size);
1755 strncpy(v3->lmm_pool_name, lo->ldo_pool,
1758 info->lti_buf.lb_buf = v3;
1759 info->lti_buf.lb_len = sizeof(*v3);
1762 rc = dt_declare_xattr_set(env, next, &info->lti_buf,
1763 XATTR_NAME_LOV, 0, th);
1765 rc = dt_xattr_set(env, next, &info->lti_buf,
1766 XATTR_NAME_LOV, 0, th,
/**
 * Declare the xattr updates needed to create the striping of a directory.
 *
 * Calls lod_dir_striping_create_internal() with declare set to true.
 *
 * \retval	result of lod_dir_striping_create_internal()
 */
static int lod_declare_dir_striping_create(const struct lu_env *env,
					   struct dt_object *dt,
					   struct lu_attr *attr,
					   struct dt_object_format *dof,
					   struct thandle *th)
{
	return lod_dir_striping_create_internal(env, dt, attr, dof, th, true);
}
/**
 * Perform the xattr updates that create the striping of a directory.
 *
 * Calls lod_dir_striping_create_internal() with declare set to false.
 *
 * \retval	result of lod_dir_striping_create_internal()
 */
static int lod_dir_striping_create(const struct lu_env *env,
				   struct dt_object *dt,
				   struct lu_attr *attr,
				   struct dt_object_format *dof,
				   struct thandle *th)
{
	return lod_dir_striping_create_internal(env, dt, attr, dof, th, false);
}
1793 static int lod_declare_object_create(const struct lu_env *env,
1794 struct dt_object *dt,
1795 struct lu_attr *attr,
1796 struct dt_allocation_hint *hint,
1797 struct dt_object_format *dof,
1800 struct dt_object *next = dt_object_child(dt);
1801 struct lod_object *lo = lod_dt_obj(dt);
1810 * first of all, we declare creation of local object
1812 rc = dt_declare_create(env, next, attr, hint, dof, th);
1816 if (dof->dof_type == DFT_SYM)
1817 dt->do_body_ops = &lod_body_lnk_ops;
1820 * it's lod_ah_init() who has decided the object will striped
1822 if (dof->dof_type == DFT_REGULAR) {
1823 /* callers don't want stripes */
1824 /* XXX: all tricky interactions with ->ah_make_hint() decided
1825 * to use striping, then ->declare_create() behaving differently
1826 * should be cleaned */
1827 if (dof->u.dof_reg.striped == 0)
1828 lo->ldo_stripenr = 0;
1829 if (lo->ldo_stripenr > 0)
1830 rc = lod_declare_striped_object(env, dt, attr,
1832 } else if (dof->dof_type == DFT_DIR) {
1833 rc = lod_declare_dir_striping_create(env, dt, attr, dof, th);
1839 int lod_striping_create(const struct lu_env *env, struct dt_object *dt,
1840 struct lu_attr *attr, struct dt_object_format *dof,
1843 struct lod_object *lo = lod_dt_obj(dt);
1847 LASSERT(lo->ldo_striping_cached == 0);
1849 /* create all underlying objects */
1850 for (i = 0; i < lo->ldo_stripenr; i++) {
1851 LASSERT(lo->ldo_stripe[i]);
1852 rc = dt_create(env, lo->ldo_stripe[i], attr, NULL, dof, th);
1858 rc = lod_generate_and_set_lovea(env, lo, th);
1863 static int lod_object_create(const struct lu_env *env, struct dt_object *dt,
1864 struct lu_attr *attr,
1865 struct dt_allocation_hint *hint,
1866 struct dt_object_format *dof, struct thandle *th)
1868 struct dt_object *next = dt_object_child(dt);
1869 struct lod_object *lo = lod_dt_obj(dt);
1873 /* create local object */
1874 rc = dt_create(env, next, attr, hint, dof, th);
1877 if (S_ISDIR(dt->do_lu.lo_header->loh_attr))
1878 rc = lod_dir_striping_create(env, dt, attr, dof, th);
1879 else if (lo->ldo_stripe && dof->u.dof_reg.striped != 0)
1880 rc = lod_striping_create(env, dt, attr, dof, th);
1886 static int lod_declare_object_destroy(const struct lu_env *env,
1887 struct dt_object *dt,
1890 struct dt_object *next = dt_object_child(dt);
1891 struct lod_object *lo = lod_dt_obj(dt);
1896 * we declare destroy for the local object
1898 rc = dt_declare_destroy(env, next, th);
1903 * load striping information, notice we don't do this when object
1904 * is being initialized as we don't need this information till
1905 * few specific cases like destroy, chown
1907 rc = lod_load_striping(env, lo);
1911 /* declare destroy for all underlying objects */
1912 for (i = 0; i < lo->ldo_stripenr; i++) {
1913 LASSERT(lo->ldo_stripe[i]);
1914 rc = dt_declare_destroy(env, lo->ldo_stripe[i], th);
1923 static int lod_object_destroy(const struct lu_env *env,
1924 struct dt_object *dt, struct thandle *th)
1926 struct dt_object *next = dt_object_child(dt);
1927 struct lod_object *lo = lod_dt_obj(dt);
1931 /* destroy local object */
1932 rc = dt_destroy(env, next, th);
1936 /* destroy all underlying objects */
1937 for (i = 0; i < lo->ldo_stripenr; i++) {
1938 LASSERT(lo->ldo_stripe[i]);
1939 rc = dt_destroy(env, lo->ldo_stripe[i], th);
1947 static int lod_index_try(const struct lu_env *env, struct dt_object *dt,
1948 const struct dt_index_features *feat)
1950 struct dt_object *next = dt_object_child(dt);
1954 LASSERT(next->do_ops);
1955 LASSERT(next->do_ops->do_index_try);
1957 rc = next->do_ops->do_index_try(env, next, feat);
1958 if (next->do_index_ops && dt->do_index_ops == NULL)
1959 dt->do_index_ops = &lod_index_ops;
/**
 * Declare a reference-count increment; forwarded to the layer below.
 */
static int lod_declare_ref_add(const struct lu_env *env,
			       struct dt_object *dt, struct thandle *th)
{
	struct dt_object *child = dt_object_child(dt);

	return dt_declare_ref_add(env, child, th);
}
/**
 * Increment the reference count; forwarded to the layer below.
 */
static int lod_ref_add(const struct lu_env *env,
		       struct dt_object *dt, struct thandle *th)
{
	struct dt_object *child = dt_object_child(dt);

	return dt_ref_add(env, child, th);
}
/**
 * Declare a reference-count decrement; forwarded to the layer below.
 */
static int lod_declare_ref_del(const struct lu_env *env,
			       struct dt_object *dt, struct thandle *th)
{
	struct dt_object *child = dt_object_child(dt);

	return dt_declare_ref_del(env, child, th);
}
/**
 * Decrement the reference count; forwarded to the layer below.
 */
static int lod_ref_del(const struct lu_env *env,
		       struct dt_object *dt, struct thandle *th)
{
	struct dt_object *child = dt_object_child(dt);

	return dt_ref_del(env, child, th);
}
1988 static struct obd_capa *lod_capa_get(const struct lu_env *env,
1989 struct dt_object *dt,
1990 struct lustre_capa *old, __u64 opc)
1992 return dt_capa_get(env, dt_object_child(dt), old, opc);
/**
 * Sync the object; forwarded to the layer below.
 */
static int lod_object_sync(const struct lu_env *env, struct dt_object *dt)
{
	struct dt_object *child = dt_object_child(dt);

	return dt_object_sync(env, child);
}
/**
 * Lock the object; forwarded to the layer below.
 *
 * \param[in] env	execution environment
 * \param[in] dt	object to lock
 * \param[out] lh	lock handle, passed through to dt_object_lock()
 * \param[in] einfo	enqueue information, passed through
 * \param[in] policy	lock policy, passed through
 *
 * \retval	result of dt_object_lock() on the underlying object
 */
static int lod_object_lock(const struct lu_env *env,
			   struct dt_object *dt, struct lustre_handle *lh,
			   struct ldlm_enqueue_info *einfo,
			   void *policy)
{
	struct dt_object *next = dt_object_child(dt);
	int		  rc;

	/*
	 * take the lock on the local object
	 */
	rc = dt_object_lock(env, next, lh, einfo, policy);

	return rc;
}
2017 struct dt_object_operations lod_obj_ops = {
2018 .do_read_lock = lod_object_read_lock,
2019 .do_write_lock = lod_object_write_lock,
2020 .do_read_unlock = lod_object_read_unlock,
2021 .do_write_unlock = lod_object_write_unlock,
2022 .do_write_locked = lod_object_write_locked,
2023 .do_attr_get = lod_attr_get,
2024 .do_declare_attr_set = lod_declare_attr_set,
2025 .do_attr_set = lod_attr_set,
2026 .do_xattr_get = lod_xattr_get,
2027 .do_declare_xattr_set = lod_declare_xattr_set,
2028 .do_xattr_set = lod_xattr_set,
2029 .do_declare_xattr_del = lod_declare_xattr_del,
2030 .do_xattr_del = lod_xattr_del,
2031 .do_xattr_list = lod_xattr_list,
2032 .do_ah_init = lod_ah_init,
2033 .do_declare_create = lod_declare_object_create,
2034 .do_create = lod_object_create,
2035 .do_declare_destroy = lod_declare_object_destroy,
2036 .do_destroy = lod_object_destroy,
2037 .do_index_try = lod_index_try,
2038 .do_declare_ref_add = lod_declare_ref_add,
2039 .do_ref_add = lod_ref_add,
2040 .do_declare_ref_del = lod_declare_ref_del,
2041 .do_ref_del = lod_ref_del,
2042 .do_capa_get = lod_capa_get,
2043 .do_object_sync = lod_object_sync,
2044 .do_object_lock = lod_object_lock,
2047 static ssize_t lod_read(const struct lu_env *env, struct dt_object *dt,
2048 struct lu_buf *buf, loff_t *pos,
2049 struct lustre_capa *capa)
2051 struct dt_object *next = dt_object_child(dt);
2052 return next->do_body_ops->dbo_read(env, next, buf, pos, capa);
2055 static ssize_t lod_declare_write(const struct lu_env *env,
2056 struct dt_object *dt,
2057 const loff_t size, loff_t pos,
2060 return dt_declare_record_write(env, dt_object_child(dt),
2064 static ssize_t lod_write(const struct lu_env *env, struct dt_object *dt,
2065 const struct lu_buf *buf, loff_t *pos,
2066 struct thandle *th, struct lustre_capa *capa, int iq)
2068 struct dt_object *next = dt_object_child(dt);
2070 return next->do_body_ops->dbo_write(env, next, buf, pos, th, capa, iq);
2073 static const struct dt_body_operations lod_body_lnk_ops = {
2074 .dbo_read = lod_read,
2075 .dbo_declare_write = lod_declare_write,
2076 .dbo_write = lod_write
2079 static int lod_object_init(const struct lu_env *env, struct lu_object *lo,
2080 const struct lu_object_conf *conf)
2082 struct lod_device *lod = lu2lod_dev(lo->lo_dev);
2083 struct lu_device *cdev = NULL;
2084 struct lu_object *cobj;
2085 struct lod_tgt_descs *ltd = NULL;
2086 struct lod_tgt_desc *tgt;
2088 int type = LU_SEQ_RANGE_ANY;
2092 rc = lod_fld_lookup(env, lod, lu_object_fid(lo), &idx, &type);
2096 if (type == LU_SEQ_RANGE_MDT &&
2097 idx == lu_site2seq(lo->lo_dev->ld_site)->ss_node_id) {
2098 cdev = &lod->lod_child->dd_lu_dev;
2099 } else if (type == LU_SEQ_RANGE_MDT) {
2100 ltd = &lod->lod_mdt_descs;
2102 } else if (type == LU_SEQ_RANGE_OST) {
2103 ltd = &lod->lod_ost_descs;
2110 if (ltd->ltd_tgts_size > idx &&
2111 cfs_bitmap_check(ltd->ltd_tgt_bitmap, idx)) {
2112 tgt = LTD_TGT(ltd, idx);
2114 LASSERT(tgt != NULL);
2115 LASSERT(tgt->ltd_tgt != NULL);
2117 cdev = &(tgt->ltd_tgt->dd_lu_dev);
2119 lod_putref(lod, ltd);
2122 if (unlikely(cdev == NULL))
2125 cobj = cdev->ld_ops->ldo_object_alloc(env, lo->lo_header, cdev);
2126 if (unlikely(cobj == NULL))
2129 lu_object_add(lo, cobj);
2134 void lod_object_free_striping(const struct lu_env *env, struct lod_object *lo)
2138 if (lo->ldo_dir_stripe != NULL) {
2139 OBD_FREE_PTR(lo->ldo_dir_stripe);
2140 lo->ldo_dir_stripe = NULL;
2143 if (lo->ldo_stripe) {
2144 LASSERT(lo->ldo_stripes_allocated > 0);
2146 for (i = 0; i < lo->ldo_stripenr; i++) {
2147 if (lo->ldo_stripe[i])
2148 lu_object_put(env, &lo->ldo_stripe[i]->do_lu);
2151 i = sizeof(struct dt_object *) * lo->ldo_stripes_allocated;
2152 OBD_FREE(lo->ldo_stripe, i);
2153 lo->ldo_stripe = NULL;
2154 lo->ldo_stripes_allocated = 0;
2156 lo->ldo_stripenr = 0;
2157 lo->ldo_pattern = 0;
2161 * ->start is called once all slices are initialized, including header's
2162 * cache for mode (object type). using the type we can initialize ops
2164 static int lod_object_start(const struct lu_env *env, struct lu_object *o)
2166 if (S_ISLNK(o->lo_header->loh_attr & S_IFMT))
2167 lu2lod_obj(o)->ldo_obj.do_body_ops = &lod_body_lnk_ops;
2171 static void lod_object_free(const struct lu_env *env, struct lu_object *o)
2173 struct lod_object *mo = lu2lod_obj(o);
2176 * release all underlying object pinned
2179 lod_object_free_striping(env, mo);
2181 lod_object_set_pool(mo, NULL);
2184 OBD_SLAB_FREE_PTR(mo, lod_object_kmem);
/**
 * Release hook of the LOD slice; currently does nothing.
 */
static void lod_object_release(const struct lu_env *env, struct lu_object *o)
{
	/* XXX: shouldn't we release everything here in case if object
	 * creation failed before? */
}
2193 static int lod_object_print(const struct lu_env *env, void *cookie,
2194 lu_printer_t p, const struct lu_object *l)
2196 struct lod_object *o = lu2lod_obj((struct lu_object *) l);
2198 return (*p)(env, cookie, LUSTRE_LOD_NAME"-object@%p", o);
2201 struct lu_object_operations lod_lu_obj_ops = {
2202 .loo_object_init = lod_object_init,
2203 .loo_object_start = lod_object_start,
2204 .loo_object_free = lod_object_free,
2205 .loo_object_release = lod_object_release,
2206 .loo_object_print = lod_object_print,