4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License version 2 for more details. A copy is
14 * included in the COPYING file that accompanied this code.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved
24 * Use is subject to license terms.
26 * Copyright (c) 2012, 2013, Intel Corporation.
29 * lustre/lod/lod_object.c
31 * Author: Alex Zhuravlev <alexey.zhuravlev@intel.com>
34 #define DEBUG_SUBSYSTEM S_MDS
37 #include <obd_class.h>
38 #include <lustre_ver.h>
39 #include <obd_support.h>
40 #include <lprocfs_status.h>
42 #include <lustre_fid.h>
43 #include <lustre_param.h>
44 #include <lustre_fid.h>
45 #include <lustre_lmv.h>
48 #include "lod_internal.h"
/* Names used as index keys when "." and ".." entries are inserted into
 * newly created stripe directories further down in this file. */
50 static const char dot[] = ".";
51 static const char dotdot[] = "..";
/* Defined in another translation unit; presumably the allocation cache
 * for lod objects -- TODO confirm against its definition. */
53 extern struct kmem_cache *lod_object_kmem;
/* Forward declaration of the body operations table. */
54 static const struct dt_body_operations lod_body_lnk_ops;
56 static int lod_index_lookup(const struct lu_env *env, struct dt_object *dt,
57 struct dt_rec *rec, const struct dt_key *key,
58 struct lustre_capa *capa)
60 struct dt_object *next = dt_object_child(dt);
61 return next->do_index_ops->dio_lookup(env, next, rec, key, capa);
/*
 * Declare an index insertion.
 *
 * Forwards rec, key and handle to dt_declare_insert() on the object
 * returned by dt_object_child(dt) and returns its result.
 */
64 static int lod_declare_index_insert(const struct lu_env *env,
66 const struct dt_rec *rec,
67 const struct dt_key *key,
68 struct thandle *handle)
70 return dt_declare_insert(env, dt_object_child(dt), rec, key, handle);
/*
 * Insert a record into the index.
 *
 * Forwards all arguments to dt_insert() on the object returned by
 * dt_object_child(dt) and returns its result.
 */
73 static int lod_index_insert(const struct lu_env *env,
75 const struct dt_rec *rec,
76 const struct dt_key *key,
78 struct lustre_capa *capa,
81 return dt_insert(env, dt_object_child(dt), rec, key, th, capa, ign);
/*
 * Declare an index deletion.
 *
 * Forwards key and th to dt_declare_delete() on the object returned by
 * dt_object_child(dt) and returns its result.
 */
84 static int lod_declare_index_delete(const struct lu_env *env,
86 const struct dt_key *key,
89 return dt_declare_delete(env, dt_object_child(dt), key, th);
/*
 * Delete a key from the index.
 *
 * Forwards key, th and capa to dt_delete() on the object returned by
 * dt_object_child(dt) and returns its result.
 */
92 static int lod_index_delete(const struct lu_env *env,
94 const struct dt_key *key,
96 struct lustre_capa *capa)
98 return dt_delete(env, dt_object_child(dt), key, th, capa);
/*
 * Initialise an index iterator.
 *
 * Calls the dio_it.init() method of the child object and stores the
 * resulting lower-layer iterator in the per-thread struct lod_it found at
 * lod_env_info(env)->lti_it. The address of that per-thread structure,
 * cast to struct dt_it *, is what the caller gets back, so only one LOD
 * iterator can be active per thread; the LASSERT below checks that the
 * slot is not already in use.
 */
101 static struct dt_it *lod_it_init(const struct lu_env *env,
102 struct dt_object *dt, __u32 attr,
103 struct lustre_capa *capa)
105 struct dt_object *next = dt_object_child(dt);
106 struct lod_it *it = &lod_env_info(env)->lti_it;
107 struct dt_it *it_next;
110 it_next = next->do_index_ops->dio_it.init(env, next, attr, capa);
114 /* currently we do not use more than one iterator per thread
115 * so we store it in thread info. if at some point we need
116 * more active iterators in a single thread, we can allocate
118 LASSERT(it->lit_obj == NULL);
120 it->lit_it = it_next;
123 return (struct dt_it *)it;
/*
 * Sanity-check a struct lod_it before use: both the object it iterates
 * over (lit_obj) and the lower-layer iterator (lit_it) must be set.
 * The env argument is not referenced by the assertions shown here.
 */
126 #define LOD_CHECK_IT(env, it) \
128 LASSERT((it)->lit_obj != NULL); \
129 LASSERT((it)->lit_it != NULL); \
/*
 * Finish an iteration.
 *
 * Validates the LOD iterator and calls the dio_it.fini() method of the
 * iterated object on the lower-layer iterator.
 */
132 void lod_it_fini(const struct lu_env *env, struct dt_it *di)
134 struct lod_it *it = (struct lod_it *)di;
136 LOD_CHECK_IT(env, it);
137 it->lit_obj->do_index_ops->dio_it.fini(env, it->lit_it);
139 /* the iterator not in use any more */
144 int lod_it_get(const struct lu_env *env, struct dt_it *di,
145 const struct dt_key *key)
147 const struct lod_it *it = (const struct lod_it *)di;
149 LOD_CHECK_IT(env, it);
150 return it->lit_obj->do_index_ops->dio_it.get(env, it->lit_it, key);
153 void lod_it_put(const struct lu_env *env, struct dt_it *di)
155 struct lod_it *it = (struct lod_it *)di;
157 LOD_CHECK_IT(env, it);
158 return it->lit_obj->do_index_ops->dio_it.put(env, it->lit_it);
161 int lod_it_next(const struct lu_env *env, struct dt_it *di)
163 struct lod_it *it = (struct lod_it *)di;
165 LOD_CHECK_IT(env, it);
166 return it->lit_obj->do_index_ops->dio_it.next(env, it->lit_it);
169 struct dt_key *lod_it_key(const struct lu_env *env, const struct dt_it *di)
171 const struct lod_it *it = (const struct lod_it *)di;
173 LOD_CHECK_IT(env, it);
174 return it->lit_obj->do_index_ops->dio_it.key(env, it->lit_it);
177 int lod_it_key_size(const struct lu_env *env, const struct dt_it *di)
179 struct lod_it *it = (struct lod_it *)di;
181 LOD_CHECK_IT(env, it);
182 return it->lit_obj->do_index_ops->dio_it.key_size(env, it->lit_it);
185 int lod_it_rec(const struct lu_env *env, const struct dt_it *di,
186 struct dt_rec *rec, __u32 attr)
188 const struct lod_it *it = (const struct lod_it *)di;
190 LOD_CHECK_IT(env, it);
191 return it->lit_obj->do_index_ops->dio_it.rec(env, it->lit_it, rec, attr);
194 __u64 lod_it_store(const struct lu_env *env, const struct dt_it *di)
196 const struct lod_it *it = (const struct lod_it *)di;
198 LOD_CHECK_IT(env, it);
199 return it->lit_obj->do_index_ops->dio_it.store(env, it->lit_it);
202 int lod_it_load(const struct lu_env *env, const struct dt_it *di, __u64 hash)
204 const struct lod_it *it = (const struct lod_it *)di;
206 LOD_CHECK_IT(env, it);
207 return it->lit_obj->do_index_ops->dio_it.load(env, it->lit_it, hash);
/*
 * Forward an iterator key_rec() request to the iterated object.
 *
 * Validates the LOD iterator and returns whatever the lower layer's
 * dio_it.key_rec() returns for (lit_it, key_rec).
 */
210 int lod_it_key_rec(const struct lu_env *env, const struct dt_it *di,
213 const struct lod_it *it = (const struct lod_it *)di;
215 LOD_CHECK_IT(env, it);
216 return it->lit_obj->do_index_ops->dio_it.key_rec(env, it->lit_it, key_rec);
/*
 * Index operations table for LOD objects. Every entry is one of the
 * pass-through wrappers defined above in this file.
 */
219 static struct dt_index_operations lod_index_ops = {
220 .dio_lookup = lod_index_lookup,
221 .dio_declare_insert = lod_declare_index_insert,
222 .dio_insert = lod_index_insert,
223 .dio_declare_delete = lod_declare_index_delete,
224 .dio_delete = lod_index_delete,
232 .key_size = lod_it_key_size,
234 .store = lod_it_store,
236 .key_rec = lod_it_key_rec,
/*
 * Take the read lock of the underlying object.
 *
 * \param[in] env	execution environment
 * \param[in] dt	LOD-level object
 * \param[in] role	role value passed through to dt_read_lock()
 */
static void lod_object_read_lock(const struct lu_env *env,
				 struct dt_object *dt, unsigned role)
{
	struct dt_object *child = dt_object_child(dt);

	dt_read_lock(env, child, role);
}
/*
 * Take the write lock of the underlying object.
 *
 * \param[in] env	execution environment
 * \param[in] dt	LOD-level object
 * \param[in] role	role value passed through to dt_write_lock()
 */
static void lod_object_write_lock(const struct lu_env *env,
				  struct dt_object *dt, unsigned role)
{
	struct dt_object *child = dt_object_child(dt);

	dt_write_lock(env, child, role);
}
/*
 * Release the read lock of the underlying object.
 *
 * \param[in] env	execution environment
 * \param[in] dt	LOD-level object
 */
static void lod_object_read_unlock(const struct lu_env *env,
				   struct dt_object *dt)
{
	struct dt_object *child = dt_object_child(dt);

	dt_read_unlock(env, child);
}
/*
 * Release the write lock of the underlying object.
 *
 * \param[in] env	execution environment
 * \param[in] dt	LOD-level object
 */
static void lod_object_write_unlock(const struct lu_env *env,
				    struct dt_object *dt)
{
	struct dt_object *child = dt_object_child(dt);

	dt_write_unlock(env, child);
}
/*
 * Report the write-lock state of the underlying object.
 *
 * \param[in] env	execution environment
 * \param[in] dt	LOD-level object
 *
 * \retval		value returned by dt_write_locked() for the child
 */
static int lod_object_write_locked(const struct lu_env *env,
				   struct dt_object *dt)
{
	struct dt_object *child = dt_object_child(dt);
	int locked;

	locked = dt_write_locked(env, child);

	return locked;
}
/*
 * Fetch the attributes of the underlying object.
 *
 * \param[in]  env	execution environment
 * \param[in]  dt	LOD-level object
 * \param[out] attr	attribute buffer filled by the lower layer
 * \param[in]  capa	capability passed to the lower layer
 *
 * \retval		value returned by dt_attr_get() for the child
 */
static int lod_attr_get(const struct lu_env *env,
			struct dt_object *dt,
			struct lu_attr *attr,
			struct lustre_capa *capa)
{
	struct dt_object *child = dt_object_child(dt);
	int rc;

	rc = dt_attr_get(env, child, attr, capa);

	return rc;
}
/*
 * Declare an attribute change on an object and, where needed, on its
 * stripes.
 *
 * The change is first declared on the child object with
 * dt_declare_attr_set(). The attribute mask is then examined: for a
 * non-directory only UID/GID changes are of interest to the stripes, for
 * a directory UID, GID, mode and the three timestamps are. When the mask
 * qualifies, striping is loaded with lod_load_striping(); for a
 * timestamp-only change the current attributes of the child are read and
 * each requested timestamp is compared with the stored one. Finally the
 * same change is declared on every entry of lo->ldo_stripe[], logging
 * with CERROR() on failure.
 *
 * NOTE(review): the statements executed when the individual checks fail
 * are not visible in this excerpt.
 */
278 static int lod_declare_attr_set(const struct lu_env *env,
279 struct dt_object *dt,
280 const struct lu_attr *attr,
281 struct thandle *handle)
283 struct dt_object *next = dt_object_child(dt);
284 struct lod_object *lo = lod_dt_obj(dt);
289 * declare setattr on the local object
291 rc = dt_declare_attr_set(env, next, attr, handle);
295 /* osp_declare_attr_set() ignores all attributes other than
296 * UID, GID, and size, and osp_attr_set() ignores all but UID
297 * and GID. Declaration of size attr setting happens through
298 * lod_declare_init_size(), and not through this function.
299 * Therefore we need not load striping unless ownership is
300 * changing. This should save memory and (we hope) speed up
302 if (!S_ISDIR(dt->do_lu.lo_header->loh_attr)) {
303 if (!(attr->la_valid & (LA_UID | LA_GID)))
306 if (!(attr->la_valid & (LA_UID | LA_GID | LA_MODE |
307 LA_ATIME | LA_MTIME | LA_CTIME)))
311 * load striping information, notice we don't do this when object
312 * is being initialized as we don't need this information till
313 * few specific cases like destroy, chown
315 rc = lod_load_striping(env, lo);
319 if (lo->ldo_stripenr == 0)
322 if (!(attr->la_valid & ~(LA_ATIME | LA_MTIME | LA_CTIME))) {
323 struct lu_attr *la = &lod_env_info(env)->lti_attr;
324 bool setattr_time = false;
326 rc = dt_attr_get(env, dt_object_child(dt), la,
331 /* If it will only setattr time, it will only set
332 * time < current_time */
333 if ((attr->la_valid & LA_ATIME &&
334 attr->la_atime < la->la_atime) ||
335 (attr->la_valid & LA_CTIME &&
336 attr->la_ctime < la->la_ctime) ||
337 (attr->la_valid & LA_MTIME &&
338 attr->la_mtime < la->la_mtime))
345 * if object is striped declare changes on the stripes
347 LASSERT(lo->ldo_stripe);
348 for (i = 0; i < lo->ldo_stripenr; i++) {
349 LASSERT(lo->ldo_stripe[i]);
351 rc = dt_declare_attr_set(env, lo->ldo_stripe[i], attr, handle);
353 CERROR("failed declaration: %d\n", rc);
361 static int lod_attr_set(const struct lu_env *env,
362 struct dt_object *dt,
363 const struct lu_attr *attr,
364 struct thandle *handle,
365 struct lustre_capa *capa)
367 struct dt_object *next = dt_object_child(dt);
368 struct lod_object *lo = lod_dt_obj(dt);
373 * apply changes to the local object
375 rc = dt_attr_set(env, next, attr, handle, capa);
379 if (!S_ISDIR(dt->do_lu.lo_header->loh_attr)) {
380 if (!(attr->la_valid & (LA_UID | LA_GID)))
383 if (!(attr->la_valid & (LA_UID | LA_GID | LA_MODE |
384 LA_ATIME | LA_MTIME | LA_CTIME)))
388 if (lo->ldo_stripenr == 0)
391 if (!(attr->la_valid & ~(LA_ATIME | LA_MTIME | LA_CTIME))) {
392 struct lu_attr *la = &lod_env_info(env)->lti_attr;
393 bool setattr_time = false;
395 rc = dt_attr_get(env, dt_object_child(dt), la,
400 /* If it will only setattr time, it will only set
401 * time < current_time */
402 if ((attr->la_valid & LA_ATIME &&
403 attr->la_atime < la->la_atime) ||
404 (attr->la_valid & LA_CTIME &&
405 attr->la_atime < la->la_ctime) ||
406 (attr->la_valid & LA_MTIME &&
407 attr->la_atime < la->la_mtime))
415 * if object is striped, apply changes to all the stripes
417 LASSERT(lo->ldo_stripe);
418 for (i = 0; i < lo->ldo_stripenr; i++) {
419 LASSERT(lo->ldo_stripe[i]);
420 rc = dt_attr_set(env, lo->ldo_stripe[i], attr, handle, capa);
422 CERROR("failed declaration: %d\n", rc);
/*
 * Read an extended attribute.
 *
 * The request is first passed to dt_xattr_get() on the child object. Only
 * when that returns -ENODATA and the object is a directory does the
 * function go further: it obtains the root FID of the child device with
 * dt_root_get() and, if this object is that root and the requested name is
 * XATTR_NAME_LOV, fills the caller's buffer (when it is large enough for a
 * struct lov_user_md) with a LOV_USER_MAGIC_V1 descriptor built from the
 * defaults in dev->lod_desc, converted to little-endian.
 *
 * NOTE(review): the return values of the individual branches are not
 * visible in this excerpt.
 */
430 static int lod_xattr_get(const struct lu_env *env, struct dt_object *dt,
431 struct lu_buf *buf, const char *name,
432 struct lustre_capa *capa)
434 struct lod_thread_info *info = lod_env_info(env);
435 struct lod_device *dev = lu2lod_dev(dt->do_lu.lo_dev);
439 rc = dt_xattr_get(env, dt_object_child(dt), buf, name, capa);
440 if (rc != -ENODATA || !S_ISDIR(dt->do_lu.lo_header->loh_attr & S_IFMT))
444 * lod returns default striping on the real root of the device
445 * this is like the root stores default striping for the whole
446 * filesystem. historically we've been using a different approach
447 * and store it in the config.
449 dt_root_get(env, dev->lod_child, &info->lti_fid);
450 is_root = lu_fid_eq(&info->lti_fid, lu_object_fid(&dt->do_lu));
452 if (is_root && strcmp(XATTR_NAME_LOV, name) == 0) {
453 struct lov_user_md *lum = buf->lb_buf;
454 struct lov_desc *desc = &dev->lod_desc;
456 if (buf->lb_buf == NULL) {
458 } else if (buf->lb_len >= sizeof(*lum)) {
459 lum->lmm_magic = cpu_to_le32(LOV_USER_MAGIC_V1);
460 lmm_oi_set_seq(&lum->lmm_oi, FID_SEQ_LOV_DEFAULT);
461 lmm_oi_set_id(&lum->lmm_oi, 0);
462 lmm_oi_cpu_to_le(&lum->lmm_oi, &lum->lmm_oi);
463 lum->lmm_pattern = cpu_to_le32(desc->ld_pattern);
464 lum->lmm_stripe_size = cpu_to_le32(
465 desc->ld_default_stripe_size);
466 lum->lmm_stripe_count = cpu_to_le16(
467 desc->ld_default_stripe_count);
468 lum->lmm_stripe_offset = cpu_to_le16(
469 desc->ld_default_stripe_offset);
/*
 * Validate a user-supplied LMV (directory striping) descriptor.
 *
 * Rejects with -EINVAL a descriptor whose magic is not LMV_USER_MAGIC,
 * whose stripe count is zero, or whose stripe count exceeds
 * lod->lod_remote_mdt_count + 1. All fields are read as little-endian.
 * The CERROR() at the "out" path reports the offending values together
 * with the device name.
 *
 * NOTE(review): whether the CERROR() is guarded by an rc check is not
 * visible in this excerpt.
 */
479 static int lod_verify_md_striping(struct lod_device *lod,
480 const struct lmv_user_md_v1 *lum)
485 if (unlikely(le32_to_cpu(lum->lum_magic) != LMV_USER_MAGIC))
486 GOTO(out, rc = -EINVAL);
488 if (unlikely(le32_to_cpu(lum->lum_stripe_count) == 0))
489 GOTO(out, rc = -EINVAL);
491 if (unlikely(le32_to_cpu(lum->lum_stripe_count) >
492 lod->lod_remote_mdt_count + 1))
493 GOTO(out, rc = -EINVAL);
496 CERROR("%s: invalid lmv_user_md: magic = %x, "
497 "stripe_offset = %d, stripe_count = %u: rc = %d\n",
498 lod2obd(lod)->obd_name, le32_to_cpu(lum->lum_magic),
499 (int)le32_to_cpu(lum->lum_stripe_offset),
500 le32_to_cpu(lum->lum_stripe_count), rc);
/*
 * Build the LMV EA of a striped directory in the per-thread EA buffer.
 *
 * The directory must already be marked striped and have at least one
 * stripe (both asserted). The EA describes ldo_stripenr + 1 entries:
 * slot 0 of lmv_stripe_fids[] holds the FID of the master object itself,
 * slots 1..ldo_stripenr hold the FIDs of lo->ldo_stripe[]. The buffer
 * info->lti_ea_store is grown with lod_ea_store_resize() if it is too
 * small, the master MDT index comes from lod_fld_lookup(), and all fields
 * are stored little-endian. On completion lmv_buf points at the per-thread
 * buffer (so it is only valid until that buffer is reused) and
 * lo->ldo_dir_striping_cached is set.
 */
504 int lod_prep_lmv_md(const struct lu_env *env, struct dt_object *dt,
505 struct lu_buf *lmv_buf)
507 struct lod_thread_info *info = lod_env_info(env);
508 struct lod_device *lod = lu2lod_dev(dt->do_lu.lo_dev);
509 struct lod_object *lo = lod_dt_obj(dt);
510 struct lmv_mds_md_v1 *lmm1;
518 LASSERT(lo->ldo_dir_striped != 0);
519 LASSERT(lo->ldo_stripenr > 0);
520 stripe_count = lo->ldo_stripenr + 1;
521 lmm_size = lmv_mds_md_size(stripe_count, LMV_MAGIC);
522 if (info->lti_ea_store_size < lmm_size) {
523 rc = lod_ea_store_resize(info, lmm_size);
528 lmm1 = (struct lmv_mds_md_v1 *)info->lti_ea_store;
529 lmm1->lmv_magic = cpu_to_le32(LMV_MAGIC);
530 lmm1->lmv_stripe_count = cpu_to_le32(stripe_count);
531 lmm1->lmv_hash_type = cpu_to_le32(lo->ldo_dir_hash_type);
532 rc = lod_fld_lookup(env, lod, lu_object_fid(&dt->do_lu),
533 &mdtidx, LU_SEQ_RANGE_MDT);
537 lmm1->lmv_master_mdt_index = cpu_to_le32(mdtidx);
538 fid_cpu_to_le(&lmm1->lmv_stripe_fids[0], lu_object_fid(&dt->do_lu));
539 for (i = 0; i < lo->ldo_stripenr; i++) {
540 struct dt_object *dto;
542 dto = lo->ldo_stripe[i];
543 LASSERT(dto != NULL);
544 fid_cpu_to_le(&lmm1->lmv_stripe_fids[i + 1],
545 lu_object_fid(&dto->do_lu));
548 lmv_buf->lb_buf = info->lti_ea_store;
549 lmv_buf->lb_len = lmm_size;
550 lo->ldo_dir_striping_cached = 1;
/*
 * Parse an LMV EA and instantiate the stripe objects of a directory.
 *
 * The EA in buf is checked for magic LMV_MAGIC_V1 and a stripe count
 * above 1. If the FID in slot 0 is not this object's FID, the object is a
 * slave stripe: ldo_dir_slave_stripe is set and no striping is loaded.
 * Otherwise an array of (stripe count - 1) object pointers is allocated
 * and, for every FID from slot 1 onwards, the owning MDT is found with
 * lod_fld_lookup(), its target descriptor fetched with LTD_TGT() and the
 * object located with dt_locate_at(). The array and counts are stored in
 * lo; lod_object_free_striping() appears on the exit path.
 *
 * NOTE(review): the conditions guarding the early exits and the final
 * lod_object_free_striping() call are not visible in this excerpt.
 */
555 int lod_parse_dir_striping(const struct lu_env *env, struct lod_object *lo,
556 const struct lu_buf *buf)
558 struct lod_thread_info *info = lod_env_info(env);
559 struct lod_device *lod = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
560 struct lod_tgt_descs *ltd = &lod->lod_mdt_descs;
561 struct dt_object **stripe;
562 union lmv_mds_md *lmm = buf->lb_buf;
563 struct lmv_mds_md_v1 *lmv1 = &lmm->lmv_md_v1;
564 struct lu_fid *fid = &info->lti_fid;
569 if (le32_to_cpu(lmv1->lmv_magic) != LMV_MAGIC_V1)
572 if (le32_to_cpu(lmv1->lmv_stripe_count) <= 1)
575 fid_le_to_cpu(fid, &lmv1->lmv_stripe_fids[0]);
576 /* Do not load striping information for slave inode */
577 if (!lu_fid_eq(fid, lu_object_fid(&lo->ldo_obj.do_lu))) {
578 lo->ldo_dir_slave_stripe = 1;
582 LASSERT(lo->ldo_stripe == NULL);
583 OBD_ALLOC(stripe, sizeof(stripe[0]) *
584 (le32_to_cpu(lmv1->lmv_stripe_count) - 1));
588 /* skip master stripe */
589 for (i = 1; i < le32_to_cpu(lmv1->lmv_stripe_count); i++) {
590 struct lod_tgt_desc *tgt;
592 struct dt_object *dto;
594 fid_le_to_cpu(fid, &lmv1->lmv_stripe_fids[i]);
595 rc = lod_fld_lookup(env, lod, fid,
596 &idx, LU_SEQ_RANGE_MDT);
600 tgt = LTD_TGT(ltd, idx);
602 GOTO(out, rc = -ESTALE);
604 dto = dt_locate_at(env, tgt->ltd_tgt, fid,
605 lo->ldo_obj.do_lu.lo_dev->ld_site->ls_top_dev,
608 GOTO(out, rc = PTR_ERR(dto));
613 lo->ldo_stripe = stripe;
614 lo->ldo_stripenr = le32_to_cpu(lmv1->lmv_stripe_count) - 1;
615 lo->ldo_stripes_allocated = le32_to_cpu(lmv1->lmv_stripe_count) - 1;
617 lod_object_free_striping(env, lo);
/*
 * Allocate the slave stripes of a new striped directory and declare
 * everything needed to create them.
 *
 * Steps visible in this function:
 *  1. Allocate a stripe pointer array of (stripe_count - 1) entries (the
 *     master object needs no slot) and an MDT index array of stripe_count
 *     entries whose element 0 is lum_stripe_offset.
 *  2. For each further stripe, walk the MDT indices round-robin starting
 *     after the previously used one, skipping indices that are not set in
 *     ltd_tgt_bitmap or that are already present in idx_array[]. Stop when
 *     no more targets are available.
 *  3. On the chosen target allocate a FID with obd_fid_alloc() and locate
 *     a new object (LOC_F_NEW) with dt_locate_at().
 *  4. Record the result in lo; fail with -ENOSPC if no stripe was obtained.
 *  5. Build the LMV EA with lod_prep_lmv_md().
 *  6. For each stripe declare: creation, insertion of "." (its own FID)
 *     and ".." (the master FID), the default LOV EA if the parent's cached
 *     default striping is not all-default, and the LMV EA.
 *  7. Declare the LMV EA on the master object itself.
 * The error path drops the references on any located stripes and frees
 * both arrays.
 *
 * NOTE(review): the pool name is copied with strncpy(); the size argument
 * and any explicit NUL termination are not visible in this excerpt --
 * confirm lmm_pool_name is always terminated.
 */
622 static int lod_prep_md_striped_create(const struct lu_env *env,
623 struct dt_object *dt,
624 struct lu_attr *attr,
625 const struct lmv_user_md_v1 *lum,
628 struct lod_device *lod = lu2lod_dev(dt->do_lu.lo_dev);
629 struct lod_tgt_descs *ltd = &lod->lod_mdt_descs;
630 struct lod_object *lo = lod_dt_obj(dt);
631 struct dt_object **stripe;
632 struct lu_buf lmv_buf;
640 /* The lum has been verifed in lod_verify_md_striping */
641 LASSERT(le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC);
642 LASSERT(le32_to_cpu(lum->lum_stripe_count) > 0);
644 /* Do not need allocated master stripe */
645 stripe_count = le32_to_cpu(lum->lum_stripe_count);
646 OBD_ALLOC(stripe, sizeof(stripe[0]) * (stripe_count - 1));
650 OBD_ALLOC(idx_array, sizeof(idx_array[0]) * stripe_count);
651 if (idx_array == NULL)
652 GOTO(out_free, rc = -ENOMEM);
654 idx_array[0] = le32_to_cpu(lum->lum_stripe_offset);
655 for (i = 1; i < stripe_count; i++) {
656 struct lod_tgt_desc *tgt;
657 struct dt_object *dto;
660 struct lu_object_conf conf = { 0 };
662 idx = (idx_array[i - 1] + 1) % (lod->lod_remote_mdt_count + 1);
664 for (j = 0; j < lod->lod_remote_mdt_count;
665 j++, idx = (idx + 1) % (lod->lod_remote_mdt_count + 1)) {
666 bool already_allocated = false;
669 CDEBUG(D_INFO, "try idx %d, mdt cnt %d,"
670 " allocated %d, last allocated %d\n", idx,
671 lod->lod_remote_mdt_count, i, idx_array[i - 1]);
673 /* Find next avaible target */
674 if (!cfs_bitmap_check(ltd->ltd_tgt_bitmap, idx))
677 /* check whether the idx already exists
678 * in current allocated array */
679 for (k = 0; k < i; k++) {
680 if (idx_array[k] == idx) {
681 already_allocated = true;
686 if (already_allocated)
692 /* Can not allocate more stripes */
693 if (j == lod->lod_remote_mdt_count) {
694 CDEBUG(D_INFO, "%s: require stripes %d only get %d\n",
695 lod2obd(lod)->obd_name, stripe_count, i - 1);
699 CDEBUG(D_INFO, "idx %d, mdt cnt %d,"
700 " allocated %d, last allocated %d\n", idx,
701 lod->lod_remote_mdt_count, i, idx_array[i - 1]);
703 tgt = LTD_TGT(ltd, idx);
704 LASSERT(tgt != NULL);
706 rc = obd_fid_alloc(tgt->ltd_exp, &fid, NULL);
711 conf.loc_flags = LOC_F_NEW;
712 dto = dt_locate_at(env, tgt->ltd_tgt, &fid,
713 dt->do_lu.lo_dev->ld_site->ls_top_dev, &conf);
715 GOTO(out_put, rc = PTR_ERR(dto));
720 lo->ldo_dir_striped = 1;
721 lo->ldo_stripe = stripe;
722 lo->ldo_stripenr = i - 1;
723 lo->ldo_stripes_allocated = stripe_count - 1;
725 if (lo->ldo_stripenr == 0)
726 GOTO(out_put, rc = -ENOSPC);
728 rc = lod_prep_lmv_md(env, dt, &lmv_buf);
732 for (i = 0; i < lo->ldo_stripenr; i++) {
733 struct dt_object *dto;
736 /* only create slave striped object */
737 rc = dt_declare_create(env, dto, attr, NULL, NULL, th);
741 if (!dt_try_as_dir(env, dto))
742 GOTO(out_put, rc = -EINVAL);
744 rc = dt_declare_insert(env, dto,
745 (const struct dt_rec *)lu_object_fid(&dto->do_lu),
746 (const struct dt_key *)dot, th);
750 /* master stripe FID will be put to .. */
751 rc = dt_declare_insert(env, dto,
752 (const struct dt_rec *)lu_object_fid(&dt->do_lu),
753 (const struct dt_key *)dotdot, th);
757 /* probably nothing to inherite */
758 if (lo->ldo_striping_cached &&
759 !LOVEA_DELETE_VALUES(lo->ldo_def_stripe_size,
760 lo->ldo_def_stripenr,
761 lo->ldo_def_stripe_offset)) {
762 struct lod_thread_info *info;
763 struct lov_user_md_v3 *v3;
765 /* sigh, lti_ea_store has been used for lmv_buf,
766 * so we have to allocate buffer for default
770 GOTO(out_put, rc = -ENOMEM);
772 memset(v3, 0, sizeof(*v3));
773 v3->lmm_magic = cpu_to_le32(LOV_USER_MAGIC_V3);
774 v3->lmm_stripe_count =
775 cpu_to_le32(lo->ldo_def_stripenr);
776 v3->lmm_stripe_offset =
777 cpu_to_le32(lo->ldo_def_stripe_offset);
778 v3->lmm_stripe_size =
779 cpu_to_le32(lo->ldo_def_stripe_size);
781 strncpy(v3->lmm_pool_name, lo->ldo_pool,
784 info = lod_env_info(env);
785 info->lti_buf.lb_buf = v3;
786 info->lti_buf.lb_len = sizeof(*v3);
787 rc = dt_declare_xattr_set(env, dto,
795 rc = dt_declare_xattr_set(env, dto, &lmv_buf, XATTR_NAME_LMV, 0,
801 rc = dt_declare_xattr_set(env, dt, &lmv_buf, XATTR_NAME_LMV, 0, th);
807 for (i = 0; i < stripe_count - 1; i++)
808 if (stripe[i] != NULL)
809 lu_object_put(env, &stripe[i]->do_lu);
810 OBD_FREE(stripe, sizeof(stripe[0]) * (stripe_count - 1));
814 if (idx_array != NULL)
815 OBD_FREE(idx_array, sizeof(idx_array[0]) * stripe_count);
821 * Declare create striped md object.
/*
 * Declare the setting of an LMV EA, i.e. the creation of a striped
 * directory.
 *
 * The user descriptor in lum_buf must be present (asserted). Its fields
 * are logged, a stripe count of 0 or 1 is treated specially, the
 * descriptor is validated with lod_verify_md_striping() and the stripes
 * are then prepared by lod_prep_md_striped_create(). If that fails, the
 * partially built striping is discarded with lod_object_free_striping().
 *
 * NOTE(review): what is returned for a stripe count <= 1 is not visible
 * in this excerpt.
 */
823 static int lod_declare_xattr_set_lmv(const struct lu_env *env,
824 struct dt_object *dt,
825 struct lu_attr *attr,
826 const struct lu_buf *lum_buf,
829 struct lod_object *lo = lod_dt_obj(dt);
830 struct lod_device *lod = lu2lod_dev(dt->do_lu.lo_dev);
831 struct lmv_user_md_v1 *lum;
835 lum = lum_buf->lb_buf;
836 LASSERT(lum != NULL);
838 CDEBUG(D_INFO, "lum magic = %x count = %u offset = %d\n",
839 le32_to_cpu(lum->lum_magic), le32_to_cpu(lum->lum_stripe_count),
840 (int)le32_to_cpu(lum->lum_stripe_offset));
842 if (le32_to_cpu(lum->lum_stripe_count) <= 1)
845 rc = lod_verify_md_striping(lod, lum);
849 /* prepare dir striped objects */
850 rc = lod_prep_md_striped_create(env, dt, attr, lum, th);
852 /* failed to create striping, let's reset
853 * config so that others don't get confused */
854 lod_object_free_striping(env, lo);
862 * LOV xattr is a storage for striping, and LOD owns this xattr.
863 * but LOD allows others to control striping to some extent
865 * - to set new defined striping
866 * - to set new semi-defined striping
867 * - number of stripes is defined
868 * - number of stripes + osts are defined
/*
 * Declare the setting of an extended attribute.
 *
 * Three cases are distinguished by object type and attribute name:
 *  - regular file (or an object with no mode yet) + XATTR_NAME_LOV without
 *    LU_XATTR_REPLACE: a striping request. Attributes are read from the
 *    child if the object exists, otherwise a minimal S_IFREG attribute set
 *    is synthesised, and lod_declare_striped_object() is called;
 *  - directory: a default-LMV descriptor is validated with
 *    lod_verify_md_striping(), the change is declared on the child object,
 *    striping is loaded and the same change is declared on every stripe;
 *  - anything else: the declaration is passed straight to the child.
 */
871 static int lod_declare_xattr_set(const struct lu_env *env,
872 struct dt_object *dt,
873 const struct lu_buf *buf,
874 const char *name, int fl,
877 struct dt_object *next = dt_object_child(dt);
878 struct lu_attr *attr = &lod_env_info(env)->lti_attr;
884 * allow to declare predefined striping on a new (!mode) object
885 * which is supposed to be replay of regular file creation
886 * (when LOV setting is declared)
887 * LU_XATTR_REPLACE is set to indicate a layout swap
889 mode = dt->do_lu.lo_header->loh_attr & S_IFMT;
890 if ((S_ISREG(mode) || mode == 0) && strcmp(name, XATTR_NAME_LOV) == 0 &&
891 !(fl & LU_XATTR_REPLACE)) {
893 * this is a request to manipulate object's striping
895 if (dt_object_exists(dt)) {
896 rc = dt_attr_get(env, next, attr, BYPASS_CAPA);
900 memset(attr, 0, sizeof(*attr));
901 attr->la_valid = LA_TYPE | LA_MODE;
902 attr->la_mode = S_IFREG;
904 rc = lod_declare_striped_object(env, dt, attr, buf, th);
905 } else if (S_ISDIR(mode)) {
906 struct lod_device *d = lu2lod_dev(dt->do_lu.lo_dev);
907 struct lod_object *lo = lod_dt_obj(dt);
910 if (strcmp(name, XATTR_NAME_DEFAULT_LMV) == 0) {
911 struct lmv_user_md_v1 *lum;
913 LASSERT(buf != NULL && buf->lb_buf != NULL);
915 rc = lod_verify_md_striping(d, lum);
920 rc = dt_declare_xattr_set(env, next, buf, name, fl, th);
924 /* set xattr to each stripes, if needed */
925 rc = lod_load_striping(env, lo);
929 if (lo->ldo_stripenr == 0)
932 for (i = 0; i < lo->ldo_stripenr; i++) {
933 LASSERT(lo->ldo_stripe[i]);
934 rc = dt_declare_xattr_set(env, lo->ldo_stripe[i], buf,
940 rc = dt_declare_xattr_set(env, next, buf, name, fl, th);
946 static void lod_lov_stripe_cache_clear(struct lod_object *lo)
948 lo->ldo_striping_cached = 0;
949 lo->ldo_def_striping_set = 0;
950 lod_object_set_pool(lo, NULL);
951 lo->ldo_def_stripe_size = 0;
952 lo->ldo_def_stripenr = 0;
/*
 * Set (or delete) the default LOV striping stored on a directory.
 *
 * The cached default striping of the object is cleared first. The buffer
 * must be present (asserted) and is validated with lod_verify_striping().
 * If the descriptor is a LOV_USER_MAGIC_V1 one whose size/count/offset
 * are all "default" per LOVEA_DELETE_VALUES(), the xattr is deleted from
 * the child object with dt_xattr_del(); dt_xattr_set() on the child is
 * the other operation performed here.
 *
 * NOTE(review): the branch structure between the delete and the set call
 * is not fully visible in this excerpt.
 */
955 static int lod_xattr_set_lov_on_dir(const struct lu_env *env,
956 struct dt_object *dt,
957 const struct lu_buf *buf,
958 const char *name, int fl,
960 struct lustre_capa *capa)
962 struct lod_device *d = lu2lod_dev(dt->do_lu.lo_dev);
963 struct dt_object *next = dt_object_child(dt);
964 struct lod_object *l = lod_dt_obj(dt);
965 struct lov_user_md_v1 *lum;
966 struct lov_user_md_v3 *v3 = NULL;
970 /* If it is striped dir, we should clear the stripe cache for
971 * slave stripe as well, but there are no effective way to
972 * notify the LOD on the slave MDT, so we do not cache stripe
973 * information for slave stripe for now. XXX*/
974 lod_lov_stripe_cache_clear(l);
975 LASSERT(buf != NULL && buf->lb_buf != NULL);
978 rc = lod_verify_striping(d, buf, 0);
982 if (lum->lmm_magic == LOV_USER_MAGIC_V3)
985 /* if { size, offset, count } = { 0, -1, 0 } and no pool
986 * (i.e. all default values specified) then delete default
987 * striping from dir. */
989 "set default striping: sz %u # %u offset %d %s %s\n",
990 (unsigned)lum->lmm_stripe_size,
991 (unsigned)lum->lmm_stripe_count,
992 (int)lum->lmm_stripe_offset,
993 v3 ? "from" : "", v3 ? v3->lmm_pool_name : "");
995 if (LOVEA_DELETE_VALUES((lum->lmm_stripe_size),
996 (lum->lmm_stripe_count),
997 (lum->lmm_stripe_offset)) &&
998 lum->lmm_magic == LOV_USER_MAGIC_V1) {
999 rc = dt_xattr_del(env, next, name, th, capa);
1003 rc = dt_xattr_set(env, next, buf, name, fl, th, capa);
/*
 * Set (or delete) the default LMV (directory striping) EA on a directory.
 *
 * The buffer must be present (asserted). If the descriptor carries
 * LMV_USER_MAGIC and its count/offset are all "default" per
 * LMVEA_DELETE_VALUES(), the xattr is deleted from the child object with
 * dt_xattr_del(); dt_xattr_set() on the child is the other operation
 * performed here. Afterwards the in-memory default is refreshed:
 * l->ldo_dir_stripe is allocated if missing, ldo_dir_striping_cached is
 * cleared, ldo_dir_def_striping_set is set and ldo_dir_def_stripenr
 * becomes the user's stripe count minus one.
 *
 * NOTE(review): the branch structure and the handling of a failed
 * allocation are not fully visible in this excerpt.
 */
1009 static int lod_xattr_set_default_lmv_on_dir(const struct lu_env *env,
1010 struct dt_object *dt,
1011 const struct lu_buf *buf,
1012 const char *name, int fl,
1014 struct lustre_capa *capa)
1016 struct dt_object *next = dt_object_child(dt);
1017 struct lod_object *l = lod_dt_obj(dt);
1018 struct lmv_user_md_v1 *lum;
1022 LASSERT(buf != NULL && buf->lb_buf != NULL);
1025 CDEBUG(D_OTHER, "set default stripe_count # %u stripe_offset %d\n",
1026 le32_to_cpu(lum->lum_stripe_count),
1027 (int)le32_to_cpu(lum->lum_stripe_offset));
1029 if (LMVEA_DELETE_VALUES((le32_to_cpu(lum->lum_stripe_count)),
1030 le32_to_cpu(lum->lum_stripe_offset)) &&
1031 le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC) {
1032 rc = dt_xattr_del(env, next, name, th, capa);
1036 rc = dt_xattr_set(env, next, buf, name, fl, th, capa);
1040 /* Update default stripe cache */
1041 if (l->ldo_dir_stripe == NULL) {
1042 OBD_ALLOC_PTR(l->ldo_dir_stripe);
1043 if (l->ldo_dir_stripe == NULL)
1047 l->ldo_dir_striping_cached = 0;
1048 l->ldo_dir_def_striping_set = 1;
1049 l->ldo_dir_def_stripenr =
1050 le32_to_cpu(lum->lum_stripe_count) - 1;
/*
 * Execute phase of striped directory creation: create the slave stripes
 * that were allocated at declare time and write the LMV EA.
 *
 * Applies only to directories with at least one stripe. The LMV EA is
 * rebuilt with lod_prep_lmv_md(); then, for every stripe, the function
 * creates the object as a directory (S_IFDIR), inserts "." (its own FID)
 * and ".." (the master FID), writes the default LOV EA when the cached
 * default striping is not all-default, and writes the LMV EA. Finally the
 * LMV EA is written through dt_xattr_set() on dt itself.
 *
 * NOTE(review): the pool name is copied with strncpy(); the size argument
 * and any explicit NUL termination are not visible in this excerpt.
 * NOTE(review): early-return values are not visible in this excerpt.
 */
1056 static int lod_xattr_set_lmv(const struct lu_env *env, struct dt_object *dt,
1057 const struct lu_buf *buf, const char *name,
1058 int fl, struct thandle *th,
1059 struct lustre_capa *capa)
1061 struct lod_object *lo = lod_dt_obj(dt);
1062 struct lu_buf lmv_buf;
1067 if (!S_ISDIR(dt->do_lu.lo_header->loh_attr))
1070 /* The stripes are supposed to be allocated in declare phase,
1071 * if there are no stripes being allocated, it will skip */
1072 if (lo->ldo_stripenr == 0)
1075 rc = lod_prep_lmv_md(env, dt, &lmv_buf);
1079 for (i = 0; i < lo->ldo_stripenr; i++) {
1080 struct dt_object *dto;
1081 struct lu_attr *attr = &lod_env_info(env)->lti_attr;
1083 dto = lo->ldo_stripe[i];
1084 memset(attr, 0, sizeof(*attr));
1085 attr->la_valid = LA_TYPE | LA_MODE;
1086 attr->la_mode = S_IFDIR;
1087 rc = dt_create(env, dto, attr, NULL, NULL, th);
1091 rc = dt_insert(env, dto,
1092 (const struct dt_rec *)lu_object_fid(&dto->do_lu),
1093 (const struct dt_key *)dot, th, capa, 0);
1097 rc = dt_insert(env, dto,
1098 (struct dt_rec *)lu_object_fid(&dt->do_lu),
1099 (const struct dt_key *)dotdot, th, capa, 0);
1103 if (lo->ldo_striping_cached &&
1104 !LOVEA_DELETE_VALUES(lo->ldo_def_stripe_size,
1105 lo->ldo_def_stripenr,
1106 lo->ldo_def_stripe_offset)) {
1107 struct lod_thread_info *info;
1108 struct lov_user_md_v3 *v3;
1110 /* sigh, lti_ea_store has been used for lmv_buf,
1111 * so we have to allocate buffer for default
1117 memset(v3, 0, sizeof(*v3));
1118 v3->lmm_magic = cpu_to_le32(LOV_USER_MAGIC_V3);
1119 v3->lmm_stripe_count =
1120 cpu_to_le32(lo->ldo_def_stripenr);
1121 v3->lmm_stripe_offset =
1122 cpu_to_le32(lo->ldo_def_stripe_offset);
1123 v3->lmm_stripe_size =
1124 cpu_to_le32(lo->ldo_def_stripe_size);
1126 strncpy(v3->lmm_pool_name, lo->ldo_pool,
1129 info = lod_env_info(env);
1130 info->lti_buf.lb_buf = v3;
1131 info->lti_buf.lb_len = sizeof(*v3);
1132 rc = dt_xattr_set(env, dto, &info->lti_buf,
1133 XATTR_NAME_LOV, 0, th, capa);
1139 rc = dt_xattr_set(env, dto, &lmv_buf, XATTR_NAME_LMV, fl, th,
1143 rc = dt_xattr_set(env, dt, &lmv_buf, XATTR_NAME_LMV, fl, th, capa);
/*
 * Set an extended attribute.
 *
 * Dispatches on object type and attribute name:
 *  - directory + XATTR_NAME_LOV: lod_xattr_set_lov_on_dir();
 *  - regular file + XATTR_NAME_LOV: with LU_XATTR_REPLACE the in-memory
 *    striping is freed and the EA written to the child; without it
 *    lod_striping_create() is called (replay case, see comment below);
 *  - XATTR_NAME_DEFAULT_LMV: lod_xattr_set_default_lmv_on_dir();
 *  - any other name: passed straight to dt_xattr_set() on the child.
 * If that succeeded and the object is a directory with stripes, the same
 * xattr is then set on every stripe.
 */
1148 static int lod_xattr_set(const struct lu_env *env,
1149 struct dt_object *dt, const struct lu_buf *buf,
1150 const char *name, int fl, struct thandle *th,
1151 struct lustre_capa *capa)
1153 struct lod_object *lo = lod_dt_obj(dt);
1154 struct dt_object *next = dt_object_child(dt);
1160 attr = dt->do_lu.lo_header->loh_attr & S_IFMT;
1161 if (S_ISDIR(attr) && strcmp(name, XATTR_NAME_LOV) == 0) {
1162 rc = lod_xattr_set_lov_on_dir(env, dt, buf, name, fl, th, capa);
1163 } else if (S_ISREG(attr) && !strcmp(name, XATTR_NAME_LOV)) {
1164 /* in case of lov EA swap, just set it
1165 * if not, it is a replay so check striping match what we
1166 * already have during req replay, declare_xattr_set()
1167 * defines striping, then create() does the work
1169 if (fl & LU_XATTR_REPLACE) {
1170 /* free stripes, then update disk */
1171 lod_object_free_striping(env, lod_dt_obj(dt));
1172 rc = dt_xattr_set(env, next, buf, name, fl, th, capa);
1174 rc = lod_striping_create(env, dt, NULL, NULL, th);
1176 } else if (strcmp(name, XATTR_NAME_DEFAULT_LMV) == 0) {
1179 rc = lod_xattr_set_default_lmv_on_dir(env, dt, buf, name, fl,
1183 * behave transparantly for all other EAs
1185 rc = dt_xattr_set(env, next, buf, name, fl, th, capa);
1188 if (rc != 0 || !S_ISDIR(attr))
1191 if (lo->ldo_stripenr == 0)
1194 for (i = 0; i < lo->ldo_stripenr; i++) {
1195 LASSERT(lo->ldo_stripe[i]);
1196 rc = dt_xattr_set(env, lo->ldo_stripe[i], buf, name, fl, th,
/*
 * Declare the deletion of an extended attribute.
 *
 * Forwards name and th to dt_declare_xattr_del() on the object returned
 * by dt_object_child(dt) and returns its result.
 */
1205 static int lod_declare_xattr_del(const struct lu_env *env,
1206 struct dt_object *dt, const char *name,
1209 return dt_declare_xattr_del(env, dt_object_child(dt), name, th);
1212 static int lod_xattr_del(const struct lu_env *env, struct dt_object *dt,
1213 const char *name, struct thandle *th,
1214 struct lustre_capa *capa)
1216 if (!strcmp(name, XATTR_NAME_LOV))
1217 lod_object_free_striping(env, lod_dt_obj(dt));
1218 return dt_xattr_del(env, dt_object_child(dt), name, th, capa);
/*
 * List the extended attributes of the underlying object.
 *
 * \param[in]  env	execution environment
 * \param[in]  dt	LOD-level object
 * \param[out] buf	buffer handed to the lower layer
 * \param[in]  capa	capability passed to the lower layer
 *
 * \retval		value returned by dt_xattr_list() for the child
 */
static int lod_xattr_list(const struct lu_env *env,
			  struct dt_object *dt, struct lu_buf *buf,
			  struct lustre_capa *capa)
{
	struct dt_object *child = dt_object_child(dt);
	int rc;

	rc = dt_xattr_list(env, child, buf, capa);

	return rc;
}
/*
 * Replace the pool name stored in a LOD object.
 *
 * The visible statements free the current o->ldo_pool string (its size is
 * recomputed with strlen() + 1), allocate strlen + 1 bytes for the new
 * name and copy it with strcpy(). lod_lov_stripe_cache_clear() calls this
 * with pool == NULL to drop the name.
 *
 * NOTE(review): the NULL guards around the free and the copy, and the
 * return values, are not visible in this excerpt.
 */
1228 int lod_object_set_pool(struct lod_object *o, char *pool)
1233 len = strlen(o->ldo_pool);
1234 OBD_FREE(o->ldo_pool, len + 1);
1239 OBD_ALLOC(o->ldo_pool, len + 1);
1240 if (o->ldo_pool == NULL)
1242 strcpy(o->ldo_pool, pool);
1247 static inline int lod_object_will_be_striped(int is_reg, const struct lu_fid *fid)
1249 return (is_reg && fid_seq(fid) != FID_SEQ_LOCAL_FILE);
1253 static int lod_cache_parent_lov_striping(const struct lu_env *env,
1254 struct lod_object *lp)
1256 struct lod_thread_info *info = lod_env_info(env);
1257 struct lov_user_md_v1 *v1 = NULL;
1258 struct lov_user_md_v3 *v3 = NULL;
1262 /* called from MDD without parent being write locked,
1264 dt_write_lock(env, dt_object_child(&lp->ldo_obj), 0);
1265 rc = lod_get_lov_ea(env, lp);
1269 if (rc < sizeof(struct lov_user_md)) {
1270 /* don't lookup for non-existing or invalid striping */
1271 lp->ldo_def_striping_set = 0;
1272 lp->ldo_striping_cached = 1;
1273 lp->ldo_def_stripe_size = 0;
1274 lp->ldo_def_stripenr = 0;
1275 lp->ldo_def_stripe_offset = (typeof(v1->lmm_stripe_offset))(-1);
1276 GOTO(unlock, rc = 0);
1280 v1 = info->lti_ea_store;
1281 if (v1->lmm_magic == __swab32(LOV_USER_MAGIC_V1))
1282 lustre_swab_lov_user_md_v1(v1);
1283 else if (v1->lmm_magic == __swab32(LOV_USER_MAGIC_V3))
1284 lustre_swab_lov_user_md_v3(v3);
1286 if (v1->lmm_magic != LOV_MAGIC_V3 && v1->lmm_magic != LOV_MAGIC_V1)
1287 GOTO(unlock, rc = 0);
1289 if (v1->lmm_pattern != LOV_PATTERN_RAID0 && v1->lmm_pattern != 0)
1290 GOTO(unlock, rc = 0);
1292 lp->ldo_def_stripenr = v1->lmm_stripe_count;
1293 lp->ldo_def_stripe_size = v1->lmm_stripe_size;
1294 lp->ldo_def_stripe_offset = v1->lmm_stripe_offset;
1295 lp->ldo_striping_cached = 1;
1296 lp->ldo_def_striping_set = 1;
1297 if (v1->lmm_magic == LOV_USER_MAGIC_V3) {
1298 /* XXX: sanity check here */
1299 v3 = (struct lov_user_md_v3 *) v1;
1300 if (v3->lmm_pool_name[0])
1301 lod_object_set_pool(lp, v3->lmm_pool_name);
1305 dt_write_unlock(env, dt_object_child(&lp->ldo_obj));
/*
 * Cache the default LMV (directory striping) of a parent directory in its
 * LOD object.
 *
 * The default LMV EA is read with lod_get_default_lmv_ea() while the child
 * object is write-locked. An EA shorter than a struct lmv_user_md is
 * cached as "no default": stripe count 0, offset -1 and hash type
 * LMV_HASH_TYPE_FNV_1A_64. Otherwise the stripe count (minus one for the
 * master), offset and hash type are taken from the little-endian EA in
 * the per-thread buffer. Either way ldo_dir_striping_cached is set.
 *
 * NOTE(review): "rc < sizeof(...)" compares a signed int with a size_t; a
 * negative rc would not satisfy it. Whether rc < 0 is handled before this
 * test is not visible in this excerpt.
 */
1310 static int lod_cache_parent_lmv_striping(const struct lu_env *env,
1311 struct lod_object *lp)
1313 struct lod_thread_info *info = lod_env_info(env);
1314 struct lmv_user_md_v1 *v1 = NULL;
1318 /* called from MDD without parent being write locked,
1320 dt_write_lock(env, dt_object_child(&lp->ldo_obj), 0);
1321 rc = lod_get_default_lmv_ea(env, lp);
1325 if (rc < sizeof(struct lmv_user_md)) {
1326 /* don't lookup for non-existing or invalid striping */
1327 lp->ldo_dir_def_striping_set = 0;
1328 lp->ldo_dir_striping_cached = 1;
1329 lp->ldo_dir_def_stripenr = 0;
1330 lp->ldo_dir_def_stripe_offset =
1331 (typeof(v1->lum_stripe_offset))(-1);
1332 lp->ldo_dir_def_hash_type = LMV_HASH_TYPE_FNV_1A_64;
1333 GOTO(unlock, rc = 0);
1337 v1 = info->lti_ea_store;
1339 lp->ldo_dir_def_stripenr = le32_to_cpu(v1->lum_stripe_count) - 1;
1340 lp->ldo_dir_def_stripe_offset = le32_to_cpu(v1->lum_stripe_offset);
1341 lp->ldo_dir_def_hash_type = le32_to_cpu(v1->lum_hash_type);
1342 lp->ldo_dir_def_striping_set = 1;
1343 lp->ldo_dir_striping_cached = 1;
1347 dt_write_unlock(env, dt_object_child(&lp->ldo_obj));
/*
 * Make sure the default striping of the parent lp is cached in the object.
 *
 * The striping is loaded with lod_load_striping() first.  The default LOV
 * striping is then cached once (guarded by ldo_striping_cached) and, only
 * when the child being created is a directory (S_ISDIR(child_mode)), the
 * default LMV striping as well (guarded by ldo_dir_striping_cached).
 */
1351 static int lod_cache_parent_striping(const struct lu_env *env,
1352 struct lod_object *lp,
1358 rc = lod_load_striping(env, lp);
1362 if (!lp->ldo_striping_cached) {
1363 /* we haven't tried to get default striping for
1364 * the directory yet, let's cache it in the object */
1365 rc = lod_cache_parent_lov_striping(env, lp);
1370 if (S_ISDIR(child_mode) && !lp->ldo_dir_striping_cached)
1371 rc = lod_cache_parent_lmv_striping(env, lp);
/*
 * Allocation-hint callback (installed as .do_ah_init in lod_obj_ops):
 * prepares the lod_object of child, which is about to be created, with the
 * striping it should get.
 *
 * - The hint is first forwarded to the next layer when the lower child
 *   object does not exist yet or is remote.
 * - For a directory child, the parent's cached default LOV and LMV striping
 *   is copied into the child.  The child's own directory striping comes from
 *   ah->dah_eadata when that is supplied (it is passed through
 *   lod_verify_md_striping() and only a lum_stripe_count > 1 stripes the
 *   directory), else from the parent's default directory striping, else it
 *   is reset to stripenr 0 / offset -1.
 * - For the remaining children, lod_object_will_be_striped() is consulted
 *   (a negative answer presumably ends the function); stripe count, size and
 *   offset are then taken from the parent's default striping, and a count or
 *   size that is still 0 falls back to the filesystem-wide defaults in
 *   d->lod_desc.
 *
 * NOTE(review): parent is treated as optional (likely(parent)), yet
 * dt_object_remote(nextp) is evaluated with nextp still NULL in that case,
 * and the S_ISDIR branch dereferences lp without a NULL check -- confirm
 * that callers always pass a parent on those paths.
 *
 * NOTE(review): in the directory branch ldo_def_striping_set is set to 1 on
 * the child whenever the parent has ldo_striping_cached, independently of
 * the parent's own ldo_def_striping_set -- confirm this is intended.
 */
1377 * used to transfer default striping data to the object being created
1379 static void lod_ah_init(const struct lu_env *env,
1380 struct dt_allocation_hint *ah,
1381 struct dt_object *parent,
1382 struct dt_object *child,
1385 struct lod_device *d = lu2lod_dev(child->do_lu.lo_dev);
1386 struct dt_object *nextp = NULL;
1387 struct dt_object *nextc;
1388 struct lod_object *lp = NULL;
1389 struct lod_object *lc;
1390 struct lov_desc *desc;
1395 if (likely(parent)) {
1396 nextp = dt_object_child(parent);
1397 lp = lod_dt_obj(parent);
1400 nextc = dt_object_child(child);
1401 lc = lod_dt_obj(child);
1403 LASSERT(lc->ldo_stripenr == 0);
1404 LASSERT(lc->ldo_stripe == NULL);
1407 * local object may want some hints
1408 * in case of late striping creation, ->ah_init()
1409 * can be called with local object existing
/* a remote parent is hidden from the lower layer: NULL is passed instead */
1411 if (!dt_object_exists(nextc) || dt_object_remote(nextc))
1412 nextc->do_ops->do_ah_init(env, ah, dt_object_remote(nextp) ?
1413 NULL : nextp, nextc, child_mode);
1415 if (S_ISDIR(child_mode)) {
1418 if (lc->ldo_dir_stripe == NULL) {
1419 OBD_ALLOC_PTR(lc->ldo_dir_stripe);
1420 if (lc->ldo_dir_stripe == NULL)
1424 if (lp->ldo_dir_stripe == NULL) {
1425 OBD_ALLOC_PTR(lp->ldo_dir_stripe);
1426 if (lp->ldo_dir_stripe == NULL)
1430 rc = lod_cache_parent_striping(env, lp, child_mode);
1434 /* transfer defaults to new directory */
1435 if (lp->ldo_striping_cached) {
1437 lod_object_set_pool(lc, lp->ldo_pool);
1438 lc->ldo_def_stripenr = lp->ldo_def_stripenr;
1439 lc->ldo_def_stripe_size = lp->ldo_def_stripe_size;
1440 lc->ldo_def_stripe_offset = lp->ldo_def_stripe_offset;
1441 lc->ldo_striping_cached = 1;
1442 lc->ldo_def_striping_set = 1;
1443 CDEBUG(D_OTHER, "inherite EA sz:%d off:%d nr:%d\n",
1444 (int)lc->ldo_def_stripe_size,
1445 (int)lc->ldo_def_stripe_offset,
1446 (int)lc->ldo_def_stripenr);
1449 /* transfer dir defaults to new directory */
1450 if (lp->ldo_dir_striping_cached) {
1451 lc->ldo_dir_def_stripenr = lp->ldo_dir_def_stripenr;
1452 lc->ldo_dir_def_stripe_offset =
1453 lp->ldo_dir_def_stripe_offset;
1454 lc->ldo_dir_def_hash_type =
1455 lp->ldo_dir_def_hash_type;
1456 lc->ldo_dir_striping_cached = 1;
1457 lc->ldo_dir_def_striping_set = 1;
1458 CDEBUG(D_INFO, "inherit default EA nr:%d off:%d t%u\n",
1459 (int)lc->ldo_dir_def_stripenr,
1460 (int)lc->ldo_dir_def_stripe_offset,
1461 lc->ldo_dir_def_hash_type);
1464 /* If the directory is specified with certain stripes */
1465 if (ah->dah_eadata != NULL && ah->dah_eadata_len != 0) {
1466 const struct lmv_user_md_v1 *lum1 = ah->dah_eadata;
1469 rc = lod_verify_md_striping(d, lum1);
1471 le32_to_cpu(lum1->lum_stripe_count) > 1) {
1472 /* Directory will be striped only if
1473 * stripe_count > 1 */
1475 le32_to_cpu(lum1->lum_stripe_count) - 1;
1476 lc->ldo_dir_stripe_offset =
1477 le32_to_cpu(lum1->lum_stripe_offset);
1478 lc->ldo_dir_hash_type =
1479 le32_to_cpu(lum1->lum_hash_type);
1480 CDEBUG(D_INFO, "set stripe EA nr:%hu off:%d\n",
1482 (int)lc->ldo_dir_stripe_offset);
1484 } else if (lp->ldo_dir_def_striping_set) {
1485 /* If there are default dir stripe from parent */
1486 lc->ldo_stripenr = lp->ldo_dir_def_stripenr;
1487 lc->ldo_dir_stripe_offset =
1488 lp->ldo_dir_def_stripe_offset;
1489 lc->ldo_dir_hash_type =
1490 lp->ldo_dir_def_hash_type;
1491 CDEBUG(D_INFO, "inherit EA nr:%hu off:%d\n",
1493 (int)lc->ldo_dir_stripe_offset);
1495 /* set default stripe for this directory */
1496 lc->ldo_stripenr = 0;
1497 lc->ldo_dir_stripe_offset = -1;
1500 CDEBUG(D_INFO, "final striping count:%hu, offset:%d\n",
1501 lc->ldo_stripenr, (int)lc->ldo_dir_stripe_offset);
1507 * if object is going to be striped over OSTs, transfer default
1508 * striping information to the child, so that we can use it
1509 * during declaration and creation
1511 if (!lod_object_will_be_striped(S_ISREG(child_mode),
1512 lu_object_fid(&child->do_lu)))
1515 * try from the parent
1517 if (likely(parent)) {
/* NOTE(review): unlike the directory branch, the result of
 * lod_cache_parent_striping() is not stored here */
1518 lod_cache_parent_striping(env, lp, child_mode);
1520 lc->ldo_def_stripe_offset = (__u16) -1;
1522 if (lp->ldo_def_striping_set) {
1524 lod_object_set_pool(lc, lp->ldo_pool);
1525 lc->ldo_stripenr = lp->ldo_def_stripenr;
1526 lc->ldo_stripe_size = lp->ldo_def_stripe_size;
1527 lc->ldo_def_stripe_offset = lp->ldo_def_stripe_offset;
1528 CDEBUG(D_OTHER, "striping from parent: #%d, sz %d %s\n",
1529 lc->ldo_stripenr, lc->ldo_stripe_size,
1530 lp->ldo_pool ? lp->ldo_pool : "");
1535 * if the parent doesn't provide with specific pattern, grab fs-wide one
1537 desc = &d->lod_desc;
1538 if (lc->ldo_stripenr == 0)
1539 lc->ldo_stripenr = desc->ld_default_stripe_count;
1540 if (lc->ldo_stripe_size == 0)
1541 lc->ldo_stripe_size = desc->ld_default_stripe_size;
1542 CDEBUG(D_OTHER, "final striping: # %d stripes, sz %d from %s\n",
1543 lc->ldo_stripenr, lc->ldo_stripe_size,
1544 lc->ldo_pool ? lc->ldo_pool : "");
1547 /* we do not cache stripe information for slave stripe, see
1548 * lod_xattr_set_lov_on_dir */
1549 if (lp != NULL && lp->ldo_dir_slave_stripe)
1550 lod_lov_stripe_cache_clear(lp);
/* alias for do_div(): divides aaa in place by bbb and evaluates to the
 * remainder */
1555 #define ll_do_div64(aaa,bbb) do_div((aaa), (bbb))
1557 * this function handles a special case when truncate was done
1558 * on a stripeless object and now striping is being created
1559 * we can't lose that size, so we have to propagate it to newly
1562 static int lod_declare_init_size(const struct lu_env *env,
1563 struct dt_object *dt, struct thandle *th)
1565 struct dt_object *next = dt_object_child(dt);
1566 struct lod_object *lo = lod_dt_obj(dt);
1567 struct lu_attr *attr = &lod_env_info(env)->lti_attr;
1568 uint64_t size, offs;
1572 /* XXX: we support the simplest (RAID0) striping so far */
1573 LASSERT(lo->ldo_stripe || lo->ldo_stripenr == 0);
1574 LASSERT(lo->ldo_stripe_size > 0);
1576 rc = dt_attr_get(env, next, attr, BYPASS_CAPA);
1577 LASSERT(attr->la_valid & LA_SIZE);
1581 size = attr->la_size;
1585 /* ll_do_div64(a, b) returns a % b, and a = a / b */
1586 ll_do_div64(size, (__u64) lo->ldo_stripe_size);
1587 stripe = ll_do_div64(size, (__u64) lo->ldo_stripenr);
1589 size = size * lo->ldo_stripe_size;
1590 offs = attr->la_size;
1591 size += ll_do_div64(offs, lo->ldo_stripe_size);
1593 attr->la_valid = LA_SIZE;
1594 attr->la_size = size;
1596 rc = dt_declare_attr_set(env, lo->ldo_stripe[stripe], attr, th);
/*
 * Declare, inside th, everything needed to create the striping of dt.
 *
 * lod_qos_prep_create() chooses the OSTs and prepares the stripe objects.
 * Room for the LOV EA is then declared on the local object under
 * XATTR_NAME_LOV; its length comes from lov_mds_md_size() using the V3
 * layout when a pool is set and V1 otherwise.  If the local object already
 * exists, lod_declare_init_size() additionally declares the propagation of
 * its size to the stripes.
 *
 * When OBD_FAIL_MDS_ALLOC_OBDO is injected, the striping is released with
 * lod_object_free_striping() and rc becomes -ENOMEM; the striping is also
 * released on the lod_qos_prep_create() failure path.
 */
1602 * Create declaration of striped object
1604 int lod_declare_striped_object(const struct lu_env *env, struct dt_object *dt,
1605 struct lu_attr *attr,
1606 const struct lu_buf *lovea, struct thandle *th)
1608 struct lod_thread_info *info = lod_env_info(env);
1609 struct dt_object *next = dt_object_child(dt);
1610 struct lod_object *lo = lod_dt_obj(dt);
1614 if (OBD_FAIL_CHECK(OBD_FAIL_MDS_ALLOC_OBDO)) {
1615 /* failed to create striping, let's reset
1616 * config so that others don't get confused */
1617 lod_object_free_striping(env, lo);
1618 GOTO(out, rc = -ENOMEM);
1621 /* choose OST and generate appropriate objects */
1622 rc = lod_qos_prep_create(env, lo, attr, lovea, th);
1624 /* failed to create striping, let's reset
1625 * config so that others don't get confused */
1626 lod_object_free_striping(env, lo);
1631 * declare storage for striping data
1633 info->lti_buf.lb_len = lov_mds_md_size(lo->ldo_stripenr,
1634 lo->ldo_pool ? LOV_MAGIC_V3 : LOV_MAGIC_V1);
1635 rc = dt_declare_xattr_set(env, next, &info->lti_buf, XATTR_NAME_LOV,
1641 * if striping is created with local object's size > 0,
1642 * we have to propagate this size to specific object
1643 * the case is possible only when local object was created previously
1645 if (dt_object_exists(next))
1646 rc = lod_declare_init_size(env, dt, th);
/*
 * Common body of lod_declare_dir_striping_create() (last argument true) and
 * lod_dir_striping_create() (last argument false) for a new directory dt.
 *
 * Up to three EAs are built in info->lti_ea_store (grown with
 * lod_ea_store_resize() when it is too small) and handed down through
 * info->lti_buf:
 *  1. the directory's own LMV striping (count ldo_stripenr + 1, offset
 *     ldo_dir_stripe_offset), through lod_declare_xattr_set_lmv() or
 *     lod_xattr_set_lmv(); runs when ldo_dir_def_striping_set is set and
 *     LMVEA_DELETE_VALUES() does not match;
 *  2. the default LMV striping transferred from the parent, stored under
 *     XATTR_NAME_DEFAULT_LMV; runs when ldo_dir_striping_cached is set and
 *     LMVEA_DELETE_VALUES() does not match the cached defaults;
 *  3. the default LOV striping transferred from the parent, stored under
 *     XATTR_NAME_LOV as a lov_user_md_v3; runs when ldo_striping_cached is
 *     set and LOVEA_DELETE_VALUES() does not match the cached defaults.
 * Every step has a declare variant and an execute variant; which one runs is
 * presumably selected by the last argument.
 */
1652 int lod_dir_striping_create_internal(const struct lu_env *env,
1653 struct dt_object *dt,
1654 struct lu_attr *attr,
1655 const struct dt_object_format *dof,
1659 struct lod_thread_info *info = lod_env_info(env);
1660 struct dt_object *next = dt_object_child(dt);
1661 struct lod_object *lo = lod_dt_obj(dt);
1665 if (lo->ldo_dir_def_striping_set &&
1666 !LMVEA_DELETE_VALUES(lo->ldo_stripenr,
1667 lo->ldo_dir_stripe_offset)) {
1668 struct lmv_user_md_v1 *v1 = info->lti_ea_store;
/* ldo_stripenr is kept as "count - 1", see lod_ah_init() */
1669 int stripe_count = lo->ldo_stripenr + 1;
1671 if (info->lti_ea_store_size < sizeof(*v1)) {
1672 rc = lod_ea_store_resize(info, sizeof(*v1));
/* re-read the pointer after the resize */
1675 v1 = info->lti_ea_store;
1678 memset(v1, 0, sizeof(*v1));
1679 v1->lum_magic = cpu_to_le32(LMV_USER_MAGIC);
1680 v1->lum_stripe_count = cpu_to_le32(stripe_count);
1681 v1->lum_stripe_offset =
1682 cpu_to_le32(lo->ldo_dir_stripe_offset);
1684 info->lti_buf.lb_buf = v1;
1685 info->lti_buf.lb_len = sizeof(*v1);
1688 rc = lod_declare_xattr_set_lmv(env, dt, attr,
1689 &info->lti_buf, th);
1691 rc = lod_xattr_set_lmv(env, dt, &info->lti_buf,
1692 XATTR_NAME_LMV, 0, th,
1698 /* Transfer default LMV striping from the parent */
1699 if (lo->ldo_dir_striping_cached &&
1700 !LMVEA_DELETE_VALUES(lo->ldo_dir_def_stripenr,
1701 lo->ldo_dir_def_stripe_offset)) {
1702 struct lmv_user_md_v1 *v1 = info->lti_ea_store;
1703 int def_stripe_count = lo->ldo_dir_def_stripenr + 1;
1705 if (info->lti_ea_store_size < sizeof(*v1)) {
1706 rc = lod_ea_store_resize(info, sizeof(*v1));
1709 v1 = info->lti_ea_store;
1712 memset(v1, 0, sizeof(*v1));
1713 v1->lum_magic = cpu_to_le32(LMV_USER_MAGIC);
1714 v1->lum_stripe_count = cpu_to_le32(def_stripe_count);
1715 v1->lum_stripe_offset =
1716 cpu_to_le32(lo->ldo_dir_def_stripe_offset);
1718 cpu_to_le32(lo->ldo_dir_def_hash_type);
1720 info->lti_buf.lb_buf = v1;
1721 info->lti_buf.lb_len = sizeof(*v1);
1723 rc = dt_declare_xattr_set(env, next, &info->lti_buf,
1724 XATTR_NAME_DEFAULT_LMV, 0,
1727 rc = dt_xattr_set(env, next, &info->lti_buf,
1728 XATTR_NAME_DEFAULT_LMV, 0, th,
1734 /* Transfer default LOV striping from the parent */
1735 if (lo->ldo_striping_cached &&
1736 !LOVEA_DELETE_VALUES(lo->ldo_def_stripe_size,
1737 lo->ldo_def_stripenr,
1738 lo->ldo_def_stripe_offset)) {
1739 struct lov_user_md_v3 *v3 = info->lti_ea_store;
1741 if (info->lti_ea_store_size < sizeof(*v3)) {
1742 rc = lod_ea_store_resize(info, sizeof(*v3));
1745 v3 = info->lti_ea_store;
1748 memset(v3, 0, sizeof(*v3));
1749 v3->lmm_magic = cpu_to_le32(LOV_USER_MAGIC_V3);
1750 v3->lmm_stripe_count = cpu_to_le16(lo->ldo_def_stripenr);
1751 v3->lmm_stripe_offset = cpu_to_le16(lo->ldo_def_stripe_offset);
1752 v3->lmm_stripe_size = cpu_to_le32(lo->ldo_def_stripe_size);
/* NOTE(review): strncpy() leaves the destination without a terminating
 * NUL when the source is at least as long as the bound -- confirm that
 * readers of lmm_pool_name cope with an unterminated name */
1754 strncpy(v3->lmm_pool_name, lo->ldo_pool,
1757 info->lti_buf.lb_buf = v3;
1758 info->lti_buf.lb_len = sizeof(*v3);
1761 rc = dt_declare_xattr_set(env, next, &info->lti_buf,
1762 XATTR_NAME_LOV, 0, th);
1764 rc = dt_xattr_set(env, next, &info->lti_buf,
1765 XATTR_NAME_LOV, 0, th,
/* Declare phase of directory striping creation: calls
 * lod_dir_striping_create_internal() with its last argument set to true. */
1774 static int lod_declare_dir_striping_create(const struct lu_env *env,
1775 struct dt_object *dt,
1776 struct lu_attr *attr,
1777 struct dt_object_format *dof,
1780 return lod_dir_striping_create_internal(env, dt, attr, dof, th, true);
/* Execute phase of directory striping creation: calls
 * lod_dir_striping_create_internal() with its last argument set to false. */
1783 static int lod_dir_striping_create(const struct lu_env *env,
1784 struct dt_object *dt,
1785 struct lu_attr *attr,
1786 struct dt_object_format *dof,
1789 return lod_dir_striping_create_internal(env, dt, attr, dof, th, false);
/*
 * Declare the creation of dt (installed as .do_declare_create).
 *
 * The local object is declared first through dt_declare_create().  After
 * that, depending on dof->dof_type:
 *  - DFT_SYM: lod_body_lnk_ops is installed as the body operations of dt;
 *  - DFT_REGULAR: ldo_stripenr is forced to 0 when the caller asked for no
 *    stripes (dof->u.dof_reg.striped == 0); with ldo_stripenr > 0 the
 *    striping is declared by lod_declare_striped_object();
 *  - DFT_DIR: lod_declare_dir_striping_create() declares the directory EAs.
 */
1792 static int lod_declare_object_create(const struct lu_env *env,
1793 struct dt_object *dt,
1794 struct lu_attr *attr,
1795 struct dt_allocation_hint *hint,
1796 struct dt_object_format *dof,
1799 struct dt_object *next = dt_object_child(dt);
1800 struct lod_object *lo = lod_dt_obj(dt);
1809 * first of all, we declare creation of local object
1811 rc = dt_declare_create(env, next, attr, hint, dof, th);
1815 if (dof->dof_type == DFT_SYM)
1816 dt->do_body_ops = &lod_body_lnk_ops;
1819 * it's lod_ah_init() who has decided the object will striped
1821 if (dof->dof_type == DFT_REGULAR) {
1822 /* callers don't want stripes */
1823 /* XXX: all tricky interactions with ->ah_make_hint() decided
1824 * to use striping, then ->declare_create() behaving differently
1825 * should be cleaned */
1826 if (dof->u.dof_reg.striped == 0)
1827 lo->ldo_stripenr = 0;
1828 if (lo->ldo_stripenr > 0)
1829 rc = lod_declare_striped_object(env, dt, attr,
1831 } else if (dof->dof_type == DFT_DIR) {
1832 rc = lod_declare_dir_striping_create(env, dt, attr, dof, th);
/*
 * Create the stripes of dt: every object in lo->ldo_stripe[] is created with
 * dt_create(), then lod_generate_and_set_lovea() is called for the object.
 * Asserts that ldo_striping_cached is 0 and that each stripe slot below
 * ldo_stripenr is populated.
 */
1838 int lod_striping_create(const struct lu_env *env, struct dt_object *dt,
1839 struct lu_attr *attr, struct dt_object_format *dof,
1842 struct lod_object *lo = lod_dt_obj(dt);
1846 LASSERT(lo->ldo_striping_cached == 0);
1848 /* create all underlying objects */
1849 for (i = 0; i < lo->ldo_stripenr; i++) {
1850 LASSERT(lo->ldo_stripe[i]);
/* the allocation hint is not passed down to the stripes (NULL) */
1851 rc = dt_create(env, lo->ldo_stripe[i], attr, NULL, dof, th);
1857 rc = lod_generate_and_set_lovea(env, lo, th);
/*
 * Create dt (installed as .do_create): the local object is created with
 * dt_create(); a directory then gets its striping EAs through
 * lod_dir_striping_create(), any other object that has ldo_stripe set and
 * was requested as striped gets its stripes through lod_striping_create().
 *
 * NOTE(review): dof->u.dof_reg.striped is read for every non-directory type
 * here, whereas lod_declare_object_create() only reads it after testing
 * dof_type == DFT_REGULAR -- confirm the field is meaningful for the other
 * types.
 */
1862 static int lod_object_create(const struct lu_env *env, struct dt_object *dt,
1863 struct lu_attr *attr,
1864 struct dt_allocation_hint *hint,
1865 struct dt_object_format *dof, struct thandle *th)
1867 struct dt_object *next = dt_object_child(dt);
1868 struct lod_object *lo = lod_dt_obj(dt);
1872 /* create local object */
1873 rc = dt_create(env, next, attr, hint, dof, th);
1876 if (S_ISDIR(dt->do_lu.lo_header->loh_attr))
1877 rc = lod_dir_striping_create(env, dt, attr, dof, th);
1878 else if (lo->ldo_stripe && dof->u.dof_reg.striped != 0)
1879 rc = lod_striping_create(env, dt, attr, dof, th);
/*
 * Declare the destruction of dt (installed as .do_declare_destroy): the
 * destroy is declared on the local object, the striping is loaded with
 * lod_load_striping(), and a destroy is declared on every stripe object.
 */
1885 static int lod_declare_object_destroy(const struct lu_env *env,
1886 struct dt_object *dt,
1889 struct dt_object *next = dt_object_child(dt);
1890 struct lod_object *lo = lod_dt_obj(dt);
1895 * we declare destroy for the local object
1897 rc = dt_declare_destroy(env, next, th);
1902 * load striping information, notice we don't do this when object
1903 * is being initialized as we don't need this information till
1904 * few specific cases like destroy, chown
1906 rc = lod_load_striping(env, lo);
1910 /* declare destroy for all underlying objects */
1911 for (i = 0; i < lo->ldo_stripenr; i++) {
1912 LASSERT(lo->ldo_stripe[i]);
1913 rc = dt_declare_destroy(env, lo->ldo_stripe[i], th);
/*
 * Destroy dt (installed as .do_destroy): the local object is destroyed with
 * dt_destroy(), then every stripe object in lo->ldo_stripe[].  The loop uses
 * the striping already present in lo; no lod_load_striping() call is made
 * here, presumably because lod_declare_object_destroy() loaded it.
 */
1922 static int lod_object_destroy(const struct lu_env *env,
1923 struct dt_object *dt, struct thandle *th)
1925 struct dt_object *next = dt_object_child(dt);
1926 struct lod_object *lo = lod_dt_obj(dt);
1930 /* destroy local object */
1931 rc = dt_destroy(env, next, th);
1935 /* destroy all underlying objects */
1936 for (i = 0; i < lo->ldo_stripenr; i++) {
1937 LASSERT(lo->ldo_stripe[i]);
1938 rc = dt_destroy(env, lo->ldo_stripe[i], th);
/*
 * Forward ->do_index_try() to the next layer (installed as .do_index_try).
 * When the lower object ends up with index operations and dt has none yet,
 * lod_index_ops is installed on dt.  That decision looks at
 * next->do_index_ops only, not at the rc of the lower call.
 */
1946 static int lod_index_try(const struct lu_env *env, struct dt_object *dt,
1947 const struct dt_index_features *feat)
1949 struct dt_object *next = dt_object_child(dt);
1953 LASSERT(next->do_ops);
1954 LASSERT(next->do_ops->do_index_try);
1956 rc = next->do_ops->do_index_try(env, next, feat);
1957 if (next->do_index_ops && dt->do_index_ops == NULL)
1958 dt->do_index_ops = &lod_index_ops;
/* Pass-through: declares a reference-count increment on the underlying
 * object via dt_declare_ref_add(). */
1963 static int lod_declare_ref_add(const struct lu_env *env,
1964 struct dt_object *dt, struct thandle *th)
1966 return dt_declare_ref_add(env, dt_object_child(dt), th);
/* Pass-through: forwards the reference-count increment to the underlying
 * object via dt_ref_add(). */
1969 static int lod_ref_add(const struct lu_env *env,
1970 struct dt_object *dt, struct thandle *th)
1972 return dt_ref_add(env, dt_object_child(dt), th);
/* Pass-through: declares a reference-count decrement on the underlying
 * object via dt_declare_ref_del(). */
1975 static int lod_declare_ref_del(const struct lu_env *env,
1976 struct dt_object *dt, struct thandle *th)
1978 return dt_declare_ref_del(env, dt_object_child(dt), th);
/* Pass-through: forwards the reference-count decrement to the underlying
 * object via dt_ref_del(). */
1981 static int lod_ref_del(const struct lu_env *env,
1982 struct dt_object *dt, struct thandle *th)
1984 return dt_ref_del(env, dt_object_child(dt), th);
/* Pass-through: obtains the capability from the underlying object via
 * dt_capa_get(), with old and opc handed on unchanged. */
1987 static struct obd_capa *lod_capa_get(const struct lu_env *env,
1988 struct dt_object *dt,
1989 struct lustre_capa *old, __u64 opc)
1991 return dt_capa_get(env, dt_object_child(dt), old, opc);
/* Pass-through: syncs the underlying object via dt_object_sync(). */
1994 static int lod_object_sync(const struct lu_env *env, struct dt_object *dt)
1996 return dt_object_sync(env, dt_object_child(dt));
/*
 * Forward a lock request (lh, einfo, policy) to the underlying object via
 * dt_object_lock() (installed as .do_object_lock).
 *
 * NOTE(review): the "declare setattr on the local object" remark inside the
 * body does not describe the dt_object_lock() call that follows it; it looks
 * copied from another function.
 */
1999 static int lod_object_lock(const struct lu_env *env,
2000 struct dt_object *dt, struct lustre_handle *lh,
2001 struct ldlm_enqueue_info *einfo,
2004 struct dt_object *next = dt_object_child(dt);
2009 * declare setattr on the local object
2011 rc = dt_object_lock(env, next, lh, einfo, policy);
/*
 * dt_object operation table of LOD objects: maps every do_* method to its
 * lod_* implementation in this file.
 */
2016 struct dt_object_operations lod_obj_ops = {
2017 .do_read_lock = lod_object_read_lock,
2018 .do_write_lock = lod_object_write_lock,
2019 .do_read_unlock = lod_object_read_unlock,
2020 .do_write_unlock = lod_object_write_unlock,
2021 .do_write_locked = lod_object_write_locked,
2022 .do_attr_get = lod_attr_get,
2023 .do_declare_attr_set = lod_declare_attr_set,
2024 .do_attr_set = lod_attr_set,
2025 .do_xattr_get = lod_xattr_get,
2026 .do_declare_xattr_set = lod_declare_xattr_set,
2027 .do_xattr_set = lod_xattr_set,
2028 .do_declare_xattr_del = lod_declare_xattr_del,
2029 .do_xattr_del = lod_xattr_del,
2030 .do_xattr_list = lod_xattr_list,
2031 .do_ah_init = lod_ah_init,
2032 .do_declare_create = lod_declare_object_create,
2033 .do_create = lod_object_create,
2034 .do_declare_destroy = lod_declare_object_destroy,
2035 .do_destroy = lod_object_destroy,
2036 .do_index_try = lod_index_try,
2037 .do_declare_ref_add = lod_declare_ref_add,
2038 .do_ref_add = lod_ref_add,
2039 .do_declare_ref_del = lod_declare_ref_del,
2040 .do_ref_del = lod_ref_del,
2041 .do_capa_get = lod_capa_get,
2042 .do_object_sync = lod_object_sync,
2043 .do_object_lock = lod_object_lock,
/* Pass-through read: calls ->dbo_read() of the underlying object with the
 * same buffer, position and capability. */
2046 static ssize_t lod_read(const struct lu_env *env, struct dt_object *dt,
2047 struct lu_buf *buf, loff_t *pos,
2048 struct lustre_capa *capa)
2050 struct dt_object *next = dt_object_child(dt);
2051 return next->do_body_ops->dbo_read(env, next, buf, pos, capa);
/* Pass-through: declares a record write on the underlying object via
 * dt_declare_record_write(). */
2054 static ssize_t lod_declare_write(const struct lu_env *env,
2055 struct dt_object *dt,
2056 const loff_t size, loff_t pos,
2059 return dt_declare_record_write(env, dt_object_child(dt),
/* Pass-through write: calls ->dbo_write() of the underlying object with all
 * arguments handed on unchanged. */
2063 static ssize_t lod_write(const struct lu_env *env, struct dt_object *dt,
2064 const struct lu_buf *buf, loff_t *pos,
2065 struct thandle *th, struct lustre_capa *capa, int iq)
2067 struct dt_object *next = dt_object_child(dt);
2069 return next->do_body_ops->dbo_write(env, next, buf, pos, th, capa, iq);
/*
 * Body operations used for symlinks: installed by lod_declare_object_create()
 * for DFT_SYM objects and by lod_object_start() for S_ISLNK objects.  All
 * three methods forward to the underlying object.
 */
2072 static const struct dt_body_operations lod_body_lnk_ops = {
2073 .dbo_read = lod_read,
2074 .dbo_declare_write = lod_declare_write,
2075 .dbo_write = lod_write
/*
 * ->loo_object_init() for local LOD objects: asks the child device
 * (d->lod_child) to allocate its slice of the object for the same header
 * and stacks that slice below o with lu_object_add().
 */
2078 static int lod_object_init(const struct lu_env *env, struct lu_object *o,
2079 const struct lu_object_conf *conf)
2081 struct lod_device *d = lu2lod_dev(o->lo_dev);
2082 struct lu_object *below;
2083 struct lu_device *under;
2087 * create local object
2089 under = &d->lod_child->dd_lu_dev;
2090 below = under->ld_ops->ldo_object_alloc(env, o->lo_header, under);
2094 lu_object_add(o, below);
/*
 * Release the striping state held by lo: ldo_dir_stripe is freed, a
 * reference is dropped with lu_object_put() on every non-NULL entry of
 * ldo_stripe[], the array itself (sized by ldo_stripes_allocated) is freed,
 * and ldo_stripenr / ldo_pattern are reset to 0.
 */
2099 void lod_object_free_striping(const struct lu_env *env, struct lod_object *lo)
2103 if (lo->ldo_dir_stripe != NULL) {
2104 OBD_FREE_PTR(lo->ldo_dir_stripe);
2105 lo->ldo_dir_stripe = NULL;
2108 if (lo->ldo_stripe) {
2109 LASSERT(lo->ldo_stripes_allocated > 0);
2111 for (i = 0; i < lo->ldo_stripenr; i++) {
2112 if (lo->ldo_stripe[i])
2113 lu_object_put(env, &lo->ldo_stripe[i]->do_lu);
/* i is reused to hold the byte size of the pointer array */
2116 i = sizeof(struct dt_object *) * lo->ldo_stripes_allocated;
2117 OBD_FREE(lo->ldo_stripe, i);
2118 lo->ldo_stripe = NULL;
2119 lo->ldo_stripes_allocated = 0;
2121 lo->ldo_stripenr = 0;
2122 lo->ldo_pattern = 0;
/*
 * ->loo_object_start(): when the mode cached in the object header says the
 * object is a symlink, lod_body_lnk_ops is installed as its body operations.
 */
2126 * ->start is called once all slices are initialized, including header's
2127 * cache for mode (object type). using the type we can initialize ops
2129 static int lod_object_start(const struct lu_env *env, struct lu_object *o)
2131 if (S_ISLNK(o->lo_header->loh_attr & S_IFMT))
2132 lu2lod_obj(o)->ldo_obj.do_body_ops = &lod_body_lnk_ops;
/*
 * ->loo_object_free(): releases the striping with
 * lod_object_free_striping(), clears the pool via
 * lod_object_set_pool(mo, NULL) and returns the lod_object to
 * lod_object_kmem.
 */
2136 static void lod_object_free(const struct lu_env *env, struct lu_object *o)
2138 struct lod_object *mo = lu2lod_obj(o);
2141 * release all underlying object pinned
2144 lod_object_free_striping(env, mo);
2146 lod_object_set_pool(mo, NULL);
2149 OBD_SLAB_FREE_PTR(mo, lod_object_kmem);
/* ->loo_object_release(): intentionally does nothing at the moment; see the
 * open question in the body. */
2152 static void lod_object_release(const struct lu_env *env, struct lu_object *o)
2154 /* XXX: shouldn't we release everything here in case if object
2155 * creation failed before? */
/* ->loo_object_print(): emits LUSTRE_LOD_NAME "-object@<address>" for the
 * object through the printer callback p and returns the callback's result. */
2158 static int lod_object_print(const struct lu_env *env, void *cookie,
2159 lu_printer_t p, const struct lu_object *l)
/* the cast only drops const for lu2lod_obj(); o is used just for printing */
2161 struct lod_object *o = lu2lod_obj((struct lu_object *) l);
2163 return (*p)(env, cookie, LUSTRE_LOD_NAME"-object@%p", o);
/* lu_object operations of local LOD objects; differs from lod_lu_robj_ops
 * only in the init method (lod_object_init). */
2166 struct lu_object_operations lod_lu_obj_ops = {
2167 .loo_object_init = lod_object_init,
2168 .loo_object_start = lod_object_start,
2169 .loo_object_free = lod_object_free,
2170 .loo_object_release = lod_object_release,
2171 .loo_object_print = lod_object_print,
/*
 * ->loo_object_init() for remote LOD objects: walks the MDT target
 * descriptors in lod->lod_mdt_descs for the target whose ltd_index equals
 * ldo_mds_num of the object, asks that target's device to allocate its slice
 * of the object and stacks the slice below lo with lu_object_add().
 *
 * NOTE(review): lod_putref(lod, ltd) presumably balances a reference taken
 * on the descriptor table before the scan -- confirm.
 */
2175 * Init remote lod object
2177 static int lod_robject_init(const struct lu_env *env, struct lu_object *lo,
2178 const struct lu_object_conf *conf)
2180 struct lod_device *lod = lu2lod_dev(lo->lo_dev);
2181 struct lod_tgt_descs *ltd = &lod->lod_mdt_descs;
2182 struct lu_device *c_dev = NULL;
2183 struct lu_object *c_obj;
2188 if (ltd->ltd_tgts_size > 0) {
2189 cfs_foreach_bit(ltd->ltd_tgt_bitmap, i) {
2190 struct lod_tgt_desc *tgt;
2191 tgt = LTD_TGT(ltd, i);
2192 LASSERT(tgt && tgt->ltd_tgt);
2193 if (tgt->ltd_index ==
2194 lu2lod_obj(lo)->ldo_mds_num) {
2195 c_dev = &(tgt->ltd_tgt->dd_lu_dev);
2200 lod_putref(lod, ltd);
/* c_dev stays NULL when no target matched ldo_mds_num */
2202 if (unlikely(c_dev == NULL))
2205 c_obj = c_dev->ld_ops->ldo_object_alloc(env, lo->lo_header, c_dev);
2206 if (unlikely(c_obj == NULL))
2209 lu_object_add(lo, c_obj);
/* lu_object operations of remote LOD objects; differs from lod_lu_obj_ops
 * only in the init method (lod_robject_init). */
2214 struct lu_object_operations lod_lu_robj_ops = {
2215 .loo_object_init = lod_robject_init,
2216 .loo_object_start = lod_object_start,
2217 .loo_object_free = lod_object_free,
2218 .loo_object_release = lod_object_release,
2219 .loo_object_print = lod_object_print,