Whamcloud - gitweb
2cbd0282b48969068642c8d850a5de8facfd508e
[fs/lustre-release.git] / lustre / lod / lod_object.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License version 2 for more details.  A copy is
14  * included in the COPYING file that accompanied this code.
15
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright  2009 Sun Microsystems, Inc. All rights reserved
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2012, 2013, Intel Corporation.
27  */
28 /*
29  * lustre/lod/lod_object.c
30  *
31  * Author: Alex Zhuravlev <alexey.zhuravlev@intel.com>
32  */
33
34 #define DEBUG_SUBSYSTEM S_MDS
35
36 #include <obd.h>
37 #include <obd_class.h>
38 #include <lustre_ver.h>
39 #include <obd_support.h>
40 #include <lprocfs_status.h>
41
42 #include <lustre_fid.h>
43 #include <lustre_param.h>
44 #include <lustre_fid.h>
45 #include <lustre_lmv.h>
46 #include <obd_lov.h>
47 #include <md_object.h>
48
49 #include "lod_internal.h"
50
/* Directory entry names used as index keys for the "." and ".." entries. */
static const char dot[] = ".";
static const char dotdot[] = "..";

/* Slab cache declared here; its definition lives outside this file. */
extern struct kmem_cache *lod_object_kmem;
/* Forward declaration; the initialized definition is not in this part of
 * the file. */
static const struct dt_body_operations lod_body_lnk_ops;
56
57 static int lod_index_lookup(const struct lu_env *env, struct dt_object *dt,
58                             struct dt_rec *rec, const struct dt_key *key,
59                             struct lustre_capa *capa)
60 {
61         struct dt_object *next = dt_object_child(dt);
62         return next->do_index_ops->dio_lookup(env, next, rec, key, capa);
63 }
64
/**
 * Declare an index insert by forwarding to dt_declare_insert() on the
 * child object of \a dt.
 *
 * \retval	result of dt_declare_insert()
 */
static int lod_declare_index_insert(const struct lu_env *env,
                                    struct dt_object *dt,
                                    const struct dt_rec *rec,
                                    const struct dt_key *key,
                                    struct thandle *handle)
{
        struct dt_object *child = dt_object_child(dt);

        return dt_declare_insert(env, child, rec, key, handle);
}
73
/**
 * Insert a (key, rec) pair by forwarding to dt_insert() on the child
 * object of \a dt; all other arguments are passed through untouched.
 *
 * \retval	result of dt_insert()
 */
static int lod_index_insert(const struct lu_env *env,
                            struct dt_object *dt,
                            const struct dt_rec *rec,
                            const struct dt_key *key,
                            struct thandle *th,
                            struct lustre_capa *capa,
                            int ign)
{
        struct dt_object *child = dt_object_child(dt);

        return dt_insert(env, child, rec, key, th, capa, ign);
}
84
/**
 * Declare an index delete by forwarding to dt_declare_delete() on the
 * child object of \a dt.
 *
 * \retval	result of dt_declare_delete()
 */
static int lod_declare_index_delete(const struct lu_env *env,
                                    struct dt_object *dt,
                                    const struct dt_key *key,
                                    struct thandle *th)
{
        struct dt_object *child = dt_object_child(dt);

        return dt_declare_delete(env, child, key, th);
}
92
/**
 * Delete \a key by forwarding to dt_delete() on the child object of \a dt.
 *
 * \retval	result of dt_delete()
 */
static int lod_index_delete(const struct lu_env *env,
                            struct dt_object *dt,
                            const struct dt_key *key,
                            struct thandle *th,
                            struct lustre_capa *capa)
{
        struct dt_object *child = dt_object_child(dt);

        return dt_delete(env, child, key, th, capa);
}
101
102 static struct dt_it *lod_it_init(const struct lu_env *env,
103                                  struct dt_object *dt, __u32 attr,
104                                  struct lustre_capa *capa)
105 {
106         struct dt_object        *next = dt_object_child(dt);
107         struct lod_it           *it = &lod_env_info(env)->lti_it;
108         struct dt_it            *it_next;
109
110
111         it_next = next->do_index_ops->dio_it.init(env, next, attr, capa);
112         if (IS_ERR(it_next))
113                 return it_next;
114
115         /* currently we do not use more than one iterator per thread
116          * so we store it in thread info. if at some point we need
117          * more active iterators in a single thread, we can allocate
118          * additional ones */
119         LASSERT(it->lit_obj == NULL);
120
121         it->lit_it = it_next;
122         it->lit_obj = next;
123
124         return (struct dt_it *)it;
125 }
126
/*
 * Sanity-check a lod_it before it is used: both the underlying object and
 * the underlying iterator must have been set.
 *
 * Wrapped in do { } while (0) so the macro expands to exactly one statement
 * and is safe in unbraced if/else bodies.
 */
#define LOD_CHECK_IT(env, it)                                   \
do {                                                            \
        LASSERT((it)->lit_obj != NULL);                         \
        LASSERT((it)->lit_it != NULL);                          \
} while (0)
132
133 void lod_it_fini(const struct lu_env *env, struct dt_it *di)
134 {
135         struct lod_it *it = (struct lod_it *)di;
136
137         LOD_CHECK_IT(env, it);
138         it->lit_obj->do_index_ops->dio_it.fini(env, it->lit_it);
139
140         /* the iterator not in use any more */
141         it->lit_obj = NULL;
142         it->lit_it = NULL;
143 }
144
145 int lod_it_get(const struct lu_env *env, struct dt_it *di,
146                const struct dt_key *key)
147 {
148         const struct lod_it *it = (const struct lod_it *)di;
149
150         LOD_CHECK_IT(env, it);
151         return it->lit_obj->do_index_ops->dio_it.get(env, it->lit_it, key);
152 }
153
154 void lod_it_put(const struct lu_env *env, struct dt_it *di)
155 {
156         struct lod_it *it = (struct lod_it *)di;
157
158         LOD_CHECK_IT(env, it);
159         return it->lit_obj->do_index_ops->dio_it.put(env, it->lit_it);
160 }
161
162 int lod_it_next(const struct lu_env *env, struct dt_it *di)
163 {
164         struct lod_it *it = (struct lod_it *)di;
165
166         LOD_CHECK_IT(env, it);
167         return it->lit_obj->do_index_ops->dio_it.next(env, it->lit_it);
168 }
169
170 struct dt_key *lod_it_key(const struct lu_env *env, const struct dt_it *di)
171 {
172         const struct lod_it *it = (const struct lod_it *)di;
173
174         LOD_CHECK_IT(env, it);
175         return it->lit_obj->do_index_ops->dio_it.key(env, it->lit_it);
176 }
177
178 int lod_it_key_size(const struct lu_env *env, const struct dt_it *di)
179 {
180         struct lod_it *it = (struct lod_it *)di;
181
182         LOD_CHECK_IT(env, it);
183         return it->lit_obj->do_index_ops->dio_it.key_size(env, it->lit_it);
184 }
185
186 int lod_it_rec(const struct lu_env *env, const struct dt_it *di,
187                struct dt_rec *rec, __u32 attr)
188 {
189         const struct lod_it *it = (const struct lod_it *)di;
190
191         LOD_CHECK_IT(env, it);
192         return it->lit_obj->do_index_ops->dio_it.rec(env, it->lit_it, rec, attr);
193 }
194
195 __u64 lod_it_store(const struct lu_env *env, const struct dt_it *di)
196 {
197         const struct lod_it *it = (const struct lod_it *)di;
198
199         LOD_CHECK_IT(env, it);
200         return it->lit_obj->do_index_ops->dio_it.store(env, it->lit_it);
201 }
202
203 int lod_it_load(const struct lu_env *env, const struct dt_it *di, __u64 hash)
204 {
205         const struct lod_it *it = (const struct lod_it *)di;
206
207         LOD_CHECK_IT(env, it);
208         return it->lit_obj->do_index_ops->dio_it.load(env, it->lit_it, hash);
209 }
210
211 int lod_it_key_rec(const struct lu_env *env, const struct dt_it *di,
212                    void* key_rec)
213 {
214         const struct lod_it *it = (const struct lod_it *)di;
215
216         LOD_CHECK_IT(env, it);
217         return it->lit_obj->do_index_ops->dio_it.key_rec(env, it->lit_it, key_rec);
218 }
219
220 static struct dt_index_operations lod_index_ops = {
221         .dio_lookup             = lod_index_lookup,
222         .dio_declare_insert     = lod_declare_index_insert,
223         .dio_insert             = lod_index_insert,
224         .dio_declare_delete     = lod_declare_index_delete,
225         .dio_delete             = lod_index_delete,
226         .dio_it = {
227                 .init           = lod_it_init,
228                 .fini           = lod_it_fini,
229                 .get            = lod_it_get,
230                 .put            = lod_it_put,
231                 .next           = lod_it_next,
232                 .key            = lod_it_key,
233                 .key_size       = lod_it_key_size,
234                 .rec            = lod_it_rec,
235                 .store          = lod_it_store,
236                 .load           = lod_it_load,
237                 .key_rec        = lod_it_key_rec,
238         }
239 };
240
/**
 * Take the read lock by calling dt_read_lock() on the child object of
 * \a dt with the given \a role.
 */
static void lod_object_read_lock(const struct lu_env *env,
                                 struct dt_object *dt, unsigned role)
{
        struct dt_object *child = dt_object_child(dt);

        dt_read_lock(env, child, role);
}
246
/**
 * Take the write lock by calling dt_write_lock() on the child object of
 * \a dt with the given \a role.
 */
static void lod_object_write_lock(const struct lu_env *env,
                                  struct dt_object *dt, unsigned role)
{
        struct dt_object *child = dt_object_child(dt);

        dt_write_lock(env, child, role);
}
252
/**
 * Drop the read lock by calling dt_read_unlock() on the child object of
 * \a dt.
 */
static void lod_object_read_unlock(const struct lu_env *env,
                                   struct dt_object *dt)
{
        struct dt_object *child = dt_object_child(dt);

        dt_read_unlock(env, child);
}
258
/**
 * Drop the write lock by calling dt_write_unlock() on the child object of
 * \a dt.
 */
static void lod_object_write_unlock(const struct lu_env *env,
                                    struct dt_object *dt)
{
        struct dt_object *child = dt_object_child(dt);

        dt_write_unlock(env, child);
}
264
/**
 * Report the write-lock state of the child object of \a dt.
 *
 * \retval	value returned by dt_write_locked() for the child
 */
static int lod_object_write_locked(const struct lu_env *env,
                                   struct dt_object *dt)
{
        struct dt_object *child = dt_object_child(dt);

        return dt_write_locked(env, child);
}
270
/**
 * Fetch attributes by forwarding to dt_attr_get() on the child object of
 * \a dt.
 *
 * \retval	result of dt_attr_get()
 */
static int lod_attr_get(const struct lu_env *env,
                        struct dt_object *dt,
                        struct lu_attr *attr,
                        struct lustre_capa *capa)
{
        struct dt_object *child = dt_object_child(dt);

        return dt_attr_get(env, child, attr, capa);
}
278
279 static int lod_declare_attr_set(const struct lu_env *env,
280                                 struct dt_object *dt,
281                                 const struct lu_attr *attr,
282                                 struct thandle *handle)
283 {
284         struct dt_object  *next = dt_object_child(dt);
285         struct lod_object *lo = lod_dt_obj(dt);
286         int                rc, i;
287         ENTRY;
288
289         /*
290          * declare setattr on the local object
291          */
292         rc = dt_declare_attr_set(env, next, attr, handle);
293         if (rc)
294                 RETURN(rc);
295
296         /* osp_declare_attr_set() ignores all attributes other than
297          * UID, GID, and size, and osp_attr_set() ignores all but UID
298          * and GID.  Declaration of size attr setting happens through
299          * lod_declare_init_size(), and not through this function.
300          * Therefore we need not load striping unless ownership is
301          * changing.  This should save memory and (we hope) speed up
302          * rename(). */
303         if (!S_ISDIR(dt->do_lu.lo_header->loh_attr)) {
304                 if (!(attr->la_valid & (LA_UID | LA_GID)))
305                         RETURN(rc);
306         } else {
307                 if (!(attr->la_valid & (LA_UID | LA_GID | LA_MODE |
308                                         LA_ATIME | LA_MTIME | LA_CTIME)))
309                         RETURN(rc);
310         }
311         /*
312          * load striping information, notice we don't do this when object
313          * is being initialized as we don't need this information till
314          * few specific cases like destroy, chown
315          */
316         rc = lod_load_striping(env, lo);
317         if (rc)
318                 RETURN(rc);
319
320         if (lo->ldo_stripenr == 0)
321                 RETURN(0);
322
323         if (!(attr->la_valid & ~(LA_ATIME | LA_MTIME | LA_CTIME))) {
324                 struct lu_attr   *la = &lod_env_info(env)->lti_attr;
325                 bool             setattr_time = false;
326
327                 rc = dt_attr_get(env, dt_object_child(dt), la,
328                                  BYPASS_CAPA);
329                 if (rc != 0)
330                         RETURN(rc);
331
332                 /* If it will only setattr time, it will only set
333                  * time < current_time */
334                 if ((attr->la_valid & LA_ATIME &&
335                      attr->la_atime < la->la_atime) ||
336                     (attr->la_valid & LA_CTIME &&
337                      attr->la_ctime < la->la_ctime) ||
338                     (attr->la_valid & LA_MTIME &&
339                      attr->la_mtime < la->la_mtime))
340                         setattr_time = true;
341
342                 if (!setattr_time)
343                         RETURN(0);
344         }
345         /*
346          * if object is striped declare changes on the stripes
347          */
348         LASSERT(lo->ldo_stripe);
349         for (i = 0; i < lo->ldo_stripenr; i++) {
350                 LASSERT(lo->ldo_stripe[i]);
351
352                 rc = dt_declare_attr_set(env, lo->ldo_stripe[i], attr, handle);
353                 if (rc) {
354                         CERROR("failed declaration: %d\n", rc);
355                         break;
356                 }
357         }
358
359         RETURN(rc);
360 }
361
362 static int lod_attr_set(const struct lu_env *env,
363                         struct dt_object *dt,
364                         const struct lu_attr *attr,
365                         struct thandle *handle,
366                         struct lustre_capa *capa)
367 {
368         struct dt_object  *next = dt_object_child(dt);
369         struct lod_object *lo = lod_dt_obj(dt);
370         int                rc, i;
371         ENTRY;
372
373         /*
374          * apply changes to the local object
375          */
376         rc = dt_attr_set(env, next, attr, handle, capa);
377         if (rc)
378                 RETURN(rc);
379
380         if (!S_ISDIR(dt->do_lu.lo_header->loh_attr)) {
381                 if (!(attr->la_valid & (LA_UID | LA_GID)))
382                         RETURN(rc);
383         } else {
384                 if (!(attr->la_valid & (LA_UID | LA_GID | LA_MODE |
385                                         LA_ATIME | LA_MTIME | LA_CTIME)))
386                         RETURN(rc);
387         }
388
389         if (lo->ldo_stripenr == 0)
390                 RETURN(0);
391
392         if (!(attr->la_valid & ~(LA_ATIME | LA_MTIME | LA_CTIME))) {
393                 struct lu_attr   *la = &lod_env_info(env)->lti_attr;
394                 bool             setattr_time = false;
395
396                 rc = dt_attr_get(env, dt_object_child(dt), la,
397                                  BYPASS_CAPA);
398                 if (rc != 0)
399                         RETURN(rc);
400
401                 /* If it will only setattr time, it will only set
402                  * time < current_time */
403                 if ((attr->la_valid & LA_ATIME &&
404                      attr->la_atime < la->la_atime) ||
405                     (attr->la_valid & LA_CTIME &&
406                      attr->la_atime < la->la_ctime) ||
407                     (attr->la_valid & LA_MTIME &&
408                      attr->la_atime < la->la_mtime))
409                         setattr_time = true;
410
411                 if (!setattr_time)
412                         RETURN(0);
413         }
414
415         /*
416          * if object is striped, apply changes to all the stripes
417          */
418         LASSERT(lo->ldo_stripe);
419         for (i = 0; i < lo->ldo_stripenr; i++) {
420                 LASSERT(lo->ldo_stripe[i]);
421                 rc = dt_attr_set(env, lo->ldo_stripe[i], attr, handle, capa);
422                 if (rc) {
423                         CERROR("failed declaration: %d\n", rc);
424                         break;
425                 }
426         }
427
428         RETURN(rc);
429 }
430
431 static int lod_xattr_get(const struct lu_env *env, struct dt_object *dt,
432                          struct lu_buf *buf, const char *name,
433                          struct lustre_capa *capa)
434 {
435         struct lod_thread_info  *info = lod_env_info(env);
436         struct lod_device       *dev = lu2lod_dev(dt->do_lu.lo_dev);
437         int                      rc, is_root;
438         ENTRY;
439
440         rc = dt_xattr_get(env, dt_object_child(dt), buf, name, capa);
441         if (rc != -ENODATA || !S_ISDIR(dt->do_lu.lo_header->loh_attr & S_IFMT))
442                 RETURN(rc);
443
444         /*
445          * lod returns default striping on the real root of the device
446          * this is like the root stores default striping for the whole
447          * filesystem. historically we've been using a different approach
448          * and store it in the config.
449          */
450         dt_root_get(env, dev->lod_child, &info->lti_fid);
451         is_root = lu_fid_eq(&info->lti_fid, lu_object_fid(&dt->do_lu));
452
453         if (is_root && strcmp(XATTR_NAME_LOV, name) == 0) {
454                 struct lov_user_md *lum = buf->lb_buf;
455                 struct lov_desc    *desc = &dev->lod_desc;
456
457                 if (buf->lb_buf == NULL) {
458                         rc = sizeof(*lum);
459                 } else if (buf->lb_len >= sizeof(*lum)) {
460                         lum->lmm_magic = cpu_to_le32(LOV_USER_MAGIC_V1);
461                         lmm_oi_set_seq(&lum->lmm_oi, FID_SEQ_LOV_DEFAULT);
462                         lmm_oi_set_id(&lum->lmm_oi, 0);
463                         lmm_oi_cpu_to_le(&lum->lmm_oi, &lum->lmm_oi);
464                         lum->lmm_pattern = cpu_to_le32(desc->ld_pattern);
465                         lum->lmm_stripe_size = cpu_to_le32(
466                                                 desc->ld_default_stripe_size);
467                         lum->lmm_stripe_count = cpu_to_le16(
468                                                 desc->ld_default_stripe_count);
469                         lum->lmm_stripe_offset = cpu_to_le16(
470                                                 desc->ld_default_stripe_offset);
471                         rc = sizeof(*lum);
472                 } else {
473                         rc = -ERANGE;
474                 }
475         }
476
477         RETURN(rc);
478 }
479
480 static int lod_verify_md_striping(struct lod_device *lod,
481                                   const struct lmv_user_md_v1 *lum)
482 {
483         int     rc = 0;
484         ENTRY;
485
486         if (unlikely(le32_to_cpu(lum->lum_magic) != LMV_USER_MAGIC))
487                 GOTO(out, rc = -EINVAL);
488
489         if (unlikely(le32_to_cpu(lum->lum_stripe_count) == 0))
490                 GOTO(out, rc = -EINVAL);
491
492         if (unlikely(le32_to_cpu(lum->lum_stripe_count) >
493                                 lod->lod_remote_mdt_count + 1))
494                 GOTO(out, rc = -EINVAL);
495 out:
496         if (rc != 0)
497                 CERROR("%s: invalid lmv_user_md: magic = %x, "
498                        "stripe_offset = %d, stripe_count = %u: rc = %d\n",
499                        lod2obd(lod)->obd_name, le32_to_cpu(lum->lum_magic),
500                        (int)le32_to_cpu(lum->lum_stripe_offset),
501                        le32_to_cpu(lum->lum_stripe_count), rc);
502         return rc;
503 }
504
505 int lod_prep_lmv_md(const struct lu_env *env, struct dt_object *dt,
506                     struct lu_buf *lmv_buf)
507 {
508         struct lod_thread_info  *info = lod_env_info(env);
509         struct lod_device       *lod = lu2lod_dev(dt->do_lu.lo_dev);
510         struct lod_object       *lo = lod_dt_obj(dt);
511         struct lmv_mds_md_v1    *lmm1;
512         int                     stripe_count;
513         int                     lmm_size;
514         int                     i;
515         int                     rc;
516         __u32                   mdtidx;
517         ENTRY;
518
519         LASSERT(lo->ldo_dir_striped != 0);
520         LASSERT(lo->ldo_stripenr > 0);
521         stripe_count = lo->ldo_stripenr + 1;
522         lmm_size = lmv_mds_md_size(stripe_count, LMV_MAGIC);
523         if (info->lti_ea_store_size < lmm_size) {
524                 rc = lod_ea_store_resize(info, lmm_size);
525                 if (rc != 0)
526                         RETURN(rc);
527         }
528
529         lmm1 = (struct lmv_mds_md_v1 *)info->lti_ea_store;
530         lmm1->lmv_magic = cpu_to_le32(LMV_MAGIC);
531         lmm1->lmv_stripe_count = cpu_to_le32(stripe_count);
532         lmm1->lmv_hash_type = cpu_to_le32(lo->ldo_dir_hash_type);
533         rc = lod_fld_lookup(env, lod, lu_object_fid(&dt->do_lu),
534                             &mdtidx, LU_SEQ_RANGE_MDT);
535         if (rc != 0)
536                 RETURN(rc);
537
538         lmm1->lmv_master_mdt_index = cpu_to_le32(mdtidx);
539         fid_cpu_to_le(&lmm1->lmv_stripe_fids[0], lu_object_fid(&dt->do_lu));
540         for (i = 0; i < lo->ldo_stripenr; i++) {
541                 struct dt_object *dto;
542
543                 dto = lo->ldo_stripe[i];
544                 LASSERT(dto != NULL);
545                 fid_cpu_to_le(&lmm1->lmv_stripe_fids[i + 1],
546                               lu_object_fid(&dto->do_lu));
547         }
548
549         lmv_buf->lb_buf = info->lti_ea_store;
550         lmv_buf->lb_len = lmm_size;
551         lo->ldo_dir_striping_cached = 1;
552
553         RETURN(rc);
554 }
555
556 int lod_parse_dir_striping(const struct lu_env *env, struct lod_object *lo,
557                            const struct lu_buf *buf)
558 {
559         struct lod_thread_info  *info = lod_env_info(env);
560         struct lod_device       *lod = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
561         struct lod_tgt_descs    *ltd = &lod->lod_mdt_descs;
562         struct dt_object        **stripe;
563         union lmv_mds_md        *lmm = buf->lb_buf;
564         struct lmv_mds_md_v1    *lmv1 = &lmm->lmv_md_v1;
565         struct lu_fid           *fid = &info->lti_fid;
566         int                     i;
567         int                     rc = 0;
568         ENTRY;
569
570         if (le32_to_cpu(lmv1->lmv_magic) != LMV_MAGIC_V1)
571                 RETURN(-EINVAL);
572
573         if (le32_to_cpu(lmv1->lmv_stripe_count) <= 1)
574                 RETURN(0);
575
576         fid_le_to_cpu(fid, &lmv1->lmv_stripe_fids[0]);
577         /* Do not load striping information for slave inode */
578         if (!lu_fid_eq(fid, lu_object_fid(&lo->ldo_obj.do_lu))) {
579                 lo->ldo_dir_slave_stripe = 1;
580                 RETURN(0);
581         }
582
583         LASSERT(lo->ldo_stripe == NULL);
584         OBD_ALLOC(stripe, sizeof(stripe[0]) *
585                   (le32_to_cpu(lmv1->lmv_stripe_count) - 1));
586         if (stripe == NULL)
587                 RETURN(-ENOMEM);
588
589         /* skip master stripe */
590         for (i = 1; i < le32_to_cpu(lmv1->lmv_stripe_count); i++) {
591                 struct lod_tgt_desc     *tgt;
592                 int                     idx;
593                 struct dt_object        *dto;
594
595                 fid_le_to_cpu(fid, &lmv1->lmv_stripe_fids[i]);
596                 rc = lod_fld_lookup(env, lod, fid,
597                                     &idx, LU_SEQ_RANGE_MDT);
598                 if (rc != 0)
599                         GOTO(out, rc);
600
601                 tgt = LTD_TGT(ltd, idx);
602                 if (tgt == NULL)
603                         GOTO(out, rc = -ESTALE);
604
605                 dto = dt_locate_at(env, tgt->ltd_tgt, fid,
606                                   lo->ldo_obj.do_lu.lo_dev->ld_site->ls_top_dev,
607                                   NULL);
608                 if (IS_ERR(dto))
609                         GOTO(out, rc = PTR_ERR(dto));
610
611                 stripe[i - 1] = dto;
612         }
613 out:
614         lo->ldo_stripe = stripe;
615         lo->ldo_stripenr = le32_to_cpu(lmv1->lmv_stripe_count) - 1;
616         lo->ldo_stripes_allocated = le32_to_cpu(lmv1->lmv_stripe_count) - 1;
617         if (rc != 0)
618                 lod_object_free_striping(env, lo);
619
620         RETURN(rc);
621 }
622
623 static int lod_prep_md_striped_create(const struct lu_env *env,
624                                       struct dt_object *dt,
625                                       struct lu_attr *attr,
626                                       const struct lmv_user_md_v1 *lum,
627                                       struct thandle *th)
628 {
629         struct lod_device       *lod = lu2lod_dev(dt->do_lu.lo_dev);
630         struct lod_tgt_descs    *ltd = &lod->lod_mdt_descs;
631         struct lod_object       *lo = lod_dt_obj(dt);
632         struct dt_object        **stripe;
633         struct lu_buf           lmv_buf;
634         int                     stripe_count;
635         int                     *idx_array;
636         int                     rc = 0;
637         int                     i;
638         int                     j;
639         ENTRY;
640
641         /* The lum has been verifed in lod_verify_md_striping */
642         LASSERT(le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC);
643         LASSERT(le32_to_cpu(lum->lum_stripe_count) > 0);
644
645         /* Do not need allocated master stripe */
646         stripe_count = le32_to_cpu(lum->lum_stripe_count);
647         OBD_ALLOC(stripe, sizeof(stripe[0]) * (stripe_count - 1));
648         if (stripe == NULL)
649                 RETURN(-ENOMEM);
650
651         OBD_ALLOC(idx_array, sizeof(idx_array[0]) * stripe_count);
652         if (idx_array == NULL)
653                 GOTO(out_free, rc = -ENOMEM);
654
655         idx_array[0] = le32_to_cpu(lum->lum_stripe_offset);
656         for (i = 1; i < stripe_count; i++) {
657                 struct lod_tgt_desc     *tgt;
658                 struct dt_object        *dto;
659                 struct lu_fid           fid;
660                 int                     idx;
661                 struct lu_object_conf   conf = { 0 };
662
663                 idx = (idx_array[i - 1] + 1) % (lod->lod_remote_mdt_count + 1);
664
665                 for (j = 0; j < lod->lod_remote_mdt_count;
666                      j++, idx = (idx + 1) % (lod->lod_remote_mdt_count + 1)) {
667                         bool already_allocated = false;
668                         int k;
669
670                         CDEBUG(D_INFO, "try idx %d, mdt cnt %d,"
671                                " allocated %d, last allocated %d\n", idx,
672                                lod->lod_remote_mdt_count, i, idx_array[i - 1]);
673
674                         /* Find next avaible target */
675                         if (!cfs_bitmap_check(ltd->ltd_tgt_bitmap, idx))
676                                 continue;
677
678                         /* check whether the idx already exists
679                          * in current allocated array */
680                         for (k = 0; k < i; k++) {
681                                 if (idx_array[k] == idx) {
682                                         already_allocated = true;
683                                         break;
684                                 }
685                         }
686
687                         if (already_allocated)
688                                 continue;
689
690                         break;
691                 }
692
693                 /* Can not allocate more stripes */
694                 if (j == lod->lod_remote_mdt_count) {
695                         CDEBUG(D_INFO, "%s: require stripes %d only get %d\n",
696                                lod2obd(lod)->obd_name, stripe_count, i - 1);
697                         break;
698                 }
699
700                 CDEBUG(D_INFO, "idx %d, mdt cnt %d,"
701                        " allocated %d, last allocated %d\n", idx,
702                        lod->lod_remote_mdt_count, i, idx_array[i - 1]);
703
704                 tgt = LTD_TGT(ltd, idx);
705                 LASSERT(tgt != NULL);
706
707                 rc = obd_fid_alloc(tgt->ltd_exp, &fid, NULL);
708                 if (rc < 0)
709                         GOTO(out_put, rc);
710                 rc = 0;
711
712                 conf.loc_flags = LOC_F_NEW;
713                 dto = dt_locate_at(env, tgt->ltd_tgt, &fid,
714                                   dt->do_lu.lo_dev->ld_site->ls_top_dev, &conf);
715                 if (IS_ERR(dto))
716                         GOTO(out_put, rc = PTR_ERR(dto));
717                 stripe[i - 1] = dto;
718                 idx_array[i] = idx;
719         }
720
721         lo->ldo_dir_striped = 1;
722         lo->ldo_stripe = stripe;
723         lo->ldo_stripenr = i - 1;
724         lo->ldo_stripes_allocated = stripe_count - 1;
725
726         if (lo->ldo_stripenr == 0)
727                 GOTO(out_put, rc = -ENOSPC);
728
729         rc = lod_prep_lmv_md(env, dt, &lmv_buf);
730         if (rc != 0)
731                 GOTO(out_put, rc);
732
733         for (i = 0; i < lo->ldo_stripenr; i++) {
734                 struct dt_object *dto;
735
736                 dto = stripe[i];
737                 /* only create slave striped object */
738                 rc = dt_declare_create(env, dto, attr, NULL, NULL, th);
739                 if (rc != 0)
740                         GOTO(out_put, rc);
741
742                 if (!dt_try_as_dir(env, dto))
743                         GOTO(out_put, rc = -EINVAL);
744
745                 rc = dt_declare_insert(env, dto,
746                      (const struct dt_rec *)lu_object_fid(&dto->do_lu),
747                      (const struct dt_key *)dot, th);
748                 if (rc != 0)
749                         GOTO(out_put, rc);
750
751                 /* master stripe FID will be put to .. */
752                 rc = dt_declare_insert(env, dto,
753                      (const struct dt_rec *)lu_object_fid(&dt->do_lu),
754                      (const struct dt_key *)dotdot, th);
755                 if (rc != 0)
756                         GOTO(out_put, rc);
757
758                 /* probably nothing to inherite */
759                 if (lo->ldo_striping_cached &&
760                     !LOVEA_DELETE_VALUES(lo->ldo_def_stripe_size,
761                                          lo->ldo_def_stripenr,
762                                          lo->ldo_def_stripe_offset)) {
763                         struct lod_thread_info  *info;
764                         struct lov_user_md_v3   *v3;
765
766                         /* sigh, lti_ea_store has been used for lmv_buf,
767                          * so we have to allocate buffer for default
768                          * stripe EA */
769                         OBD_ALLOC_PTR(v3);
770                         if (v3 == NULL)
771                                 GOTO(out_put, rc = -ENOMEM);
772
773                         memset(v3, 0, sizeof(*v3));
774                         v3->lmm_magic = cpu_to_le32(LOV_USER_MAGIC_V3);
775                         v3->lmm_stripe_count =
776                                 cpu_to_le32(lo->ldo_def_stripenr);
777                         v3->lmm_stripe_offset =
778                                 cpu_to_le32(lo->ldo_def_stripe_offset);
779                         v3->lmm_stripe_size =
780                                 cpu_to_le32(lo->ldo_def_stripe_size);
781                         if (lo->ldo_pool)
782                                 strncpy(v3->lmm_pool_name, lo->ldo_pool,
783                                         LOV_MAXPOOLNAME);
784
785                         info = lod_env_info(env);
786                         info->lti_buf.lb_buf = v3;
787                         info->lti_buf.lb_len = sizeof(*v3);
788                         rc = dt_declare_xattr_set(env, dto,
789                                                   &info->lti_buf,
790                                                   XATTR_NAME_LOV,
791                                                   0, th);
792                         OBD_FREE_PTR(v3);
793                         if (rc != 0)
794                                 GOTO(out_put, rc);
795                 }
796                 rc = dt_declare_xattr_set(env, dto, &lmv_buf, XATTR_NAME_LMV, 0,
797                                           th);
798                 if (rc != 0)
799                         GOTO(out_put, rc);
800         }
801
802         rc = dt_declare_xattr_set(env, dt, &lmv_buf, XATTR_NAME_LMV, 0, th);
803         if (rc != 0)
804                 GOTO(out_put, rc);
805
806 out_put:
807         if (rc < 0) {
808                 for (i = 0; i < stripe_count - 1; i++)
809                         if (stripe[i] != NULL)
810                                 lu_object_put(env, &stripe[i]->do_lu);
811                 OBD_FREE(stripe, sizeof(stripe[0]) * (stripe_count - 1));
812         }
813
814 out_free:
815         if (idx_array != NULL)
816                 OBD_FREE(idx_array, sizeof(idx_array[0]) * stripe_count);
817
818         RETURN(rc);
819 }
820
821 /**
822  * Declare create striped md object.
823  */
824 static int lod_declare_xattr_set_lmv(const struct lu_env *env,
825                                      struct dt_object *dt,
826                                      struct lu_attr *attr,
827                                      const struct lu_buf *lum_buf,
828                                      struct thandle *th)
829 {
830         struct lod_object       *lo = lod_dt_obj(dt);
831         struct lod_device       *lod = lu2lod_dev(dt->do_lu.lo_dev);
832         struct lmv_user_md_v1   *lum;
833         int                     rc;
834         ENTRY;
835
836         lum = lum_buf->lb_buf;
837         LASSERT(lum != NULL);
838
839         CDEBUG(D_INFO, "lum magic = %x count = %u offset = %d\n",
840                le32_to_cpu(lum->lum_magic), le32_to_cpu(lum->lum_stripe_count),
841                (int)le32_to_cpu(lum->lum_stripe_offset));
842
843         if (le32_to_cpu(lum->lum_stripe_count) <= 1)
844                 GOTO(out, rc = 0);
845
846         rc = lod_verify_md_striping(lod, lum);
847         if (rc != 0)
848                 GOTO(out, rc);
849
850         /* prepare dir striped objects */
851         rc = lod_prep_md_striped_create(env, dt, attr, lum, th);
852         if (rc != 0) {
853                 /* failed to create striping, let's reset
854                  * config so that others don't get confused */
855                 lod_object_free_striping(env, lo);
856                 GOTO(out, rc);
857         }
858 out:
859         RETURN(rc);
860 }
861
862 /*
863  * LOV xattr is a storage for striping, and LOD owns this xattr.
864  * but LOD allows others to control striping to some extent
865  * - to reset strping
866  * - to set new defined striping
867  * - to set new semi-defined striping
868  *   - number of stripes is defined
869  *   - number of stripes + osts are defined
870  *   - ??
871  */
872 static int lod_declare_xattr_set(const struct lu_env *env,
873                                  struct dt_object *dt,
874                                  const struct lu_buf *buf,
875                                  const char *name, int fl,
876                                  struct thandle *th)
877 {
878         struct dt_object *next = dt_object_child(dt);
879         struct lu_attr   *attr = &lod_env_info(env)->lti_attr;
880         __u32             mode;
881         int               rc;
882         ENTRY;
883
884         /*
885          * allow to declare predefined striping on a new (!mode) object
886          * which is supposed to be replay of regular file creation
887          * (when LOV setting is declared)
888          * LU_XATTR_REPLACE is set to indicate a layout swap
889          */
890         mode = dt->do_lu.lo_header->loh_attr & S_IFMT;
891         if ((S_ISREG(mode) || mode == 0) && strcmp(name, XATTR_NAME_LOV) == 0 &&
892              !(fl & LU_XATTR_REPLACE)) {
893                 /*
894                  * this is a request to manipulate object's striping
895                  */
896                 if (dt_object_exists(dt)) {
897                         rc = dt_attr_get(env, next, attr, BYPASS_CAPA);
898                         if (rc)
899                                 RETURN(rc);
900                 } else {
901                         memset(attr, 0, sizeof(*attr));
902                         attr->la_valid = LA_TYPE | LA_MODE;
903                         attr->la_mode = S_IFREG;
904                 }
905                 rc = lod_declare_striped_object(env, dt, attr, buf, th);
906         } else if (S_ISDIR(mode)) {
907                 struct lod_device       *d = lu2lod_dev(dt->do_lu.lo_dev);
908                 struct lod_object       *lo = lod_dt_obj(dt);
909                 int                     i;
910
911                 if (strcmp(name, XATTR_NAME_DEFAULT_LMV) == 0) {
912                         struct lmv_user_md_v1 *lum;
913
914                         LASSERT(buf != NULL && buf->lb_buf != NULL);
915                         lum = buf->lb_buf;
916                         rc = lod_verify_md_striping(d, lum);
917                         if (rc != 0)
918                                 RETURN(rc);
919                 }
920
921                 rc = dt_declare_xattr_set(env, next, buf, name, fl, th);
922                 if (rc != 0)
923                         RETURN(rc);
924
925                 /* set xattr to each stripes, if needed */
926                 rc = lod_load_striping(env, lo);
927                 if (rc != 0)
928                         RETURN(rc);
929
930                 if (lo->ldo_stripenr == 0)
931                         RETURN(rc);
932
933                 for (i = 0; i < lo->ldo_stripenr; i++) {
934                         LASSERT(lo->ldo_stripe[i]);
935                         rc = dt_declare_xattr_set(env, lo->ldo_stripe[i], buf,
936                                                   name, fl, th);
937                         if (rc != 0)
938                                 break;
939                 }
940         } else {
941                 rc = dt_declare_xattr_set(env, next, buf, name, fl, th);
942         }
943
944         RETURN(rc);
945 }
946
947 static void lod_lov_stripe_cache_clear(struct lod_object *lo)
948 {
949         lo->ldo_striping_cached = 0;
950         lo->ldo_def_striping_set = 0;
951         lod_object_set_pool(lo, NULL);
952         lo->ldo_def_stripe_size = 0;
953         lo->ldo_def_stripenr = 0;
954 }
955
956 static int lod_xattr_set_lov_on_dir(const struct lu_env *env,
957                                     struct dt_object *dt,
958                                     const struct lu_buf *buf,
959                                     const char *name, int fl,
960                                     struct thandle *th,
961                                     struct lustre_capa *capa)
962 {
963         struct lod_device       *d = lu2lod_dev(dt->do_lu.lo_dev);
964         struct dt_object        *next = dt_object_child(dt);
965         struct lod_object       *l = lod_dt_obj(dt);
966         struct lov_user_md_v1   *lum;
967         struct lov_user_md_v3   *v3 = NULL;
968         int                      rc;
969         ENTRY;
970
971         /* If it is striped dir, we should clear the stripe cache for
972          * slave stripe as well, but there are no effective way to
973          * notify the LOD on the slave MDT, so we do not cache stripe
974          * information for slave stripe for now. XXX*/
975         lod_lov_stripe_cache_clear(l);
976         LASSERT(buf != NULL && buf->lb_buf != NULL);
977         lum = buf->lb_buf;
978
979         rc = lod_verify_striping(d, buf, 0);
980         if (rc)
981                 RETURN(rc);
982
983         if (lum->lmm_magic == LOV_USER_MAGIC_V3)
984                 v3 = buf->lb_buf;
985
986         /* if { size, offset, count } = { 0, -1, 0 } and no pool
987          * (i.e. all default values specified) then delete default
988          * striping from dir. */
989         CDEBUG(D_OTHER,
990                 "set default striping: sz %u # %u offset %d %s %s\n",
991                 (unsigned)lum->lmm_stripe_size,
992                 (unsigned)lum->lmm_stripe_count,
993                 (int)lum->lmm_stripe_offset,
994                 v3 ? "from" : "", v3 ? v3->lmm_pool_name : "");
995
996         if (LOVEA_DELETE_VALUES((lum->lmm_stripe_size),
997                                 (lum->lmm_stripe_count),
998                                 (lum->lmm_stripe_offset)) &&
999                         lum->lmm_magic == LOV_USER_MAGIC_V1) {
1000                 rc = dt_xattr_del(env, next, name, th, capa);
1001                 if (rc == -ENODATA)
1002                         rc = 0;
1003         } else {
1004                 rc = dt_xattr_set(env, next, buf, name, fl, th, capa);
1005         }
1006
1007         RETURN(rc);
1008 }
1009
1010 static int lod_xattr_set_default_lmv_on_dir(const struct lu_env *env,
1011                                             struct dt_object *dt,
1012                                             const struct lu_buf *buf,
1013                                             const char *name, int fl,
1014                                             struct thandle *th,
1015                                             struct lustre_capa *capa)
1016 {
1017         struct dt_object        *next = dt_object_child(dt);
1018         struct lod_object       *l = lod_dt_obj(dt);
1019         struct lmv_user_md_v1   *lum;
1020         int                      rc;
1021         ENTRY;
1022
1023         LASSERT(buf != NULL && buf->lb_buf != NULL);
1024         lum = buf->lb_buf;
1025
1026         CDEBUG(D_OTHER, "set default stripe_count # %u stripe_offset %d\n",
1027               le32_to_cpu(lum->lum_stripe_count),
1028               (int)le32_to_cpu(lum->lum_stripe_offset));
1029
1030         if (LMVEA_DELETE_VALUES((le32_to_cpu(lum->lum_stripe_count)),
1031                                  le32_to_cpu(lum->lum_stripe_offset)) &&
1032                                 le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC) {
1033                 rc = dt_xattr_del(env, next, name, th, capa);
1034                 if (rc == -ENODATA)
1035                         rc = 0;
1036         } else {
1037                 rc = dt_xattr_set(env, next, buf, name, fl, th, capa);
1038                 if (rc != 0)
1039                         RETURN(rc);
1040
1041                 /* Update default stripe cache */
1042                 if (l->ldo_dir_stripe == NULL) {
1043                         OBD_ALLOC_PTR(l->ldo_dir_stripe);
1044                         if (l->ldo_dir_stripe == NULL)
1045                                 RETURN(-ENOMEM);
1046                 }
1047
1048                 l->ldo_dir_striping_cached = 0;
1049                 l->ldo_dir_def_striping_set = 1;
1050                 l->ldo_dir_def_stripenr =
1051                         le32_to_cpu(lum->lum_stripe_count) - 1;
1052         }
1053
1054         RETURN(rc);
1055 }
1056
1057 static int lod_xattr_set_lmv(const struct lu_env *env, struct dt_object *dt,
1058                              const struct lu_buf *buf, const char *name,
1059                              int fl, struct thandle *th,
1060                              struct lustre_capa *capa)
1061 {
1062         struct lod_object       *lo = lod_dt_obj(dt);
1063         struct lu_buf           lmv_buf;
1064         int                     i;
1065         int                     rc;
1066         ENTRY;
1067
1068         if (!S_ISDIR(dt->do_lu.lo_header->loh_attr))
1069                 RETURN(-ENOTDIR);
1070
1071         /* The stripes are supposed to be allocated in declare phase,
1072          * if there are no stripes being allocated, it will skip */
1073         if (lo->ldo_stripenr == 0)
1074                 RETURN(0);
1075
1076         rc = lod_prep_lmv_md(env, dt, &lmv_buf);
1077         if (rc != 0)
1078                 RETURN(rc);
1079
1080         for (i = 0; i < lo->ldo_stripenr; i++) {
1081                 struct dt_object *dto;
1082                 struct lu_attr  *attr = &lod_env_info(env)->lti_attr;
1083
1084                 dto = lo->ldo_stripe[i];
1085                 memset(attr, 0, sizeof(*attr));
1086                 attr->la_valid = LA_TYPE | LA_MODE;
1087                 attr->la_mode = S_IFDIR;
1088                 rc = dt_create(env, dto, attr, NULL, NULL, th);
1089                 if (rc != 0)
1090                         RETURN(rc);
1091
1092                 rc = dt_insert(env, dto,
1093                               (const struct dt_rec *)lu_object_fid(&dto->do_lu),
1094                               (const struct dt_key *)dot, th, capa, 0);
1095                 if (rc != 0)
1096                         RETURN(rc);
1097
1098                 rc = dt_insert(env, dto,
1099                               (struct dt_rec *)lu_object_fid(&dt->do_lu),
1100                               (const struct dt_key *)dotdot, th, capa, 0);
1101                 if (rc != 0)
1102                         RETURN(rc);
1103
1104                 if (lo->ldo_striping_cached &&
1105                     !LOVEA_DELETE_VALUES(lo->ldo_def_stripe_size,
1106                                          lo->ldo_def_stripenr,
1107                                          lo->ldo_def_stripe_offset)) {
1108                         struct lod_thread_info  *info;
1109                         struct lov_user_md_v3   *v3;
1110
1111                         /* sigh, lti_ea_store has been used for lmv_buf,
1112                          * so we have to allocate buffer for default
1113                          * stripe EA */
1114                         OBD_ALLOC_PTR(v3);
1115                         if (v3 == NULL)
1116                                 RETURN(-ENOMEM);
1117
1118                         memset(v3, 0, sizeof(*v3));
1119                         v3->lmm_magic = cpu_to_le32(LOV_USER_MAGIC_V3);
1120                         v3->lmm_stripe_count =
1121                                 cpu_to_le32(lo->ldo_def_stripenr);
1122                         v3->lmm_stripe_offset =
1123                                 cpu_to_le32(lo->ldo_def_stripe_offset);
1124                         v3->lmm_stripe_size =
1125                                 cpu_to_le32(lo->ldo_def_stripe_size);
1126                         if (lo->ldo_pool)
1127                                 strncpy(v3->lmm_pool_name, lo->ldo_pool,
1128                                         LOV_MAXPOOLNAME);
1129
1130                         info = lod_env_info(env);
1131                         info->lti_buf.lb_buf = v3;
1132                         info->lti_buf.lb_len = sizeof(*v3);
1133                         rc = dt_xattr_set(env, dto, &info->lti_buf,
1134                                           XATTR_NAME_LOV, 0, th, capa);
1135                         OBD_FREE_PTR(v3);
1136                         if (rc != 0)
1137                                 RETURN(rc);
1138                 }
1139
1140                 rc = dt_xattr_set(env, dto, &lmv_buf, XATTR_NAME_LMV, fl, th,
1141                                   capa);
1142         }
1143
1144         rc = dt_xattr_set(env, dt, &lmv_buf, XATTR_NAME_LMV, fl, th, capa);
1145
1146         RETURN(rc);
1147 }
1148
1149 static int lod_xattr_set(const struct lu_env *env,
1150                          struct dt_object *dt, const struct lu_buf *buf,
1151                          const char *name, int fl, struct thandle *th,
1152                          struct lustre_capa *capa)
1153 {
1154         struct lod_object       *lo = lod_dt_obj(dt);
1155         struct dt_object        *next = dt_object_child(dt);
1156         __u32                    attr;
1157         int                      rc;
1158         int                     i;
1159         ENTRY;
1160
1161         attr = dt->do_lu.lo_header->loh_attr & S_IFMT;
1162         if (S_ISDIR(attr) && strcmp(name, XATTR_NAME_LOV) == 0) {
1163                 rc = lod_xattr_set_lov_on_dir(env, dt, buf, name, fl, th, capa);
1164         } else if (S_ISREG(attr) && !strcmp(name, XATTR_NAME_LOV)) {
1165                 /* in case of lov EA swap, just set it
1166                  * if not, it is a replay so check striping match what we
1167                  * already have during req replay, declare_xattr_set()
1168                  * defines striping, then create() does the work
1169                 */
1170                 if (fl & LU_XATTR_REPLACE) {
1171                         /* free stripes, then update disk */
1172                         lod_object_free_striping(env, lod_dt_obj(dt));
1173                         rc = dt_xattr_set(env, next, buf, name, fl, th, capa);
1174                 } else {
1175                         rc = lod_striping_create(env, dt, NULL, NULL, th);
1176                 }
1177         } else if (strcmp(name, XATTR_NAME_DEFAULT_LMV) == 0) {
1178                 if (!S_ISDIR(attr))
1179                         RETURN(-ENOTDIR);
1180                 rc = lod_xattr_set_default_lmv_on_dir(env, dt, buf, name, fl,
1181                                                       th, capa);
1182         } else {
1183                 /*
1184                  * behave transparantly for all other EAs
1185                  */
1186                 rc = dt_xattr_set(env, next, buf, name, fl, th, capa);
1187         }
1188
1189         if (rc != 0 || !S_ISDIR(attr))
1190                 RETURN(rc);
1191
1192         if (lo->ldo_stripenr == 0)
1193                 RETURN(rc);
1194
1195         for (i = 0; i < lo->ldo_stripenr; i++) {
1196                 LASSERT(lo->ldo_stripe[i]);
1197                 rc = dt_xattr_set(env, lo->ldo_stripe[i], buf, name, fl, th,
1198                                   capa);
1199                 if (rc != 0)
1200                         break;
1201         }
1202
1203         RETURN(rc);
1204 }
1205
/*
 * Declare removal of xattr \a name: simply forwarded to the object of the
 * next layer.
 */
static int lod_declare_xattr_del(const struct lu_env *env,
                                 struct dt_object *dt, const char *name,
                                 struct thandle *th)
{
        struct dt_object *child = dt_object_child(dt);

        return dt_declare_xattr_del(env, child, name, th);
}
1212
1213 static int lod_xattr_del(const struct lu_env *env, struct dt_object *dt,
1214                          const char *name, struct thandle *th,
1215                          struct lustre_capa *capa)
1216 {
1217         if (!strcmp(name, XATTR_NAME_LOV))
1218                 lod_object_free_striping(env, lod_dt_obj(dt));
1219         return dt_xattr_del(env, dt_object_child(dt), name, th, capa);
1220 }
1221
/*
 * List the xattrs of \a dt into \a buf: simply forwarded to the object of
 * the next layer.
 */
static int lod_xattr_list(const struct lu_env *env,
                          struct dt_object *dt, struct lu_buf *buf,
                          struct lustre_capa *capa)
{
        struct dt_object *child = dt_object_child(dt);

        return dt_xattr_list(env, child, buf, capa);
}
1228
1229 int lod_object_set_pool(struct lod_object *o, char *pool)
1230 {
1231         int len;
1232
1233         if (o->ldo_pool) {
1234                 len = strlen(o->ldo_pool);
1235                 OBD_FREE(o->ldo_pool, len + 1);
1236                 o->ldo_pool = NULL;
1237         }
1238         if (pool) {
1239                 len = strlen(pool);
1240                 OBD_ALLOC(o->ldo_pool, len + 1);
1241                 if (o->ldo_pool == NULL)
1242                         return -ENOMEM;
1243                 strcpy(o->ldo_pool, pool);
1244         }
1245         return 0;
1246 }
1247
1248 static inline int lod_object_will_be_striped(int is_reg, const struct lu_fid *fid)
1249 {
1250         return (is_reg && fid_seq(fid) != FID_SEQ_LOCAL_FILE);
1251 }
1252
1253
1254 static int lod_cache_parent_lov_striping(const struct lu_env *env,
1255                                          struct lod_object *lp)
1256 {
1257         struct lod_thread_info  *info = lod_env_info(env);
1258         struct lov_user_md_v1   *v1 = NULL;
1259         struct lov_user_md_v3   *v3 = NULL;
1260         int                      rc;
1261         ENTRY;
1262
1263         /* called from MDD without parent being write locked,
1264          * lock it here */
1265         dt_write_lock(env, dt_object_child(&lp->ldo_obj), 0);
1266         rc = lod_get_lov_ea(env, lp);
1267         if (rc < 0)
1268                 GOTO(unlock, rc);
1269
1270         if (rc < sizeof(struct lov_user_md)) {
1271                 /* don't lookup for non-existing or invalid striping */
1272                 lp->ldo_def_striping_set = 0;
1273                 lp->ldo_striping_cached = 1;
1274                 lp->ldo_def_stripe_size = 0;
1275                 lp->ldo_def_stripenr = 0;
1276                 lp->ldo_def_stripe_offset = (typeof(v1->lmm_stripe_offset))(-1);
1277                 GOTO(unlock, rc = 0);
1278         }
1279
1280         rc = 0;
1281         v1 = info->lti_ea_store;
1282         if (v1->lmm_magic == __swab32(LOV_USER_MAGIC_V1))
1283                 lustre_swab_lov_user_md_v1(v1);
1284         else if (v1->lmm_magic == __swab32(LOV_USER_MAGIC_V3))
1285                 lustre_swab_lov_user_md_v3(v3);
1286
1287         if (v1->lmm_magic != LOV_MAGIC_V3 && v1->lmm_magic != LOV_MAGIC_V1)
1288                 GOTO(unlock, rc = 0);
1289
1290         if (v1->lmm_pattern != LOV_PATTERN_RAID0 && v1->lmm_pattern != 0)
1291                 GOTO(unlock, rc = 0);
1292
1293         lp->ldo_def_stripenr = v1->lmm_stripe_count;
1294         lp->ldo_def_stripe_size = v1->lmm_stripe_size;
1295         lp->ldo_def_stripe_offset = v1->lmm_stripe_offset;
1296         lp->ldo_striping_cached = 1;
1297         lp->ldo_def_striping_set = 1;
1298         if (v1->lmm_magic == LOV_USER_MAGIC_V3) {
1299                 /* XXX: sanity check here */
1300                 v3 = (struct lov_user_md_v3 *) v1;
1301                 if (v3->lmm_pool_name[0])
1302                         lod_object_set_pool(lp, v3->lmm_pool_name);
1303         }
1304         EXIT;
1305 unlock:
1306         dt_write_unlock(env, dt_object_child(&lp->ldo_obj));
1307         return rc;
1308 }
1309
1310
1311 static int lod_cache_parent_lmv_striping(const struct lu_env *env,
1312                                          struct lod_object *lp)
1313 {
1314         struct lod_thread_info  *info = lod_env_info(env);
1315         struct lmv_user_md_v1   *v1 = NULL;
1316         int                      rc;
1317         ENTRY;
1318
1319         /* called from MDD without parent being write locked,
1320          * lock it here */
1321         dt_write_lock(env, dt_object_child(&lp->ldo_obj), 0);
1322         rc = lod_get_default_lmv_ea(env, lp);
1323         if (rc < 0)
1324                 GOTO(unlock, rc);
1325
1326         if (rc < sizeof(struct lmv_user_md)) {
1327                 /* don't lookup for non-existing or invalid striping */
1328                 lp->ldo_dir_def_striping_set = 0;
1329                 lp->ldo_dir_striping_cached = 1;
1330                 lp->ldo_dir_def_stripenr = 0;
1331                 lp->ldo_dir_def_stripe_offset =
1332                                         (typeof(v1->lum_stripe_offset))(-1);
1333                 lp->ldo_dir_def_hash_type = LMV_HASH_TYPE_FNV_1A_64;
1334                 GOTO(unlock, rc = 0);
1335         }
1336
1337         rc = 0;
1338         v1 = info->lti_ea_store;
1339
1340         lp->ldo_dir_def_stripenr = le32_to_cpu(v1->lum_stripe_count) - 1;
1341         lp->ldo_dir_def_stripe_offset = le32_to_cpu(v1->lum_stripe_offset);
1342         lp->ldo_dir_def_hash_type = le32_to_cpu(v1->lum_hash_type);
1343         lp->ldo_dir_def_striping_set = 1;
1344         lp->ldo_dir_striping_cached = 1;
1345
1346         EXIT;
1347 unlock:
1348         dt_write_unlock(env, dt_object_child(&lp->ldo_obj));
1349         return rc;
1350 }
1351
1352 static int lod_cache_parent_striping(const struct lu_env *env,
1353                                      struct lod_object *lp,
1354                                      umode_t child_mode)
1355 {
1356         int rc = 0;
1357         ENTRY;
1358
1359         rc = lod_load_striping(env, lp);
1360         if (rc != 0)
1361                 RETURN(rc);
1362
1363         if (!lp->ldo_striping_cached) {
1364                 /* we haven't tried to get default striping for
1365                  * the directory yet, let's cache it in the object */
1366                 rc = lod_cache_parent_lov_striping(env, lp);
1367                 if (rc != 0)
1368                         RETURN(rc);
1369         }
1370
1371         if (S_ISDIR(child_mode) && !lp->ldo_dir_striping_cached)
1372                 rc = lod_cache_parent_lmv_striping(env, lp);
1373
1374         RETURN(rc);
1375 }
1376
1377 /**
1378  * used to transfer default striping data to the object being created
1379  */
1380 static void lod_ah_init(const struct lu_env *env,
1381                         struct dt_allocation_hint *ah,
1382                         struct dt_object *parent,
1383                         struct dt_object *child,
1384                         umode_t child_mode)
1385 {
1386         struct lod_device *d = lu2lod_dev(child->do_lu.lo_dev);
1387         struct dt_object  *nextp = NULL;
1388         struct dt_object  *nextc;
1389         struct lod_object *lp = NULL;
1390         struct lod_object *lc;
1391         struct lov_desc   *desc;
1392         ENTRY;
1393
1394         LASSERT(child);
1395
1396         if (likely(parent)) {
1397                 nextp = dt_object_child(parent);
1398                 lp = lod_dt_obj(parent);
1399         }
1400
1401         nextc = dt_object_child(child);
1402         lc = lod_dt_obj(child);
1403
1404         LASSERT(lc->ldo_stripenr == 0);
1405         LASSERT(lc->ldo_stripe == NULL);
1406
1407         /*
1408          * local object may want some hints
1409          * in case of late striping creation, ->ah_init()
1410          * can be called with local object existing
1411          */
1412         if (!dt_object_exists(nextc) || dt_object_remote(nextc))
1413                 nextc->do_ops->do_ah_init(env, ah, dt_object_remote(nextp) ?
1414                                           NULL : nextp, nextc, child_mode);
1415
1416         if (S_ISDIR(child_mode)) {
1417                 int rc;
1418
1419                 if (lc->ldo_dir_stripe == NULL) {
1420                         OBD_ALLOC_PTR(lc->ldo_dir_stripe);
1421                         if (lc->ldo_dir_stripe == NULL)
1422                                 return;
1423                 }
1424
1425                 if (lp->ldo_dir_stripe == NULL) {
1426                         OBD_ALLOC_PTR(lp->ldo_dir_stripe);
1427                         if (lp->ldo_dir_stripe == NULL)
1428                                 return;
1429                 }
1430
1431                 rc = lod_cache_parent_striping(env, lp, child_mode);
1432                 if (rc != 0)
1433                         return;
1434
1435                 /* transfer defaults to new directory */
1436                 if (lp->ldo_striping_cached) {
1437                         if (lp->ldo_pool)
1438                                 lod_object_set_pool(lc, lp->ldo_pool);
1439                         lc->ldo_def_stripenr = lp->ldo_def_stripenr;
1440                         lc->ldo_def_stripe_size = lp->ldo_def_stripe_size;
1441                         lc->ldo_def_stripe_offset = lp->ldo_def_stripe_offset;
1442                         lc->ldo_striping_cached = 1;
1443                         lc->ldo_def_striping_set = 1;
1444                         CDEBUG(D_OTHER, "inherite EA sz:%d off:%d nr:%d\n",
1445                                (int)lc->ldo_def_stripe_size,
1446                                (int)lc->ldo_def_stripe_offset,
1447                                (int)lc->ldo_def_stripenr);
1448                 }
1449
1450                 /* transfer dir defaults to new directory */
1451                 if (lp->ldo_dir_striping_cached) {
1452                         lc->ldo_dir_def_stripenr = lp->ldo_dir_def_stripenr;
1453                         lc->ldo_dir_def_stripe_offset =
1454                                                   lp->ldo_dir_def_stripe_offset;
1455                         lc->ldo_dir_def_hash_type =
1456                                                   lp->ldo_dir_def_hash_type;
1457                         lc->ldo_dir_striping_cached = 1;
1458                         lc->ldo_dir_def_striping_set = 1;
1459                         CDEBUG(D_INFO, "inherit default EA nr:%d off:%d t%u\n",
1460                                (int)lc->ldo_dir_def_stripenr,
1461                                (int)lc->ldo_dir_def_stripe_offset,
1462                                lc->ldo_dir_def_hash_type);
1463                 }
1464
1465                 /* If the directory is specified with certain stripes */
1466                 if (ah->dah_eadata != NULL && ah->dah_eadata_len != 0) {
1467                         const struct lmv_user_md_v1 *lum1 = ah->dah_eadata;
1468                         int rc;
1469
1470                         rc = lod_verify_md_striping(d, lum1);
1471                         if (rc == 0 &&
1472                                 le32_to_cpu(lum1->lum_stripe_count) > 1) {
1473                                 /* Directory will be striped only if
1474                                  * stripe_count > 1 */
1475                                 lc->ldo_stripenr =
1476                                         le32_to_cpu(lum1->lum_stripe_count) - 1;
1477                                 lc->ldo_dir_stripe_offset =
1478                                         le32_to_cpu(lum1->lum_stripe_offset);
1479                                 lc->ldo_dir_hash_type =
1480                                         le32_to_cpu(lum1->lum_hash_type);
1481                                 CDEBUG(D_INFO, "set stripe EA nr:%hu off:%d\n",
1482                                        lc->ldo_stripenr,
1483                                        (int)lc->ldo_dir_stripe_offset);
1484                         }
1485                 } else if (lp->ldo_dir_def_striping_set) {
1486                         /* If there are default dir stripe from parent */
1487                         lc->ldo_stripenr = lp->ldo_dir_def_stripenr;
1488                         lc->ldo_dir_stripe_offset =
1489                                         lp->ldo_dir_def_stripe_offset;
1490                         lc->ldo_dir_hash_type =
1491                                         lp->ldo_dir_def_hash_type;
1492                         CDEBUG(D_INFO, "inherit EA nr:%hu off:%d\n",
1493                                lc->ldo_stripenr,
1494                                (int)lc->ldo_dir_stripe_offset);
1495                 } else {
1496                         /* set default stripe for this directory */
1497                         lc->ldo_stripenr = 0;
1498                         lc->ldo_dir_stripe_offset = -1;
1499                 }
1500
1501                 CDEBUG(D_INFO, "final striping count:%hu, offset:%d\n",
1502                        lc->ldo_stripenr, (int)lc->ldo_dir_stripe_offset);
1503
1504                 goto out;
1505         }
1506
1507         /*
1508          * if object is going to be striped over OSTs, transfer default
1509          * striping information to the child, so that we can use it
1510          * during declaration and creation
1511          */
1512         if (!lod_object_will_be_striped(S_ISREG(child_mode),
1513                                         lu_object_fid(&child->do_lu)))
1514                 goto out;
1515         /*
1516          * try from the parent
1517          */
1518         if (likely(parent)) {
1519                 lod_cache_parent_striping(env, lp, child_mode);
1520
1521                 lc->ldo_def_stripe_offset = (__u16) -1;
1522
1523                 if (lp->ldo_def_striping_set) {
1524                         if (lp->ldo_pool)
1525                                 lod_object_set_pool(lc, lp->ldo_pool);
1526                         lc->ldo_stripenr = lp->ldo_def_stripenr;
1527                         lc->ldo_stripe_size = lp->ldo_def_stripe_size;
1528                         lc->ldo_def_stripe_offset = lp->ldo_def_stripe_offset;
1529                         CDEBUG(D_OTHER, "striping from parent: #%d, sz %d %s\n",
1530                                lc->ldo_stripenr, lc->ldo_stripe_size,
1531                                lp->ldo_pool ? lp->ldo_pool : "");
1532                 }
1533         }
1534
1535         /*
1536          * if the parent doesn't provide with specific pattern, grab fs-wide one
1537          */
1538         desc = &d->lod_desc;
1539         if (lc->ldo_stripenr == 0)
1540                 lc->ldo_stripenr = desc->ld_default_stripe_count;
1541         if (lc->ldo_stripe_size == 0)
1542                 lc->ldo_stripe_size = desc->ld_default_stripe_size;
1543         CDEBUG(D_OTHER, "final striping: # %d stripes, sz %d from %s\n",
1544                lc->ldo_stripenr, lc->ldo_stripe_size,
1545                lc->ldo_pool ? lc->ldo_pool : "");
1546
1547 out:
1548         /* we do not cache stripe information for slave stripe, see
1549          * lod_xattr_set_lov_on_dir */
1550         if (lp != NULL && lp->ldo_dir_slave_stripe)
1551                 lod_lov_stripe_cache_clear(lp);
1552
1553         EXIT;
1554 }
1555
1556 #define ll_do_div64(aaa,bbb)    do_div((aaa), (bbb))
1557 /*
1558  * this function handles a special case when truncate was done
1559  * on a stripeless object and now striping is being created
1560  * we can't lose that size, so we have to propagate it to newly
1561  * created object
1562  */
1563 static int lod_declare_init_size(const struct lu_env *env,
1564                                  struct dt_object *dt, struct thandle *th)
1565 {
1566         struct dt_object   *next = dt_object_child(dt);
1567         struct lod_object  *lo = lod_dt_obj(dt);
1568         struct lu_attr     *attr = &lod_env_info(env)->lti_attr;
1569         uint64_t            size, offs;
1570         int                 rc, stripe;
1571         ENTRY;
1572
1573         /* XXX: we support the simplest (RAID0) striping so far */
1574         LASSERT(lo->ldo_stripe || lo->ldo_stripenr == 0);
1575         LASSERT(lo->ldo_stripe_size > 0);
1576
1577         rc = dt_attr_get(env, next, attr, BYPASS_CAPA);
1578         LASSERT(attr->la_valid & LA_SIZE);
1579         if (rc)
1580                 RETURN(rc);
1581
1582         size = attr->la_size;
1583         if (size == 0)
1584                 RETURN(0);
1585
1586         /* ll_do_div64(a, b) returns a % b, and a = a / b */
1587         ll_do_div64(size, (__u64) lo->ldo_stripe_size);
1588         stripe = ll_do_div64(size, (__u64) lo->ldo_stripenr);
1589
1590         size = size * lo->ldo_stripe_size;
1591         offs = attr->la_size;
1592         size += ll_do_div64(offs, lo->ldo_stripe_size);
1593
1594         attr->la_valid = LA_SIZE;
1595         attr->la_size = size;
1596
1597         rc = dt_declare_attr_set(env, lo->ldo_stripe[stripe], attr, th);
1598
1599         RETURN(rc);
1600 }
1601
1602 /**
1603  * Create declaration of striped object
1604  */
1605 int lod_declare_striped_object(const struct lu_env *env, struct dt_object *dt,
1606                                struct lu_attr *attr,
1607                                const struct lu_buf *lovea, struct thandle *th)
1608 {
1609         struct lod_thread_info  *info = lod_env_info(env);
1610         struct dt_object        *next = dt_object_child(dt);
1611         struct lod_object       *lo = lod_dt_obj(dt);
1612         int                      rc;
1613         ENTRY;
1614
1615         if (OBD_FAIL_CHECK(OBD_FAIL_MDS_ALLOC_OBDO)) {
1616                 /* failed to create striping, let's reset
1617                  * config so that others don't get confused */
1618                 lod_object_free_striping(env, lo);
1619                 GOTO(out, rc = -ENOMEM);
1620         }
1621
1622         /* choose OST and generate appropriate objects */
1623         rc = lod_qos_prep_create(env, lo, attr, lovea, th);
1624         if (rc) {
1625                 /* failed to create striping, let's reset
1626                  * config so that others don't get confused */
1627                 lod_object_free_striping(env, lo);
1628                 GOTO(out, rc);
1629         }
1630
1631         /*
1632          * declare storage for striping data
1633          */
1634         info->lti_buf.lb_len = lov_mds_md_size(lo->ldo_stripenr,
1635                                 lo->ldo_pool ?  LOV_MAGIC_V3 : LOV_MAGIC_V1);
1636         rc = dt_declare_xattr_set(env, next, &info->lti_buf, XATTR_NAME_LOV,
1637                                   0, th);
1638         if (rc)
1639                 GOTO(out, rc);
1640
1641         /*
1642          * if striping is created with local object's size > 0,
1643          * we have to propagate this size to specific object
1644          * the case is possible only when local object was created previously
1645          */
1646         if (dt_object_exists(next))
1647                 rc = lod_declare_init_size(env, dt, th);
1648
1649 out:
1650         RETURN(rc);
1651 }
1652
1653 int lod_dir_striping_create_internal(const struct lu_env *env,
1654                                      struct dt_object *dt,
1655                                      struct lu_attr *attr,
1656                                      const struct dt_object_format *dof,
1657                                      struct thandle *th,
1658                                      bool declare)
1659 {
1660         struct lod_thread_info  *info = lod_env_info(env);
1661         struct dt_object        *next = dt_object_child(dt);
1662         struct lod_object       *lo = lod_dt_obj(dt);
1663         int                     rc;
1664         ENTRY;
1665
1666         if (lo->ldo_dir_def_striping_set &&
1667             !LMVEA_DELETE_VALUES(lo->ldo_stripenr,
1668                                  lo->ldo_dir_stripe_offset)) {
1669                 struct lmv_user_md_v1 *v1 = info->lti_ea_store;
1670                 int stripe_count = lo->ldo_stripenr + 1;
1671
1672                 if (info->lti_ea_store_size < sizeof(*v1)) {
1673                         rc = lod_ea_store_resize(info, sizeof(*v1));
1674                         if (rc != 0)
1675                                 RETURN(rc);
1676                         v1 = info->lti_ea_store;
1677                 }
1678
1679                 memset(v1, 0, sizeof(*v1));
1680                 v1->lum_magic = cpu_to_le32(LMV_USER_MAGIC);
1681                 v1->lum_stripe_count = cpu_to_le32(stripe_count);
1682                 v1->lum_stripe_offset =
1683                                 cpu_to_le32(lo->ldo_dir_stripe_offset);
1684
1685                 info->lti_buf.lb_buf = v1;
1686                 info->lti_buf.lb_len = sizeof(*v1);
1687
1688                 if (declare)
1689                         rc = lod_declare_xattr_set_lmv(env, dt, attr,
1690                                                        &info->lti_buf, th);
1691                 else
1692                         rc = lod_xattr_set_lmv(env, dt, &info->lti_buf,
1693                                                XATTR_NAME_LMV, 0, th,
1694                                                BYPASS_CAPA);
1695                 if (rc != 0)
1696                         RETURN(rc);
1697         }
1698
1699         /* Transfer default LMV striping from the parent */
1700         if (lo->ldo_dir_striping_cached &&
1701             !LMVEA_DELETE_VALUES(lo->ldo_dir_def_stripenr,
1702                                  lo->ldo_dir_def_stripe_offset)) {
1703                 struct lmv_user_md_v1 *v1 = info->lti_ea_store;
1704                 int def_stripe_count = lo->ldo_dir_def_stripenr + 1;
1705
1706                 if (info->lti_ea_store_size < sizeof(*v1)) {
1707                         rc = lod_ea_store_resize(info, sizeof(*v1));
1708                         if (rc != 0)
1709                                 RETURN(rc);
1710                         v1 = info->lti_ea_store;
1711                 }
1712
1713                 memset(v1, 0, sizeof(*v1));
1714                 v1->lum_magic = cpu_to_le32(LMV_USER_MAGIC);
1715                 v1->lum_stripe_count = cpu_to_le32(def_stripe_count);
1716                 v1->lum_stripe_offset =
1717                                 cpu_to_le32(lo->ldo_dir_def_stripe_offset);
1718                 v1->lum_hash_type =
1719                                 cpu_to_le32(lo->ldo_dir_def_hash_type);
1720
1721                 info->lti_buf.lb_buf = v1;
1722                 info->lti_buf.lb_len = sizeof(*v1);
1723                 if (declare)
1724                         rc = dt_declare_xattr_set(env, next, &info->lti_buf,
1725                                                   XATTR_NAME_DEFAULT_LMV, 0,
1726                                                   th);
1727                 else
1728                         rc = dt_xattr_set(env, next, &info->lti_buf,
1729                                            XATTR_NAME_DEFAULT_LMV, 0, th,
1730                                            BYPASS_CAPA);
1731                 if (rc != 0)
1732                         RETURN(rc);
1733         }
1734
1735         /* Transfer default LOV striping from the parent */
1736         if (lo->ldo_striping_cached &&
1737             !LOVEA_DELETE_VALUES(lo->ldo_def_stripe_size,
1738                                  lo->ldo_def_stripenr,
1739                                  lo->ldo_def_stripe_offset)) {
1740                 struct lov_user_md_v3 *v3 = info->lti_ea_store;
1741
1742                 if (info->lti_ea_store_size < sizeof(*v3)) {
1743                         rc = lod_ea_store_resize(info, sizeof(*v3));
1744                         if (rc != 0)
1745                                 RETURN(rc);
1746                         v3 = info->lti_ea_store;
1747                 }
1748
1749                 memset(v3, 0, sizeof(*v3));
1750                 v3->lmm_magic = cpu_to_le32(LOV_USER_MAGIC_V3);
1751                 v3->lmm_stripe_count = cpu_to_le16(lo->ldo_def_stripenr);
1752                 v3->lmm_stripe_offset = cpu_to_le16(lo->ldo_def_stripe_offset);
1753                 v3->lmm_stripe_size = cpu_to_le32(lo->ldo_def_stripe_size);
1754                 if (lo->ldo_pool)
1755                         strncpy(v3->lmm_pool_name, lo->ldo_pool,
1756                                 LOV_MAXPOOLNAME);
1757
1758                 info->lti_buf.lb_buf = v3;
1759                 info->lti_buf.lb_len = sizeof(*v3);
1760
1761                 if (declare)
1762                         rc = dt_declare_xattr_set(env, next, &info->lti_buf,
1763                                                   XATTR_NAME_LOV, 0, th);
1764                 else
1765                         rc = dt_xattr_set(env, next, &info->lti_buf,
1766                                           XATTR_NAME_LOV, 0, th,
1767                                           BYPASS_CAPA);
1768                 if (rc != 0)
1769                         RETURN(rc);
1770         }
1771
1772         RETURN(0);
1773 }
1774
1775 static int lod_declare_dir_striping_create(const struct lu_env *env,
1776                                            struct dt_object *dt,
1777                                            struct lu_attr *attr,
1778                                            struct dt_object_format *dof,
1779                                            struct thandle *th)
1780 {
1781         return lod_dir_striping_create_internal(env, dt, attr, dof, th, true);
1782 }
1783
1784 static int lod_dir_striping_create(const struct lu_env *env,
1785                                    struct dt_object *dt,
1786                                    struct lu_attr *attr,
1787                                    struct dt_object_format *dof,
1788                                    struct thandle *th)
1789 {
1790         return lod_dir_striping_create_internal(env, dt, attr, dof, th, false);
1791 }
1792
1793 static int lod_declare_object_create(const struct lu_env *env,
1794                                      struct dt_object *dt,
1795                                      struct lu_attr *attr,
1796                                      struct dt_allocation_hint *hint,
1797                                      struct dt_object_format *dof,
1798                                      struct thandle *th)
1799 {
1800         struct dt_object   *next = dt_object_child(dt);
1801         struct lod_object  *lo = lod_dt_obj(dt);
1802         int                 rc;
1803         ENTRY;
1804
1805         LASSERT(dof);
1806         LASSERT(attr);
1807         LASSERT(th);
1808
1809         /*
1810          * first of all, we declare creation of local object
1811          */
1812         rc = dt_declare_create(env, next, attr, hint, dof, th);
1813         if (rc)
1814                 GOTO(out, rc);
1815
1816         if (dof->dof_type == DFT_SYM)
1817                 dt->do_body_ops = &lod_body_lnk_ops;
1818
1819         /*
1820          * it's lod_ah_init() who has decided the object will striped
1821          */
1822         if (dof->dof_type == DFT_REGULAR) {
1823                 /* callers don't want stripes */
1824                 /* XXX: all tricky interactions with ->ah_make_hint() decided
1825                  * to use striping, then ->declare_create() behaving differently
1826                  * should be cleaned */
1827                 if (dof->u.dof_reg.striped == 0)
1828                         lo->ldo_stripenr = 0;
1829                 if (lo->ldo_stripenr > 0)
1830                         rc = lod_declare_striped_object(env, dt, attr,
1831                                                         NULL, th);
1832         } else if (dof->dof_type == DFT_DIR) {
1833                 rc = lod_declare_dir_striping_create(env, dt, attr, dof, th);
1834         }
1835 out:
1836         RETURN(rc);
1837 }
1838
1839 int lod_striping_create(const struct lu_env *env, struct dt_object *dt,
1840                         struct lu_attr *attr, struct dt_object_format *dof,
1841                         struct thandle *th)
1842 {
1843         struct lod_object *lo = lod_dt_obj(dt);
1844         int                rc = 0, i;
1845         ENTRY;
1846
1847         LASSERT(lo->ldo_striping_cached == 0);
1848
1849         /* create all underlying objects */
1850         for (i = 0; i < lo->ldo_stripenr; i++) {
1851                 LASSERT(lo->ldo_stripe[i]);
1852                 rc = dt_create(env, lo->ldo_stripe[i], attr, NULL, dof, th);
1853
1854                 if (rc)
1855                         break;
1856         }
1857         if (rc == 0)
1858                 rc = lod_generate_and_set_lovea(env, lo, th);
1859
1860         RETURN(rc);
1861 }
1862
1863 static int lod_object_create(const struct lu_env *env, struct dt_object *dt,
1864                              struct lu_attr *attr,
1865                              struct dt_allocation_hint *hint,
1866                              struct dt_object_format *dof, struct thandle *th)
1867 {
1868         struct dt_object   *next = dt_object_child(dt);
1869         struct lod_object  *lo = lod_dt_obj(dt);
1870         int                 rc;
1871         ENTRY;
1872
1873         /* create local object */
1874         rc = dt_create(env, next, attr, hint, dof, th);
1875
1876         if (rc == 0) {
1877                 if (S_ISDIR(dt->do_lu.lo_header->loh_attr))
1878                         rc = lod_dir_striping_create(env, dt, attr, dof, th);
1879                 else if (lo->ldo_stripe && dof->u.dof_reg.striped != 0)
1880                         rc = lod_striping_create(env, dt, attr, dof, th);
1881         }
1882
1883         RETURN(rc);
1884 }
1885
1886 static int lod_declare_object_destroy(const struct lu_env *env,
1887                                       struct dt_object *dt,
1888                                       struct thandle *th)
1889 {
1890         struct dt_object   *next = dt_object_child(dt);
1891         struct lod_object  *lo = lod_dt_obj(dt);
1892         int                 rc, i;
1893         ENTRY;
1894
1895         /*
1896          * we declare destroy for the local object
1897          */
1898         rc = dt_declare_destroy(env, next, th);
1899         if (rc)
1900                 RETURN(rc);
1901
1902         /*
1903          * load striping information, notice we don't do this when object
1904          * is being initialized as we don't need this information till
1905          * few specific cases like destroy, chown
1906          */
1907         rc = lod_load_striping(env, lo);
1908         if (rc)
1909                 RETURN(rc);
1910
1911         /* declare destroy for all underlying objects */
1912         for (i = 0; i < lo->ldo_stripenr; i++) {
1913                 LASSERT(lo->ldo_stripe[i]);
1914                 rc = dt_declare_destroy(env, lo->ldo_stripe[i], th);
1915
1916                 if (rc)
1917                         break;
1918         }
1919
1920         RETURN(rc);
1921 }
1922
1923 static int lod_object_destroy(const struct lu_env *env,
1924                 struct dt_object *dt, struct thandle *th)
1925 {
1926         struct dt_object  *next = dt_object_child(dt);
1927         struct lod_object *lo = lod_dt_obj(dt);
1928         int                rc, i;
1929         ENTRY;
1930
1931         /* destroy local object */
1932         rc = dt_destroy(env, next, th);
1933         if (rc)
1934                 RETURN(rc);
1935
1936         /* destroy all underlying objects */
1937         for (i = 0; i < lo->ldo_stripenr; i++) {
1938                 LASSERT(lo->ldo_stripe[i]);
1939                 rc = dt_destroy(env, lo->ldo_stripe[i], th);
1940                 if (rc)
1941                         break;
1942         }
1943
1944         RETURN(rc);
1945 }
1946
1947 static int lod_index_try(const struct lu_env *env, struct dt_object *dt,
1948                          const struct dt_index_features *feat)
1949 {
1950         struct dt_object *next = dt_object_child(dt);
1951         int               rc;
1952         ENTRY;
1953
1954         LASSERT(next->do_ops);
1955         LASSERT(next->do_ops->do_index_try);
1956
1957         rc = next->do_ops->do_index_try(env, next, feat);
1958         if (next->do_index_ops && dt->do_index_ops == NULL)
1959                 dt->do_index_ops = &lod_index_ops;
1960
1961         RETURN(rc);
1962 }
1963
/* Forward the ref_add declaration to the child object. */
static int lod_declare_ref_add(const struct lu_env *env,
                               struct dt_object *dt, struct thandle *th)
{
        struct dt_object *child = dt_object_child(dt);

        return dt_declare_ref_add(env, child, th);
}
1969
/* Forward ref_add to the child object. */
static int lod_ref_add(const struct lu_env *env,
                       struct dt_object *dt, struct thandle *th)
{
        struct dt_object *child = dt_object_child(dt);

        return dt_ref_add(env, child, th);
}
1975
/* Forward the ref_del declaration to the child object. */
static int lod_declare_ref_del(const struct lu_env *env,
                               struct dt_object *dt, struct thandle *th)
{
        struct dt_object *child = dt_object_child(dt);

        return dt_declare_ref_del(env, child, th);
}
1981
/* Forward ref_del to the child object. */
static int lod_ref_del(const struct lu_env *env,
                       struct dt_object *dt, struct thandle *th)
{
        struct dt_object *child = dt_object_child(dt);

        return dt_ref_del(env, child, th);
}
1987
1988 static struct obd_capa *lod_capa_get(const struct lu_env *env,
1989                                      struct dt_object *dt,
1990                                      struct lustre_capa *old, __u64 opc)
1991 {
1992         return dt_capa_get(env, dt_object_child(dt), old, opc);
1993 }
1994
/* Forward the sync request to the child object. */
static int lod_object_sync(const struct lu_env *env, struct dt_object *dt)
{
        struct dt_object *child = dt_object_child(dt);

        return dt_object_sync(env, child);
}
1999
2000 static int lod_object_lock(const struct lu_env *env,
2001                            struct dt_object *dt, struct lustre_handle *lh,
2002                            struct ldlm_enqueue_info *einfo,
2003                            void *policy)
2004 {
2005         struct dt_object   *next = dt_object_child(dt);
2006         int              rc;
2007         ENTRY;
2008
2009         /*
2010          * declare setattr on the local object
2011          */
2012         rc = dt_object_lock(env, next, lh, einfo, policy);
2013
2014         RETURN(rc);
2015 }
2016
2017 struct dt_object_operations lod_obj_ops = {
2018         .do_read_lock           = lod_object_read_lock,
2019         .do_write_lock          = lod_object_write_lock,
2020         .do_read_unlock         = lod_object_read_unlock,
2021         .do_write_unlock        = lod_object_write_unlock,
2022         .do_write_locked        = lod_object_write_locked,
2023         .do_attr_get            = lod_attr_get,
2024         .do_declare_attr_set    = lod_declare_attr_set,
2025         .do_attr_set            = lod_attr_set,
2026         .do_xattr_get           = lod_xattr_get,
2027         .do_declare_xattr_set   = lod_declare_xattr_set,
2028         .do_xattr_set           = lod_xattr_set,
2029         .do_declare_xattr_del   = lod_declare_xattr_del,
2030         .do_xattr_del           = lod_xattr_del,
2031         .do_xattr_list          = lod_xattr_list,
2032         .do_ah_init             = lod_ah_init,
2033         .do_declare_create      = lod_declare_object_create,
2034         .do_create              = lod_object_create,
2035         .do_declare_destroy     = lod_declare_object_destroy,
2036         .do_destroy             = lod_object_destroy,
2037         .do_index_try           = lod_index_try,
2038         .do_declare_ref_add     = lod_declare_ref_add,
2039         .do_ref_add             = lod_ref_add,
2040         .do_declare_ref_del     = lod_declare_ref_del,
2041         .do_ref_del             = lod_ref_del,
2042         .do_capa_get            = lod_capa_get,
2043         .do_object_sync         = lod_object_sync,
2044         .do_object_lock         = lod_object_lock,
2045 };
2046
2047 static ssize_t lod_read(const struct lu_env *env, struct dt_object *dt,
2048                         struct lu_buf *buf, loff_t *pos,
2049                         struct lustre_capa *capa)
2050 {
2051         struct dt_object *next = dt_object_child(dt);
2052         return next->do_body_ops->dbo_read(env, next, buf, pos, capa);
2053 }
2054
2055 static ssize_t lod_declare_write(const struct lu_env *env,
2056                                  struct dt_object *dt,
2057                                  const loff_t size, loff_t pos,
2058                                  struct thandle *th)
2059 {
2060         return dt_declare_record_write(env, dt_object_child(dt),
2061                                        size, pos, th);
2062 }
2063
2064 static ssize_t lod_write(const struct lu_env *env, struct dt_object *dt,
2065                          const struct lu_buf *buf, loff_t *pos,
2066                          struct thandle *th, struct lustre_capa *capa, int iq)
2067 {
2068         struct dt_object *next = dt_object_child(dt);
2069         LASSERT(next);
2070         return next->do_body_ops->dbo_write(env, next, buf, pos, th, capa, iq);
2071 }
2072
2073 static const struct dt_body_operations lod_body_lnk_ops = {
2074         .dbo_read               = lod_read,
2075         .dbo_declare_write      = lod_declare_write,
2076         .dbo_write              = lod_write
2077 };
2078
2079 static int lod_object_init(const struct lu_env *env, struct lu_object *lo,
2080                            const struct lu_object_conf *conf)
2081 {
2082         struct lod_device       *lod    = lu2lod_dev(lo->lo_dev);
2083         struct lu_device        *cdev   = NULL;
2084         struct lu_object        *cobj;
2085         struct lod_tgt_descs    *ltd    = NULL;
2086         struct lod_tgt_desc     *tgt;
2087         mdsno_t                  idx    = 0;
2088         int                      type   = LU_SEQ_RANGE_ANY;
2089         int                      rc;
2090         ENTRY;
2091
2092         rc = lod_fld_lookup(env, lod, lu_object_fid(lo), &idx, &type);
2093         if (rc != 0)
2094                 RETURN(rc);
2095
2096         if (type == LU_SEQ_RANGE_MDT &&
2097             idx == lu_site2seq(lo->lo_dev->ld_site)->ss_node_id) {
2098                 cdev = &lod->lod_child->dd_lu_dev;
2099         } else if (type == LU_SEQ_RANGE_MDT) {
2100                 ltd = &lod->lod_mdt_descs;
2101                 lod_getref(ltd);
2102         } else if (type == LU_SEQ_RANGE_OST) {
2103                 ltd = &lod->lod_ost_descs;
2104                 lod_getref(ltd);
2105         } else {
2106                 LBUG();
2107         }
2108
2109         if (ltd != NULL) {
2110                 if (ltd->ltd_tgts_size > idx &&
2111                     cfs_bitmap_check(ltd->ltd_tgt_bitmap, idx)) {
2112                         tgt = LTD_TGT(ltd, idx);
2113
2114                         LASSERT(tgt != NULL);
2115                         LASSERT(tgt->ltd_tgt != NULL);
2116
2117                         cdev = &(tgt->ltd_tgt->dd_lu_dev);
2118                 }
2119                 lod_putref(lod, ltd);
2120         }
2121
2122         if (unlikely(cdev == NULL))
2123                 RETURN(-ENOENT);
2124
2125         cobj = cdev->ld_ops->ldo_object_alloc(env, lo->lo_header, cdev);
2126         if (unlikely(cobj == NULL))
2127                 RETURN(-ENOMEM);
2128
2129         lu_object_add(lo, cobj);
2130
2131         RETURN(0);
2132 }
2133
2134 void lod_object_free_striping(const struct lu_env *env, struct lod_object *lo)
2135 {
2136         int i;
2137
2138         if (lo->ldo_dir_stripe != NULL) {
2139                 OBD_FREE_PTR(lo->ldo_dir_stripe);
2140                 lo->ldo_dir_stripe = NULL;
2141         }
2142
2143         if (lo->ldo_stripe) {
2144                 LASSERT(lo->ldo_stripes_allocated > 0);
2145
2146                 for (i = 0; i < lo->ldo_stripenr; i++) {
2147                         if (lo->ldo_stripe[i])
2148                                 lu_object_put(env, &lo->ldo_stripe[i]->do_lu);
2149                 }
2150
2151                 i = sizeof(struct dt_object *) * lo->ldo_stripes_allocated;
2152                 OBD_FREE(lo->ldo_stripe, i);
2153                 lo->ldo_stripe = NULL;
2154                 lo->ldo_stripes_allocated = 0;
2155         }
2156         lo->ldo_stripenr = 0;
2157         lo->ldo_pattern = 0;
2158 }
2159
2160 /*
2161  * ->start is called once all slices are initialized, including header's
2162  * cache for mode (object type). using the type we can initialize ops
2163  */
2164 static int lod_object_start(const struct lu_env *env, struct lu_object *o)
2165 {
2166         if (S_ISLNK(o->lo_header->loh_attr & S_IFMT))
2167                 lu2lod_obj(o)->ldo_obj.do_body_ops = &lod_body_lnk_ops;
2168         return 0;
2169 }
2170
/*
 * Free the lod slice of an object: drop its striping, clear its pool,
 * finalize the lu_object and return the lod_object to its slab cache.
 */
static void lod_object_free(const struct lu_env *env, struct lu_object *o)
{
        struct lod_object *mo = lu2lod_obj(o);

        /*
         * release all underlying object pinned
         */

        lod_object_free_striping(env, mo);

        /* passing NULL clears the pool of the object */
        lod_object_set_pool(mo, NULL);

        lu_object_fini(o);
        OBD_SLAB_FREE_PTR(mo, lod_object_kmem);
}
2186
/*
 * ->loo_object_release() hook of this layer; currently it does nothing.
 */
static void lod_object_release(const struct lu_env *env, struct lu_object *o)
{
        /* XXX: shouldn't we release everything here in case if object
         * creation failed before? */
}
2192
2193 static int lod_object_print(const struct lu_env *env, void *cookie,
2194                             lu_printer_t p, const struct lu_object *l)
2195 {
2196         struct lod_object *o = lu2lod_obj((struct lu_object *) l);
2197
2198         return (*p)(env, cookie, LUSTRE_LOD_NAME"-object@%p", o);
2199 }
2200
2201 struct lu_object_operations lod_lu_obj_ops = {
2202         .loo_object_init        = lod_object_init,
2203         .loo_object_start       = lod_object_start,
2204         .loo_object_free        = lod_object_free,
2205         .loo_object_release     = lod_object_release,
2206         .loo_object_print       = lod_object_print,
2207 };