Whamcloud - gitweb
LU-3531 mdt: delete striped directory
[fs/lustre-release.git] / lustre / lod / lod_object.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License version 2 for more details.  A copy is
14  * included in the COPYING file that accompanied this code.
15
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright  2009 Sun Microsystems, Inc. All rights reserved
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2012, 2013, Intel Corporation.
27  */
28 /*
29  * lustre/lod/lod_object.c
30  *
31  * Author: Alex Zhuravlev <alexey.zhuravlev@intel.com>
32  */
33
34 #define DEBUG_SUBSYSTEM S_MDS
35
36 #include <obd.h>
37 #include <obd_class.h>
38 #include <lustre_ver.h>
39 #include <obd_support.h>
40 #include <lprocfs_status.h>
41
42 #include <lustre_fid.h>
43 #include <lustre_param.h>
44 #include <lustre_fid.h>
45 #include <lustre_lmv.h>
46 #include <obd_lov.h>
47 #include <md_object.h>
48
49 #include "lod_internal.h"
50
/* Names of the self ("." ) and parent ("..") directory entries; used below
 * as index keys when declaring insertions into newly created slave stripes. */
static const char dot[] = ".";
static const char dotdot[] = "..";

/* Cache used for lod objects; declared extern, so it is defined in another
 * translation unit. */
extern struct kmem_cache *lod_object_kmem;
/* Forward declaration; presumably initialized further down in this file
 * (not visible from here -- TODO confirm). */
static const struct dt_body_operations lod_body_lnk_ops;
56
57 static int lod_index_lookup(const struct lu_env *env, struct dt_object *dt,
58                             struct dt_rec *rec, const struct dt_key *key,
59                             struct lustre_capa *capa)
60 {
61         struct dt_object *next = dt_object_child(dt);
62         return next->do_index_ops->dio_lookup(env, next, rec, key, capa);
63 }
64
/**
 * Declare insertion of the (\a key, \a rec) pair into the child's index.
 *
 * Pass-through to dt_declare_insert() on the object returned by
 * dt_object_child().
 *
 * \retval whatever dt_declare_insert() returns
 */
static int lod_declare_index_insert(const struct lu_env *env,
                                    struct dt_object *dt,
                                    const struct dt_rec *rec,
                                    const struct dt_key *key,
                                    struct thandle *handle)
{
        struct dt_object *child = dt_object_child(dt);

        return dt_declare_insert(env, child, rec, key, handle);
}
73
/**
 * Insert the (\a key, \a rec) pair into the child's index.
 *
 * Pass-through to dt_insert() on the object returned by dt_object_child();
 * \a ign is forwarded untouched.
 *
 * \retval whatever dt_insert() returns
 */
static int lod_index_insert(const struct lu_env *env, struct dt_object *dt,
                            const struct dt_rec *rec, const struct dt_key *key,
                            struct thandle *th, struct lustre_capa *capa,
                            int ign)
{
        struct dt_object *child = dt_object_child(dt);

        return dt_insert(env, child, rec, key, th, capa, ign);
}
84
/**
 * Declare removal of \a key from the child's index.
 *
 * Pass-through to dt_declare_delete() on the object returned by
 * dt_object_child().
 *
 * \retval whatever dt_declare_delete() returns
 */
static int lod_declare_index_delete(const struct lu_env *env,
                                    struct dt_object *dt,
                                    const struct dt_key *key,
                                    struct thandle *th)
{
        struct dt_object *child = dt_object_child(dt);

        return dt_declare_delete(env, child, key, th);
}
92
/**
 * Remove \a key from the child's index.
 *
 * Pass-through to dt_delete() on the object returned by dt_object_child().
 *
 * \retval whatever dt_delete() returns
 */
static int lod_index_delete(const struct lu_env *env, struct dt_object *dt,
                            const struct dt_key *key, struct thandle *th,
                            struct lustre_capa *capa)
{
        struct dt_object *child = dt_object_child(dt);

        return dt_delete(env, child, key, th, capa);
}
101
102 static struct dt_it *lod_it_init(const struct lu_env *env,
103                                  struct dt_object *dt, __u32 attr,
104                                  struct lustre_capa *capa)
105 {
106         struct dt_object        *next = dt_object_child(dt);
107         struct lod_it           *it = &lod_env_info(env)->lti_it;
108         struct dt_it            *it_next;
109
110
111         it_next = next->do_index_ops->dio_it.init(env, next, attr, capa);
112         if (IS_ERR(it_next))
113                 return it_next;
114
115         /* currently we do not use more than one iterator per thread
116          * so we store it in thread info. if at some point we need
117          * more active iterators in a single thread, we can allocate
118          * additional ones */
119         LASSERT(it->lit_obj == NULL);
120
121         it->lit_it = it_next;
122         it->lit_obj = next;
123
124         return (struct dt_it *)it;
125 }
126
/*
 * Assert that a lod iterator is in use, i.e. that both the object it
 * iterates over and the underlying iterator are set.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement and stays correct inside an unbraced if/else. \a env is
 * accepted for symmetry with the callers but is not evaluated.
 */
#define LOD_CHECK_IT(env, it)                                   \
do {                                                            \
        LASSERT((it)->lit_obj != NULL);                         \
        LASSERT((it)->lit_it != NULL);                          \
} while (0)
132
133 void lod_it_fini(const struct lu_env *env, struct dt_it *di)
134 {
135         struct lod_it *it = (struct lod_it *)di;
136
137         LOD_CHECK_IT(env, it);
138         it->lit_obj->do_index_ops->dio_it.fini(env, it->lit_it);
139
140         /* the iterator not in use any more */
141         it->lit_obj = NULL;
142         it->lit_it = NULL;
143 }
144
145 int lod_it_get(const struct lu_env *env, struct dt_it *di,
146                const struct dt_key *key)
147 {
148         const struct lod_it *it = (const struct lod_it *)di;
149
150         LOD_CHECK_IT(env, it);
151         return it->lit_obj->do_index_ops->dio_it.get(env, it->lit_it, key);
152 }
153
154 void lod_it_put(const struct lu_env *env, struct dt_it *di)
155 {
156         struct lod_it *it = (struct lod_it *)di;
157
158         LOD_CHECK_IT(env, it);
159         return it->lit_obj->do_index_ops->dio_it.put(env, it->lit_it);
160 }
161
162 int lod_it_next(const struct lu_env *env, struct dt_it *di)
163 {
164         struct lod_it *it = (struct lod_it *)di;
165
166         LOD_CHECK_IT(env, it);
167         return it->lit_obj->do_index_ops->dio_it.next(env, it->lit_it);
168 }
169
170 struct dt_key *lod_it_key(const struct lu_env *env, const struct dt_it *di)
171 {
172         const struct lod_it *it = (const struct lod_it *)di;
173
174         LOD_CHECK_IT(env, it);
175         return it->lit_obj->do_index_ops->dio_it.key(env, it->lit_it);
176 }
177
178 int lod_it_key_size(const struct lu_env *env, const struct dt_it *di)
179 {
180         struct lod_it *it = (struct lod_it *)di;
181
182         LOD_CHECK_IT(env, it);
183         return it->lit_obj->do_index_ops->dio_it.key_size(env, it->lit_it);
184 }
185
186 int lod_it_rec(const struct lu_env *env, const struct dt_it *di,
187                struct dt_rec *rec, __u32 attr)
188 {
189         const struct lod_it *it = (const struct lod_it *)di;
190
191         LOD_CHECK_IT(env, it);
192         return it->lit_obj->do_index_ops->dio_it.rec(env, it->lit_it, rec, attr);
193 }
194
195 __u64 lod_it_store(const struct lu_env *env, const struct dt_it *di)
196 {
197         const struct lod_it *it = (const struct lod_it *)di;
198
199         LOD_CHECK_IT(env, it);
200         return it->lit_obj->do_index_ops->dio_it.store(env, it->lit_it);
201 }
202
203 int lod_it_load(const struct lu_env *env, const struct dt_it *di, __u64 hash)
204 {
205         const struct lod_it *it = (const struct lod_it *)di;
206
207         LOD_CHECK_IT(env, it);
208         return it->lit_obj->do_index_ops->dio_it.load(env, it->lit_it, hash);
209 }
210
211 int lod_it_key_rec(const struct lu_env *env, const struct dt_it *di,
212                    void* key_rec)
213 {
214         const struct lod_it *it = (const struct lod_it *)di;
215
216         LOD_CHECK_IT(env, it);
217         return it->lit_obj->do_index_ops->dio_it.key_rec(env, it->lit_it, key_rec);
218 }
219
220 static struct dt_index_operations lod_index_ops = {
221         .dio_lookup             = lod_index_lookup,
222         .dio_declare_insert     = lod_declare_index_insert,
223         .dio_insert             = lod_index_insert,
224         .dio_declare_delete     = lod_declare_index_delete,
225         .dio_delete             = lod_index_delete,
226         .dio_it = {
227                 .init           = lod_it_init,
228                 .fini           = lod_it_fini,
229                 .get            = lod_it_get,
230                 .put            = lod_it_put,
231                 .next           = lod_it_next,
232                 .key            = lod_it_key,
233                 .key_size       = lod_it_key_size,
234                 .rec            = lod_it_rec,
235                 .store          = lod_it_store,
236                 .load           = lod_it_load,
237                 .key_rec        = lod_it_key_rec,
238         }
239 };
240
/**
 * Take the read lock of the underlying (child) object with the given
 * \a role.
 */
static void lod_object_read_lock(const struct lu_env *env,
                                 struct dt_object *dt, unsigned role)
{
        struct dt_object *child = dt_object_child(dt);

        dt_read_lock(env, child, role);
}
246
/**
 * Take the write lock of the underlying (child) object with the given
 * \a role.
 */
static void lod_object_write_lock(const struct lu_env *env,
                                  struct dt_object *dt, unsigned role)
{
        struct dt_object *child = dt_object_child(dt);

        dt_write_lock(env, child, role);
}
252
/**
 * Drop the read lock of the underlying (child) object.
 */
static void lod_object_read_unlock(const struct lu_env *env,
                                   struct dt_object *dt)
{
        struct dt_object *child = dt_object_child(dt);

        dt_read_unlock(env, child);
}
258
/**
 * Drop the write lock of the underlying (child) object.
 */
static void lod_object_write_unlock(const struct lu_env *env,
                                    struct dt_object *dt)
{
        struct dt_object *child = dt_object_child(dt);

        dt_write_unlock(env, child);
}
264
/**
 * Report the write-lock state of the underlying (child) object.
 *
 * \retval whatever dt_write_locked() returns for the child
 */
static int lod_object_write_locked(const struct lu_env *env,
                                   struct dt_object *dt)
{
        struct dt_object *child = dt_object_child(dt);

        return dt_write_locked(env, child);
}
270
/**
 * Read the attributes of the underlying (child) object into \a attr.
 *
 * \retval whatever dt_attr_get() returns for the child
 */
static int lod_attr_get(const struct lu_env *env, struct dt_object *dt,
                        struct lu_attr *attr, struct lustre_capa *capa)
{
        struct dt_object *child = dt_object_child(dt);

        return dt_attr_get(env, child, attr, capa);
}
278
279 static int lod_declare_attr_set(const struct lu_env *env,
280                                 struct dt_object *dt,
281                                 const struct lu_attr *attr,
282                                 struct thandle *handle)
283 {
284         struct dt_object  *next = dt_object_child(dt);
285         struct lod_object *lo = lod_dt_obj(dt);
286         int                rc, i;
287         ENTRY;
288
289         /*
290          * declare setattr on the local object
291          */
292         rc = dt_declare_attr_set(env, next, attr, handle);
293         if (rc)
294                 RETURN(rc);
295
296         /* osp_declare_attr_set() ignores all attributes other than
297          * UID, GID, and size, and osp_attr_set() ignores all but UID
298          * and GID.  Declaration of size attr setting happens through
299          * lod_declare_init_size(), and not through this function.
300          * Therefore we need not load striping unless ownership is
301          * changing.  This should save memory and (we hope) speed up
302          * rename(). */
303         if (!S_ISDIR(dt->do_lu.lo_header->loh_attr)) {
304                 if (!(attr->la_valid & (LA_UID | LA_GID)))
305                         RETURN(rc);
306
307                 if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_BAD_OWNER))
308                         RETURN(0);
309         } else {
310                 if (!(attr->la_valid & (LA_UID | LA_GID | LA_MODE |
311                                         LA_ATIME | LA_MTIME | LA_CTIME)))
312                         RETURN(rc);
313         }
314         /*
315          * load striping information, notice we don't do this when object
316          * is being initialized as we don't need this information till
317          * few specific cases like destroy, chown
318          */
319         rc = lod_load_striping(env, lo);
320         if (rc)
321                 RETURN(rc);
322
323         if (lo->ldo_stripenr == 0)
324                 RETURN(0);
325
326         if (!(attr->la_valid & ~(LA_ATIME | LA_MTIME | LA_CTIME))) {
327                 struct lu_attr   *la = &lod_env_info(env)->lti_attr;
328                 bool             setattr_time = false;
329
330                 rc = dt_attr_get(env, dt_object_child(dt), la,
331                                  BYPASS_CAPA);
332                 if (rc != 0)
333                         RETURN(rc);
334
335                 /* If it will only setattr time, it will only set
336                  * time < current_time */
337                 if ((attr->la_valid & LA_ATIME &&
338                      attr->la_atime < la->la_atime) ||
339                     (attr->la_valid & LA_CTIME &&
340                      attr->la_ctime < la->la_ctime) ||
341                     (attr->la_valid & LA_MTIME &&
342                      attr->la_mtime < la->la_mtime))
343                         setattr_time = true;
344
345                 if (!setattr_time)
346                         RETURN(0);
347         }
348         /*
349          * if object is striped declare changes on the stripes
350          */
351         LASSERT(lo->ldo_stripe);
352         for (i = 0; i < lo->ldo_stripenr; i++) {
353                 LASSERT(lo->ldo_stripe[i]);
354
355                 rc = dt_declare_attr_set(env, lo->ldo_stripe[i], attr, handle);
356                 if (rc) {
357                         CERROR("failed declaration: %d\n", rc);
358                         break;
359                 }
360         }
361
362         RETURN(rc);
363 }
364
365 static int lod_attr_set(const struct lu_env *env,
366                         struct dt_object *dt,
367                         const struct lu_attr *attr,
368                         struct thandle *handle,
369                         struct lustre_capa *capa)
370 {
371         struct dt_object  *next = dt_object_child(dt);
372         struct lod_object *lo = lod_dt_obj(dt);
373         int                rc, i;
374         ENTRY;
375
376         /*
377          * apply changes to the local object
378          */
379         rc = dt_attr_set(env, next, attr, handle, capa);
380         if (rc)
381                 RETURN(rc);
382
383         if (!S_ISDIR(dt->do_lu.lo_header->loh_attr)) {
384                 if (!(attr->la_valid & (LA_UID | LA_GID)))
385                         RETURN(rc);
386
387                 if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_BAD_OWNER))
388                         RETURN(0);
389         } else {
390                 if (!(attr->la_valid & (LA_UID | LA_GID | LA_MODE |
391                                         LA_ATIME | LA_MTIME | LA_CTIME)))
392                         RETURN(rc);
393         }
394
395         if (lo->ldo_stripenr == 0)
396                 RETURN(0);
397
398         if (!(attr->la_valid & ~(LA_ATIME | LA_MTIME | LA_CTIME))) {
399                 struct lu_attr   *la = &lod_env_info(env)->lti_attr;
400                 bool             setattr_time = false;
401
402                 rc = dt_attr_get(env, dt_object_child(dt), la,
403                                  BYPASS_CAPA);
404                 if (rc != 0)
405                         RETURN(rc);
406
407                 /* If it will only setattr time, it will only set
408                  * time < current_time */
409                 if ((attr->la_valid & LA_ATIME &&
410                      attr->la_atime < la->la_atime) ||
411                     (attr->la_valid & LA_CTIME &&
412                      attr->la_ctime < la->la_ctime) ||
413                     (attr->la_valid & LA_MTIME &&
414                      attr->la_mtime < la->la_mtime))
415                         setattr_time = true;
416
417                 if (!setattr_time)
418                         RETURN(0);
419         }
420
421         /*
422          * if object is striped, apply changes to all the stripes
423          */
424         LASSERT(lo->ldo_stripe);
425         for (i = 0; i < lo->ldo_stripenr; i++) {
426                 LASSERT(lo->ldo_stripe[i]);
427                 rc = dt_attr_set(env, lo->ldo_stripe[i], attr, handle, capa);
428                 if (rc) {
429                         CERROR("failed declaration: %d\n", rc);
430                         break;
431                 }
432         }
433
434         RETURN(rc);
435 }
436
437 static int lod_xattr_get(const struct lu_env *env, struct dt_object *dt,
438                          struct lu_buf *buf, const char *name,
439                          struct lustre_capa *capa)
440 {
441         struct lod_thread_info  *info = lod_env_info(env);
442         struct lod_device       *dev = lu2lod_dev(dt->do_lu.lo_dev);
443         int                      rc, is_root;
444         ENTRY;
445
446         rc = dt_xattr_get(env, dt_object_child(dt), buf, name, capa);
447         if (rc != -ENODATA || !S_ISDIR(dt->do_lu.lo_header->loh_attr & S_IFMT))
448                 RETURN(rc);
449
450         /*
451          * lod returns default striping on the real root of the device
452          * this is like the root stores default striping for the whole
453          * filesystem. historically we've been using a different approach
454          * and store it in the config.
455          */
456         dt_root_get(env, dev->lod_child, &info->lti_fid);
457         is_root = lu_fid_eq(&info->lti_fid, lu_object_fid(&dt->do_lu));
458
459         if (is_root && strcmp(XATTR_NAME_LOV, name) == 0) {
460                 struct lov_user_md *lum = buf->lb_buf;
461                 struct lov_desc    *desc = &dev->lod_desc;
462
463                 if (buf->lb_buf == NULL) {
464                         rc = sizeof(*lum);
465                 } else if (buf->lb_len >= sizeof(*lum)) {
466                         lum->lmm_magic = cpu_to_le32(LOV_USER_MAGIC_V1);
467                         lmm_oi_set_seq(&lum->lmm_oi, FID_SEQ_LOV_DEFAULT);
468                         lmm_oi_set_id(&lum->lmm_oi, 0);
469                         lmm_oi_cpu_to_le(&lum->lmm_oi, &lum->lmm_oi);
470                         lum->lmm_pattern = cpu_to_le32(desc->ld_pattern);
471                         lum->lmm_stripe_size = cpu_to_le32(
472                                                 desc->ld_default_stripe_size);
473                         lum->lmm_stripe_count = cpu_to_le16(
474                                                 desc->ld_default_stripe_count);
475                         lum->lmm_stripe_offset = cpu_to_le16(
476                                                 desc->ld_default_stripe_offset);
477                         rc = sizeof(*lum);
478                 } else {
479                         rc = -ERANGE;
480                 }
481         }
482
483         RETURN(rc);
484 }
485
486 static int lod_verify_md_striping(struct lod_device *lod,
487                                   const struct lmv_user_md_v1 *lum)
488 {
489         int     rc = 0;
490         ENTRY;
491
492         if (unlikely(le32_to_cpu(lum->lum_magic) != LMV_USER_MAGIC))
493                 GOTO(out, rc = -EINVAL);
494
495         if (unlikely(le32_to_cpu(lum->lum_stripe_count) == 0))
496                 GOTO(out, rc = -EINVAL);
497
498         if (unlikely(le32_to_cpu(lum->lum_stripe_count) >
499                                 lod->lod_remote_mdt_count + 1))
500                 GOTO(out, rc = -EINVAL);
501 out:
502         if (rc != 0)
503                 CERROR("%s: invalid lmv_user_md: magic = %x, "
504                        "stripe_offset = %d, stripe_count = %u: rc = %d\n",
505                        lod2obd(lod)->obd_name, le32_to_cpu(lum->lum_magic),
506                        (int)le32_to_cpu(lum->lum_stripe_offset),
507                        le32_to_cpu(lum->lum_stripe_count), rc);
508         return rc;
509 }
510
511 int lod_prep_lmv_md(const struct lu_env *env, struct dt_object *dt,
512                     struct lu_buf *lmv_buf)
513 {
514         struct lod_thread_info  *info = lod_env_info(env);
515         struct lod_device       *lod = lu2lod_dev(dt->do_lu.lo_dev);
516         struct lod_object       *lo = lod_dt_obj(dt);
517         struct lmv_mds_md_v1    *lmm1;
518         int                     stripe_count;
519         int                     lmm_size;
520         int                     type = LU_SEQ_RANGE_ANY;
521         int                     i;
522         int                     rc;
523         __u32                   mdtidx;
524         ENTRY;
525
526         LASSERT(lo->ldo_dir_striped != 0);
527         LASSERT(lo->ldo_stripenr > 0);
528         stripe_count = lo->ldo_stripenr + 1;
529         lmm_size = lmv_mds_md_size(stripe_count, LMV_MAGIC);
530         if (info->lti_ea_store_size < lmm_size) {
531                 rc = lod_ea_store_resize(info, lmm_size);
532                 if (rc != 0)
533                         RETURN(rc);
534         }
535
536         lmm1 = (struct lmv_mds_md_v1 *)info->lti_ea_store;
537         lmm1->lmv_magic = cpu_to_le32(LMV_MAGIC);
538         lmm1->lmv_stripe_count = cpu_to_le32(stripe_count);
539         lmm1->lmv_hash_type = cpu_to_le32(lo->ldo_dir_hash_type);
540         rc = lod_fld_lookup(env, lod, lu_object_fid(&dt->do_lu),
541                             &mdtidx, &type);
542         if (rc != 0)
543                 RETURN(rc);
544
545         lmm1->lmv_master_mdt_index = cpu_to_le32(mdtidx);
546         fid_cpu_to_le(&lmm1->lmv_stripe_fids[0], lu_object_fid(&dt->do_lu));
547         for (i = 0; i < lo->ldo_stripenr; i++) {
548                 struct dt_object *dto;
549
550                 dto = lo->ldo_stripe[i];
551                 LASSERT(dto != NULL);
552                 fid_cpu_to_le(&lmm1->lmv_stripe_fids[i + 1],
553                               lu_object_fid(&dto->do_lu));
554         }
555
556         lmv_buf->lb_buf = info->lti_ea_store;
557         lmv_buf->lb_len = lmm_size;
558         lo->ldo_dir_striping_cached = 1;
559
560         RETURN(rc);
561 }
562
563 int lod_parse_dir_striping(const struct lu_env *env, struct lod_object *lo,
564                            const struct lu_buf *buf)
565 {
566         struct lod_thread_info  *info = lod_env_info(env);
567         struct lod_device       *lod = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
568         struct lod_tgt_descs    *ltd = &lod->lod_mdt_descs;
569         struct dt_object        **stripe;
570         union lmv_mds_md        *lmm = buf->lb_buf;
571         struct lmv_mds_md_v1    *lmv1 = &lmm->lmv_md_v1;
572         struct lu_fid           *fid = &info->lti_fid;
573         int                     i;
574         int                     rc = 0;
575         ENTRY;
576
577         if (le32_to_cpu(lmv1->lmv_magic) != LMV_MAGIC_V1)
578                 RETURN(-EINVAL);
579
580         if (le32_to_cpu(lmv1->lmv_stripe_count) <= 1)
581                 RETURN(0);
582
583         fid_le_to_cpu(fid, &lmv1->lmv_stripe_fids[0]);
584         /* Do not load striping information for slave inode */
585         if (!lu_fid_eq(fid, lu_object_fid(&lo->ldo_obj.do_lu))) {
586                 lo->ldo_dir_slave_stripe = 1;
587                 RETURN(0);
588         }
589
590         LASSERT(lo->ldo_stripe == NULL);
591         OBD_ALLOC(stripe, sizeof(stripe[0]) *
592                   (le32_to_cpu(lmv1->lmv_stripe_count) - 1));
593         if (stripe == NULL)
594                 RETURN(-ENOMEM);
595
596         /* skip master stripe */
597         for (i = 1; i < le32_to_cpu(lmv1->lmv_stripe_count); i++) {
598                 struct lod_tgt_desc     *tgt;
599                 int                     idx;
600                 int                     type = LU_SEQ_RANGE_ANY;
601                 struct dt_object        *dto;
602
603                 fid_le_to_cpu(fid, &lmv1->lmv_stripe_fids[i]);
604                 rc = lod_fld_lookup(env, lod, fid, &idx, &type);
605                 if (rc != 0)
606                         GOTO(out, rc);
607
608                 tgt = LTD_TGT(ltd, idx);
609                 if (tgt == NULL)
610                         GOTO(out, rc = -ESTALE);
611
612                 dto = dt_locate_at(env, tgt->ltd_tgt, fid,
613                                   lo->ldo_obj.do_lu.lo_dev->ld_site->ls_top_dev,
614                                   NULL);
615                 if (IS_ERR(dto))
616                         GOTO(out, rc = PTR_ERR(dto));
617
618                 stripe[i - 1] = dto;
619         }
620 out:
621         lo->ldo_stripe = stripe;
622         lo->ldo_stripenr = le32_to_cpu(lmv1->lmv_stripe_count) - 1;
623         lo->ldo_stripes_allocated = le32_to_cpu(lmv1->lmv_stripe_count) - 1;
624         if (rc != 0)
625                 lod_object_free_striping(env, lo);
626
627         RETURN(rc);
628 }
629
630 static int lod_prep_md_striped_create(const struct lu_env *env,
631                                       struct dt_object *dt,
632                                       struct lu_attr *attr,
633                                       const struct lmv_user_md_v1 *lum,
634                                       struct thandle *th)
635 {
636         struct lod_device       *lod = lu2lod_dev(dt->do_lu.lo_dev);
637         struct lod_tgt_descs    *ltd = &lod->lod_mdt_descs;
638         struct lod_object       *lo = lod_dt_obj(dt);
639         struct dt_object        **stripe;
640         struct lu_buf           lmv_buf;
641         int                     stripe_count;
642         int                     *idx_array;
643         int                     rc = 0;
644         int                     i;
645         int                     j;
646         ENTRY;
647
648         /* The lum has been verifed in lod_verify_md_striping */
649         LASSERT(le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC);
650         LASSERT(le32_to_cpu(lum->lum_stripe_count) > 0);
651
652         /* Do not need allocated master stripe */
653         stripe_count = le32_to_cpu(lum->lum_stripe_count);
654         OBD_ALLOC(stripe, sizeof(stripe[0]) * (stripe_count - 1));
655         if (stripe == NULL)
656                 RETURN(-ENOMEM);
657
658         OBD_ALLOC(idx_array, sizeof(idx_array[0]) * stripe_count);
659         if (idx_array == NULL)
660                 GOTO(out_free, rc = -ENOMEM);
661
662         idx_array[0] = le32_to_cpu(lum->lum_stripe_offset);
663         for (i = 1; i < stripe_count; i++) {
664                 struct lod_tgt_desc     *tgt;
665                 struct dt_object        *dto;
666                 struct lu_fid           fid;
667                 int                     idx;
668                 struct lu_object_conf   conf = { 0 };
669
670                 idx = (idx_array[i - 1] + 1) % (lod->lod_remote_mdt_count + 1);
671
672                 for (j = 0; j < lod->lod_remote_mdt_count;
673                      j++, idx = (idx + 1) % (lod->lod_remote_mdt_count + 1)) {
674                         bool already_allocated = false;
675                         int k;
676
677                         CDEBUG(D_INFO, "try idx %d, mdt cnt %d,"
678                                " allocated %d, last allocated %d\n", idx,
679                                lod->lod_remote_mdt_count, i, idx_array[i - 1]);
680
681                         /* Find next avaible target */
682                         if (!cfs_bitmap_check(ltd->ltd_tgt_bitmap, idx))
683                                 continue;
684
685                         /* check whether the idx already exists
686                          * in current allocated array */
687                         for (k = 0; k < i; k++) {
688                                 if (idx_array[k] == idx) {
689                                         already_allocated = true;
690                                         break;
691                                 }
692                         }
693
694                         if (already_allocated)
695                                 continue;
696
697                         break;
698                 }
699
700                 /* Can not allocate more stripes */
701                 if (j == lod->lod_remote_mdt_count) {
702                         CDEBUG(D_INFO, "%s: require stripes %d only get %d\n",
703                                lod2obd(lod)->obd_name, stripe_count, i - 1);
704                         break;
705                 }
706
707                 CDEBUG(D_INFO, "idx %d, mdt cnt %d,"
708                        " allocated %d, last allocated %d\n", idx,
709                        lod->lod_remote_mdt_count, i, idx_array[i - 1]);
710
711                 tgt = LTD_TGT(ltd, idx);
712                 LASSERT(tgt != NULL);
713
714                 rc = obd_fid_alloc(tgt->ltd_exp, &fid, NULL);
715                 if (rc < 0)
716                         GOTO(out_put, rc);
717                 rc = 0;
718
719                 conf.loc_flags = LOC_F_NEW;
720                 dto = dt_locate_at(env, tgt->ltd_tgt, &fid,
721                                   dt->do_lu.lo_dev->ld_site->ls_top_dev, &conf);
722                 if (IS_ERR(dto))
723                         GOTO(out_put, rc = PTR_ERR(dto));
724                 stripe[i - 1] = dto;
725                 idx_array[i] = idx;
726         }
727
728         lo->ldo_dir_striped = 1;
729         lo->ldo_stripe = stripe;
730         lo->ldo_stripenr = i - 1;
731         lo->ldo_stripes_allocated = stripe_count - 1;
732
733         if (lo->ldo_stripenr == 0)
734                 GOTO(out_put, rc = -ENOSPC);
735
736         rc = lod_prep_lmv_md(env, dt, &lmv_buf);
737         if (rc != 0)
738                 GOTO(out_put, rc);
739
740         for (i = 0; i < lo->ldo_stripenr; i++) {
741                 struct dt_object *dto;
742
743                 dto = stripe[i];
744                 /* only create slave striped object */
745                 rc = dt_declare_create(env, dto, attr, NULL, NULL, th);
746                 if (rc != 0)
747                         GOTO(out_put, rc);
748
749                 if (!dt_try_as_dir(env, dto))
750                         GOTO(out_put, rc = -EINVAL);
751
752                 rc = dt_declare_insert(env, dto,
753                      (const struct dt_rec *)lu_object_fid(&dto->do_lu),
754                      (const struct dt_key *)dot, th);
755                 if (rc != 0)
756                         GOTO(out_put, rc);
757
758                 /* master stripe FID will be put to .. */
759                 rc = dt_declare_insert(env, dto,
760                      (const struct dt_rec *)lu_object_fid(&dt->do_lu),
761                      (const struct dt_key *)dotdot, th);
762                 if (rc != 0)
763                         GOTO(out_put, rc);
764
765                 /* probably nothing to inherite */
766                 if (lo->ldo_striping_cached &&
767                     !LOVEA_DELETE_VALUES(lo->ldo_def_stripe_size,
768                                          lo->ldo_def_stripenr,
769                                          lo->ldo_def_stripe_offset)) {
770                         struct lod_thread_info  *info;
771                         struct lov_user_md_v3   *v3;
772
773                         /* sigh, lti_ea_store has been used for lmv_buf,
774                          * so we have to allocate buffer for default
775                          * stripe EA */
776                         OBD_ALLOC_PTR(v3);
777                         if (v3 == NULL)
778                                 GOTO(out_put, rc = -ENOMEM);
779
780                         memset(v3, 0, sizeof(*v3));
781                         v3->lmm_magic = cpu_to_le32(LOV_USER_MAGIC_V3);
782                         v3->lmm_stripe_count =
783                                 cpu_to_le32(lo->ldo_def_stripenr);
784                         v3->lmm_stripe_offset =
785                                 cpu_to_le32(lo->ldo_def_stripe_offset);
786                         v3->lmm_stripe_size =
787                                 cpu_to_le32(lo->ldo_def_stripe_size);
788                         if (lo->ldo_pool)
789                                 strncpy(v3->lmm_pool_name, lo->ldo_pool,
790                                         LOV_MAXPOOLNAME);
791
792                         info = lod_env_info(env);
793                         info->lti_buf.lb_buf = v3;
794                         info->lti_buf.lb_len = sizeof(*v3);
795                         rc = dt_declare_xattr_set(env, dto,
796                                                   &info->lti_buf,
797                                                   XATTR_NAME_LOV,
798                                                   0, th);
799                         OBD_FREE_PTR(v3);
800                         if (rc != 0)
801                                 GOTO(out_put, rc);
802                 }
803                 rc = dt_declare_xattr_set(env, dto, &lmv_buf, XATTR_NAME_LMV, 0,
804                                           th);
805                 if (rc != 0)
806                         GOTO(out_put, rc);
807         }
808
809         rc = dt_declare_xattr_set(env, dt, &lmv_buf, XATTR_NAME_LMV, 0, th);
810         if (rc != 0)
811                 GOTO(out_put, rc);
812
813 out_put:
814         if (rc < 0) {
815                 for (i = 0; i < stripe_count - 1; i++)
816                         if (stripe[i] != NULL)
817                                 lu_object_put(env, &stripe[i]->do_lu);
818                 OBD_FREE(stripe, sizeof(stripe[0]) * (stripe_count - 1));
819         }
820
821 out_free:
822         if (idx_array != NULL)
823                 OBD_FREE(idx_array, sizeof(idx_array[0]) * stripe_count);
824
825         RETURN(rc);
826 }
827
828 /**
829  * Declare create striped md object.
830  */
831 static int lod_declare_xattr_set_lmv(const struct lu_env *env,
832                                      struct dt_object *dt,
833                                      struct lu_attr *attr,
834                                      const struct lu_buf *lum_buf,
835                                      struct thandle *th)
836 {
837         struct lod_object       *lo = lod_dt_obj(dt);
838         struct lod_device       *lod = lu2lod_dev(dt->do_lu.lo_dev);
839         struct lmv_user_md_v1   *lum;
840         int                     rc;
841         ENTRY;
842
843         lum = lum_buf->lb_buf;
844         LASSERT(lum != NULL);
845
846         CDEBUG(D_INFO, "lum magic = %x count = %u offset = %d\n",
847                le32_to_cpu(lum->lum_magic), le32_to_cpu(lum->lum_stripe_count),
848                (int)le32_to_cpu(lum->lum_stripe_offset));
849
850         if (le32_to_cpu(lum->lum_stripe_count) <= 1)
851                 GOTO(out, rc = 0);
852
853         rc = lod_verify_md_striping(lod, lum);
854         if (rc != 0)
855                 GOTO(out, rc);
856
857         /* prepare dir striped objects */
858         rc = lod_prep_md_striped_create(env, dt, attr, lum, th);
859         if (rc != 0) {
860                 /* failed to create striping, let's reset
861                  * config so that others don't get confused */
862                 lod_object_free_striping(env, lo);
863                 GOTO(out, rc);
864         }
865 out:
866         RETURN(rc);
867 }
868
869 /*
870  * LOV xattr is a storage for striping, and LOD owns this xattr.
871  * but LOD allows others to control striping to some extent
872  * - to reset strping
873  * - to set new defined striping
874  * - to set new semi-defined striping
875  *   - number of stripes is defined
876  *   - number of stripes + osts are defined
877  *   - ??
878  */
879 static int lod_declare_xattr_set(const struct lu_env *env,
880                                  struct dt_object *dt,
881                                  const struct lu_buf *buf,
882                                  const char *name, int fl,
883                                  struct thandle *th)
884 {
885         struct dt_object *next = dt_object_child(dt);
886         struct lu_attr   *attr = &lod_env_info(env)->lti_attr;
887         __u32             mode;
888         int               rc;
889         ENTRY;
890
891         /*
892          * allow to declare predefined striping on a new (!mode) object
893          * which is supposed to be replay of regular file creation
894          * (when LOV setting is declared)
895          * LU_XATTR_REPLACE is set to indicate a layout swap
896          */
897         mode = dt->do_lu.lo_header->loh_attr & S_IFMT;
898         if ((S_ISREG(mode) || mode == 0) && strcmp(name, XATTR_NAME_LOV) == 0 &&
899              !(fl & LU_XATTR_REPLACE)) {
900                 /*
901                  * this is a request to manipulate object's striping
902                  */
903                 if (dt_object_exists(dt)) {
904                         rc = dt_attr_get(env, next, attr, BYPASS_CAPA);
905                         if (rc)
906                                 RETURN(rc);
907                 } else {
908                         memset(attr, 0, sizeof(*attr));
909                         attr->la_valid = LA_TYPE | LA_MODE;
910                         attr->la_mode = S_IFREG;
911                 }
912                 rc = lod_declare_striped_object(env, dt, attr, buf, th);
913         } else if (S_ISDIR(mode)) {
914                 struct lod_device       *d = lu2lod_dev(dt->do_lu.lo_dev);
915                 struct lod_object       *lo = lod_dt_obj(dt);
916                 int                     i;
917
918                 if (strcmp(name, XATTR_NAME_DEFAULT_LMV) == 0) {
919                         struct lmv_user_md_v1 *lum;
920
921                         LASSERT(buf != NULL && buf->lb_buf != NULL);
922                         lum = buf->lb_buf;
923                         rc = lod_verify_md_striping(d, lum);
924                         if (rc != 0)
925                                 RETURN(rc);
926                 }
927
928                 rc = dt_declare_xattr_set(env, next, buf, name, fl, th);
929                 if (rc != 0)
930                         RETURN(rc);
931
932                 /* set xattr to each stripes, if needed */
933                 rc = lod_load_striping(env, lo);
934                 if (rc != 0)
935                         RETURN(rc);
936
937                 if (lo->ldo_stripenr == 0)
938                         RETURN(rc);
939
940                 for (i = 0; i < lo->ldo_stripenr; i++) {
941                         LASSERT(lo->ldo_stripe[i]);
942                         rc = dt_declare_xattr_set(env, lo->ldo_stripe[i], buf,
943                                                   name, fl, th);
944                         if (rc != 0)
945                                 break;
946                 }
947         } else {
948                 rc = dt_declare_xattr_set(env, next, buf, name, fl, th);
949         }
950
951         RETURN(rc);
952 }
953
954 static void lod_lov_stripe_cache_clear(struct lod_object *lo)
955 {
956         lo->ldo_striping_cached = 0;
957         lo->ldo_def_striping_set = 0;
958         lod_object_set_pool(lo, NULL);
959         lo->ldo_def_stripe_size = 0;
960         lo->ldo_def_stripenr = 0;
961 }
962
963 static int lod_xattr_set_lov_on_dir(const struct lu_env *env,
964                                     struct dt_object *dt,
965                                     const struct lu_buf *buf,
966                                     const char *name, int fl,
967                                     struct thandle *th,
968                                     struct lustre_capa *capa)
969 {
970         struct lod_device       *d = lu2lod_dev(dt->do_lu.lo_dev);
971         struct dt_object        *next = dt_object_child(dt);
972         struct lod_object       *l = lod_dt_obj(dt);
973         struct lov_user_md_v1   *lum;
974         struct lov_user_md_v3   *v3 = NULL;
975         int                      rc;
976         ENTRY;
977
978         /* If it is striped dir, we should clear the stripe cache for
979          * slave stripe as well, but there are no effective way to
980          * notify the LOD on the slave MDT, so we do not cache stripe
981          * information for slave stripe for now. XXX*/
982         lod_lov_stripe_cache_clear(l);
983         LASSERT(buf != NULL && buf->lb_buf != NULL);
984         lum = buf->lb_buf;
985
986         rc = lod_verify_striping(d, buf, 0);
987         if (rc)
988                 RETURN(rc);
989
990         if (lum->lmm_magic == LOV_USER_MAGIC_V3)
991                 v3 = buf->lb_buf;
992
993         /* if { size, offset, count } = { 0, -1, 0 } and no pool
994          * (i.e. all default values specified) then delete default
995          * striping from dir. */
996         CDEBUG(D_OTHER,
997                 "set default striping: sz %u # %u offset %d %s %s\n",
998                 (unsigned)lum->lmm_stripe_size,
999                 (unsigned)lum->lmm_stripe_count,
1000                 (int)lum->lmm_stripe_offset,
1001                 v3 ? "from" : "", v3 ? v3->lmm_pool_name : "");
1002
1003         if (LOVEA_DELETE_VALUES((lum->lmm_stripe_size),
1004                                 (lum->lmm_stripe_count),
1005                                 (lum->lmm_stripe_offset)) &&
1006                         lum->lmm_magic == LOV_USER_MAGIC_V1) {
1007                 rc = dt_xattr_del(env, next, name, th, capa);
1008                 if (rc == -ENODATA)
1009                         rc = 0;
1010         } else {
1011                 rc = dt_xattr_set(env, next, buf, name, fl, th, capa);
1012         }
1013
1014         RETURN(rc);
1015 }
1016
1017 static int lod_xattr_set_default_lmv_on_dir(const struct lu_env *env,
1018                                             struct dt_object *dt,
1019                                             const struct lu_buf *buf,
1020                                             const char *name, int fl,
1021                                             struct thandle *th,
1022                                             struct lustre_capa *capa)
1023 {
1024         struct dt_object        *next = dt_object_child(dt);
1025         struct lod_object       *l = lod_dt_obj(dt);
1026         struct lmv_user_md_v1   *lum;
1027         int                      rc;
1028         ENTRY;
1029
1030         LASSERT(buf != NULL && buf->lb_buf != NULL);
1031         lum = buf->lb_buf;
1032
1033         CDEBUG(D_OTHER, "set default stripe_count # %u stripe_offset %d\n",
1034               le32_to_cpu(lum->lum_stripe_count),
1035               (int)le32_to_cpu(lum->lum_stripe_offset));
1036
1037         if (LMVEA_DELETE_VALUES((le32_to_cpu(lum->lum_stripe_count)),
1038                                  le32_to_cpu(lum->lum_stripe_offset)) &&
1039                                 le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC) {
1040                 rc = dt_xattr_del(env, next, name, th, capa);
1041                 if (rc == -ENODATA)
1042                         rc = 0;
1043         } else {
1044                 rc = dt_xattr_set(env, next, buf, name, fl, th, capa);
1045                 if (rc != 0)
1046                         RETURN(rc);
1047
1048                 /* Update default stripe cache */
1049                 if (l->ldo_dir_stripe == NULL) {
1050                         OBD_ALLOC_PTR(l->ldo_dir_stripe);
1051                         if (l->ldo_dir_stripe == NULL)
1052                                 RETURN(-ENOMEM);
1053                 }
1054
1055                 l->ldo_dir_striping_cached = 0;
1056                 l->ldo_dir_def_striping_set = 1;
1057                 l->ldo_dir_def_stripenr =
1058                         le32_to_cpu(lum->lum_stripe_count) - 1;
1059         }
1060
1061         RETURN(rc);
1062 }
1063
1064 static int lod_xattr_set_lmv(const struct lu_env *env, struct dt_object *dt,
1065                              const struct lu_buf *buf, const char *name,
1066                              int fl, struct thandle *th,
1067                              struct lustre_capa *capa)
1068 {
1069         struct lod_object       *lo = lod_dt_obj(dt);
1070         struct lu_buf           lmv_buf;
1071         int                     i;
1072         int                     rc;
1073         ENTRY;
1074
1075         if (!S_ISDIR(dt->do_lu.lo_header->loh_attr))
1076                 RETURN(-ENOTDIR);
1077
1078         /* The stripes are supposed to be allocated in declare phase,
1079          * if there are no stripes being allocated, it will skip */
1080         if (lo->ldo_stripenr == 0)
1081                 RETURN(0);
1082
1083         rc = lod_prep_lmv_md(env, dt, &lmv_buf);
1084         if (rc != 0)
1085                 RETURN(rc);
1086
1087         for (i = 0; i < lo->ldo_stripenr; i++) {
1088                 struct dt_object *dto;
1089                 struct lu_attr  *attr = &lod_env_info(env)->lti_attr;
1090
1091                 dto = lo->ldo_stripe[i];
1092                 memset(attr, 0, sizeof(*attr));
1093                 attr->la_valid = LA_TYPE | LA_MODE;
1094                 attr->la_mode = S_IFDIR;
1095                 rc = dt_create(env, dto, attr, NULL, NULL, th);
1096                 if (rc != 0)
1097                         RETURN(rc);
1098
1099                 rc = dt_insert(env, dto,
1100                               (const struct dt_rec *)lu_object_fid(&dto->do_lu),
1101                               (const struct dt_key *)dot, th, capa, 0);
1102                 if (rc != 0)
1103                         RETURN(rc);
1104
1105                 rc = dt_insert(env, dto,
1106                               (struct dt_rec *)lu_object_fid(&dt->do_lu),
1107                               (const struct dt_key *)dotdot, th, capa, 0);
1108                 if (rc != 0)
1109                         RETURN(rc);
1110
1111                 if (lo->ldo_striping_cached &&
1112                     !LOVEA_DELETE_VALUES(lo->ldo_def_stripe_size,
1113                                          lo->ldo_def_stripenr,
1114                                          lo->ldo_def_stripe_offset)) {
1115                         struct lod_thread_info  *info;
1116                         struct lov_user_md_v3   *v3;
1117
1118                         /* sigh, lti_ea_store has been used for lmv_buf,
1119                          * so we have to allocate buffer for default
1120                          * stripe EA */
1121                         OBD_ALLOC_PTR(v3);
1122                         if (v3 == NULL)
1123                                 RETURN(-ENOMEM);
1124
1125                         memset(v3, 0, sizeof(*v3));
1126                         v3->lmm_magic = cpu_to_le32(LOV_USER_MAGIC_V3);
1127                         v3->lmm_stripe_count =
1128                                 cpu_to_le32(lo->ldo_def_stripenr);
1129                         v3->lmm_stripe_offset =
1130                                 cpu_to_le32(lo->ldo_def_stripe_offset);
1131                         v3->lmm_stripe_size =
1132                                 cpu_to_le32(lo->ldo_def_stripe_size);
1133                         if (lo->ldo_pool)
1134                                 strncpy(v3->lmm_pool_name, lo->ldo_pool,
1135                                         LOV_MAXPOOLNAME);
1136
1137                         info = lod_env_info(env);
1138                         info->lti_buf.lb_buf = v3;
1139                         info->lti_buf.lb_len = sizeof(*v3);
1140                         rc = dt_xattr_set(env, dto, &info->lti_buf,
1141                                           XATTR_NAME_LOV, 0, th, capa);
1142                         OBD_FREE_PTR(v3);
1143                         if (rc != 0)
1144                                 RETURN(rc);
1145                 }
1146
1147                 rc = dt_xattr_set(env, dto, &lmv_buf, XATTR_NAME_LMV, fl, th,
1148                                   capa);
1149         }
1150
1151         rc = dt_xattr_set(env, dt, &lmv_buf, XATTR_NAME_LMV, fl, th, capa);
1152
1153         RETURN(rc);
1154 }
1155
1156 static int lod_xattr_set(const struct lu_env *env,
1157                          struct dt_object *dt, const struct lu_buf *buf,
1158                          const char *name, int fl, struct thandle *th,
1159                          struct lustre_capa *capa)
1160 {
1161         struct lod_object       *lo = lod_dt_obj(dt);
1162         struct dt_object        *next = dt_object_child(dt);
1163         __u32                    attr;
1164         int                      rc;
1165         int                     i;
1166         ENTRY;
1167
1168         attr = dt->do_lu.lo_header->loh_attr & S_IFMT;
1169         if (S_ISDIR(attr) && strcmp(name, XATTR_NAME_LOV) == 0) {
1170                 rc = lod_xattr_set_lov_on_dir(env, dt, buf, name, fl, th, capa);
1171         } else if (S_ISREG(attr) && !strcmp(name, XATTR_NAME_LOV)) {
1172                 /* in case of lov EA swap, just set it
1173                  * if not, it is a replay so check striping match what we
1174                  * already have during req replay, declare_xattr_set()
1175                  * defines striping, then create() does the work
1176                 */
1177                 if (fl & LU_XATTR_REPLACE) {
1178                         /* free stripes, then update disk */
1179                         lod_object_free_striping(env, lod_dt_obj(dt));
1180                         rc = dt_xattr_set(env, next, buf, name, fl, th, capa);
1181                 } else {
1182                         rc = lod_striping_create(env, dt, NULL, NULL, th);
1183                 }
1184         } else if (strcmp(name, XATTR_NAME_DEFAULT_LMV) == 0) {
1185                 if (!S_ISDIR(attr))
1186                         RETURN(-ENOTDIR);
1187                 rc = lod_xattr_set_default_lmv_on_dir(env, dt, buf, name, fl,
1188                                                       th, capa);
1189         } else {
1190                 /*
1191                  * behave transparantly for all other EAs
1192                  */
1193                 rc = dt_xattr_set(env, next, buf, name, fl, th, capa);
1194         }
1195
1196         if (rc != 0 || !S_ISDIR(attr))
1197                 RETURN(rc);
1198
1199         if (lo->ldo_stripenr == 0)
1200                 RETURN(rc);
1201
1202         for (i = 0; i < lo->ldo_stripenr; i++) {
1203                 LASSERT(lo->ldo_stripe[i]);
1204                 rc = dt_xattr_set(env, lo->ldo_stripe[i], buf, name, fl, th,
1205                                   capa);
1206                 if (rc != 0)
1207                         break;
1208         }
1209
1210         RETURN(rc);
1211 }
1212
/* Declare the removal of xattr @name by forwarding to the next layer. */
static int lod_declare_xattr_del(const struct lu_env *env,
				 struct dt_object *dt, const char *name,
				 struct thandle *th)
{
	struct dt_object *next = dt_object_child(dt);

	return dt_declare_xattr_del(env, next, name, th);
}
1219
1220 static int lod_xattr_del(const struct lu_env *env, struct dt_object *dt,
1221                          const char *name, struct thandle *th,
1222                          struct lustre_capa *capa)
1223 {
1224         if (!strcmp(name, XATTR_NAME_LOV))
1225                 lod_object_free_striping(env, lod_dt_obj(dt));
1226         return dt_xattr_del(env, dt_object_child(dt), name, th, capa);
1227 }
1228
/* List the xattrs of the object by forwarding to the next layer. */
static int lod_xattr_list(const struct lu_env *env,
			  struct dt_object *dt, struct lu_buf *buf,
			  struct lustre_capa *capa)
{
	struct dt_object *next = dt_object_child(dt);

	return dt_xattr_list(env, next, buf, capa);
}
1235
1236 int lod_object_set_pool(struct lod_object *o, char *pool)
1237 {
1238         int len;
1239
1240         if (o->ldo_pool) {
1241                 len = strlen(o->ldo_pool);
1242                 OBD_FREE(o->ldo_pool, len + 1);
1243                 o->ldo_pool = NULL;
1244         }
1245         if (pool) {
1246                 len = strlen(pool);
1247                 OBD_ALLOC(o->ldo_pool, len + 1);
1248                 if (o->ldo_pool == NULL)
1249                         return -ENOMEM;
1250                 strcpy(o->ldo_pool, pool);
1251         }
1252         return 0;
1253 }
1254
1255 static inline int lod_object_will_be_striped(int is_reg, const struct lu_fid *fid)
1256 {
1257         return (is_reg && fid_seq(fid) != FID_SEQ_LOCAL_FILE);
1258 }
1259
1260
1261 static int lod_cache_parent_lov_striping(const struct lu_env *env,
1262                                          struct lod_object *lp)
1263 {
1264         struct lod_thread_info  *info = lod_env_info(env);
1265         struct lov_user_md_v1   *v1 = NULL;
1266         struct lov_user_md_v3   *v3 = NULL;
1267         int                      rc;
1268         ENTRY;
1269
1270         /* called from MDD without parent being write locked,
1271          * lock it here */
1272         dt_write_lock(env, dt_object_child(&lp->ldo_obj), 0);
1273         rc = lod_get_lov_ea(env, lp);
1274         if (rc < 0)
1275                 GOTO(unlock, rc);
1276
1277         if (rc < sizeof(struct lov_user_md)) {
1278                 /* don't lookup for non-existing or invalid striping */
1279                 lp->ldo_def_striping_set = 0;
1280                 lp->ldo_striping_cached = 1;
1281                 lp->ldo_def_stripe_size = 0;
1282                 lp->ldo_def_stripenr = 0;
1283                 lp->ldo_def_stripe_offset = (typeof(v1->lmm_stripe_offset))(-1);
1284                 GOTO(unlock, rc = 0);
1285         }
1286
1287         rc = 0;
1288         v1 = info->lti_ea_store;
1289         if (v1->lmm_magic == __swab32(LOV_USER_MAGIC_V1))
1290                 lustre_swab_lov_user_md_v1(v1);
1291         else if (v1->lmm_magic == __swab32(LOV_USER_MAGIC_V3))
1292                 lustre_swab_lov_user_md_v3(v3);
1293
1294         if (v1->lmm_magic != LOV_MAGIC_V3 && v1->lmm_magic != LOV_MAGIC_V1)
1295                 GOTO(unlock, rc = 0);
1296
1297         if (v1->lmm_pattern != LOV_PATTERN_RAID0 && v1->lmm_pattern != 0)
1298                 GOTO(unlock, rc = 0);
1299
1300         lp->ldo_def_stripenr = v1->lmm_stripe_count;
1301         lp->ldo_def_stripe_size = v1->lmm_stripe_size;
1302         lp->ldo_def_stripe_offset = v1->lmm_stripe_offset;
1303         lp->ldo_striping_cached = 1;
1304         lp->ldo_def_striping_set = 1;
1305         if (v1->lmm_magic == LOV_USER_MAGIC_V3) {
1306                 /* XXX: sanity check here */
1307                 v3 = (struct lov_user_md_v3 *) v1;
1308                 if (v3->lmm_pool_name[0])
1309                         lod_object_set_pool(lp, v3->lmm_pool_name);
1310         }
1311         EXIT;
1312 unlock:
1313         dt_write_unlock(env, dt_object_child(&lp->ldo_obj));
1314         return rc;
1315 }
1316
1317
1318 static int lod_cache_parent_lmv_striping(const struct lu_env *env,
1319                                          struct lod_object *lp)
1320 {
1321         struct lod_thread_info  *info = lod_env_info(env);
1322         struct lmv_user_md_v1   *v1 = NULL;
1323         int                      rc;
1324         ENTRY;
1325
1326         /* called from MDD without parent being write locked,
1327          * lock it here */
1328         dt_write_lock(env, dt_object_child(&lp->ldo_obj), 0);
1329         rc = lod_get_default_lmv_ea(env, lp);
1330         if (rc < 0)
1331                 GOTO(unlock, rc);
1332
1333         if (rc < sizeof(struct lmv_user_md)) {
1334                 /* don't lookup for non-existing or invalid striping */
1335                 lp->ldo_dir_def_striping_set = 0;
1336                 lp->ldo_dir_striping_cached = 1;
1337                 lp->ldo_dir_def_stripenr = 0;
1338                 lp->ldo_dir_def_stripe_offset =
1339                                         (typeof(v1->lum_stripe_offset))(-1);
1340                 lp->ldo_dir_def_hash_type = LMV_HASH_TYPE_FNV_1A_64;
1341                 GOTO(unlock, rc = 0);
1342         }
1343
1344         rc = 0;
1345         v1 = info->lti_ea_store;
1346
1347         lp->ldo_dir_def_stripenr = le32_to_cpu(v1->lum_stripe_count) - 1;
1348         lp->ldo_dir_def_stripe_offset = le32_to_cpu(v1->lum_stripe_offset);
1349         lp->ldo_dir_def_hash_type = le32_to_cpu(v1->lum_hash_type);
1350         lp->ldo_dir_def_striping_set = 1;
1351         lp->ldo_dir_striping_cached = 1;
1352
1353         EXIT;
1354 unlock:
1355         dt_write_unlock(env, dt_object_child(&lp->ldo_obj));
1356         return rc;
1357 }
1358
1359 static int lod_cache_parent_striping(const struct lu_env *env,
1360                                      struct lod_object *lp,
1361                                      umode_t child_mode)
1362 {
1363         int rc = 0;
1364         ENTRY;
1365
1366         rc = lod_load_striping(env, lp);
1367         if (rc != 0)
1368                 RETURN(rc);
1369
1370         if (!lp->ldo_striping_cached) {
1371                 /* we haven't tried to get default striping for
1372                  * the directory yet, let's cache it in the object */
1373                 rc = lod_cache_parent_lov_striping(env, lp);
1374                 if (rc != 0)
1375                         RETURN(rc);
1376         }
1377
1378         if (S_ISDIR(child_mode) && !lp->ldo_dir_striping_cached)
1379                 rc = lod_cache_parent_lmv_striping(env, lp);
1380
1381         RETURN(rc);
1382 }
1383
1384 /**
1385  * used to transfer default striping data to the object being created
1386  */
1387 static void lod_ah_init(const struct lu_env *env,
1388                         struct dt_allocation_hint *ah,
1389                         struct dt_object *parent,
1390                         struct dt_object *child,
1391                         umode_t child_mode)
1392 {
1393         struct lod_device *d = lu2lod_dev(child->do_lu.lo_dev);
1394         struct dt_object  *nextp = NULL;
1395         struct dt_object  *nextc;
1396         struct lod_object *lp = NULL;
1397         struct lod_object *lc;
1398         struct lov_desc   *desc;
1399         ENTRY;
1400
1401         LASSERT(child);
1402
1403         if (likely(parent)) {
1404                 nextp = dt_object_child(parent);
1405                 lp = lod_dt_obj(parent);
1406         }
1407
1408         nextc = dt_object_child(child);
1409         lc = lod_dt_obj(child);
1410
1411         LASSERT(lc->ldo_stripenr == 0);
1412         LASSERT(lc->ldo_stripe == NULL);
1413
1414         /*
1415          * local object may want some hints
1416          * in case of late striping creation, ->ah_init()
1417          * can be called with local object existing
1418          */
1419         if (!dt_object_exists(nextc) || dt_object_remote(nextc))
1420                 nextc->do_ops->do_ah_init(env, ah, dt_object_remote(nextp) ?
1421                                           NULL : nextp, nextc, child_mode);
1422
1423         if (S_ISDIR(child_mode)) {
1424                 int rc;
1425
1426                 if (lc->ldo_dir_stripe == NULL) {
1427                         OBD_ALLOC_PTR(lc->ldo_dir_stripe);
1428                         if (lc->ldo_dir_stripe == NULL)
1429                                 return;
1430                 }
1431
1432                 if (lp->ldo_dir_stripe == NULL) {
1433                         OBD_ALLOC_PTR(lp->ldo_dir_stripe);
1434                         if (lp->ldo_dir_stripe == NULL)
1435                                 return;
1436                 }
1437
1438                 rc = lod_cache_parent_striping(env, lp, child_mode);
1439                 if (rc != 0)
1440                         return;
1441
1442                 /* transfer defaults to new directory */
1443                 if (lp->ldo_striping_cached) {
1444                         if (lp->ldo_pool)
1445                                 lod_object_set_pool(lc, lp->ldo_pool);
1446                         lc->ldo_def_stripenr = lp->ldo_def_stripenr;
1447                         lc->ldo_def_stripe_size = lp->ldo_def_stripe_size;
1448                         lc->ldo_def_stripe_offset = lp->ldo_def_stripe_offset;
1449                         lc->ldo_striping_cached = 1;
1450                         lc->ldo_def_striping_set = 1;
1451                         CDEBUG(D_OTHER, "inherite EA sz:%d off:%d nr:%d\n",
1452                                (int)lc->ldo_def_stripe_size,
1453                                (int)lc->ldo_def_stripe_offset,
1454                                (int)lc->ldo_def_stripenr);
1455                 }
1456
1457                 /* transfer dir defaults to new directory */
1458                 if (lp->ldo_dir_striping_cached) {
1459                         lc->ldo_dir_def_stripenr = lp->ldo_dir_def_stripenr;
1460                         lc->ldo_dir_def_stripe_offset =
1461                                                   lp->ldo_dir_def_stripe_offset;
1462                         lc->ldo_dir_def_hash_type =
1463                                                   lp->ldo_dir_def_hash_type;
1464                         lc->ldo_dir_striping_cached = 1;
1465                         lc->ldo_dir_def_striping_set = 1;
1466                         CDEBUG(D_INFO, "inherit default EA nr:%d off:%d t%u\n",
1467                                (int)lc->ldo_dir_def_stripenr,
1468                                (int)lc->ldo_dir_def_stripe_offset,
1469                                lc->ldo_dir_def_hash_type);
1470                 }
1471
1472                 /* If the directory is specified with certain stripes */
1473                 if (ah->dah_eadata != NULL && ah->dah_eadata_len != 0) {
1474                         const struct lmv_user_md_v1 *lum1 = ah->dah_eadata;
1475                         int rc;
1476
1477                         rc = lod_verify_md_striping(d, lum1);
1478                         if (rc == 0 &&
1479                                 le32_to_cpu(lum1->lum_stripe_count) > 1) {
1480                                 /* Directory will be striped only if
1481                                  * stripe_count > 1 */
1482                                 lc->ldo_stripenr =
1483                                         le32_to_cpu(lum1->lum_stripe_count) - 1;
1484                                 lc->ldo_dir_stripe_offset =
1485                                         le32_to_cpu(lum1->lum_stripe_offset);
1486                                 lc->ldo_dir_hash_type =
1487                                         le32_to_cpu(lum1->lum_hash_type);
1488                                 CDEBUG(D_INFO, "set stripe EA nr:%hu off:%d\n",
1489                                        lc->ldo_stripenr,
1490                                        (int)lc->ldo_dir_stripe_offset);
1491                         }
1492                 } else if (lp->ldo_dir_def_striping_set) {
1493                         /* If there are default dir stripe from parent */
1494                         lc->ldo_stripenr = lp->ldo_dir_def_stripenr;
1495                         lc->ldo_dir_stripe_offset =
1496                                         lp->ldo_dir_def_stripe_offset;
1497                         lc->ldo_dir_hash_type =
1498                                         lp->ldo_dir_def_hash_type;
1499                         CDEBUG(D_INFO, "inherit EA nr:%hu off:%d\n",
1500                                lc->ldo_stripenr,
1501                                (int)lc->ldo_dir_stripe_offset);
1502                 } else {
1503                         /* set default stripe for this directory */
1504                         lc->ldo_stripenr = 0;
1505                         lc->ldo_dir_stripe_offset = -1;
1506                 }
1507
1508                 CDEBUG(D_INFO, "final striping count:%hu, offset:%d\n",
1509                        lc->ldo_stripenr, (int)lc->ldo_dir_stripe_offset);
1510
1511                 goto out;
1512         }
1513
1514         /*
1515          * if object is going to be striped over OSTs, transfer default
1516          * striping information to the child, so that we can use it
1517          * during declaration and creation
1518          */
1519         if (!lod_object_will_be_striped(S_ISREG(child_mode),
1520                                         lu_object_fid(&child->do_lu)))
1521                 goto out;
1522         /*
1523          * try from the parent
1524          */
1525         if (likely(parent)) {
1526                 lod_cache_parent_striping(env, lp, child_mode);
1527
1528                 lc->ldo_def_stripe_offset = (__u16) -1;
1529
1530                 if (lp->ldo_def_striping_set) {
1531                         if (lp->ldo_pool)
1532                                 lod_object_set_pool(lc, lp->ldo_pool);
1533                         lc->ldo_stripenr = lp->ldo_def_stripenr;
1534                         lc->ldo_stripe_size = lp->ldo_def_stripe_size;
1535                         lc->ldo_def_stripe_offset = lp->ldo_def_stripe_offset;
1536                         CDEBUG(D_OTHER, "striping from parent: #%d, sz %d %s\n",
1537                                lc->ldo_stripenr, lc->ldo_stripe_size,
1538                                lp->ldo_pool ? lp->ldo_pool : "");
1539                 }
1540         }
1541
1542         /*
1543          * if the parent doesn't provide with specific pattern, grab fs-wide one
1544          */
1545         desc = &d->lod_desc;
1546         if (lc->ldo_stripenr == 0)
1547                 lc->ldo_stripenr = desc->ld_default_stripe_count;
1548         if (lc->ldo_stripe_size == 0)
1549                 lc->ldo_stripe_size = desc->ld_default_stripe_size;
1550         CDEBUG(D_OTHER, "final striping: # %d stripes, sz %d from %s\n",
1551                lc->ldo_stripenr, lc->ldo_stripe_size,
1552                lc->ldo_pool ? lc->ldo_pool : "");
1553
1554 out:
1555         /* we do not cache stripe information for slave stripe, see
1556          * lod_xattr_set_lov_on_dir */
1557         if (lp != NULL && lp->ldo_dir_slave_stripe)
1558                 lod_lov_stripe_cache_clear(lp);
1559
1560         EXIT;
1561 }
1562
1563 #define ll_do_div64(aaa,bbb)    do_div((aaa), (bbb))
1564 /*
1565  * this function handles a special case when truncate was done
1566  * on a stripeless object and now striping is being created
1567  * we can't lose that size, so we have to propagate it to newly
1568  * created object
1569  */
1570 static int lod_declare_init_size(const struct lu_env *env,
1571                                  struct dt_object *dt, struct thandle *th)
1572 {
1573         struct dt_object   *next = dt_object_child(dt);
1574         struct lod_object  *lo = lod_dt_obj(dt);
1575         struct lu_attr     *attr = &lod_env_info(env)->lti_attr;
1576         uint64_t            size, offs;
1577         int                 rc, stripe;
1578         ENTRY;
1579
1580         /* XXX: we support the simplest (RAID0) striping so far */
1581         LASSERT(lo->ldo_stripe || lo->ldo_stripenr == 0);
1582         LASSERT(lo->ldo_stripe_size > 0);
1583
1584         rc = dt_attr_get(env, next, attr, BYPASS_CAPA);
1585         LASSERT(attr->la_valid & LA_SIZE);
1586         if (rc)
1587                 RETURN(rc);
1588
1589         size = attr->la_size;
1590         if (size == 0)
1591                 RETURN(0);
1592
1593         /* ll_do_div64(a, b) returns a % b, and a = a / b */
1594         ll_do_div64(size, (__u64) lo->ldo_stripe_size);
1595         stripe = ll_do_div64(size, (__u64) lo->ldo_stripenr);
1596
1597         size = size * lo->ldo_stripe_size;
1598         offs = attr->la_size;
1599         size += ll_do_div64(offs, lo->ldo_stripe_size);
1600
1601         attr->la_valid = LA_SIZE;
1602         attr->la_size = size;
1603
1604         rc = dt_declare_attr_set(env, lo->ldo_stripe[stripe], attr, th);
1605
1606         RETURN(rc);
1607 }
1608
1609 /**
1610  * Create declaration of striped object
1611  */
1612 int lod_declare_striped_object(const struct lu_env *env, struct dt_object *dt,
1613                                struct lu_attr *attr,
1614                                const struct lu_buf *lovea, struct thandle *th)
1615 {
1616         struct lod_thread_info  *info = lod_env_info(env);
1617         struct dt_object        *next = dt_object_child(dt);
1618         struct lod_object       *lo = lod_dt_obj(dt);
1619         int                      rc;
1620         ENTRY;
1621
1622         if (OBD_FAIL_CHECK(OBD_FAIL_MDS_ALLOC_OBDO)) {
1623                 /* failed to create striping, let's reset
1624                  * config so that others don't get confused */
1625                 lod_object_free_striping(env, lo);
1626                 GOTO(out, rc = -ENOMEM);
1627         }
1628
1629         /* choose OST and generate appropriate objects */
1630         rc = lod_qos_prep_create(env, lo, attr, lovea, th);
1631         if (rc) {
1632                 /* failed to create striping, let's reset
1633                  * config so that others don't get confused */
1634                 lod_object_free_striping(env, lo);
1635                 GOTO(out, rc);
1636         }
1637
1638         /*
1639          * declare storage for striping data
1640          */
1641         info->lti_buf.lb_len = lov_mds_md_size(lo->ldo_stripenr,
1642                                 lo->ldo_pool ?  LOV_MAGIC_V3 : LOV_MAGIC_V1);
1643         rc = dt_declare_xattr_set(env, next, &info->lti_buf, XATTR_NAME_LOV,
1644                                   0, th);
1645         if (rc)
1646                 GOTO(out, rc);
1647
1648         /*
1649          * if striping is created with local object's size > 0,
1650          * we have to propagate this size to specific object
1651          * the case is possible only when local object was created previously
1652          */
1653         if (dt_object_exists(next))
1654                 rc = lod_declare_init_size(env, dt, th);
1655
1656 out:
1657         RETURN(rc);
1658 }
1659
1660 int lod_dir_striping_create_internal(const struct lu_env *env,
1661                                      struct dt_object *dt,
1662                                      struct lu_attr *attr,
1663                                      const struct dt_object_format *dof,
1664                                      struct thandle *th,
1665                                      bool declare)
1666 {
1667         struct lod_thread_info  *info = lod_env_info(env);
1668         struct dt_object        *next = dt_object_child(dt);
1669         struct lod_object       *lo = lod_dt_obj(dt);
1670         int                     rc;
1671         ENTRY;
1672
1673         if (lo->ldo_dir_def_striping_set &&
1674             !LMVEA_DELETE_VALUES(lo->ldo_stripenr,
1675                                  lo->ldo_dir_stripe_offset)) {
1676                 struct lmv_user_md_v1 *v1 = info->lti_ea_store;
1677                 int stripe_count = lo->ldo_stripenr + 1;
1678
1679                 if (info->lti_ea_store_size < sizeof(*v1)) {
1680                         rc = lod_ea_store_resize(info, sizeof(*v1));
1681                         if (rc != 0)
1682                                 RETURN(rc);
1683                         v1 = info->lti_ea_store;
1684                 }
1685
1686                 memset(v1, 0, sizeof(*v1));
1687                 v1->lum_magic = cpu_to_le32(LMV_USER_MAGIC);
1688                 v1->lum_stripe_count = cpu_to_le32(stripe_count);
1689                 v1->lum_stripe_offset =
1690                                 cpu_to_le32(lo->ldo_dir_stripe_offset);
1691
1692                 info->lti_buf.lb_buf = v1;
1693                 info->lti_buf.lb_len = sizeof(*v1);
1694
1695                 if (declare)
1696                         rc = lod_declare_xattr_set_lmv(env, dt, attr,
1697                                                        &info->lti_buf, th);
1698                 else
1699                         rc = lod_xattr_set_lmv(env, dt, &info->lti_buf,
1700                                                XATTR_NAME_LMV, 0, th,
1701                                                BYPASS_CAPA);
1702                 if (rc != 0)
1703                         RETURN(rc);
1704         }
1705
1706         /* Transfer default LMV striping from the parent */
1707         if (lo->ldo_dir_striping_cached &&
1708             !LMVEA_DELETE_VALUES(lo->ldo_dir_def_stripenr,
1709                                  lo->ldo_dir_def_stripe_offset)) {
1710                 struct lmv_user_md_v1 *v1 = info->lti_ea_store;
1711                 int def_stripe_count = lo->ldo_dir_def_stripenr + 1;
1712
1713                 if (info->lti_ea_store_size < sizeof(*v1)) {
1714                         rc = lod_ea_store_resize(info, sizeof(*v1));
1715                         if (rc != 0)
1716                                 RETURN(rc);
1717                         v1 = info->lti_ea_store;
1718                 }
1719
1720                 memset(v1, 0, sizeof(*v1));
1721                 v1->lum_magic = cpu_to_le32(LMV_USER_MAGIC);
1722                 v1->lum_stripe_count = cpu_to_le32(def_stripe_count);
1723                 v1->lum_stripe_offset =
1724                                 cpu_to_le32(lo->ldo_dir_def_stripe_offset);
1725                 v1->lum_hash_type =
1726                                 cpu_to_le32(lo->ldo_dir_def_hash_type);
1727
1728                 info->lti_buf.lb_buf = v1;
1729                 info->lti_buf.lb_len = sizeof(*v1);
1730                 if (declare)
1731                         rc = dt_declare_xattr_set(env, next, &info->lti_buf,
1732                                                   XATTR_NAME_DEFAULT_LMV, 0,
1733                                                   th);
1734                 else
1735                         rc = dt_xattr_set(env, next, &info->lti_buf,
1736                                            XATTR_NAME_DEFAULT_LMV, 0, th,
1737                                            BYPASS_CAPA);
1738                 if (rc != 0)
1739                         RETURN(rc);
1740         }
1741
1742         /* Transfer default LOV striping from the parent */
1743         if (lo->ldo_striping_cached &&
1744             !LOVEA_DELETE_VALUES(lo->ldo_def_stripe_size,
1745                                  lo->ldo_def_stripenr,
1746                                  lo->ldo_def_stripe_offset)) {
1747                 struct lov_user_md_v3 *v3 = info->lti_ea_store;
1748
1749                 if (info->lti_ea_store_size < sizeof(*v3)) {
1750                         rc = lod_ea_store_resize(info, sizeof(*v3));
1751                         if (rc != 0)
1752                                 RETURN(rc);
1753                         v3 = info->lti_ea_store;
1754                 }
1755
1756                 memset(v3, 0, sizeof(*v3));
1757                 v3->lmm_magic = cpu_to_le32(LOV_USER_MAGIC_V3);
1758                 v3->lmm_stripe_count = cpu_to_le16(lo->ldo_def_stripenr);
1759                 v3->lmm_stripe_offset = cpu_to_le16(lo->ldo_def_stripe_offset);
1760                 v3->lmm_stripe_size = cpu_to_le32(lo->ldo_def_stripe_size);
1761                 if (lo->ldo_pool)
1762                         strncpy(v3->lmm_pool_name, lo->ldo_pool,
1763                                 LOV_MAXPOOLNAME);
1764
1765                 info->lti_buf.lb_buf = v3;
1766                 info->lti_buf.lb_len = sizeof(*v3);
1767
1768                 if (declare)
1769                         rc = dt_declare_xattr_set(env, next, &info->lti_buf,
1770                                                   XATTR_NAME_LOV, 0, th);
1771                 else
1772                         rc = dt_xattr_set(env, next, &info->lti_buf,
1773                                           XATTR_NAME_LOV, 0, th,
1774                                           BYPASS_CAPA);
1775                 if (rc != 0)
1776                         RETURN(rc);
1777         }
1778
1779         RETURN(0);
1780 }
1781
/*
 * Declaration-phase wrapper: run lod_dir_striping_create_internal() with
 * its \a declare flag set.
 */
static int lod_declare_dir_striping_create(const struct lu_env *env,
					   struct dt_object *dt,
					   struct lu_attr *attr,
					   struct dt_object_format *dof,
					   struct thandle *th)
{
	const bool declare_only = true;

	return lod_dir_striping_create_internal(env, dt, attr, dof, th,
						declare_only);
}
1790
/*
 * Execution-phase wrapper: run lod_dir_striping_create_internal() with
 * its \a declare flag cleared.
 */
static int lod_dir_striping_create(const struct lu_env *env,
				   struct dt_object *dt,
				   struct lu_attr *attr,
				   struct dt_object_format *dof,
				   struct thandle *th)
{
	const bool declare_only = false;

	return lod_dir_striping_create_internal(env, dt, attr, dof, th,
						declare_only);
}
1799
1800 static int lod_declare_object_create(const struct lu_env *env,
1801                                      struct dt_object *dt,
1802                                      struct lu_attr *attr,
1803                                      struct dt_allocation_hint *hint,
1804                                      struct dt_object_format *dof,
1805                                      struct thandle *th)
1806 {
1807         struct dt_object   *next = dt_object_child(dt);
1808         struct lod_object  *lo = lod_dt_obj(dt);
1809         int                 rc;
1810         ENTRY;
1811
1812         LASSERT(dof);
1813         LASSERT(attr);
1814         LASSERT(th);
1815
1816         /*
1817          * first of all, we declare creation of local object
1818          */
1819         rc = dt_declare_create(env, next, attr, hint, dof, th);
1820         if (rc)
1821                 GOTO(out, rc);
1822
1823         if (dof->dof_type == DFT_SYM)
1824                 dt->do_body_ops = &lod_body_lnk_ops;
1825
1826         /*
1827          * it's lod_ah_init() who has decided the object will striped
1828          */
1829         if (dof->dof_type == DFT_REGULAR) {
1830                 /* callers don't want stripes */
1831                 /* XXX: all tricky interactions with ->ah_make_hint() decided
1832                  * to use striping, then ->declare_create() behaving differently
1833                  * should be cleaned */
1834                 if (dof->u.dof_reg.striped == 0)
1835                         lo->ldo_stripenr = 0;
1836                 if (lo->ldo_stripenr > 0)
1837                         rc = lod_declare_striped_object(env, dt, attr,
1838                                                         NULL, th);
1839         } else if (dof->dof_type == DFT_DIR) {
1840                 rc = lod_declare_dir_striping_create(env, dt, attr, dof, th);
1841         }
1842 out:
1843         RETURN(rc);
1844 }
1845
1846 int lod_striping_create(const struct lu_env *env, struct dt_object *dt,
1847                         struct lu_attr *attr, struct dt_object_format *dof,
1848                         struct thandle *th)
1849 {
1850         struct lod_object *lo = lod_dt_obj(dt);
1851         int                rc = 0, i;
1852         ENTRY;
1853
1854         LASSERT(lo->ldo_striping_cached == 0);
1855
1856         /* create all underlying objects */
1857         for (i = 0; i < lo->ldo_stripenr; i++) {
1858                 LASSERT(lo->ldo_stripe[i]);
1859                 rc = dt_create(env, lo->ldo_stripe[i], attr, NULL, dof, th);
1860
1861                 if (rc)
1862                         break;
1863         }
1864         if (rc == 0)
1865                 rc = lod_generate_and_set_lovea(env, lo, th);
1866
1867         RETURN(rc);
1868 }
1869
1870 static int lod_object_create(const struct lu_env *env, struct dt_object *dt,
1871                              struct lu_attr *attr,
1872                              struct dt_allocation_hint *hint,
1873                              struct dt_object_format *dof, struct thandle *th)
1874 {
1875         struct dt_object   *next = dt_object_child(dt);
1876         struct lod_object  *lo = lod_dt_obj(dt);
1877         int                 rc;
1878         ENTRY;
1879
1880         /* create local object */
1881         rc = dt_create(env, next, attr, hint, dof, th);
1882
1883         if (rc == 0) {
1884                 if (S_ISDIR(dt->do_lu.lo_header->loh_attr))
1885                         rc = lod_dir_striping_create(env, dt, attr, dof, th);
1886                 else if (lo->ldo_stripe && dof->u.dof_reg.striped != 0)
1887                         rc = lod_striping_create(env, dt, attr, dof, th);
1888         }
1889
1890         RETURN(rc);
1891 }
1892
1893 static int lod_declare_object_destroy(const struct lu_env *env,
1894                                       struct dt_object *dt,
1895                                       struct thandle *th)
1896 {
1897         struct dt_object   *next = dt_object_child(dt);
1898         struct lod_object  *lo = lod_dt_obj(dt);
1899         int                 rc, i;
1900         ENTRY;
1901
1902         /*
1903          * we declare destroy for the local object
1904          */
1905         rc = dt_declare_destroy(env, next, th);
1906         if (rc)
1907                 RETURN(rc);
1908
1909         /*
1910          * load striping information, notice we don't do this when object
1911          * is being initialized as we don't need this information till
1912          * few specific cases like destroy, chown
1913          */
1914         rc = lod_load_striping(env, lo);
1915         if (rc)
1916                 RETURN(rc);
1917
1918         /* declare destroy for all underlying objects */
1919         for (i = 0; i < lo->ldo_stripenr; i++) {
1920                 LASSERT(lo->ldo_stripe[i]);
1921                 rc = dt_declare_destroy(env, lo->ldo_stripe[i], th);
1922
1923                 if (rc)
1924                         break;
1925         }
1926
1927         RETURN(rc);
1928 }
1929
1930 static int lod_object_destroy(const struct lu_env *env,
1931                 struct dt_object *dt, struct thandle *th)
1932 {
1933         struct dt_object  *next = dt_object_child(dt);
1934         struct lod_object *lo = lod_dt_obj(dt);
1935         int                rc, i;
1936         ENTRY;
1937
1938         /* destroy local object */
1939         rc = dt_destroy(env, next, th);
1940         if (rc)
1941                 RETURN(rc);
1942
1943         /* destroy all underlying objects */
1944         for (i = 0; i < lo->ldo_stripenr; i++) {
1945                 LASSERT(lo->ldo_stripe[i]);
1946                 /* for striped directory, next == ldo_stripe[0] */
1947                 if (next != lo->ldo_stripe[i]) {
1948                         rc = dt_destroy(env, lo->ldo_stripe[i], th);
1949                         if (rc)
1950                                 break;
1951                 }
1952         }
1953
1954         RETURN(rc);
1955 }
1956
1957 static int lod_index_try(const struct lu_env *env, struct dt_object *dt,
1958                          const struct dt_index_features *feat)
1959 {
1960         struct dt_object *next = dt_object_child(dt);
1961         int               rc;
1962         ENTRY;
1963
1964         LASSERT(next->do_ops);
1965         LASSERT(next->do_ops->do_index_try);
1966
1967         rc = next->do_ops->do_index_try(env, next, feat);
1968         if (next->do_index_ops && dt->do_index_ops == NULL)
1969                 dt->do_index_ops = &lod_index_ops;
1970
1971         RETURN(rc);
1972 }
1973
/* Pass the declaration of a reference increment on to the child object. */
static int lod_declare_ref_add(const struct lu_env *env,
			       struct dt_object *dt, struct thandle *th)
{
	struct dt_object *next = dt_object_child(dt);

	return dt_declare_ref_add(env, next, th);
}
1979
/* Pass the reference increment on to the child object. */
static int lod_ref_add(const struct lu_env *env,
		       struct dt_object *dt, struct thandle *th)
{
	struct dt_object *next = dt_object_child(dt);

	return dt_ref_add(env, next, th);
}
1985
/* Pass the declaration of a reference decrement on to the child object. */
static int lod_declare_ref_del(const struct lu_env *env,
			       struct dt_object *dt, struct thandle *th)
{
	struct dt_object *next = dt_object_child(dt);

	return dt_declare_ref_del(env, next, th);
}
1991
/* Pass the reference decrement on to the child object. */
static int lod_ref_del(const struct lu_env *env,
		       struct dt_object *dt, struct thandle *th)
{
	struct dt_object *next = dt_object_child(dt);

	return dt_ref_del(env, next, th);
}
1997
1998 static struct obd_capa *lod_capa_get(const struct lu_env *env,
1999                                      struct dt_object *dt,
2000                                      struct lustre_capa *old, __u64 opc)
2001 {
2002         return dt_capa_get(env, dt_object_child(dt), old, opc);
2003 }
2004
/* Sync the child object. */
static int lod_object_sync(const struct lu_env *env, struct dt_object *dt)
{
	struct dt_object *next = dt_object_child(dt);

	return dt_object_sync(env, next);
}
2009
2010 struct lod_slave_locks  {
2011         int                     lsl_lock_count;
2012         struct lustre_handle    lsl_handle[0];
2013 };
2014
2015 static int lod_object_unlock_internal(const struct lu_env *env,
2016                                       struct dt_object *dt,
2017                                       struct ldlm_enqueue_info *einfo,
2018                                       ldlm_policy_data_t *policy)
2019 {
2020         struct lod_object       *lo = lod_dt_obj(dt);
2021         struct lod_slave_locks  *slave_locks = einfo->ei_cbdata;
2022         int                     rc = 0;
2023         int                     i;
2024         ENTRY;
2025
2026         if (slave_locks == NULL)
2027                 RETURN(0);
2028
2029         for (i = 0; i < slave_locks->lsl_lock_count; i++) {
2030                 if (lustre_handle_is_used(&slave_locks->lsl_handle[i])) {
2031                         int     rc1;
2032
2033                         einfo->ei_cbdata = &slave_locks->lsl_handle[i];
2034                         rc1 = dt_object_unlock(env, lo->ldo_stripe[i], einfo,
2035                                                policy);
2036                         if (rc1 < 0)
2037                                 rc = rc == 0 ? rc1 : rc;
2038                 }
2039         }
2040
2041         RETURN(rc);
2042 }
2043
2044 static int lod_object_unlock(const struct lu_env *env, struct dt_object *dt,
2045                              struct ldlm_enqueue_info *einfo,
2046                              union ldlm_policy_data *policy)
2047 {
2048         struct lod_object       *lo = lod_dt_obj(dt);
2049         struct lod_slave_locks  *slave_locks = einfo->ei_cbdata;
2050         int                     slave_locks_size;
2051         int                     rc;
2052         ENTRY;
2053
2054         if (slave_locks == NULL)
2055                 RETURN(0);
2056
2057         rc = lod_load_striping(env, lo);
2058         if (rc != 0)
2059                 RETURN(rc);
2060
2061         /* Note: for remote lock for single stripe dir, MDT will cancel
2062          * the lock by lockh directly */
2063         if (lo->ldo_stripenr == 0 && dt_object_remote(dt_object_child(dt)))
2064                 RETURN(0);
2065
2066         if (!S_ISDIR(dt->do_lu.lo_header->loh_attr))
2067                 RETURN(-ENOTDIR);
2068
2069         /* Only cancel slave lock for striped dir */
2070         rc = lod_object_unlock_internal(env, dt, einfo, policy);
2071
2072         slave_locks_size = sizeof(*slave_locks) + slave_locks->lsl_lock_count *
2073                            sizeof(slave_locks->lsl_handle[0]);
2074         OBD_FREE(slave_locks, slave_locks_size);
2075         einfo->ei_cbdata = NULL;
2076
2077         RETURN(rc);
2078 }
2079
2080 static int lod_object_lock(const struct lu_env *env,
2081                            struct dt_object *dt,
2082                            struct lustre_handle *lh,
2083                            struct ldlm_enqueue_info *einfo,
2084                            union ldlm_policy_data *policy)
2085 {
2086         struct lod_object       *lo = lod_dt_obj(dt);
2087         int                     rc = 0;
2088         int                     i;
2089         int                     slave_locks_size;
2090         struct lod_slave_locks  *slave_locks = NULL;
2091         ENTRY;
2092
2093         /* remote object lock */
2094         if (!einfo->ei_enq_slave) {
2095                 LASSERT(dt_object_remote(dt));
2096                 return dt_object_lock(env, dt_object_child(dt), lh, einfo,
2097                                       policy);
2098         }
2099
2100         if (!S_ISDIR(dt->do_lu.lo_header->loh_attr))
2101                 RETURN(-ENOTDIR);
2102
2103         rc = lod_load_striping(env, lo);
2104         if (rc != 0)
2105                 RETURN(rc);
2106
2107         /* No stripes */
2108         if (lo->ldo_stripenr == 0)
2109                 RETURN(0);
2110
2111         slave_locks_size = sizeof(*slave_locks) + lo->ldo_stripenr *
2112                            sizeof(slave_locks->lsl_handle[0]);
2113         /* Freed in lod_object_unlock */
2114         OBD_ALLOC(slave_locks, slave_locks_size);
2115         if (slave_locks == NULL)
2116                 RETURN(-ENOMEM);
2117         slave_locks->lsl_lock_count = lo->ldo_stripenr;
2118
2119         /* striped directory lock */
2120         for (i = 0; i < lo->ldo_stripenr; i++) {
2121                 struct lustre_handle    lockh;
2122
2123                 LASSERT(lo->ldo_stripe[i]);
2124                 rc = dt_object_lock(env, lo->ldo_stripe[i], &lockh, einfo,
2125                                     policy);
2126                 if (rc != 0)
2127                         GOTO(out, rc);
2128
2129                 slave_locks->lsl_handle[i] = lockh;
2130         }
2131
2132         einfo->ei_cbdata = slave_locks;
2133
2134 out:
2135         if (rc != 0 && slave_locks != NULL) {
2136                 einfo->ei_cbdata = slave_locks;
2137                 lod_object_unlock_internal(env, dt, einfo, policy);
2138                 OBD_FREE(slave_locks, slave_locks_size);
2139                 einfo->ei_cbdata = NULL;
2140         }
2141
2142         RETURN(rc);
2143 }
2144
2145 struct dt_object_operations lod_obj_ops = {
2146         .do_read_lock           = lod_object_read_lock,
2147         .do_write_lock          = lod_object_write_lock,
2148         .do_read_unlock         = lod_object_read_unlock,
2149         .do_write_unlock        = lod_object_write_unlock,
2150         .do_write_locked        = lod_object_write_locked,
2151         .do_attr_get            = lod_attr_get,
2152         .do_declare_attr_set    = lod_declare_attr_set,
2153         .do_attr_set            = lod_attr_set,
2154         .do_xattr_get           = lod_xattr_get,
2155         .do_declare_xattr_set   = lod_declare_xattr_set,
2156         .do_xattr_set           = lod_xattr_set,
2157         .do_declare_xattr_del   = lod_declare_xattr_del,
2158         .do_xattr_del           = lod_xattr_del,
2159         .do_xattr_list          = lod_xattr_list,
2160         .do_ah_init             = lod_ah_init,
2161         .do_declare_create      = lod_declare_object_create,
2162         .do_create              = lod_object_create,
2163         .do_declare_destroy     = lod_declare_object_destroy,
2164         .do_destroy             = lod_object_destroy,
2165         .do_index_try           = lod_index_try,
2166         .do_declare_ref_add     = lod_declare_ref_add,
2167         .do_ref_add             = lod_ref_add,
2168         .do_declare_ref_del     = lod_declare_ref_del,
2169         .do_ref_del             = lod_ref_del,
2170         .do_capa_get            = lod_capa_get,
2171         .do_object_sync         = lod_object_sync,
2172         .do_object_lock         = lod_object_lock,
2173         .do_object_unlock       = lod_object_unlock,
2174 };
2175
2176 static ssize_t lod_read(const struct lu_env *env, struct dt_object *dt,
2177                         struct lu_buf *buf, loff_t *pos,
2178                         struct lustre_capa *capa)
2179 {
2180         struct dt_object *next = dt_object_child(dt);
2181         return next->do_body_ops->dbo_read(env, next, buf, pos, capa);
2182 }
2183
2184 static ssize_t lod_declare_write(const struct lu_env *env,
2185                                  struct dt_object *dt,
2186                                  const loff_t size, loff_t pos,
2187                                  struct thandle *th)
2188 {
2189         return dt_declare_record_write(env, dt_object_child(dt),
2190                                        size, pos, th);
2191 }
2192
2193 static ssize_t lod_write(const struct lu_env *env, struct dt_object *dt,
2194                          const struct lu_buf *buf, loff_t *pos,
2195                          struct thandle *th, struct lustre_capa *capa, int iq)
2196 {
2197         struct dt_object *next = dt_object_child(dt);
2198         LASSERT(next);
2199         return next->do_body_ops->dbo_write(env, next, buf, pos, th, capa, iq);
2200 }
2201
2202 static const struct dt_body_operations lod_body_lnk_ops = {
2203         .dbo_read               = lod_read,
2204         .dbo_declare_write      = lod_declare_write,
2205         .dbo_write              = lod_write
2206 };
2207
2208 static int lod_object_init(const struct lu_env *env, struct lu_object *lo,
2209                            const struct lu_object_conf *conf)
2210 {
2211         struct lod_device       *lod    = lu2lod_dev(lo->lo_dev);
2212         struct lu_device        *cdev   = NULL;
2213         struct lu_object        *cobj;
2214         struct lod_tgt_descs    *ltd    = NULL;
2215         struct lod_tgt_desc     *tgt;
2216         mdsno_t                  idx    = 0;
2217         int                      type   = LU_SEQ_RANGE_ANY;
2218         int                      rc;
2219         ENTRY;
2220
2221         rc = lod_fld_lookup(env, lod, lu_object_fid(lo), &idx, &type);
2222         if (rc != 0)
2223                 RETURN(rc);
2224
2225         if (type == LU_SEQ_RANGE_MDT &&
2226             idx == lu_site2seq(lo->lo_dev->ld_site)->ss_node_id) {
2227                 cdev = &lod->lod_child->dd_lu_dev;
2228         } else if (type == LU_SEQ_RANGE_MDT) {
2229                 ltd = &lod->lod_mdt_descs;
2230                 lod_getref(ltd);
2231         } else if (type == LU_SEQ_RANGE_OST) {
2232                 ltd = &lod->lod_ost_descs;
2233                 lod_getref(ltd);
2234         } else {
2235                 LBUG();
2236         }
2237
2238         if (ltd != NULL) {
2239                 if (ltd->ltd_tgts_size > idx &&
2240                     cfs_bitmap_check(ltd->ltd_tgt_bitmap, idx)) {
2241                         tgt = LTD_TGT(ltd, idx);
2242
2243                         LASSERT(tgt != NULL);
2244                         LASSERT(tgt->ltd_tgt != NULL);
2245
2246                         cdev = &(tgt->ltd_tgt->dd_lu_dev);
2247                 }
2248                 lod_putref(lod, ltd);
2249         }
2250
2251         if (unlikely(cdev == NULL))
2252                 RETURN(-ENOENT);
2253
2254         cobj = cdev->ld_ops->ldo_object_alloc(env, lo->lo_header, cdev);
2255         if (unlikely(cobj == NULL))
2256                 RETURN(-ENOMEM);
2257
2258         lu_object_add(lo, cobj);
2259
2260         RETURN(0);
2261 }
2262
2263 void lod_object_free_striping(const struct lu_env *env, struct lod_object *lo)
2264 {
2265         int i;
2266
2267         if (lo->ldo_dir_stripe != NULL) {
2268                 OBD_FREE_PTR(lo->ldo_dir_stripe);
2269                 lo->ldo_dir_stripe = NULL;
2270         }
2271
2272         if (lo->ldo_stripe) {
2273                 LASSERT(lo->ldo_stripes_allocated > 0);
2274
2275                 for (i = 0; i < lo->ldo_stripenr; i++) {
2276                         if (lo->ldo_stripe[i])
2277                                 lu_object_put(env, &lo->ldo_stripe[i]->do_lu);
2278                 }
2279
2280                 i = sizeof(struct dt_object *) * lo->ldo_stripes_allocated;
2281                 OBD_FREE(lo->ldo_stripe, i);
2282                 lo->ldo_stripe = NULL;
2283                 lo->ldo_stripes_allocated = 0;
2284         }
2285         lo->ldo_stripenr = 0;
2286         lo->ldo_pattern = 0;
2287 }
2288
2289 /*
2290  * ->start is called once all slices are initialized, including header's
2291  * cache for mode (object type). using the type we can initialize ops
2292  */
2293 static int lod_object_start(const struct lu_env *env, struct lu_object *o)
2294 {
2295         if (S_ISLNK(o->lo_header->loh_attr & S_IFMT))
2296                 lu2lod_obj(o)->ldo_obj.do_body_ops = &lod_body_lnk_ops;
2297         return 0;
2298 }
2299
2300 static void lod_object_free(const struct lu_env *env, struct lu_object *o)
2301 {
2302         struct lod_object *mo = lu2lod_obj(o);
2303
2304         /*
2305          * release all underlying object pinned
2306          */
2307
2308         lod_object_free_striping(env, mo);
2309
2310         lod_object_set_pool(mo, NULL);
2311
2312         lu_object_fini(o);
2313         OBD_SLAB_FREE_PTR(mo, lod_object_kmem);
2314 }
2315
/*
 * ->loo_object_release() handler; intentionally does nothing for now.
 */
static void lod_object_release(const struct lu_env *env, struct lu_object *o)
{
	/* XXX: should everything be released here in case object creation
	 * failed earlier? */
}
2321
2322 static int lod_object_print(const struct lu_env *env, void *cookie,
2323                             lu_printer_t p, const struct lu_object *l)
2324 {
2325         struct lod_object *o = lu2lod_obj((struct lu_object *) l);
2326
2327         return (*p)(env, cookie, LUSTRE_LOD_NAME"-object@%p", o);
2328 }
2329
2330 struct lu_object_operations lod_lu_obj_ops = {
2331         .loo_object_init        = lod_object_init,
2332         .loo_object_start       = lod_object_start,
2333         .loo_object_free        = lod_object_free,
2334         .loo_object_release     = lod_object_release,
2335         .loo_object_print       = lod_object_print,
2336 };