Whamcloud - gitweb
LU-3336 lfsck: recreate the lost MDT-object
[fs/lustre-release.git] / lustre / lod / lod_object.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License version 2 for more details.  A copy is
14  * included in the COPYING file that accompanied this code.
15
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright  2009 Sun Microsystems, Inc. All rights reserved
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2012, 2013, Intel Corporation.
27  */
28 /*
29  * lustre/lod/lod_object.c
30  *
31  * Author: Alex Zhuravlev <alexey.zhuravlev@intel.com>
32  */
33
34 #define DEBUG_SUBSYSTEM S_MDS
35
36 #include <obd.h>
37 #include <obd_class.h>
38 #include <lustre_ver.h>
39 #include <obd_support.h>
40 #include <lprocfs_status.h>
41
42 #include <lustre_fid.h>
43 #include <lustre_param.h>
44 #include <lustre_fid.h>
45 #include <lustre_lmv.h>
46 #include <obd_lov.h>
47 #include <md_object.h>
48
49 #include "lod_internal.h"
50
/* Names of the "self" and "parent" directory entries; they are used as
 * index keys when the entries of slave stripes are declared. */
static const char dot[] = ".";
static const char dotdot[] = "..";
53
54 extern struct kmem_cache *lod_object_kmem;
55 static const struct dt_body_operations lod_body_lnk_ops;
56
57 static int lod_index_lookup(const struct lu_env *env, struct dt_object *dt,
58                             struct dt_rec *rec, const struct dt_key *key,
59                             struct lustre_capa *capa)
60 {
61         struct dt_object *next = dt_object_child(dt);
62         return next->do_index_ops->dio_lookup(env, next, rec, key, capa);
63 }
64
/**
 * Declare an index insertion on the child object.
 *
 * Thin pass-through to dt_declare_insert() on dt_object_child(\a dt).
 */
static int lod_declare_index_insert(const struct lu_env *env,
				    struct dt_object *dt,
				    const struct dt_rec *rec,
				    const struct dt_key *key,
				    struct thandle *handle)
{
	struct dt_object *child = dt_object_child(dt);

	return dt_declare_insert(env, child, rec, key, handle);
}
73
/**
 * Insert a (key, record) pair into the index of the child object.
 *
 * Thin pass-through to dt_insert() on dt_object_child(\a dt); all other
 * arguments are handed over untouched.
 */
static int lod_index_insert(const struct lu_env *env,
			    struct dt_object *dt,
			    const struct dt_rec *rec,
			    const struct dt_key *key,
			    struct thandle *th,
			    struct lustre_capa *capa,
			    int ign)
{
	struct dt_object *child = dt_object_child(dt);

	return dt_insert(env, child, rec, key, th, capa, ign);
}
84
/**
 * Declare an index deletion on the child object.
 *
 * Thin pass-through to dt_declare_delete() on dt_object_child(\a dt).
 */
static int lod_declare_index_delete(const struct lu_env *env,
				    struct dt_object *dt,
				    const struct dt_key *key,
				    struct thandle *th)
{
	struct dt_object *child = dt_object_child(dt);

	return dt_declare_delete(env, child, key, th);
}
92
/**
 * Delete \a key from the index of the child object.
 *
 * Thin pass-through to dt_delete() on dt_object_child(\a dt).
 */
static int lod_index_delete(const struct lu_env *env,
			    struct dt_object *dt,
			    const struct dt_key *key,
			    struct thandle *th,
			    struct lustre_capa *capa)
{
	struct dt_object *child = dt_object_child(dt);

	return dt_delete(env, child, key, th, capa);
}
101
102 static struct dt_it *lod_it_init(const struct lu_env *env,
103                                  struct dt_object *dt, __u32 attr,
104                                  struct lustre_capa *capa)
105 {
106         struct dt_object        *next = dt_object_child(dt);
107         struct lod_it           *it = &lod_env_info(env)->lti_it;
108         struct dt_it            *it_next;
109
110
111         it_next = next->do_index_ops->dio_it.init(env, next, attr, capa);
112         if (IS_ERR(it_next))
113                 return it_next;
114
115         /* currently we do not use more than one iterator per thread
116          * so we store it in thread info. if at some point we need
117          * more active iterators in a single thread, we can allocate
118          * additional ones */
119         LASSERT(it->lit_obj == NULL);
120
121         it->lit_it = it_next;
122         it->lit_obj = next;
123
124         return (struct dt_it *)it;
125 }
126
/*
 * Sanity-check a lod iterator before it is used: both the object being
 * iterated and the underlying iterator must be set.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement and stays correct inside unbraced if/else bodies.
 * \a env is accepted for symmetry with the callers but is not used.
 */
#define LOD_CHECK_IT(env, it)					\
do {								\
	LASSERT((it)->lit_obj != NULL);				\
	LASSERT((it)->lit_it != NULL);				\
} while (0)
132
133 void lod_it_fini(const struct lu_env *env, struct dt_it *di)
134 {
135         struct lod_it *it = (struct lod_it *)di;
136
137         LOD_CHECK_IT(env, it);
138         it->lit_obj->do_index_ops->dio_it.fini(env, it->lit_it);
139
140         /* the iterator not in use any more */
141         it->lit_obj = NULL;
142         it->lit_it = NULL;
143 }
144
145 int lod_it_get(const struct lu_env *env, struct dt_it *di,
146                const struct dt_key *key)
147 {
148         const struct lod_it *it = (const struct lod_it *)di;
149
150         LOD_CHECK_IT(env, it);
151         return it->lit_obj->do_index_ops->dio_it.get(env, it->lit_it, key);
152 }
153
154 void lod_it_put(const struct lu_env *env, struct dt_it *di)
155 {
156         struct lod_it *it = (struct lod_it *)di;
157
158         LOD_CHECK_IT(env, it);
159         return it->lit_obj->do_index_ops->dio_it.put(env, it->lit_it);
160 }
161
162 int lod_it_next(const struct lu_env *env, struct dt_it *di)
163 {
164         struct lod_it *it = (struct lod_it *)di;
165
166         LOD_CHECK_IT(env, it);
167         return it->lit_obj->do_index_ops->dio_it.next(env, it->lit_it);
168 }
169
170 struct dt_key *lod_it_key(const struct lu_env *env, const struct dt_it *di)
171 {
172         const struct lod_it *it = (const struct lod_it *)di;
173
174         LOD_CHECK_IT(env, it);
175         return it->lit_obj->do_index_ops->dio_it.key(env, it->lit_it);
176 }
177
178 int lod_it_key_size(const struct lu_env *env, const struct dt_it *di)
179 {
180         struct lod_it *it = (struct lod_it *)di;
181
182         LOD_CHECK_IT(env, it);
183         return it->lit_obj->do_index_ops->dio_it.key_size(env, it->lit_it);
184 }
185
186 int lod_it_rec(const struct lu_env *env, const struct dt_it *di,
187                struct dt_rec *rec, __u32 attr)
188 {
189         const struct lod_it *it = (const struct lod_it *)di;
190
191         LOD_CHECK_IT(env, it);
192         return it->lit_obj->do_index_ops->dio_it.rec(env, it->lit_it, rec, attr);
193 }
194
195 __u64 lod_it_store(const struct lu_env *env, const struct dt_it *di)
196 {
197         const struct lod_it *it = (const struct lod_it *)di;
198
199         LOD_CHECK_IT(env, it);
200         return it->lit_obj->do_index_ops->dio_it.store(env, it->lit_it);
201 }
202
203 int lod_it_load(const struct lu_env *env, const struct dt_it *di, __u64 hash)
204 {
205         const struct lod_it *it = (const struct lod_it *)di;
206
207         LOD_CHECK_IT(env, it);
208         return it->lit_obj->do_index_ops->dio_it.load(env, it->lit_it, hash);
209 }
210
211 int lod_it_key_rec(const struct lu_env *env, const struct dt_it *di,
212                    void* key_rec)
213 {
214         const struct lod_it *it = (const struct lod_it *)di;
215
216         LOD_CHECK_IT(env, it);
217         return it->lit_obj->do_index_ops->dio_it.key_rec(env, it->lit_it, key_rec);
218 }
219
220 static struct dt_index_operations lod_index_ops = {
221         .dio_lookup             = lod_index_lookup,
222         .dio_declare_insert     = lod_declare_index_insert,
223         .dio_insert             = lod_index_insert,
224         .dio_declare_delete     = lod_declare_index_delete,
225         .dio_delete             = lod_index_delete,
226         .dio_it = {
227                 .init           = lod_it_init,
228                 .fini           = lod_it_fini,
229                 .get            = lod_it_get,
230                 .put            = lod_it_put,
231                 .next           = lod_it_next,
232                 .key            = lod_it_key,
233                 .key_size       = lod_it_key_size,
234                 .rec            = lod_it_rec,
235                 .store          = lod_it_store,
236                 .load           = lod_it_load,
237                 .key_rec        = lod_it_key_rec,
238         }
239 };
240
/**
 * Take the read lock of the child object via dt_read_lock(), passing
 * \a role through.
 */
static void lod_object_read_lock(const struct lu_env *env,
				 struct dt_object *dt, unsigned role)
{
	struct dt_object *child = dt_object_child(dt);

	dt_read_lock(env, child, role);
}
246
/**
 * Take the write lock of the child object via dt_write_lock(), passing
 * \a role through.
 */
static void lod_object_write_lock(const struct lu_env *env,
				  struct dt_object *dt, unsigned role)
{
	struct dt_object *child = dt_object_child(dt);

	dt_write_lock(env, child, role);
}
252
/**
 * Drop the read lock of the child object via dt_read_unlock().
 */
static void lod_object_read_unlock(const struct lu_env *env,
				   struct dt_object *dt)
{
	struct dt_object *child = dt_object_child(dt);

	dt_read_unlock(env, child);
}
258
/**
 * Drop the write lock of the child object via dt_write_unlock().
 */
static void lod_object_write_unlock(const struct lu_env *env,
				    struct dt_object *dt)
{
	struct dt_object *child = dt_object_child(dt);

	dt_write_unlock(env, child);
}
264
/**
 * Report the result of dt_write_locked() for the child object.
 */
static int lod_object_write_locked(const struct lu_env *env,
				   struct dt_object *dt)
{
	struct dt_object *child = dt_object_child(dt);

	return dt_write_locked(env, child);
}
270
/**
 * Fetch the attributes of the child object into \a attr.
 *
 * Thin pass-through to dt_attr_get() on dt_object_child(\a dt).
 */
static int lod_attr_get(const struct lu_env *env,
			struct dt_object *dt,
			struct lu_attr *attr,
			struct lustre_capa *capa)
{
	struct dt_object *child = dt_object_child(dt);

	return dt_attr_get(env, child, attr, capa);
}
278
279 static int lod_declare_attr_set(const struct lu_env *env,
280                                 struct dt_object *dt,
281                                 const struct lu_attr *attr,
282                                 struct thandle *handle)
283 {
284         struct dt_object  *next = dt_object_child(dt);
285         struct lod_object *lo = lod_dt_obj(dt);
286         int                rc, i;
287         ENTRY;
288
289         /*
290          * declare setattr on the local object
291          */
292         rc = dt_declare_attr_set(env, next, attr, handle);
293         if (rc)
294                 RETURN(rc);
295
296         /* osp_declare_attr_set() ignores all attributes other than
297          * UID, GID, and size, and osp_attr_set() ignores all but UID
298          * and GID.  Declaration of size attr setting happens through
299          * lod_declare_init_size(), and not through this function.
300          * Therefore we need not load striping unless ownership is
301          * changing.  This should save memory and (we hope) speed up
302          * rename(). */
303         if (!S_ISDIR(dt->do_lu.lo_header->loh_attr)) {
304                 if (!(attr->la_valid & (LA_UID | LA_GID)))
305                         RETURN(rc);
306
307                 if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_BAD_OWNER))
308                         RETURN(0);
309         } else {
310                 if (!(attr->la_valid & (LA_UID | LA_GID | LA_MODE |
311                                         LA_ATIME | LA_MTIME | LA_CTIME)))
312                         RETURN(rc);
313         }
314         /*
315          * load striping information, notice we don't do this when object
316          * is being initialized as we don't need this information till
317          * few specific cases like destroy, chown
318          */
319         rc = lod_load_striping(env, lo);
320         if (rc)
321                 RETURN(rc);
322
323         if (lo->ldo_stripenr == 0)
324                 RETURN(0);
325
326         if (!(attr->la_valid & ~(LA_ATIME | LA_MTIME | LA_CTIME))) {
327                 struct lu_attr   *la = &lod_env_info(env)->lti_attr;
328                 bool             setattr_time = false;
329
330                 rc = dt_attr_get(env, dt_object_child(dt), la,
331                                  BYPASS_CAPA);
332                 if (rc != 0)
333                         RETURN(rc);
334
335                 /* If it will only setattr time, it will only set
336                  * time < current_time */
337                 if ((attr->la_valid & LA_ATIME &&
338                      attr->la_atime < la->la_atime) ||
339                     (attr->la_valid & LA_CTIME &&
340                      attr->la_ctime < la->la_ctime) ||
341                     (attr->la_valid & LA_MTIME &&
342                      attr->la_mtime < la->la_mtime))
343                         setattr_time = true;
344
345                 if (!setattr_time)
346                         RETURN(0);
347         }
348         /*
349          * if object is striped declare changes on the stripes
350          */
351         LASSERT(lo->ldo_stripe);
352         for (i = 0; i < lo->ldo_stripenr; i++) {
353                 LASSERT(lo->ldo_stripe[i]);
354
355                 rc = dt_declare_attr_set(env, lo->ldo_stripe[i], attr, handle);
356                 if (rc) {
357                         CERROR("failed declaration: %d\n", rc);
358                         break;
359                 }
360         }
361
362         if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_LOST_STRIPE) &&
363             dt_object_exists(next) != 0 &&
364             dt_object_remote(next) == 0)
365                 dt_declare_xattr_del(env, next, XATTR_NAME_LOV, handle);
366
367         if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_CHANGE_STRIPE) &&
368             dt_object_exists(next) &&
369             dt_object_remote(next) == 0 && S_ISREG(attr->la_mode)) {
370                 struct lod_thread_info *info = lod_env_info(env);
371                 struct lu_buf *buf = &info->lti_buf;
372
373                 buf->lb_buf = info->lti_ea_store;
374                 buf->lb_len = info->lti_ea_store_size;
375                 dt_declare_xattr_set(env, next, buf, XATTR_NAME_LOV,
376                                      LU_XATTR_REPLACE, handle);
377         }
378
379         RETURN(rc);
380 }
381
382 static int lod_attr_set(const struct lu_env *env,
383                         struct dt_object *dt,
384                         const struct lu_attr *attr,
385                         struct thandle *handle,
386                         struct lustre_capa *capa)
387 {
388         struct dt_object  *next = dt_object_child(dt);
389         struct lod_object *lo = lod_dt_obj(dt);
390         int                rc, i;
391         ENTRY;
392
393         /*
394          * apply changes to the local object
395          */
396         rc = dt_attr_set(env, next, attr, handle, capa);
397         if (rc)
398                 RETURN(rc);
399
400         if (!S_ISDIR(dt->do_lu.lo_header->loh_attr)) {
401                 if (!(attr->la_valid & (LA_UID | LA_GID)))
402                         RETURN(rc);
403
404                 if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_BAD_OWNER))
405                         RETURN(0);
406         } else {
407                 if (!(attr->la_valid & (LA_UID | LA_GID | LA_MODE |
408                                         LA_ATIME | LA_MTIME | LA_CTIME)))
409                         RETURN(rc);
410         }
411
412         if (lo->ldo_stripenr == 0)
413                 RETURN(0);
414
415         if (!(attr->la_valid & ~(LA_ATIME | LA_MTIME | LA_CTIME))) {
416                 struct lu_attr   *la = &lod_env_info(env)->lti_attr;
417                 bool             setattr_time = false;
418
419                 rc = dt_attr_get(env, dt_object_child(dt), la,
420                                  BYPASS_CAPA);
421                 if (rc != 0)
422                         RETURN(rc);
423
424                 /* If it will only setattr time, it will only set
425                  * time < current_time */
426                 if ((attr->la_valid & LA_ATIME &&
427                      attr->la_atime < la->la_atime) ||
428                     (attr->la_valid & LA_CTIME &&
429                      attr->la_ctime < la->la_ctime) ||
430                     (attr->la_valid & LA_MTIME &&
431                      attr->la_mtime < la->la_mtime))
432                         setattr_time = true;
433
434                 if (!setattr_time)
435                         RETURN(0);
436         }
437
438         /*
439          * if object is striped, apply changes to all the stripes
440          */
441         LASSERT(lo->ldo_stripe);
442         for (i = 0; i < lo->ldo_stripenr; i++) {
443                 LASSERT(lo->ldo_stripe[i]);
444                 rc = dt_attr_set(env, lo->ldo_stripe[i], attr, handle, capa);
445                 if (rc) {
446                         CERROR("failed declaration: %d\n", rc);
447                         break;
448                 }
449         }
450
451         if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_LOST_STRIPE) &&
452             dt_object_exists(next) != 0 &&
453             dt_object_remote(next) == 0)
454                 dt_xattr_del(env, next, XATTR_NAME_LOV, handle, BYPASS_CAPA);
455
456         if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_CHANGE_STRIPE) &&
457             dt_object_exists(next) &&
458             dt_object_remote(next) == 0 && S_ISREG(attr->la_mode)) {
459                 struct lod_thread_info *info = lod_env_info(env);
460                 struct lu_buf *buf = &info->lti_buf;
461                 struct ost_id *oi = &info->lti_ostid;
462                 struct lu_fid *fid = &info->lti_fid;
463                 struct lov_mds_md_v1 *lmm;
464                 struct lov_ost_data_v1 *objs;
465                 __u32 magic;
466                 int rc1;
467
468                 rc1 = lod_get_lov_ea(env, lo);
469                 if (rc1  <= 0)
470                         RETURN(rc);
471
472                 buf->lb_buf = info->lti_ea_store;
473                 buf->lb_len = info->lti_ea_store_size;
474                 lmm = info->lti_ea_store;
475                 magic = le32_to_cpu(lmm->lmm_magic);
476                 if (magic == LOV_MAGIC_V1)
477                         objs = &(lmm->lmm_objects[0]);
478                 else
479                         objs = &((struct lov_mds_md_v3 *)lmm)->lmm_objects[0];
480                 ostid_le_to_cpu(&objs->l_ost_oi, oi);
481                 ostid_to_fid(fid, oi, le32_to_cpu(objs->l_ost_idx));
482                 fid->f_oid--;
483                 fid_to_ostid(fid, oi);
484                 ostid_cpu_to_le(oi, &objs->l_ost_oi);
485                 dt_xattr_set(env, next, buf, XATTR_NAME_LOV,
486                              LU_XATTR_REPLACE, handle, BYPASS_CAPA);
487         }
488
489         RETURN(rc);
490 }
491
492 static int lod_xattr_get(const struct lu_env *env, struct dt_object *dt,
493                          struct lu_buf *buf, const char *name,
494                          struct lustre_capa *capa)
495 {
496         struct lod_thread_info  *info = lod_env_info(env);
497         struct lod_device       *dev = lu2lod_dev(dt->do_lu.lo_dev);
498         int                      rc, is_root;
499         ENTRY;
500
501         rc = dt_xattr_get(env, dt_object_child(dt), buf, name, capa);
502         if (rc != -ENODATA || !S_ISDIR(dt->do_lu.lo_header->loh_attr & S_IFMT))
503                 RETURN(rc);
504
505         /*
506          * lod returns default striping on the real root of the device
507          * this is like the root stores default striping for the whole
508          * filesystem. historically we've been using a different approach
509          * and store it in the config.
510          */
511         dt_root_get(env, dev->lod_child, &info->lti_fid);
512         is_root = lu_fid_eq(&info->lti_fid, lu_object_fid(&dt->do_lu));
513
514         if (is_root && strcmp(XATTR_NAME_LOV, name) == 0) {
515                 struct lov_user_md *lum = buf->lb_buf;
516                 struct lov_desc    *desc = &dev->lod_desc;
517
518                 if (buf->lb_buf == NULL) {
519                         rc = sizeof(*lum);
520                 } else if (buf->lb_len >= sizeof(*lum)) {
521                         lum->lmm_magic = cpu_to_le32(LOV_USER_MAGIC_V1);
522                         lmm_oi_set_seq(&lum->lmm_oi, FID_SEQ_LOV_DEFAULT);
523                         lmm_oi_set_id(&lum->lmm_oi, 0);
524                         lmm_oi_cpu_to_le(&lum->lmm_oi, &lum->lmm_oi);
525                         lum->lmm_pattern = cpu_to_le32(desc->ld_pattern);
526                         lum->lmm_stripe_size = cpu_to_le32(
527                                                 desc->ld_default_stripe_size);
528                         lum->lmm_stripe_count = cpu_to_le16(
529                                                 desc->ld_default_stripe_count);
530                         lum->lmm_stripe_offset = cpu_to_le16(
531                                                 desc->ld_default_stripe_offset);
532                         rc = sizeof(*lum);
533                 } else {
534                         rc = -ERANGE;
535                 }
536         }
537
538         RETURN(rc);
539 }
540
541 static int lod_verify_md_striping(struct lod_device *lod,
542                                   const struct lmv_user_md_v1 *lum)
543 {
544         int     rc = 0;
545         ENTRY;
546
547         if (unlikely(le32_to_cpu(lum->lum_magic) != LMV_USER_MAGIC))
548                 GOTO(out, rc = -EINVAL);
549
550         if (unlikely(le32_to_cpu(lum->lum_stripe_count) == 0))
551                 GOTO(out, rc = -EINVAL);
552
553         if (unlikely(le32_to_cpu(lum->lum_stripe_count) >
554                                 lod->lod_remote_mdt_count + 1))
555                 GOTO(out, rc = -EINVAL);
556 out:
557         if (rc != 0)
558                 CERROR("%s: invalid lmv_user_md: magic = %x, "
559                        "stripe_offset = %d, stripe_count = %u: rc = %d\n",
560                        lod2obd(lod)->obd_name, le32_to_cpu(lum->lum_magic),
561                        (int)le32_to_cpu(lum->lum_stripe_offset),
562                        le32_to_cpu(lum->lum_stripe_count), rc);
563         return rc;
564 }
565
566 int lod_prep_lmv_md(const struct lu_env *env, struct dt_object *dt,
567                     struct lu_buf *lmv_buf)
568 {
569         struct lod_thread_info  *info = lod_env_info(env);
570         struct lod_device       *lod = lu2lod_dev(dt->do_lu.lo_dev);
571         struct lod_object       *lo = lod_dt_obj(dt);
572         struct lmv_mds_md_v1    *lmm1;
573         int                     stripe_count;
574         int                     lmm_size;
575         int                     type = LU_SEQ_RANGE_ANY;
576         int                     i;
577         int                     rc;
578         __u32                   mdtidx;
579         ENTRY;
580
581         LASSERT(lo->ldo_dir_striped != 0);
582         LASSERT(lo->ldo_stripenr > 0);
583         stripe_count = lo->ldo_stripenr + 1;
584         lmm_size = lmv_mds_md_size(stripe_count, LMV_MAGIC);
585         if (info->lti_ea_store_size < lmm_size) {
586                 rc = lod_ea_store_resize(info, lmm_size);
587                 if (rc != 0)
588                         RETURN(rc);
589         }
590
591         lmm1 = (struct lmv_mds_md_v1 *)info->lti_ea_store;
592         lmm1->lmv_magic = cpu_to_le32(LMV_MAGIC);
593         lmm1->lmv_stripe_count = cpu_to_le32(stripe_count);
594         lmm1->lmv_hash_type = cpu_to_le32(lo->ldo_dir_hash_type);
595         rc = lod_fld_lookup(env, lod, lu_object_fid(&dt->do_lu),
596                             &mdtidx, &type);
597         if (rc != 0)
598                 RETURN(rc);
599
600         lmm1->lmv_master_mdt_index = cpu_to_le32(mdtidx);
601         fid_cpu_to_le(&lmm1->lmv_stripe_fids[0], lu_object_fid(&dt->do_lu));
602         for (i = 0; i < lo->ldo_stripenr; i++) {
603                 struct dt_object *dto;
604
605                 dto = lo->ldo_stripe[i];
606                 LASSERT(dto != NULL);
607                 fid_cpu_to_le(&lmm1->lmv_stripe_fids[i + 1],
608                               lu_object_fid(&dto->do_lu));
609         }
610
611         lmv_buf->lb_buf = info->lti_ea_store;
612         lmv_buf->lb_len = lmm_size;
613         lo->ldo_dir_striping_cached = 1;
614
615         RETURN(rc);
616 }
617
618 int lod_parse_dir_striping(const struct lu_env *env, struct lod_object *lo,
619                            const struct lu_buf *buf)
620 {
621         struct lod_thread_info  *info = lod_env_info(env);
622         struct lod_device       *lod = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
623         struct lod_tgt_descs    *ltd = &lod->lod_mdt_descs;
624         struct dt_object        **stripe;
625         union lmv_mds_md        *lmm = buf->lb_buf;
626         struct lmv_mds_md_v1    *lmv1 = &lmm->lmv_md_v1;
627         struct lu_fid           *fid = &info->lti_fid;
628         int                     i;
629         int                     rc = 0;
630         ENTRY;
631
632         if (le32_to_cpu(lmv1->lmv_magic) != LMV_MAGIC_V1)
633                 RETURN(-EINVAL);
634
635         if (le32_to_cpu(lmv1->lmv_stripe_count) <= 1)
636                 RETURN(0);
637
638         fid_le_to_cpu(fid, &lmv1->lmv_stripe_fids[0]);
639         /* Do not load striping information for slave inode */
640         if (!lu_fid_eq(fid, lu_object_fid(&lo->ldo_obj.do_lu))) {
641                 lo->ldo_dir_slave_stripe = 1;
642                 RETURN(0);
643         }
644
645         LASSERT(lo->ldo_stripe == NULL);
646         OBD_ALLOC(stripe, sizeof(stripe[0]) *
647                   (le32_to_cpu(lmv1->lmv_stripe_count) - 1));
648         if (stripe == NULL)
649                 RETURN(-ENOMEM);
650
651         /* skip master stripe */
652         for (i = 1; i < le32_to_cpu(lmv1->lmv_stripe_count); i++) {
653                 struct lod_tgt_desc     *tgt;
654                 int                     idx;
655                 int                     type = LU_SEQ_RANGE_ANY;
656                 struct dt_object        *dto;
657
658                 fid_le_to_cpu(fid, &lmv1->lmv_stripe_fids[i]);
659                 rc = lod_fld_lookup(env, lod, fid, &idx, &type);
660                 if (rc != 0)
661                         GOTO(out, rc);
662
663                 tgt = LTD_TGT(ltd, idx);
664                 if (tgt == NULL)
665                         GOTO(out, rc = -ESTALE);
666
667                 dto = dt_locate_at(env, tgt->ltd_tgt, fid,
668                                   lo->ldo_obj.do_lu.lo_dev->ld_site->ls_top_dev,
669                                   NULL);
670                 if (IS_ERR(dto))
671                         GOTO(out, rc = PTR_ERR(dto));
672
673                 stripe[i - 1] = dto;
674         }
675 out:
676         lo->ldo_stripe = stripe;
677         lo->ldo_stripenr = le32_to_cpu(lmv1->lmv_stripe_count) - 1;
678         lo->ldo_stripes_allocated = le32_to_cpu(lmv1->lmv_stripe_count) - 1;
679         if (rc != 0)
680                 lod_object_free_striping(env, lo);
681
682         RETURN(rc);
683 }
684
685 static int lod_prep_md_striped_create(const struct lu_env *env,
686                                       struct dt_object *dt,
687                                       struct lu_attr *attr,
688                                       const struct lmv_user_md_v1 *lum,
689                                       struct thandle *th)
690 {
691         struct lod_device       *lod = lu2lod_dev(dt->do_lu.lo_dev);
692         struct lod_tgt_descs    *ltd = &lod->lod_mdt_descs;
693         struct lod_object       *lo = lod_dt_obj(dt);
694         struct dt_object        **stripe;
695         struct lu_buf           lmv_buf;
696         int                     stripe_count;
697         int                     *idx_array;
698         int                     rc = 0;
699         int                     i;
700         int                     j;
701         ENTRY;
702
703         /* The lum has been verifed in lod_verify_md_striping */
704         LASSERT(le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC);
705         LASSERT(le32_to_cpu(lum->lum_stripe_count) > 0);
706
707         /* Do not need allocated master stripe */
708         stripe_count = le32_to_cpu(lum->lum_stripe_count);
709         OBD_ALLOC(stripe, sizeof(stripe[0]) * (stripe_count - 1));
710         if (stripe == NULL)
711                 RETURN(-ENOMEM);
712
713         OBD_ALLOC(idx_array, sizeof(idx_array[0]) * stripe_count);
714         if (idx_array == NULL)
715                 GOTO(out_free, rc = -ENOMEM);
716
717         idx_array[0] = le32_to_cpu(lum->lum_stripe_offset);
718         for (i = 1; i < stripe_count; i++) {
719                 struct lod_tgt_desc     *tgt;
720                 struct dt_object        *dto;
721                 struct lu_fid           fid;
722                 int                     idx;
723                 struct lu_object_conf   conf = { 0 };
724
725                 idx = (idx_array[i - 1] + 1) % (lod->lod_remote_mdt_count + 1);
726
727                 for (j = 0; j < lod->lod_remote_mdt_count;
728                      j++, idx = (idx + 1) % (lod->lod_remote_mdt_count + 1)) {
729                         bool already_allocated = false;
730                         int k;
731
732                         CDEBUG(D_INFO, "try idx %d, mdt cnt %d,"
733                                " allocated %d, last allocated %d\n", idx,
734                                lod->lod_remote_mdt_count, i, idx_array[i - 1]);
735
736                         /* Find next avaible target */
737                         if (!cfs_bitmap_check(ltd->ltd_tgt_bitmap, idx))
738                                 continue;
739
740                         /* check whether the idx already exists
741                          * in current allocated array */
742                         for (k = 0; k < i; k++) {
743                                 if (idx_array[k] == idx) {
744                                         already_allocated = true;
745                                         break;
746                                 }
747                         }
748
749                         if (already_allocated)
750                                 continue;
751
752                         break;
753                 }
754
755                 /* Can not allocate more stripes */
756                 if (j == lod->lod_remote_mdt_count) {
757                         CDEBUG(D_INFO, "%s: require stripes %d only get %d\n",
758                                lod2obd(lod)->obd_name, stripe_count, i - 1);
759                         break;
760                 }
761
762                 CDEBUG(D_INFO, "idx %d, mdt cnt %d,"
763                        " allocated %d, last allocated %d\n", idx,
764                        lod->lod_remote_mdt_count, i, idx_array[i - 1]);
765
766                 tgt = LTD_TGT(ltd, idx);
767                 LASSERT(tgt != NULL);
768
769                 rc = obd_fid_alloc(tgt->ltd_exp, &fid, NULL);
770                 if (rc < 0)
771                         GOTO(out_put, rc);
772                 rc = 0;
773
774                 conf.loc_flags = LOC_F_NEW;
775                 dto = dt_locate_at(env, tgt->ltd_tgt, &fid,
776                                   dt->do_lu.lo_dev->ld_site->ls_top_dev, &conf);
777                 if (IS_ERR(dto))
778                         GOTO(out_put, rc = PTR_ERR(dto));
779                 stripe[i - 1] = dto;
780                 idx_array[i] = idx;
781         }
782
783         lo->ldo_dir_striped = 1;
784         lo->ldo_stripe = stripe;
785         lo->ldo_stripenr = i - 1;
786         lo->ldo_stripes_allocated = stripe_count - 1;
787
788         if (lo->ldo_stripenr == 0)
789                 GOTO(out_put, rc = -ENOSPC);
790
791         rc = lod_prep_lmv_md(env, dt, &lmv_buf);
792         if (rc != 0)
793                 GOTO(out_put, rc);
794
795         for (i = 0; i < lo->ldo_stripenr; i++) {
796                 struct dt_object *dto;
797
798                 dto = stripe[i];
799                 /* only create slave striped object */
800                 rc = dt_declare_create(env, dto, attr, NULL, NULL, th);
801                 if (rc != 0)
802                         GOTO(out_put, rc);
803
804                 if (!dt_try_as_dir(env, dto))
805                         GOTO(out_put, rc = -EINVAL);
806
807                 rc = dt_declare_insert(env, dto,
808                      (const struct dt_rec *)lu_object_fid(&dto->do_lu),
809                      (const struct dt_key *)dot, th);
810                 if (rc != 0)
811                         GOTO(out_put, rc);
812
813                 /* master stripe FID will be put to .. */
814                 rc = dt_declare_insert(env, dto,
815                      (const struct dt_rec *)lu_object_fid(&dt->do_lu),
816                      (const struct dt_key *)dotdot, th);
817                 if (rc != 0)
818                         GOTO(out_put, rc);
819
820                 /* probably nothing to inherite */
821                 if (lo->ldo_striping_cached &&
822                     !LOVEA_DELETE_VALUES(lo->ldo_def_stripe_size,
823                                          lo->ldo_def_stripenr,
824                                          lo->ldo_def_stripe_offset)) {
825                         struct lod_thread_info  *info;
826                         struct lov_user_md_v3   *v3;
827
828                         /* sigh, lti_ea_store has been used for lmv_buf,
829                          * so we have to allocate buffer for default
830                          * stripe EA */
831                         OBD_ALLOC_PTR(v3);
832                         if (v3 == NULL)
833                                 GOTO(out_put, rc = -ENOMEM);
834
835                         memset(v3, 0, sizeof(*v3));
836                         v3->lmm_magic = cpu_to_le32(LOV_USER_MAGIC_V3);
837                         v3->lmm_stripe_count =
838                                 cpu_to_le32(lo->ldo_def_stripenr);
839                         v3->lmm_stripe_offset =
840                                 cpu_to_le32(lo->ldo_def_stripe_offset);
841                         v3->lmm_stripe_size =
842                                 cpu_to_le32(lo->ldo_def_stripe_size);
843                         if (lo->ldo_pool)
844                                 strncpy(v3->lmm_pool_name, lo->ldo_pool,
845                                         LOV_MAXPOOLNAME);
846
847                         info = lod_env_info(env);
848                         info->lti_buf.lb_buf = v3;
849                         info->lti_buf.lb_len = sizeof(*v3);
850                         rc = dt_declare_xattr_set(env, dto,
851                                                   &info->lti_buf,
852                                                   XATTR_NAME_LOV,
853                                                   0, th);
854                         OBD_FREE_PTR(v3);
855                         if (rc != 0)
856                                 GOTO(out_put, rc);
857                 }
858                 rc = dt_declare_xattr_set(env, dto, &lmv_buf, XATTR_NAME_LMV, 0,
859                                           th);
860                 if (rc != 0)
861                         GOTO(out_put, rc);
862         }
863
864         rc = dt_declare_xattr_set(env, dt, &lmv_buf, XATTR_NAME_LMV, 0, th);
865         if (rc != 0)
866                 GOTO(out_put, rc);
867
868 out_put:
869         if (rc < 0) {
870                 for (i = 0; i < stripe_count - 1; i++)
871                         if (stripe[i] != NULL)
872                                 lu_object_put(env, &stripe[i]->do_lu);
873                 OBD_FREE(stripe, sizeof(stripe[0]) * (stripe_count - 1));
874         }
875
876 out_free:
877         if (idx_array != NULL)
878                 OBD_FREE(idx_array, sizeof(idx_array[0]) * stripe_count);
879
880         RETURN(rc);
881 }
882
883 /**
884  * Declare create striped md object.
885  */
886 static int lod_declare_xattr_set_lmv(const struct lu_env *env,
887                                      struct dt_object *dt,
888                                      struct lu_attr *attr,
889                                      const struct lu_buf *lum_buf,
890                                      struct thandle *th)
891 {
892         struct lod_object       *lo = lod_dt_obj(dt);
893         struct lod_device       *lod = lu2lod_dev(dt->do_lu.lo_dev);
894         struct lmv_user_md_v1   *lum;
895         int                     rc;
896         ENTRY;
897
898         lum = lum_buf->lb_buf;
899         LASSERT(lum != NULL);
900
901         CDEBUG(D_INFO, "lum magic = %x count = %u offset = %d\n",
902                le32_to_cpu(lum->lum_magic), le32_to_cpu(lum->lum_stripe_count),
903                (int)le32_to_cpu(lum->lum_stripe_offset));
904
905         if (le32_to_cpu(lum->lum_stripe_count) <= 1)
906                 GOTO(out, rc = 0);
907
908         rc = lod_verify_md_striping(lod, lum);
909         if (rc != 0)
910                 GOTO(out, rc);
911
912         /* prepare dir striped objects */
913         rc = lod_prep_md_striped_create(env, dt, attr, lum, th);
914         if (rc != 0) {
915                 /* failed to create striping, let's reset
916                  * config so that others don't get confused */
917                 lod_object_free_striping(env, lo);
918                 GOTO(out, rc);
919         }
920 out:
921         RETURN(rc);
922 }
923
924 /*
925  * LOV xattr is a storage for striping, and LOD owns this xattr.
926  * but LOD allows others to control striping to some extent
927  * - to reset strping
928  * - to set new defined striping
929  * - to set new semi-defined striping
930  *   - number of stripes is defined
931  *   - number of stripes + osts are defined
932  *   - ??
933  */
934 static int lod_declare_xattr_set(const struct lu_env *env,
935                                  struct dt_object *dt,
936                                  const struct lu_buf *buf,
937                                  const char *name, int fl,
938                                  struct thandle *th)
939 {
940         struct dt_object *next = dt_object_child(dt);
941         struct lu_attr   *attr = &lod_env_info(env)->lti_attr;
942         __u32             mode;
943         int               rc;
944         ENTRY;
945
946         /*
947          * allow to declare predefined striping on a new (!mode) object
948          * which is supposed to be replay of regular file creation
949          * (when LOV setting is declared)
950          * LU_XATTR_REPLACE is set to indicate a layout swap
951          */
952         mode = dt->do_lu.lo_header->loh_attr & S_IFMT;
953         if ((S_ISREG(mode) || mode == 0) && strcmp(name, XATTR_NAME_LOV) == 0 &&
954              !(fl & LU_XATTR_REPLACE)) {
955                 /*
956                  * this is a request to manipulate object's striping
957                  */
958                 if (dt_object_exists(dt)) {
959                         rc = dt_attr_get(env, next, attr, BYPASS_CAPA);
960                         if (rc)
961                                 RETURN(rc);
962                 } else {
963                         memset(attr, 0, sizeof(*attr));
964                         attr->la_valid = LA_TYPE | LA_MODE;
965                         attr->la_mode = S_IFREG;
966                 }
967                 rc = lod_declare_striped_object(env, dt, attr, buf, th);
968         } else if (S_ISDIR(mode)) {
969                 struct lod_device       *d = lu2lod_dev(dt->do_lu.lo_dev);
970                 struct lod_object       *lo = lod_dt_obj(dt);
971                 int                     i;
972
973                 if (strcmp(name, XATTR_NAME_DEFAULT_LMV) == 0) {
974                         struct lmv_user_md_v1 *lum;
975
976                         LASSERT(buf != NULL && buf->lb_buf != NULL);
977                         lum = buf->lb_buf;
978                         rc = lod_verify_md_striping(d, lum);
979                         if (rc != 0)
980                                 RETURN(rc);
981                 }
982
983                 rc = dt_declare_xattr_set(env, next, buf, name, fl, th);
984                 if (rc != 0)
985                         RETURN(rc);
986
987                 /* set xattr to each stripes, if needed */
988                 rc = lod_load_striping(env, lo);
989                 if (rc != 0)
990                         RETURN(rc);
991
992                 if (lo->ldo_stripenr == 0)
993                         RETURN(rc);
994
995                 for (i = 0; i < lo->ldo_stripenr; i++) {
996                         LASSERT(lo->ldo_stripe[i]);
997                         rc = dt_declare_xattr_set(env, lo->ldo_stripe[i], buf,
998                                                   name, fl, th);
999                         if (rc != 0)
1000                                 break;
1001                 }
1002         } else {
1003                 rc = dt_declare_xattr_set(env, next, buf, name, fl, th);
1004         }
1005
1006         RETURN(rc);
1007 }
1008
1009 static void lod_lov_stripe_cache_clear(struct lod_object *lo)
1010 {
1011         lo->ldo_striping_cached = 0;
1012         lo->ldo_def_striping_set = 0;
1013         lod_object_set_pool(lo, NULL);
1014         lo->ldo_def_stripe_size = 0;
1015         lo->ldo_def_stripenr = 0;
1016 }
1017
1018 static int lod_xattr_set_lov_on_dir(const struct lu_env *env,
1019                                     struct dt_object *dt,
1020                                     const struct lu_buf *buf,
1021                                     const char *name, int fl,
1022                                     struct thandle *th,
1023                                     struct lustre_capa *capa)
1024 {
1025         struct lod_device       *d = lu2lod_dev(dt->do_lu.lo_dev);
1026         struct dt_object        *next = dt_object_child(dt);
1027         struct lod_object       *l = lod_dt_obj(dt);
1028         struct lov_user_md_v1   *lum;
1029         struct lov_user_md_v3   *v3 = NULL;
1030         int                      rc;
1031         ENTRY;
1032
1033         /* If it is striped dir, we should clear the stripe cache for
1034          * slave stripe as well, but there are no effective way to
1035          * notify the LOD on the slave MDT, so we do not cache stripe
1036          * information for slave stripe for now. XXX*/
1037         lod_lov_stripe_cache_clear(l);
1038         LASSERT(buf != NULL && buf->lb_buf != NULL);
1039         lum = buf->lb_buf;
1040
1041         rc = lod_verify_striping(d, buf, 0);
1042         if (rc)
1043                 RETURN(rc);
1044
1045         if (lum->lmm_magic == LOV_USER_MAGIC_V3)
1046                 v3 = buf->lb_buf;
1047
1048         /* if { size, offset, count } = { 0, -1, 0 } and no pool
1049          * (i.e. all default values specified) then delete default
1050          * striping from dir. */
1051         CDEBUG(D_OTHER,
1052                 "set default striping: sz %u # %u offset %d %s %s\n",
1053                 (unsigned)lum->lmm_stripe_size,
1054                 (unsigned)lum->lmm_stripe_count,
1055                 (int)lum->lmm_stripe_offset,
1056                 v3 ? "from" : "", v3 ? v3->lmm_pool_name : "");
1057
1058         if (LOVEA_DELETE_VALUES((lum->lmm_stripe_size),
1059                                 (lum->lmm_stripe_count),
1060                                 (lum->lmm_stripe_offset)) &&
1061                         lum->lmm_magic == LOV_USER_MAGIC_V1) {
1062                 rc = dt_xattr_del(env, next, name, th, capa);
1063                 if (rc == -ENODATA)
1064                         rc = 0;
1065         } else {
1066                 rc = dt_xattr_set(env, next, buf, name, fl, th, capa);
1067         }
1068
1069         RETURN(rc);
1070 }
1071
1072 static int lod_xattr_set_default_lmv_on_dir(const struct lu_env *env,
1073                                             struct dt_object *dt,
1074                                             const struct lu_buf *buf,
1075                                             const char *name, int fl,
1076                                             struct thandle *th,
1077                                             struct lustre_capa *capa)
1078 {
1079         struct dt_object        *next = dt_object_child(dt);
1080         struct lod_object       *l = lod_dt_obj(dt);
1081         struct lmv_user_md_v1   *lum;
1082         int                      rc;
1083         ENTRY;
1084
1085         LASSERT(buf != NULL && buf->lb_buf != NULL);
1086         lum = buf->lb_buf;
1087
1088         CDEBUG(D_OTHER, "set default stripe_count # %u stripe_offset %d\n",
1089               le32_to_cpu(lum->lum_stripe_count),
1090               (int)le32_to_cpu(lum->lum_stripe_offset));
1091
1092         if (LMVEA_DELETE_VALUES((le32_to_cpu(lum->lum_stripe_count)),
1093                                  le32_to_cpu(lum->lum_stripe_offset)) &&
1094                                 le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC) {
1095                 rc = dt_xattr_del(env, next, name, th, capa);
1096                 if (rc == -ENODATA)
1097                         rc = 0;
1098         } else {
1099                 rc = dt_xattr_set(env, next, buf, name, fl, th, capa);
1100                 if (rc != 0)
1101                         RETURN(rc);
1102
1103                 /* Update default stripe cache */
1104                 if (l->ldo_dir_stripe == NULL) {
1105                         OBD_ALLOC_PTR(l->ldo_dir_stripe);
1106                         if (l->ldo_dir_stripe == NULL)
1107                                 RETURN(-ENOMEM);
1108                 }
1109
1110                 l->ldo_dir_striping_cached = 0;
1111                 l->ldo_dir_def_striping_set = 1;
1112                 l->ldo_dir_def_stripenr =
1113                         le32_to_cpu(lum->lum_stripe_count) - 1;
1114         }
1115
1116         RETURN(rc);
1117 }
1118
1119 static int lod_xattr_set_lmv(const struct lu_env *env, struct dt_object *dt,
1120                              const struct lu_buf *buf, const char *name,
1121                              int fl, struct thandle *th,
1122                              struct lustre_capa *capa)
1123 {
1124         struct lod_object       *lo = lod_dt_obj(dt);
1125         struct lu_buf           lmv_buf;
1126         int                     i;
1127         int                     rc;
1128         ENTRY;
1129
1130         if (!S_ISDIR(dt->do_lu.lo_header->loh_attr))
1131                 RETURN(-ENOTDIR);
1132
1133         /* The stripes are supposed to be allocated in declare phase,
1134          * if there are no stripes being allocated, it will skip */
1135         if (lo->ldo_stripenr == 0)
1136                 RETURN(0);
1137
1138         rc = lod_prep_lmv_md(env, dt, &lmv_buf);
1139         if (rc != 0)
1140                 RETURN(rc);
1141
1142         for (i = 0; i < lo->ldo_stripenr; i++) {
1143                 struct dt_object *dto;
1144                 struct lu_attr  *attr = &lod_env_info(env)->lti_attr;
1145
1146                 dto = lo->ldo_stripe[i];
1147                 memset(attr, 0, sizeof(*attr));
1148                 attr->la_valid = LA_TYPE | LA_MODE;
1149                 attr->la_mode = S_IFDIR;
1150                 rc = dt_create(env, dto, attr, NULL, NULL, th);
1151                 if (rc != 0)
1152                         RETURN(rc);
1153
1154                 rc = dt_insert(env, dto,
1155                               (const struct dt_rec *)lu_object_fid(&dto->do_lu),
1156                               (const struct dt_key *)dot, th, capa, 0);
1157                 if (rc != 0)
1158                         RETURN(rc);
1159
1160                 rc = dt_insert(env, dto,
1161                               (struct dt_rec *)lu_object_fid(&dt->do_lu),
1162                               (const struct dt_key *)dotdot, th, capa, 0);
1163                 if (rc != 0)
1164                         RETURN(rc);
1165
1166                 if (lo->ldo_striping_cached &&
1167                     !LOVEA_DELETE_VALUES(lo->ldo_def_stripe_size,
1168                                          lo->ldo_def_stripenr,
1169                                          lo->ldo_def_stripe_offset)) {
1170                         struct lod_thread_info  *info;
1171                         struct lov_user_md_v3   *v3;
1172
1173                         /* sigh, lti_ea_store has been used for lmv_buf,
1174                          * so we have to allocate buffer for default
1175                          * stripe EA */
1176                         OBD_ALLOC_PTR(v3);
1177                         if (v3 == NULL)
1178                                 RETURN(-ENOMEM);
1179
1180                         memset(v3, 0, sizeof(*v3));
1181                         v3->lmm_magic = cpu_to_le32(LOV_USER_MAGIC_V3);
1182                         v3->lmm_stripe_count =
1183                                 cpu_to_le32(lo->ldo_def_stripenr);
1184                         v3->lmm_stripe_offset =
1185                                 cpu_to_le32(lo->ldo_def_stripe_offset);
1186                         v3->lmm_stripe_size =
1187                                 cpu_to_le32(lo->ldo_def_stripe_size);
1188                         if (lo->ldo_pool)
1189                                 strncpy(v3->lmm_pool_name, lo->ldo_pool,
1190                                         LOV_MAXPOOLNAME);
1191
1192                         info = lod_env_info(env);
1193                         info->lti_buf.lb_buf = v3;
1194                         info->lti_buf.lb_len = sizeof(*v3);
1195                         rc = dt_xattr_set(env, dto, &info->lti_buf,
1196                                           XATTR_NAME_LOV, 0, th, capa);
1197                         OBD_FREE_PTR(v3);
1198                         if (rc != 0)
1199                                 RETURN(rc);
1200                 }
1201
1202                 rc = dt_xattr_set(env, dto, &lmv_buf, XATTR_NAME_LMV, fl, th,
1203                                   capa);
1204         }
1205
1206         rc = dt_xattr_set(env, dt, &lmv_buf, XATTR_NAME_LMV, fl, th, capa);
1207
1208         RETURN(rc);
1209 }
1210
1211 static int lod_xattr_set(const struct lu_env *env,
1212                          struct dt_object *dt, const struct lu_buf *buf,
1213                          const char *name, int fl, struct thandle *th,
1214                          struct lustre_capa *capa)
1215 {
1216         struct lod_object       *lo = lod_dt_obj(dt);
1217         struct dt_object        *next = dt_object_child(dt);
1218         __u32                    attr;
1219         int                      rc;
1220         int                     i;
1221         ENTRY;
1222
1223         attr = dt->do_lu.lo_header->loh_attr & S_IFMT;
1224         if (S_ISDIR(attr) && strcmp(name, XATTR_NAME_LOV) == 0) {
1225                 rc = lod_xattr_set_lov_on_dir(env, dt, buf, name, fl, th, capa);
1226         } else if (S_ISREG(attr) && !strcmp(name, XATTR_NAME_LOV)) {
1227                 /* in case of lov EA swap, just set it
1228                  * if not, it is a replay so check striping match what we
1229                  * already have during req replay, declare_xattr_set()
1230                  * defines striping, then create() does the work
1231                 */
1232                 if (fl & LU_XATTR_REPLACE) {
1233                         /* free stripes, then update disk */
1234                         lod_object_free_striping(env, lod_dt_obj(dt));
1235                         rc = dt_xattr_set(env, next, buf, name, fl, th, capa);
1236                 } else {
1237                         rc = lod_striping_create(env, dt, NULL, NULL, th);
1238                 }
1239         } else if (strcmp(name, XATTR_NAME_DEFAULT_LMV) == 0) {
1240                 if (!S_ISDIR(attr))
1241                         RETURN(-ENOTDIR);
1242                 rc = lod_xattr_set_default_lmv_on_dir(env, dt, buf, name, fl,
1243                                                       th, capa);
1244         } else {
1245                 /*
1246                  * behave transparantly for all other EAs
1247                  */
1248                 rc = dt_xattr_set(env, next, buf, name, fl, th, capa);
1249         }
1250
1251         if (rc != 0 || !S_ISDIR(attr))
1252                 RETURN(rc);
1253
1254         if (lo->ldo_stripenr == 0)
1255                 RETURN(rc);
1256
1257         for (i = 0; i < lo->ldo_stripenr; i++) {
1258                 LASSERT(lo->ldo_stripe[i]);
1259                 rc = dt_xattr_set(env, lo->ldo_stripe[i], buf, name, fl, th,
1260                                   capa);
1261                 if (rc != 0)
1262                         break;
1263         }
1264
1265         RETURN(rc);
1266 }
1267
/*
 * Declare the removal of EA \a name: the request is forwarded to the
 * object returned by dt_object_child() for \a dt.
 */
static int lod_declare_xattr_del(const struct lu_env *env,
                                 struct dt_object *dt, const char *name,
                                 struct thandle *th)
{
        struct dt_object *child = dt_object_child(dt);

        return dt_declare_xattr_del(env, child, name, th);
}
1274
1275 static int lod_xattr_del(const struct lu_env *env, struct dt_object *dt,
1276                          const char *name, struct thandle *th,
1277                          struct lustre_capa *capa)
1278 {
1279         if (!strcmp(name, XATTR_NAME_LOV))
1280                 lod_object_free_striping(env, lod_dt_obj(dt));
1281         return dt_xattr_del(env, dt_object_child(dt), name, th, capa);
1282 }
1283
/*
 * List the EAs of \a dt into \a buf by forwarding the request to the
 * object returned by dt_object_child().
 */
static int lod_xattr_list(const struct lu_env *env,
                          struct dt_object *dt, struct lu_buf *buf,
                          struct lustre_capa *capa)
{
        struct dt_object *child = dt_object_child(dt);

        return dt_xattr_list(env, child, buf, capa);
}
1290
1291 int lod_object_set_pool(struct lod_object *o, char *pool)
1292 {
1293         int len;
1294
1295         if (o->ldo_pool) {
1296                 len = strlen(o->ldo_pool);
1297                 OBD_FREE(o->ldo_pool, len + 1);
1298                 o->ldo_pool = NULL;
1299         }
1300         if (pool) {
1301                 len = strlen(pool);
1302                 OBD_ALLOC(o->ldo_pool, len + 1);
1303                 if (o->ldo_pool == NULL)
1304                         return -ENOMEM;
1305                 strcpy(o->ldo_pool, pool);
1306         }
1307         return 0;
1308 }
1309
1310 static inline int lod_object_will_be_striped(int is_reg, const struct lu_fid *fid)
1311 {
1312         return (is_reg && fid_seq(fid) != FID_SEQ_LOCAL_FILE);
1313 }
1314
1315
1316 static int lod_cache_parent_lov_striping(const struct lu_env *env,
1317                                          struct lod_object *lp)
1318 {
1319         struct lod_thread_info  *info = lod_env_info(env);
1320         struct lov_user_md_v1   *v1 = NULL;
1321         struct lov_user_md_v3   *v3 = NULL;
1322         int                      rc;
1323         ENTRY;
1324
1325         /* called from MDD without parent being write locked,
1326          * lock it here */
1327         dt_write_lock(env, dt_object_child(&lp->ldo_obj), 0);
1328         rc = lod_get_lov_ea(env, lp);
1329         if (rc < 0)
1330                 GOTO(unlock, rc);
1331
1332         if (rc < sizeof(struct lov_user_md)) {
1333                 /* don't lookup for non-existing or invalid striping */
1334                 lp->ldo_def_striping_set = 0;
1335                 lp->ldo_striping_cached = 1;
1336                 lp->ldo_def_stripe_size = 0;
1337                 lp->ldo_def_stripenr = 0;
1338                 lp->ldo_def_stripe_offset = (typeof(v1->lmm_stripe_offset))(-1);
1339                 GOTO(unlock, rc = 0);
1340         }
1341
1342         rc = 0;
1343         v1 = info->lti_ea_store;
1344         if (v1->lmm_magic == __swab32(LOV_USER_MAGIC_V1))
1345                 lustre_swab_lov_user_md_v1(v1);
1346         else if (v1->lmm_magic == __swab32(LOV_USER_MAGIC_V3))
1347                 lustre_swab_lov_user_md_v3(v3);
1348
1349         if (v1->lmm_magic != LOV_MAGIC_V3 && v1->lmm_magic != LOV_MAGIC_V1)
1350                 GOTO(unlock, rc = 0);
1351
1352         if (v1->lmm_pattern != LOV_PATTERN_RAID0 && v1->lmm_pattern != 0)
1353                 GOTO(unlock, rc = 0);
1354
1355         lp->ldo_def_stripenr = v1->lmm_stripe_count;
1356         lp->ldo_def_stripe_size = v1->lmm_stripe_size;
1357         lp->ldo_def_stripe_offset = v1->lmm_stripe_offset;
1358         lp->ldo_striping_cached = 1;
1359         lp->ldo_def_striping_set = 1;
1360         if (v1->lmm_magic == LOV_USER_MAGIC_V3) {
1361                 /* XXX: sanity check here */
1362                 v3 = (struct lov_user_md_v3 *) v1;
1363                 if (v3->lmm_pool_name[0])
1364                         lod_object_set_pool(lp, v3->lmm_pool_name);
1365         }
1366         EXIT;
1367 unlock:
1368         dt_write_unlock(env, dt_object_child(&lp->ldo_obj));
1369         return rc;
1370 }
1371
1372
1373 static int lod_cache_parent_lmv_striping(const struct lu_env *env,
1374                                          struct lod_object *lp)
1375 {
1376         struct lod_thread_info  *info = lod_env_info(env);
1377         struct lmv_user_md_v1   *v1 = NULL;
1378         int                      rc;
1379         ENTRY;
1380
1381         /* called from MDD without parent being write locked,
1382          * lock it here */
1383         dt_write_lock(env, dt_object_child(&lp->ldo_obj), 0);
1384         rc = lod_get_default_lmv_ea(env, lp);
1385         if (rc < 0)
1386                 GOTO(unlock, rc);
1387
1388         if (rc < sizeof(struct lmv_user_md)) {
1389                 /* don't lookup for non-existing or invalid striping */
1390                 lp->ldo_dir_def_striping_set = 0;
1391                 lp->ldo_dir_striping_cached = 1;
1392                 lp->ldo_dir_def_stripenr = 0;
1393                 lp->ldo_dir_def_stripe_offset =
1394                                         (typeof(v1->lum_stripe_offset))(-1);
1395                 lp->ldo_dir_def_hash_type = LMV_HASH_TYPE_FNV_1A_64;
1396                 GOTO(unlock, rc = 0);
1397         }
1398
1399         rc = 0;
1400         v1 = info->lti_ea_store;
1401
1402         lp->ldo_dir_def_stripenr = le32_to_cpu(v1->lum_stripe_count) - 1;
1403         lp->ldo_dir_def_stripe_offset = le32_to_cpu(v1->lum_stripe_offset);
1404         lp->ldo_dir_def_hash_type = le32_to_cpu(v1->lum_hash_type);
1405         lp->ldo_dir_def_striping_set = 1;
1406         lp->ldo_dir_striping_cached = 1;
1407
1408         EXIT;
1409 unlock:
1410         dt_write_unlock(env, dt_object_child(&lp->ldo_obj));
1411         return rc;
1412 }
1413
1414 static int lod_cache_parent_striping(const struct lu_env *env,
1415                                      struct lod_object *lp,
1416                                      umode_t child_mode)
1417 {
1418         int rc = 0;
1419         ENTRY;
1420
1421         rc = lod_load_striping(env, lp);
1422         if (rc != 0)
1423                 RETURN(rc);
1424
1425         if (!lp->ldo_striping_cached) {
1426                 /* we haven't tried to get default striping for
1427                  * the directory yet, let's cache it in the object */
1428                 rc = lod_cache_parent_lov_striping(env, lp);
1429                 if (rc != 0)
1430                         RETURN(rc);
1431         }
1432
1433         if (S_ISDIR(child_mode) && !lp->ldo_dir_striping_cached)
1434                 rc = lod_cache_parent_lmv_striping(env, lp);
1435
1436         RETURN(rc);
1437 }
1438
1439 /**
1440  * used to transfer default striping data to the object being created
1441  */
1442 static void lod_ah_init(const struct lu_env *env,
1443                         struct dt_allocation_hint *ah,
1444                         struct dt_object *parent,
1445                         struct dt_object *child,
1446                         umode_t child_mode)
1447 {
1448         struct lod_device *d = lu2lod_dev(child->do_lu.lo_dev);
1449         struct dt_object  *nextp = NULL;
1450         struct dt_object  *nextc;
1451         struct lod_object *lp = NULL;
1452         struct lod_object *lc;
1453         struct lov_desc   *desc;
1454         ENTRY;
1455
1456         LASSERT(child);
1457
1458         if (likely(parent)) {
1459                 nextp = dt_object_child(parent);
1460                 lp = lod_dt_obj(parent);
1461         }
1462
1463         nextc = dt_object_child(child);
1464         lc = lod_dt_obj(child);
1465
1466         LASSERT(lc->ldo_stripenr == 0);
1467         LASSERT(lc->ldo_stripe == NULL);
1468
1469         /*
1470          * local object may want some hints
1471          * in case of late striping creation, ->ah_init()
1472          * can be called with local object existing
1473          */
1474         if (!dt_object_exists(nextc) || dt_object_remote(nextc))
1475                 nextc->do_ops->do_ah_init(env, ah, dt_object_remote(nextp) ?
1476                                           NULL : nextp, nextc, child_mode);
1477
1478         if (S_ISDIR(child_mode)) {
1479                 int rc;
1480
1481                 if (lc->ldo_dir_stripe == NULL) {
1482                         OBD_ALLOC_PTR(lc->ldo_dir_stripe);
1483                         if (lc->ldo_dir_stripe == NULL)
1484                                 return;
1485                 }
1486
1487                 if (lp->ldo_dir_stripe == NULL) {
1488                         OBD_ALLOC_PTR(lp->ldo_dir_stripe);
1489                         if (lp->ldo_dir_stripe == NULL)
1490                                 return;
1491                 }
1492
1493                 rc = lod_cache_parent_striping(env, lp, child_mode);
1494                 if (rc != 0)
1495                         return;
1496
1497                 /* transfer defaults to new directory */
1498                 if (lp->ldo_striping_cached) {
1499                         if (lp->ldo_pool)
1500                                 lod_object_set_pool(lc, lp->ldo_pool);
1501                         lc->ldo_def_stripenr = lp->ldo_def_stripenr;
1502                         lc->ldo_def_stripe_size = lp->ldo_def_stripe_size;
1503                         lc->ldo_def_stripe_offset = lp->ldo_def_stripe_offset;
1504                         lc->ldo_striping_cached = 1;
1505                         lc->ldo_def_striping_set = 1;
1506                         CDEBUG(D_OTHER, "inherite EA sz:%d off:%d nr:%d\n",
1507                                (int)lc->ldo_def_stripe_size,
1508                                (int)lc->ldo_def_stripe_offset,
1509                                (int)lc->ldo_def_stripenr);
1510                 }
1511
1512                 /* transfer dir defaults to new directory */
1513                 if (lp->ldo_dir_striping_cached) {
1514                         lc->ldo_dir_def_stripenr = lp->ldo_dir_def_stripenr;
1515                         lc->ldo_dir_def_stripe_offset =
1516                                                   lp->ldo_dir_def_stripe_offset;
1517                         lc->ldo_dir_def_hash_type =
1518                                                   lp->ldo_dir_def_hash_type;
1519                         lc->ldo_dir_striping_cached = 1;
1520                         lc->ldo_dir_def_striping_set = 1;
1521                         CDEBUG(D_INFO, "inherit default EA nr:%d off:%d t%u\n",
1522                                (int)lc->ldo_dir_def_stripenr,
1523                                (int)lc->ldo_dir_def_stripe_offset,
1524                                lc->ldo_dir_def_hash_type);
1525                 }
1526
1527                 /* If the directory is specified with certain stripes */
1528                 if (ah->dah_eadata != NULL && ah->dah_eadata_len != 0) {
1529                         const struct lmv_user_md_v1 *lum1 = ah->dah_eadata;
1530                         int rc;
1531
1532                         rc = lod_verify_md_striping(d, lum1);
1533                         if (rc == 0 &&
1534                                 le32_to_cpu(lum1->lum_stripe_count) > 1) {
1535                                 /* Directory will be striped only if
1536                                  * stripe_count > 1 */
1537                                 lc->ldo_stripenr =
1538                                         le32_to_cpu(lum1->lum_stripe_count) - 1;
1539                                 lc->ldo_dir_stripe_offset =
1540                                         le32_to_cpu(lum1->lum_stripe_offset);
1541                                 lc->ldo_dir_hash_type =
1542                                         le32_to_cpu(lum1->lum_hash_type);
1543                                 CDEBUG(D_INFO, "set stripe EA nr:%hu off:%d\n",
1544                                        lc->ldo_stripenr,
1545                                        (int)lc->ldo_dir_stripe_offset);
1546                         }
1547                 } else if (lp->ldo_dir_def_striping_set) {
1548                         /* If there are default dir stripe from parent */
1549                         lc->ldo_stripenr = lp->ldo_dir_def_stripenr;
1550                         lc->ldo_dir_stripe_offset =
1551                                         lp->ldo_dir_def_stripe_offset;
1552                         lc->ldo_dir_hash_type =
1553                                         lp->ldo_dir_def_hash_type;
1554                         CDEBUG(D_INFO, "inherit EA nr:%hu off:%d\n",
1555                                lc->ldo_stripenr,
1556                                (int)lc->ldo_dir_stripe_offset);
1557                 } else {
1558                         /* set default stripe for this directory */
1559                         lc->ldo_stripenr = 0;
1560                         lc->ldo_dir_stripe_offset = -1;
1561                 }
1562
1563                 CDEBUG(D_INFO, "final striping count:%hu, offset:%d\n",
1564                        lc->ldo_stripenr, (int)lc->ldo_dir_stripe_offset);
1565
1566                 goto out;
1567         }
1568
1569         /*
1570          * if object is going to be striped over OSTs, transfer default
1571          * striping information to the child, so that we can use it
1572          * during declaration and creation
1573          */
1574         if (!lod_object_will_be_striped(S_ISREG(child_mode),
1575                                         lu_object_fid(&child->do_lu)))
1576                 goto out;
1577         /*
1578          * try from the parent
1579          */
1580         if (likely(parent)) {
1581                 lod_cache_parent_striping(env, lp, child_mode);
1582
1583                 lc->ldo_def_stripe_offset = (__u16) -1;
1584
1585                 if (lp->ldo_def_striping_set) {
1586                         if (lp->ldo_pool)
1587                                 lod_object_set_pool(lc, lp->ldo_pool);
1588                         lc->ldo_stripenr = lp->ldo_def_stripenr;
1589                         lc->ldo_stripe_size = lp->ldo_def_stripe_size;
1590                         lc->ldo_def_stripe_offset = lp->ldo_def_stripe_offset;
1591                         CDEBUG(D_OTHER, "striping from parent: #%d, sz %d %s\n",
1592                                lc->ldo_stripenr, lc->ldo_stripe_size,
1593                                lp->ldo_pool ? lp->ldo_pool : "");
1594                 }
1595         }
1596
1597         /*
1598          * if the parent doesn't provide with specific pattern, grab fs-wide one
1599          */
1600         desc = &d->lod_desc;
1601         if (lc->ldo_stripenr == 0)
1602                 lc->ldo_stripenr = desc->ld_default_stripe_count;
1603         if (lc->ldo_stripe_size == 0)
1604                 lc->ldo_stripe_size = desc->ld_default_stripe_size;
1605         CDEBUG(D_OTHER, "final striping: # %d stripes, sz %d from %s\n",
1606                lc->ldo_stripenr, lc->ldo_stripe_size,
1607                lc->ldo_pool ? lc->ldo_pool : "");
1608
1609 out:
1610         /* we do not cache stripe information for slave stripe, see
1611          * lod_xattr_set_lov_on_dir */
1612         if (lp != NULL && lp->ldo_dir_slave_stripe)
1613                 lod_lov_stripe_cache_clear(lp);
1614
1615         EXIT;
1616 }
1617
1618 #define ll_do_div64(aaa,bbb)    do_div((aaa), (bbb))
1619 /*
1620  * this function handles a special case when truncate was done
1621  * on a stripeless object and now striping is being created
1622  * we can't lose that size, so we have to propagate it to newly
1623  * created object
1624  */
1625 static int lod_declare_init_size(const struct lu_env *env,
1626                                  struct dt_object *dt, struct thandle *th)
1627 {
1628         struct dt_object   *next = dt_object_child(dt);
1629         struct lod_object  *lo = lod_dt_obj(dt);
1630         struct lu_attr     *attr = &lod_env_info(env)->lti_attr;
1631         uint64_t            size, offs;
1632         int                 rc, stripe;
1633         ENTRY;
1634
1635         /* XXX: we support the simplest (RAID0) striping so far */
1636         LASSERT(lo->ldo_stripe || lo->ldo_stripenr == 0);
1637         LASSERT(lo->ldo_stripe_size > 0);
1638
1639         rc = dt_attr_get(env, next, attr, BYPASS_CAPA);
1640         LASSERT(attr->la_valid & LA_SIZE);
1641         if (rc)
1642                 RETURN(rc);
1643
1644         size = attr->la_size;
1645         if (size == 0)
1646                 RETURN(0);
1647
1648         /* ll_do_div64(a, b) returns a % b, and a = a / b */
1649         ll_do_div64(size, (__u64) lo->ldo_stripe_size);
1650         stripe = ll_do_div64(size, (__u64) lo->ldo_stripenr);
1651
1652         size = size * lo->ldo_stripe_size;
1653         offs = attr->la_size;
1654         size += ll_do_div64(offs, lo->ldo_stripe_size);
1655
1656         attr->la_valid = LA_SIZE;
1657         attr->la_size = size;
1658
1659         rc = dt_declare_attr_set(env, lo->ldo_stripe[stripe], attr, th);
1660
1661         RETURN(rc);
1662 }
1663
1664 /**
1665  * Create declaration of striped object
1666  */
1667 int lod_declare_striped_object(const struct lu_env *env, struct dt_object *dt,
1668                                struct lu_attr *attr,
1669                                const struct lu_buf *lovea, struct thandle *th)
1670 {
1671         struct lod_thread_info  *info = lod_env_info(env);
1672         struct dt_object        *next = dt_object_child(dt);
1673         struct lod_object       *lo = lod_dt_obj(dt);
1674         int                      rc;
1675         ENTRY;
1676
1677         if (OBD_FAIL_CHECK(OBD_FAIL_MDS_ALLOC_OBDO)) {
1678                 /* failed to create striping, let's reset
1679                  * config so that others don't get confused */
1680                 lod_object_free_striping(env, lo);
1681                 GOTO(out, rc = -ENOMEM);
1682         }
1683
1684         /* choose OST and generate appropriate objects */
1685         rc = lod_qos_prep_create(env, lo, attr, lovea, th);
1686         if (rc) {
1687                 /* failed to create striping, let's reset
1688                  * config so that others don't get confused */
1689                 lod_object_free_striping(env, lo);
1690                 GOTO(out, rc);
1691         }
1692
1693         /*
1694          * declare storage for striping data
1695          */
1696         info->lti_buf.lb_len = lov_mds_md_size(lo->ldo_stripenr,
1697                                 lo->ldo_pool ?  LOV_MAGIC_V3 : LOV_MAGIC_V1);
1698         rc = dt_declare_xattr_set(env, next, &info->lti_buf, XATTR_NAME_LOV,
1699                                   0, th);
1700         if (rc)
1701                 GOTO(out, rc);
1702
1703         /*
1704          * if striping is created with local object's size > 0,
1705          * we have to propagate this size to specific object
1706          * the case is possible only when local object was created previously
1707          */
1708         if (dt_object_exists(next))
1709                 rc = lod_declare_init_size(env, dt, th);
1710
1711 out:
1712         RETURN(rc);
1713 }
1714
1715 int lod_dir_striping_create_internal(const struct lu_env *env,
1716                                      struct dt_object *dt,
1717                                      struct lu_attr *attr,
1718                                      const struct dt_object_format *dof,
1719                                      struct thandle *th,
1720                                      bool declare)
1721 {
1722         struct lod_thread_info  *info = lod_env_info(env);
1723         struct dt_object        *next = dt_object_child(dt);
1724         struct lod_object       *lo = lod_dt_obj(dt);
1725         int                     rc;
1726         ENTRY;
1727
1728         if (lo->ldo_dir_def_striping_set &&
1729             !LMVEA_DELETE_VALUES(lo->ldo_stripenr,
1730                                  lo->ldo_dir_stripe_offset)) {
1731                 struct lmv_user_md_v1 *v1 = info->lti_ea_store;
1732                 int stripe_count = lo->ldo_stripenr + 1;
1733
1734                 if (info->lti_ea_store_size < sizeof(*v1)) {
1735                         rc = lod_ea_store_resize(info, sizeof(*v1));
1736                         if (rc != 0)
1737                                 RETURN(rc);
1738                         v1 = info->lti_ea_store;
1739                 }
1740
1741                 memset(v1, 0, sizeof(*v1));
1742                 v1->lum_magic = cpu_to_le32(LMV_USER_MAGIC);
1743                 v1->lum_stripe_count = cpu_to_le32(stripe_count);
1744                 v1->lum_stripe_offset =
1745                                 cpu_to_le32(lo->ldo_dir_stripe_offset);
1746
1747                 info->lti_buf.lb_buf = v1;
1748                 info->lti_buf.lb_len = sizeof(*v1);
1749
1750                 if (declare)
1751                         rc = lod_declare_xattr_set_lmv(env, dt, attr,
1752                                                        &info->lti_buf, th);
1753                 else
1754                         rc = lod_xattr_set_lmv(env, dt, &info->lti_buf,
1755                                                XATTR_NAME_LMV, 0, th,
1756                                                BYPASS_CAPA);
1757                 if (rc != 0)
1758                         RETURN(rc);
1759         }
1760
1761         /* Transfer default LMV striping from the parent */
1762         if (lo->ldo_dir_striping_cached &&
1763             !LMVEA_DELETE_VALUES(lo->ldo_dir_def_stripenr,
1764                                  lo->ldo_dir_def_stripe_offset)) {
1765                 struct lmv_user_md_v1 *v1 = info->lti_ea_store;
1766                 int def_stripe_count = lo->ldo_dir_def_stripenr + 1;
1767
1768                 if (info->lti_ea_store_size < sizeof(*v1)) {
1769                         rc = lod_ea_store_resize(info, sizeof(*v1));
1770                         if (rc != 0)
1771                                 RETURN(rc);
1772                         v1 = info->lti_ea_store;
1773                 }
1774
1775                 memset(v1, 0, sizeof(*v1));
1776                 v1->lum_magic = cpu_to_le32(LMV_USER_MAGIC);
1777                 v1->lum_stripe_count = cpu_to_le32(def_stripe_count);
1778                 v1->lum_stripe_offset =
1779                                 cpu_to_le32(lo->ldo_dir_def_stripe_offset);
1780                 v1->lum_hash_type =
1781                                 cpu_to_le32(lo->ldo_dir_def_hash_type);
1782
1783                 info->lti_buf.lb_buf = v1;
1784                 info->lti_buf.lb_len = sizeof(*v1);
1785                 if (declare)
1786                         rc = dt_declare_xattr_set(env, next, &info->lti_buf,
1787                                                   XATTR_NAME_DEFAULT_LMV, 0,
1788                                                   th);
1789                 else
1790                         rc = dt_xattr_set(env, next, &info->lti_buf,
1791                                            XATTR_NAME_DEFAULT_LMV, 0, th,
1792                                            BYPASS_CAPA);
1793                 if (rc != 0)
1794                         RETURN(rc);
1795         }
1796
1797         /* Transfer default LOV striping from the parent */
1798         if (lo->ldo_striping_cached &&
1799             !LOVEA_DELETE_VALUES(lo->ldo_def_stripe_size,
1800                                  lo->ldo_def_stripenr,
1801                                  lo->ldo_def_stripe_offset)) {
1802                 struct lov_user_md_v3 *v3 = info->lti_ea_store;
1803
1804                 if (info->lti_ea_store_size < sizeof(*v3)) {
1805                         rc = lod_ea_store_resize(info, sizeof(*v3));
1806                         if (rc != 0)
1807                                 RETURN(rc);
1808                         v3 = info->lti_ea_store;
1809                 }
1810
1811                 memset(v3, 0, sizeof(*v3));
1812                 v3->lmm_magic = cpu_to_le32(LOV_USER_MAGIC_V3);
1813                 v3->lmm_stripe_count = cpu_to_le16(lo->ldo_def_stripenr);
1814                 v3->lmm_stripe_offset = cpu_to_le16(lo->ldo_def_stripe_offset);
1815                 v3->lmm_stripe_size = cpu_to_le32(lo->ldo_def_stripe_size);
1816                 if (lo->ldo_pool)
1817                         strncpy(v3->lmm_pool_name, lo->ldo_pool,
1818                                 LOV_MAXPOOLNAME);
1819
1820                 info->lti_buf.lb_buf = v3;
1821                 info->lti_buf.lb_len = sizeof(*v3);
1822
1823                 if (declare)
1824                         rc = dt_declare_xattr_set(env, next, &info->lti_buf,
1825                                                   XATTR_NAME_LOV, 0, th);
1826                 else
1827                         rc = dt_xattr_set(env, next, &info->lti_buf,
1828                                           XATTR_NAME_LOV, 0, th,
1829                                           BYPASS_CAPA);
1830                 if (rc != 0)
1831                         RETURN(rc);
1832         }
1833
1834         RETURN(0);
1835 }
1836
1837 static int lod_declare_dir_striping_create(const struct lu_env *env,
1838                                            struct dt_object *dt,
1839                                            struct lu_attr *attr,
1840                                            struct dt_object_format *dof,
1841                                            struct thandle *th)
1842 {
1843         return lod_dir_striping_create_internal(env, dt, attr, dof, th, true);
1844 }
1845
1846 static int lod_dir_striping_create(const struct lu_env *env,
1847                                    struct dt_object *dt,
1848                                    struct lu_attr *attr,
1849                                    struct dt_object_format *dof,
1850                                    struct thandle *th)
1851 {
1852         return lod_dir_striping_create_internal(env, dt, attr, dof, th, false);
1853 }
1854
1855 static int lod_declare_object_create(const struct lu_env *env,
1856                                      struct dt_object *dt,
1857                                      struct lu_attr *attr,
1858                                      struct dt_allocation_hint *hint,
1859                                      struct dt_object_format *dof,
1860                                      struct thandle *th)
1861 {
1862         struct dt_object   *next = dt_object_child(dt);
1863         struct lod_object  *lo = lod_dt_obj(dt);
1864         int                 rc;
1865         ENTRY;
1866
1867         LASSERT(dof);
1868         LASSERT(attr);
1869         LASSERT(th);
1870
1871         /*
1872          * first of all, we declare creation of local object
1873          */
1874         rc = dt_declare_create(env, next, attr, hint, dof, th);
1875         if (rc)
1876                 GOTO(out, rc);
1877
1878         if (dof->dof_type == DFT_SYM)
1879                 dt->do_body_ops = &lod_body_lnk_ops;
1880
1881         /*
1882          * it's lod_ah_init() who has decided the object will striped
1883          */
1884         if (dof->dof_type == DFT_REGULAR) {
1885                 /* callers don't want stripes */
1886                 /* XXX: all tricky interactions with ->ah_make_hint() decided
1887                  * to use striping, then ->declare_create() behaving differently
1888                  * should be cleaned */
1889                 if (dof->u.dof_reg.striped == 0)
1890                         lo->ldo_stripenr = 0;
1891                 if (lo->ldo_stripenr > 0)
1892                         rc = lod_declare_striped_object(env, dt, attr,
1893                                                         NULL, th);
1894         } else if (dof->dof_type == DFT_DIR) {
1895                 rc = lod_declare_dir_striping_create(env, dt, attr, dof, th);
1896         }
1897 out:
1898         RETURN(rc);
1899 }
1900
1901 int lod_striping_create(const struct lu_env *env, struct dt_object *dt,
1902                         struct lu_attr *attr, struct dt_object_format *dof,
1903                         struct thandle *th)
1904 {
1905         struct lod_object *lo = lod_dt_obj(dt);
1906         int                rc = 0, i;
1907         ENTRY;
1908
1909         LASSERT(lo->ldo_striping_cached == 0);
1910
1911         /* create all underlying objects */
1912         for (i = 0; i < lo->ldo_stripenr; i++) {
1913                 LASSERT(lo->ldo_stripe[i]);
1914                 rc = dt_create(env, lo->ldo_stripe[i], attr, NULL, dof, th);
1915
1916                 if (rc)
1917                         break;
1918         }
1919         if (rc == 0)
1920                 rc = lod_generate_and_set_lovea(env, lo, th);
1921
1922         RETURN(rc);
1923 }
1924
1925 static int lod_object_create(const struct lu_env *env, struct dt_object *dt,
1926                              struct lu_attr *attr,
1927                              struct dt_allocation_hint *hint,
1928                              struct dt_object_format *dof, struct thandle *th)
1929 {
1930         struct dt_object   *next = dt_object_child(dt);
1931         struct lod_object  *lo = lod_dt_obj(dt);
1932         int                 rc;
1933         ENTRY;
1934
1935         /* create local object */
1936         rc = dt_create(env, next, attr, hint, dof, th);
1937
1938         if (rc == 0) {
1939                 if (S_ISDIR(dt->do_lu.lo_header->loh_attr))
1940                         rc = lod_dir_striping_create(env, dt, attr, dof, th);
1941                 else if (lo->ldo_stripe && dof->u.dof_reg.striped != 0)
1942                         rc = lod_striping_create(env, dt, attr, dof, th);
1943         }
1944
1945         RETURN(rc);
1946 }
1947
1948 static int lod_declare_object_destroy(const struct lu_env *env,
1949                                       struct dt_object *dt,
1950                                       struct thandle *th)
1951 {
1952         struct dt_object   *next = dt_object_child(dt);
1953         struct lod_object  *lo = lod_dt_obj(dt);
1954         int                 rc, i;
1955         ENTRY;
1956
1957         /*
1958          * we declare destroy for the local object
1959          */
1960         rc = dt_declare_destroy(env, next, th);
1961         if (rc)
1962                 RETURN(rc);
1963
1964         if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_LOST_MDTOBJ))
1965                 RETURN(0);
1966
1967         /*
1968          * load striping information, notice we don't do this when object
1969          * is being initialized as we don't need this information till
1970          * few specific cases like destroy, chown
1971          */
1972         rc = lod_load_striping(env, lo);
1973         if (rc)
1974                 RETURN(rc);
1975
1976         /* declare destroy for all underlying objects */
1977         for (i = 0; i < lo->ldo_stripenr; i++) {
1978                 LASSERT(lo->ldo_stripe[i]);
1979                 rc = dt_declare_destroy(env, lo->ldo_stripe[i], th);
1980
1981                 if (rc)
1982                         break;
1983         }
1984
1985         RETURN(rc);
1986 }
1987
1988 static int lod_object_destroy(const struct lu_env *env,
1989                 struct dt_object *dt, struct thandle *th)
1990 {
1991         struct dt_object  *next = dt_object_child(dt);
1992         struct lod_object *lo = lod_dt_obj(dt);
1993         int                rc, i;
1994         ENTRY;
1995
1996         /* destroy local object */
1997         rc = dt_destroy(env, next, th);
1998         if (rc)
1999                 RETURN(rc);
2000
2001         if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_LOST_MDTOBJ))
2002                 RETURN(0);
2003
2004         /* destroy all underlying objects */
2005         for (i = 0; i < lo->ldo_stripenr; i++) {
2006                 LASSERT(lo->ldo_stripe[i]);
2007                 /* for striped directory, next == ldo_stripe[0] */
2008                 if (next != lo->ldo_stripe[i]) {
2009                         rc = dt_destroy(env, lo->ldo_stripe[i], th);
2010                         if (rc)
2011                                 break;
2012                 }
2013         }
2014
2015         RETURN(rc);
2016 }
2017
2018 static int lod_index_try(const struct lu_env *env, struct dt_object *dt,
2019                          const struct dt_index_features *feat)
2020 {
2021         struct dt_object *next = dt_object_child(dt);
2022         int               rc;
2023         ENTRY;
2024
2025         LASSERT(next->do_ops);
2026         LASSERT(next->do_ops->do_index_try);
2027
2028         rc = next->do_ops->do_index_try(env, next, feat);
2029         if (next->do_index_ops && dt->do_index_ops == NULL)
2030                 dt->do_index_ops = &lod_index_ops;
2031
2032         RETURN(rc);
2033 }
2034
/** Forward the reference-add declaration to the child object. */
static int lod_declare_ref_add(const struct lu_env *env,
                               struct dt_object *dt, struct thandle *th)
{
        struct dt_object *child = dt_object_child(dt);

        return dt_declare_ref_add(env, child, th);
}
2040
/** Forward the reference-add operation to the child object. */
static int lod_ref_add(const struct lu_env *env,
                       struct dt_object *dt, struct thandle *th)
{
        struct dt_object *child = dt_object_child(dt);

        return dt_ref_add(env, child, th);
}
2046
/** Forward the reference-delete declaration to the child object. */
static int lod_declare_ref_del(const struct lu_env *env,
                               struct dt_object *dt, struct thandle *th)
{
        struct dt_object *child = dt_object_child(dt);

        return dt_declare_ref_del(env, child, th);
}
2052
/** Forward the reference-delete operation to the child object. */
static int lod_ref_del(const struct lu_env *env,
                       struct dt_object *dt, struct thandle *th)
{
        struct dt_object *child = dt_object_child(dt);

        return dt_ref_del(env, child, th);
}
2058
2059 static struct obd_capa *lod_capa_get(const struct lu_env *env,
2060                                      struct dt_object *dt,
2061                                      struct lustre_capa *old, __u64 opc)
2062 {
2063         return dt_capa_get(env, dt_object_child(dt), old, opc);
2064 }
2065
/** Forward the sync request to the child object. */
static int lod_object_sync(const struct lu_env *env, struct dt_object *dt)
{
        struct dt_object *child = dt_object_child(dt);

        return dt_object_sync(env, child);
}
2070
2071 struct lod_slave_locks  {
2072         int                     lsl_lock_count;
2073         struct lustre_handle    lsl_handle[0];
2074 };
2075
2076 static int lod_object_unlock_internal(const struct lu_env *env,
2077                                       struct dt_object *dt,
2078                                       struct ldlm_enqueue_info *einfo,
2079                                       ldlm_policy_data_t *policy)
2080 {
2081         struct lod_object       *lo = lod_dt_obj(dt);
2082         struct lod_slave_locks  *slave_locks = einfo->ei_cbdata;
2083         int                     rc = 0;
2084         int                     i;
2085         ENTRY;
2086
2087         if (slave_locks == NULL)
2088                 RETURN(0);
2089
2090         for (i = 0; i < slave_locks->lsl_lock_count; i++) {
2091                 if (lustre_handle_is_used(&slave_locks->lsl_handle[i])) {
2092                         int     rc1;
2093
2094                         einfo->ei_cbdata = &slave_locks->lsl_handle[i];
2095                         rc1 = dt_object_unlock(env, lo->ldo_stripe[i], einfo,
2096                                                policy);
2097                         if (rc1 < 0)
2098                                 rc = rc == 0 ? rc1 : rc;
2099                 }
2100         }
2101
2102         RETURN(rc);
2103 }
2104
2105 static int lod_object_unlock(const struct lu_env *env, struct dt_object *dt,
2106                              struct ldlm_enqueue_info *einfo,
2107                              union ldlm_policy_data *policy)
2108 {
2109         struct lod_object       *lo = lod_dt_obj(dt);
2110         struct lod_slave_locks  *slave_locks = einfo->ei_cbdata;
2111         int                     slave_locks_size;
2112         int                     rc;
2113         ENTRY;
2114
2115         if (slave_locks == NULL)
2116                 RETURN(0);
2117
2118         rc = lod_load_striping(env, lo);
2119         if (rc != 0)
2120                 RETURN(rc);
2121
2122         /* Note: for remote lock for single stripe dir, MDT will cancel
2123          * the lock by lockh directly */
2124         if (lo->ldo_stripenr == 0 && dt_object_remote(dt_object_child(dt)))
2125                 RETURN(0);
2126
2127         if (!S_ISDIR(dt->do_lu.lo_header->loh_attr))
2128                 RETURN(-ENOTDIR);
2129
2130         /* Only cancel slave lock for striped dir */
2131         rc = lod_object_unlock_internal(env, dt, einfo, policy);
2132
2133         slave_locks_size = sizeof(*slave_locks) + slave_locks->lsl_lock_count *
2134                            sizeof(slave_locks->lsl_handle[0]);
2135         OBD_FREE(slave_locks, slave_locks_size);
2136         einfo->ei_cbdata = NULL;
2137
2138         RETURN(rc);
2139 }
2140
2141 static int lod_object_lock(const struct lu_env *env,
2142                            struct dt_object *dt,
2143                            struct lustre_handle *lh,
2144                            struct ldlm_enqueue_info *einfo,
2145                            union ldlm_policy_data *policy)
2146 {
2147         struct lod_object       *lo = lod_dt_obj(dt);
2148         int                     rc = 0;
2149         int                     i;
2150         int                     slave_locks_size;
2151         struct lod_slave_locks  *slave_locks = NULL;
2152         ENTRY;
2153
2154         /* remote object lock */
2155         if (!einfo->ei_enq_slave) {
2156                 LASSERT(dt_object_remote(dt));
2157                 return dt_object_lock(env, dt_object_child(dt), lh, einfo,
2158                                       policy);
2159         }
2160
2161         if (!S_ISDIR(dt->do_lu.lo_header->loh_attr))
2162                 RETURN(-ENOTDIR);
2163
2164         rc = lod_load_striping(env, lo);
2165         if (rc != 0)
2166                 RETURN(rc);
2167
2168         /* No stripes */
2169         if (lo->ldo_stripenr == 0)
2170                 RETURN(0);
2171
2172         slave_locks_size = sizeof(*slave_locks) + lo->ldo_stripenr *
2173                            sizeof(slave_locks->lsl_handle[0]);
2174         /* Freed in lod_object_unlock */
2175         OBD_ALLOC(slave_locks, slave_locks_size);
2176         if (slave_locks == NULL)
2177                 RETURN(-ENOMEM);
2178         slave_locks->lsl_lock_count = lo->ldo_stripenr;
2179
2180         /* striped directory lock */
2181         for (i = 0; i < lo->ldo_stripenr; i++) {
2182                 struct lustre_handle    lockh;
2183
2184                 LASSERT(lo->ldo_stripe[i]);
2185                 rc = dt_object_lock(env, lo->ldo_stripe[i], &lockh, einfo,
2186                                     policy);
2187                 if (rc != 0)
2188                         GOTO(out, rc);
2189
2190                 slave_locks->lsl_handle[i] = lockh;
2191         }
2192
2193         einfo->ei_cbdata = slave_locks;
2194
2195 out:
2196         if (rc != 0 && slave_locks != NULL) {
2197                 einfo->ei_cbdata = slave_locks;
2198                 lod_object_unlock_internal(env, dt, einfo, policy);
2199                 OBD_FREE(slave_locks, slave_locks_size);
2200                 einfo->ei_cbdata = NULL;
2201         }
2202
2203         RETURN(rc);
2204 }
2205
2206 struct dt_object_operations lod_obj_ops = {
2207         .do_read_lock           = lod_object_read_lock,
2208         .do_write_lock          = lod_object_write_lock,
2209         .do_read_unlock         = lod_object_read_unlock,
2210         .do_write_unlock        = lod_object_write_unlock,
2211         .do_write_locked        = lod_object_write_locked,
2212         .do_attr_get            = lod_attr_get,
2213         .do_declare_attr_set    = lod_declare_attr_set,
2214         .do_attr_set            = lod_attr_set,
2215         .do_xattr_get           = lod_xattr_get,
2216         .do_declare_xattr_set   = lod_declare_xattr_set,
2217         .do_xattr_set           = lod_xattr_set,
2218         .do_declare_xattr_del   = lod_declare_xattr_del,
2219         .do_xattr_del           = lod_xattr_del,
2220         .do_xattr_list          = lod_xattr_list,
2221         .do_ah_init             = lod_ah_init,
2222         .do_declare_create      = lod_declare_object_create,
2223         .do_create              = lod_object_create,
2224         .do_declare_destroy     = lod_declare_object_destroy,
2225         .do_destroy             = lod_object_destroy,
2226         .do_index_try           = lod_index_try,
2227         .do_declare_ref_add     = lod_declare_ref_add,
2228         .do_ref_add             = lod_ref_add,
2229         .do_declare_ref_del     = lod_declare_ref_del,
2230         .do_ref_del             = lod_ref_del,
2231         .do_capa_get            = lod_capa_get,
2232         .do_object_sync         = lod_object_sync,
2233         .do_object_lock         = lod_object_lock,
2234         .do_object_unlock       = lod_object_unlock,
2235 };
2236
2237 static ssize_t lod_read(const struct lu_env *env, struct dt_object *dt,
2238                         struct lu_buf *buf, loff_t *pos,
2239                         struct lustre_capa *capa)
2240 {
2241         struct dt_object *next = dt_object_child(dt);
2242         return next->do_body_ops->dbo_read(env, next, buf, pos, capa);
2243 }
2244
2245 static ssize_t lod_declare_write(const struct lu_env *env,
2246                                  struct dt_object *dt,
2247                                  const loff_t size, loff_t pos,
2248                                  struct thandle *th)
2249 {
2250         return dt_declare_record_write(env, dt_object_child(dt),
2251                                        size, pos, th);
2252 }
2253
2254 static ssize_t lod_write(const struct lu_env *env, struct dt_object *dt,
2255                          const struct lu_buf *buf, loff_t *pos,
2256                          struct thandle *th, struct lustre_capa *capa, int iq)
2257 {
2258         struct dt_object *next = dt_object_child(dt);
2259         LASSERT(next);
2260         return next->do_body_ops->dbo_write(env, next, buf, pos, th, capa, iq);
2261 }
2262
2263 static const struct dt_body_operations lod_body_lnk_ops = {
2264         .dbo_read               = lod_read,
2265         .dbo_declare_write      = lod_declare_write,
2266         .dbo_write              = lod_write
2267 };
2268
2269 static int lod_object_init(const struct lu_env *env, struct lu_object *lo,
2270                            const struct lu_object_conf *conf)
2271 {
2272         struct lod_device       *lod    = lu2lod_dev(lo->lo_dev);
2273         struct lu_device        *cdev   = NULL;
2274         struct lu_object        *cobj;
2275         struct lod_tgt_descs    *ltd    = NULL;
2276         struct lod_tgt_desc     *tgt;
2277         mdsno_t                  idx    = 0;
2278         int                      type   = LU_SEQ_RANGE_ANY;
2279         int                      rc;
2280         ENTRY;
2281
2282         rc = lod_fld_lookup(env, lod, lu_object_fid(lo), &idx, &type);
2283         if (rc != 0)
2284                 RETURN(rc);
2285
2286         if (type == LU_SEQ_RANGE_MDT &&
2287             idx == lu_site2seq(lo->lo_dev->ld_site)->ss_node_id) {
2288                 cdev = &lod->lod_child->dd_lu_dev;
2289         } else if (type == LU_SEQ_RANGE_MDT) {
2290                 ltd = &lod->lod_mdt_descs;
2291                 lod_getref(ltd);
2292         } else if (type == LU_SEQ_RANGE_OST) {
2293                 ltd = &lod->lod_ost_descs;
2294                 lod_getref(ltd);
2295         } else {
2296                 LBUG();
2297         }
2298
2299         if (ltd != NULL) {
2300                 if (ltd->ltd_tgts_size > idx &&
2301                     cfs_bitmap_check(ltd->ltd_tgt_bitmap, idx)) {
2302                         tgt = LTD_TGT(ltd, idx);
2303
2304                         LASSERT(tgt != NULL);
2305                         LASSERT(tgt->ltd_tgt != NULL);
2306
2307                         cdev = &(tgt->ltd_tgt->dd_lu_dev);
2308                 }
2309                 lod_putref(lod, ltd);
2310         }
2311
2312         if (unlikely(cdev == NULL))
2313                 RETURN(-ENOENT);
2314
2315         cobj = cdev->ld_ops->ldo_object_alloc(env, lo->lo_header, cdev);
2316         if (unlikely(cobj == NULL))
2317                 RETURN(-ENOMEM);
2318
2319         lu_object_add(lo, cobj);
2320
2321         RETURN(0);
2322 }
2323
2324 void lod_object_free_striping(const struct lu_env *env, struct lod_object *lo)
2325 {
2326         int i;
2327
2328         if (lo->ldo_dir_stripe != NULL) {
2329                 OBD_FREE_PTR(lo->ldo_dir_stripe);
2330                 lo->ldo_dir_stripe = NULL;
2331         }
2332
2333         if (lo->ldo_stripe) {
2334                 LASSERT(lo->ldo_stripes_allocated > 0);
2335
2336                 for (i = 0; i < lo->ldo_stripenr; i++) {
2337                         if (lo->ldo_stripe[i])
2338                                 lu_object_put(env, &lo->ldo_stripe[i]->do_lu);
2339                 }
2340
2341                 i = sizeof(struct dt_object *) * lo->ldo_stripes_allocated;
2342                 OBD_FREE(lo->ldo_stripe, i);
2343                 lo->ldo_stripe = NULL;
2344                 lo->ldo_stripes_allocated = 0;
2345         }
2346         lo->ldo_stripenr = 0;
2347         lo->ldo_pattern = 0;
2348 }
2349
2350 /*
2351  * ->start is called once all slices are initialized, including header's
2352  * cache for mode (object type). using the type we can initialize ops
2353  */
2354 static int lod_object_start(const struct lu_env *env, struct lu_object *o)
2355 {
2356         if (S_ISLNK(o->lo_header->loh_attr & S_IFMT))
2357                 lu2lod_obj(o)->ldo_obj.do_body_ops = &lod_body_lnk_ops;
2358         return 0;
2359 }
2360
2361 static void lod_object_free(const struct lu_env *env, struct lu_object *o)
2362 {
2363         struct lod_object *mo = lu2lod_obj(o);
2364
2365         /*
2366          * release all underlying object pinned
2367          */
2368
2369         lod_object_free_striping(env, mo);
2370
2371         lod_object_set_pool(mo, NULL);
2372
2373         lu_object_fini(o);
2374         OBD_SLAB_FREE_PTR(mo, lod_object_kmem);
2375 }
2376
/*
 * ->loo_object_release() hook for LOD objects.  Currently a no-op.
 */
static void lod_object_release(const struct lu_env *env, struct lu_object *o)
{
        /*
         * XXX: should everything be released here when an earlier
         * object creation has failed?
         */
}
2382
2383 static int lod_object_print(const struct lu_env *env, void *cookie,
2384                             lu_printer_t p, const struct lu_object *l)
2385 {
2386         struct lod_object *o = lu2lod_obj((struct lu_object *) l);
2387
2388         return (*p)(env, cookie, LUSTRE_LOD_NAME"-object@%p", o);
2389 }
2390
2391 struct lu_object_operations lod_lu_obj_ops = {
2392         .loo_object_init        = lod_object_init,
2393         .loo_object_start       = lod_object_start,
2394         .loo_object_free        = lod_object_free,
2395         .loo_object_release     = lod_object_release,
2396         .loo_object_print       = lod_object_print,
2397 };