Whamcloud - gitweb
b29bb436df5f418a074837eec046c101746eb9ac
[fs/lustre-release.git] / lustre / lov / lov_ea.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2011, 2016, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  * Lustre is a trademark of Sun Microsystems, Inc.
31  *
32  * lustre/lov/lov_ea.c
33  *
34  * Author: Wang Di <wangdi@clusterfs.com>
35  */
36
37 #define DEBUG_SUBSYSTEM S_LOV
38
39 #include <linux/math64.h>
40 #include <linux/sort.h>
41 #include <libcfs/libcfs.h>
42
43 #include <obd_class.h>
44 #include "lov_internal.h"
45
46 static inline void
47 lu_extent_le_to_cpu(struct lu_extent *dst, const struct lu_extent *src)
48 {
49         dst->e_start = le64_to_cpu(src->e_start);
50         dst->e_end = le64_to_cpu(src->e_end);
51 }
52
53 /* Find minimum stripe maxbytes value.  For inactive or
54  * reconnecting targets use LUSTRE_EXT3_STRIPE_MAXBYTES. */
55 static loff_t lov_tgt_maxbytes(struct lov_tgt_desc *tgt)
56 {
57         struct obd_import *imp;
58         loff_t maxbytes = LUSTRE_EXT3_STRIPE_MAXBYTES;
59
60         if (!tgt->ltd_active)
61                 return maxbytes;
62
63         imp = tgt->ltd_obd->u.cli.cl_import;
64         if (imp == NULL)
65                 return maxbytes;
66
67         spin_lock(&imp->imp_lock);
68         if (imp->imp_state == LUSTRE_IMP_FULL &&
69             (imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_MAXBYTES) &&
70             imp->imp_connect_data.ocd_maxbytes > 0)
71                 maxbytes = imp->imp_connect_data.ocd_maxbytes;
72
73         spin_unlock(&imp->imp_lock);
74
75         return maxbytes;
76 }
77
78 static int lsm_lmm_verify_v1v3(struct lov_mds_md *lmm, size_t lmm_size,
79                                u16 stripe_count)
80 {
81         if (stripe_count > LOV_V1_INSANE_STRIPE_COUNT) {
82                 CERROR("bad stripe count %d\n", stripe_count);
83                 lov_dump_lmm_common(D_WARNING, lmm);
84                 return -EINVAL;
85         }
86
87         if (lmm_oi_id(&lmm->lmm_oi) == 0) {
88                 CERROR("zero object id\n");
89                 lov_dump_lmm_common(D_WARNING, lmm);
90                 return -EINVAL;
91         }
92
93         if (lov_pattern(le32_to_cpu(lmm->lmm_pattern)) != LOV_PATTERN_RAID0) {
94                 CERROR("bad striping pattern\n");
95                 lov_dump_lmm_common(D_WARNING, lmm);
96                 return -EINVAL;
97         }
98
99         if (lmm->lmm_stripe_size == 0 ||
100             (le32_to_cpu(lmm->lmm_stripe_size)&(LOV_MIN_STRIPE_SIZE-1)) != 0) {
101                 CERROR("bad stripe size %u\n",
102                        le32_to_cpu(lmm->lmm_stripe_size));
103                 lov_dump_lmm_common(D_WARNING, lmm);
104                 return -EINVAL;
105         }
106         return 0;
107 }
108
109 static void lsme_free(struct lov_stripe_md_entry *lsme)
110 {
111         unsigned int stripe_count = lsme->lsme_stripe_count;
112         unsigned int i;
113         size_t lsme_size;
114
115         if (!lsme_inited(lsme) ||
116             lsme->lsme_pattern & LOV_PATTERN_F_RELEASED)
117                 stripe_count = 0;
118         for (i = 0; i < stripe_count; i++)
119                 OBD_SLAB_FREE_PTR(lsme->lsme_oinfo[i], lov_oinfo_slab);
120
121         lsme_size = offsetof(typeof(*lsme), lsme_oinfo[stripe_count]);
122         OBD_FREE_LARGE(lsme, lsme_size);
123 }
124
125 void lsm_free(struct lov_stripe_md *lsm)
126 {
127         unsigned int entry_count = lsm->lsm_entry_count;
128         unsigned int i;
129         size_t lsm_size;
130
131         for (i = 0; i < entry_count; i++)
132                 lsme_free(lsm->lsm_entries[i]);
133
134         lsm_size = offsetof(typeof(*lsm), lsm_entries[entry_count]);
135         OBD_FREE(lsm, lsm_size);
136 }
137
138 /**
139  * Unpack a struct lov_mds_md into a struct lov_stripe_md_entry.
140  *
141  * The caller should set id and extent.
142  */
143 static struct lov_stripe_md_entry *
144 lsme_unpack(struct lov_obd *lov, struct lov_mds_md *lmm, size_t buf_size,
145             const char *pool_name, bool inited, struct lov_ost_data_v1 *objects,
146             loff_t *maxbytes)
147 {
148         struct lov_stripe_md_entry *lsme;
149         size_t lsme_size;
150         loff_t min_stripe_maxbytes = 0;
151         loff_t lov_bytes;
152         u32 magic;
153         u32 pattern;
154         unsigned int stripe_count;
155         unsigned int i;
156         int rc;
157
158         magic = le32_to_cpu(lmm->lmm_magic);
159         if (magic != LOV_MAGIC_V1 && magic != LOV_MAGIC_V3)
160                 RETURN(ERR_PTR(-EINVAL));
161
162         pattern = le32_to_cpu(lmm->lmm_pattern);
163         if (pattern & LOV_PATTERN_F_RELEASED || !inited)
164                 stripe_count = 0;
165         else
166                 stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
167
168         if (buf_size < (magic == LOV_MAGIC_V1 ? sizeof(struct lov_mds_md_v1) :
169                                                 sizeof(struct lov_mds_md_v3))) {
170                 CERROR("LOV EA %s too small: %zu, need %u\n",
171                        magic == LOV_MAGIC_V1 ? "V1" : "V3", buf_size,
172                        lov_mds_md_size(stripe_count, magic == LOV_MAGIC_V1 ?
173                                        LOV_MAGIC_V1 : LOV_MAGIC_V3));
174                 lov_dump_lmm_common(D_WARNING, lmm);
175                 return ERR_PTR(-EINVAL);
176         }
177
178         rc = lsm_lmm_verify_v1v3(lmm, buf_size, stripe_count);
179         if (rc < 0)
180                 return ERR_PTR(rc);
181
182         lsme_size = offsetof(typeof(*lsme), lsme_oinfo[stripe_count]);
183         OBD_ALLOC_LARGE(lsme, lsme_size);
184         if (lsme == NULL)
185                 RETURN(ERR_PTR(-ENOMEM));
186
187         lsme->lsme_magic = magic;
188         lsme->lsme_pattern = pattern;
189         lsme->lsme_flags = 0;
190         lsme->lsme_stripe_size = le32_to_cpu(lmm->lmm_stripe_size);
191         /* preserve the possible -1 stripe count for uninstantiated component */
192         lsme->lsme_stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
193         lsme->lsme_layout_gen = le16_to_cpu(lmm->lmm_layout_gen);
194
195         if (pool_name != NULL) {
196                 size_t pool_name_len;
197
198                 pool_name_len = strlcpy(lsme->lsme_pool_name, pool_name,
199                                         sizeof(lsme->lsme_pool_name));
200                 if (pool_name_len >= sizeof(lsme->lsme_pool_name))
201                         GOTO(out_lsme, rc = -E2BIG);
202         }
203
204         for (i = 0; i < stripe_count; i++) {
205                 struct lov_oinfo *loi;
206                 struct lov_tgt_desc *ltd;
207
208                 OBD_SLAB_ALLOC_PTR_GFP(loi, lov_oinfo_slab, GFP_NOFS);
209                 if (loi == NULL)
210                         GOTO(out_lsme, rc = -ENOMEM);
211
212                 lsme->lsme_oinfo[i] = loi;
213
214                 ostid_le_to_cpu(&objects[i].l_ost_oi, &loi->loi_oi);
215                 loi->loi_ost_idx = le32_to_cpu(objects[i].l_ost_idx);
216                 loi->loi_ost_gen = le32_to_cpu(objects[i].l_ost_gen);
217                 if (lov_oinfo_is_dummy(loi))
218                         continue;
219
220                 if (loi->loi_ost_idx >= lov->desc.ld_tgt_count &&
221                     !lov2obd(lov)->obd_process_conf) {
222                         CERROR("%s: OST index %d more than OST count %d\n",
223                                (char*)lov->desc.ld_uuid.uuid,
224                                loi->loi_ost_idx, lov->desc.ld_tgt_count);
225                         lov_dump_lmm_v1(D_WARNING, lmm);
226                         GOTO(out_lsme, rc = -EINVAL);
227                 }
228
229                 ltd = lov->lov_tgts[loi->loi_ost_idx];
230                 if (ltd == NULL) {
231                         CERROR("%s: OST index %d missing\n",
232                                (char*)lov->desc.ld_uuid.uuid, loi->loi_ost_idx);
233                         lov_dump_lmm_v1(D_WARNING, lmm);
234                         continue;
235                 }
236
237                 lov_bytes = lov_tgt_maxbytes(ltd);
238                 if (min_stripe_maxbytes == 0 || lov_bytes < min_stripe_maxbytes)
239                         min_stripe_maxbytes = lov_bytes;
240         }
241
242         if (min_stripe_maxbytes == 0)
243                 min_stripe_maxbytes = LUSTRE_EXT3_STRIPE_MAXBYTES;
244
245         lov_bytes = min_stripe_maxbytes * stripe_count;
246
247         if (maxbytes != NULL) {
248                 if (lov_bytes < min_stripe_maxbytes) /* handle overflow */
249                         *maxbytes = MAX_LFS_FILESIZE;
250                 else
251                         *maxbytes = lov_bytes;
252         }
253
254         return lsme;
255
256 out_lsme:
257         for (i = 0; i < stripe_count; i++) {
258                 struct lov_oinfo *loi = lsme->lsme_oinfo[i];
259
260                 if (loi != NULL)
261                         OBD_SLAB_FREE_PTR(lsme->lsme_oinfo[i], lov_oinfo_slab);
262         }
263         OBD_FREE_LARGE(lsme, lsme_size);
264
265         return ERR_PTR(rc);
266 }
267
268 static struct
269 lov_stripe_md *lsm_unpackmd_v1v3(struct lov_obd *lov, struct lov_mds_md *lmm,
270                                  size_t buf_size, const char *pool_name,
271                                  struct lov_ost_data_v1 *objects)
272 {
273         struct lov_stripe_md *lsm;
274         struct lov_stripe_md_entry *lsme;
275         size_t lsm_size;
276         loff_t maxbytes;
277         u32 pattern;
278         int rc;
279
280         pattern = le32_to_cpu(lmm->lmm_pattern);
281
282         lsme = lsme_unpack(lov, lmm, buf_size, pool_name, true, objects,
283                            &maxbytes);
284         if (IS_ERR(lsme))
285                 RETURN(ERR_CAST(lsme));
286
287         lsme->lsme_flags = LCME_FL_INIT;
288         lsme->lsme_extent.e_start = 0;
289         lsme->lsme_extent.e_end = LUSTRE_EOF;
290
291         lsm_size = offsetof(typeof(*lsm), lsm_entries[1]);
292         OBD_ALLOC(lsm, lsm_size);
293         if (lsm == NULL)
294                 GOTO(out_lsme, rc = -ENOMEM);
295
296         atomic_set(&lsm->lsm_refc, 1);
297         spin_lock_init(&lsm->lsm_lock);
298         lsm->lsm_maxbytes = maxbytes;
299         lmm_oi_le_to_cpu(&lsm->lsm_oi, &lmm->lmm_oi);
300         lsm->lsm_magic = le32_to_cpu(lmm->lmm_magic);
301         lsm->lsm_layout_gen = le16_to_cpu(lmm->lmm_layout_gen);
302         lsm->lsm_entry_count = 1;
303         lsm->lsm_is_released = pattern & LOV_PATTERN_F_RELEASED;
304         lsm->lsm_entries[0] = lsme;
305
306         return lsm;
307
308 out_lsme:
309         lsme_free(lsme);
310
311         return ERR_PTR(rc);
312 }
313
314 static inline struct lov_stripe_md *
315 lsm_unpackmd_v1(struct lov_obd *lov, void *buf, size_t buf_size)
316 {
317         struct lov_mds_md_v1 *lmm = buf;
318
319         return lsm_unpackmd_v1v3(lov, buf, buf_size, NULL, lmm->lmm_objects);
320 }
321
322 const struct lsm_operations lsm_v1_ops = {
323         .lsm_unpackmd           = lsm_unpackmd_v1,
324 };
325
326 static inline
327 struct lov_stripe_md *lsm_unpackmd_v3(struct lov_obd *lov, void *buf,
328                                       size_t buf_size)
329 {
330         struct lov_mds_md_v3 *lmm = buf;
331
332         return lsm_unpackmd_v1v3(lov, buf, buf_size, lmm->lmm_pool_name,
333                                  lmm->lmm_objects);
334 }
335
336 const struct lsm_operations lsm_v3_ops = {
337         .lsm_unpackmd           = lsm_unpackmd_v3,
338 };
339
340 static int lsm_verify_comp_md_v1(struct lov_comp_md_v1 *lcm,
341                                  size_t lcm_buf_size)
342 {
343         unsigned int entry_count;
344         unsigned int i;
345         size_t lcm_size;
346
347         lcm_size = le32_to_cpu(lcm->lcm_size);
348         if (lcm_buf_size < lcm_size) {
349                 CERROR("bad LCM buffer size %zu, expected %zu\n",
350                        lcm_buf_size, lcm_size);
351                 RETURN(-EINVAL);
352         }
353
354         entry_count = le16_to_cpu(lcm->lcm_entry_count);
355         for (i = 0; i < entry_count; i++) {
356                 struct lov_comp_md_entry_v1 *lcme = &lcm->lcm_entries[i];
357                 size_t blob_offset;
358                 size_t blob_size;
359
360                 blob_offset = le32_to_cpu(lcme->lcme_offset);
361                 blob_size = le32_to_cpu(lcme->lcme_size);
362
363                 if (lcm_size < blob_offset || lcm_size < blob_size ||
364                     lcm_size < blob_offset + blob_size) {
365                         CERROR("LCM entry %u has invalid blob: "
366                                "LCM size = %zu, offset = %zu, size = %zu\n",
367                                le32_to_cpu(lcme->lcme_id),
368                                lcm_size, blob_offset, blob_size);
369                         RETURN(-EINVAL);
370                 }
371         }
372
373         return 0;
374 }
375
376 static struct lov_stripe_md_entry *
377 lsme_unpack_comp(struct lov_obd *lov, struct lov_mds_md *lmm,
378                  size_t lmm_buf_size, bool inited, loff_t *maxbytes)
379 {
380         unsigned int magic;
381         unsigned int stripe_count;
382
383         stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
384         if (stripe_count == 0)
385                 RETURN(ERR_PTR(-EINVAL));
386         /* un-instantiated lmm contains no ost id info, i.e. lov_ost_data_v1 */
387         if (!inited)
388                 stripe_count = 0;
389
390         magic = le32_to_cpu(lmm->lmm_magic);
391         if (magic != LOV_MAGIC_V1 && magic != LOV_MAGIC_V3)
392                 RETURN(ERR_PTR(-EINVAL));
393
394         if (lmm_buf_size < lov_mds_md_size(stripe_count, magic))
395                 RETURN(ERR_PTR(-EINVAL));
396
397         if (magic == LOV_MAGIC_V1) {
398                 return lsme_unpack(lov, lmm, lmm_buf_size, NULL,
399                                    inited, lmm->lmm_objects, maxbytes);
400         } else {
401                 struct lov_mds_md_v3 *lmm3 = (struct lov_mds_md_v3 *)lmm;
402
403                 return lsme_unpack(lov, lmm, lmm_buf_size, lmm3->lmm_pool_name,
404                                    inited, lmm3->lmm_objects, maxbytes);
405         }
406 }
407
408 static struct lov_stripe_md *
409 lsm_unpackmd_comp_md_v1(struct lov_obd *lov, void *buf, size_t buf_size)
410 {
411         struct lov_comp_md_v1 *lcm = buf;
412         struct lov_stripe_md *lsm;
413         size_t lsm_size;
414         unsigned int entry_count = 0;
415         unsigned int i;
416         loff_t maxbytes;
417         int rc;
418
419         rc = lsm_verify_comp_md_v1(buf, buf_size);
420         if (rc < 0)
421                 return ERR_PTR(rc);
422
423         entry_count = le16_to_cpu(lcm->lcm_entry_count);
424
425         lsm_size = offsetof(typeof(*lsm), lsm_entries[entry_count]);
426         OBD_ALLOC(lsm, lsm_size);
427         if (lsm == NULL)
428                 return ERR_PTR(-ENOMEM);
429
430         atomic_set(&lsm->lsm_refc, 1);
431         spin_lock_init(&lsm->lsm_lock);
432         lsm->lsm_magic = le32_to_cpu(lcm->lcm_magic);
433         lsm->lsm_layout_gen = le32_to_cpu(lcm->lcm_layout_gen);
434         lsm->lsm_entry_count = entry_count;
435         lsm->lsm_is_released = true;
436         lsm->lsm_maxbytes = LLONG_MIN;
437
438         for (i = 0; i < entry_count; i++) {
439                 struct lov_comp_md_entry_v1 *lcme = &lcm->lcm_entries[i];
440                 struct lov_stripe_md_entry *lsme;
441                 size_t blob_offset;
442                 size_t blob_size;
443                 void *blob;
444
445                 blob_offset = le32_to_cpu(lcme->lcme_offset);
446                 blob_size = le32_to_cpu(lcme->lcme_size);
447                 blob = (char *)lcm + blob_offset;
448
449                 lsme = lsme_unpack_comp(lov, blob, blob_size,
450                                         le32_to_cpu(lcme->lcme_flags) &
451                                         LCME_FL_INIT,
452                                         (i == entry_count - 1) ? &maxbytes :
453                                                                  NULL);
454                 if (IS_ERR(lsme))
455                         GOTO(out_lsm, rc = PTR_ERR(lsme));
456
457                 if (!(lsme->lsme_pattern & LOV_PATTERN_F_RELEASED))
458                         lsm->lsm_is_released = false;
459
460                 lsm->lsm_entries[i] = lsme;
461                 lsme->lsme_id = le32_to_cpu(lcme->lcme_id);
462                 lsme->lsme_flags = le32_to_cpu(lcme->lcme_flags);
463                 lu_extent_le_to_cpu(&lsme->lsme_extent, &lcme->lcme_extent);
464
465                 if (i == entry_count - 1) {
466                         lsm->lsm_maxbytes = (loff_t)lsme->lsme_extent.e_start +
467                                             maxbytes;
468                         /* the last component hasn't been defined, or
469                          * lsm_maxbytes overflowed. */
470                         if (lsme->lsme_extent.e_end != LUSTRE_EOF ||
471                             lsm->lsm_maxbytes <
472                             (loff_t)lsme->lsme_extent.e_start)
473                                 lsm->lsm_maxbytes = MAX_LFS_FILESIZE;
474                 }
475         }
476
477         RETURN(lsm);
478
479 out_lsm:
480         for (i = 0; i < entry_count; i++)
481                 if (lsm->lsm_entries[i] != NULL)
482                         lsme_free(lsm->lsm_entries[i]);
483
484         OBD_FREE(lsm, lsm_size);
485
486         RETURN(ERR_PTR(rc));
487 }
488
489 const struct lsm_operations lsm_comp_md_v1_ops = {
490         .lsm_unpackmd         = lsm_unpackmd_comp_md_v1,
491 };
492
493 void dump_lsm(unsigned int level, const struct lov_stripe_md *lsm)
494 {
495         int i, j;
496
497         CDEBUG(level, "lsm %p, objid "DOSTID", maxbytes %#llx, magic 0x%08X, "
498                "refc: %d, entry: %u, layout_gen %u\n",
499                lsm, POSTID(&lsm->lsm_oi), lsm->lsm_maxbytes, lsm->lsm_magic,
500                atomic_read(&lsm->lsm_refc), lsm->lsm_entry_count,
501                lsm->lsm_layout_gen);
502
503         for (i = 0; i < lsm->lsm_entry_count; i++) {
504                 struct lov_stripe_md_entry *lse = lsm->lsm_entries[i];
505
506                 CDEBUG(level, DEXT ": id: %u, flags: %x, "
507                        "magic 0x%08X, layout_gen %u, "
508                        "stripe count %u, sstripe size %u, "
509                        "pool: ["LOV_POOLNAMEF"]\n",
510                        PEXT(&lse->lsme_extent), lse->lsme_id, lse->lsme_flags,
511                        lse->lsme_magic, lse->lsme_layout_gen,
512                        lse->lsme_stripe_count, lse->lsme_stripe_size,
513                        lse->lsme_pool_name);
514                 if (!lsme_inited(lse) ||
515                     lse->lsme_pattern & LOV_PATTERN_F_RELEASED)
516                         continue;
517                 for (j = 0; j < lse->lsme_stripe_count; j++) {
518                         CDEBUG(level, "   oinfo:%p: ostid: "DOSTID
519                                " ost idx: %d gen: %d\n",
520                                lse->lsme_oinfo[j],
521                                POSTID(&lse->lsme_oinfo[j]->loi_oi),
522                                lse->lsme_oinfo[j]->loi_ost_idx,
523                                lse->lsme_oinfo[j]->loi_ost_gen);
524                 }
525         }
526 }
527
528 int lov_lsm_entry(const struct lov_stripe_md *lsm, __u64 offset)
529 {
530         int i;
531
532         for (i = 0; i < lsm->lsm_entry_count; i++) {
533                 struct lov_stripe_md_entry *lse = lsm->lsm_entries[i];
534
535                 if ((offset >= lse->lsme_extent.e_start &&
536                      offset < lse->lsme_extent.e_end) ||
537                     (offset == OBD_OBJECT_EOF &&
538                      lse->lsme_extent.e_end == OBD_OBJECT_EOF))
539                         return i;
540         }
541
542         return -1;
543 }