Whamcloud - gitweb
92b00da288259c2541f4c4ecac25b5e0a5d16b38
[fs/lustre-release.git] / lustre / lov / lov_ea.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2011, 2017, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  * Lustre is a trademark of Sun Microsystems, Inc.
31  *
32  * lustre/lov/lov_ea.c
33  *
34  * Author: Wang Di <wangdi@clusterfs.com>
35  */
36
37 #define DEBUG_SUBSYSTEM S_LOV
38
39 #include <linux/math64.h>
40 #include <linux/sort.h>
41 #include <libcfs/libcfs.h>
42
43 #include <obd_class.h>
44 #include "lov_internal.h"
45
46 static inline void
47 lu_extent_le_to_cpu(struct lu_extent *dst, const struct lu_extent *src)
48 {
49         dst->e_start = le64_to_cpu(src->e_start);
50         dst->e_end = le64_to_cpu(src->e_end);
51 }
52
53 /* Find minimum stripe maxbytes value.  For inactive or
54  * reconnecting targets use LUSTRE_EXT3_STRIPE_MAXBYTES. */
55 static loff_t lov_tgt_maxbytes(struct lov_tgt_desc *tgt)
56 {
57         struct obd_import *imp;
58         loff_t maxbytes = LUSTRE_EXT3_STRIPE_MAXBYTES;
59
60         if (!tgt->ltd_active)
61                 return maxbytes;
62
63         imp = tgt->ltd_obd->u.cli.cl_import;
64         if (imp == NULL)
65                 return maxbytes;
66
67         spin_lock(&imp->imp_lock);
68         if ((imp->imp_state == LUSTRE_IMP_FULL ||
69             imp->imp_state == LUSTRE_IMP_IDLE) &&
70             (imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_MAXBYTES) &&
71             imp->imp_connect_data.ocd_maxbytes > 0)
72                 maxbytes = imp->imp_connect_data.ocd_maxbytes;
73
74         spin_unlock(&imp->imp_lock);
75
76         return maxbytes;
77 }
78
79 static int lsm_lmm_verify_v1v3(struct lov_mds_md *lmm, size_t lmm_size,
80                                u16 stripe_count)
81 {
82         if (stripe_count > LOV_V1_INSANE_STRIPE_COUNT) {
83                 CERROR("bad stripe count %d\n", stripe_count);
84                 lov_dump_lmm_common(D_WARNING, lmm);
85                 return -EINVAL;
86         }
87
88         if (lmm_oi_id(&lmm->lmm_oi) == 0) {
89                 CERROR("zero object id\n");
90                 lov_dump_lmm_common(D_WARNING, lmm);
91                 return -EINVAL;
92         }
93
94         if (lov_pattern(le32_to_cpu(lmm->lmm_pattern)) != LOV_PATTERN_MDT &&
95             lov_pattern(le32_to_cpu(lmm->lmm_pattern)) != LOV_PATTERN_RAID0) {
96                 CERROR("bad striping pattern\n");
97                 lov_dump_lmm_common(D_WARNING, lmm);
98                 return -EINVAL;
99         }
100
101         if (lmm->lmm_stripe_size == 0 ||
102             (le32_to_cpu(lmm->lmm_stripe_size)&(LOV_MIN_STRIPE_SIZE-1)) != 0) {
103                 CERROR("bad stripe size %u\n",
104                        le32_to_cpu(lmm->lmm_stripe_size));
105                 lov_dump_lmm_common(D_WARNING, lmm);
106                 return -EINVAL;
107         }
108         return 0;
109 }
110
111 static void lsme_free(struct lov_stripe_md_entry *lsme)
112 {
113         unsigned int stripe_count = lsme->lsme_stripe_count;
114         unsigned int i;
115         size_t lsme_size;
116
117         if (!lsme_inited(lsme) ||
118             lsme->lsme_pattern & LOV_PATTERN_F_RELEASED)
119                 stripe_count = 0;
120         for (i = 0; i < stripe_count; i++)
121                 OBD_SLAB_FREE_PTR(lsme->lsme_oinfo[i], lov_oinfo_slab);
122
123         lsme_size = offsetof(typeof(*lsme), lsme_oinfo[stripe_count]);
124         OBD_FREE_LARGE(lsme, lsme_size);
125 }
126
127 void lsm_free(struct lov_stripe_md *lsm)
128 {
129         unsigned int entry_count = lsm->lsm_entry_count;
130         unsigned int i;
131         size_t lsm_size;
132
133         for (i = 0; i < entry_count; i++)
134                 lsme_free(lsm->lsm_entries[i]);
135
136         lsm_size = offsetof(typeof(*lsm), lsm_entries[entry_count]);
137         OBD_FREE(lsm, lsm_size);
138 }
139
140 /**
141  * Unpack a struct lov_mds_md into a struct lov_stripe_md_entry.
142  *
143  * The caller should set id and extent.
144  */
145 static struct lov_stripe_md_entry *
146 lsme_unpack(struct lov_obd *lov, struct lov_mds_md *lmm, size_t buf_size,
147             const char *pool_name, bool inited, struct lov_ost_data_v1 *objects,
148             loff_t *maxbytes)
149 {
150         struct lov_stripe_md_entry *lsme;
151         size_t lsme_size;
152         loff_t min_stripe_maxbytes = 0;
153         loff_t lov_bytes;
154         u32 magic;
155         u32 pattern;
156         unsigned int stripe_count;
157         unsigned int i;
158         int rc;
159
160         magic = le32_to_cpu(lmm->lmm_magic);
161         if (magic != LOV_MAGIC_V1 && magic != LOV_MAGIC_V3)
162                 RETURN(ERR_PTR(-EINVAL));
163
164         pattern = le32_to_cpu(lmm->lmm_pattern);
165         if (pattern & LOV_PATTERN_F_RELEASED || !inited)
166                 stripe_count = 0;
167         else
168                 stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
169
170         if (buf_size < (magic == LOV_MAGIC_V1 ? sizeof(struct lov_mds_md_v1) :
171                                                 sizeof(struct lov_mds_md_v3))) {
172                 CERROR("LOV EA %s too small: %zu, need %u\n",
173                        magic == LOV_MAGIC_V1 ? "V1" : "V3", buf_size,
174                        lov_mds_md_size(stripe_count, magic == LOV_MAGIC_V1 ?
175                                        LOV_MAGIC_V1 : LOV_MAGIC_V3));
176                 lov_dump_lmm_common(D_WARNING, lmm);
177                 return ERR_PTR(-EINVAL);
178         }
179
180         rc = lsm_lmm_verify_v1v3(lmm, buf_size, stripe_count);
181         if (rc < 0)
182                 return ERR_PTR(rc);
183
184         lsme_size = offsetof(typeof(*lsme), lsme_oinfo[stripe_count]);
185         OBD_ALLOC_LARGE(lsme, lsme_size);
186         if (lsme == NULL)
187                 RETURN(ERR_PTR(-ENOMEM));
188
189         lsme->lsme_magic = magic;
190         lsme->lsme_pattern = pattern;
191         lsme->lsme_flags = 0;
192         lsme->lsme_stripe_size = le32_to_cpu(lmm->lmm_stripe_size);
193         /* preserve the possible -1 stripe count for uninstantiated component */
194         lsme->lsme_stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
195         lsme->lsme_layout_gen = le16_to_cpu(lmm->lmm_layout_gen);
196
197         if (pool_name != NULL) {
198                 size_t pool_name_len;
199
200                 pool_name_len = strlcpy(lsme->lsme_pool_name, pool_name,
201                                         sizeof(lsme->lsme_pool_name));
202                 if (pool_name_len >= sizeof(lsme->lsme_pool_name))
203                         GOTO(out_lsme, rc = -E2BIG);
204         }
205
206         /* with Data-on-MDT set maxbytes to stripe size */
207         if (lsme_is_dom(lsme)) {
208                 lov_bytes = lsme->lsme_stripe_size;
209                 goto out_dom;
210         }
211
212         for (i = 0; i < stripe_count; i++) {
213                 struct lov_oinfo *loi;
214                 struct lov_tgt_desc *ltd;
215
216                 OBD_SLAB_ALLOC_PTR_GFP(loi, lov_oinfo_slab, GFP_NOFS);
217                 if (loi == NULL)
218                         GOTO(out_lsme, rc = -ENOMEM);
219
220                 lsme->lsme_oinfo[i] = loi;
221
222                 ostid_le_to_cpu(&objects[i].l_ost_oi, &loi->loi_oi);
223                 loi->loi_ost_idx = le32_to_cpu(objects[i].l_ost_idx);
224                 loi->loi_ost_gen = le32_to_cpu(objects[i].l_ost_gen);
225                 if (lov_oinfo_is_dummy(loi))
226                         continue;
227
228                 if (loi->loi_ost_idx >= lov->desc.ld_tgt_count &&
229                     !lov2obd(lov)->obd_process_conf) {
230                         CERROR("%s: OST index %d more than OST count %d\n",
231                                (char*)lov->desc.ld_uuid.uuid,
232                                loi->loi_ost_idx, lov->desc.ld_tgt_count);
233                         lov_dump_lmm_v1(D_WARNING, lmm);
234                         GOTO(out_lsme, rc = -EINVAL);
235                 }
236
237                 ltd = lov->lov_tgts[loi->loi_ost_idx];
238                 if (ltd == NULL) {
239                         CERROR("%s: OST index %d missing\n",
240                                (char*)lov->desc.ld_uuid.uuid, loi->loi_ost_idx);
241                         lov_dump_lmm_v1(D_WARNING, lmm);
242                         continue;
243                 }
244
245                 lov_bytes = lov_tgt_maxbytes(ltd);
246                 if (min_stripe_maxbytes == 0 || lov_bytes < min_stripe_maxbytes)
247                         min_stripe_maxbytes = lov_bytes;
248         }
249
250         if (min_stripe_maxbytes == 0)
251                 min_stripe_maxbytes = LUSTRE_EXT3_STRIPE_MAXBYTES;
252
253         lov_bytes = min_stripe_maxbytes * stripe_count;
254
255 out_dom:
256         if (maxbytes != NULL) {
257                 if (lov_bytes < min_stripe_maxbytes) /* handle overflow */
258                         *maxbytes = MAX_LFS_FILESIZE;
259                 else
260                         *maxbytes = lov_bytes;
261         }
262
263         return lsme;
264
265 out_lsme:
266         for (i = 0; i < stripe_count; i++) {
267                 struct lov_oinfo *loi = lsme->lsme_oinfo[i];
268
269                 if (loi != NULL)
270                         OBD_SLAB_FREE_PTR(lsme->lsme_oinfo[i], lov_oinfo_slab);
271         }
272         OBD_FREE_LARGE(lsme, lsme_size);
273
274         return ERR_PTR(rc);
275 }
276
277 static struct
278 lov_stripe_md *lsm_unpackmd_v1v3(struct lov_obd *lov, struct lov_mds_md *lmm,
279                                  size_t buf_size, const char *pool_name,
280                                  struct lov_ost_data_v1 *objects)
281 {
282         struct lov_stripe_md *lsm;
283         struct lov_stripe_md_entry *lsme;
284         size_t lsm_size;
285         loff_t maxbytes;
286         u32 pattern;
287         int rc;
288
289         pattern = le32_to_cpu(lmm->lmm_pattern);
290
291         lsme = lsme_unpack(lov, lmm, buf_size, pool_name, true, objects,
292                            &maxbytes);
293         if (IS_ERR(lsme))
294                 RETURN(ERR_CAST(lsme));
295
296         lsme->lsme_flags = LCME_FL_INIT;
297         lsme->lsme_extent.e_start = 0;
298         lsme->lsme_extent.e_end = LUSTRE_EOF;
299
300         lsm_size = offsetof(typeof(*lsm), lsm_entries[1]);
301         OBD_ALLOC(lsm, lsm_size);
302         if (lsm == NULL)
303                 GOTO(out_lsme, rc = -ENOMEM);
304
305         atomic_set(&lsm->lsm_refc, 1);
306         spin_lock_init(&lsm->lsm_lock);
307         lsm->lsm_maxbytes = maxbytes;
308         lmm_oi_le_to_cpu(&lsm->lsm_oi, &lmm->lmm_oi);
309         lsm->lsm_magic = le32_to_cpu(lmm->lmm_magic);
310         lsm->lsm_layout_gen = le16_to_cpu(lmm->lmm_layout_gen);
311         lsm->lsm_entry_count = 1;
312         lsm->lsm_is_released = pattern & LOV_PATTERN_F_RELEASED;
313         lsm->lsm_entries[0] = lsme;
314
315         return lsm;
316
317 out_lsme:
318         lsme_free(lsme);
319
320         return ERR_PTR(rc);
321 }
322
323 static inline struct lov_stripe_md *
324 lsm_unpackmd_v1(struct lov_obd *lov, void *buf, size_t buf_size)
325 {
326         struct lov_mds_md_v1 *lmm = buf;
327
328         return lsm_unpackmd_v1v3(lov, buf, buf_size, NULL, lmm->lmm_objects);
329 }
330
331 const struct lsm_operations lsm_v1_ops = {
332         .lsm_unpackmd           = lsm_unpackmd_v1,
333 };
334
335 static inline
336 struct lov_stripe_md *lsm_unpackmd_v3(struct lov_obd *lov, void *buf,
337                                       size_t buf_size)
338 {
339         struct lov_mds_md_v3 *lmm = buf;
340
341         return lsm_unpackmd_v1v3(lov, buf, buf_size, lmm->lmm_pool_name,
342                                  lmm->lmm_objects);
343 }
344
345 const struct lsm_operations lsm_v3_ops = {
346         .lsm_unpackmd           = lsm_unpackmd_v3,
347 };
348
349 static int lsm_verify_comp_md_v1(struct lov_comp_md_v1 *lcm,
350                                  size_t lcm_buf_size)
351 {
352         unsigned int entry_count;
353         unsigned int i;
354         size_t lcm_size;
355
356         lcm_size = le32_to_cpu(lcm->lcm_size);
357         if (lcm_buf_size < lcm_size) {
358                 CERROR("bad LCM buffer size %zu, expected %zu\n",
359                        lcm_buf_size, lcm_size);
360                 RETURN(-EINVAL);
361         }
362
363         entry_count = le16_to_cpu(lcm->lcm_entry_count);
364         for (i = 0; i < entry_count; i++) {
365                 struct lov_comp_md_entry_v1 *lcme = &lcm->lcm_entries[i];
366                 size_t blob_offset;
367                 size_t blob_size;
368
369                 blob_offset = le32_to_cpu(lcme->lcme_offset);
370                 blob_size = le32_to_cpu(lcme->lcme_size);
371
372                 if (lcm_size < blob_offset || lcm_size < blob_size ||
373                     lcm_size < blob_offset + blob_size) {
374                         CERROR("LCM entry %u has invalid blob: "
375                                "LCM size = %zu, offset = %zu, size = %zu\n",
376                                le32_to_cpu(lcme->lcme_id),
377                                lcm_size, blob_offset, blob_size);
378                         RETURN(-EINVAL);
379                 }
380         }
381
382         return 0;
383 }
384
385 static struct lov_stripe_md_entry *
386 lsme_unpack_comp(struct lov_obd *lov, struct lov_mds_md *lmm,
387                  size_t lmm_buf_size, bool inited, loff_t *maxbytes)
388 {
389         unsigned int magic;
390         unsigned int stripe_count;
391
392         stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
393         if (stripe_count == 0 &&
394             lov_pattern(le32_to_cpu(lmm->lmm_pattern)) != LOV_PATTERN_MDT)
395                 RETURN(ERR_PTR(-EINVAL));
396         /* un-instantiated lmm contains no ost id info, i.e. lov_ost_data_v1 */
397         if (!inited)
398                 stripe_count = 0;
399
400         magic = le32_to_cpu(lmm->lmm_magic);
401         if (magic != LOV_MAGIC_V1 && magic != LOV_MAGIC_V3)
402                 RETURN(ERR_PTR(-EINVAL));
403
404         if (lmm_buf_size < lov_mds_md_size(stripe_count, magic))
405                 RETURN(ERR_PTR(-EINVAL));
406
407         if (magic == LOV_MAGIC_V1) {
408                 return lsme_unpack(lov, lmm, lmm_buf_size, NULL,
409                                    inited, lmm->lmm_objects, maxbytes);
410         } else {
411                 struct lov_mds_md_v3 *lmm3 = (struct lov_mds_md_v3 *)lmm;
412
413                 return lsme_unpack(lov, lmm, lmm_buf_size, lmm3->lmm_pool_name,
414                                    inited, lmm3->lmm_objects, maxbytes);
415         }
416 }
417
418 static struct lov_stripe_md *
419 lsm_unpackmd_comp_md_v1(struct lov_obd *lov, void *buf, size_t buf_size)
420 {
421         struct lov_comp_md_v1 *lcm = buf;
422         struct lov_stripe_md *lsm;
423         size_t lsm_size;
424         unsigned int entry_count = 0;
425         unsigned int i;
426         loff_t maxbytes;
427         int rc;
428
429         rc = lsm_verify_comp_md_v1(buf, buf_size);
430         if (rc < 0)
431                 return ERR_PTR(rc);
432
433         entry_count = le16_to_cpu(lcm->lcm_entry_count);
434
435         lsm_size = offsetof(typeof(*lsm), lsm_entries[entry_count]);
436         OBD_ALLOC(lsm, lsm_size);
437         if (lsm == NULL)
438                 return ERR_PTR(-ENOMEM);
439
440         atomic_set(&lsm->lsm_refc, 1);
441         spin_lock_init(&lsm->lsm_lock);
442         lsm->lsm_magic = le32_to_cpu(lcm->lcm_magic);
443         lsm->lsm_layout_gen = le32_to_cpu(lcm->lcm_layout_gen);
444         lsm->lsm_entry_count = entry_count;
445         lsm->lsm_mirror_count = le16_to_cpu(lcm->lcm_mirror_count);
446         lsm->lsm_flags = le16_to_cpu(lcm->lcm_flags);
447         lsm->lsm_is_released = true;
448         lsm->lsm_maxbytes = LLONG_MIN;
449
450         for (i = 0; i < entry_count; i++) {
451                 struct lov_comp_md_entry_v1 *lcme = &lcm->lcm_entries[i];
452                 struct lov_stripe_md_entry *lsme;
453                 size_t blob_offset;
454                 size_t blob_size;
455                 void *blob;
456
457                 blob_offset = le32_to_cpu(lcme->lcme_offset);
458                 blob_size = le32_to_cpu(lcme->lcme_size);
459                 blob = (char *)lcm + blob_offset;
460
461                 lsme = lsme_unpack_comp(lov, blob, blob_size,
462                                         le32_to_cpu(lcme->lcme_flags) &
463                                         LCME_FL_INIT,
464                                         (i == entry_count - 1) ? &maxbytes :
465                                                                  NULL);
466                 if (IS_ERR(lsme))
467                         GOTO(out_lsm, rc = PTR_ERR(lsme));
468
469                 if (!(lsme->lsme_pattern & LOV_PATTERN_F_RELEASED))
470                         lsm->lsm_is_released = false;
471
472                 lsm->lsm_entries[i] = lsme;
473                 lsme->lsme_id = le32_to_cpu(lcme->lcme_id);
474                 lsme->lsme_flags = le32_to_cpu(lcme->lcme_flags);
475                 if (lsme->lsme_flags & LCME_FL_NOSYNC)
476                         lsme->lsme_timestamp =
477                                 le64_to_cpu(lcme->lcme_timestamp);
478                 lu_extent_le_to_cpu(&lsme->lsme_extent, &lcme->lcme_extent);
479
480                 if (i == entry_count - 1) {
481                         lsm->lsm_maxbytes = (loff_t)lsme->lsme_extent.e_start +
482                                             maxbytes;
483                         /* the last component hasn't been defined, or
484                          * lsm_maxbytes overflowed. */
485                         if (!lsme_is_dom(lsme) &&
486                             (lsme->lsme_extent.e_end != LUSTRE_EOF ||
487                              lsm->lsm_maxbytes <
488                              (loff_t)lsme->lsme_extent.e_start))
489                                 lsm->lsm_maxbytes = MAX_LFS_FILESIZE;
490                 }
491         }
492
493         RETURN(lsm);
494
495 out_lsm:
496         for (i = 0; i < entry_count; i++)
497                 if (lsm->lsm_entries[i] != NULL)
498                         lsme_free(lsm->lsm_entries[i]);
499
500         OBD_FREE(lsm, lsm_size);
501
502         RETURN(ERR_PTR(rc));
503 }
504
505 const struct lsm_operations lsm_comp_md_v1_ops = {
506         .lsm_unpackmd         = lsm_unpackmd_comp_md_v1,
507 };
508
509 void dump_lsm(unsigned int level, const struct lov_stripe_md *lsm)
510 {
511         int i, j;
512
513         CDEBUG(level, "lsm %p, objid "DOSTID", maxbytes %#llx, magic 0x%08X, "
514                "refc: %d, entry: %u, layout_gen %u\n",
515                lsm, POSTID(&lsm->lsm_oi), lsm->lsm_maxbytes, lsm->lsm_magic,
516                atomic_read(&lsm->lsm_refc), lsm->lsm_entry_count,
517                lsm->lsm_layout_gen);
518
519         for (i = 0; i < lsm->lsm_entry_count; i++) {
520                 struct lov_stripe_md_entry *lse = lsm->lsm_entries[i];
521
522                 CDEBUG(level, DEXT ": id: %u, flags: %x, "
523                        "magic 0x%08X, layout_gen %u, "
524                        "stripe count %u, sstripe size %u, "
525                        "pool: ["LOV_POOLNAMEF"]\n",
526                        PEXT(&lse->lsme_extent), lse->lsme_id, lse->lsme_flags,
527                        lse->lsme_magic, lse->lsme_layout_gen,
528                        lse->lsme_stripe_count, lse->lsme_stripe_size,
529                        lse->lsme_pool_name);
530                 if (!lsme_inited(lse) ||
531                     lse->lsme_pattern & LOV_PATTERN_F_RELEASED)
532                         continue;
533                 for (j = 0; j < lse->lsme_stripe_count; j++) {
534                         CDEBUG(level, "   oinfo:%p: ostid: "DOSTID
535                                " ost idx: %d gen: %d\n",
536                                lse->lsme_oinfo[j],
537                                POSTID(&lse->lsme_oinfo[j]->loi_oi),
538                                lse->lsme_oinfo[j]->loi_ost_idx,
539                                lse->lsme_oinfo[j]->loi_ost_gen);
540                 }
541         }
542 }
543
544 int lov_lsm_entry(const struct lov_stripe_md *lsm, __u64 offset)
545 {
546         int i;
547
548         for (i = 0; i < lsm->lsm_entry_count; i++) {
549                 struct lov_stripe_md_entry *lse = lsm->lsm_entries[i];
550
551                 if ((offset >= lse->lsme_extent.e_start &&
552                      offset < lse->lsme_extent.e_end) ||
553                     (offset == OBD_OBJECT_EOF &&
554                      lse->lsme_extent.e_end == OBD_OBJECT_EOF))
555                         return i;
556         }
557
558         return -1;
559 }