4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright (c) 2011, 2017, Intel Corporation.
29 * This file is part of Lustre, http://www.lustre.org/
30 * Lustre is a trademark of Sun Microsystems, Inc.
34 * Author: Wang Di <wangdi@clusterfs.com>
37 #define DEBUG_SUBSYSTEM S_LOV
39 #include <linux/math64.h>
40 #include <linux/sort.h>
41 #include <libcfs/libcfs.h>
43 #include <obd_class.h>
44 #include "lov_internal.h"
/*
 * Fill *dst from *src, passing both e_start and e_end through
 * le64_to_cpu() on the way.
 */
47 lu_extent_le_to_cpu(struct lu_extent *dst, const struct lu_extent *src)
49 dst->e_start = le64_to_cpu(src->e_start);
50 dst->e_end = le64_to_cpu(src->e_end);
54 * Find minimum stripe maxbytes value. For inactive or
55 * reconnecting targets use LUSTRE_EXT3_STRIPE_MAXBYTES.
/*
 * Work out the maxbytes value for a single target.
 *
 * The result starts as LUSTRE_EXT3_STRIPE_MAXBYTES and is replaced by the
 * import's ocd_maxbytes only when every condition tested below holds.
 */
57 static loff_t lov_tgt_maxbytes(struct lov_tgt_desc *tgt)
59 struct obd_import *imp;
60 loff_t maxbytes = LUSTRE_EXT3_STRIPE_MAXBYTES;
65 imp = tgt->ltd_obd->u.cli.cl_import;
/* imp_state and imp_connect_data are only read with imp_lock held */
69 spin_lock(&imp->imp_lock);
/*
 * Take the connect-time value only if the import state is FULL or IDLE,
 * the connect flags include OBD_CONNECT_MAXBYTES, and ocd_maxbytes is
 * greater than zero.
 */
70 if ((imp->imp_state == LUSTRE_IMP_FULL ||
71 imp->imp_state == LUSTRE_IMP_IDLE) &&
72 (imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_MAXBYTES) &&
73 imp->imp_connect_data.ocd_maxbytes > 0)
74 maxbytes = imp->imp_connect_data.ocd_maxbytes;
76 spin_unlock(&imp->imp_lock);
/*
 * Sanity-check the common fields of a V1/V3 lov_mds_md.
 *
 * Four conditions are tested in turn: stripe count, object id, striping
 * pattern and stripe size.  Each failing test logs with CERROR() and dumps
 * the lmm via lov_dump_lmm_common() at D_WARNING.
 * NOTE(review): the assignments to rc and the exit path are not visible in
 * this view; the messages print rc, so presumably it holds a negative errno.
 */
81 static int lsm_lmm_verify_v1v3(struct lov_mds_md *lmm, size_t lmm_size,
84 u32 pattern = le32_to_cpu(lmm->lmm_pattern);
/* reject stripe counts above LOV_V1_INSANE_STRIPE_COUNT */
87 if (stripe_count > LOV_V1_INSANE_STRIPE_COUNT) {
89 CERROR("lov: bad stripe count %d: rc = %d\n",
91 lov_dump_lmm_common(D_WARNING, lmm);
/* an object id of zero is treated as invalid */
95 if (lmm_oi_id(&lmm->lmm_oi) == 0) {
97 CERROR("lov: zero object id: rc = %d\n", rc);
98 lov_dump_lmm_common(D_WARNING, lmm);
/* the pattern (after lov_pattern() filtering) must be a supported one */
102 if (!lov_pattern_supported(lov_pattern(pattern))) {
104 CERROR("lov: unrecognized striping pattern: rc = %d\n", rc);
105 lov_dump_lmm_common(D_WARNING, lmm);
/*
 * stripe size must be non-zero and must have none of the bits covered by
 * (LOV_MIN_STRIPE_SIZE - 1) set
 */
109 if (lmm->lmm_stripe_size == 0 ||
110 (le32_to_cpu(lmm->lmm_stripe_size)&(LOV_MIN_STRIPE_SIZE-1)) != 0) {
112 CERROR("lov: bad stripe size %u: rc = %d\n",
113 le32_to_cpu(lmm->lmm_stripe_size), rc);
114 lov_dump_lmm_common(D_WARNING, lmm);
/*
 * Release one lov_stripe_md_entry: hand each lsme_oinfo[] element back to
 * lov_oinfo_slab, then free the entry itself.  The size passed to
 * OBD_FREE_LARGE() is recomputed from the stripe count with offsetof().
 */
122 static void lsme_free(struct lov_stripe_md_entry *lsme)
124 unsigned int stripe_count = lsme->lsme_stripe_count;
/*
 * NOTE(review): the statement guarded by this test is not visible here;
 * presumably it zeroes stripe_count because uninitialised or released
 * entries carry no oinfo array - confirm against the full file.
 */
128 if (!lsme_inited(lsme) ||
129 lsme->lsme_pattern & LOV_PATTERN_F_RELEASED)
131 for (i = 0; i < stripe_count; i++)
132 OBD_SLAB_FREE_PTR(lsme->lsme_oinfo[i], lov_oinfo_slab);
134 lsme_size = offsetof(typeof(*lsme), lsme_oinfo[stripe_count]);
135 OBD_FREE_LARGE(lsme, lsme_size);
/*
 * Free a lov_stripe_md together with what it owns.
 *
 * A LOV_MAGIC_FOREIGN lsm owns a single buffer of lsm_foreign_size bytes
 * reached through lsm_foreign(); other layouts own lsm_entry_count entries
 * that are released with lsme_free().
 */
138 void lsm_free(struct lov_stripe_md *lsm)
140 unsigned int entry_count = lsm->lsm_entry_count;
144 if (lsm->lsm_magic == LOV_MAGIC_FOREIGN) {
145 OBD_FREE_LARGE(lsm_foreign(lsm), lsm->lsm_foreign_size);
147 for (i = 0; i < entry_count; i++)
148 lsme_free(lsm->lsm_entries[i]);
/*
 * The foreign case is sized for exactly one lsm_entries[] slot, the
 * others for entry_count slots.
 */
151 lsm_size = lsm->lsm_magic == LOV_MAGIC_FOREIGN ?
152 offsetof(typeof(*lsm), lsm_entries[1]) :
153 offsetof(typeof(*lsm), lsm_entries[entry_count]);
154 OBD_FREE(lsm, lsm_size);
158 * Unpack a struct lov_mds_md into a struct lov_stripe_md_entry.
160 * The caller should set id and extent.
/*
 * Build a lov_stripe_md_entry from a little-endian V1/V3 lov_mds_md.
 *
 * lov        LOV device; its target table and descriptor are consulted
 * lmm        on-disk layout header
 * buf_size   number of bytes available at lmm
 * pool_name  pool name to copy into the entry
 * inited     whether the component is instantiated
 * objects    per-stripe lov_ost_data_v1 array
 *
 * Failures visible here are reported as ERR_PTR(-EINVAL), ERR_PTR(-ENOMEM)
 * or, through the out_lsme path, rc values of -E2BIG, -ENOMEM and -EINVAL.
 * A value is also stored through *maxbytes near the end.
 */
162 static struct lov_stripe_md_entry *
163 lsme_unpack(struct lov_obd *lov, struct lov_mds_md *lmm, size_t buf_size,
164 const char *pool_name, bool inited, struct lov_ost_data_v1 *objects,
167 struct lov_stripe_md_entry *lsme;
169 loff_t min_stripe_maxbytes = 0;
173 unsigned int stripe_count;
/* only the V1 and V3 magics are understood */
177 magic = le32_to_cpu(lmm->lmm_magic);
178 if (magic != LOV_MAGIC_V1 && magic != LOV_MAGIC_V3)
179 RETURN(ERR_PTR(-EINVAL));
/*
 * NOTE(review): the branch taken for released or not-inited layouts is
 * not visible here; presumably it forces the local stripe_count to 0 -
 * confirm against the full file.
 */
181 pattern = le32_to_cpu(lmm->lmm_pattern);
182 if (pattern & LOV_PATTERN_F_RELEASED || !inited)
185 stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
/*
 * The buffer must hold at least the fixed-size V1 or V3 header; the size
 * printed in the message is lov_mds_md_size() for the stripe count.
 */
187 if (buf_size < (magic == LOV_MAGIC_V1 ? sizeof(struct lov_mds_md_v1) :
188 sizeof(struct lov_mds_md_v3))) {
189 CERROR("LOV EA %s too small: %zu, need %u\n",
190 magic == LOV_MAGIC_V1 ? "V1" : "V3", buf_size,
191 lov_mds_md_size(stripe_count, magic == LOV_MAGIC_V1 ?
192 LOV_MAGIC_V1 : LOV_MAGIC_V3));
193 lov_dump_lmm_common(D_WARNING, lmm);
194 return ERR_PTR(-EINVAL);
197 rc = lsm_lmm_verify_v1v3(lmm, buf_size, stripe_count);
/* the entry ends with stripe_count lsme_oinfo[] pointers */
201 lsme_size = offsetof(typeof(*lsme), lsme_oinfo[stripe_count]);
202 OBD_ALLOC_LARGE(lsme, lsme_size);
204 RETURN(ERR_PTR(-ENOMEM));
206 lsme->lsme_magic = magic;
207 lsme->lsme_pattern = pattern;
208 lsme->lsme_flags = 0;
209 lsme->lsme_stripe_size = le32_to_cpu(lmm->lmm_stripe_size);
210 /* preserve the possible -1 stripe count for uninstantiated component */
211 lsme->lsme_stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
212 lsme->lsme_layout_gen = le16_to_cpu(lmm->lmm_layout_gen);
215 size_t pool_name_len;
/*
 * strlcpy() returns the length of the source string, so a result that is
 * not smaller than the destination means the name was truncated.
 */
217 pool_name_len = strlcpy(lsme->lsme_pool_name, pool_name,
218 sizeof(lsme->lsme_pool_name));
219 if (pool_name_len >= sizeof(lsme->lsme_pool_name))
220 GOTO(out_lsme, rc = -E2BIG);
223 /* with Data-on-MDT set maxbytes to stripe size */
224 if (lsme_is_dom(lsme)) {
226 lov_bytes = lsme->lsme_stripe_size;
/*
 * Allocate one lov_oinfo per stripe, fill it from the little-endian
 * objects[] array, and track the smallest per-target maxbytes seen.
 */
233 for (i = 0; i < stripe_count; i++) {
234 struct lov_oinfo *loi;
235 struct lov_tgt_desc *ltd;
237 OBD_SLAB_ALLOC_PTR_GFP(loi, lov_oinfo_slab, GFP_NOFS);
239 GOTO(out_lsme, rc = -ENOMEM);
241 lsme->lsme_oinfo[i] = loi;
243 ostid_le_to_cpu(&objects[i].l_ost_oi, &loi->loi_oi);
244 loi->loi_ost_idx = le32_to_cpu(objects[i].l_ost_idx);
245 loi->loi_ost_gen = le32_to_cpu(objects[i].l_ost_gen);
246 if (lov_oinfo_is_dummy(loi))
/*
 * An OST index at or beyond ld_tgt_count is an error, except while
 * obd_process_conf is set on the LOV obd.
 */
249 if (loi->loi_ost_idx >= lov->desc.ld_tgt_count &&
250 !lov2obd(lov)->obd_process_conf) {
251 CERROR("%s: OST index %d more than OST count %d\n",
252 (char*)lov->desc.ld_uuid.uuid,
253 loi->loi_ost_idx, lov->desc.ld_tgt_count);
254 lov_dump_lmm_v1(D_WARNING, lmm);
255 GOTO(out_lsme, rc = -EINVAL);
258 ltd = lov->lov_tgts[loi->loi_ost_idx];
260 CERROR("%s: OST index %d missing\n",
261 (char*)lov->desc.ld_uuid.uuid, loi->loi_ost_idx);
262 lov_dump_lmm_v1(D_WARNING, lmm);
/* min_stripe_maxbytes == 0 means "no target seen yet" */
266 lov_bytes = lov_tgt_maxbytes(ltd);
267 if (min_stripe_maxbytes == 0 || lov_bytes < min_stripe_maxbytes)
268 min_stripe_maxbytes = lov_bytes;
/* no usable target contributed a value: fall back to the ext3 limit */
272 if (min_stripe_maxbytes == 0)
273 min_stripe_maxbytes = LUSTRE_EXT3_STRIPE_MAXBYTES;
/* a zero stripe count is replaced by the number of targets */
275 if (stripe_count == 0)
276 stripe_count = lov->desc.ld_tgt_count;
/*
 * Multiply only when the product cannot exceed LLONG_MAX; otherwise use
 * MAX_LFS_FILESIZE directly.  The stored result is capped at
 * MAX_LFS_FILESIZE in either case.
 */
278 if (min_stripe_maxbytes <= LLONG_MAX / stripe_count)
279 lov_bytes = min_stripe_maxbytes * stripe_count;
281 lov_bytes = MAX_LFS_FILESIZE;
283 *maxbytes = min_t(loff_t, lov_bytes, MAX_LFS_FILESIZE);
/* cleanup: give back the oinfo objects, then the entry itself */
290 for (i = 0; i < stripe_count; i++) {
291 struct lov_oinfo *loi = lsme->lsme_oinfo[i];
294 OBD_SLAB_FREE_PTR(lsme->lsme_oinfo[i], lov_oinfo_slab);
296 OBD_FREE_LARGE(lsme, lsme_size);
/*
 * Unpack a plain V1/V3 layout into a lov_stripe_md holding exactly one
 * entry.  The entry comes from lsme_unpack() (called with inited == true)
 * and is given the LCME_FL_INIT flag and the extent [0, LUSTRE_EOF].
 * Failures visible here: the ERR_PTR from lsme_unpack() is passed on via
 * ERR_CAST(), and a failed lsm allocation takes the out_lsme path with
 * rc = -ENOMEM.
 */
302 lov_stripe_md *lsm_unpackmd_v1v3(struct lov_obd *lov, struct lov_mds_md *lmm,
303 size_t buf_size, const char *pool_name,
304 struct lov_ost_data_v1 *objects)
306 struct lov_stripe_md *lsm;
307 struct lov_stripe_md_entry *lsme;
313 pattern = le32_to_cpu(lmm->lmm_pattern);
315 lsme = lsme_unpack(lov, lmm, buf_size, pool_name, true, objects,
318 RETURN(ERR_CAST(lsme));
320 lsme->lsme_flags = LCME_FL_INIT;
321 lsme->lsme_extent.e_start = 0;
322 lsme->lsme_extent.e_end = LUSTRE_EOF;
/* room for the header plus a single lsm_entries[] slot */
324 lsm_size = offsetof(typeof(*lsm), lsm_entries[1]);
325 OBD_ALLOC(lsm, lsm_size);
327 GOTO(out_lsme, rc = -ENOMEM);
/* the new lsm starts with one reference */
329 atomic_set(&lsm->lsm_refc, 1);
330 spin_lock_init(&lsm->lsm_lock);
331 lsm->lsm_maxbytes = maxbytes;
332 lmm_oi_le_to_cpu(&lsm->lsm_oi, &lmm->lmm_oi);
333 lsm->lsm_magic = le32_to_cpu(lmm->lmm_magic);
334 lsm->lsm_layout_gen = le16_to_cpu(lmm->lmm_layout_gen);
335 lsm->lsm_entry_count = 1;
336 lsm->lsm_is_released = pattern & LOV_PATTERN_F_RELEASED;
337 lsm->lsm_entries[0] = lsme;
/*
 * V1 unpack hook: treat buf as a lov_mds_md_v1 and delegate to
 * lsm_unpackmd_v1v3() with no pool name and the V1 lmm_objects array.
 */
347 static struct lov_stripe_md *
348 lsm_unpackmd_v1(struct lov_obd *lov, void *buf, size_t buf_size)
350 struct lov_mds_md_v1 *lmm = buf;
352 return lsm_unpackmd_v1v3(lov, buf, buf_size, NULL, lmm->lmm_objects);
/* Operations table whose unpack hook is lsm_unpackmd_v1(). */
355 const struct lsm_operations lsm_v1_ops = {
356 .lsm_unpackmd = lsm_unpackmd_v1,
/*
 * V3 unpack hook: treat buf as a lov_mds_md_v3 and delegate to
 * lsm_unpackmd_v1v3(), passing the pool name stored in the V3 header.
 */
359 static struct lov_stripe_md *
360 lsm_unpackmd_v3(struct lov_obd *lov, void *buf, size_t buf_size)
362 struct lov_mds_md_v3 *lmm = buf;
364 return lsm_unpackmd_v1v3(lov, buf, buf_size, lmm->lmm_pool_name,
/* Operations table whose unpack hook is lsm_unpackmd_v3(). */
368 const struct lsm_operations lsm_v3_ops = {
369 .lsm_unpackmd = lsm_unpackmd_v3,
/*
 * Check the size bookkeeping of a composite (lov_comp_md_v1) layout.
 *
 * The size recorded in the header must fit in the supplied buffer, and
 * each entry's blob (offset and size) must lie within that recorded size.
 * NOTE(review): the return statements are not visible in this view.
 */
372 static int lsm_verify_comp_md_v1(struct lov_comp_md_v1 *lcm,
375 unsigned int entry_count;
/* the layout must not claim more bytes than the caller's buffer holds */
379 lcm_size = le32_to_cpu(lcm->lcm_size);
380 if (lcm_buf_size < lcm_size) {
381 CERROR("bad LCM buffer size %zu, expected %zu\n",
382 lcm_buf_size, lcm_size);
386 entry_count = le16_to_cpu(lcm->lcm_entry_count);
387 for (i = 0; i < entry_count; i++) {
388 struct lov_comp_md_entry_v1 *lcme = &lcm->lcm_entries[i];
392 blob_offset = le32_to_cpu(lcme->lcme_offset);
393 blob_size = le32_to_cpu(lcme->lcme_size);
/* offset, size, and their sum must each stay within lcm_size */
395 if (lcm_size < blob_offset || lcm_size < blob_size ||
396 lcm_size < blob_offset + blob_size) {
397 CERROR("LCM entry %u has invalid blob: "
398 "LCM size = %zu, offset = %zu, size = %zu\n",
399 le32_to_cpu(lcme->lcme_id),
400 lcm_size, blob_offset, blob_size);
/*
 * Unpack one component blob of a composite layout.
 *
 * Validates the stripe count, magic and buffer size of the embedded
 * lov_mds_md, then calls lsme_unpack() with the pool name and objects
 * array appropriate for the V1 or V3 format.  Every validation failure
 * visible here returns ERR_PTR(-EINVAL).
 */
408 static struct lov_stripe_md_entry *
409 lsme_unpack_comp(struct lov_obd *lov, struct lov_mds_md *lmm,
410 size_t lmm_buf_size, bool inited, loff_t *maxbytes)
413 unsigned int stripe_count;
/* a zero stripe count is accepted only for the LOV_PATTERN_MDT pattern */
415 stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
416 if (stripe_count == 0 &&
417 lov_pattern(le32_to_cpu(lmm->lmm_pattern)) != LOV_PATTERN_MDT)
418 RETURN(ERR_PTR(-EINVAL));
419 /* un-instantiated lmm contains no ost id info, i.e. lov_ost_data_v1 */
423 magic = le32_to_cpu(lmm->lmm_magic);
424 if (magic != LOV_MAGIC_V1 && magic != LOV_MAGIC_V3)
425 RETURN(ERR_PTR(-EINVAL));
/* the blob must be at least lov_mds_md_size() for this count and magic */
427 if (lmm_buf_size < lov_mds_md_size(stripe_count, magic))
428 RETURN(ERR_PTR(-EINVAL));
/* V1 has no pool name; V3 takes name and objects from the V3 layout */
430 if (magic == LOV_MAGIC_V1) {
431 return lsme_unpack(lov, lmm, lmm_buf_size, NULL,
432 inited, lmm->lmm_objects, maxbytes);
434 struct lov_mds_md_v3 *lmm3 = (struct lov_mds_md_v3 *)lmm;
436 return lsme_unpack(lov, lmm, lmm_buf_size, lmm3->lmm_pool_name,
437 inited, lmm3->lmm_objects, maxbytes);
/*
 * Unpack a composite (lov_comp_md_v1) layout into a lov_stripe_md with one
 * lov_stripe_md_entry per component.
 *
 * The buffer is first checked by lsm_verify_comp_md_v1().  Each component
 * blob is then unpacked with lsme_unpack_comp() and tagged with the id,
 * flags, optional timestamp and extent from its lov_comp_md_entry_v1.
 * A failed lsm allocation returns ERR_PTR(-ENOMEM); a failed component
 * takes the out_lsm path, which frees whatever was built so far.
 */
441 static struct lov_stripe_md *
442 lsm_unpackmd_comp_md_v1(struct lov_obd *lov, void *buf, size_t buf_size)
444 struct lov_comp_md_v1 *lcm = buf;
445 struct lov_stripe_md *lsm;
447 unsigned int entry_count = 0;
452 rc = lsm_verify_comp_md_v1(buf, buf_size);
456 entry_count = le16_to_cpu(lcm->lcm_entry_count);
458 lsm_size = offsetof(typeof(*lsm), lsm_entries[entry_count]);
459 OBD_ALLOC(lsm, lsm_size);
461 return ERR_PTR(-ENOMEM);
463 atomic_set(&lsm->lsm_refc, 1);
464 spin_lock_init(&lsm->lsm_lock);
465 lsm->lsm_magic = le32_to_cpu(lcm->lcm_magic);
466 lsm->lsm_layout_gen = le32_to_cpu(lcm->lcm_layout_gen);
467 lsm->lsm_entry_count = entry_count;
468 lsm->lsm_mirror_count = le16_to_cpu(lcm->lcm_mirror_count);
469 lsm->lsm_flags = le16_to_cpu(lcm->lcm_flags);
/*
 * Start out "released"; the flag is cleared in the loop below as soon as
 * one component lacks LOV_PATTERN_F_RELEASED.
 */
470 lsm->lsm_is_released = true;
471 lsm->lsm_maxbytes = LLONG_MIN;
473 for (i = 0; i < entry_count; i++) {
474 struct lov_comp_md_entry_v1 *lcme = &lcm->lcm_entries[i];
475 struct lov_stripe_md_entry *lsme;
/* the component's lov_mds_md sits blob_offset bytes from the start of lcm */
480 blob_offset = le32_to_cpu(lcme->lcme_offset);
481 blob_size = le32_to_cpu(lcme->lcme_size);
482 blob = (char *)lcm + blob_offset;
484 lsme = lsme_unpack_comp(lov, blob, blob_size,
485 le32_to_cpu(lcme->lcme_flags) &
487 (i == entry_count - 1) ? &maxbytes :
490 GOTO(out_lsm, rc = PTR_ERR(lsme));
492 if (!(lsme->lsme_pattern & LOV_PATTERN_F_RELEASED))
493 lsm->lsm_is_released = false;
495 lsm->lsm_entries[i] = lsme;
496 lsme->lsme_id = le32_to_cpu(lcme->lcme_id);
497 lsme->lsme_flags = le32_to_cpu(lcme->lcme_flags);
/* the timestamp is copied only for components flagged LCME_FL_NOSYNC */
498 if (lsme->lsme_flags & LCME_FL_NOSYNC)
499 lsme->lsme_timestamp =
500 le64_to_cpu(lcme->lcme_timestamp);
501 lu_extent_le_to_cpu(&lsme->lsme_extent, &lcme->lcme_extent);
/*
 * Only the last component determines lsm_maxbytes.
 * NOTE(review): part of the expression and of the overflow test below is
 * not visible in this view.
 */
503 if (i == entry_count - 1) {
504 lsm->lsm_maxbytes = (loff_t)lsme->lsme_extent.e_start +
507 * the last component hasn't been defined, or
508 * lsm_maxbytes overflowed.
510 if (!lsme_is_dom(lsme) &&
511 (lsme->lsme_extent.e_end != LUSTRE_EOF ||
513 (loff_t)lsme->lsme_extent.e_start))
514 lsm->lsm_maxbytes = MAX_LFS_FILESIZE;
/* cleanup: free every entry that was stored, then the lsm itself */
521 for (i = 0; i < entry_count; i++)
522 if (lsm->lsm_entries[i])
523 lsme_free(lsm->lsm_entries[i]);
525 OBD_FREE(lsm, lsm_size);
/* Operations table whose unpack hook is lsm_unpackmd_comp_md_v1(). */
530 const struct lsm_operations lsm_comp_md_v1_ops = {
531 .lsm_unpackmd = lsm_unpackmd_comp_md_v1,
/*
 * Unpack a foreign layout: allocate a lov_stripe_md sized for one entry
 * slot and attach a private copy of the foreign EA to it through
 * lsm_foreign().  The copy length is foreign_size_le(lfm).
 * Both allocation failures visible here return ERR_PTR(-ENOMEM); the
 * second one frees the lsm first.
 * NOTE(review): no comparison of that length against the caller's buffer
 * size is visible in this view - confirm it is validated elsewhere.
 */
535 lov_stripe_md *lsm_unpackmd_foreign(struct lov_obd *lov, void *buf,
538 struct lov_foreign_md *lfm = buf;
539 struct lov_stripe_md *lsm;
541 struct lov_stripe_md_entry *lsme;
543 lsm_size = offsetof(typeof(*lsm), lsm_entries[1]);
544 OBD_ALLOC(lsm, lsm_size);
546 RETURN(ERR_PTR(-ENOMEM));
548 atomic_set(&lsm->lsm_refc, 1);
549 spin_lock_init(&lsm->lsm_lock);
550 lsm->lsm_magic = le32_to_cpu(lfm->lfm_magic);
551 lsm->lsm_foreign_size = foreign_size_le(lfm);
553 /* alloc for full foreign EA including format fields */
554 OBD_ALLOC_LARGE(lsme, lsm->lsm_foreign_size);
556 OBD_FREE(lsm, lsm_size);
557 RETURN(ERR_PTR(-ENOMEM));
560 /* copy full foreign EA including format fields */
561 memcpy(lsme, buf, lsm->lsm_foreign_size);
/* lsme here is the raw copied buffer, not a parsed stripe entry */
563 lsm_foreign(lsm) = lsme;
/* Operations table whose unpack hook is lsm_unpackmd_foreign(). */
568 const struct lsm_operations lsm_foreign_ops = {
569 .lsm_unpackmd = lsm_unpackmd_foreign,
/*
 * Log the contents of a lov_stripe_md at the given debug level.
 *
 * A summary line (object id, maxbytes, magic, refcount, entry count and
 * layout generation) is formatted first.  A foreign layout then has its
 * foreign EA fields printed; other layouts have one line per entry and,
 * where the check below allows it, one line per stripe object.
 */
572 void dump_lsm(unsigned int level, const struct lov_stripe_md *lsm)
577 "lsm %p, objid "DOSTID", maxbytes %#llx, magic 0x%08X, refc: %d, entry: %u, layout_gen %u\n",
578 lsm, POSTID(&lsm->lsm_oi), lsm->lsm_maxbytes, lsm->lsm_magic,
579 atomic_read(&lsm->lsm_refc), lsm->lsm_entry_count,
580 lsm->lsm_layout_gen);
/* foreign layouts: the attached buffer is viewed as a lov_foreign_md */
582 if (lsm->lsm_magic == LOV_MAGIC_FOREIGN) {
583 struct lov_foreign_md *lfm = (void *)lsm_foreign(lsm);
586 "foreign LOV EA, magic %x, length %u, type %x, flags %x, value '%.*s'\n",
587 lfm->lfm_magic, lfm->lfm_length, lfm->lfm_type,
588 lfm->lfm_flags, lfm->lfm_length, lfm->lfm_value);
592 for (i = 0; i < lsm->lsm_entry_count; i++) {
593 struct lov_stripe_md_entry *lse = lsm->lsm_entries[i];
595 CDEBUG(level, DEXT ": id: %u, flags: %x, "
596 "magic 0x%08X, layout_gen %u, "
597 "stripe count %u, sstripe size %u, "
598 "pool: ["LOV_POOLNAMEF"]\n",
599 PEXT(&lse->lsme_extent), lse->lsme_id, lse->lsme_flags,
600 lse->lsme_magic, lse->lsme_layout_gen,
601 lse->lsme_stripe_count, lse->lsme_stripe_size,
602 lse->lsme_pool_name);
/*
 * NOTE(review): the statement guarded by this test is not visible here;
 * presumably such entries are skipped before the per-stripe dump.
 */
603 if (!lsme_inited(lse) ||
604 lse->lsme_pattern & LOV_PATTERN_F_RELEASED)
606 for (j = 0; j < lse->lsme_stripe_count; j++) {
607 CDEBUG(level, " oinfo:%p: ostid: "DOSTID
608 " ost idx: %d gen: %d\n",
610 POSTID(&lse->lsme_oinfo[j]->loi_oi),
611 lse->lsme_oinfo[j]->loi_ost_idx,
612 lse->lsme_oinfo[j]->loi_ost_gen);
/*
 * Walk the layout's entries looking for the one that covers offset.
 *
 * An entry matches when offset lies in [e_start, e_end), or when offset is
 * OBD_OBJECT_EOF and the entry's e_end is also OBD_OBJECT_EOF.
 * NOTE(review): the return statements are not visible in this view;
 * presumably the matching index is returned - confirm the no-match value.
 */
617 int lov_lsm_entry(const struct lov_stripe_md *lsm, __u64 offset)
621 for (i = 0; i < lsm->lsm_entry_count; i++) {
622 struct lov_stripe_md_entry *lse = lsm->lsm_entries[i];
624 if ((offset >= lse->lsme_extent.e_start &&
625 offset < lse->lsme_extent.e_end) ||
626 (offset == OBD_OBJECT_EOF &&
627 lse->lsme_extent.e_end == OBD_OBJECT_EOF))
635 * lmm_layout_gen overlaps stripe_offset field, it needs to be reset back when
636 * sending to MDT for passing striping checks
/*
 * Reset lmm_stripe_offset to LOV_OFFSET_DEFAULT in a LOV EA buffer, which
 * is modified in place.
 *
 * For V1/V3 user layouts the field of the top-level header is reset.  For
 * a composite layout the same is done on the embedded header of every
 * component that has LCME_FL_INIT set; other components are left alone.
 */
638 void lov_fix_ea_for_replay(void *lovea)
640 struct lov_user_md *lmm = lovea;
641 struct lov_comp_md_v1 *c1;
644 switch (le32_to_cpu(lmm->lmm_magic)) {
645 case LOV_USER_MAGIC_V1:
646 case LOV_USER_MAGIC_V3:
647 lmm->lmm_stripe_offset = LOV_OFFSET_DEFAULT;
650 case LOV_USER_MAGIC_COMP_V1:
652 for (i = 0; i < le16_to_cpu(c1->lcm_entry_count); i++) {
653 struct lov_comp_md_entry_v1 *ent = &c1->lcm_entries[i];
/* the component's header sits lcme_offset bytes from the start of c1 */
655 if (le32_to_cpu(ent->lcme_flags) & LCME_FL_INIT) {
656 lmm = (void *)((char *)c1 +
657 le32_to_cpu(ent->lcme_offset));
658 lmm->lmm_stripe_offset = LOV_OFFSET_DEFAULT;
663 EXPORT_SYMBOL(lov_fix_ea_for_replay);