4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright (c) 2011, 2016, Intel Corporation.
29 * This file is part of Lustre, http://www.lustre.org/
30 * Lustre is a trademark of Sun Microsystems, Inc.
34 * Author: Wang Di <wangdi@clusterfs.com>
37 #define DEBUG_SUBSYSTEM S_LOV
39 #include <linux/math64.h>
40 #include <linux/sort.h>
41 #include <libcfs/libcfs.h>
43 #include <obd_class.h>
44 #include <lustre/lustre_idl.h>
45 #include <lustre/lustre_user.h>
47 #include "lov_internal.h"
/* Byte-order helper for extents: stores le64_to_cpu() of src->e_start and
 * src->e_end into the matching fields of @dst. @src is only read. */
50 lu_extent_le_to_cpu(struct lu_extent *dst, const struct lu_extent *src)
52 dst->e_start = le64_to_cpu(src->e_start);
53 dst->e_end = le64_to_cpu(src->e_end);
56 /* Find minimum stripe maxbytes value. For inactive or
57 * reconnecting targets use LUSTRE_EXT3_STRIPE_MAXBYTES. */
/* Work out the maxbytes limit for the single target @tgt. The value starts
 * at LUSTRE_EXT3_STRIPE_MAXBYTES and is only replaced by what the target's
 * import reports when all of the conditions tested below hold.
 * NOTE(review): any early-exit guards and the final return statement are
 * not visible here -- confirm against the full function. */
58 static loff_t lov_tgt_maxbytes(struct lov_tgt_desc *tgt)
60 struct obd_import *imp;
61 loff_t maxbytes = LUSTRE_EXT3_STRIPE_MAXBYTES;
/* The import hanging off the target's client obd holds the connect data. */
66 imp = tgt->ltd_obd->u.cli.cl_import;
/* imp_state and imp_connect_data are read with imp_lock held. */
70 spin_lock(&imp->imp_lock);
/* Take ocd_maxbytes only if the import state is LUSTRE_IMP_FULL, the
 * OBD_CONNECT_MAXBYTES connect flag is set and the value is positive;
 * otherwise the default assigned above is kept. */
71 if (imp->imp_state == LUSTRE_IMP_FULL &&
72 (imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_MAXBYTES) &&
73 imp->imp_connect_data.ocd_maxbytes > 0)
74 maxbytes = imp->imp_connect_data.ocd_maxbytes;
76 spin_unlock(&imp->imp_lock);
/* Sanity-check the common header of a V1/V3 LOV EA. Four conditions are
 * tested in order; each failure is reported with CERROR() and the EA is
 * dumped at D_WARNING level via lov_dump_lmm_common().
 * NOTE(review): the remainder of the parameter list and the values
 * returned on failure/success are not visible here -- confirm. */
81 static int lsm_lmm_verify_v1v3(struct lov_mds_md *lmm, size_t lmm_size,
/* 1) stripe count must not exceed LOV_V1_INSANE_STRIPE_COUNT. */
84 if (stripe_count > LOV_V1_INSANE_STRIPE_COUNT) {
85 CERROR("bad stripe count %d\n", stripe_count);
86 lov_dump_lmm_common(D_WARNING, lmm);
/* 2) the object id taken from lmm_oi must be non-zero. */
90 if (lmm_oi_id(&lmm->lmm_oi) == 0) {
91 CERROR("zero object id\n");
92 lov_dump_lmm_common(D_WARNING, lmm);
/* 3) after lov_pattern() filtering, the pattern must be LOV_PATTERN_RAID0. */
96 if (lov_pattern(le32_to_cpu(lmm->lmm_pattern)) != LOV_PATTERN_RAID0) {
97 CERROR("bad striping pattern\n");
98 lov_dump_lmm_common(D_WARNING, lmm);
/* 4) stripe size must be non-zero and have none of the bits in
 * (LOV_MIN_STRIPE_SIZE - 1) set. The zero test is done on the raw
 * little-endian field, which is safe because zero is the same in any
 * byte order. */
102 if (lmm->lmm_stripe_size == 0 ||
103 (le32_to_cpu(lmm->lmm_stripe_size)&(LOV_MIN_STRIPE_SIZE-1)) != 0) {
104 CERROR("bad stripe size %u\n",
105 le32_to_cpu(lmm->lmm_stripe_size));
106 lov_dump_lmm_common(D_WARNING, lmm);
/* Release one layout entry: every lsme_oinfo[] slot up to
 * lsme_stripe_count is handed back to lov_oinfo_slab, then the entry
 * itself is freed. */
112 static void lsme_free(struct lov_stripe_md_entry *lsme)
/* Cache the count first; @lsme must not be read after it is freed. */
114 unsigned int stripe_count = lsme->lsme_stripe_count;
118 for (i = 0; i < stripe_count; i++)
119 OBD_SLAB_FREE_PTR(lsme->lsme_oinfo[i], lov_oinfo_slab);
/* The size passed to the free macro is the header plus stripe_count
 * trailing lsme_oinfo[] pointers; lsme_unpack() uses the same formula
 * when it allocates the entry. */
121 lsme_size = offsetof(typeof(*lsme), lsme_oinfo[stripe_count]);
122 OBD_FREE_LARGE(lsme, lsme_size);
/* Release a whole stripe MD: lsme_free() is called on each of the
 * lsm_entry_count entries, then the lov_stripe_md itself is freed. */
125 void lsm_free(struct lov_stripe_md *lsm)
127 unsigned int entry_count = lsm->lsm_entry_count;
131 for (i = 0; i < entry_count; i++)
132 lsme_free(lsm->lsm_entries[i]);
/* Size is the header plus entry_count trailing lsm_entries[] pointers. */
134 lsm_size = offsetof(typeof(*lsm), lsm_entries[entry_count]);
135 OBD_FREE(lsm, lsm_size);
139 * Unpack a struct lov_mds_md into a struct lov_stripe_md_entry.
141 * The caller should set id and extent.
/* Parameters visible in the signature: @lov is the LOV device, @lmm the
 * little-endian on-disk EA, @buf_size the number of bytes available at
 * @lmm, @pool_name an optional pool name (may be NULL) and @objects the
 * per-stripe lov_ost_data_v1 array. A further out-parameter, used below
 * as *maxbytes, receives the size limit computed for this entry when it
 * is non-NULL. Failures are reported as ERR_PTR() values.
 * NOTE(review): several short lines (guards, continue statements, labels,
 * the success return) are not visible here -- confirm against the full
 * function before relying on the notes below. */
143 static struct lov_stripe_md_entry *
144 lsme_unpack(struct lov_obd *lov, struct lov_mds_md *lmm, size_t buf_size,
145 const char *pool_name, struct lov_ost_data_v1 *objects,
148 struct lov_stripe_md_entry *lsme;
/* Smallest per-target limit seen so far; 0 means "none recorded yet". */
150 loff_t min_stripe_maxbytes = 0;
154 unsigned int stripe_count;
/* Only V1 and V3 layouts are accepted. */
158 magic = le32_to_cpu(lmm->lmm_magic);
159 if (magic != LOV_MAGIC_V1 && magic != LOV_MAGIC_V3)
160 RETURN(ERR_PTR(-EINVAL));
/* NOTE(review): the statement guarded by the LOV_PATTERN_F_RELEASED test
 * is not visible here; confirm how stripe_count is chosen for released
 * layouts. */
162 pattern = le32_to_cpu(lmm->lmm_pattern);
163 if (pattern & LOV_PATTERN_F_RELEASED)
166 stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
/* The buffer must hold at least the fixed header of the matching version.
 * Note the comparison uses sizeof() of the header struct, while the
 * "need" value printed in the message is lov_mds_md_size() for
 * stripe_count stripes. */
168 if (buf_size < (magic == LOV_MAGIC_V1 ? sizeof(struct lov_mds_md_v1) :
169 sizeof(struct lov_mds_md_v3))) {
170 CERROR("LOV EA %s too small: %zu, need %u\n",
171 magic == LOV_MAGIC_V1 ? "V1" : "V3", buf_size,
172 lov_mds_md_size(stripe_count, magic == LOV_MAGIC_V1 ?
173 LOV_MAGIC_V1 : LOV_MAGIC_V3));
174 lov_dump_lmm_common(D_WARNING, lmm);
175 return ERR_PTR(-EINVAL);
/* Header sanity checks shared by V1 and V3. */
178 rc = lsm_lmm_verify_v1v3(lmm, buf_size, stripe_count);
/* Allocate the entry with room for stripe_count trailing lsme_oinfo[]
 * pointers. */
182 lsme_size = offsetof(typeof(*lsme), lsme_oinfo[stripe_count]);
183 OBD_ALLOC_LARGE(lsme, lsme_size);
185 RETURN(ERR_PTR(-ENOMEM));
/* Copy the header fields, converted to CPU byte order. */
187 lsme->lsme_magic = magic;
188 lsme->lsme_pattern = pattern;
189 lsme->lsme_stripe_size = le32_to_cpu(lmm->lmm_stripe_size);
190 lsme->lsme_stripe_count = stripe_count;
191 lsme->lsme_layout_gen = le16_to_cpu(lmm->lmm_layout_gen);
/* Optional pool name: a strlcpy() result that is >= the destination size
 * is treated as an error (-E2BIG). */
193 if (pool_name != NULL) {
194 size_t pool_name_len;
196 pool_name_len = strlcpy(lsme->lsme_pool_name, pool_name,
197 sizeof(lsme->lsme_pool_name));
198 if (pool_name_len >= sizeof(lsme->lsme_pool_name))
199 GOTO(out_lsme, rc = -E2BIG);
/* Per-stripe pass: allocate one lov_oinfo per stripe, fill it from
 * objects[i] and track the smallest per-target maxbytes. */
202 for (i = 0; i < stripe_count; i++) {
203 struct lov_oinfo *loi;
204 struct lov_tgt_desc *ltd;
/* Allocation uses GFP_NOFS; failure leads to the -ENOMEM exit below. */
206 OBD_SLAB_ALLOC_PTR_GFP(loi, lov_oinfo_slab, GFP_NOFS);
208 GOTO(out_lsme, rc = -ENOMEM);
/* Stored in the entry right away so the cleanup loop at the end of the
 * function can find it. */
210 lsme->lsme_oinfo[i] = loi;
212 ostid_le_to_cpu(&objects[i].l_ost_oi, &loi->loi_oi);
213 loi->loi_ost_idx = le32_to_cpu(objects[i].l_ost_idx);
214 loi->loi_ost_gen = le32_to_cpu(objects[i].l_ost_gen);
/* NOTE(review): the action taken for a dummy oinfo is not visible here. */
215 if (lov_oinfo_is_dummy(loi))
/* An OST index at or beyond ld_tgt_count is rejected with -EINVAL unless
 * the obd has obd_process_conf set. */
218 if (loi->loi_ost_idx >= lov->desc.ld_tgt_count &&
219 !lov2obd(lov)->obd_process_conf) {
220 CERROR("%s: OST index %d more than OST count %d\n",
221 (char*)lov->desc.ld_uuid.uuid,
222 loi->loi_ost_idx, lov->desc.ld_tgt_count);
223 lov_dump_lmm_v1(D_WARNING, lmm);
224 GOTO(out_lsme, rc = -EINVAL);
/* Look up the target descriptor; a missing one is logged and the EA is
 * dumped. NOTE(review): the guard and what follows the dump are not
 * visible here -- confirm whether the stripe is skipped. */
227 ltd = lov->lov_tgts[loi->loi_ost_idx];
229 CERROR("%s: OST index %d missing\n",
230 (char*)lov->desc.ld_uuid.uuid, loi->loi_ost_idx);
231 lov_dump_lmm_v1(D_WARNING, lmm);
/* Keep the minimum of the per-target limits (first value always wins). */
235 lov_bytes = lov_tgt_maxbytes(ltd);
236 if (min_stripe_maxbytes == 0 || lov_bytes < min_stripe_maxbytes)
237 min_stripe_maxbytes = lov_bytes;
/* No target contributed a limit: fall back to the default. */
240 if (min_stripe_maxbytes == 0)
241 min_stripe_maxbytes = LUSTRE_EXT3_STRIPE_MAXBYTES;
/* Limit for the entry is the per-stripe minimum times the stripe count. */
243 lov_bytes = min_stripe_maxbytes * stripe_count;
/* A product smaller than one factor is taken as overflow and reported as
 * MAX_LFS_FILESIZE. */
245 if (maxbytes != NULL) {
246 if (lov_bytes < min_stripe_maxbytes) /* handle overflow */
247 *maxbytes = MAX_LFS_FILESIZE;
249 *maxbytes = lov_bytes;
/* Cleanup: give the oinfo slots back to the slab and free the entry with
 * the size it was allocated with. NOTE(review): the label and any
 * per-slot guard are not visible here. */
255 for (i = 0; i < stripe_count; i++) {
256 struct lov_oinfo *loi = lsme->lsme_oinfo[i];
259 OBD_SLAB_FREE_PTR(lsme->lsme_oinfo[i], lov_oinfo_slab);
261 OBD_FREE_LARGE(lsme, lsme_size);
/* Build a single-entry lov_stripe_md from a plain V1/V3 EA. The one entry
 * is produced by lsme_unpack() and is given the extent [0, LUSTRE_EOF].
 * NOTE(review): the error-path label and return statements are not
 * visible here -- confirm against the full function. */
266 static struct lov_stripe_md *
267 lsm_unpackmd_v1v3(struct lov_obd *lov,
268 struct lov_mds_md *lmm, size_t buf_size,
269 const char *pool_name,
270 struct lov_ost_data_v1 *objects)
272 struct lov_stripe_md *lsm;
273 struct lov_stripe_md_entry *lsme;
279 pattern = le32_to_cpu(lmm->lmm_pattern);
/* lsme_unpack() also reports the size limit through &maxbytes. An error
 * pointer from it is passed on to the caller via ERR_CAST(). */
281 lsme = lsme_unpack(lov, lmm, buf_size, pool_name, objects, &maxbytes);
283 RETURN(ERR_CAST(lsme));
/* lsme_unpack() leaves the extent to its caller; set it here. */
285 lsme->lsme_extent.e_start = 0;
286 lsme->lsme_extent.e_end = LUSTRE_EOF;
/* Container with exactly one lsm_entries[] slot. */
288 lsm_size = offsetof(typeof(*lsm), lsm_entries[1]);
289 OBD_ALLOC(lsm, lsm_size);
291 GOTO(out_lsme, rc = -ENOMEM);
/* Reference count starts at one; remaining fields come from the EA
 * header, converted to CPU byte order. */
293 atomic_set(&lsm->lsm_refc, 1);
294 spin_lock_init(&lsm->lsm_lock);
295 lsm->lsm_maxbytes = maxbytes;
296 lmm_oi_le_to_cpu(&lsm->lsm_oi, &lmm->lmm_oi);
297 lsm->lsm_magic = le32_to_cpu(lmm->lmm_magic);
298 lsm->lsm_layout_gen = le16_to_cpu(lmm->lmm_layout_gen);
299 lsm->lsm_entry_count = 1;
/* Released state is taken from the LOV_PATTERN_F_RELEASED pattern bit. */
300 lsm->lsm_is_released = pattern & LOV_PATTERN_F_RELEASED;
301 lsm->lsm_entries[0] = lsme;
/* V1 wrapper: views @buf as struct lov_mds_md_v1 and forwards to
 * lsm_unpackmd_v1v3() with a NULL pool name and the V1 lmm_objects
 * array. */
311 static inline struct lov_stripe_md *
312 lsm_unpackmd_v1(struct lov_obd *lov, void *buf, size_t buf_size)
314 struct lov_mds_md_v1 *lmm = buf;
316 return lsm_unpackmd_v1v3(lov, buf, buf_size, NULL, lmm->lmm_objects);
/* Operations table whose unpack hook is lsm_unpackmd_v1(). */
319 const struct lsm_operations lsm_v1_ops = {
320 .lsm_unpackmd = lsm_unpackmd_v1,
/* V3 wrapper: views @buf as struct lov_mds_md_v3 and forwards to
 * lsm_unpackmd_v1v3() with the pool name stored in the V3 header.
 * NOTE(review): the final argument of the call is not visible here;
 * presumably the V3 lmm_objects array -- confirm. */
323 static inline struct lov_stripe_md *
324 lsm_unpackmd_v3(struct lov_obd *lov, void *buf, size_t buf_size)
326 struct lov_mds_md_v3 *lmm = buf;
328 return lsm_unpackmd_v1v3(lov, buf, buf_size, lmm->lmm_pool_name,
/* Operations table whose unpack hook is lsm_unpackmd_v3(). */
332 const struct lsm_operations lsm_v3_ops = {
333 .lsm_unpackmd = lsm_unpackmd_v3,
/* Validate the framing of a composite layout (lov_comp_md_v1): the size
 * recorded in the header must fit in the supplied buffer, and every
 * component's blob must lie inside that recorded size.
 * NOTE(review): the rest of the parameter list and the return statements
 * are not visible here -- confirm the values returned. */
336 static int lsm_verify_comp_md_v1(struct lov_comp_md_v1 *lcm,
339 unsigned int entry_count;
/* The self-described size may not exceed what the caller handed in. */
343 lcm_size = le32_to_cpu(lcm->lcm_size);
344 if (lcm_buf_size < lcm_size) {
345 CERROR("bad LCM buffer size %zu, expected %zu\n",
346 lcm_buf_size, lcm_size);
350 entry_count = le16_to_cpu(lcm->lcm_entry_count);
351 for (i = 0; i < entry_count; i++) {
352 struct lov_comp_md_entry_v1 *lcme = &lcm->lcm_entries[i];
356 blob_offset = le32_to_cpu(lcme->lcme_offset);
357 blob_size = le32_to_cpu(lcme->lcme_size);
/* Offset and size are each compared against lcm_size before their sum
 * is, so an oversized value is caught by the individual tests first. */
359 if (lcm_size < blob_offset || lcm_size < blob_size ||
360 lcm_size < blob_offset + blob_size) {
361 CERROR("LCM entry %u has invalid blob: "
362 "LCM size = %zu, offset = %zu, size = %zu\n",
363 le32_to_cpu(lcme->lcme_id),
364 lcm_size, blob_offset, blob_size);
/* Unpack the V1/V3 blob of one composite-layout component. Adds checks
 * on top of lsme_unpack(): a zero stripe count, an unknown magic, or a
 * buffer smaller than lov_mds_md_size(stripe_count, magic) all yield
 * ERR_PTR(-EINVAL). @maxbytes is forwarded to lsme_unpack() unchanged. */
372 static struct lov_stripe_md_entry *
373 lsme_unpack_comp(struct lov_obd *lov, struct lov_mds_md *lmm,
374 size_t lmm_buf_size, loff_t *maxbytes)
377 unsigned int stripe_count;
379 stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
380 if (stripe_count == 0)
381 RETURN(ERR_PTR(-EINVAL));
383 magic = le32_to_cpu(lmm->lmm_magic);
384 if (magic != LOV_MAGIC_V1 && magic != LOV_MAGIC_V3)
385 RETURN(ERR_PTR(-EINVAL));
/* Unlike the header-only check in lsme_unpack(), this requires room for
 * the full EA including all stripe_count object slots. */
387 if (lmm_buf_size < lov_mds_md_size(stripe_count, magic))
388 RETURN(ERR_PTR(-EINVAL));
/* V1 has no pool name and keeps its objects in lmm->lmm_objects; for V3
 * both pool name and objects are read through a lov_mds_md_v3 view of
 * the same buffer. */
390 if (magic == LOV_MAGIC_V1) {
391 return lsme_unpack(lov, lmm, lmm_buf_size, NULL,
392 lmm->lmm_objects, maxbytes);
394 struct lov_mds_md_v3 *lmm3 = (struct lov_mds_md_v3 *)lmm;
396 return lsme_unpack(lov, lmm, lmm_buf_size, lmm3->lmm_pool_name,
397 lmm3->lmm_objects, maxbytes);
/* Build a lov_stripe_md from a composite layout (lov_comp_md_v1) held in
 * @buf: the framing is verified first, then one entry is unpacked per
 * component with lsme_unpack_comp(). Errors are reported as ERR_PTR()
 * values.
 * NOTE(review): several short lines (guards, the out_lsm label, return
 * statements, part of the maxbytes expression) are not visible here --
 * confirm against the full function. */
401 static struct lov_stripe_md *
402 lsm_unpackmd_comp_md_v1(struct lov_obd *lov, void *buf, size_t buf_size)
404 struct lov_comp_md_v1 *lcm = buf;
405 struct lov_stripe_md *lsm;
407 unsigned int entry_count = 0;
/* Check sizes and blob offsets before touching any component. */
412 rc = lsm_verify_comp_md_v1(buf, buf_size);
416 entry_count = le16_to_cpu(lcm->lcm_entry_count);
/* Container with entry_count trailing lsm_entries[] slots. */
418 lsm_size = offsetof(typeof(*lsm), lsm_entries[entry_count]);
419 OBD_ALLOC(lsm, lsm_size);
421 return ERR_PTR(-ENOMEM);
423 atomic_set(&lsm->lsm_refc, 1);
424 spin_lock_init(&lsm->lsm_lock);
425 lsm->lsm_magic = le32_to_cpu(lcm->lcm_magic);
426 lsm->lsm_layout_gen = le32_to_cpu(lcm->lcm_layout_gen);
427 lsm->lsm_entry_count = entry_count;
/* Starts out as released; cleared in the loop as soon as one component
 * lacks LOV_PATTERN_F_RELEASED. */
428 lsm->lsm_is_released = true;
/* Initial value only; overwritten when the last component is processed. */
429 lsm->lsm_maxbytes = LLONG_MIN;
431 for (i = 0; i < entry_count; i++) {
432 struct lov_comp_md_entry_v1 *lcme = &lcm->lcm_entries[i];
433 struct lov_stripe_md_entry *lsme;
/* Each component's blob sits at lcme_offset bytes from the start of the
 * composite header. */
438 blob_offset = le32_to_cpu(lcme->lcme_offset);
439 blob_size = le32_to_cpu(lcme->lcme_size);
440 blob = (char *)lcm + blob_offset;
/* &maxbytes is passed for the last component. NOTE(review): the argument
 * used for the other components is not visible here -- presumably NULL. */
442 lsme = lsme_unpack_comp(lov, blob, blob_size,
443 (i == entry_count - 1) ? &maxbytes :
446 GOTO(out_lsm, rc = PTR_ERR(lsme));
448 if (!(lsme->lsme_pattern & LOV_PATTERN_F_RELEASED))
449 lsm->lsm_is_released = false;
/* Id and extent come from the component entry, not from the blob. */
451 lsm->lsm_entries[i] = lsme;
452 lsme->lsme_id = le32_to_cpu(lcme->lcme_id);
453 lu_extent_le_to_cpu(&lsme->lsme_extent, &lcme->lcme_extent);
/* The file-wide limit is derived from the last component only: its
 * extent start plus a further term (not visible here), replaced by
 * MAX_LFS_FILESIZE in the two cases named in the comment below. */
455 if (i == entry_count - 1) {
456 lsm->lsm_maxbytes = (loff_t)lsme->lsme_extent.e_start +
458 /* the last component hasn't been defined, or
459 * lsm_maxbytes overflowed. */
460 if (lsme->lsme_extent.e_end != LUSTRE_EOF ||
462 (loff_t)lsme->lsme_extent.e_start)
463 lsm->lsm_maxbytes = MAX_LFS_FILESIZE;
/* Cleanup: free whichever entries were filled in (non-NULL slots), then
 * the container itself. */
470 for (i = 0; i < entry_count; i++)
471 if (lsm->lsm_entries[i] != NULL)
472 lsme_free(lsm->lsm_entries[i]);
474 OBD_FREE(lsm, lsm_size);
/* Operations table whose unpack hook is lsm_unpackmd_comp_md_v1(). */
479 const struct lsm_operations lsm_comp_md_v1_ops = {
480 .lsm_unpackmd = lsm_unpackmd_comp_md_v1,
/* Debug dump of a stripe MD at debug mask @level: one CDEBUG() line with
 * the container fields (pointer, object id, maxbytes, magic, refcount,
 * entry count, layout generation), followed by per-entry output inside
 * the loop. @lsm is only read. */
483 void dump_lsm(unsigned int level, const struct lov_stripe_md *lsm)
487 CDEBUG(level, "lsm %p, objid "DOSTID", maxbytes %#llx, magic 0x%08X, "
488 "refc: %d, entry: %u, layout_gen %u\n",
489 lsm, POSTID(&lsm->lsm_oi), lsm->lsm_maxbytes, lsm->lsm_magic,
490 atomic_read(&lsm->lsm_refc), lsm->lsm_entry_count,
491 lsm->lsm_layout_gen);
/* Per entry: extent, id, magic, stripe count, stripe size, layout
 * generation and pool name.
 * NOTE(review): the line opening the per-entry print call is not visible
 * here -- presumably another CDEBUG(level, ...). */
493 for (i = 0; i < lsm->lsm_entry_count; i++) {
494 struct lov_stripe_md_entry *lse = lsm->lsm_entries[i];
497 DEXT ": id: %u, magic 0x%08X, stripe count %u, "
498 "size %u, layout_gen %u, pool: ["LOV_POOLNAMEF"]\n",
499 PEXT(&lse->lsme_extent), lse->lsme_id, lse->lsme_magic,
500 lse->lsme_stripe_count, lse->lsme_stripe_size,
501 lse->lsme_layout_gen, lse->lsme_pool_name);
505 int lov_lsm_entry(const struct lov_stripe_md *lsm, __u64 offset)
509 for (i = 0; i < lsm->lsm_entry_count; i++) {
510 struct lov_stripe_md_entry *lse = lsm->lsm_entries[i];
512 if ((offset >= lse->lsme_extent.e_start &&
513 offset < lse->lsme_extent.e_end) ||
514 (offset == OBD_OBJECT_EOF &&
515 lse->lsme_extent.e_end == OBD_OBJECT_EOF))