1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright 2008 Sun Microsystems, Inc. All rights reserved
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Wang Di <wangdi@clusterfs.com>
42 # define EXPORT_SYMTAB
44 #define DEBUG_SUBSYSTEM S_LOV
47 #include <asm/div64.h>
48 #include <libcfs/libcfs.h>
50 #include <liblustre.h>
53 #include <obd_class.h>
55 #include <lustre/lustre_idl.h>
56 #include <lustre_log.h>
58 #include "lov_internal.h"
/*
 * Argument bundle for lovea_unpack_array().  It is passed as the data
 * argument of llog_process() and cast back inside the callback.  lsm is
 * the stripe metadata being filled in.
 * NOTE(review): lovea_unpack_array() also reads and post-increments a
 * "cursor" member of this struct, so at least that field must follow.
 */
60 struct lovea_unpack_args {
61 struct lov_stripe_md *lsm;
/*
 * Header sanity checks shared by lsm_lmm_verify_v1(), lsm_lmm_verify_v3()
 * and lsm_lmm_verify_join().
 *
 * lmm:          LOV EA header; its fields are still little-endian.
 * lmm_bytes:    size of the EA buffer (not used by the checks seen here).
 * stripe_count: stripe count already converted to CPU order by the caller.
 *
 * Every failing check logs through CERROR and dumps the EA.
 * NOTE(review): the return statements are not visible in this listing;
 * presumably a negative errno on failure and 0 on success -- confirm.
 */
65 static int lsm_lmm_verify_common(struct lov_mds_md *lmm, int lmm_bytes,
/* reject an empty layout or a stripe count above the sanity limit */
69 if (stripe_count == 0 || stripe_count > LOV_V1_INSANE_STRIPE_COUNT) {
70 CERROR("bad stripe count %d\n", stripe_count);
71 lov_dump_lmm(D_WARNING, lmm);
75 if (lmm->lmm_object_id == 0) {
76 CERROR("zero object id\n");
77 lov_dump_lmm(D_WARNING, lmm);
/* the constant is swapped instead of the field; only RAID0 is accepted */
81 if (lmm->lmm_pattern != cpu_to_le32(LOV_PATTERN_RAID0)) {
82 CERROR("bad striping pattern\n");
83 lov_dump_lmm(D_WARNING, lmm);
/*
 * The mask test rejects sizes that are not a multiple of
 * LOV_MIN_STRIPE_SIZE (assuming that constant is a power of two).
 */
87 if (lmm->lmm_stripe_size == 0 ||
88 (le32_to_cpu(lmm->lmm_stripe_size)&(LOV_MIN_STRIPE_SIZE-1)) != 0) {
89 CERROR("bad stripe size %u\n",
90 le32_to_cpu(lmm->lmm_stripe_size));
/* the dump is skipped when the caller passed stripe_count == -1 */
91 if (stripe_count != -1)
92 lov_dump_lmm(D_WARNING, lmm);
/*
 * Allocate a lov_stripe_md with room for stripe_count lov_oinfo pointers,
 * then allocate one lov_oinfo per stripe from lov_oinfo_slab.
 *
 * stripe_count: number of stripes; asserted to be > 0.
 * size:         out parameter; receives the byte size of the lsm
 *               allocation (struct plus pointer array, slab objects not
 *               included).
 *
 * After the per-stripe loop, lsm_stripe_count is recorded and the pool
 * name is set to the empty string.
 * NOTE(review): the allocation-failure checks, the loop header of the
 * unwind path around the final OBD_SLAB_FREE/OBD_FREE, and the return
 * statements are not visible in this listing -- confirm against the file.
 */
98 struct lov_stripe_md *lsm_alloc_plain(int stripe_count, int *size)
100 struct lov_stripe_md *lsm;
101 int i, oinfo_ptrs_size;
102 struct lov_oinfo *loi;
104 LASSERT(stripe_count > 0);
/* the pointer array is sized together with the struct in one allocation */
106 oinfo_ptrs_size = sizeof(struct lov_oinfo *) * stripe_count;
107 *size = sizeof(struct lov_stripe_md) + oinfo_ptrs_size;
109 OBD_ALLOC(lsm, *size);
113 for (i = 0; i < stripe_count; i++) {
114 OBD_SLAB_ALLOC(loi, lov_oinfo_slab, CFS_ALLOC_IO, sizeof(*loi));
117 lsm->lsm_oinfo[i] = loi;
119 lsm->lsm_stripe_count = stripe_count;
120 lsm->lsm_pool_name[0] = '\0';
/* unwind: give back slab objects, then the lsm itself */
125 OBD_SLAB_FREE(lsm->lsm_oinfo[i], lov_oinfo_slab, sizeof(*loi));
126 OBD_FREE(lsm, *size);
/*
 * Release an lsm built the way lsm_alloc_plain() builds it: each
 * lsm_oinfo[i] goes back to lov_oinfo_slab, then the lsm is freed.
 * The size passed to OBD_FREE is recomputed from lsm_stripe_count with the
 * same expression lsm_alloc_plain() stores in *size.
 */
130 void lsm_free_plain(struct lov_stripe_md *lsm)
/* read the count up front; the lsm it lives in is freed below */
132 int stripe_count = lsm->lsm_stripe_count;
135 for (i = 0; i < stripe_count; i++)
136 OBD_SLAB_FREE(lsm->lsm_oinfo[i], lov_oinfo_slab,
137 sizeof(struct lov_oinfo));
138 OBD_FREE(lsm, sizeof(struct lov_stripe_md) +
139 stripe_count * sizeof(struct lov_oinfo *));
/*
 * Copy the header fields shared by all EA versions from the little-endian
 * lmm into the in-memory lsm, converting each to CPU byte order: object
 * id, object group, stripe size and pattern.  The pool name is reset to
 * the empty string; lsm_unpackmd_v3() overwrites it afterwards.
 */
142 static void lsm_unpackmd_common(struct lov_stripe_md *lsm,
143 struct lov_mds_md *lmm)
146 * This supposes lov_mds_md_v1/v3 first fields are
149 lsm->lsm_object_id = le64_to_cpu(lmm->lmm_object_id);
150 lsm->lsm_object_gr = le64_to_cpu(lmm->lmm_object_gr);
151 lsm->lsm_stripe_size = le32_to_cpu(lmm->lmm_stripe_size);
152 lsm->lsm_pattern = le32_to_cpu(lmm->lmm_pattern);
153 lsm->lsm_pool_name[0] = '\0';
/*
 * Plain-layout handler for .lsm_stripe_by_index (used by lsm_v1_ops and
 * lsm_v3_ops).  Stores the full stripe width, stripe_size * stripe_count,
 * through swidth; the cast widens the multiplication to obd_off.
 * NOTE(review): any guard around the store is not visible here -- confirm.
 */
157 lsm_stripe_by_index_plain(struct lov_stripe_md *lsm, int *stripeno,
158 obd_off *lov_off, obd_off *swidth)
161 *swidth = (obd_off)lsm->lsm_stripe_size * lsm->lsm_stripe_count;
/*
 * Plain-layout handler for .lsm_stripe_by_offset (used by lsm_v1_ops and
 * lsm_v3_ops).  Stores the full stripe width, stripe_size * stripe_count,
 * through swidth; the cast widens the multiplication to obd_off.
 * NOTE(review): any guard around the store is not visible here -- confirm.
 */
165 lsm_stripe_by_offset_plain(struct lov_stripe_md *lsm, int *stripeno,
166 obd_off *lov_off, obd_off *swidth)
169 *swidth = (obd_off)lsm->lsm_stripe_size * lsm->lsm_stripe_count;
/*
 * Plain-layout handler installed as .lsm_stripe_offset_by_index in
 * lsm_v1_ops and lsm_v3_ops.
 * NOTE(review): remaining parameters and body are not visible here.
 */
173 lsm_stripe_offset_by_index_plain(struct lov_stripe_md *lsm,
/*
 * Plain-layout handler installed as .lsm_stripe_offset_by_offset in
 * lsm_v1_ops and lsm_v3_ops.
 * NOTE(review): remaining parameters and body are not visible here.
 */
180 lsm_stripe_offset_by_offset_plain(struct lov_stripe_md *lsm,
/*
 * Plain-layout handler installed as .lsm_stripe_index_by_offset in
 * lsm_v1_ops and lsm_v3_ops.
 * NOTE(review): remaining parameters and body are not visible here.
 */
187 lsm_stripe_index_by_offset_plain(struct lov_stripe_md *lsm,
/*
 * Plain-layout handler installed as .lsm_revalidate in lsm_v1_ops and
 * lsm_v3_ops.
 * NOTE(review): the body is not visible here; presumably there is nothing
 * to revalidate for a plain layout -- confirm.
 */
193 static int lsm_revalidate_plain(struct lov_stripe_md *lsm,
194 struct obd_device *obd)
/*
 * Plain-layout handler installed as .lsm_destroy in lsm_v1_ops and
 * lsm_v3_ops.
 * NOTE(review): the body is not visible here -- confirm what, if anything,
 * it does with oa and md_exp.
 */
199 static int lsm_destroy_plain(struct lov_stripe_md *lsm, struct obdo *oa,
200 struct obd_export *md_exp)
/*
 * Verify a v1 LOV EA held in a buffer of lmm_bytes bytes.
 *
 * Checks that the buffer holds at least the fixed lov_mds_md_v1 header,
 * stores the CPU-order stripe count in *stripe_count, checks the buffer
 * against lov_mds_md_size(*stripe_count, LOV_MAGIC_V1), and finally returns
 * the result of lsm_lmm_verify_common().
 */
205 static int lsm_lmm_verify_v1(struct lov_mds_md_v1 *lmm, int lmm_bytes,
/*
 * NOTE(review): lmm_bytes is int and sizeof yields an unsigned type, so a
 * negative lmm_bytes is converted to a large unsigned value and would not
 * be caught by this comparison.
 */
208 if (lmm_bytes < sizeof(*lmm)) {
209 CERROR("lov_mds_md_v1 too small: %d, need at least %d\n",
210 lmm_bytes, (int)sizeof(*lmm));
/* only read after the fixed header is known to fit in the buffer */
214 *stripe_count = le32_to_cpu(lmm->lmm_stripe_count);
216 if (lmm_bytes < lov_mds_md_size(*stripe_count, LOV_MAGIC_V1)) {
217 CERROR("LOV EA V1 too small: %d, need %d\n",
218 lmm_bytes, lov_mds_md_size(*stripe_count, LOV_MAGIC_V1));
219 lov_dump_lmm_v1(D_WARNING, lmm);
223 return lsm_lmm_verify_common(lmm, lmm_bytes, *stripe_count);
/*
 * Unpack a v1 EA into lsm: the common header through
 * lsm_unpackmd_common(), then one lov_oinfo per stripe (object id, object
 * group, OST index, OST generation), each converted from little-endian.
 *
 * The loop reads lsm->lsm_stripe_count and lsm->lsm_oinfo[] without
 * assigning them, so the caller must have set both up beforehand.
 * Each stripe's OST index is checked against lov->desc.ld_tgt_count and
 * against an empty slot in lov->lov_tgts; a failure is logged and the EA
 * dumped.
 * NOTE(review): the return statements are not visible in this listing.
 */
226 int lsm_unpackmd_v1(struct lov_obd *lov, struct lov_stripe_md *lsm,
227 struct lov_mds_md_v1 *lmm)
229 struct lov_oinfo *loi;
232 lsm_unpackmd_common(lsm, lmm);
234 for (i = 0; i < lsm->lsm_stripe_count; i++) {
235 /* XXX LOV STACKING call down to osc_unpackmd() */
236 loi = lsm->lsm_oinfo[i];
237 loi->loi_id = le64_to_cpu(lmm->lmm_objects[i].l_object_id);
238 loi->loi_gr = le64_to_cpu(lmm->lmm_objects[i].l_object_gr);
239 loi->loi_ost_idx = le32_to_cpu(lmm->lmm_objects[i].l_ost_idx);
240 loi->loi_ost_gen = le32_to_cpu(lmm->lmm_objects[i].l_ost_gen);
/* range-check the index before it is used to index lov_tgts below */
241 if (loi->loi_ost_idx >= lov->desc.ld_tgt_count) {
242 CERROR("OST index %d more than OST count %d\n",
243 loi->loi_ost_idx, lov->desc.ld_tgt_count);
244 lov_dump_lmm_v1(D_WARNING, lmm);
247 if (!lov->lov_tgts[loi->loi_ost_idx]) {
248 CERROR("OST index %d missing\n", loi->loi_ost_idx);
249 lov_dump_lmm_v1(D_WARNING, lmm);
/*
 * Operation table for v1 layouts: the shared *_plain handlers plus the
 * v1-specific verify and unpack routines.
 */
257 struct lsm_operations lsm_v1_ops = {
258 .lsm_free = lsm_free_plain,
259 .lsm_destroy = lsm_destroy_plain,
260 .lsm_stripe_by_index = lsm_stripe_by_index_plain,
261 .lsm_stripe_by_offset = lsm_stripe_by_offset_plain,
262 .lsm_revalidate = lsm_revalidate_plain,
263 .lsm_stripe_offset_by_index = lsm_stripe_offset_by_index_plain,
264 .lsm_stripe_offset_by_offset = lsm_stripe_offset_by_offset_plain,
265 .lsm_stripe_index_by_offset = lsm_stripe_index_by_offset_plain,
266 .lsm_lmm_verify = lsm_lmm_verify_v1,
267 .lsm_unpackmd = lsm_unpackmd_v1,
/*
 * Look up the entry of lsm->lsm_array->lai_ext_array that corresponds to
 * file offset lov_off.
 *
 * Asserts that the array info exists and holds more than one extent, then
 * walks the extents from the start while the current one ends at or before
 * lov_off (le_start + le_len <= lov_off).  The extent reached is logged at
 * D_INFO.
 * NOTE(review): the rest of the loop condition, its increment and the
 * return statement are not visible in this listing -- confirm that the
 * walk cannot leave "le" pointing past the last array element.
 */
270 struct lov_extent *lovea_off2le(struct lov_stripe_md *lsm, obd_off lov_off)
272 struct lov_array_info *lai;
273 struct lov_extent *le;
276 LASSERT(lsm->lsm_array != NULL);
277 lai = lsm->lsm_array;
278 LASSERT(lai->lai_ext_count > 1);
280 for (le = lai->lai_ext_array, i = 0;
281 i < lai->lai_ext_count && le->le_start + le->le_len <= lov_off
287 CDEBUG(D_INFO, "off "LPU64" idx %d, ext "LPU64":"LPU64" idx %d sc %d\n",
288 lov_off, i, le->le_start, le->le_len, le->le_loi_idx,
289 le->le_stripe_count);
/*
 * Look up the entry of lsm->lsm_array->lai_ext_array that contains stripe
 * number stripe_no.
 *
 * stripe_index starts as the first extent's stripe count and accumulates
 * the stripe count of each following extent; the walk continues while the
 * running total is <= stripe_no and the current extent's le_len is not -1.
 * The extent reached is logged at D_INFO.
 * NOTE(review): the return statement is not visible in this listing.
 */
294 struct lov_extent *lovea_idx2le(struct lov_stripe_md *lsm, int stripe_no)
296 struct lov_extent *le;
297 struct lov_array_info *lai;
300 LASSERT(lsm->lsm_array != NULL);
/*
 * NOTE(review): the upper bound is inclusive, so stripe_no equal to
 * lsm_stripe_count is accepted -- confirm that this is intended.
 */
301 LASSERT(stripe_no >= 0 && stripe_no <= lsm->lsm_stripe_count);
302 lai = lsm->lsm_array;
303 LASSERT(lai->lai_ext_count > 1);
/*
 * NOTE(review): the increment advances "le" and reads le->le_stripe_count
 * before the i < lai_ext_count test runs again, so stepping off the last
 * extent reads one element past the array unless the le_len != -1 test
 * always ends the walk first -- confirm.
 */
305 for (le = lai->lai_ext_array, i = 0, stripe_index = le->le_stripe_count;
306 i < lai->lai_ext_count && stripe_index <= stripe_no &&
307 le->le_len != -1; i ++, le ++,
308 stripe_index += le->le_stripe_count) {
312 CDEBUG(D_INFO, "stripe %d idx %d, ext "LPU64":"LPU64" idx %d sc %d\n",
313 stripe_no, i, le->le_start, le->le_len, le->le_loi_idx,
314 le->le_stripe_count);
/*
 * Free the array info attached to lsm: the extent array, if one was
 * allocated (lai_ext_count entries of struct lov_extent), and then the
 * lov_array_info itself.  A NULL lsm or a NULL lsm->lsm_array is tested
 * for first.
 * NOTE(review): the statement under that test, and whether lsm->lsm_array
 * is cleared after the free, are not visible in this listing -- confirm.
 */
318 static void lovea_free_array_info(struct lov_stripe_md *lsm)
320 if (!lsm || !lsm->lsm_array)
323 if (lsm->lsm_array->lai_ext_array)
324 OBD_FREE(lsm->lsm_array->lai_ext_array,
325 lsm->lsm_array->lai_ext_count *
326 sizeof(struct lov_extent));
328 OBD_FREE_PTR(lsm->lsm_array);
/*
 * .lsm_free handler for joined layouts (see lsm_join_ops): releases the
 * extent array info through lovea_free_array_info().
 * NOTE(review): any further cleanup of the lsm itself is not visible in
 * this listing -- confirm.
 */
331 static void lsm_free_join(struct lov_stripe_md *lsm)
333 lovea_free_array_info(lsm);
/*
 * .lsm_stripe_by_index handler for joined layouts.
 *
 * Finds the extent holding stripe *stripeno with lovea_idx2le(), rebases
 * *stripeno so it counts from that extent's first stripe (le_loi_idx), and
 * sets *swidth to stripe_size times the extent's own stripe count.
 * *lov_off is then rewritten relative to an extent start, clamped at 0.
 * NOTE(review): the conditions selecting between the *lov_off assignments
 * (and the role of lov_le) are not visible in this listing -- confirm
 * before relying on this description.
 */
338 lsm_stripe_by_index_join(struct lov_stripe_md *lsm, int *stripeno,
339 obd_off *lov_off, obd_off *swidth)
341 struct lov_extent *le;
343 LASSERT(stripeno != NULL);
345 le = lovea_idx2le(lsm, *stripeno);
347 LASSERT(le != NULL && le->le_stripe_count != 0);
349 *stripeno -= le->le_loi_idx;
352 *swidth = (obd_off)lsm->lsm_stripe_size * le->le_stripe_count;
355 struct lov_extent *lov_le = lovea_off2le(lsm, *lov_off);
357 *lov_off = (*lov_off > le->le_start) ?
358 (*lov_off - le->le_start) : 0;
360 *lov_off = (*lov_off > le->le_start) ?
362 LASSERT(*lov_off != -1);
/*
 * .lsm_stripe_by_offset handler for joined layouts.
 *
 * Finds the extent for *lov_off with lovea_off2le(), rewrites *lov_off as
 * an offset from that extent's start (0 if it is not beyond the start),
 * rebases *stripeno by the extent's first stripe index, and sets *swidth
 * to stripe_size times the extent's own stripe count.
 * NOTE(review): guards around the stripeno and swidth stores, if any, are
 * not visible in this listing.
 */
368 lsm_stripe_by_offset_join(struct lov_stripe_md *lsm, int *stripeno,
369 obd_off *lov_off, obd_off *swidth)
371 struct lov_extent *le;
373 LASSERT(lov_off != NULL);
375 le = lovea_off2le(lsm, *lov_off);
377 LASSERT(le != NULL && le->le_stripe_count != 0);
379 *lov_off = (*lov_off > le->le_start) ? (*lov_off - le->le_start) : 0;
382 *stripeno -= le->le_loi_idx;
385 *swidth = (obd_off)lsm->lsm_stripe_size * le->le_stripe_count;
/*
 * .lsm_stripe_offset_by_index handler for joined layouts: returns the
 * le_start of the extent that lovea_idx2le() finds for stripe_index, or 0
 * when the lookup yields NULL.
 */
389 lsm_stripe_offset_by_index_join(struct lov_stripe_md *lsm,
392 struct lov_extent *le;
394 le = lovea_idx2le(lsm, stripe_index);
396 return le ? le->le_start : 0;
/*
 * .lsm_stripe_offset_by_offset handler for joined layouts: returns the
 * le_start of the extent that lovea_off2le() finds for lov_off, or 0 when
 * the lookup yields NULL.
 */
400 lsm_stripe_offset_by_offset_join(struct lov_stripe_md *lsm,
403 struct lov_extent *le;
405 le = lovea_off2le(lsm, lov_off);
407 return le ? le->le_start : 0;
/*
 * .lsm_stripe_index_by_offset handler for joined layouts: returns the
 * le_loi_idx (index of the extent's first lov_oinfo) of the extent that
 * lovea_off2le() finds for lov_off, or 0 when the lookup yields NULL.
 */
411 lsm_stripe_index_by_offset_join(struct lov_stripe_md *lsm,
414 struct lov_extent *le = NULL;
416 le = lovea_off2le(lsm, lov_off);
418 return le ? le->le_loi_idx : 0;
/*
 * llog_process() callback used by lsm_revalidate_join(): unpacks one
 * llog_array_rec into slot "cursor" of the lsm's extent array and into the
 * matching range of lsm->lsm_oinfo[].
 *
 * handle: llog handle being processed (not used in the lines seen here).
 * rec:    record header; reinterpreted as struct llog_array_rec.
 * data:   struct lovea_unpack_args; args->cursor is post-incremented, so
 *         successive records fill successive extent slots.
 *
 * NOTE(review): the assignment of "lmm", the condition choosing between
 * the two le_loi_idx assignments, and the return statement are not visible
 * in this listing -- confirm against the file.
 */
421 static int lovea_unpack_array(struct llog_handle *handle,
422 struct llog_rec_hdr *rec, void *data)
424 struct lovea_unpack_args *args = (struct lovea_unpack_args *)data;
425 struct llog_array_rec *la_rec = (struct llog_array_rec*)rec;
426 struct mds_extent_desc *med = &la_rec->lmr_med;
427 struct lov_stripe_md *lsm = args->lsm;
428 int cursor = args->cursor++;
429 struct lov_mds_md *lmm;
430 struct lov_array_info *lai;
431 struct lov_oinfo * loi;
436 LASSERT(lsm->lsm_stripe_count != 0);
438 LASSERT(lsm->lsm_array != NULL);
440 lai = lsm->lsm_array;
443 lai->lai_ext_array[cursor].le_loi_idx = 0;
/* this extent's stripes begin right after the previous extent's stripes */
445 int next_loi_index = lai->lai_ext_array[cursor - 1].le_loi_idx +
446 lai->lai_ext_array[cursor - 1].le_stripe_count;
447 lai->lai_ext_array[cursor].le_loi_idx = next_loi_index;
449 /* insert extent desc into lsm extent array */
450 lai->lai_ext_array[cursor].le_start = le64_to_cpu(med->med_start);
451 lai->lai_ext_array[cursor].le_len = le64_to_cpu(med->med_len);
452 lai->lai_ext_array[cursor].le_stripe_count =
453 le32_to_cpu(lmm->lmm_stripe_count);
455 /* unpack extent's lmm to lov_oinfo array */
456 loi_index = lai->lai_ext_array[cursor].le_loi_idx;
/*
 * NOTE(review): med->med_start is printed here without the le64_to_cpu()
 * applied to it above, so the logged value is raw little-endian.
 */
457 CDEBUG(D_INFO, "lovea upackmd cursor %d, loi_index %d extent "
458 LPU64":"LPU64"\n", cursor, loi_index, med->med_start,
/* i indexes the record's objects; loi_index indexes the lsm-wide array */
461 for (i = 0; i < le32_to_cpu(lmm->lmm_stripe_count); i ++, loi_index++) {
462 /* XXX LOV STACKING call down to osc_unpackmd() */
463 loi = lsm->lsm_oinfo[loi_index];
464 loi->loi_id = le64_to_cpu(lmm->lmm_objects[i].l_object_id);
465 loi->loi_gr = le64_to_cpu(lmm->lmm_objects[i].l_object_gr);
466 loi->loi_ost_idx = le32_to_cpu(lmm->lmm_objects[i].l_ost_idx);
467 loi->loi_ost_gen = le32_to_cpu(lmm->lmm_objects[i].l_ost_gen);
/*
 * .lsm_revalidate handler for joined layouts: makes sure the extent array
 * of lsm->lsm_array is loaded.
 *
 * Steps seen here, in order:
 *  - assert that lsm->lsm_array exists;
 *  - obtain an llog context from obd, using LLOG_LOVEA_REPL_CTXT and
 *    LLOG_LOVEA_ORIG_CTXT (the caller may be a client or the MDS);
 *  - if lai_ext_array is already present, leave with rc = 0;
 *  - allocate lai_ext_count extents (rc = -ENOMEM on failure);
 *  - open the llog named by lai_array_id, initialise it as a plain log and
 *    run lovea_unpack_array() over every record, then close it, keeping
 *    the close result in rc2.
 *
 * NOTE(review): the same "get lsm logid" CDEBUG is issued twice, before and
 * after the allocation.
 * NOTE(review): lovea_free_array_info() releases lsm->lsm_array itself, not
 * only the extent array.  The condition guarding that call, the setup of
 * "args", and the labels/returns are not visible in this listing; confirm
 * that a later call cannot trip the entry assertion or use a stale pointer.
 */
473 static int lsm_revalidate_join(struct lov_stripe_md *lsm,
474 struct obd_device *obd)
476 struct llog_handle *llh;
477 struct llog_ctxt *ctxt;
478 struct lovea_unpack_args args;
482 LASSERT(lsm->lsm_array != NULL);
484 /*Revalidate lsm might be called from client or MDS server.
485 *So the ctxt might be in different position
487 ctxt = llog_get_context(obd, LLOG_LOVEA_REPL_CTXT);
489 ctxt = llog_get_context(obd, LLOG_LOVEA_ORIG_CTXT);
493 if (lsm->lsm_array && lsm->lsm_array->lai_ext_array)
494 GOTO(release_ctxt, rc = 0);
496 CDEBUG(D_INFO, "get lsm logid: "LPU64":"LPU64"\n",
497 lsm->lsm_array->lai_array_id.lgl_oid,
498 lsm->lsm_array->lai_array_id.lgl_ogr);
499 OBD_ALLOC(lsm->lsm_array->lai_ext_array,lsm->lsm_array->lai_ext_count *
500 sizeof (struct lov_extent));
501 if (!lsm->lsm_array->lai_ext_array)
502 GOTO(release_ctxt, rc = -ENOMEM);
504 CDEBUG(D_INFO, "get lsm logid: "LPU64":"LPU64"\n",
505 lsm->lsm_array->lai_array_id.lgl_oid,
506 lsm->lsm_array->lai_array_id.lgl_ogr);
508 rc = llog_create(ctxt, &llh, &lsm->lsm_array->lai_array_id, NULL);
514 rc = llog_init_handle(llh, LLOG_F_IS_PLAIN, NULL);
516 rc = llog_process(llh, lovea_unpack_array, &args, NULL);
517 rc2 = llog_close(llh);
522 lovea_free_array_info(lsm);
/*
 * .lsm_destroy handler for joined layouts: destroys the llog identified by
 * lsm->lsm_array->lai_array_id.
 *
 * md_exp must be non-NULL; the llog context is LLOG_LOVEA_REPL_CTXT of
 * md_exp->exp_obd.  The log is opened with llog_create(), initialised as a
 * plain log, destroyed with llog_destroy(), and its handle released with
 * llog_free_handle().
 * NOTE(review): the statement taken when OBD_MD_FLCOOKIE is not set in
 * oa->o_valid, the error checks between the llog calls, and the return
 * statements are not visible in this listing -- confirm.
 * NOTE(review): unlike the other join handlers this one is not static.
 */
528 int lsm_destroy_join(struct lov_stripe_md *lsm, struct obdo *oa,
529 struct obd_export *md_exp)
531 struct llog_ctxt *ctxt;
532 struct llog_handle *llh;
536 LASSERT(md_exp != NULL);
537 /* for orphan inodes the array id must be kept */
538 if (!(oa->o_valid & OBD_MD_FLCOOKIE))
541 ctxt = llog_get_context(md_exp->exp_obd, LLOG_LOVEA_REPL_CTXT);
545 LASSERT(lsm->lsm_array != NULL);
546 rc = llog_create(ctxt, &llh, &lsm->lsm_array->lai_array_id,
551 rc = llog_init_handle(llh, LLOG_F_IS_PLAIN, NULL);
553 rc = llog_destroy(llh);
555 llog_free_handle(llh);
/*
 * Verify a join-format LOV EA held in a buffer of lmm_bytes bytes.
 *
 * The buffer is reinterpreted as struct lov_mds_md_join.  It must be at
 * least sizeof(struct lov_mds_md_join) bytes and carry a non-zero array
 * log object id.  The CPU-order stripe count of the embedded header is
 * stored in *stripe_count and the embedded header (lmmj_md) is then checked
 * by lsm_lmm_verify_common(), whose result is returned.
 */
561 static int lsm_lmm_verify_join(struct lov_mds_md *lmm, int lmm_bytes,
564 struct lov_mds_md_join *lmmj = (struct lov_mds_md_join *)lmm;
/*
 * NOTE(review): int compared with an unsigned sizeof; a negative lmm_bytes
 * converts to a large unsigned value and would pass this check.
 */
566 if (lmm_bytes < sizeof(*lmmj)) {
567 CERROR("lov_mds_md too small: %d, need at least %d\n",
568 lmm_bytes, (int)sizeof(*lmmj));
/* a zero test gives the same answer in either byte order, so no swap */
572 if (lmmj->lmmj_array_id.lgl_oid == 0) {
573 CERROR("zero array object id\n");
577 *stripe_count = le32_to_cpu(lmmj->lmmj_md.lmm_stripe_count);
579 return lsm_lmm_verify_common(&lmmj->lmmj_md, lmm_bytes, *stripe_count);
/*
 * Build the lov_array_info for a joined lsm and attach it as
 * lsm->lsm_array.
 *
 * logid:        little-endian log id of the extent array; each of its
 *               three fields is converted to CPU order when copied.
 * extent_count: little-endian extent count (converted with le32_to_cpu
 *               here; lsm_unpackmd_join() passes the raw EA field).
 *
 * The extent array itself is not loaded here; lsm_revalidate_join()
 * allocates and fills lai_ext_array.
 * NOTE(review): the allocation of "lai", its failure handling and the
 * return statements are not visible in this listing -- confirm.
 */
582 static int lovea_init_array_info(struct lov_stripe_md *lsm,
583 struct llog_logid *logid,
586 struct lov_array_info *lai;
593 lai->lai_array_id.lgl_oid = le64_to_cpu(logid->lgl_oid);
594 lai->lai_array_id.lgl_ogr = le64_to_cpu(logid->lgl_ogr);
595 lai->lai_array_id.lgl_ogen = le32_to_cpu(logid->lgl_ogen);
596 lai->lai_ext_count = le32_to_cpu(extent_count);
597 lsm->lsm_array = lai;
/*
 * .lsm_unpackmd handler for joined layouts.
 *
 * Unpacks the header embedded in the join EA (lmmj_md) through
 * lsm_unpackmd_common(), then creates the array info from the EA's array
 * log id and extent count with lovea_init_array_info().  Both values are
 * passed on still little-endian; the callee converts them.
 * The per-stripe lov_oinfo entries are not filled in here; that is done by
 * lovea_unpack_array() during lsm_revalidate_join().
 * NOTE(review): the condition around the CERROR and the return statements
 * are not visible in this listing.
 */
601 static int lsm_unpackmd_join(struct lov_obd *lov, struct lov_stripe_md *lsm,
602 struct lov_mds_md *lmm)
604 struct lov_mds_md_join *lmmj = (struct lov_mds_md_join*)lmm;
608 lsm_unpackmd_common(lsm, &lmmj->lmmj_md);
610 rc = lovea_init_array_info(lsm, &lmmj->lmmj_array_id,
611 lmmj->lmmj_extent_count);
/*
 * NOTE(review): this message has no trailing newline, unlike the other
 * CERROR calls in this file, and misspells "array".
 */
613 CERROR("Init joined lsm id"LPU64" arrary error %d",
614 lsm->lsm_object_id, rc);
/*
 * Operation table for joined layouts: every slot uses the *_join variant
 * defined in this file.
 */
621 struct lsm_operations lsm_join_ops = {
622 .lsm_free = lsm_free_join,
623 .lsm_destroy = lsm_destroy_join,
624 .lsm_stripe_by_index = lsm_stripe_by_index_join,
625 .lsm_stripe_by_offset = lsm_stripe_by_offset_join,
626 .lsm_revalidate = lsm_revalidate_join,
627 .lsm_stripe_offset_by_index = lsm_stripe_offset_by_index_join,
628 .lsm_stripe_offset_by_offset = lsm_stripe_offset_by_offset_join,
629 .lsm_stripe_index_by_offset = lsm_stripe_index_by_offset_join,
630 .lsm_lmm_verify = lsm_lmm_verify_join,
631 .lsm_unpackmd = lsm_unpackmd_join,
/*
 * Verify a v3 LOV EA held in a buffer of lmm_bytes bytes.
 *
 * The buffer arrives typed as the v1 header and is reinterpreted as
 * struct lov_mds_md_v3.  It must hold at least the fixed v3 header and
 * then at least lov_mds_md_size(*stripe_count, LOV_MAGIC_V3) bytes, where
 * *stripe_count is the CPU-order stripe count read from the EA.  The
 * remaining header checks are done by lsm_lmm_verify_common() on the same
 * buffer cast back to the v1 type.
 */
635 static int lsm_lmm_verify_v3(struct lov_mds_md *lmmv1, int lmm_bytes,
638 struct lov_mds_md_v3 *lmm;
640 lmm = (struct lov_mds_md_v3 *)lmmv1;
/*
 * NOTE(review): int compared with an unsigned sizeof; a negative lmm_bytes
 * converts to a large unsigned value and would pass this check.
 */
642 if (lmm_bytes < sizeof(*lmm)) {
643 CERROR("lov_mds_md_v3 too small: %d, need at least %d\n",
644 lmm_bytes, (int)sizeof(*lmm));
/* only read after the fixed header is known to fit in the buffer */
648 *stripe_count = le32_to_cpu(lmm->lmm_stripe_count);
650 if (lmm_bytes < lov_mds_md_size(*stripe_count, LOV_MAGIC_V3)) {
651 CERROR("LOV EA V3 too small: %d, need %d\n",
652 lmm_bytes, lov_mds_md_size(*stripe_count, LOV_MAGIC_V3));
653 lov_dump_lmm_v3(D_WARNING, lmm);
657 return lsm_lmm_verify_common((struct lov_mds_md_v1 *)lmm, lmm_bytes,
/*
 * Unpack a v3 EA into lsm: the common header through
 * lsm_unpackmd_common(), the pool name, then one lov_oinfo per stripe
 * (object id, object group, OST index, OST generation), each converted
 * from little-endian.
 *
 * The loop reads lsm->lsm_stripe_count and lsm->lsm_oinfo[] without
 * assigning them, so the caller must have set both up beforehand.
 * Each stripe's OST index is checked against lov->desc.ld_tgt_count and
 * against an empty slot in lov->lov_tgts; a failure is logged and the EA
 * dumped.
 * NOTE(review): the return statements are not visible in this listing.
 */
661 int lsm_unpackmd_v3(struct lov_obd *lov, struct lov_stripe_md *lsm,
662 struct lov_mds_md *lmmv1)
664 struct lov_mds_md_v3 *lmm;
665 struct lov_oinfo *loi;
668 lmm = (struct lov_mds_md_v3 *)lmmv1;
670 lsm_unpackmd_common(lsm, (struct lov_mds_md_v1 *)lmm);
/*
 * NOTE(review): strncpy() writes no terminator when the source fills all
 * LOV_MAXPOOLNAME bytes; this is only safe if lsm_pool_name has at least
 * one more byte and that byte is already zero -- confirm.
 */
671 strncpy(lsm->lsm_pool_name, lmm->lmm_pool_name, LOV_MAXPOOLNAME);
673 for (i = 0; i < lsm->lsm_stripe_count; i++) {
674 /* XXX LOV STACKING call down to osc_unpackmd() */
675 loi = lsm->lsm_oinfo[i];
676 loi->loi_id = le64_to_cpu(lmm->lmm_objects[i].l_object_id);
677 loi->loi_gr = le64_to_cpu(lmm->lmm_objects[i].l_object_gr);
678 loi->loi_ost_idx = le32_to_cpu(lmm->lmm_objects[i].l_ost_idx);
679 loi->loi_ost_gen = le32_to_cpu(lmm->lmm_objects[i].l_ost_gen);
/* range-check the index before it is used to index lov_tgts below */
680 if (loi->loi_ost_idx >= lov->desc.ld_tgt_count) {
681 CERROR("OST index %d more than OST count %d\n",
682 loi->loi_ost_idx, lov->desc.ld_tgt_count);
683 lov_dump_lmm_v3(D_WARNING, lmm);
686 if (!lov->lov_tgts[loi->loi_ost_idx]) {
687 CERROR("OST index %d missing\n", loi->loi_ost_idx);
688 lov_dump_lmm_v3(D_WARNING, lmm);
/*
 * Operation table for v3 layouts: the same *_plain handlers as
 * lsm_v1_ops, with the v3-specific verify and unpack routines.
 */
696 struct lsm_operations lsm_v3_ops = {
697 .lsm_free = lsm_free_plain,
698 .lsm_destroy = lsm_destroy_plain,
699 .lsm_stripe_by_index = lsm_stripe_by_index_plain,
700 .lsm_stripe_by_offset = lsm_stripe_by_offset_plain,
701 .lsm_revalidate = lsm_revalidate_plain,
702 .lsm_stripe_offset_by_index = lsm_stripe_offset_by_index_plain,
703 .lsm_stripe_offset_by_offset = lsm_stripe_offset_by_offset_plain,
704 .lsm_stripe_index_by_offset = lsm_stripe_index_by_offset_plain,
705 .lsm_lmm_verify = lsm_lmm_verify_v3,
706 .lsm_unpackmd = lsm_unpackmd_v3,