1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
5 * Author: Andreas Dilger <adilger@clusterfs.com>
7 * This file is part of Lustre, http://www.lustre.org.
9 * Lustre is free software; you can redistribute it and/or
10 * modify it under the terms of version 2 of the GNU General Public
11 * License as published by the Free Software Foundation.
13 * Lustre is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with Lustre; if not, write to the Free Software
20 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22 * (Un)packing of OST/MDS requests
26 #define DEBUG_SUBSYSTEM S_LLITE
28 #include <liblustre.h>
31 #include <linux/lustre_net.h>
32 #include <linux/obd.h>
33 #include <linux/obd_lov.h>
34 #include <linux/obd_class.h>
35 #include <linux/obd_support.h>
37 #include "lov_internal.h"
/* Print the contents of a lov_mds_md through CDEBUG.
 *
 * @level is passed unchanged as the first argument of every CDEBUG call.
 * @lmm   is the structure to print; it is only read here.
 *
 * NOTE(review): the fields are printed as stored, with no le*_to_cpu
 * conversion, whereas lov_verify_lmm() converts the same fields before
 * using them -- confirm the output is meaningful on big-endian hosts. */
39 void lov_dump_lmm(int level, struct lov_mds_md *lmm)
41 struct lov_object_id *loi;
/* Summary: object id, magic and OST count, then the stripe geometry. */
44 CDEBUG(level, "objid "LPX64", magic %#08x, ost_count %u\n",
45 lmm->lmm_object_id, lmm->lmm_magic, lmm->lmm_ost_count);
46 CDEBUG(level,"stripe_size %u, stripe_count %u, stripe_offset %u\n",
47 lmm->lmm_stripe_size, lmm->lmm_stripe_count,
48 lmm->lmm_stripe_offset);
/* Walk lmm_objects from slot 0, bounded by lmm_ost_count, one message per
 * slot. */
49 for (idx = 0, loi = lmm->lmm_objects; idx < lmm->lmm_ost_count;
51 CDEBUG(level, "ost idx %u subobj "LPX64"\n", idx,
/* Assertion helper for code that has a local variable named lmm in scope:
 * when @test is false, lmm is dumped at D_ERROR first and then LASSERT(test)
 * is invoked on the same expression.  @test appears twice in the expansion,
 * so it should not have side effects. */
55 #define LMM_ASSERT(test) \
57 if (!(test)) lov_dump_lmm(D_ERROR, lmm); \
58 LASSERT(test); /* so we know what assertion failed */ \
61 /* Pack LOV object metadata for disk storage. It is packed in LE byte
62 * order and is opaque to the networking layer.
64 * XXX In the future, this will be enhanced to query the underlying OSC
65 * device(s) for their EA sizes so we can stack
66 * LOVs properly. For now lov_mds_md_size() just assumes one obd_id
/* Pack an in-memory lov_stripe_md into a little-endian lov_mds_md.
 *
 * @conn  handle resolved to the LOV obd_device with class_conn2obd().
 * @lmmp  in/out pointer to the packed buffer; depending on the path taken
 *        *lmmp is released with OBD_FREE or allocated with OBD_ALLOC.
 * @lsm   in-memory striping metadata to pack; its lsm_magic must equal
 *        LOV_MAGIC.
 *
 * NOTE(review): the conditions selecting each path and the return values
 * are not shown in this view of the function -- confirm against the full
 * body before relying on this description. */
69 int lov_packmd(struct lustre_handle *conn, struct lov_mds_md **lmmp,
70 struct lov_stripe_md *lsm)
72 struct obd_device *obd = class_conn2obd(conn);
73 struct lov_obd *lov = &obd->u.lov;
74 struct lov_oinfo *loi;
75 struct lov_mds_md *lmm;
/* Both counts start out as the descriptor's target count. */
76 int ost_count = lov->desc.ld_tgt_count;
77 int stripe_count = ost_count;
84 if (lsm->lsm_magic != LOV_MAGIC) {
85 CERROR("bad mem LOV MAGIC: %#010x != %#010x\n",
86 lsm->lsm_magic, LOV_MAGIC);
89 stripe_count = lsm->lsm_stripe_count;
/* Track the largest OST index referenced by any stripe of lsm. */
91 for (i = 0,loi = lsm->lsm_oinfo; i < stripe_count; i++,loi++) {
92 if (loi->loi_ost_idx > max)
93 max = loi->loi_ost_idx;
98 /* XXX LOV STACKING call into osc for sizes */
99 lmm_size = lov_mds_md_size(ost_count);
/* Free path: the size to release is derived from the count stored in the
 * buffer itself.
 * NOTE(review): lmm_ost_count is read here with le32_to_cpu but written
 * below with cpu_to_le16 and read in lov_verify_lmm() with le16_to_cpu;
 * this looks inconsistent on big-endian hosts -- confirm the field width. */
105 ost_count = le32_to_cpu ((*lmmp)->lmm_ost_count);
106 OBD_FREE(*lmmp, lov_mds_md_size(ost_count));
112 OBD_ALLOC(*lmmp, lmm_size);
/* Header fields are stored in little-endian byte order. */
118 lmm->lmm_magic = cpu_to_le32 (LOV_MAGIC);
119 lmm->lmm_ost_count = cpu_to_le16 (ost_count);
124 lmm->lmm_object_id = cpu_to_le64 (lsm->lsm_object_id);
125 lmm->lmm_stripe_count = cpu_to_le16 (stripe_count);
126 lmm->lmm_stripe_size = cpu_to_le32 (lsm->lsm_stripe_size);
127 lmm->lmm_stripe_offset = cpu_to_le32 (lsm->lsm_stripe_offset);
129 /* Only fill in the object ids which we are actually using.
130 * Assumes lmm_objects is otherwise zero-filled. */
131 for (i = 0, loi = lsm->lsm_oinfo; i < stripe_count; i++, loi++) {
132 /* XXX call down to osc_packmd() to do the packing */
133 LASSERT (loi->loi_id);
/* Each object id is stored in the slot selected by its OST index, not in
 * stripe order. */
134 lmm->lmm_objects[loi->loi_ost_idx].l_object_id =
135 cpu_to_le64 (loi->loi_id);
/* Resolve a requested stripe count against the LOV descriptor.
 *
 * @lov          LOV device whose descriptor supplies the default stripe
 *               count and the active target count.
 * @stripe_count requested count.
 *
 * The visible code can replace @stripe_count with
 * lov->desc.ld_default_stripe_count, and then falls back to
 * lov->desc.ld_active_tgt_count when the count is zero or larger than the
 * number of active targets.
 *
 * NOTE(review): the condition guarding the default assignment and the
 * return statement are not shown here -- presumably the resolved
 * stripe_count is returned; confirm. */
141 int lov_get_stripecnt(struct lov_obd *lov, int stripe_count)
144 stripe_count = lov->desc.ld_default_stripe_count;
145 if (!stripe_count || stripe_count > lov->desc.ld_active_tgt_count)
146 stripe_count = lov->desc.ld_active_tgt_count;
/* Sanity-check a packed lov_mds_md and decode its counts.
 *
 * @lmm          packed (little-endian) metadata to validate.
 * @lmm_bytes    number of bytes available at @lmm.
 * @ost_count    out: lmm_ost_count converted to CPU byte order.
 * @stripe_count out: lmm_stripe_count converted to CPU byte order.
 * @ost_offset   out: lmm_stripe_offset converted to CPU byte order.
 *
 * Each failed check logs with CERROR and, once the magic has been examined,
 * dumps the structure at D_WARNING.
 *
 * NOTE(review): the return statements are not shown in this view; the
 * caller lov_unpackmd() stores the result in an int -- confirm the exact
 * error codes against the full body. */
151 static int lov_verify_lmm(struct lov_mds_md *lmm, int lmm_bytes,
152 int *ost_count, int *stripe_count, int *ost_offset)
/* NOTE(review): lmm_bytes is an int compared against a sizeof result, so a
 * negative lmm_bytes is converted to a large unsigned value and would pass
 * this check -- confirm callers never pass a negative size. */
154 if (lmm_bytes < sizeof(*lmm)) {
155 CERROR("lov_mds_md too small: %d, need at least %d\n",
156 lmm_bytes, (int)sizeof(*lmm));
160 if (le32_to_cpu(lmm->lmm_magic) != LOV_MAGIC) {
161 CERROR("bad disk LOV MAGIC: %#08x != %#08x\n",
162 le32_to_cpu(lmm->lmm_magic), LOV_MAGIC);
163 lov_dump_lmm(D_WARNING, lmm);
/* Decode the header fields into the caller's output variables. */
167 *ost_count = le16_to_cpu(lmm->lmm_ost_count);
168 *stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
169 *ost_offset = le32_to_cpu(lmm->lmm_stripe_offset);
171 if (*ost_count == 0 || *stripe_count == 0) {
172 CERROR("zero OST count %d or stripe count %d\n",
173 *ost_count, *stripe_count);
174 lov_dump_lmm(D_WARNING, lmm);
/* The buffer must be large enough for ost_count object slots. */
178 if (lmm_bytes < lov_mds_md_size(*ost_count)) {
179 CERROR("lov_mds_md too small: %d, need %d\n",
180 lmm_bytes, lov_mds_md_size(*ost_count));
181 lov_dump_lmm(D_WARNING, lmm);
/* NOTE(review): an offset equal to ost_count is accepted by this test;
 * lov_unpackmd() reduces the offset modulo ost_count before indexing, so it
 * wraps to slot 0 -- confirm whether ">=" was intended. */
185 if (*ost_offset > *ost_count) {
186 CERROR("starting OST offset %d > number of OSTs %d\n",
187 *ost_offset, *ost_count);
188 lov_dump_lmm(D_WARNING, lmm);
192 if (*stripe_count > *ost_count) {
193 CERROR("stripe count %d > number of OSTs %d\n",
194 *stripe_count, *ost_count);
195 lov_dump_lmm(D_WARNING, lmm);
/* Zero is zero in any byte order, so no conversion is needed here. */
199 if (lmm->lmm_object_id == 0) {
200 CERROR("zero object id\n");
201 lov_dump_lmm(D_WARNING, lmm);
/* Allocate and initialise an in-memory lov_stripe_md for @stripe_count
 * stripes.
 *
 * @lsmp         out: receives the buffer allocated with OBD_ALLOC.
 * @stripe_count number of lov_oinfo entries to size and initialise.
 *
 * NOTE(review): the allocation-failure check and the return statement are
 * not shown here.  lov_unpackmd() stores the result in a variable named
 * lsm_size, which suggests the allocated size is returned on success --
 * confirm against the full body. */
208 int lov_alloc_memmd(struct lov_stripe_md **lsmp, int stripe_count)
210 int lsm_size = lov_stripe_md_size(stripe_count);
211 struct lov_oinfo *loi;
214 OBD_ALLOC(*lsmp, lsm_size);
218 (*lsmp)->lsm_magic = LOV_MAGIC;
219 (*lsmp)->lsm_stripe_count = stripe_count;
/* The byte limit scales with the stripe count. */
220 (*lsmp)->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES * stripe_count;
/* Point each stripe's loi_dirty_ot at its own inline instance and
 * initialise it. */
222 for (i = 0, loi = (*lsmp)->lsm_oinfo; i < stripe_count; i++, loi++){
223 loi->loi_dirty_ot = &loi->loi_dirty_ot_inline;
224 ot_init(loi->loi_dirty_ot);
/* Release an in-memory lov_stripe_md.
 *
 * @lsmp points to the pointer to free.  *lsmp must be non-NULL because its
 * lsm_stripe_count is read to compute the size handed to OBD_FREE. */
229 void lov_free_memmd(struct lov_stripe_md **lsmp)
231 OBD_FREE(*lsmp, lov_stripe_md_size((*lsmp)->lsm_stripe_count));
235 /* Unpack LOV object metadata from disk storage. It is packed in LE byte
236 * order and is opaque to the networking layer.
/* Build an in-memory lov_stripe_md from a packed lov_mds_md.
 *
 * @conn      handle resolved to the LOV obd_device with class_conn2obd().
 * @lsmp      in/out pointer to the in-memory metadata; per the comments
 *            below it may be absent (size query), freed, or allocated.
 * @lmm       packed metadata to unpack, validated by lov_verify_lmm().
 * @lmm_bytes number of bytes available at @lmm.
 *
 * NOTE(review): several conditions and all but one of the return
 * statements are not shown in this view -- confirm the exact path
 * selection and return values against the full body. */
238 int lov_unpackmd(struct lustre_handle *conn, struct lov_stripe_md **lsmp,
239 struct lov_mds_md *lmm, int lmm_bytes)
241 struct obd_device *obd = class_conn2obd(conn);
242 struct lov_obd *lov = &obd->u.lov;
243 struct lov_stripe_md *lsm;
244 struct lov_oinfo *loi;
252 /* If passed an MDS struct use values from there, otherwise defaults */
254 i = lov_verify_lmm(lmm, lmm_bytes, &ost_count, &stripe_count,
260 stripe_count = lov_get_stripecnt(lov, 0);
264 /* If we aren't passed an lsmp struct, we just want the size */
266 /* XXX LOV STACKING call into osc for sizes */
267 RETURN(lov_stripe_md_size(stripe_count));
269 /* If we are passed an allocated struct but nothing to unpack, free */
271 lov_free_memmd(lsmp);
275 lsm_size = lov_alloc_memmd(lsmp, stripe_count);
279 /* If we are passed a pointer but nothing to unpack, we only alloc */
284 lsm->lsm_object_id = le64_to_cpu(lmm->lmm_object_id);
285 lsm->lsm_stripe_size = le32_to_cpu(lmm->lmm_stripe_size);
286 lsm->lsm_stripe_offset = ost_offset;
/* Visit every object slot once, starting at the stripe offset and wrapping
 * around at ost_count; loi is the next lsm_oinfo entry to fill. */
288 for (i = 0, loi = lsm->lsm_oinfo; i < ost_count; i++, ost_offset++) {
289 ost_offset %= ost_count;
/* A zero object id marks a slot that holds no object for this file. */
291 if (!lmm->lmm_objects[ost_offset].l_object_id)
294 /* XXX LOV STACKING call down to osc_unpackmd() */
296 le64_to_cpu(lmm->lmm_objects[ost_offset].l_object_id);
297 loi->loi_ost_idx = ost_offset;
/* The number of lsm_oinfo entries consumed must equal stripe_count;
 * otherwise the packed data is treated as incomplete and lsm is freed. */
301 if (loi - lsm->lsm_oinfo != stripe_count) {
302 CERROR("missing objects in lmm struct\n");
303 lov_dump_lmm(D_WARNING, lmm);
304 lov_free_memmd(lsmp);
312 /* Configure object striping information on a new file.
314 * @lmmu is a pointer to a user struct with one or more of the fields set to
315 * indicate the application preference: lmm_stripe_count, lmm_stripe_size,
316 * lmm_stripe_offset, and lmm_stripe_pattern. lmm_magic must be LOV_MAGIC.
317 * @lsmp is a pointer to an in-core stripe MD that needs to be filled in.
/* Create in-memory striping metadata from a user-supplied request.
 *
 * @conn handle resolved to the LOV obd_device with class_conn2obd().
 * @lsmp out: allocated with lov_alloc_memmd() and filled with the requested
 *       stripe offset and stripe size.
 * @lmmu user-space pointer; sizeof(struct lov_mds_md) bytes are copied from
 *       it with copy_from_user().
 *
 * NOTE(review): the return statements are not shown in this view --
 * confirm the error codes against the full body. */
319 int lov_setstripe(struct lustre_handle *conn, struct lov_stripe_md **lsmp,
320 struct lov_mds_md *lmmu)
322 struct obd_device *obd = class_conn2obd(conn);
323 struct lov_obd *lov = &obd->u.lov;
324 struct lov_mds_md lmm;
329 rc = copy_from_user(&lmm, lmmu, sizeof(lmm));
333 /* Bug 1185 FIXME: struct lov_mds_md is little-endian everywhere else */
/* The user copy is used as-is: no byte-order conversion is applied to its
 * fields in this function. */
335 if (lmm.lmm_magic != LOV_MAGIC) {
336 CDEBUG(D_IOCTL, "bad userland LOV MAGIC: %#08x != %#08x\n",
337 lmm.lmm_magic, LOV_MAGIC);
340 #if 0 /* the stripe_count/offset is "advisory", and it gets fixed later */
341 if (lmm.lmm_stripe_count > lov->desc.ld_tgt_count &&
342 lmm.lmm_stripe_count != 0xffffffff) {
343 CERROR("stripe count %u more than OST count %d\n",
344 lmm.lmm_stripe_count, lov->desc.ld_tgt_count);
347 if (lmm.lmm_stripe_offset >= lov->desc.ld_tgt_count &&
348 lmm.lmm_stripe_offset != 0xffffffff) {
349 CERROR("stripe offset %u more than max OST index %d\n",
350 lmm.lmm_stripe_offset, lov->desc.ld_tgt_count);
/* The stripe size must be a multiple of PAGE_SIZE. */
354 if (lmm.lmm_stripe_size & (PAGE_SIZE - 1)) {
355 CDEBUG(D_IOCTL, "stripe size %u not multiple of %lu\n",
356 lmm.lmm_stripe_size, PAGE_SIZE);
/* Resolve the requested count against the descriptor's default and active
 * target count. */
359 stripe_count = lov_get_stripecnt(lov, lmm.lmm_stripe_count);
/* The product is computed in 64 bits so it can be compared with the
 * largest unsigned long.
 * NOTE(review): the check uses the resolved stripe_count but the message
 * prints the raw lmm.lmm_stripe_count -- confirm which value is meant. */
361 if ((__u64)lmm.lmm_stripe_size * stripe_count > ~0UL) {
362 CDEBUG(D_IOCTL, "stripe width %ux%u > %lu on 32-bit system\n",
363 lmm.lmm_stripe_size, (int)lmm.lmm_stripe_count, ~0UL);
367 rc = lov_alloc_memmd(lsmp, stripe_count);
372 (*lsmp)->lsm_stripe_offset = lmm.lmm_stripe_offset;
373 (*lsmp)->lsm_stripe_size = lmm.lmm_stripe_size;
378 /* Retrieve object striping information.
380 * @lmmu is a pointer to a user-space struct with lmm_ost_count indicating
381 * the maximum number of OST indices which will fit in the user buffer.
382 * lmm_magic must be LOV_MAGIC.
/* Copy an object's packed striping metadata out to a user buffer.
 *
 * @conn handle passed to lov_packmd() and obd_free_diskmd().
 * @lsm  in-memory striping metadata to pack.
 * @lmmu user-space pointer: a struct lov_mds_md header is first read from
 *       it with copy_from_user(), and the packed result is written back
 *       with copy_to_user().
 *
 * NOTE(review): the return statements and the assignment of lmm_size are
 * not shown in this view -- confirm against the full body. */
384 int lov_getstripe(struct lustre_handle *conn, struct lov_stripe_md *lsm,
385 struct lov_mds_md *lmmu)
387 struct lov_mds_md lmm, *lmmk = NULL;
394 rc = copy_from_user(&lmm, lmmu, sizeof(lmm));
398 if (lmm.lmm_magic != LOV_MAGIC)
/* lmmk starts out NULL, so the packed copy is produced by lov_packmd(). */
401 rc = lov_packmd(conn, &lmmk, lsm);
404 /* Bug 1185 FIXME: convert lmmk to big-endian before copy to userspace */
408 /* User wasn't expecting this many OST entries */
/* NOTE(review): lmmk->lmm_ost_count was stored by lov_packmd() with
 * cpu_to_le16 and is compared here without conversion against a value taken
 * directly from user space -- confirm behaviour on big-endian hosts. */
409 if (lmm.lmm_ost_count < lmmk->lmm_ost_count)
411 else if (copy_to_user(lmmu, lmmk, lmm_size))
/* The packed buffer is released after the comparison and copy above. */
414 obd_free_diskmd (conn, &lmmk);