Whamcloud - gitweb
merge b_devel into HEAD, which will become 0.7.3
[fs/lustre-release.git] / lustre / lov / lov_pack.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  *  Copyright (C) 2002, 2003 Cluster File Systems, Inc.
5  *   Author: Andreas Dilger <adilger@clusterfs.com>
6  *
7  *   This file is part of Lustre, http://www.lustre.org.
8  *
9  *   Lustre is free software; you can redistribute it and/or
10  *   modify it under the terms of version 2 of the GNU General Public
11  *   License as published by the Free Software Foundation.
12  *
13  *   Lustre is distributed in the hope that it will be useful,
14  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
15  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  *   GNU General Public License for more details.
17  *
18  *   You should have received a copy of the GNU General Public License
19  *   along with Lustre; if not, write to the Free Software
20  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21  *
22  * (Un)packing of OST/MDS requests
23  *
24  */
25
26 #define DEBUG_SUBSYSTEM S_LLITE
27 #ifndef __KERNEL__
28 #include <liblustre.h>
29 #endif
30
31 #include <linux/lustre_net.h>
32 #include <linux/obd.h>
33 #include <linux/obd_lov.h>
34 #include <linux/obd_class.h>
35 #include <linux/obd_support.h>
36
37 #include "lov_internal.h"
38
39 void lov_dump_lmm(int level, struct lov_mds_md *lmm)
40 {
41         struct lov_object_id *loi;
42         int idx;
43
44         CDEBUG(level, "objid "LPX64", magic %#08x, ost_count %u\n",
45                lmm->lmm_object_id, lmm->lmm_magic, lmm->lmm_ost_count);
46         CDEBUG(level,"stripe_size %u, stripe_count %u, stripe_offset %u\n",
47                lmm->lmm_stripe_size, lmm->lmm_stripe_count,
48                lmm->lmm_stripe_offset);
49         for (idx = 0, loi = lmm->lmm_objects; idx < lmm->lmm_ost_count;
50              idx++, loi++)
51                 CDEBUG(level, "ost idx %u subobj "LPX64"\n", idx,
52                        loi->l_object_id);
53 }
54
/* Assert that @test holds.  On failure, first dump the lov_mds_md that is
 * named "lmm" in the invoking scope at D_ERROR level, then trip LASSERT.
 *
 * @test is evaluated twice on purpose: passing the expression itself to
 * LASSERT lets us know which assertion failed.  Do not pass an expression
 * with side effects.
 */
#define LMM_ASSERT(test)                                                \
do {                                                                    \
        if (!(test))                                                    \
                lov_dump_lmm(D_ERROR, lmm);                             \
        LASSERT(test);                                                  \
} while (0)
60
61 /* Pack LOV object metadata for disk storage.  It is packed in LE byte
62  * order and is opaque to the networking layer.
63  *
64  * XXX In the future, this will be enhanced to get the EA size from the
65  *     underlying OSC device(s) to get their EA sizes so we can stack
66  *     LOVs properly.  For now lov_mds_md_size() just assumes one obd_id
67  *     per stripe.
68  */
69 int lov_packmd(struct lustre_handle *conn, struct lov_mds_md **lmmp,
70                struct lov_stripe_md *lsm)
71 {
72         struct obd_device *obd = class_conn2obd(conn);
73         struct lov_obd *lov = &obd->u.lov;
74         struct lov_oinfo *loi;
75         struct lov_mds_md *lmm;
76         int ost_count = lov->desc.ld_tgt_count;
77         int stripe_count = ost_count;
78         int lmm_size;
79         int i;
80         ENTRY;
81
82         if (lsm) {
83                 int i, max = 0;
84                 if (lsm->lsm_magic != LOV_MAGIC) {
85                         CERROR("bad mem LOV MAGIC: %#010x != %#010x\n",
86                                lsm->lsm_magic, LOV_MAGIC);
87                         RETURN(-EINVAL);
88                 }
89                 stripe_count = lsm->lsm_stripe_count;
90
91                 for (i = 0,loi = lsm->lsm_oinfo; i < stripe_count; i++,loi++) {
92                         if (loi->loi_ost_idx > max)
93                                 max = loi->loi_ost_idx;
94                 }
95                 ost_count = max + 1;
96         }
97
98         /* XXX LOV STACKING call into osc for sizes */
99         lmm_size = lov_mds_md_size(ost_count);
100
101         if (!lmmp)
102                 RETURN(lmm_size);
103
104         if (*lmmp && !lsm) {
105                 ost_count = le32_to_cpu ((*lmmp)->lmm_ost_count);
106                 OBD_FREE(*lmmp, lov_mds_md_size(ost_count));
107                 *lmmp = NULL;
108                 RETURN(0);
109         }
110
111         if (!*lmmp) {
112                 OBD_ALLOC(*lmmp, lmm_size);
113                 if (!*lmmp)
114                         RETURN(-ENOMEM);
115         }
116
117         lmm = *lmmp;
118         lmm->lmm_magic = cpu_to_le32 (LOV_MAGIC);
119         lmm->lmm_ost_count = cpu_to_le16 (ost_count);
120
121         if (!lsm)
122                 RETURN(lmm_size);
123
124         lmm->lmm_object_id = cpu_to_le64 (lsm->lsm_object_id);
125         lmm->lmm_stripe_count = cpu_to_le16 (stripe_count);
126         lmm->lmm_stripe_size = cpu_to_le32 (lsm->lsm_stripe_size);
127         lmm->lmm_stripe_offset = cpu_to_le32 (lsm->lsm_stripe_offset);
128
129         /* Only fill in the object ids which we are actually using.
130          * Assumes lmm_objects is otherwise zero-filled. */
131         for (i = 0, loi = lsm->lsm_oinfo; i < stripe_count; i++, loi++) {
132                 /* XXX call down to osc_packmd() to do the packing */
133                 LASSERT (loi->loi_id);
134                 lmm->lmm_objects[loi->loi_ost_idx].l_object_id =
135                         cpu_to_le64 (loi->loi_id);
136         }
137
138         RETURN(lmm_size);
139 }
140
141 int lov_get_stripecnt(struct lov_obd *lov, int stripe_count)
142 {
143         if (!stripe_count)
144                 stripe_count = lov->desc.ld_default_stripe_count;
145         if (!stripe_count || stripe_count > lov->desc.ld_active_tgt_count)
146                 stripe_count = lov->desc.ld_active_tgt_count;
147
148         return stripe_count;
149 }
150
151 static int lov_verify_lmm(struct lov_mds_md *lmm, int lmm_bytes,
152                           int *ost_count, int *stripe_count, int *ost_offset)
153 {
154         if (lmm_bytes < sizeof(*lmm)) {
155                 CERROR("lov_mds_md too small: %d, need at least %d\n",
156                        lmm_bytes, (int)sizeof(*lmm));
157                 return -EINVAL;
158         }
159
160         if (le32_to_cpu(lmm->lmm_magic) != LOV_MAGIC) {
161                 CERROR("bad disk LOV MAGIC: %#08x != %#08x\n",
162                        le32_to_cpu(lmm->lmm_magic), LOV_MAGIC);
163                 lov_dump_lmm(D_WARNING, lmm);
164                 return -EINVAL;
165         }
166
167         *ost_count = le16_to_cpu(lmm->lmm_ost_count);
168         *stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
169         *ost_offset = le32_to_cpu(lmm->lmm_stripe_offset);
170
171         if (*ost_count == 0 || *stripe_count == 0) {
172                 CERROR("zero OST count %d or stripe count %d\n",
173                        *ost_count, *stripe_count);
174                 lov_dump_lmm(D_WARNING, lmm);
175                 return -EINVAL;
176         }
177
178         if (lmm_bytes < lov_mds_md_size(*ost_count)) {
179                 CERROR("lov_mds_md too small: %d, need %d\n",
180                        lmm_bytes, lov_mds_md_size(*ost_count));
181                 lov_dump_lmm(D_WARNING, lmm);
182                 return -EINVAL;
183         }
184
185         if (*ost_offset > *ost_count) {
186                 CERROR("starting OST offset %d > number of OSTs %d\n",
187                        *ost_offset, *ost_count);
188                 lov_dump_lmm(D_WARNING, lmm);
189                 return -EINVAL;
190         }
191
192         if (*stripe_count > *ost_count) {
193                 CERROR("stripe count %d > number of OSTs %d\n",
194                        *stripe_count, *ost_count);
195                 lov_dump_lmm(D_WARNING, lmm);
196                 return -EINVAL;
197         }
198
199         if (lmm->lmm_object_id == 0) {
200                 CERROR("zero object id\n");
201                 lov_dump_lmm(D_WARNING, lmm);
202                 return -EINVAL;
203         }
204
205         return 0;
206 }
207
208 int lov_alloc_memmd(struct lov_stripe_md **lsmp, int stripe_count)
209 {
210         int lsm_size = lov_stripe_md_size(stripe_count);
211         struct lov_oinfo *loi;
212         int i;
213
214         OBD_ALLOC(*lsmp, lsm_size);
215         if (!*lsmp)
216                 return -ENOMEM;
217
218         (*lsmp)->lsm_magic = LOV_MAGIC;
219         (*lsmp)->lsm_stripe_count = stripe_count;
220         (*lsmp)->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES * stripe_count;
221
222         for (i = 0, loi = (*lsmp)->lsm_oinfo; i < stripe_count; i++, loi++){
223                 loi->loi_dirty_ot = &loi->loi_dirty_ot_inline;
224                 ot_init(loi->loi_dirty_ot);
225         }
226         return lsm_size;
227 }
228
229 void lov_free_memmd(struct lov_stripe_md **lsmp)
230 {
231         OBD_FREE(*lsmp, lov_stripe_md_size((*lsmp)->lsm_stripe_count));
232         *lsmp = NULL;
233 }
234
235 /* Unpack LOV object metadata from disk storage.  It is packed in LE byte
236  * order and is opaque to the networking layer.
237  */
238 int lov_unpackmd(struct lustre_handle *conn, struct lov_stripe_md **lsmp,
239                  struct lov_mds_md *lmm, int lmm_bytes)
240 {
241         struct obd_device *obd = class_conn2obd(conn);
242         struct lov_obd *lov = &obd->u.lov;
243         struct lov_stripe_md *lsm;
244         struct lov_oinfo *loi;
245         int ost_count;
246         int ost_offset;
247         int stripe_count;
248         int lsm_size;
249         int i;
250         ENTRY;
251
252         /* If passed an MDS struct use values from there, otherwise defaults */
253         if (lmm) {
254                 i = lov_verify_lmm(lmm, lmm_bytes, &ost_count, &stripe_count,
255                                    &ost_offset);
256                 if (i)
257                         RETURN(i);
258         } else {
259                 ost_count = 0;
260                 stripe_count = lov_get_stripecnt(lov, 0);
261                 ost_offset = 0;
262         }
263
264         /* If we aren't passed an lsmp struct, we just want the size */
265         if (!lsmp)
266                 /* XXX LOV STACKING call into osc for sizes */
267                 RETURN(lov_stripe_md_size(stripe_count));
268
269         /* If we are passed an allocated struct but nothing to unpack, free */
270         if (*lsmp && !lmm) {
271                 lov_free_memmd(lsmp);
272                 RETURN(0);
273         }
274
275         lsm_size = lov_alloc_memmd(lsmp, stripe_count);
276         if (lsm_size < 0)
277                 RETURN(lsm_size);
278
279         /* If we are passed a pointer but nothing to unpack, we only alloc */
280         if (!lmm)
281                 RETURN(lsm_size);
282
283         lsm = *lsmp;
284         lsm->lsm_object_id = le64_to_cpu(lmm->lmm_object_id);
285         lsm->lsm_stripe_size = le32_to_cpu(lmm->lmm_stripe_size);
286         lsm->lsm_stripe_offset = ost_offset;
287
288         for (i = 0, loi = lsm->lsm_oinfo; i < ost_count; i++, ost_offset++) {
289                 ost_offset %= ost_count;
290
291                 if (!lmm->lmm_objects[ost_offset].l_object_id)
292                         continue;
293
294                 /* XXX LOV STACKING call down to osc_unpackmd() */
295                 loi->loi_id =
296                         le64_to_cpu(lmm->lmm_objects[ost_offset].l_object_id);
297                 loi->loi_ost_idx = ost_offset;
298                 loi++;
299         }
300
301         if (loi - lsm->lsm_oinfo != stripe_count) {
302                 CERROR("missing objects in lmm struct\n");
303                 lov_dump_lmm(D_WARNING, lmm);
304                 lov_free_memmd(lsmp);
305                 RETURN(-EINVAL);
306         }
307
308
309         RETURN(lsm_size);
310 }
311
312 /* Configure object striping information on a new file.
313  *
314  * @lmmu is a pointer to a user struct with one or more of the fields set to
315  * indicate the application preference: lmm_stripe_count, lmm_stripe_size,
316  * lmm_stripe_offset, and lmm_stripe_pattern.  lmm_magic must be LOV_MAGIC.
317  * @lsmp is a pointer to an in-core stripe MD that needs to be filled in.
318  */
319 int lov_setstripe(struct lustre_handle *conn, struct lov_stripe_md **lsmp,
320                   struct lov_mds_md *lmmu)
321 {
322         struct obd_device *obd = class_conn2obd(conn);
323         struct lov_obd *lov = &obd->u.lov;
324         struct lov_mds_md lmm;
325         int stripe_count;
326         int rc;
327         ENTRY;
328
329         rc = copy_from_user(&lmm, lmmu, sizeof(lmm));
330         if (rc)
331                 RETURN(-EFAULT);
332
333         /* Bug 1185 FIXME: struct lov_mds_md is little-endian everywhere else */
334
335         if (lmm.lmm_magic != LOV_MAGIC) {
336                 CDEBUG(D_IOCTL, "bad userland LOV MAGIC: %#08x != %#08x\n",
337                        lmm.lmm_magic, LOV_MAGIC);
338                 RETURN(-EINVAL);
339         }
340 #if 0   /* the stripe_count/offset is "advisory", and it gets fixed later */
341         if (lmm.lmm_stripe_count > lov->desc.ld_tgt_count &&
342             lmm.lmm_stripe_count != 0xffffffff) {
343                 CERROR("stripe count %u more than OST count %d\n",
344                        lmm.lmm_stripe_count, lov->desc.ld_tgt_count);
345                 RETURN(-EINVAL);
346         }
347         if (lmm.lmm_stripe_offset >= lov->desc.ld_tgt_count &&
348             lmm.lmm_stripe_offset != 0xffffffff) {
349                 CERROR("stripe offset %u more than max OST index %d\n",
350                        lmm.lmm_stripe_offset, lov->desc.ld_tgt_count);
351                 RETURN(-EINVAL);
352         }
353 #endif
354         if (lmm.lmm_stripe_size & (PAGE_SIZE - 1)) {
355                 CDEBUG(D_IOCTL, "stripe size %u not multiple of %lu\n",
356                        lmm.lmm_stripe_size, PAGE_SIZE);
357                 RETURN(-EINVAL);
358         }
359         stripe_count = lov_get_stripecnt(lov, lmm.lmm_stripe_count);
360
361         if ((__u64)lmm.lmm_stripe_size * stripe_count > ~0UL) {
362                 CDEBUG(D_IOCTL, "stripe width %ux%u > %lu on 32-bit system\n",
363                        lmm.lmm_stripe_size, (int)lmm.lmm_stripe_count, ~0UL);
364                 RETURN(-EINVAL);
365         }
366
367         rc = lov_alloc_memmd(lsmp, stripe_count);
368
369         if (rc < 0)
370                 RETURN(rc);
371
372         (*lsmp)->lsm_stripe_offset = lmm.lmm_stripe_offset;
373         (*lsmp)->lsm_stripe_size = lmm.lmm_stripe_size;
374
375         RETURN(0);
376 }
377
378 /* Retrieve object striping information.
379  *
380  * @lmmu is a pointer to an in-core struct with lmm_ost_count indicating
381  * the maximum number of OST indices which will fit in the user buffer.
382  * lmm_magic must be LOV_MAGIC.
383  */
384 int lov_getstripe(struct lustre_handle *conn, struct lov_stripe_md *lsm,
385                   struct lov_mds_md *lmmu)
386 {
387         struct lov_mds_md lmm, *lmmk = NULL;
388         int rc, lmm_size;
389         ENTRY;
390
391         if (!lsm)
392                 RETURN(-ENODATA);
393
394         rc = copy_from_user(&lmm, lmmu, sizeof(lmm));
395         if (rc)
396                 RETURN(-EFAULT);
397
398         if (lmm.lmm_magic != LOV_MAGIC)
399                 RETURN(-EINVAL);
400
401         rc = lov_packmd(conn, &lmmk, lsm);
402         if (rc < 0)
403                 RETURN(rc);
404         /* Bug 1185 FIXME: convert lmmk to big-endian before copy to userspace */
405         lmm_size = rc;
406         rc = 0;
407
408         /* User wasn't expecting this many OST entries */
409         if (lmm.lmm_ost_count < lmmk->lmm_ost_count)
410                 rc = -EOVERFLOW;
411         else if (copy_to_user(lmmu, lmmk, lmm_size))
412                 rc = -EFAULT;
413
414         obd_free_diskmd (conn, &lmmk);
415
416         RETURN(rc);
417 }