Whamcloud - gitweb
620dd5c9adf5cc55c86dcb495687ff3e224ec45a
[fs/lustre-release.git] / lustre / lov / lov_pack.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  *  Copyright (C) 2002, 2003 Cluster File Systems, Inc.
5  *   Author: Andreas Dilger <adilger@clusterfs.com>
6  *
7  *   This file is part of Lustre, http://www.lustre.org.
8  *
9  *   Lustre is free software; you can redistribute it and/or
10  *   modify it under the terms of version 2 of the GNU General Public
11  *   License as published by the Free Software Foundation.
12  *
13  *   Lustre is distributed in the hope that it will be useful,
14  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
15  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  *   GNU General Public License for more details.
17  *
18  *   You should have received a copy of the GNU General Public License
19  *   along with Lustre; if not, write to the Free Software
20  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21  *
22  * (Un)packing of OST/MDS requests
23  *
24  */
25
26 #define DEBUG_SUBSYSTEM S_LLITE
27 #ifndef __KERNEL__
28 #include <liblustre.h>
29 #endif
30
31 #include <linux/lustre_net.h>
32 #include <linux/obd.h>
33 #include <linux/obd_lov.h>
34 #include <linux/obd_class.h>
35 #include <linux/obd_support.h>
36
37 void lov_dump_lmm(int level, struct lov_mds_md *lmm)
38 {
39         struct lov_object_id *loi;
40         int idx;
41
42         CDEBUG(level, "objid "LPX64", magic %#08x, ost_count %u\n",
43                lmm->lmm_object_id, lmm->lmm_magic, lmm->lmm_ost_count);
44         CDEBUG(level,"stripe_size %u, stripe_count %u, stripe_offset %u\n",
45                lmm->lmm_stripe_size, lmm->lmm_stripe_count,
46                lmm->lmm_stripe_offset);
47         for (idx = 0, loi = lmm->lmm_objects; idx < lmm->lmm_ost_count;
48              idx++, loi++)
49                 CDEBUG(level, "ost idx %u subobj "LPX64"\n", idx,
50                        loi->l_object_id);
51 }
52
/* Check @test; when it does not hold, dump the variable named `lmm' that is
 * in scope at the call site at D_ERROR level, then run LASSERT() on the same
 * expression.  @test appears twice in the expansion, so it must not have
 * side effects. */
#define LMM_ASSERT(test)                                                \
do {                                                                    \
        if (!(test)) {                                                  \
                lov_dump_lmm(D_ERROR, lmm);                             \
        }                                                               \
        LASSERT(test); /* so we know what assertion failed */           \
} while (0)
58
59 /* Pack LOV object metadata for disk storage.  It is packed in LE byte
60  * order and is opaque to the networking layer.
61  *
62  * XXX In the future, this will be enhanced to get the EA size from the
63  *     underlying OSC device(s) to get their EA sizes so we can stack
64  *     LOVs properly.  For now lov_mds_md_size() just assumes one obd_id
65  *     per stripe.
66  */
67 int lov_packmd(struct lustre_handle *conn, struct lov_mds_md **lmmp,
68                struct lov_stripe_md *lsm)
69 {
70         struct obd_device *obd = class_conn2obd(conn);
71         struct lov_obd *lov = &obd->u.lov;
72         struct lov_oinfo *loi;
73         struct lov_mds_md *lmm;
74         int ost_count = lov->desc.ld_tgt_count;
75         int stripe_count = ost_count;
76         int lmm_size;
77         int i;
78         ENTRY;
79
80         if (lsm) {
81                 int i, max = 0;
82                 if (lsm->lsm_magic != LOV_MAGIC) {
83                         CERROR("bad mem LOV MAGIC: %#010x != %#010x\n",
84                                lsm->lsm_magic, LOV_MAGIC);
85                         RETURN(-EINVAL);
86                 }
87                 stripe_count = lsm->lsm_stripe_count;
88
89                 for (i = 0,loi = lsm->lsm_oinfo; i < stripe_count; i++,loi++) {
90                         if (loi->loi_ost_idx > max)
91                                 max = loi->loi_ost_idx;
92                 }
93                 ost_count = max + 1;
94         }
95
96         /* XXX LOV STACKING call into osc for sizes */
97         lmm_size = lov_mds_md_size(ost_count);
98
99         if (!lmmp)
100                 RETURN(lmm_size);
101
102         if (*lmmp && !lsm) {
103                 ost_count = le32_to_cpu ((*lmmp)->lmm_ost_count);
104                 OBD_FREE(*lmmp, lov_mds_md_size(ost_count));
105                 *lmmp = NULL;
106                 RETURN(0);
107         }
108
109         if (!*lmmp) {
110                 OBD_ALLOC(*lmmp, lmm_size);
111                 if (!*lmmp)
112                         RETURN(-ENOMEM);
113         }
114
115         lmm = *lmmp;
116         lmm->lmm_magic = cpu_to_le32 (LOV_MAGIC);
117         lmm->lmm_ost_count = cpu_to_le16 (ost_count);
118
119         if (!lsm)
120                 RETURN(lmm_size);
121
122         lmm->lmm_object_id = cpu_to_le64 (lsm->lsm_object_id);
123         lmm->lmm_stripe_count = cpu_to_le16 (stripe_count);
124         lmm->lmm_stripe_size = cpu_to_le32 (lsm->lsm_stripe_size);
125         lmm->lmm_stripe_offset = cpu_to_le32 (lsm->lsm_stripe_offset);
126
127         /* Only fill in the object ids which we are actually using.
128          * Assumes lmm_objects is otherwise zero-filled. */
129         for (i = 0, loi = lsm->lsm_oinfo; i < stripe_count; i++, loi++) {
130                 /* XXX call down to osc_packmd() to do the packing */
131                 LASSERT (loi->loi_id);
132                 lmm->lmm_objects[loi->loi_ost_idx].l_object_id = 
133                         cpu_to_le64 (loi->loi_id);
134         }
135
136         RETURN(lmm_size);
137 }
138
139 static int lov_get_stripecnt(struct lov_obd *lov, int stripe_count)
140 {
141         if (!stripe_count)
142                 stripe_count = lov->desc.ld_default_stripe_count;
143         if (!stripe_count || stripe_count > lov->desc.ld_active_tgt_count)
144                 stripe_count = lov->desc.ld_active_tgt_count;
145
146         return stripe_count;
147 }
148
149 /* Unpack LOV object metadata from disk storage.  It is packed in LE byte
150  * order and is opaque to the networking layer.
151  */
152 int lov_unpackmd(struct lustre_handle *conn, struct lov_stripe_md **lsmp,
153                  struct lov_mds_md *lmm, int lmm_bytes)
154 {
155         struct obd_device *obd = class_conn2obd(conn);
156         struct lov_obd *lov = &obd->u.lov;
157         struct lov_stripe_md *lsm;
158         struct lov_oinfo *loi;
159         int ost_count = 0;
160         int ost_offset = 0;
161         int stripe_count;
162         int lsm_size;
163         int i;
164         ENTRY;
165
166         if (lmm) {
167                 if (lmm_bytes < sizeof (*lmm)) {
168                         CERROR("lov_mds_md too small: %d, need %d\n",
169                                 lmm_bytes, (int)sizeof(*lmm));
170                         RETURN(-EINVAL);
171                 }
172                 if (le32_to_cpu (lmm->lmm_magic) != LOV_MAGIC) {
173                         CERROR("bad disk LOV MAGIC: %#08x != %#08x\n",
174                                le32_to_cpu (lmm->lmm_magic), LOV_MAGIC);
175                         RETURN(-EINVAL);
176                 }
177
178                 ost_count = le16_to_cpu (lmm->lmm_ost_count);
179                 stripe_count = le16_to_cpu (lmm->lmm_stripe_count);
180
181                 if (ost_count == 0 || stripe_count == 0) {
182                         CERROR ("zero ost %d or stripe %d count\n",
183                                 ost_count, stripe_count);
184                         RETURN (-EINVAL);
185                 }
186
187                 if (lmm_bytes < lov_mds_md_size (ost_count)) {
188                         CERROR ("lov_mds_md too small: %d, need %d\n",
189                                 lmm_bytes, lov_mds_md_size (ost_count));
190                         RETURN (-EINVAL);
191                 }
192         } else
193                 stripe_count = lov_get_stripecnt(lov, 0);
194
195         /* XXX LOV STACKING call into osc for sizes */
196         lsm_size = lov_stripe_md_size(stripe_count);
197
198         if (!lsmp)
199                 RETURN(lsm_size);
200
201         if (*lsmp && !lmm) {
202                 stripe_count = (*lsmp)->lsm_stripe_count;
203                 OBD_FREE(*lsmp, lov_stripe_md_size(stripe_count));
204                 *lsmp = NULL;
205                 RETURN(0);
206         }
207
208         if (!*lsmp) {
209                 OBD_ALLOC(*lsmp, lsm_size);
210                 if (!*lsmp)
211                         RETURN(-ENOMEM);
212         }
213
214         lsm = *lsmp;
215         lsm->lsm_magic = LOV_MAGIC;
216         lsm->lsm_stripe_count = stripe_count;
217         lsm->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES * stripe_count;
218
219         if (!lmm)
220                 RETURN(lsm_size);
221
222         lsm->lsm_object_id = le64_to_cpu (lmm->lmm_object_id);
223         lsm->lsm_stripe_size = le32_to_cpu (lmm->lmm_stripe_size);
224         ost_offset = lsm->lsm_stripe_offset = le32_to_cpu (lmm->lmm_stripe_offset);
225
226         LMM_ASSERT(lsm->lsm_object_id);
227         LMM_ASSERT(ost_count);
228
229         for (i = 0, loi = lsm->lsm_oinfo; i < ost_count; i++, ost_offset++) {
230                 ost_offset %= ost_count;
231
232                 if (!lmm->lmm_objects[ost_offset].l_object_id)
233                         continue;
234
235                 LMM_ASSERT(loi - lsm->lsm_oinfo < stripe_count);
236                 /* XXX LOV STACKING call down to osc_unpackmd() */
237                 loi->loi_id = le64_to_cpu (lmm->lmm_objects[ost_offset].l_object_id);
238                 loi->loi_ost_idx = ost_offset;
239                 loi++;
240         }
241         LMM_ASSERT(loi - lsm->lsm_oinfo > 0);
242         LMM_ASSERT(loi - lsm->lsm_oinfo == stripe_count);
243
244         RETURN(lsm_size);
245 }
246
247 /* Configure object striping information on a new file.
248  *
249  * @lmmu is a pointer to a user struct with one or more of the fields set to
250  * indicate the application preference: lmm_stripe_count, lmm_stripe_size,
251  * lmm_stripe_offset, and lmm_stripe_pattern.  lmm_magic must be LOV_MAGIC.
252  * @lsmp is a pointer to an in-core stripe MD that needs to be filled in.
253  */
254 int lov_setstripe(struct lustre_handle *conn, struct lov_stripe_md **lsmp,
255                   struct lov_mds_md *lmmu)
256 {
257         struct obd_device *obd = class_conn2obd(conn);
258         struct lov_obd *lov = &obd->u.lov;
259         struct lov_mds_md lmm;
260         struct lov_stripe_md *lsm;
261         int stripe_count;
262         int rc;
263         ENTRY;
264
265         rc = copy_from_user(&lmm, lmmu, sizeof(lmm));
266         if (rc)
267                 RETURN(-EFAULT);
268
269         /* Bug 1185 FIXME: struct lov_mds_md is little-endian everywhere else */
270
271         if (lmm.lmm_magic != LOV_MAGIC) {
272                 CERROR("bad userland LOV MAGIC: %#08x != %#08x\n",
273                        lmm.lmm_magic, LOV_MAGIC);
274                 RETURN(-EINVAL);
275         }
276 #if 0   /* the stripe_count/offset is "advisory", and it gets fixed later */
277         if (lmm.lmm_stripe_count > lov->desc.ld_tgt_count &&
278             lmm.lmm_stripe_count != 0xffffffff) {
279                 CERROR("stripe count %u more than OST count %d\n",
280                        lmm.lmm_stripe_count, lov->desc.ld_tgt_count);
281                 RETURN(-EINVAL);
282         }
283         if (lmm.lmm_stripe_offset >= lov->desc.ld_tgt_count &&
284             lmm.lmm_stripe_offset != 0xffffffff) {
285                 CERROR("stripe offset %u more than max OST index %d\n",
286                        lmm.lmm_stripe_offset, lov->desc.ld_tgt_count);
287                 RETURN(-EINVAL);
288         }
289 #endif
290         if (lmm.lmm_stripe_size & (PAGE_SIZE - 1)) {
291                 CERROR("stripe size %u not multiple of %lu\n",
292                        lmm.lmm_stripe_size, PAGE_SIZE);
293                 RETURN(-EINVAL);
294         }
295         stripe_count = lov_get_stripecnt(lov, lmm.lmm_stripe_count);
296
297         if ((__u64)lmm.lmm_stripe_size * stripe_count > ~0UL) {
298                 CERROR("stripe width %ux%u > %lu on 32-bit system\n",
299                        lmm.lmm_stripe_size, (int)lmm.lmm_stripe_count, ~0UL);
300                 RETURN(-EINVAL);
301         }
302
303         /* XXX LOV STACKING call into osc for sizes */
304         OBD_ALLOC(lsm, lov_stripe_md_size(stripe_count));
305         if (!lsm)
306                 RETURN(-ENOMEM);
307
308         lsm->lsm_magic = LOV_MAGIC;
309         lsm->lsm_stripe_count = stripe_count;
310         lsm->lsm_stripe_offset = lmm.lmm_stripe_offset;
311         lsm->lsm_stripe_size = lmm.lmm_stripe_size;
312         lsm->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES * stripe_count;
313
314         *lsmp = lsm;
315
316         RETURN(rc);
317 }
318
319 /* Retrieve object striping information.
320  *
321  * @lmmu is a pointer to an in-core struct with lmm_ost_count indicating
322  * the maximum number of OST indices which will fit in the user buffer.
323  * lmm_magic must be LOV_MAGIC.
324  */
325 int lov_getstripe(struct lustre_handle *conn, struct lov_stripe_md *lsm,
326                   struct lov_mds_md *lmmu)
327 {
328         struct lov_mds_md lmm, *lmmk = NULL;
329         int rc, lmm_size;
330         ENTRY;
331
332         if (!lsm)
333                 RETURN(-ENODATA);
334
335         rc = copy_from_user(&lmm, lmmu, sizeof(lmm));
336         if (rc)
337                 RETURN(-EFAULT);
338
339         if (lmm.lmm_magic != LOV_MAGIC)
340                 RETURN(-EINVAL);
341
342         rc = lov_packmd(conn, &lmmk, lsm);
343         if (rc < 0)
344                 RETURN(rc);
345         /* Bug 1185 FIXME: convert lmmk to big-endian before copy to userspace */
346         lmm_size = rc;
347         rc = 0;
348
349         /* User wasn't expecting this many OST entries */
350         if (lmm.lmm_ost_count < lmmk->lmm_ost_count)
351                 rc = -EOVERFLOW;
352         else if (copy_to_user(lmmu, lmmk, lmm_size))
353                 rc = -EFAULT;
354
355         obd_free_diskmd (conn, &lmmk);
356
357         RETURN(rc);
358 }