Whamcloud - gitweb
70c1b3e4d17737a40d8d60ed4185d7ecd73ab44d
[fs/lustre-release.git] / libsysio / src / ioctx.c
1 /*
2  *    This Cplant(TM) source code is the property of Sandia National
3  *    Laboratories.
4  *
5  *    This Cplant(TM) source code is copyrighted by Sandia National
6  *    Laboratories.
7  *
8  *    The redistribution of this Cplant(TM) source code is subject to the
9  *    terms of the GNU Lesser General Public License
10  *    (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html)
11  *
12  *    Cplant(TM) Copyright 1998-2004 Sandia Corporation. 
13  *    Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive
14  *    license for use of this work by or on behalf of the US Government.
15  *    Export of this program may require a license from the United States
16  *    Government.
17  */
18
19 /*
20  * This library is free software; you can redistribute it and/or
21  * modify it under the terms of the GNU Lesser General Public
22  * License as published by the Free Software Foundation; either
23  * version 2.1 of the License, or (at your option) any later version.
24  * 
25  * This library is distributed in the hope that it will be useful,
26  * but WITHOUT ANY WARRANTY; without even the implied warranty of
27  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
28  * Lesser General Public License for more details.
29  * 
30  * You should have received a copy of the GNU Lesser General Public
31  * License along with this library; if not, write to the Free Software
32  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
33  *
34  * Questions or comments about this library should be sent to:
35  *
36  * Lee Ward
37  * Sandia National Laboratories, New Mexico
38  * P.O. Box 5800
39  * Albuquerque, NM 87185-1110
40  *
41  * lee@sandia.gov
42  */
43
44 #include <stdlib.h>
45 #include <string.h>
46 #include <errno.h>
47 #include <assert.h>
48 #include <sys/types.h>
49 #include <sys/uio.h>
50 #include <sys/queue.h>
51
52 #include "sysio.h"
53 #include "inode.h"
54 #include "xtio.h"
55
56 /*
57  * Asynchronous IO context support.
58  */
59
/*
 * Bundle handed, as the opaque argument, to the IO vector enumerator
 * call-back installed by _sysio_doio(). It carries the caller's simple
 * transfer function together with the argument that function expects.
 */
struct doio_helper_args {
        /* base transfer function: (buffer, length, offset, caller arg) */
        ssize_t (*f)(void *, size_t, _SYSIO_OFF_T, void *);
        /* caller's argument, passed through to `f' untouched */
        void    *arg;
};
67
/*
 * Head of the list holding every asynchronous IO context that has been
 * entered with _sysio_ioctx_enter() and not yet completed.
 */
static LIST_HEAD(, ioctx) aioq;

/*
 * Release the storage backing one completion call-back record.
 */
#define cb_free(cb)     free(cb)
78
79 /*
80  * Initialization. Must be called before using any other routine in this
81  * module.
82  */
83 int
84 _sysio_ioctx_init()
85 {
86
87         LIST_INIT(&aioq);
88         return 0;
89 }
90
91 /*
92  * Enter an IO context onto the async IO events queue.
93  */
94 void
95 _sysio_ioctx_enter(struct ioctx *ioctx)
96 {
97
98         LIST_INSERT_HEAD(&aioq, ioctx, ioctx_link);
99 }
100
101 /*
102  * Allocate and initialize a new IO context.
103  */
104 struct ioctx *
105 _sysio_ioctx_new(struct inode *ino,
106                  int wr,
107                  const struct iovec *iov,
108                  size_t iovlen,
109                  const struct intnl_xtvec *xtv,
110                  size_t xtvlen)
111 {
112         struct ioctx *ioctx;
113
114         ioctx = malloc(sizeof(struct ioctx));
115         if (!ioctx)
116                 return NULL;
117
118         I_REF(ino);
119
120         IOCTX_INIT(ioctx,
121                    0,
122                    (ioid_t )ioctx,
123                    wr,
124                    ino,
125                    iov, iovlen,
126                    xtv, xtvlen);
127
128         /*
129          * Link request onto the outstanding requests queue.
130          */
131         _sysio_ioctx_enter(ioctx);
132
133         return ioctx;
134 }
135
136 /*
137  * Add an IO completion call-back to the end of the context call-back queue.
138  * These are called in iowait() as the last thing, right before the context
139  * is destroyed.
140  *
141  * They are called in order. Beware.
142  */
143 int
144 _sysio_ioctx_cb(struct ioctx *ioctx,
145                 void (*f)(struct ioctx *, void *),
146                 void *data)
147 {
148         struct ioctx_callback *entry;
149
150         entry = malloc(sizeof(struct ioctx_callback));
151         if (!entry)
152                 return -ENOMEM;
153
154         entry->iocb_f = f;
155         entry->iocb_data = data;
156
157         TAILQ_INSERT_TAIL(&ioctx->ioctx_cbq, entry, iocb_next);
158
159         return 0;
160 }
161
162 /*
163  * Find an IO context given it's identifier.
164  *
165  * NB: This is dog-slow. If there are alot of these, we will need to change
166  * this implementation.
167  */
168 struct ioctx *
169 _sysio_ioctx_find(ioid_t id)
170 {
171         struct ioctx *ioctx;
172
173         for (ioctx = aioq.lh_first; ioctx; ioctx = ioctx->ioctx_link.le_next)
174                 if (ioctx->ioctx_id == id)
175                         return ioctx;
176
177         return NULL;
178 }
179
180 /*
181  * Wait for asynchronous IO operation to complete, return status
182  * and dispose of the context.
183  *
184  * Note:
185  * The context is no longer valid after return.
186  */
187 ssize_t
188 _sysio_ioctx_wait(struct ioctx *ioctx)
189 {
190         ssize_t cc;
191
192         /*
193          * Wait for async operation to complete.
194          */
195         while (!(ioctx->ioctx_done ||
196                  (*ioctx->ioctx_ino->i_ops.inop_iodone)(ioctx)))
197                 ;
198
199         /*
200          * Get status.
201          */
202         cc = ioctx->ioctx_cc;
203         if (cc < 0)
204                 cc = -ioctx->ioctx_errno;
205
206         /*
207          * Dispose.
208          */
209         _sysio_ioctx_complete(ioctx);
210
211         return cc;
212 }
213
/*
 * Function form of cb_free(): release one completion call-back record.
 */
void
_sysio_ioctx_cb_free(struct ioctx_callback *cb)
{
        cb_free(cb);
}
223
224 /*
225  * Complete an asynchronous IO request.
226  */
227 void
228 _sysio_ioctx_complete(struct ioctx *ioctx)
229 {
230         struct ioctx_callback *entry;
231
232         /*
233          * Run the call-back queue.
234          */
235         while ((entry = ioctx->ioctx_cbq.tqh_first)) {
236                 TAILQ_REMOVE(&ioctx->ioctx_cbq, entry, iocb_next);
237                 (*entry->iocb_f)(ioctx, entry->iocb_data);
238                 cb_free(entry);
239         }
240
241         /*
242          * Unlink from the file record's outstanding request queue.
243          */
244         LIST_REMOVE(ioctx, ioctx_link);
245
246         if (ioctx->ioctx_fast)
247                 return;
248
249         I_RELE(ioctx->ioctx_ino);
250
251         free(ioctx);
252 }
253
/*
 * General help validating strided-IO vectors.
 *
 * A driver may call this to make sure underflow/overflow of an off_t can't
 * occur and overflow of a ssize_t can't occur when writing.
 *
 * The extent vector (file regions) and the iovec vector (memory regions)
 * are walked together. Zero-length entries in either vector are skipped.
 * Each step pairs as much of the current memory segment as fits in the
 * current extent, and the walk ends when either vector runs out of
 * bytes.
 *
 * Returns the reconciled transfer length (the number of bytes that can
 * be paired up) or a negative errno:
 *
 *   -EINVAL     either vector has no entries, an extent has a negative
 *               offset, a memory segment length does not fit a ssize_t,
 *               the running total wrapped, or an end offset wrapped to a
 *               negative value
 *   -EOVERFLOW  an end offset wrapped without becoming negative
 *   -EFBIG      a paired region would end beyond `limit'
 *
 * The following algorithm assumes:
 *
 * a) sizeof(size_t) >= sizeof(ssize_t)
 * b) 2's complement arithmetic
 * c) The compiler won't optimize away code because its developers
 *      believed that something with an undefined result in `C' can't happen.
 */
ssize_t
_sysio_validx(const struct intnl_xtvec *xtv, size_t xtvlen,
              const struct iovec *iov, size_t iovlen,
              _SYSIO_OFF_T limit)
{
        ssize_t acc, cc;
        struct iovec iovec;
        struct intnl_xtvec xtvec;
        _SYSIO_OFF_T off;

        if (!(xtvlen && iovlen))
                return -EINVAL;

        acc = 0;
        xtvec.xtv_len = iovec.iov_len = 0;
        do {
                /*
                 * Fetch the next non-empty extent. The counter is only
                 * decremented while entries remain, so it can never wrap
                 * past zero.
                 */
                while (!xtvec.xtv_len && xtvlen) {
                        xtvlen--;
                        if (!xtv->xtv_len) {
                                xtv++;
                                continue;
                        }
                        xtvec = *xtv++;
                        if (xtvec.xtv_off < 0)
                                return -EINVAL;
                }
                if (!xtvec.xtv_len)
                        break;                          /* out of extents */
                do {
                        /*
                         * Fetch the next non-empty memory segment, again
                         * without letting the counter wrap.
                         */
                        while (!iovec.iov_len && iovlen) {
                                iovlen--;
                                if (!iov->iov_len) {
                                        iov++;
                                        continue;
                                }
                                iovec = *iov++;
                        }
                        if (!iovec.iov_len)
                                break;                  /* out of memory */
                        cc = iovec.iov_len;
                        if (cc < 0)
                                return -EINVAL;
                        if ((size_t )cc > xtvec.xtv_len)
                                cc = xtvec.xtv_len;
                        xtvec.xtv_len -= cc;
                        iovec.iov_len -= cc;
                        off = xtvec.xtv_off + cc;
                        /* cc > 0 here, so a non-increasing offset wrapped. */
                        if (xtvec.xtv_off && off <= xtvec.xtv_off)
                                return off < 0 ? -EINVAL : -EOVERFLOW;
                        if (off > limit)
                                return -EFBIG;
                        xtvec.xtv_off = off;
                        cc += acc;
                        if (acc && (cc <= acc))
                                return -EINVAL;
                        acc = cc;
                        /*
                         * Memory is still available if the fetched segment
                         * has bytes left or unfetched entries remain.
                         */
                } while (xtvec.xtv_len && (iovec.iov_len || iovlen));
        } while ((xtvlen || xtvec.xtv_len) && (iovec.iov_len || iovlen));
        return acc;
}
331
/*
 * Walk an extent vector and a memory (iovec) vector together, handing
 * matched pieces to a call-back.
 *
 * Adjacent extents that are contiguous in the file are coalesced first.
 * For each coalesced extent, `f' is called as
 *
 *      (*f)(iov, count, off, limit, arg)
 *
 * with `count' memory segments starting at `iov', the file offset of the
 * piece and the number of bytes wanted. When the next memory segment is
 * larger than what is left of the extent, it is copied into a local
 * iovec and fed to `f' one piece at a time.
 *
 * `f' returns the number of bytes it moved. A return <= 0 stops the
 * enumeration, as does a short transfer or running out of memory
 * segments.
 *
 * Returns the total number of bytes moved. If nothing was moved and `f'
 * returned <= 0, that value is returned instead.
 */
ssize_t
_sysio_enumerate_extents(const struct intnl_xtvec *xtv, size_t xtvlen,
                         const struct iovec *iov, size_t iovlen,
                         ssize_t (*f)(const struct iovec *, int,
                                      _SYSIO_OFF_T,
                                      ssize_t,
                                      void *),
                         void *arg)
{
        ssize_t acc, tmp, cc;
        struct iovec iovec;
        struct intnl_xtvec xtvec;
        const struct iovec *start;
        _SYSIO_OFF_T off;
        size_t  n;
        size_t  remain;

        acc = 0;
        iovec.iov_len = 0;
        while (xtvlen) {
                /*
                 * Coalesce contiguous extent vector entries.
                 */
                off = xtvec.xtv_off = xtv->xtv_off;
                off += xtvec.xtv_len = xtv->xtv_len;
                while (++xtv, --xtvlen) {
                        if (off != xtv->xtv_off) {
                                /*
                                 * Not contiguous.
                                 */
                                break;
                        }
                        if (!xtv->xtv_len) {
                                /*
                                 * Zero length.
                                 */
                                continue;
                        }
                        off += xtv->xtv_len;
                        xtvec.xtv_len += xtv->xtv_len;
                }
                while (xtvec.xtv_len) {
                        if (iovec.iov_len) {
                                /*
                                 * Part of a large segment is pending;
                                 * feed at most one extent's worth of it.
                                 */
                                tmp = iovec.iov_len;
                                if (iovec.iov_len > xtvec.xtv_len) {
                                        iovec.iov_len = xtvec.xtv_len;
                                }
                                cc =
                                    (*f)(&iovec, 1,
                                         xtvec.xtv_off,
                                         xtvec.xtv_len,
                                         arg);
                                if (cc <= 0) {
                                        if (acc)
                                                return acc;
                                        return cc;
                                }
                                iovec.iov_base = (char *)iovec.iov_base + cc;
                                iovec.iov_len = tmp - cc;
                                tmp = cc + acc;
                                if (acc && tmp <= acc)
                                        abort();                /* paranoia */
                                acc = tmp;
                        } else {
                                if (!iovlen) {
                                        /*
                                         * No memory segments left; the
                                         * scan below must not touch `iov'.
                                         */
                                        return acc;
                                }
                                /*
                                 * Gather as many whole segments as fit
                                 * in what is left of the extent.
                                 */
                                start = iov;
                                n = xtvec.xtv_len;
                                do {
                                        if (iov->iov_len > n) {
                                                /*
                                                 * That'll do.
                                                 */
                                                break;
                                        }
                                        n -= iov->iov_len;
                                        iov++;
                                } while (--iovlen);
                                if (iov == start) {
                                        /*
                                         * The first segment alone is too
                                         * big. Take it as the pending
                                         * segment, and count it as
                                         * consumed so later scans stay
                                         * inside the caller's array.
                                         */
                                        iovec = *iov++;
                                        iovlen--;
                                        continue;
                                }
                                remain = xtvec.xtv_len - n;
                                cc =
                                    (*f)(start, iov - start,
                                         xtvec.xtv_off,
                                         xtvec.xtv_len - n,
                                         arg);
                                if (cc <= 0) {
                                        if (acc)
                                                return acc;
                                        return cc;
                                }

                                tmp = cc + acc;
                                if (acc && tmp <= acc)
                                        abort();                /* paranoia */
                                acc = tmp;

                                if (remain && !iovlen)
                                        return acc;     /* memory used up */

                                remain -= cc;
                                if (remain)
                                        return acc;             /* short */
                        }
                        xtvec.xtv_off += cc;
                        xtvec.xtv_len -= cc;
                }
        }
        return acc;
}
449
/*
 * Feed the segments of a memory vector, one at a time, to a simple
 * p{read,write}-like function.
 *
 * At most `limit' bytes are moved, starting at file offset `off'. Each
 * non-empty segment is passed as (*f)(base, length, offset, arg), and
 * the offset advances by whatever `f' reports it moved.
 *
 * A first pass over the vector only sums the lengths, capped at `limit';
 * if that sum is zero nothing is attempted and 0 is returned.
 *
 * Returns -EINVAL when `count' is zero. Otherwise returns the total
 * moved. If nothing was moved and `f' returned <= 0, that value is
 * returned. Enumeration stops at the first short transfer or when the
 * limit is reached.
 */
ssize_t
_sysio_enumerate_iovec(const struct iovec *iov, size_t count,
                       _SYSIO_OFF_T off,
                       ssize_t limit,
                       ssize_t (*f)(void *, size_t, _SYSIO_OFF_T, void *),
                       void *arg)
{
        ssize_t total, xfer;
        size_t  budget, chunk, left;
        unsigned i;

        if (count == 0)
                return -EINVAL;
        assert(limit >= 0);

        /*
         * Pass 1: how much could be moved, without exceeding the limit
         * or wrapping the running total?
         */
        total = 0;
        budget = limit;
        for (i = 0; i < count && budget != 0; i++) {
                if (iov[i].iov_len >= budget)
                        xfer = (ssize_t )budget;
                else {
                        xfer = (ssize_t )iov[i].iov_len;
                        if (xfer < 0)
                                return -EINVAL;
                }
                if (xfer == 0)
                        continue;
                budget -= xfer;
                xfer += total;
                if (total != 0 && xfer <= total)
                        return -EINVAL;
                total = xfer;
        }
        if (total == 0)
                return 0;

        /*
         * Pass 2: do the transfers, skipping empty segments.
         */
        total = 0;
        for (; count != 0; iov++, count--) {
                if (iov->iov_len == 0)
                        continue;
                chunk = iov->iov_len;
                if (chunk >= (size_t )limit)
                        chunk = (size_t )limit;
                xfer = (*f)(iov->iov_base, chunk, off, arg);
                if (xfer <= 0)
                        return total ? total : xfer;
                off += xfer;
                limit -= xfer;
                left = iov->iov_len - xfer;
                xfer += total;
                if (total && xfer <= total)
                        abort();                        /* bad driver! */
                total = xfer;
                if (left || !limit)
                        break;                          /* short/limited read */
        }
        return total;
}
513
514 static ssize_t
515 _sysio_doio_helper(const struct iovec *iov, int count,
516                    _SYSIO_OFF_T off,
517                    ssize_t limit,
518                    struct doio_helper_args *args)
519 {
520
521         return _sysio_enumerate_iovec(iov, count,
522                                       off, limit,
523                                       args->f,
524                                       args->arg);
525 }
526
527 /*
528  * A meta-driver for the whole strided-io process. Appropriate when
529  * the driver can't handle anything but simple p{read,write}-like
530  * interface.
531  */
532 ssize_t
533 _sysio_doio(const struct intnl_xtvec *xtv, size_t xtvlen,
534             const struct iovec *iov, size_t iovlen,
535             ssize_t (*f)(void *, size_t, _SYSIO_OFF_T, void *),
536             void *arg)
537 {
538         struct doio_helper_args arguments;
539
540         arguments.f = f;
541         arguments.arg = arg;
542         return _sysio_enumerate_extents(xtv, xtvlen,
543                                         iov, iovlen,
544                                         (ssize_t (*)(const struct iovec *, int,
545                                                      _SYSIO_OFF_T,
546                                                      ssize_t,
547                                                      void *))_sysio_doio_helper,
548                                         &arguments);
549 }