Whamcloud - gitweb
Ignore generated files.
[fs/lustre-release.git] / libsysio / src / ioctx.c
1 /*
2  *    This Cplant(TM) source code is the property of Sandia National
3  *    Laboratories.
4  *
5  *    This Cplant(TM) source code is copyrighted by Sandia National
6  *    Laboratories.
7  *
8  *    The redistribution of this Cplant(TM) source code is subject to the
9  *    terms of the GNU Lesser General Public License
10  *    (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html)
11  *
12  *    Cplant(TM) Copyright 1998-2004 Sandia Corporation. 
13  *    Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive
14  *    license for use of this work by or on behalf of the US Government.
15  *    Export of this program may require a license from the United States
16  *    Government.
17  */
18
19 /*
20  * This library is free software; you can redistribute it and/or
21  * modify it under the terms of the GNU Lesser General Public
22  * License as published by the Free Software Foundation; either
23  * version 2.1 of the License, or (at your option) any later version.
24  * 
25  * This library is distributed in the hope that it will be useful,
26  * but WITHOUT ANY WARRANTY; without even the implied warranty of
27  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
28  * Lesser General Public License for more details.
29  * 
30  * You should have received a copy of the GNU Lesser General Public
31  * License along with this library; if not, write to the Free Software
32  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
33  *
34  * Questions or comments about this library should be sent to:
35  *
36  * Lee Ward
37  * Sandia National Laboratories, New Mexico
38  * P.O. Box 5800
39  * Albuquerque, NM 87185-1110
40  *
41  * lee@sandia.gov
42  */
43
44 #include <stdlib.h>
45 #include <string.h>
46 #include <errno.h>
47 #include <assert.h>
48 #include <sys/uio.h>
49 #include <sys/types.h>
50 #include <sys/queue.h>
51
52 #include "xtio.h"
53 #include "sysio.h"
54 #include "inode.h"
55
56
57 #if defined(REDSTORM)
58 #include <catamount/do_iostats.h>
59 #endif
60
61
62 /*
63  * Asynchronous IO context support.
64  */
65
66 /*
67  * Arguments to IO vector enumerator callback when used by _sysio_doio().
68  */
69 struct doio_helper_args {
70         ssize_t (*f)(void *, size_t, _SYSIO_OFF_T, void *);     /* base func */
71         void    *arg;                                           /* caller arg */
72 };
73
/*
 * Every asynchronous IO request that has been entered and has not yet
 * been completed is linked onto this list.
 */
static LIST_HEAD(, ioctx) aioq;

/*
 * Release the storage of a completion call-back entry.
 */
#define cb_free(cb)             free(cb)
84
85 /*
86  * Initialization. Must be called before using any other routine in this
87  * module.
88  */
89 int
90 _sysio_ioctx_init()
91 {
92
93         LIST_INIT(&aioq);
94         return 0;
95 }
96
97 /*
98  * Enter an IO context onto the async IO events queue.
99  */
100 void
101 _sysio_ioctx_enter(struct ioctx *ioctx)
102 {
103
104         LIST_INSERT_HEAD(&aioq, ioctx, ioctx_link);
105 }
106
107 /*
108  * Allocate and initialize a new IO context.
109  */
110 struct ioctx *
111 _sysio_ioctx_new(struct inode *ino,
112                  int wr,
113                  const struct iovec *iov,
114                  size_t iovlen,
115                  const struct intnl_xtvec *xtv,
116                  size_t xtvlen)
117 {
118         struct ioctx *ioctx;
119
120         ioctx = malloc(sizeof(struct ioctx));
121         if (!ioctx)
122                 return NULL;
123
124         I_REF(ino);
125
126         IOCTX_INIT(ioctx,
127                    0,
128                    wr,
129                    ino,
130                    iov, iovlen,
131                    xtv, xtvlen);
132
133         /*
134          * Link request onto the outstanding requests queue.
135          */
136         _sysio_ioctx_enter(ioctx);
137
138         return ioctx;
139 }
140
141 /*
142  * Add an IO completion call-back to the end of the context call-back queue.
143  * These are called in iowait() as the last thing, right before the context
144  * is destroyed.
145  *
146  * They are called in order. Beware.
147  */
148 int
149 _sysio_ioctx_cb(struct ioctx *ioctx,
150                 void (*f)(struct ioctx *, void *),
151                 void *data)
152 {
153         struct ioctx_callback *entry;
154
155         entry = malloc(sizeof(struct ioctx_callback));
156         if (!entry)
157                 return -ENOMEM;
158
159         entry->iocb_f = f;
160         entry->iocb_data = data;
161
162         TAILQ_INSERT_TAIL(&ioctx->ioctx_cbq, entry, iocb_next);
163
164         return 0;
165 }
166
167 /*
168  * Find an IO context given it's identifier.
169  *
170  * NB: This is dog-slow. If there are alot of these, we will need to change
171  * this implementation.
172  */
173 struct ioctx *
174 _sysio_ioctx_find(void *id)
175 {
176         struct ioctx *ioctx;
177
178         for (ioctx = aioq.lh_first; ioctx; ioctx = ioctx->ioctx_link.le_next)
179                 if (ioctx == id)
180                         return ioctx;
181
182         return NULL;
183 }
184
185 /*
186  * Wait for asynchronous IO operation to complete, return status
187  * and dispose of the context.
188  *
189  * Note:
190  * The context is no longer valid after return.
191  */
192 ssize_t
193 _sysio_ioctx_wait(struct ioctx *ioctx)
194 {
195         ssize_t cc;
196
197         /*
198          * Wait for async operation to complete.
199          */
200         while (!(ioctx->ioctx_done ||
201                  (*ioctx->ioctx_ino->i_ops.inop_iodone)(ioctx)))
202                 ;
203
204         /*
205          * Get status.
206          */
207         cc = ioctx->ioctx_cc;
208         if (cc < 0)
209                 cc = -ioctx->ioctx_errno;
210
211         /*
212          * Dispose.
213          */
214         _sysio_ioctx_complete(ioctx);
215
216         return cc;
217 }
218
/*
 * Function form of cb_free(): releases a completion call-back entry.
 */
void
_sysio_ioctx_cb_free(struct ioctx_callback *cb)
{
        cb_free(cb);
}
228
229 /*
230  * Complete an asynchronous IO request.
231  */
232 void
233 _sysio_ioctx_complete(struct ioctx *ioctx)
234 {
235         struct ioctx_callback *entry;
236
237
238         /* update IO stats */
239         _SYSIO_UPDACCT(ioctx->ioctx_write, ioctx);
240
241         /*
242          * Run the call-back queue.
243          */
244         while ((entry = ioctx->ioctx_cbq.tqh_first)) {
245                 TAILQ_REMOVE(&ioctx->ioctx_cbq, entry, iocb_next);
246                 (*entry->iocb_f)(ioctx, entry->iocb_data);
247                 cb_free(entry);
248         }
249
250         /*
251          * Unlink from the file record's outstanding request queue.
252          */
253         LIST_REMOVE(ioctx, ioctx_link);
254
255         if (ioctx->ioctx_fast)
256                 return;
257
258         I_RELE(ioctx->ioctx_ino);
259
260         free(ioctx);
261 }
262
263 /*
264  * General help validating strided-IO vectors.
265  *
266  * A driver may call this to make sure underflow/overflow of an off_t can't
267  * occur and overflow of a ssize_t can't occur when writing. The sum
268  * of the reconciled transfer length is returned or some appropriate
269  * error depending on underflow/overflow.
270  *
271  * The following algorithm assumes:
272  *
273  * a) sizeof(size_t) >= sizeof(ssize_t)
274  * b) 2's complement arithmetic
275  * c) The compiler won't optimize away code because it's developers
276  *      believed that something with an undefined result in `C' can't happen.
277  */
278 ssize_t
279 _sysio_validx(const struct intnl_xtvec *xtv, size_t xtvlen,
280               const struct iovec *iov, size_t iovlen,
281               _SYSIO_OFF_T limit)
282 {
283         ssize_t acc, cc;
284         struct iovec iovec;
285         struct intnl_xtvec xtvec;
286         _SYSIO_OFF_T off;
287
288         if (!(xtvlen && iovlen))
289                 return -EINVAL;
290
291         acc = 0;
292         xtvec.xtv_len = iovec.iov_len = 0;
293         do {
294                 while (!xtvec.xtv_len) {
295                         if (!xtvlen--)
296                                 break;
297                         if (!xtv->xtv_len) {
298                                 xtv++;
299                                 continue;
300                         }
301                         xtvec = *xtv++;
302                         if (xtvec.xtv_off < 0)
303                                 return -EINVAL;
304                 }
305                 if (!xtvec.xtv_len)
306                         break;
307                 do {
308                         while (!iovec.iov_len) {
309                                 if (!iovlen--)
310                                         break;
311                                 if (!iov->iov_len) {
312                                         iov++;
313                                         continue;
314                                 }
315                                 iovec = *iov++;
316                         }
317                         if (!iovec.iov_len)
318                                 break;
319                         cc = iovec.iov_len;
320                         if (cc < 0)
321                                 return -EINVAL;
322                         if ((size_t )cc > xtvec.xtv_len)
323                                 cc = xtvec.xtv_len;
324                         xtvec.xtv_len -= cc;
325                         iovec.iov_len -= cc;
326                         off = xtvec.xtv_off + cc;
327                         if (xtvec.xtv_off && off <= xtvec.xtv_off)
328                                 return off < 0 ? -EINVAL : -EOVERFLOW;
329                         if (off > limit)
330                                 return -EFBIG;
331                         xtvec.xtv_off = off;
332                         cc += acc;
333                         if (acc && (cc <= acc))
334                                 return -EINVAL;
335                         acc = cc;
336                 } while (xtvec.xtv_len && iovlen);
337         } while ((xtvlen || xtvec.xtv_len) && iovlen);
338         return acc;
339 }
340
341 /*
342  */
343 ssize_t
344 _sysio_enumerate_extents(const struct intnl_xtvec *xtv, size_t xtvlen,
345                          const struct iovec *iov, size_t iovlen,
346                          ssize_t (*f)(const struct iovec *, int,
347                                       _SYSIO_OFF_T,
348                                       ssize_t,
349                                       void *),
350                          void *arg)
351 {
352         ssize_t acc, tmp, cc;
353         struct iovec iovec;
354         struct intnl_xtvec xtvec;
355         const struct iovec *start;
356         _SYSIO_OFF_T off;
357         size_t  n;
358         size_t  remain;
359         
360         acc = 0;
361         iovec.iov_len = 0;
362         while (xtvlen) {
363                 /*
364                  * Coalesce contiguous extent vector entries.
365                  */
366                 off = xtvec.xtv_off = xtv->xtv_off;
367                 off += xtvec.xtv_len = xtv->xtv_len;
368                 while (++xtv, --xtvlen) {
369                         if (off != xtv->xtv_off) {
370                                 /*
371                                  * Not contiguous.
372                                  */
373                                 break;
374                         }
375                         if (!xtv->xtv_len) {
376                                 /*
377                                  * Zero length.
378                                  */
379                                 continue;
380                         }
381                         off += xtv->xtv_len;
382                         xtvec.xtv_len += xtv->xtv_len;
383                 }
384                 while (xtvec.xtv_len) {
385                         if (iovec.iov_len) {
386                                 tmp = iovec.iov_len; 
387                                 if (iovec.iov_len > xtvec.xtv_len)
388                                         iovec.iov_len = xtvec.xtv_len;
389                                 cc =
390                                     (*f)(&iovec, 1,
391                                          xtvec.xtv_off,
392                                          xtvec.xtv_len,
393                                          arg);
394                                 if (cc <= 0) {
395                                         if (acc)
396                                                 return acc;
397                                         return cc;
398                                 }
399                                 iovec.iov_base = (char *)iovec.iov_base + cc;
400                                 iovec.iov_len = tmp - cc; 
401                                 tmp = cc + acc;
402                                 if (acc && tmp <= acc)
403                                         abort();                /* paranoia */
404                                 acc = tmp;
405                         } else if (iovlen) {
406                                 start = iov;
407                                 n = xtvec.xtv_len;
408                                 do {
409                                         if (iov->iov_len > n) {
410                                                 /*
411                                                  * That'll do.
412                                                  */
413                                                 break;
414                                         }
415                                         n -= iov->iov_len;
416                                         iov++;
417                                 } while (--iovlen);
418                                 if (iov == start) {
419                                         iovec = *iov++;
420                                         iovlen--;
421                                         continue;
422                                 }
423                                 remain = xtvec.xtv_len - n;
424                                 cc =
425                                     (*f)(start, iov - start,
426                                          xtvec.xtv_off,
427                                          remain,
428                                          arg);
429                                 if (cc <= 0) {
430                                         if (acc)
431                                                 return acc;
432                                         return cc;
433                                 }
434                                                                 
435                                 tmp = cc + acc;
436                                 if (acc && tmp <= acc)
437                                         abort();                /* paranoia */
438                                 acc = tmp;
439
440                                 remain -= cc;
441                                 if (remain)
442                                         return acc;             /* short */
443                         } else
444                                 return acc;                     /* short out */
445                         xtvec.xtv_off += cc;
446                         xtvec.xtv_len -= cc;
447                 }
448         }
449         return acc;
450 }
451
452 ssize_t
453 _sysio_enumerate_iovec(const struct iovec *iov, size_t count,
454                        _SYSIO_OFF_T off,
455                        ssize_t limit,
456                        ssize_t (*f)(void *, size_t, _SYSIO_OFF_T, void *),
457                        void *arg)
458 {
459         ssize_t acc, cc;
460         size_t  n;
461         unsigned indx;
462         size_t  remain;
463
464         if (!count)
465                 return -EINVAL;
466         assert(limit >= 0);
467         acc = 0;
468         n = limit;
469         for (indx = 0; n && indx < count; indx++) {
470                 if (iov[indx].iov_len < n) {
471                         cc = (ssize_t )iov[indx].iov_len;
472                         if (cc < 0)
473                                 return -EINVAL;
474                 } else
475                         cc = (ssize_t )n;
476                 if (!cc)
477                         continue;
478                 n -= cc;
479                 cc += acc;
480                 if (acc && cc <= acc)
481                         return -EINVAL;
482                 acc = cc;
483         }
484         if (!acc)
485                 return 0;
486         acc = 0;
487         do {
488                 if (!iov->iov_len) {
489                         iov++;
490                         continue;
491                 }
492                 n =
493                     iov->iov_len < (size_t )limit
494                       ? iov->iov_len
495                       : (size_t )limit;
496                 cc = (*f)(iov->iov_base, n, off, arg);
497                 if (cc <= 0) {
498                         if (acc)
499                                 return acc;
500                         return cc;
501                 }
502                 off += cc;
503                 limit -= cc;
504                 remain = iov->iov_len - cc;
505                 cc += acc;
506                 if (acc && cc <= acc)
507                         abort();                        /* bad driver! */
508                 acc = cc;
509                 if (remain || !limit)
510                         break;                          /* short/limited read */
511                 iov++;
512         } while (--count);
513         return acc;
514 }
515
516 static ssize_t
517 _sysio_doio_helper(const struct iovec *iov, int count,
518                    _SYSIO_OFF_T off,
519                    ssize_t limit,
520                    struct doio_helper_args *args)
521 {
522
523         return _sysio_enumerate_iovec(iov, count,
524                                       off, limit,
525                                       args->f,
526                                       args->arg);
527 }
528
529 /*
530  * A meta-driver for the whole strided-io process. Appropriate when
531  * the driver can't handle anything but simple p{read,write}-like
532  * interface.
533  */
534 ssize_t
535 _sysio_doio(const struct intnl_xtvec *xtv, size_t xtvlen,
536             const struct iovec *iov, size_t iovlen,
537             ssize_t (*f)(void *, size_t, _SYSIO_OFF_T, void *),
538             void *arg)
539 {
540         struct doio_helper_args arguments;
541
542         arguments.f = f;
543         arguments.arg = arg;
544         return _sysio_enumerate_extents(xtv, xtvlen,
545                                         iov, iovlen,
546                                         (ssize_t (*)(const struct iovec *, int,
547                                                      _SYSIO_OFF_T,
548                                                      ssize_t,
549                                                      void *))_sysio_doio_helper,
550                                         &arguments);
551 }