2 * This Cplant(TM) source code is the property of Sandia National
5 * This Cplant(TM) source code is copyrighted by Sandia National
8 * The redistribution of this Cplant(TM) source code is subject to the
9 * terms of the GNU Lesser General Public License
10 * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html)
12 * Cplant(TM) Copyright 1998-2004 Sandia Corporation.
13 * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive
14 * license for use of this work by or on behalf of the US Government.
15 * Export of this program may require a license from the United States
20 * This library is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU Lesser General Public
22 * License as published by the Free Software Foundation; either
23 * version 2.1 of the License, or (at your option) any later version.
25 * This library is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
28 * Lesser General Public License for more details.
30 * You should have received a copy of the GNU Lesser General Public
31 * License along with this library; if not, write to the Free Software
32 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
34 * Questions or comments about this library should be sent to:
37 * Sandia National Laboratories, New Mexico
39 * Albuquerque, NM 87185-1110
49 #include <sys/types.h>
50 #include <sys/queue.h>
58 #include <catamount/do_iostats.h>
63 * Asynchronous IO context support.
67 * Arguments to IO vector enumerator callback when used by _sysio_doio().
69 struct doio_helper_args {
70 ssize_t (*f)(void *, size_t, _SYSIO_OFF_T, void *); /* base func */
71 void *arg; /* caller arg */
75 * List of all outstanding (in-flight) asynch IO requests tracked
78 static LIST_HEAD( ,ioctx) aioq;
81 * Free callback entry.
83 #define cb_free(cb) free(cb)
86 * Initialization. Must be called before using any other routine in this
98 * Enter an IO context onto the async IO events queue.
101 _sysio_ioctx_enter(struct ioctx *ioctx)
104 LIST_INSERT_HEAD(&aioq, ioctx, ioctx_link);
108 * Allocate and initialize a new IO context.
111 _sysio_ioctx_new(struct inode *ino,
113 const struct iovec *iov,
115 const struct intnl_xtvec *xtv,
120 ioctx = malloc(sizeof(struct ioctx));
134 * Link request onto the outstanding requests queue.
136 _sysio_ioctx_enter(ioctx);
142 * Add an IO completion call-back to the end of the context call-back queue.
143 * These are called in iowait() as the last thing, right before the context
146 * They are called in order. Beware.
149 _sysio_ioctx_cb(struct ioctx *ioctx,
150 void (*f)(struct ioctx *, void *),
153 struct ioctx_callback *entry;
155 entry = malloc(sizeof(struct ioctx_callback));
160 entry->iocb_data = data;
162 TAILQ_INSERT_TAIL(&ioctx->ioctx_cbq, entry, iocb_next);
168 * Find an IO context given its identifier.
170 * NB: This is dog-slow. If there are a lot of these, we will need to change
171 * this implementation.
174 _sysio_ioctx_find(void *id)
178 for (ioctx = aioq.lh_first; ioctx; ioctx = ioctx->ioctx_link.le_next)
186 * Wait for asynchronous IO operation to complete, return status
187 * and dispose of the context.
190 * The context is no longer valid after return.
193 _sysio_ioctx_wait(struct ioctx *ioctx)
198 * Wait for async operation to complete.
200 while (!(ioctx->ioctx_done ||
201 (*ioctx->ioctx_ino->i_ops.inop_iodone)(ioctx)))
207 cc = ioctx->ioctx_cc;
209 cc = -ioctx->ioctx_errno;
214 _sysio_ioctx_complete(ioctx);
220 * Free callback entry.
223 _sysio_ioctx_cb_free(struct ioctx_callback *cb)
230 * Complete an asynchronous IO request.
233 _sysio_ioctx_complete(struct ioctx *ioctx)
235 struct ioctx_callback *entry;
238 /* update IO stats */
239 _SYSIO_UPDACCT(ioctx->ioctx_write, ioctx);
242 * Run the call-back queue.
244 while ((entry = ioctx->ioctx_cbq.tqh_first)) {
245 TAILQ_REMOVE(&ioctx->ioctx_cbq, entry, iocb_next);
246 (*entry->iocb_f)(ioctx, entry->iocb_data);
251 * Unlink from the file record's outstanding request queue.
253 LIST_REMOVE(ioctx, ioctx_link);
255 if (ioctx->ioctx_fast)
258 I_RELE(ioctx->ioctx_ino);
264 * General help validating strided-IO vectors.
266 * A driver may call this to make sure underflow/overflow of an off_t can't
267 * occur and overflow of a ssize_t can't occur when writing. The sum
268 * of the reconciled transfer length is returned or some appropriate
269 * error depending on underflow/overflow.
271 * The following algorithm assumes:
273 * a) sizeof(size_t) >= sizeof(ssize_t)
274 * b) 2's complement arithmetic
275 * c) The compiler won't optimize away code because its developers
276 * believed that something with an undefined result in `C' can't happen.
279 _sysio_validx(const struct intnl_xtvec *xtv, size_t xtvlen,
280 const struct iovec *iov, size_t iovlen,
285 struct intnl_xtvec xtvec;
288 if (!(xtvlen && iovlen))
292 xtvec.xtv_len = iovec.iov_len = 0;
294 while (!xtvec.xtv_len) {
302 if (xtvec.xtv_off < 0)
308 while (!iovec.iov_len) {
322 if ((size_t )cc > xtvec.xtv_len)
326 off = xtvec.xtv_off + cc;
327 if (xtvec.xtv_off && off <= xtvec.xtv_off)
328 return off < 0 ? -EINVAL : -EOVERFLOW;
333 if (acc && (cc <= acc))
336 } while (xtvec.xtv_len && iovlen);
337 } while ((xtvlen || xtvec.xtv_len) && iovlen);
344 _sysio_enumerate_extents(const struct intnl_xtvec *xtv, size_t xtvlen,
345 const struct iovec *iov, size_t iovlen,
346 ssize_t (*f)(const struct iovec *, int,
352 ssize_t acc, tmp, cc;
354 struct intnl_xtvec xtvec;
355 const struct iovec *start;
364 * Coalesce contiguous extent vector entries.
366 off = xtvec.xtv_off = xtv->xtv_off;
367 off += xtvec.xtv_len = xtv->xtv_len;
368 while (++xtv, --xtvlen) {
369 if (off != xtv->xtv_off) {
382 xtvec.xtv_len += xtv->xtv_len;
384 while (xtvec.xtv_len) {
387 if (iovec.iov_len > xtvec.xtv_len)
388 iovec.iov_len = xtvec.xtv_len;
399 iovec.iov_base = (char *)iovec.iov_base + cc;
400 iovec.iov_len = tmp - cc;
402 if (acc && tmp <= acc)
403 abort(); /* paranoia */
409 if (iov->iov_len > n) {
423 remain = xtvec.xtv_len - n;
425 (*f)(start, iov - start,
436 if (acc && tmp <= acc)
437 abort(); /* paranoia */
442 return acc; /* short */
444 return acc; /* short out */
453 _sysio_enumerate_iovec(const struct iovec *iov, size_t count,
456 ssize_t (*f)(void *, size_t, _SYSIO_OFF_T, void *),
469 for (indx = 0; n && indx < count; indx++) {
470 if (iov[indx].iov_len < n) {
471 cc = (ssize_t )iov[indx].iov_len;
480 if (acc && cc <= acc)
493 iov->iov_len < (size_t )limit
496 cc = (*f)(iov->iov_base, n, off, arg);
504 remain = iov->iov_len - cc;
506 if (acc && cc <= acc)
507 abort(); /* bad driver! */
509 if (remain || !limit)
510 break; /* short/limited read */
517 _sysio_doio_helper(const struct iovec *iov, int count,
520 struct doio_helper_args *args)
523 return _sysio_enumerate_iovec(iov, count,
530 * A meta-driver for the whole strided-io process. Appropriate when
531 * the driver can't handle anything but simple p{read,write}-like
535 _sysio_doio(const struct intnl_xtvec *xtv, size_t xtvlen,
536 const struct iovec *iov, size_t iovlen,
537 ssize_t (*f)(void *, size_t, _SYSIO_OFF_T, void *),
540 struct doio_helper_args arguments;
544 return _sysio_enumerate_extents(xtv, xtvlen,
546 (ssize_t (*)(const struct iovec *, int,
549 void *))_sysio_doio_helper,