2 * This Cplant(TM) source code is the property of Sandia National
5 * This Cplant(TM) source code is copyrighted by Sandia National
8 * The redistribution of this Cplant(TM) source code is subject to the
9 * terms of the GNU Lesser General Public License
10 * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html)
12 * Cplant(TM) Copyright 1998-2004 Sandia Corporation.
13 * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive
14 * license for use of this work by or on behalf of the US Government.
15 * Export of this program may require a license from the United States
20 * This library is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU Lesser General Public
22 * License as published by the Free Software Foundation; either
23 * version 2.1 of the License, or (at your option) any later version.
25 * This library is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
28 * Lesser General Public License for more details.
30 * You should have received a copy of the GNU Lesser General Public
31 * License along with this library; if not, write to the Free Software
32 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
34 * Questions or comments about this library should be sent to:
37 * Sandia National Laboratories, New Mexico
39 * Albuquerque, NM 87185-1110
48 #include <stdio.h> /* for NULL */
54 #if !(defined(REDSTORM) || defined(MAX_IOVEC))
59 #include <sys/syscall.h>
61 #include <sys/types.h>
63 #include <sys/fcntl.h>
68 #include <sys/statvfs.h>
69 #include <sys/statfs.h>
73 #include <sys/queue.h>
82 #include "fs_native.h"
88 #if defined(SYSIO_SYS_getdirentries)
90 #elif defined(SYSIO_SYS_getdents64)
92 #elif defined(SYSIO_SYS_getdents)
93 #if defined(_LARGEFILE64_SOURCE)
96 * Kernel version of directory entry.
100 unsigned long ld_off;
101 unsigned short ld_reclen;
105 #else /* !defined(_LARGEFILE64_SOURCE) */
107 #endif /* defined(_LARGEFILE64_SOURCE) */
108 #else /* catch-none */
109 #error No usable directory fill entries interface available
113 * Native file system information we keep per FS.
115 struct native_filesystem {
116 time_t nfs_atimo; /* attr timeout (sec) */
120 * Given fs, return driver private part.
123 ((struct native_filesystem *)(fs)->fs_private)
126 * Native file identifiers format.
128 struct native_inode_identifier {
129 dev_t dev; /* device number */
130 ino_t ino; /* i-number */
131 #ifdef HAVE_GENERATION
132 unsigned int gen; /* generation number */
137 * Driver-private i-node information we keep about local host file
140 struct native_inode {
142 ni_seekok : 1, /* can seek? */
143 ni_attrvalid : 1, /* cached attrs ok? */
144 ni_resetfpos : 1; /* reset fpos? */
145 struct native_inode_identifier ni_ident; /* unique identifier */
146 struct file_identifier ni_fileid; /* ditto */
147 int ni_fd; /* host fildes */
148 int ni_oflags; /* flags, from open */
149 unsigned ni_nopens; /* soft ref count */
150 _SYSIO_OFF_T ni_fpos; /* current pos */
151 time_t ni_attrtim; /* attrs expire time */
155 * Cached attributes usable?
157 #define NATIVE_ATTRS_VALID(nino, t) \
158 ((nino)->ni_attrtim && (t) < (nino)->ni_attrtim)
161 * Native IO path arguments.
164 char nio_op; /* 'r' or 'w' */
165 struct native_inode *nio_nino; /* native ino */
168 static int native_inop_lookup(struct pnode *pno,
170 struct intent *intnt,
172 static int native_inop_getattr(struct pnode *pno,
174 struct intnl_stat *stbuf);
175 static int native_inop_setattr(struct pnode *pno,
178 struct intnl_stat *stbuf);
179 static ssize_t native_filldirentries(struct inode *ino,
183 static int native_inop_mkdir(struct pnode *pno, mode_t mode);
184 static int native_inop_rmdir(struct pnode *pno);
185 static int native_inop_symlink(struct pnode *pno, const char *data);
186 static int native_inop_readlink(struct pnode *pno, char *buf, size_t bufsiz);
187 static int native_inop_open(struct pnode *pno, int flags, mode_t mode);
188 static int native_inop_close(struct inode *ino);
189 static int native_inop_link(struct pnode *old, struct pnode *new);
190 static int native_inop_unlink(struct pnode *pno);
191 static int native_inop_rename(struct pnode *old, struct pnode *new);
192 static int native_inop_read(struct inode *ino, struct ioctx *ioctx);
193 static int native_inop_write(struct inode *ino, struct ioctx *ioctx);
194 static _SYSIO_OFF_T native_inop_pos(struct inode *ino, _SYSIO_OFF_T off);
195 static int native_inop_iodone(struct ioctx *ioctx);
196 static int native_inop_fcntl(struct inode *ino, int cmd, va_list ap, int *rtn);
197 static int native_inop_sync(struct inode *ino);
198 static int native_inop_datasync(struct inode *ino);
199 static int native_inop_ioctl(struct inode *ino,
200 unsigned long int request,
202 static int native_inop_mknod(struct pnode *pno, mode_t mode, dev_t dev);
204 static int native_inop_statvfs(struct pnode *pno,
206 struct intnl_statvfs *buf);
208 static void native_inop_gone(struct inode *ino);
210 static struct inode_ops native_i_ops = {
214 native_filldirentries,
218 native_inop_readlink,
230 native_inop_datasync,
239 static int native_fsswop_mount(const char *source,
242 struct pnode *tocover,
243 struct mount **mntp);
245 static struct fssw_ops native_fssw_ops = {
249 static void native_fsop_gone(struct filesys *fs);
251 static struct filesys_ops native_inodesys_ops = {
256 * This example driver plays a strange game. It maintains a private,
257 * internal mount -- Its own separate, rooted, name space. The local
258 * file system's entire name space is available via this tree.
260 * This simplifies the implementation. At mount time, we need to generate
261 * a path-node to be used as a root. This allows us to look up the needed
262 * node in the host name space and leverage a whole lot of support from
265 static struct mount *native_internal_mount = NULL;
268 * Given i-node, return driver private part.
270 #define I2NI(ino) ((struct native_inode *)((ino)->i_private))
276 native_stat(const char *path,
279 struct intnl_stat *buf)
281 struct native_inode *nino;
283 struct _sysio_native_stat stbuf;
285 nino = ino ? I2NI(ino) : NULL;
288 err = syscall(SYSIO_SYS_stat, path, &stbuf);
289 else if (nino && nino->ni_fd >= 0)
290 err = syscall(SYSIO_SYS_fstat, nino->ni_fd, &stbuf);
295 nino->ni_attrtim = 0;
299 nino->ni_attrtim = t;
300 SYSIO_COPY_STAT(&stbuf, &ino->i_stbuf);
307 SYSIO_COPY_STAT(&stbuf, buf);
312 * Introduce an i-node to the system.
314 static struct inode *
315 native_i_new(struct filesys *fs, time_t expiration, struct intnl_stat *buf)
317 struct native_inode *nino;
320 nino = malloc(sizeof(struct native_inode));
323 bzero(&nino->ni_ident, sizeof(nino->ni_ident));
325 nino->ni_attrvalid = 0;
326 nino->ni_resetfpos = 0;
327 nino->ni_ident.dev = buf->st_dev;
328 nino->ni_ident.ino = buf->st_ino;
329 #ifdef HAVE_GENERATION
330 nino->ni_ident.gen = buf->st_gen;
332 nino->ni_fileid.fid_data = &nino->ni_ident;
333 nino->ni_fileid.fid_len = sizeof(nino->ni_ident);
338 nino->ni_attrtim = expiration;
352 * Initialize this driver.
359 * Capture current process umask and reset our process umask to
360 * zero. All permission bits to open/creat/setattr are absolute --
361 * They've already had a umask applied, when appropriate.
364 _sysio_umask = syscall(SYSIO_SYS_umask, 0);
366 * For Red Storm, this functionality is handled in cstart.
367 * The mask to be "captured" has been sent already.
368 * This eliminates a system call from every node!
370 #endif /* REDSTORM */
372 return _sysio_fssw_register("native", &native_fssw_ops);
376 * Create private, internal, view of the host's name space.
379 create_internal_namespace(const void *data)
384 struct native_filesystem *nfs;
387 struct inode *rootino;
388 struct pnode_base *rootpb;
389 static struct qstr noname = { NULL, 0, 0 };
392 struct intnl_stat stbuf;
394 static struct option_value_info v[] = {
399 if (native_internal_mount) {
410 if (data && (len = strlen((char *)data))) {
411 opts = malloc(len + 1);
414 (void )strcpy(opts, data);
415 if (_sysio_get_args(opts, v) - opts != (ssize_t )len)
418 ul = strtoul(v[0].ovi_value, &cp, 0);
419 if (*cp != '\0' || ul >= UINT_MAX)
427 * We maintain an artificial, internal, name space in order to
428 * have access to fully qualified path names in the various routines.
429 * Initialize that name space now.
436 * This really should be per-mount. Hmm, but that's best done
437 * as proper sub-mounts in the core and not this driver. We reconcile
438 * now, here, by putting the mount options on the file system. That
439 * means they are global and only can be passed at the initial mount.
441 * Maybe do it right some day?
443 nfs = malloc(sizeof(struct native_filesystem));
449 if ((unsigned long)nfs->nfs_atimo != ul) {
453 fs = _sysio_fs_new(&native_inodesys_ops, 0, nfs);
462 t = _SYSIO_LOCAL_TIME();
463 err = native_stat("/", NULL, 0, &stbuf);
466 rootino = native_i_new(fs, t + FS2NFS(fs)->nfs_atimo, &stbuf);
473 * Generate base path-node for root.
475 rootpb = _sysio_pb_new(&noname, NULL, rootino);
482 * Mount it. This name space is disconnected from the
483 * rest of the system -- Only available within this driver.
485 err = _sysio_do_mount(fs, rootpb, 0, NULL, &mnt);
489 native_internal_mount = mnt;
493 if (_sysio_do_unmount(mnt) != 0)
501 _sysio_pb_gone(rootpb);
515 native_fsswop_mount(const char *source,
518 struct pnode *tocover,
522 struct nameidata nameidata;
526 * Caller must use fully qualified path names when specifying
532 if (!native_internal_mount) {
533 err = create_internal_namespace(data);
536 } else if (data && *(char *)data)
540 * Lookup the source in the internally maintained name space.
542 ND_INIT(&nameidata, 0, source, native_internal_mount->mnt_root, NULL);
543 err = _sysio_path_walk(native_internal_mount->mnt_root, &nameidata);
548 * Have path-node specified by the given source argument. Let the
549 * system finish the job, now.
552 _sysio_do_mount(native_internal_mount->mnt_fs,
553 nameidata.nd_pno->p_base,
558 * Release the internal name space pnode and clean up any
559 * aliases we might have generated. We really don't need to cache them
560 * as they are only used at mount time.
562 P_RELE(nameidata.nd_pno);
563 (void )_sysio_p_prune(native_internal_mount->mnt_root);
566 FS_REF(native_internal_mount->mnt_fs);
573 native_i_invalid(struct inode *inop, struct intnl_stat *stat)
575 struct native_inode *nino;
578 * Validate passed in inode against stat struct info
582 if (!nino->ni_attrtim ||
583 (nino->ni_ident.dev != stat->st_dev ||
584 nino->ni_ident.ino != stat->st_ino ||
585 #ifdef HAVE_GENERATION
586 nino->ni_ident.gen != stat->st_gen ||
588 ((inop)->i_stbuf.st_mode & S_IFMT) != (stat->st_mode & S_IFMT)) ||
589 (((inop)->i_stbuf.st_rdev != stat->st_rdev) &&
590 (S_ISCHR((inop)->i_stbuf.st_mode) ||
591 S_ISBLK((inop)->i_stbuf.st_mode)))) {
592 nino->ni_attrtim = 0; /* invalidate attrs */
598 static struct inode *
599 native_iget(struct filesys *fs, time_t expire, struct intnl_stat *stbp)
602 struct native_inode_identifier ident;
603 struct file_identifier fileid;
605 bzero(&ident, sizeof(ident));
606 ident.dev = stbp->st_dev;
607 ident.ino = stbp->st_ino;
608 #ifdef HAVE_GENERATION
609 ident.gen = stbp->st_gen;
611 fileid.fid_data = &ident;
612 fileid.fid_len = sizeof(ident);
613 ino = _sysio_i_find(fs, &fileid);
615 ino->i_stbuf = *stbp;
616 I2NI(ino)->ni_attrtim = expire;
619 return native_i_new(fs, expire, stbp);
623 * Find, and validate, or create i-node by host-relative path. Returned i-node
627 native_ibind(struct filesys *fs,
632 struct intnl_stat ostbuf, stbuf;
637 ostbuf = (*inop)->i_stbuf;
639 err = native_stat(path, *inop, t, &stbuf);
647 if (!native_i_invalid(*inop, &ostbuf))
652 _sysio_i_undead(*inop);
656 if (!(ino = native_iget(fs, t + FS2NFS(fs)->nfs_atimo, &stbuf)))
664 native_inop_lookup(struct pnode *pno,
666 struct intent *intnt __IS_UNUSED,
667 const char *path __IS_UNUSED)
674 *inop = pno->p_base->pb_ino;
677 * Try to use the cached attributes unless the intent
678 * indicates we are looking up the last component and
679 * caller wants attributes. In that case, force a refresh.
681 t = _SYSIO_LOCAL_TIME();
683 (path || !intnt || (intnt->int_opmask & INT_GETATTR) == 0) &&
684 NATIVE_ATTRS_VALID(I2NI(*inop), t))
688 * Don't have an inode yet. Because we translate everything back to
689 * a single name space for the host, we will assume the object the
690 * caller is looking for has no existing alias in our internal
691 * name space. We don't see the same file on different mounts in the
692 * underlying host FS as the same file.
694 * The file identifier *will* be unique. It's got to have a different
697 fqpath = _sysio_pb_path(pno->p_base, '/');
700 fs = pno->p_mount->mnt_fs;
701 err = native_ibind(fs, fqpath, t + FS2NFS(fs)->nfs_atimo, inop);
709 native_inop_getattr(struct pnode *pno,
711 struct intnl_stat *stat)
713 struct native_inode *nino;
717 * We just cannot use the cached attributes when getattr is
718 * called. Had the caller felt those were sufficient then
719 * they could have (would have?) simply used what was cached
720 * after revalidating. In this case, there's a good chance the
721 * caller is looking for the current time stamps and/or size. Something
722 * pretty volatile anyway.
724 err = 0; /* compiler cookie */
730 path = _sysio_pb_path(pno->p_base, '/');
733 fs = pno->p_mount->mnt_fs;
734 t = _SYSIO_LOCAL_TIME();
735 err = native_stat(path, ino, t + FS2NFS(fs)->nfs_atimo, stat);
737 } else if ((nino = I2NI(ino))->ni_fd >= 0)
739 * Don't have access to the fs record anymore. Just
740 * refresh but keep the current timeout.
742 err = native_stat(NULL, ino, nino->ni_attrtim, stat);
745 * Dev inodes don't open in this driver. We won't have
746 * a file descriptor with which to do the deed then. Satisfy
747 * the request from the cached copy of the attributes.
751 sizeof(struct intnl_stat));
758 #ifdef SYSIO_SYS_utime
760 _ut(const char *path, time_t actime, time_t modtime)
765 ut.modtime = modtime;
766 return syscall(SYSIO_SYS_utime, path, &ut);
770 _ut(const char *path, time_t actime, time_t modtime)
772 struct timeval tv[2];
774 tv[0].tv_sec = actime;
776 tv[1].tv_sec = modtime;
778 return syscall(SYSIO_SYS_utimes, path, &tv);
783 native_inop_setattr(struct pnode *pno,
786 struct intnl_stat *stat)
789 struct native_inode *nino;
794 nino = ino ? I2NI(ino) : NULL;
798 if (fd < 0 || mask & (SETATTR_MTIME|SETATTR_ATIME)) {
801 path = _sysio_pb_path(pno->p_base, '/');
807 * Get current status for undo.
809 err = native_stat(path, ino, 0, NULL);
813 if (mask & SETATTR_MODE) {
817 * Alter permissions attribute.
819 mode = stat->st_mode & 07777;
822 ? syscall(SYSIO_SYS_chmod, path, mode)
823 : syscall(SYSIO_SYS_fchmod, fd, mode);
828 mask &= ~SETATTR_MODE;
829 else if (mask & (SETATTR_MTIME|SETATTR_ATIME)) {
830 time_t actime, modtime;
833 * Alter access and/or modify time attributes.
835 actime = ino->i_stbuf.st_atime;
836 modtime = ino->i_stbuf.st_mtime;
837 if (mask & SETATTR_ATIME)
838 actime = stat->st_atime;
839 if (mask & SETATTR_MTIME)
840 modtime = stat->st_mtime;
841 if (_ut(path, actime, modtime) != 0)
845 mask &= ~(SETATTR_MTIME|SETATTR_ATIME);
846 else if (mask & (SETATTR_UID|SETATTR_GID)) {
849 * Alter owner and/or group identifiers.
853 ? syscall(SYSIO_SYS_chown,
861 : syscall(SYSIO_SYS_fchown,
873 mask &= ~(SETATTR_UID|SETATTR_GID);
874 else if (mask & SETATTR_LEN) {
876 * Do the truncate last. It can't be undone.
879 ? syscall(SYSIO_SYS_truncate, path, stat->st_size)
880 : syscall(SYSIO_SYS_ftruncate, fd, stat->st_size);
887 * Undo after error. Some or all of this might not work... We
890 if (mask & (SETATTR_UID|SETATTR_GID)) {
892 ? syscall(SYSIO_SYS_chown,
895 ? ino->i_stbuf.st_uid
898 ? ino->i_stbuf.st_gid
900 : syscall(SYSIO_SYS_fchown,
903 ? ino->i_stbuf.st_uid
906 ? ino->i_stbuf.st_gid
909 if (mask & (SETATTR_MTIME|SETATTR_ATIME))
910 (void )_ut(path, ino->i_stbuf.st_atime, ino->i_stbuf.st_mtime);
/*
 * Undo: put back the permission bits held in the cached attributes.
 * fchmod takes the descriptor as its first argument, just as in the
 * forward path of this routine.
 */
911 if (mask & SETATTR_MODE) {
913 ? syscall(SYSIO_SYS_chmod, path, ino->i_stbuf.st_mode & 07777)
914 : syscall(SYSIO_SYS_fchmod, fd, ino->i_stbuf.st_mode & 07777);
918 * We must refresh the cached attributes.
920 if (!err && native_stat(path, ino, _SYSIO_LOCAL_TIME(), NULL) != 0)
928 native_pos(int fd, _SYSIO_OFF_T *offset, int whence)
933 assert(*offset >= 0);
936 #if defined(_LARGEFILE64_SOURCE) && defined(SYSIO_SYS__llseek)
940 syscall(SYSIO_SYS__llseek,
942 (unsigned int)(off >> 32),
951 syscall(SYSIO_SYS_lseek,
964 native_ifilldirentries(struct native_inode *nino,
971 #if defined(SYSIO_SYS_getdirentries)
972 _SYSIO_OFF_T waste=*posp;
979 * Stream-oriented access requires that we reposition prior to the
982 assert(nino->ni_seekok);
983 if (*posp != nino->ni_fpos || nino->ni_resetfpos) {
984 nino->ni_fpos = *posp;
985 err = native_pos(nino->ni_fd, &nino->ni_fpos, SEEK_SET);
987 nino->ni_resetfpos = 1;
990 nino->ni_resetfpos = 0;
994 #if defined(SYSIO_SYS_getdirentries)
995 syscall(SYSIO_SYS_getdirentries,
1000 #elif defined(SYSIO_SYS_getdents64)
1001 syscall(SYSIO_SYS_getdents64, nino->ni_fd, buf, nbytes);
1002 #elif defined(SYSIO_SYS_getdents)
1003 syscall(SYSIO_SYS_getdents, nino->ni_fd, buf, nbytes);
1009 * Stream-oriented access requires that we discover where we are
1012 if ((err = native_pos(nino->ni_fd, &nino->ni_fpos, SEEK_CUR)) != 0) {
1014 * Leave the position at the old value, I suppose.
1016 nino->ni_resetfpos = 1;
1019 *posp = nino->ni_fpos;
1024 native_filldirentries(struct inode *ino,
1029 struct native_inode *nino = I2NI(ino);
1033 struct linux_dirent *ldp;
1034 struct dirent64 *d64p;
1039 #define count nbytes
1043 assert(nino->ni_fd >= 0);
1047 while (!(bp = malloc(count))) {
1049 if (count < sizeof(struct dirent))
1053 cc = native_ifilldirentries(nino, posp, bp, count);
1061 ldp = (struct linux_dirent *)bp;
1062 d64p = (struct dirent64 *)buf;
1064 namlen = strlen(ldp->ld_name);
1065 reclen = sizeof(*d64p) - sizeof(d64p->d_name) + namlen;
1066 if (nbytes <= reclen)
1068 d64p->d_ino = ldp->ld_ino;
1069 d64p->d_off = nino->ni_fpos = ldp->ld_off;
1071 (((reclen + sizeof(long))) / sizeof(long)) * sizeof(long);
1072 if (nbytes < d64p->d_reclen)
1073 d64p->d_reclen = reclen + 1;
1074 d64p->d_type = DT_UNKNOWN; /* you lose -- sorry. */
1075 (void )memcpy(d64p->d_name, ldp->ld_name, namlen);
1077 * Zero pad the rest.
1079 for (cp = d64p->d_name + namlen, n = d64p->d_reclen - reclen;
1083 cc -= ldp->ld_reclen;
1084 ldp = (struct linux_dirent *)((char *)ldp + ldp->ld_reclen);
1085 nbytes -= d64p->d_reclen;
1086 d64p = (struct dirent64 *)((char *)d64p + d64p->d_reclen);
1090 (d64p == (struct dirent64 *)buf && cc)
1092 : (char *)d64p - buf;
1101 native_inop_mkdir(struct pnode *pno, mode_t mode)
1106 path = _sysio_pb_path(pno->p_base, '/');
1110 err = syscall(SYSIO_SYS_mkdir, path, mode);
1118 native_inop_rmdir(struct pnode *pno)
1123 path = _sysio_pb_path(pno->p_base, '/');
1127 err = syscall(SYSIO_SYS_rmdir, path);
1135 native_inop_symlink(struct pnode *pno, const char *data)
1140 path = _sysio_pb_path(pno->p_base, '/');
1144 err = syscall(SYSIO_SYS_symlink, data, path);
1152 native_inop_readlink(struct pnode *pno, char *buf, size_t bufsiz)
1157 path = _sysio_pb_path(pno->p_base, '/');
1160 i = syscall(SYSIO_SYS_readlink, path, buf, bufsiz);
1168 native_inop_open(struct pnode *pno, int flags, mode_t mode)
1170 struct native_inode *nino;
1174 path = _sysio_pb_path(pno->p_base, '/');
1179 * Whether the file is already open, or not, makes no difference.
1180 * Want to always give the host OS a chance to authorize in case
1181 * something has changed underneath us.
1183 if (flags & O_WRONLY) {
1185 * Promote write-only attempt to RW.
1191 flags |= O_LARGEFILE;
1193 fd = syscall(SYSIO_SYS_open, path, flags, mode);
1194 if (!pno->p_base->pb_ino && fd >= 0) {
1199 * Success but we need to return an i-node.
1201 fs = pno->p_mount->mnt_fs;
1205 _SYSIO_LOCAL_TIME() + FS2NFS(fs)->nfs_atimo,
1206 &pno->p_base->pb_ino);
1208 (void )syscall(SYSIO_SYS_close, fd);
1219 * Remember this new open.
1221 nino = I2NI(pno->p_base->pb_ino);
1223 assert(nino->ni_nopens);
1225 if (nino->ni_fd >= 0) {
1226 if ((nino->ni_oflags & O_RDWR) ||
1227 (flags & (O_RDONLY|O_WRONLY|O_RDWR)) == O_RDONLY) {
1231 (void )syscall(SYSIO_SYS_close, fd);
1234 (void )syscall(SYSIO_SYS_close, nino->ni_fd);
1237 * Invariant; First open. Must init.
1239 nino->ni_resetfpos = 0;
1243 * Need to know whether we can seek on this
1247 native_pos(nino->ni_fd, &nino->ni_fpos, SEEK_CUR) != 0 ? 0 : 1;
1253 native_inop_close(struct inode *ino)
1255 struct native_inode *nino = I2NI(ino);
1258 if (nino->ni_fd < 0)
1261 assert(nino->ni_nopens);
1262 if (--nino->ni_nopens) {
1264 * Hmmm. We really don't need anything else. However, some
1265 * filesystems try to implement a sync-on-close semantic.
1266 * As this appears now, that is lost. Might want to change
1267 * it somehow in the future?
1272 err = syscall(SYSIO_SYS_close, nino->ni_fd);
1277 nino->ni_resetfpos = 0;
1283 native_inop_link(struct pnode *old, struct pnode *new)
1286 char *opath, *npath;
1290 opath = _sysio_pb_path(old->p_base, '/');
1291 npath = _sysio_pb_path(new->p_base, '/');
1292 if (!(opath && npath)) {
1297 err = syscall(SYSIO_SYS_link, opath, npath);
1310 native_inop_unlink(struct pnode *pno)
1315 path = _sysio_pb_path(pno->p_base, '/');
1320 * For this driver, unlink is easy with open files. Since the
1321 * file remains open to the system, too, the descriptors are still
1324 * Other drivers will have some difficulty here as the entry in the
1325 * file system name space must be removed without sacrificing access
1326 * to the file itself. In NFS this is done with a mechanism referred
1327 * to as a `silly delete'. The file is moved to a temporary name
1328 * (usually .NFSXXXXXX, where the X's are replaced by the PID and some
1329 * unique characters) in order to simulate the proper semantic.
1331 if (syscall(SYSIO_SYS_unlink, path) != 0)
1338 native_inop_rename(struct pnode *old, struct pnode *new)
1341 char *opath, *npath;
1343 opath = _sysio_pb_path(old->p_base, '/');
1344 npath = _sysio_pb_path(new->p_base, '/');
1345 if (!(opath && npath)) {
1350 err = syscall(SYSIO_SYS_rename, opath, npath);
1363 dopio(void *buf, size_t count, _SYSIO_OFF_T off, struct native_io *nio)
1367 if (!nio->nio_nino->ni_seekok) {
1368 if (off != nio->nio_nino->ni_fpos) {
1370 * They're trying to reposition. Can't
1371 * seek on this descriptor so we err out now.
1377 syscall(nio->nio_op == 'r'
1380 nio->nio_nino->ni_fd,
1384 nio->nio_nino->ni_fpos += cc;
1387 syscall((nio->nio_op == 'r'
1389 : SYSIO_SYS_pwrite),
1390 nio->nio_nino->ni_fd,
1399 doiov(const struct iovec *iov,
1403 struct native_io *nio)
1407 #if !(defined(REDSTORM) || defined(MAX_IOVEC))
1408 #define MAX_IOVEC INT_MAX
1416 * Avoid the reposition call if we're already at the right place.
1417 * Allows us to access pipes and fifos.
1419 if (off != nio->nio_nino->ni_fpos) {
1422 err = native_pos(nio->nio_nino->ni_fd, &off, SEEK_SET);
1424 nio->nio_nino->ni_resetfpos = 1;
1427 nio->nio_nino->ni_resetfpos = 0;
1428 nio->nio_nino->ni_fpos = off;
1432 * The {read,write}v is safe as this routine is only ever called
1433 * by _sysio_enumerate_extents() and that routine is exact. It never
1434 * passes iovectors including tails.
1439 ? syscall(nio->nio_op == 'r' ? SYSIO_SYS_readv : SYSIO_SYS_writev,
1440 nio->nio_nino->ni_fd,
1445 _sysio_enumerate_iovec(iov,
1449 (ssize_t (*)(void *,
1457 nio->nio_nino->ni_fpos += cc;
1460 #if !(defined(REDSTORM) || defined(MAX_IOVEC))
1467 lockop_all(struct native_inode *nino,
1468 struct intnl_xtvec *xtv,
1478 flock.l_whence = SEEK_SET;
1480 flock.l_start = xtv->xtv_off;
1481 flock.l_len = xtv->xtv_len;
1484 syscall(SYSIO_SYS_fcntl,
1495 order_xtv(const struct intnl_xtvec *xtv1, const struct intnl_xtvec *xtv2)
1498 if (xtv1->xtv_off < xtv2->xtv_off)
1500 if (xtv1->xtv_off > xtv2->xtv_off)
1507 doio(char op, struct ioctx *ioctx)
1509 struct native_inode *nino;
1512 struct intnl_xtvec *oxtv;
1515 struct native_io arguments;
1518 struct intnl_xtvec *front, *rear, tmp;
1521 nino = I2NI(ioctx->ioctx_ino);
1523 dolocks = ioctx->ioctx_xtvlen > 1 && nino->ni_seekok;
1526 * Must lock the regions (in order!) since we can't do
1527 * strided-IO as a single atomic operation.
1529 oxtv = malloc(ioctx->ioctx_xtvlen * sizeof(struct intnl_xtvec));
1534 ioctx->ioctx_xtvlen * sizeof(struct intnl_xtvec));
1536 ioctx->ioctx_xtvlen,
1537 sizeof(struct intnl_xtvec),
1538 (int (*)(const void *, const void *))order_xtv);
1541 oxtv, ioctx->ioctx_xtvlen,
1542 op == 'r' ? F_RDLCK : F_WRLCK);
1549 arguments.nio_op = op;
1550 arguments.nio_nino = nino;
1552 _sysio_enumerate_extents(ioctx->ioctx_xtv, ioctx->ioctx_xtvlen,
1553 ioctx->ioctx_iov, ioctx->ioctx_iovlen,
1554 (ssize_t (*)(const struct iovec *,
1563 * Must unlock in reverse order.
1566 rear = front + ioctx->ioctx_xtvlen - 1;
1567 while (front < rear) {
1572 if (lockop_all(nino, oxtv, ioctx->ioctx_xtvlen, F_UNLCK) != 0)
1577 if ((ioctx->ioctx_cc = cc) < 0) {
1578 ioctx->ioctx_errno = -ioctx->ioctx_cc;
1579 ioctx->ioctx_cc = -1;
1585 native_inop_read(struct inode *ino __IS_UNUSED, struct ioctx *ioctx)
1588 return doio('r', ioctx);
1592 native_inop_write(struct inode *ino __IS_UNUSED, struct ioctx *ioctx)
1595 return doio('w', ioctx);
1599 native_inop_pos(struct inode *ino, _SYSIO_OFF_T off)
1601 struct native_inode *nino = I2NI(ino);
1604 err = native_pos(nino->ni_fd, &off, SEEK_SET);
1605 return err < 0 ? err : off;
1609 native_inop_iodone(struct ioctx *ioctxp __IS_UNUSED)
1613 * It's always done in this driver. It completed when posted.
1619 native_inop_fcntl(struct inode *ino,
1624 struct native_inode *nino = I2NI(ino);
1628 if (nino->ni_fd < 0)
1638 *rtn = syscall(SYSIO_SYS_fcntl, nino->ni_fd, cmd);
1651 arg = va_arg(ap, long);
1652 *rtn = syscall(SYSIO_SYS_fcntl, nino->ni_fd, cmd, arg);
1664 native_inop_mknod(struct pnode *pno __IS_UNUSED,
1665 mode_t mode __IS_UNUSED,
1666 dev_t dev __IS_UNUSED)
1672 #ifdef _HAVE_STATVFS
1674 native_inop_statvfs(struct pnode *pno,
1676 struct intnl_statvfs *buf)
1683 if (!ino || I2NI(ino)->ni_fd < 0) {
1684 path = _sysio_pb_path(pno->p_base, '/');
1690 * The syscall interface does not support SYSIO_SYS_fstatvfs.
1691 * Should possibly return ENOSYS, but thought it
1692 * better to use SYSIO_SYS_fstatfs and fill in as much of
1693 * the statvfs structure as possible. This allows
1694 * for more of a test of the sysio user interface.
1698 ? syscall(SYSIO_SYS_statfs, path, &fs)
1699 : syscall(SYSIO_SYS_fstatfs, I2NI(ino)->ni_fd, &fs);
1705 buf->f_bsize = fs.f_bsize; /* file system block size */
1706 buf->f_frsize = fs.f_bsize; /* file system fundamental block size */
1707 buf->f_blocks = fs.f_blocks;
1708 buf->f_bfree = fs.f_bfree;
1709 buf->f_bavail = fs.f_bavail;
1710 buf->f_files = fs.f_files; /* Total number serial numbers */
1711 buf->f_ffree = fs.f_ffree; /* Number free serial numbers */
1712 buf->f_favail = fs.f_ffree; /* Number free ser num for non-privileged*/
1713 buf->f_fsid = fs.f_fsid.__val[1];
1714 buf->f_flag = 0; /* No equiv in statfs; maybe use type? */
1715 buf->f_namemax = fs.f_namelen;
1721 native_inop_sync(struct inode *ino)
1725 assert(I2NI(ino)->ni_fd >= 0);
1727 err = syscall(SYSIO_SYS_fsync, I2NI(ino)->ni_fd);
1734 native_inop_datasync(struct inode *ino)
1736 struct native_inode *nino;
1740 assert(nino->ni_fd >= 0);
1742 #ifdef SYSIO_SYS_fdatasync
1743 err = syscall(SYSIO_SYS_fdatasync, I2NI(ino)->ni_fd);
1746 #warning No fdatasync system call -- Using fsync instead!
1748 err = syscall(SYSIO_SYS_fsync, I2NI(ino)->ni_fd);
1755 #ifdef HAVE_LUSTRE_HACK
1757 native_inop_ioctl(struct inode *ino,
1758 unsigned long int request,
1761 struct native_inode *nino;
1762 long arg1, arg2, arg3, arg4;
1766 assert(nino->ni_fd >= 0);
1767 arg1 = va_arg(ap, long);
1768 arg2 = va_arg(ap, long);
1769 arg3 = va_arg(ap, long);
1770 arg4 = va_arg(ap, long);
1773 syscall(SYSIO_SYS_ioctl, I2NI(ino)->ni_fd, request,
1774 arg1, arg2, arg3, arg4);
1781 native_inop_ioctl(struct inode *ino __IS_UNUSED,
1782 unsigned long int request __IS_UNUSED,
1783 va_list ap __IS_UNUSED)
1787 * I'm lazy. Maybe implemented later.
1794 native_inop_gone(struct inode *ino)
1796 struct native_inode *nino = I2NI(ino);
1798 if (nino->ni_fd >= 0)
1799 (void )syscall(SYSIO_SYS_close, nino->ni_fd);
1801 free(ino->i_private);
1805 native_fsop_gone(struct filesys *fs __IS_UNUSED)
1808 free(fs->fs_private);
1810 * Nothing more to do. The per-FS record released above is the only
1811 * private part maintained for the native file interface.