2 * This Cplant(TM) source code is the property of Sandia National
5 * This Cplant(TM) source code is copyrighted by Sandia National
8 * The redistribution of this Cplant(TM) source code is subject to the
9 * terms of the GNU Lesser General Public License
10 * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html)
12 * Cplant(TM) Copyright 1998-2003 Sandia Corporation.
13 * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive
14 * license for use of this work by or on behalf of the US Government.
15 * Export of this program may require a license from the United States
20 * This library is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU Lesser General Public
22 * License as published by the Free Software Foundation; either
23 * version 2.1 of the License, or (at your option) any later version.
25 * This library is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
28 * Lesser General Public License for more details.
30 * You should have received a copy of the GNU Lesser General Public
31 * License along with this library; if not, write to the Free Software
32 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
34 * Questions or comments about this library should be sent to:
37 * Sandia National Laboratories, New Mexico
39 * Albuquerque, NM 87185-1110
48 #include <sys/queue.h>
49 #include <sys/types.h>
59 * Support for path and index nodes.
63 * Size of all names bucket-hash table.
65 #ifndef NAMES_TABLE_LEN
66 #define NAMES_TABLE_LEN 251
70 * Desired i-nodes cache size is MAX_INODES_MULTIPLIER times the number
71 * of slots in the names hash table.
73 #define MAX_INODES_MULTIPLIER 3
76 * Active i-nodes in the system and the number of same.
78 struct inodes_head _sysio_inodes;
79 static size_t n_inodes = 0;
81 * Desired number of active i-nodes.
83 static size_t max_inodes = (MAX_INODES_MULTIPLIER * NAMES_TABLE_LEN);
86 * System table for rapid access to component names.
88 static LIST_HEAD(, pnode_base) names[NAMES_TABLE_LEN];
90 * Number of names tracked by the system.
92 static size_t n_names = 0;
94 * Desired number of base path nodes to maintain.
96 static size_t max_names = (2 * NAMES_TABLE_LEN);
99 * Number of pnodes to grab per memory allocation when filling the
102 #define PNODES_PER_CHUNK ((8 * 1024) / sizeof(struct pnode) - 2)
106 * Allocation information for pnodes bulk allocation.
108 struct pnodes_block {
109 LIST_ENTRY(pnodes_block) pnblk_links;
110 struct pnode pnblk_nodes[PNODES_PER_CHUNK];
113 static LIST_HEAD( ,pnodes_block) pnblocks;
117 * List of all path-nodes (aliases) referenced by any tree.
119 struct pnodes_head _sysio_pnodes;
122 * Free path-nodes -- Not referenced by any tree for fas reuse.
124 static LIST_HEAD( ,pnode) free_pnodes;
127 * The system root -- Aka `/'.
129 struct pnode *_sysio_root = NULL;
132 * Initialize path and i-node support. Must be called before any other
133 * routine in this module.
140 TAILQ_INIT(&_sysio_inodes);
142 for (i = 0; i < NAMES_TABLE_LEN; i++)
143 LIST_INIT(&names[i]);
146 LIST_INIT(&pnblocks);
148 TAILQ_INIT(&_sysio_pnodes);
149 LIST_INIT(&free_pnodes);
155 * Garbage-collect idle i-nodes. We try to keep resource use limited to
156 * MAX_INODES_MULTIPLIER * max_names.
161 struct inode *next, *ino;
165 * I just can't figure out a good way to reclaim these well without
166 * getting really fancy and using complex algorithms. The
167 * base nodes hold references on them for a long time and then
168 * release them. Those will age to the front of the queue and
169 * we have to skip over them. Oh well...
171 t = MAX_INODES_MULTIPLIER * max_names;
172 if (max_inodes < t) {
174 * Oops. Nope. We want more inodes than names entries.
179 next = _sysio_inodes.tqh_first;
185 next = ino->i_nodes.tqe_next;
186 if (ino->i_ref || ino->i_immune)
189 } while (next && n_inodes > t);
196 hash(struct file_identifier *fid)
213 * Allocate and initialize a new i-node. Returned i-node is referenced.
215 * NB: The passed file identifier is not copied. It is, therefor, up to the
216 * caller to assure that the value is static until the inode is destroyed.
219 _sysio_i_new(struct filesys *fs,
220 struct file_identifier *fid,
224 struct inode_ops *ops,
228 struct itable_entry *head;
229 struct inode_ops operations;
231 if (n_inodes > max_inodes) {
233 * Try to limit growth.
238 ino = malloc(sizeof(struct inode));
243 if (S_ISBLK(type) || S_ISCHR(type) || S_ISFIFO(type)) {
247 * Replace some operations sent with
248 * those from the device table.
250 o = _sysio_dev_lookup(type, rdev);
251 operations.inop_open = o->inop_open;
252 operations.inop_close = o->inop_close;
253 operations.inop_read = o->inop_read;
254 operations.inop_write = o->inop_write;
255 operations.inop_pos = o->inop_pos;
256 operations.inop_iodone = o->inop_iodone;
257 operations.inop_datasync = o->inop_datasync;
258 operations.inop_ioctl = o->inop_ioctl;
260 I_INIT(ino, fs, type, rdev, &operations, fid, immunity, private);
262 TAILQ_INSERT_TAIL(&_sysio_inodes, ino, i_nodes);
263 head = &fs->fs_itbl[hash(fid) % FS_ITBLSIZ];
264 LIST_INSERT_HEAD(head, ino, i_link);
273 * Find existing i-node given i-number and pointers to FS record
277 _sysio_i_find(struct filesys *fs, struct file_identifier *fid)
280 struct itable_entry *head;
282 head = &fs->fs_itbl[hash(fid) % FS_ITBLSIZ];
286 for (ino = head->lh_first; ino; ino = ino->i_link.le_next)
287 if (ino->i_fid->fid_len == fid->fid_len &&
288 memcmp(ino->i_fid->fid_data,
290 fid->fid_len) == 0) {
299 * Force reclaim of idle i-node.
302 _sysio_i_gone(struct inode *ino)
308 LIST_REMOVE(ino, i_link);
309 TAILQ_REMOVE(&_sysio_inodes, ino, i_nodes);
310 (*ino->i_ops.inop_gone)(ino);
318 * Stale inode, zombie it and move it out of the way
321 _sysio_i_undead(struct inode *ino)
324 LIST_REMOVE(ino, i_link);
329 * Garbage collect idle path (and base path) nodes tracked by the system.
334 struct pnode *next, *pno;
337 next = _sysio_pnodes.tqh_first;
344 next = pno->p_nodes.tqe_next;
349 (void )_sysio_p_prune(pno);
350 next = pno->p_nodes.tqe_next;
355 (void )_sysio_p_prune(pno);
356 } while (n_names > t && next);
363 * Allocate and initialize a new base path node.
366 _sysio_pb_new(struct qstr *name, struct pnode_base *parent, struct inode *ino)
368 struct pnode_base *pb;
370 if (n_names > max_names) {
372 * Try to limit growth.
377 pb = malloc(sizeof(struct pnode_base) + name->len);
381 pb->pb_name.name = NULL;
382 pb->pb_name.len = name->len;
383 if (pb->pb_name.len) {
387 * Copy the passed name.
389 * We have put the space for the name immediately behind
390 * the record in order to maximize spatial locality.
392 cp = (char *)pb + sizeof(struct pnode_base);
393 (void )strncpy(cp, name->name, name->len);
394 pb->pb_name.name = cp;
395 assert(name->hashval);
396 pb->pb_name.hashval = name->hashval;
397 LIST_INSERT_HEAD(&names[name->hashval % NAMES_TABLE_LEN],
402 LIST_INIT(&pb->pb_children);
403 LIST_INIT(&pb->pb_aliases);
405 LIST_INSERT_HEAD(&parent->pb_children, pb, pb_sibs);
406 pb->pb_parent = parent;
415 * Destroy base path node, releasing resources back to the system.
417 * NB: Caller must release the inode referenced by the record.
420 pb_destroy(struct pnode_base *pb)
426 assert(!pb->pb_aliases.lh_first);
427 assert(!pb->pb_children.lh_first);
430 LIST_REMOVE(pb, pb_names);
432 LIST_REMOVE(pb, pb_sibs);
436 * This can help us catch pb-nodes that are free'd redundantly.
438 pb->pb_name.hashval = 0;
444 * Force reclaim of idle base path node.
447 _sysio_pb_gone(struct pnode_base *pb)
458 * Generate more path (alias) nodes for the fast allocator.
465 struct pnodes_block *pnblk;
470 pnblk = malloc(sizeof(struct pnodes_block));
473 LIST_INSERT_HEAD(&pnblocks, pnblk, pnblk_links);
474 pno = pnblk->pnblk_nodes;
477 pno = malloc(PNODES_PER_CHUNK * sizeof(struct pnode));
481 n = PNODES_PER_CHUNK;
483 LIST_INSERT_HEAD(&free_pnodes, pno, p_links);
495 struct pnodes_block *pnblk;
497 while ((pnblk = pnblocks.lh_first)) {
498 LIST_REMOVE(pnblk, pnblk_links);
505 * Allocate, initialize and establish appropriate links for new path (alias)
509 _sysio_p_new_alias(struct pnode *parent,
510 struct pnode_base *pb,
515 assert(!pb->pb_name.name || pb->pb_name.hashval);
517 pno = free_pnodes.lh_first;
520 pno = free_pnodes.lh_first;
524 LIST_REMOVE(pno, p_links);
527 pno->p_parent = parent;
533 LIST_INSERT_HEAD(&pb->pb_aliases, pno, p_links);
534 TAILQ_INSERT_TAIL(&_sysio_pnodes, pno, p_nodes);
540 * For reclamation of idle path (alias) node.
543 _sysio_p_gone(struct pnode *pno)
545 struct pnode_base *pb;
548 assert(!pno->p_cover);
550 TAILQ_REMOVE(&_sysio_pnodes, pno, p_nodes);
551 LIST_REMOVE(pno, p_links);
554 if (!(pb->pb_aliases.lh_first || pb->pb_children.lh_first))
557 LIST_INSERT_HEAD(&free_pnodes, pno, p_links);
561 * (Re)Validate passed path node.
564 _sysio_p_validate(struct pnode *pno, struct intent *intnt, const char *path)
567 struct pnode_base *rootpb;
570 ino = pno->p_base->pb_ino;
572 * An invalid pnode will not have an associated inode. We'll use
573 * the FS root inode, then -- It *must* be valid.
575 rootpb = pno->p_mount->mnt_root->p_base;
576 assert(rootpb->pb_ino);
578 rootpb->pb_ino->i_ops.inop_lookup(pno,
583 * If the inode lookup returns a different inode, release the old if
584 * present and point to the new.
586 if (err || pno->p_base->pb_ino != ino) {
587 if (pno->p_base->pb_ino)
588 I_RELE(pno->p_base->pb_ino);
589 pno->p_base->pb_ino = ino;
595 * Find (or create!) an alias for the given parent and name. A misnomer,
596 * really -- This is a "get". Returned path node is referenced.
599 _sysio_p_find_alias(struct pnode *parent,
603 struct pnode_base *pb;
608 * Find the named child.
612 * Try the names table.
614 pb = names[name->hashval % NAMES_TABLE_LEN].lh_first;
616 if (pb->pb_parent == parent->p_base &&
617 pb->pb_name.len == name->len &&
618 strncmp(pb->pb_name.name,
622 pb = pb->pb_names.le_next;
626 * Brute force through the parent's list of children.
628 pb = parent->p_base->pb_children.lh_first;
630 if (pb->pb_parent == parent->p_base &&
631 pb->pb_name.len == name->len &&
632 strncmp(pb->pb_name.name,
636 pb = pb->pb_sibs.le_next;
641 * None found, create new child.
643 pb = _sysio_pb_new(name, parent->p_base, NULL);
648 * Now find the proper alias. It's the one with the passed
652 pno = pb->pb_aliases.lh_first;
654 if (pno->p_parent == parent) {
658 pno = pno->p_links.le_next;
662 * Hmm. No alias. Just create an invalid one, to be
665 pno = _sysio_p_new_alias(parent, pb, parent->p_mount);
675 * Prune idle path base nodes freom the passed sub-tree, including the root.
678 _sysio_prune(struct pnode_base *rpb)
680 struct pnode_base *nxtpb, *pb;
682 nxtpb = rpb->pb_children.lh_first;
683 while ((pb = nxtpb)) {
684 nxtpb = pb->pb_sibs.le_next;
685 if (pb->pb_aliases.lh_first)
687 if (pb->pb_children.lh_first) {
693 if (rpb->pb_children.lh_first)
699 * Prune idle nodes from the passed sub-tree, including the root.
701 * Returns the number of aliases on the same mount that could not be pruned.
702 * i.e. a zero return means the entire sub-tree is gone.
705 _sysio_p_prune(struct pnode *root)
708 struct pnode_base *nxtpb, *pb;
709 struct pnode *nxtpno, *pno;
712 nxtpb = root->p_base->pb_children.lh_first;
713 while ((pb = nxtpb)) {
714 nxtpb = pb->pb_sibs.le_next;
715 nxtpno = pb->pb_aliases.lh_first;
720 while ((pno = nxtpno)) {
721 nxtpno = pno->p_links.le_next;
722 if (pno->p_mount != root->p_mount) {
724 * Not the alias we were looking for.
728 if (pno->p_base->pb_children.lh_first) {
730 * Node is interior. Recurse.
732 count += _sysio_p_prune(pno);
737 * Can't prune; It's active.
742 assert(!pno->p_cover); /* covered => ref'd! */
743 assert(!pno->p_base->pb_name.name ||
744 pno->p_base->pb_name.hashval);
748 if (pno->p_mount->mnt_root == pno) {
749 #ifndef AUTOMOUNT_FILE_NAME
754 * This is an automount-point. Must
755 * unmount before relcaim.
758 if (_sysio_do_unmount(pno->p_mount) != 0) {
771 * Can't get the root or we disconnect the sub-trees.
773 return count + (root->p_ref ? 1 : 0);
777 * All that is left is the root. Try for it too.
781 } else if (root->p_mount->mnt_root == root) {
782 #ifndef AUTOMOUNT_FILE_NAME
786 * This is an automount-point. Must
787 * unmount before relcaim.
790 if (_sysio_do_unmount(root->p_mount) != 0) {
802 * Return path tracked by the base path node ancestor chain.
804 * Remember, base path nodes track the path relative to the file system and
805 * path (alias) nodes track path relative to our name space -- They cross
809 _sysio_pb_path(struct pnode_base *pb, const char separator)
813 struct pnode_base *tmp;
817 * First pass: Traverse to the root of the sub-tree, remembering
823 n = tmp->pb_name.len;
824 len += tmp->pb_name.len;
827 tmp = tmp->pb_parent;
834 buf = malloc(len + 1);
838 * Fill in the path buffer -- Backwards, since we're starting
844 *cp = '\0'; /* NUL term */
847 cp -= tmp->pb_name.len;
848 n = tmp->pb_name.len;
850 (void )strncpy(cp, tmp->pb_name.name, n);
853 tmp = tmp->pb_parent;
860 * Common set attributes routine.
863 _sysio_setattr(struct pnode *pno,
866 struct intnl_stat *stbuf)
868 /* It is possible that pno is null (for ftruncate call). */
871 assert(!(pno->p_base->pb_ino && ino) || pno->p_base->pb_ino == ino);
872 if (IS_RDONLY(pno, ino))
875 if (!ino && pno->p_base->pb_ino)
876 ino = pno->p_base->pb_ino;
877 return (*ino->i_ops.inop_setattr)(pno, ino, mask, stbuf);