2 * This Cplant(TM) source code is the property of Sandia National
5 * This Cplant(TM) source code is copyrighted by Sandia National
8 * The redistribution of this Cplant(TM) source code is subject to the
9 * terms of the GNU Lesser General Public License
10 * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html)
12 * Cplant(TM) Copyright 1998-2006 Sandia Corporation.
13 * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive
14 * license for use of this work by or on behalf of the US Government.
15 * Export of this program may require a license from the United States
20 * This library is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU Lesser General Public
22 * License as published by the Free Software Foundation; either
23 * version 2.1 of the License, or (at your option) any later version.
25 * This library is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
28 * Lesser General Public License for more details.
30 * You should have received a copy of the GNU Lesser General Public
31 * License along with this library; if not, write to the Free Software
32 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
34 * Questions or comments about this library should be sent to:
37 * Sandia National Laboratories, New Mexico
39 * Albuquerque, NM 87185-1110
48 #include <sys/types.h>
50 #include <sys/queue.h>
59 * Support for path and index nodes.
63 * Size of all names bucket-hash table.
65 #ifndef NAMES_TABLE_LEN
66 #define NAMES_TABLE_LEN 251
70 * Desired i-nodes cache size is MAX_INODES_MULTIPLIER times the number
71 * of slots in the names hash table.
73 #define MAX_INODES_MULTIPLIER 3
76 * Active i-nodes in the system and the number of same.
78 struct inodes_head _sysio_inodes;
79 static size_t n_inodes = 0;
81 * Desired number of active i-nodes.
83 static size_t max_inodes = (MAX_INODES_MULTIPLIER * NAMES_TABLE_LEN);
86 * System table for rapid access to component names.
88 static LIST_HEAD(, pnode_base) names[NAMES_TABLE_LEN];
90 * Number of names tracked by the system.
92 static size_t n_names = 0;
94 * Desired number of base path nodes to maintain.
96 static size_t max_names = (2 * NAMES_TABLE_LEN);
99 * Number of pnodes to grab per memory allocation when filling the
102 #define PNODES_PER_CHUNK ((8 * 1024) / sizeof(struct pnode) - 2)
104 #ifdef ZERO_SUM_MEMORY
106 * Allocation information for pnodes bulk allocation.
108 struct pnodes_block {
109 LIST_ENTRY(pnodes_block) pnblk_links;
110 struct pnode pnblk_nodes[PNODES_PER_CHUNK];
113 static LIST_HEAD( ,pnodes_block) pnblocks;
117 * List of all path-nodes (aliases) referenced by any tree.
119 struct pnodes_head _sysio_pnodes;
122 * Free path-nodes -- Not referenced by any tree for fas reuse.
124 static LIST_HEAD( ,pnode) free_pnodes;
127 * The system root -- Aka `/'.
129 struct pnode *_sysio_root = NULL;
132 * Initialize path and i-node support. Must be called before any other
133 * routine in this module.
140 TAILQ_INIT(&_sysio_inodes);
142 for (i = 0; i < NAMES_TABLE_LEN; i++)
143 LIST_INIT(&names[i]);
145 #ifdef ZERO_SUM_MEMORY
146 LIST_INIT(&pnblocks);
148 TAILQ_INIT(&_sysio_pnodes);
149 LIST_INIT(&free_pnodes);
155 * Garbage-collect idle i-nodes. We try to keep resource use limited to
156 * MAX_INODES_MULTIPLIER * max_names.
161 struct inode *next, *ino;
165 * I just can't figure out a good way to reclaim these well without
166 * getting really fancy and using complex algorithms. The
167 * base nodes hold references on them for a long time and then
168 * release them. Those will age to the front of the queue and
169 * we have to skip over them. Oh well...
171 t = MAX_INODES_MULTIPLIER * max_names;
172 if (max_inodes < t) {
174 * Oops. Nope. We want more inodes than names entries.
179 next = _sysio_inodes.tqh_first;
185 next = ino->i_nodes.tqe_next;
186 if (ino->i_ref || ino->i_immune)
189 } while (next && n_inodes > t);
196 hash(struct file_identifier *fid)
213 * Allocate and initialize a new i-node. Returned i-node is referenced.
215 * NB: The passed file identifier is not copied. It is, therefor, up to the
216 * caller to assure that the value is static until the inode is destroyed.
219 _sysio_i_new(struct filesys *fs,
220 struct file_identifier *fid,
221 struct intnl_stat *stat,
223 struct inode_ops *ops,
227 struct itable_entry *head;
228 struct inode_ops operations;
230 if (n_inodes > max_inodes) {
232 * Try to limit growth.
237 ino = malloc(sizeof(struct inode));
242 if (S_ISBLK(stat->st_mode) ||
243 S_ISCHR(stat->st_mode) ||
244 S_ISFIFO(stat->st_mode)) {
248 * Replace some operations sent with
249 * those from the device table.
251 o = _sysio_dev_lookup(stat->st_mode, stat->st_rdev);
252 operations.inop_open = o->inop_open;
253 operations.inop_close = o->inop_close;
254 operations.inop_read = o->inop_read;
255 operations.inop_write = o->inop_write;
256 operations.inop_pos = o->inop_pos;
257 operations.inop_iodone = o->inop_iodone;
258 operations.inop_fcntl = o->inop_fcntl;
259 operations.inop_datasync = o->inop_datasync;
260 operations.inop_ioctl = o->inop_ioctl;
262 I_INIT(ino, fs, stat, &operations, fid, immunity, private);
264 TAILQ_INSERT_TAIL(&_sysio_inodes, ino, i_nodes);
265 head = &fs->fs_itbl[hash(fid) % FS_ITBLSIZ];
266 LIST_INSERT_HEAD(head, ino, i_link);
275 * Find existing i-node given i-number and pointers to FS record
279 _sysio_i_find(struct filesys *fs, struct file_identifier *fid)
282 struct itable_entry *head;
284 head = &fs->fs_itbl[hash(fid) % FS_ITBLSIZ];
288 for (ino = head->lh_first; ino; ino = ino->i_link.le_next)
289 if (ino->i_fid->fid_len == fid->fid_len &&
290 memcmp(ino->i_fid->fid_data,
292 fid->fid_len) == 0) {
301 * Force reclaim of idle i-node.
304 _sysio_i_gone(struct inode *ino)
310 LIST_REMOVE(ino, i_link);
311 TAILQ_REMOVE(&_sysio_inodes, ino, i_nodes);
312 (*ino->i_ops.inop_gone)(ino);
320 * Stale inode, zombie it and move it out of the way
323 _sysio_i_undead(struct inode *ino)
328 LIST_REMOVE(ino, i_link);
333 * Garbage collect idle path (and base path) nodes tracked by the system.
338 struct pnode *next, *pno;
341 next = _sysio_pnodes.tqh_first;
348 next = pno->p_nodes.tqe_next;
353 (void )_sysio_p_prune(pno);
354 next = pno->p_nodes.tqe_next;
359 (void )_sysio_p_prune(pno);
360 } while (n_names > t && next);
367 * Allocate and initialize a new base path node.
370 _sysio_pb_new(struct qstr *name, struct pnode_base *parent, struct inode *ino)
372 struct pnode_base *pb;
374 if (n_names > max_names) {
376 * Try to limit growth.
381 pb = malloc(sizeof(struct pnode_base) + name->len);
385 pb->pb_name.name = NULL;
386 pb->pb_name.len = name->len;
387 if (pb->pb_name.len) {
391 * Copy the passed name.
393 * We have put the space for the name immediately behind
394 * the record in order to maximize spatial locality.
396 cp = (char *)pb + sizeof(struct pnode_base);
397 (void )strncpy(cp, name->name, name->len);
398 pb->pb_name.name = cp;
399 assert(name->hashval);
400 pb->pb_name.hashval = name->hashval;
401 LIST_INSERT_HEAD(&names[name->hashval % NAMES_TABLE_LEN],
406 LIST_INIT(&pb->pb_children);
407 LIST_INIT(&pb->pb_aliases);
409 LIST_INSERT_HEAD(&parent->pb_children, pb, pb_sibs);
410 pb->pb_parent = parent;
419 * Destroy base path node, releasing resources back to the system.
421 * NB: Caller must release the inode referenced by the record.
424 pb_destroy(struct pnode_base *pb)
430 assert(!pb->pb_aliases.lh_first);
431 assert(!pb->pb_children.lh_first);
434 LIST_REMOVE(pb, pb_names);
436 LIST_REMOVE(pb, pb_sibs);
440 * This can help us catch pb-nodes that are free'd redundantly.
442 pb->pb_name.hashval = 0;
448 * Force reclaim of idle base path node.
451 _sysio_pb_gone(struct pnode_base *pb)
462 * Generate more path (alias) nodes for the fast allocator.
468 #ifdef ZERO_SUM_MEMORY
469 struct pnodes_block *pnblk;
473 #ifdef ZERO_SUM_MEMORY
474 pnblk = malloc(sizeof(struct pnodes_block));
477 LIST_INSERT_HEAD(&pnblocks, pnblk, pnblk_links);
478 pno = pnblk->pnblk_nodes;
481 pno = malloc(PNODES_PER_CHUNK * sizeof(struct pnode));
485 n = PNODES_PER_CHUNK;
487 LIST_INSERT_HEAD(&free_pnodes, pno, p_links);
492 #ifdef ZERO_SUM_MEMORY
499 struct pnodes_block *pnblk;
501 while ((pnblk = pnblocks.lh_first)) {
502 LIST_REMOVE(pnblk, pnblk_links);
509 * Allocate, initialize and establish appropriate links for new path (alias)
513 _sysio_p_new_alias(struct pnode *parent,
514 struct pnode_base *pb,
519 assert(!pb->pb_name.name || pb->pb_name.hashval);
521 pno = free_pnodes.lh_first;
524 pno = free_pnodes.lh_first;
528 LIST_REMOVE(pno, p_links);
531 pno->p_parent = parent;
537 LIST_INSERT_HEAD(&pb->pb_aliases, pno, p_links);
538 TAILQ_INSERT_TAIL(&_sysio_pnodes, pno, p_nodes);
544 * For reclamation of idle path (alias) node.
547 _sysio_p_gone(struct pnode *pno)
549 struct pnode_base *pb;
552 assert(!pno->p_cover);
554 TAILQ_REMOVE(&_sysio_pnodes, pno, p_nodes);
555 LIST_REMOVE(pno, p_links);
558 if (!(pb->pb_aliases.lh_first || pb->pb_children.lh_first))
561 LIST_INSERT_HEAD(&free_pnodes, pno, p_links);
565 * (Re)Validate passed path node.
568 _sysio_p_validate(struct pnode *pno, struct intent *intnt, const char *path)
571 struct pnode_base *rootpb;
574 ino = pno->p_base->pb_ino;
576 * An invalid pnode will not have an associated inode. We'll use
577 * the FS root inode, then -- It *must* be valid.
579 rootpb = pno->p_mount->mnt_root->p_base;
580 assert(rootpb->pb_ino);
582 rootpb->pb_ino->i_ops.inop_lookup(pno,
587 * If the inode lookup returns a different inode, release the old if
588 * present and point to the new.
590 if (err || pno->p_base->pb_ino != ino) {
591 if (pno->p_base->pb_ino)
592 I_RELE(pno->p_base->pb_ino);
593 pno->p_base->pb_ino = ino;
599 * Find (or create!) an alias for the given parent and name. A misnomer,
600 * really -- This is a "get". Returned path node is referenced.
603 _sysio_p_find_alias(struct pnode *parent,
607 struct pnode_base *pb;
612 * Find the named child.
616 * Try the names table.
618 pb = names[name->hashval % NAMES_TABLE_LEN].lh_first;
620 if (pb->pb_parent == parent->p_base &&
621 pb->pb_name.len == name->len &&
622 strncmp(pb->pb_name.name,
626 pb = pb->pb_names.le_next;
630 * Brute force through the parent's list of children.
632 pb = parent->p_base->pb_children.lh_first;
634 if (pb->pb_parent == parent->p_base &&
635 pb->pb_name.len == name->len &&
636 strncmp(pb->pb_name.name,
640 pb = pb->pb_sibs.le_next;
645 * None found, create new child.
647 pb = _sysio_pb_new(name, parent->p_base, NULL);
652 * Now find the proper alias. It's the one with the passed
656 pno = pb->pb_aliases.lh_first;
658 if (pno->p_parent == parent) {
662 pno = pno->p_links.le_next;
666 * Hmm. No alias. Just create an invalid one, to be
669 pno = _sysio_p_new_alias(parent, pb, parent->p_mount);
679 * Prune idle path base nodes freom the passed sub-tree, including the root.
682 _sysio_prune(struct pnode_base *rpb)
684 struct pnode_base *nxtpb, *pb;
686 nxtpb = rpb->pb_children.lh_first;
687 while ((pb = nxtpb)) {
688 nxtpb = pb->pb_sibs.le_next;
689 if (pb->pb_aliases.lh_first)
691 if (pb->pb_children.lh_first) {
697 if (rpb->pb_children.lh_first)
703 * Prune idle nodes from the passed sub-tree, including the root.
705 * Returns the number of aliases on the same mount that could not be pruned.
706 * i.e. a zero return means the entire sub-tree is gone.
709 _sysio_p_prune(struct pnode *root)
712 struct pnode_base *nxtpb, *pb;
713 struct pnode *nxtpno, *pno;
716 nxtpb = root->p_base->pb_children.lh_first;
717 while ((pb = nxtpb)) {
718 nxtpb = pb->pb_sibs.le_next;
719 nxtpno = pb->pb_aliases.lh_first;
724 while ((pno = nxtpno)) {
725 nxtpno = pno->p_links.le_next;
726 if (pno->p_mount != root->p_mount) {
728 * Not the alias we were looking for.
732 if (pno->p_base->pb_children.lh_first) {
734 * Node is interior. Recurse.
736 count += _sysio_p_prune(pno);
741 * Can't prune; It's active.
746 assert(!pno->p_cover); /* covered => ref'd! */
747 assert(!pno->p_base->pb_name.name ||
748 pno->p_base->pb_name.hashval);
752 if (pno->p_mount->mnt_root == pno) {
753 #ifndef AUTOMOUNT_FILE_NAME
758 * This is an automount-point. Must
759 * unmount before relcaim.
762 if (_sysio_do_unmount(pno->p_mount) != 0) {
775 * Can't get the root or we disconnect the sub-trees.
777 return count + (root->p_ref ? 1 : 0);
781 * All that is left is the root. Try for it too.
785 } else if (root->p_mount->mnt_root == root) {
786 #ifndef AUTOMOUNT_FILE_NAME
790 * This is an automount-point. Must
791 * unmount before relcaim.
794 if (_sysio_do_unmount(root->p_mount) != 0) {
806 * Return path tracked by the base path node ancestor chain.
808 * Remember, base path nodes track the path relative to the file system and
809 * path (alias) nodes track path relative to our name space -- They cross
813 _sysio_pb_path(struct pnode_base *pb, const char separator)
817 struct pnode_base *tmp;
821 * First pass: Traverse to the root of the sub-tree, remembering
827 n = tmp->pb_name.len;
828 len += tmp->pb_name.len;
831 tmp = tmp->pb_parent;
838 buf = malloc(len + 1);
842 * Fill in the path buffer -- Backwards, since we're starting
848 *cp = '\0'; /* NUL term */
851 cp -= tmp->pb_name.len;
852 n = tmp->pb_name.len;
854 (void )strncpy(cp, tmp->pb_name.name, n);
857 tmp = tmp->pb_parent;
864 * Common set attributes routine.
867 _sysio_setattr(struct pnode *pno,
870 struct intnl_stat *stbuf)
873 * It is possible that pno is null (for ftruncate call).
877 assert(!ino || pno->p_base->pb_ino == ino);
879 ino = pno->p_base->pb_ino;
882 if (pno && IS_RDONLY(pno))
886 * Determining permission to change the attributes is
887 * difficult, at best. Just try it.
889 return (*ino->i_ops.inop_setattr)(pno, ino, mask, stbuf);