--- /dev/null
+Index: linux-2.4.21-171/include/linux/skbuff.h
+===================================================================
+--- linux-2.4.21-171.orig/include/linux/skbuff.h 2004-03-31 14:58:38.000000000 -0500
++++ linux-2.4.21-171/include/linux/skbuff.h 2004-04-03 16:43:15.000000000 -0500
+@@ -116,6 +116,30 @@
+ __u16 size;
+ };
+
++/* Zero Copy Callback Descriptor: a callback for when skb data has been released */
++typedef struct zccd
++{ /* (embed as first member of custom struct) */
++ atomic_t zccd_count; /* number of references held */
++ void (*zccd_destructor)(struct zccd *); /* run by zccd_put() when the count reaches zero */
++} zccd_t;
++
++static inline void zccd_init (zccd_t *d, void (*callback)(zccd_t *)) /* start with 1 reference */
++{
++ d->zccd_destructor = callback;
++ atomic_set (&d->zccd_count, 1);
++}
++
++static inline void zccd_get (zccd_t *d) /* take a reference */
++{
++ atomic_inc (&d->zccd_count);
++}
++
++static inline void zccd_put (zccd_t *d) /* release a reference */
++{
++ if (!atomic_dec_and_test (&d->zccd_count)) return; /* still referenced */
++ d->zccd_destructor (d); /* that was the last reference */
++}
++
+ /* This data is invariant across clones and lives at
+ * the end of the header data, ie. at skb->end.
+ */
+@@ -123,6 +147,12 @@
+ atomic_t dataref;
+ unsigned int nr_frags;
+ struct sk_buff *frag_list;
++ zccd_t *zccd; /* zero copy descriptor */
++ zccd_t *zccd2; /* 2nd zero copy descriptor */
++ /* NB we expect zero-copy data to be at least 1 packet, so
++ * having 2 zccds means we don't unnecessarily split the packet
++ * where consecutive zero-copy sends abut.
++ */
+ skb_frag_t frags[MAX_SKB_FRAGS];
+ };
+
+Index: linux-2.4.21-171/include/net/tcp.h
+===================================================================
+--- linux-2.4.21-171.orig/include/net/tcp.h 2004-03-31 15:07:31.000000000 -0500
++++ linux-2.4.21-171/include/net/tcp.h 2004-04-03 16:46:15.000000000 -0500
+@@ -646,6 +646,8 @@
+
+ extern int tcp_sendmsg(struct sock *sk, struct msghdr *msg, int size);
+ extern ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags);
++extern ssize_t tcp_sendpage_zccd(struct socket *sock, struct page *page, int offset, ssize_t size,
++ int flags, zccd_t *zccd);
+
+ extern int tcp_ioctl(struct sock *sk,
+ int cmd,
+@@ -742,6 +744,9 @@
+ struct msghdr *msg,
+ int len, int nonblock,
+ int flags, int *addr_len);
++extern int tcp_recvpackets(struct sock *sk,
++ struct sk_buff_head *packets,
++ int len, int nonblock);
+
+ extern int tcp_listen_start(struct sock *sk);
+
+Index: linux-2.4.21-171/net/netsyms.c
+===================================================================
+--- linux-2.4.21-171.orig/net/netsyms.c 2004-02-24 14:03:22.000000000 -0500
++++ linux-2.4.21-171/net/netsyms.c 2004-04-03 16:13:53.000000000 -0500
+@@ -407,6 +407,8 @@
+
+ #endif
+
++EXPORT_SYMBOL(tcp_sendpage_zccd);
++EXPORT_SYMBOL(tcp_recvpackets);
+ EXPORT_SYMBOL(tcp_read_sock);
+
+ EXPORT_SYMBOL(netlink_set_err);
+Index: linux-2.4.21-171/net/core/skbuff.c
+===================================================================
+--- linux-2.4.21-171.orig/net/core/skbuff.c 2004-02-24 14:03:22.000000000 -0500
++++ linux-2.4.21-171/net/core/skbuff.c 2004-04-03 16:13:53.000000000 -0500
+@@ -208,6 +208,8 @@
+ atomic_set(&(skb_shinfo(skb)->dataref), 1);
+ skb_shinfo(skb)->nr_frags = 0;
+ skb_shinfo(skb)->frag_list = NULL;
++ skb_shinfo(skb)->zccd = NULL; /* skbuffs kick off with NO user zero copy descriptors */
++ skb_shinfo(skb)->zccd2 = NULL;
+ return skb;
+
+ nodata:
+@@ -277,6 +279,10 @@
+ {
+ if (!skb->cloned ||
+ atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) {
++ if (skb_shinfo(skb)->zccd != NULL) /* zero copy callback descriptor? */
++ zccd_put (skb_shinfo(skb)->zccd); /* release hold */
++ if (skb_shinfo(skb)->zccd2 != NULL) /* 2nd zero copy callback descriptor? */
++ zccd_put (skb_shinfo(skb)->zccd2); /* release hold */
+ if (skb_shinfo(skb)->nr_frags) {
+ int i;
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+@@ -535,6 +541,8 @@
+ atomic_set(&(skb_shinfo(skb)->dataref), 1);
+ skb_shinfo(skb)->nr_frags = 0;
+ skb_shinfo(skb)->frag_list = NULL;
++ skb_shinfo(skb)->zccd = NULL; /* copied data => no user zero copy descriptor */
++ skb_shinfo(skb)->zccd2 = NULL;
+
+ /* We are no longer a clone, even if we were. */
+ skb->cloned = 0;
+@@ -581,6 +589,14 @@
+ n->data_len = skb->data_len;
+ n->len = skb->len;
+
++ if (skb_shinfo(skb)->zccd != NULL) /* user zero copy descriptor? */
++ zccd_get (skb_shinfo(skb)->zccd); /* 1 more ref (pages are shared) */
++ skb_shinfo(n)->zccd = skb_shinfo(skb)->zccd;
++
++ if (skb_shinfo(skb)->zccd2 != NULL) /* 2nd user zero copy descriptor? */
++ zccd_get (skb_shinfo(skb)->zccd2); /* 1 more ref (pages are shared) */
++ skb_shinfo(n)->zccd2 = skb_shinfo(skb)->zccd2;
++
+ if (skb_shinfo(skb)->nr_frags) {
+ int i;
+
+@@ -623,6 +639,8 @@
+ u8 *data;
+ int size = nhead + (skb->end - skb->head) + ntail;
+ long off;
++ zccd_t *zccd = skb_shinfo(skb)->zccd; /* stash user zero copy descriptor */
++ zccd_t *zccd2 = skb_shinfo(skb)->zccd2; /* stash 2nd user zero copy descriptor */
+
+ if (skb_shared(skb))
+ BUG();
+@@ -644,6 +662,11 @@
+ if (skb_shinfo(skb)->frag_list)
+ skb_clone_fraglist(skb);
+
++ if (zccd != NULL) /* user zero copy descriptor? */
++ zccd_get (zccd); /* extra ref (pages are shared) */
++ if (zccd2 != NULL) /* 2nd user zero copy descriptor? */
++ zccd_get (zccd2); /* extra ref (pages are shared) */
++
+ skb_release_data(skb);
+
+ off = (data+nhead) - skb->head;
+@@ -658,6 +681,8 @@
+ skb->nh.raw += off;
+ skb->cloned = 0;
+ atomic_set(&skb_shinfo(skb)->dataref, 1);
++ skb_shinfo(skb)->zccd = zccd;
++ skb_shinfo(skb)->zccd2 = zccd2;
+ return 0;
+
+ nodata:
+Index: linux-2.4.21-171/net/ipv4/tcp.c
+===================================================================
+--- linux-2.4.21-171.orig/net/ipv4/tcp.c 2004-02-24 13:42:30.000000000 -0500
++++ linux-2.4.21-171/net/ipv4/tcp.c 2004-04-03 16:43:05.000000000 -0500
+@@ -748,7 +748,7 @@
+ goto out;
+ }
+
+-ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, size_t psize, int flags);
++ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, size_t psize, int flags, zccd_t *zccd);
+
+ static inline int
+ can_coalesce(struct sk_buff *skb, int i, struct page *page, int off)
+@@ -827,7 +827,7 @@
+ return err;
+ }
+
+-ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, size_t psize, int flags)
++ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, size_t psize, int flags, zccd_t *zccd)
+ {
+ struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+ int mss_now;
+@@ -875,6 +875,17 @@
+ copy = size;
+
+ i = skb_shinfo(skb)->nr_frags;
++
++ if (zccd != NULL && /* this is a zcc I/O */
++ skb_shinfo(skb)->zccd != NULL && /* skb is part of a zcc I/O */
++ skb_shinfo(skb)->zccd2 != NULL &&
++ skb_shinfo(skb)->zccd != zccd && /* not the same one */
++ skb_shinfo(skb)->zccd2 != zccd)
++ {
++ tcp_mark_push (tp, skb);
++ goto new_segment;
++ }
++
+ if (can_coalesce(skb, i, page, offset)) {
+ skb_shinfo(skb)->frags[i-1].size += copy;
+ } else if (i < MAX_SKB_FRAGS) {
+@@ -885,6 +896,20 @@
+ goto new_segment;
+ }
+
++ if (zccd != NULL && /* this is a zcc I/O */
++ skb_shinfo(skb)->zccd != zccd && /* not already referencing this zccd */
++ skb_shinfo(skb)->zccd2 != zccd)
++ {
++ zccd_get (zccd); /* bump ref count */
++
++ BUG_TRAP (skb_shinfo(skb)->zccd2 == NULL);
++
++ if (skb_shinfo(skb)->zccd == NULL) /* reference this zccd */
++ skb_shinfo(skb)->zccd = zccd;
++ else
++ skb_shinfo(skb)->zccd2 = zccd;
++ }
++
+ skb->len += copy;
+ skb->data_len += copy;
+ skb->ip_summed = CHECKSUM_HW;
+@@ -948,7 +973,28 @@
+
+ lock_sock(sk);
+ TCP_CHECK_TIMER(sk);
+- res = do_tcp_sendpages(sk, &page, offset, size, flags);
++ res = do_tcp_sendpages(sk, &page, offset, size, flags, NULL);
++ TCP_CHECK_TIMER(sk);
++ release_sock(sk);
++ return res;
++}
++
++ssize_t tcp_sendpage_zccd(struct socket *sock, struct page *page, int offset, ssize_t size,
++ int flags, zccd_t *zccd)
++{
++ ssize_t res;
++ struct sock *sk = sock->sk;
++
++#define TCP_ZC_CSUM_FLAGS (NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM)
++ /* refuse routes lacking scatter/gather or every one of the checksum capabilities above */
++ if (!(sk->route_caps & NETIF_F_SG) || /* caller shouldn't waste her time */
++ !(sk->route_caps & TCP_ZC_CSUM_FLAGS)) /* on double mapping */
++ BUG ();
++
++ lock_sock(sk);
++ TCP_CHECK_TIMER(sk);
++ /* send 'size' bytes starting at the caller-supplied 'offset' within the page */
++ res = do_tcp_sendpages(sk, &page, offset, size, flags, zccd);
+ TCP_CHECK_TIMER(sk);
+ release_sock(sk);
+ return res;
+@@ -1772,6 +1818,202 @@
+ goto out;
+ }
+
++int tcp_recvpackets (struct sock *sk, struct sk_buff_head *packets,
++ int len, int nonblock)
++{
++ struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
++ int copied;
++ long timeo;
++ /* Move up to 'len' received bytes onto 'packets' as skbs; returns the byte count or -errno */
++ BUG_TRAP (len > 0);
++ /* NB there is no 'flags' argument, so MSG_OOB, MSG_PEEK and MSG_TRUNC cannot be requested */
++
++ lock_sock(sk);
++
++ TCP_CHECK_TIMER(sk);
++
++ copied = -ENOTCONN;
++ if (sk->state == TCP_LISTEN)
++ goto out;
++
++ copied = 0;
++ timeo = sock_rcvtimeo(sk, nonblock);
++
++ do {
++ struct sk_buff * skb;
++ u32 offset;
++ unsigned long used;
++ int exhausted;
++ int eaten;
++
++ /* Are we at urgent data? Stop if we have read anything. */
++ if (copied && tp->urg_data && tp->urg_seq == tp->copied_seq)
++ break;
++
++ /* We need to check signals first, to get correct SIGURG
++ * handling. FIXME: Need to check this doesn't impact 1003.1g
++ * and move it down to the bottom of the loop
++ */
++ if (signal_pending(current)) {
++ if (copied)
++ break;
++ copied = timeo ? sock_intr_errno(timeo) : -EAGAIN;
++ break;
++ }
++
++ /* Next get a buffer. */
++
++ skb = skb_peek(&sk->receive_queue);
++
++ if (skb == NULL) /* nothing ready */
++ {
++ if (copied) {
++ if (sk->err ||
++ sk->state == TCP_CLOSE ||
++ (sk->shutdown & RCV_SHUTDOWN) ||
++ !timeo ||
++ (0))
++ break;
++ } else {
++ if (sk->done)
++ break;
++
++ if (sk->err) {
++ copied = sock_error(sk);
++ break;
++ }
++
++ if (sk->shutdown & RCV_SHUTDOWN)
++ break;
++
++ if (sk->state == TCP_CLOSE) {
++ if (!sk->done) {
++ /* This occurs when user tries to read
++ * from never connected socket.
++ */
++ copied = -ENOTCONN;
++ break;
++ }
++ break;
++ }
++
++ if (!timeo) {
++ copied = -EAGAIN;
++ break;
++ }
++ }
++
++ cleanup_rbuf(sk, copied);
++ timeo = tcp_data_wait(sk, timeo);
++ continue;
++ }
++
++ BUG_TRAP (atomic_read (&skb->users) == 1);
++
++ exhausted = eaten = 0;
++
++ offset = tp->copied_seq - TCP_SKB_CB(skb)->seq;
++ if (skb->h.th->syn)
++ offset--;
++
++ used = skb->len - offset;
++
++ if (tp->urg_data) {
++ u32 urg_offset = tp->urg_seq - tp->copied_seq;
++ if (urg_offset < used) {
++ if (!urg_offset) { /* at urgent data */
++ if (!sk->urginline) {
++ tp->copied_seq++; /* discard the single byte of urgent data */
++ offset++;
++ used--;
++ }
++ } else /* truncate read */
++ used = urg_offset;
++ }
++ }
++
++ BUG_TRAP (used >= 0);
++ if (len < used)
++ used = len;
++
++ if (used == 0)
++ exhausted = 1;
++ else
++ {
++ if (skb_is_nonlinear (skb))
++ {
++ int rc = skb_linearize (skb, GFP_KERNEL);
++
++ printk ("tcp_recvpackets(): linearising: %d\n", rc);
++
++ if (rc)
++ {
++ if (!copied)
++ copied = rc;
++ break;
++ }
++ }
++
++ if ((offset + used) == skb->len) /* consuming the whole packet */
++ {
++ __skb_unlink (skb, &sk->receive_queue);
++ dst_release (skb->dst); skb->dst = NULL; /* keep no pointer to the released dst */
++ skb_orphan (skb);
++ __skb_pull (skb, offset);
++ __skb_queue_tail (packets, skb);
++ exhausted = eaten = 1;
++ }
++ else /* consuming only part of the packet */
++ {
++ struct sk_buff *skb2 = skb_clone (skb, GFP_KERNEL);
++
++ if (skb2 == NULL)
++ {
++ if (!copied)
++ copied = -ENOMEM;
++ break;
++ }
++
++ dst_release (skb2->dst); skb2->dst = NULL; /* keep no pointer to the released dst */
++ __skb_pull (skb2, offset);
++ __skb_trim (skb2, used);
++ __skb_queue_tail (packets, skb2);
++ }
++
++ tp->copied_seq += used;
++ copied += used;
++ len -= used;
++ }
++
++ if (tp->urg_data && after(tp->copied_seq,tp->urg_seq)) {
++ tp->urg_data = 0;
++ tcp_fast_path_check(sk, tp);
++ }
++
++ if (!exhausted)
++ continue;
++
++ if (skb->h.th->fin)
++ {
++ tp->copied_seq++;
++ if (!eaten)
++ tcp_eat_skb (sk, skb);
++ break;
++ }
++
++ if (!eaten)
++ tcp_eat_skb (sk, skb);
++
++ } while (len > 0);
++
++ out:
++ /* Clean up data we have read: This will do ACK frames. */
++ cleanup_rbuf(sk, copied);
++ TCP_CHECK_TIMER(sk);
++ release_sock(sk);
++ return copied;
++}
++
+ /*
+ * State processing on a close. This implements the state shift for
+ * sending our FIN frame. Note that we only send a FIN for some
--- /dev/null
+ fs/dcache.c | 19 ++
+ fs/exec.c | 17 +-
+ fs/namei.c | 295 +++++++++++++++++++++++++++++++++++++++-------
+ fs/namespace.c | 28 +++-
+ fs/open.c | 172 +++++++++++++++++++-------
+ fs/stat.c | 52 +++++---
+ include/linux/dcache.h | 60 +++++++++
+ include/linux/fs.h | 32 ++++
+ include/linux/fs_struct.h | 4
+ kernel/exit.c | 3
+ kernel/fork.c | 3
+ kernel/ksyms.c | 1
+ 12 files changed, 558 insertions(+), 128 deletions(-)
+
+Index: linux-2.4.21-171/fs/dcache.c
+===================================================================
+--- linux-2.4.21-171.orig/fs/dcache.c 2004-02-24 13:42:29.000000000 -0500
++++ linux-2.4.21-171/fs/dcache.c 2004-04-03 16:09:50.000000000 -0500
+@@ -186,6 +186,13 @@
+ spin_unlock(&dcache_lock);
+ return 0;
+ }
++
++ /* network invalidation by Lustre */
++ if (dentry->d_flags & DCACHE_LUSTRE_INVALID) {
++ spin_unlock(&dcache_lock);
++ return 0;
++ }
++
+ /*
+ * Check whether to do a partial shrink_dcache
+ * to get rid of unused child entries.
+@@ -838,13 +845,19 @@
+ * Adds a dentry to the hash according to its name.
+ */
+
+-void d_rehash(struct dentry * entry)
++void __d_rehash(struct dentry * entry, int lock)
+ {
+ struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash);
+ if (!list_empty(&entry->d_hash)) BUG();
+- spin_lock(&dcache_lock);
++ if (lock) spin_lock(&dcache_lock);
+ list_add(&entry->d_hash, list);
+- spin_unlock(&dcache_lock);
++ if (lock) spin_unlock(&dcache_lock);
++}
++EXPORT_SYMBOL(__d_rehash);
++
++void d_rehash(struct dentry * entry)
++{
++ __d_rehash(entry, 1);
+ }
+
+ #define do_switch(x,y) do { \
+Index: linux-2.4.21-171/fs/exec.c
+===================================================================
+--- linux-2.4.21-171.orig/fs/exec.c 2004-02-24 14:03:21.000000000 -0500
++++ linux-2.4.21-171/fs/exec.c 2004-04-03 16:09:50.000000000 -0500
+@@ -113,8 +113,10 @@
+ struct file * file;
+ struct nameidata nd;
+ int error;
++ struct lookup_intent it = { .it_op = IT_OPEN,
++ .it_flags = FMODE_READ|FMODE_EXEC };
+
+- error = user_path_walk(library, &nd);
++ error = user_path_walk_it(library, &nd, &it);
+ if (error)
+ goto out;
+
+@@ -126,7 +128,8 @@
+ if (error)
+ goto exit;
+
+- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
++ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it);
++ intent_release(&it);
+ error = PTR_ERR(file);
+ if (IS_ERR(file))
+ goto out;
+@@ -383,8 +386,10 @@
+ struct inode *inode;
+ struct file *file;
+ int err = 0;
++ struct lookup_intent it = { .it_op = IT_OPEN,
++ .it_flags = FMODE_READ|FMODE_EXEC };
+
+- err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd);
++ err = path_lookup_it(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it);
+ file = ERR_PTR(err);
+ if (!err) {
+ inode = nd.dentry->d_inode;
+@@ -396,7 +401,8 @@
+ err = -EACCES;
+ file = ERR_PTR(err);
+ if (!err) {
+- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
++ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it);
++ intent_release(&it);
+ if (!IS_ERR(file)) {
+ err = deny_write_access(file);
+ if (err) {
+@@ -408,6 +414,7 @@
+ return file;
+ }
+ }
++ intent_release(&it);
+ path_release(&nd);
+ }
+ goto out;
+@@ -1148,7 +1155,7 @@
+ goto close_fail;
+ if (!file->f_op->write)
+ goto close_fail;
+- if (do_truncate(file->f_dentry, 0) != 0)
++ if (do_truncate(file->f_dentry, 0, 0) != 0)
+ goto close_fail;
+
+ retval = binfmt->core_dump(signr, regs, file);
+Index: linux-2.4.21-171/fs/namei.c
+===================================================================
+--- linux-2.4.21-171.orig/fs/namei.c 2004-02-24 13:42:29.000000000 -0500
++++ linux-2.4.21-171/fs/namei.c 2004-04-03 16:13:23.000000000 -0500
+@@ -94,6 +94,13 @@
+ * XEmacs seems to be relying on it...
+ */
+
++void intent_release(struct lookup_intent *it)
++{
++ if (!it || !it->it_op_release) /* no intent, or no release hook: nothing to do */
++ return;
++ it->it_op_release(it);
++}
++
+ /* In order to reduce some races, while at the same time doing additional
+ * checking and hopefully speeding things up, we copy filenames to the
+ * kernel data space before using them..
+@@ -260,10 +267,19 @@
+ * Internal lookup() using the new generic dcache.
+ * SMP-safe
+ */
+-static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags)
++static struct dentry *cached_lookup(struct dentry *parent, struct qstr *name,
++ int flags, struct lookup_intent *it)
+ {
+ struct dentry * dentry = d_lookup(parent, name);
+
++ if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) {
++ if (!dentry->d_op->d_revalidate_it(dentry, flags, it) &&
++ !d_invalidate(dentry)) {
++ dput(dentry);
++ dentry = NULL;
++ }
++ return dentry;
++ } else
+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
+ if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) {
+ dput(dentry);
+@@ -281,11 +297,15 @@
+ * make sure that nobody added the entry to the dcache in the meantime..
+ * SMP-safe
+ */
+-static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags)
++static struct dentry *real_lookup(struct dentry *parent, struct qstr *name,
++ int flags, struct lookup_intent *it)
+ {
+ struct dentry * result;
+ struct inode *dir = parent->d_inode;
++ int counter = 0;
+
++again:
++ counter++;
+ down(&dir->i_sem);
+ /*
+ * First re-do the cached lookup just in case it was created
+@@ -300,6 +320,9 @@
+ result = ERR_PTR(-ENOMEM);
+ if (dentry) {
+ lock_kernel();
++ if (dir->i_op->lookup_it)
++ result = dir->i_op->lookup_it(dir, dentry, it, flags);
++ else
+ result = dir->i_op->lookup(dir, dentry);
+ unlock_kernel();
+ if (result)
+@@ -321,6 +344,15 @@
+ dput(result);
+ result = ERR_PTR(-ENOENT);
+ }
++ } else if (result->d_op && result->d_op->d_revalidate_it) {
++ if (!result->d_op->d_revalidate_it(result, flags, it) &&
++ !d_invalidate(result)) {
++ dput(result);
++ if (counter > 10)
++ result = ERR_PTR(-ESTALE);
++ if (!IS_ERR(result))
++ goto again;
++ }
+ }
+ return result;
+ }
+@@ -332,7 +364,8 @@
+ * Without that kind of total limit, nasty chains of consecutive
+ * symlinks can cause almost arbitrarily long lookups.
+ */
+-static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)
++static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd,
++ struct lookup_intent *it)
+ {
+ int err;
+ if (current->link_count >= 8)
+@@ -346,10 +379,12 @@
+ current->link_count++;
+ current->total_link_count++;
+ UPDATE_ATIME(dentry->d_inode);
++ nd->intent = it;
+ err = dentry->d_inode->i_op->follow_link(dentry, nd);
+ current->link_count--;
+ return err;
+ loop:
++ intent_release(it);
+ path_release(nd);
+ return -ELOOP;
+ }
+@@ -448,7 +483,8 @@
+ * We expect 'base' to be positive and a directory.
+ */
+ static inline int __attribute__((always_inline))
+-__link_path_walk(const char * name, struct nameidata *nd)
++__link_path_walk_it(const char * name, struct nameidata *nd,
++ struct lookup_intent *it)
+ {
+ struct dentry *dentry;
+ struct inode *inode;
+@@ -525,12 +561,12 @@
+ break;
+ }
+ /* This does the actual lookups.. */
+- dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
++ dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL);
+ if (!dentry) {
+ err = -EWOULDBLOCKIO;
+ if (atomic)
+ break;
+- dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
++ dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL);
+ err = PTR_ERR(dentry);
+ if (IS_ERR(dentry))
+ break;
+@@ -548,7 +584,7 @@
+ goto out_dput;
+
+ if (inode->i_op->follow_link) {
+- err = do_follow_link(dentry, nd);
++ err = do_follow_link(dentry, nd, NULL);
+ dput(dentry);
+ if (err)
+ goto return_err;
+@@ -564,7 +600,7 @@
+ nd->dentry = dentry;
+ }
+ err = -ENOTDIR;
+- if (!inode->i_op->lookup)
++ if (!inode->i_op->lookup && !inode->i_op->lookup_it)
+ break;
+ continue;
+ /* here ends the main loop */
+@@ -591,12 +627,12 @@
+ if (err < 0)
+ break;
+ }
+- dentry = cached_lookup(nd->dentry, &this, 0);
++ dentry = cached_lookup(nd->dentry, &this, 0, it);
+ if (!dentry) {
+ err = -EWOULDBLOCKIO;
+ if (atomic)
+ break;
+- dentry = real_lookup(nd->dentry, &this, 0);
++ dentry = real_lookup(nd->dentry, &this, 0, it);
+ err = PTR_ERR(dentry);
+ if (IS_ERR(dentry))
+ break;
+@@ -606,7 +642,7 @@
+ inode = dentry->d_inode;
+ if ((lookup_flags & LOOKUP_FOLLOW)
+ && inode && inode->i_op && inode->i_op->follow_link) {
+- err = do_follow_link(dentry, nd);
++ err = do_follow_link(dentry, nd, it);
+ dput(dentry);
+ if (err)
+ goto return_err;
+@@ -620,7 +656,8 @@
+ goto no_inode;
+ if (lookup_flags & LOOKUP_DIRECTORY) {
+ err = -ENOTDIR;
+- if (!inode->i_op || !inode->i_op->lookup)
++ if (!inode->i_op ||
++ (!inode->i_op->lookup && !inode->i_op->lookup_it))
+ break;
+ }
+ goto return_base;
+@@ -644,6 +681,25 @@
+ * Check the cached dentry for staleness.
+ */
+ dentry = nd->dentry;
++ if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) {
++ err = -ESTALE;
++ if (!dentry->d_op->d_revalidate_it(dentry, 0, it)) {
++ struct dentry *new;
++ err = permission(dentry->d_parent->d_inode,
++ MAY_EXEC);
++ if (err)
++ break;
++ new = real_lookup(dentry->d_parent,
++ &dentry->d_name, 0, NULL);
++ d_invalidate(dentry);
++ dput(dentry);
++ if (IS_ERR(new)) {
++ err = PTR_ERR(new);
++ break;
++ }
++ nd->dentry = new;
++ }
++ } else
+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
+ err = -ESTALE;
+ if (!dentry->d_op->d_revalidate(dentry, lookup_flags & LOOKUP_PARENT)) {
+@@ -657,6 +713,8 @@
+ dput(dentry);
+ break;
+ }
++ if (err)
++ intent_release(it);
+ path_release(nd);
+ return_err:
+ return err;
+@@ -664,13 +722,13 @@
+
+ int link_path_walk(const char * name, struct nameidata *nd)
+ {
+- return __link_path_walk(name,nd);
++ return __link_path_walk_it(name, nd, NULL);
+ }
+
+ static inline int __path_walk(const char * name, struct nameidata *nd)
+ {
+ current->total_link_count = 0;
+- return __link_path_walk(name, nd);
++ return __link_path_walk_it(name, nd, NULL);
+ }
+
+ int path_walk(const char * name, struct nameidata *nd)
+@@ -678,6 +736,12 @@
+ return __path_walk(name, nd);
+ }
+
++int path_walk_it(const char * name, struct nameidata *nd, struct lookup_intent *it)
++{
++ current->total_link_count = 0; /* reset the current task's link counter before walking */
++ return __link_path_walk_it(name, nd, it); /* walk 'name', handing the intent through */
++}
++
+ /* SMP-safe */
+ /* returns 1 if everything is done */
+ static int __emul_lookup_dentry(const char *name, struct nameidata *nd)
+@@ -760,6 +824,17 @@
+ }
+
+ /* SMP-safe */
++int path_lookup_it(const char *path, unsigned flags, struct nameidata *nd,
++ struct lookup_intent *it)
++{
++ /* walk only when path_init() returns non-zero; otherwise report success */
++ if (!path_init(path, flags, nd))
++ return 0;
++ return path_walk_it(path, nd, it);
++}
++
++
++/* SMP-safe */
+ int path_lookup(const char *path, unsigned flags, struct nameidata *nd)
+ {
+ int error = 0;
+@@ -774,6 +849,7 @@
+ {
+ nd->last_type = LAST_ROOT; /* if there are only slashes... */
+ nd->flags = flags;
++ nd->intent = NULL;
+ if (*name=='/')
+ return walk_init_root(name,nd);
+ read_lock(¤t->fs->lock);
+@@ -788,7 +864,8 @@
+ * needs parent already locked. Doesn't follow mounts.
+ * SMP-safe.
+ */
+-struct dentry * lookup_hash(struct qstr *name, struct dentry * base)
++struct dentry * lookup_hash_it(struct qstr *name, struct dentry * base,
++ struct lookup_intent *it)
+ {
+ struct dentry * dentry;
+ struct inode *inode;
+@@ -811,13 +888,16 @@
+ goto out;
+ }
+
+- dentry = cached_lookup(base, name, 0);
++ dentry = cached_lookup(base, name, 0, it);
+ if (!dentry) {
+ struct dentry *new = d_alloc(base, name);
+ dentry = ERR_PTR(-ENOMEM);
+ if (!new)
+ goto out;
+ lock_kernel();
++ if (inode->i_op->lookup_it)
++ dentry = inode->i_op->lookup_it(inode, new, it, 0);
++ else
+ dentry = inode->i_op->lookup(inode, new);
+ unlock_kernel();
+ if (!dentry)
+@@ -829,6 +909,12 @@
+ return dentry;
+ }
+
++struct dentry * lookup_hash(struct qstr *name, struct dentry * base)
++{
++ return lookup_hash_it(name, base, NULL); /* same lookup, without a lookup intent */
++}
++
++
+ /* SMP-safe */
+ struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
+ {
+@@ -850,7 +936,7 @@
+ }
+ this.hash = end_name_hash(hash);
+
+- return lookup_hash(&this, base);
++ return lookup_hash_it(&this, base, NULL);
+ access:
+ return ERR_PTR(-EACCES);
+ }
+@@ -881,6 +967,23 @@
+ return err;
+ }
+
++int __user_walk_it(const char *name, unsigned flags, struct nameidata *nd,
++ struct lookup_intent *it)
++{
++ char *kname = getname(name); /* released by putname() below */
++ int rc = 0;
++
++ /* getname() failed: hand its error code straight back */
++ if (IS_ERR(kname))
++ return PTR_ERR(kname);
++
++ /* walk with the intent only when path_init() returns non-zero */
++ if (path_init(kname, flags, nd))
++ rc = path_walk_it(kname, nd, it);
++ putname(kname);
++ return rc;
++}
++
+ /*
+ * It's inline, so penalty for filesystems that don't use sticky bit is
+ * minimal.
+@@ -978,7 +1081,8 @@
+ return retval;
+ }
+
+-int vfs_create(struct inode *dir, struct dentry *dentry, int mode)
++static int vfs_create_it(struct inode *dir, struct dentry *dentry, int mode,
++ struct lookup_intent *it)
+ {
+ int error;
+
+@@ -991,12 +1095,15 @@
+ goto exit_lock;
+
+ error = -EACCES; /* shouldn't it be ENOSYS? */
+- if (!dir->i_op || !dir->i_op->create)
++ if (!dir->i_op || (!dir->i_op->create && !dir->i_op->create_it))
+ goto exit_lock;
+
+ DQUOT_INIT(dir);
+ lock_kernel();
+- error = dir->i_op->create(dir, dentry, mode);
++ if (dir->i_op->create_it)
++ error = dir->i_op->create_it(dir, dentry, mode, it);
++ else
++ error = dir->i_op->create(dir, dentry, mode);
+ unlock_kernel();
+ exit_lock:
+ up(&dir->i_zombie);
+@@ -1005,6 +1112,11 @@
+ return error;
+ }
+
++int vfs_create(struct inode *dir, struct dentry *dentry, int mode)
++{
++ return vfs_create_it(dir, dentry, mode, NULL); /* same create, without a lookup intent */
++}
++
+ /*
+ * open_namei()
+ *
+@@ -1019,7 +1131,8 @@
+ * for symlinks (where the permissions are checked later).
+ * SMP-safe
+ */
+-int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
++int open_namei_it(const char *pathname, int flag, int mode,
++ struct nameidata *nd, struct lookup_intent *it)
+ {
+ int acc_mode, error = 0;
+ struct inode *inode;
+@@ -1029,11 +1142,14 @@
+
+ acc_mode = ACC_MODE(flag);
+
++ if (it)
++ it->it_flags = flag;
++
+ /*
+ * The simplest case - just a plain lookup.
+ */
+ if (!(flag & O_CREAT)) {
+- error = path_lookup(pathname, lookup_flags(flag), nd);
++ error = path_lookup_it(pathname, lookup_flags(flag), nd, it);
+ if (error)
+ return error;
+ dentry = nd->dentry;
+@@ -1043,6 +1159,10 @@
+ /*
+ * Create - we need to know the parent.
+ */
++ if (it) {
++ it->it_create_mode = mode;
++ it->it_op |= IT_CREAT;
++ }
+ error = path_lookup(pathname, LOOKUP_PARENT, nd);
+ if (error)
+ return error;
+@@ -1058,7 +1178,7 @@
+
+ dir = nd->dentry;
+ down(&dir->d_inode->i_sem);
+- dentry = lookup_hash(&nd->last, nd->dentry);
++ dentry = lookup_hash_it(&nd->last, nd->dentry, it);
+
+ do_last:
+ error = PTR_ERR(dentry);
+@@ -1067,11 +1187,12 @@
+ goto exit;
+ }
+
++ if (it) it->it_create_mode = mode; /* it is NULL when entered via open_namei() */
+ /* Negative dentry, just create the file */
+ if (!dentry->d_inode) {
+ if (!IS_POSIXACL(dir->d_inode))
+ mode &= ~current->fs->umask;
+- error = vfs_create(dir->d_inode, dentry, mode);
++ error = vfs_create_it(dir->d_inode, dentry, mode, it);
+ up(&dir->d_inode->i_sem);
+ #ifndef DENTRY_WASTE_RAM
+ if (error)
+@@ -1179,7 +1300,7 @@
+ if (!error) {
+ DQUOT_INIT(inode);
+
+- error = do_truncate(dentry, 0);
++ error = do_truncate(dentry, 0, 1);
+ }
+ put_write_access(inode);
+ if (error)
+@@ -1191,8 +1312,10 @@
+ return 0;
+
+ exit_dput:
++ intent_release(it);
+ dput(dentry);
+ exit:
++ intent_release(it);
+ path_release(nd);
+ return error;
+
+@@ -1211,7 +1334,10 @@
+ * are done. Procfs-like symlinks just set LAST_BIND.
+ */
+ UPDATE_ATIME(dentry->d_inode);
++ nd->intent = it;
+ error = dentry->d_inode->i_op->follow_link(dentry, nd);
++ if (error)
++ intent_release(it);
+ dput(dentry);
+ if (error)
+ return error;
+@@ -1233,13 +1359,20 @@
+ }
+ dir = nd->dentry;
+ down(&dir->d_inode->i_sem);
+- dentry = lookup_hash(&nd->last, nd->dentry);
++ dentry = lookup_hash_it(&nd->last, nd->dentry, it);
+ putname(nd->last.name);
+ goto do_last;
+ }
+
++int open_namei(const char *pathname, int flag, int mode, struct nameidata *nd)
++{
++ return open_namei_it(pathname, flag, mode, nd, NULL); /* delegate with a NULL intent */
++}
++
++
+ /* SMP-safe */
+-struct dentry *lookup_create(struct nameidata *nd, int is_dir)
++struct dentry *lookup_create(struct nameidata *nd, int is_dir,
++ struct lookup_intent *it)
+ {
+ struct dentry *dentry;
+
+@@ -1247,7 +1380,7 @@
+ dentry = ERR_PTR(-EEXIST);
+ if (nd->last_type != LAST_NORM)
+ goto fail;
+- dentry = lookup_hash(&nd->last, nd->dentry);
++ dentry = lookup_hash_it(&nd->last, nd->dentry, it);
+ if (IS_ERR(dentry))
+ goto fail;
+ if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
+@@ -1303,7 +1436,16 @@
+ error = path_lookup(tmp, LOOKUP_PARENT, &nd);
+ if (error)
+ goto out;
+- dentry = lookup_create(&nd, 0);
++
++ if (nd.dentry->d_inode->i_op->mknod_raw) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++ error = op->mknod_raw(&nd, mode, dev);
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto out2;
++ }
++
++ dentry = lookup_create(&nd, 0, NULL);
+ error = PTR_ERR(dentry);
+
+ if (!IS_POSIXACL(nd.dentry->d_inode))
+@@ -1325,6 +1467,7 @@
+ dput(dentry);
+ }
+ up(&nd.dentry->d_inode->i_sem);
++out2:
+ path_release(&nd);
+ out:
+ putname(tmp);
+@@ -1372,7 +1515,14 @@
+ error = path_lookup(tmp, LOOKUP_PARENT, &nd);
+ if (error)
+ goto out;
+- dentry = lookup_create(&nd, 1);
++ if (nd.dentry->d_inode->i_op->mkdir_raw) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++ error = op->mkdir_raw(&nd, mode);
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto out2;
++ }
++ dentry = lookup_create(&nd, 1, NULL);
+ error = PTR_ERR(dentry);
+ if (!IS_ERR(dentry)) {
+ if (!IS_POSIXACL(nd.dentry->d_inode))
+@@ -1381,6 +1531,7 @@
+ dput(dentry);
+ }
+ up(&nd.dentry->d_inode->i_sem);
++out2:
+ path_release(&nd);
+ out:
+ putname(tmp);
+@@ -1481,8 +1632,16 @@
+ error = -EBUSY;
+ goto exit1;
+ }
++ if (nd.dentry->d_inode->i_op->rmdir_raw) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++
++ error = op->rmdir_raw(&nd);
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto exit1;
++ }
+ down(&nd.dentry->d_inode->i_sem);
+- dentry = lookup_hash(&nd.last, nd.dentry);
++ dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
+ error = PTR_ERR(dentry);
+ if (!IS_ERR(dentry)) {
+ error = vfs_rmdir(nd.dentry->d_inode, dentry);
+@@ -1540,8 +1699,15 @@
+ error = -EISDIR;
+ if (nd.last_type != LAST_NORM)
+ goto exit1;
++ if (nd.dentry->d_inode->i_op->unlink_raw) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++ error = op->unlink_raw(&nd);
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto exit1;
++ }
+ down(&nd.dentry->d_inode->i_sem);
+- dentry = lookup_hash(&nd.last, nd.dentry);
++ dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
+ error = PTR_ERR(dentry);
+ if (!IS_ERR(dentry)) {
+ /* Why not before? Because we want correct error value */
+@@ -1608,15 +1774,23 @@
+ error = path_lookup(to, LOOKUP_PARENT, &nd);
+ if (error)
+ goto out;
+- dentry = lookup_create(&nd, 0);
++ if (nd.dentry->d_inode->i_op->symlink_raw) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++ error = op->symlink_raw(&nd, from);
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto out2;
++ }
++ dentry = lookup_create(&nd, 0, NULL);
+ error = PTR_ERR(dentry);
+ if (!IS_ERR(dentry)) {
+ error = vfs_symlink(nd.dentry->d_inode, dentry, from);
+ dput(dentry);
+ }
+ up(&nd.dentry->d_inode->i_sem);
++ out2:
+ path_release(&nd);
+-out:
++ out:
+ putname(to);
+ }
+ putname(from);
+@@ -1692,7 +1866,14 @@
+ error = -EXDEV;
+ if (old_nd.mnt != nd.mnt)
+ goto out_release;
+- new_dentry = lookup_create(&nd, 0);
++ if (nd.dentry->d_inode->i_op->link_raw) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++ error = op->link_raw(&old_nd, &nd);
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto out_release;
++ }
++ new_dentry = lookup_create(&nd, 0, NULL);
+ error = PTR_ERR(new_dentry);
+ if (!IS_ERR(new_dentry)) {
+ error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
+@@ -1736,7 +1917,7 @@
+ * locking].
+ */
+ int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
+- struct inode *new_dir, struct dentry *new_dentry)
++ struct inode *new_dir, struct dentry *new_dentry)
+ {
+ int error;
+ struct inode *target;
+@@ -1815,7 +1996,7 @@
+ }
+
+ int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
+- struct inode *new_dir, struct dentry *new_dentry)
++ struct inode *new_dir, struct dentry *new_dentry)
+ {
+ int error;
+
+@@ -1903,9 +2084,18 @@
+ if (newnd.last_type != LAST_NORM)
+ goto exit2;
+
++ if (old_dir->d_inode->i_op->rename_raw) {
++ lock_kernel();
++ error = old_dir->d_inode->i_op->rename_raw(&oldnd, &newnd);
++ unlock_kernel();
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto exit2;
++ }
++
+ double_lock(new_dir, old_dir);
+
+- old_dentry = lookup_hash(&oldnd.last, old_dir);
++ old_dentry = lookup_hash_it(&oldnd.last, old_dir, NULL);
+ error = PTR_ERR(old_dentry);
+ if (IS_ERR(old_dentry))
+ goto exit3;
+@@ -1921,16 +2111,16 @@
+ if (newnd.last.name[newnd.last.len])
+ goto exit4;
+ }
+- new_dentry = lookup_hash(&newnd.last, new_dir);
++ new_dentry = lookup_hash_it(&newnd.last, new_dir, NULL);
+ error = PTR_ERR(new_dentry);
+ if (IS_ERR(new_dentry))
+ goto exit4;
+
++
+ lock_kernel();
+ error = vfs_rename(old_dir->d_inode, old_dentry,
+ new_dir->d_inode, new_dentry);
+ unlock_kernel();
+-
+ dput(new_dentry);
+ exit4:
+ dput(old_dentry);
+@@ -1981,20 +2171,26 @@
+ }
+
+ static inline int __attribute__((always_inline))
+-__vfs_follow_link(struct nameidata *nd, const char *link)
++__vfs_follow_link(struct nameidata *nd, const char *link,
++ struct lookup_intent *it)
+ {
+ int res = 0;
+ char *name;
+ if (IS_ERR(link))
+ goto fail;
+
++ if (it == NULL)
++ it = nd->intent;
++ else if (it != nd->intent)
++ printk("it != nd->intent: tell phil@clusterfs.com\n");
++
+ if (*link == '/') {
+ path_release(nd);
+ if (!walk_init_root(link, nd))
+ /* weird __emul_prefix() stuff did it */
+ goto out;
+ }
+- res = __link_path_walk(link, nd);
++ res = __link_path_walk_it(link, nd, it);
+ out:
+ if (current->link_count || res || nd->last_type!=LAST_NORM)
+ return res;
+@@ -2018,7 +2214,13 @@
+
+ int vfs_follow_link(struct nameidata *nd, const char *link)
+ {
+- return __vfs_follow_link(nd, link);
++ return __vfs_follow_link(nd, link, NULL);
++}
++
++int vfs_follow_link_it(struct nameidata *nd, const char *link,
++ struct lookup_intent *it)
++{
++ return __vfs_follow_link(nd, link, it);
+ }
+
+ /* get the link contents into pagecache */
+@@ -2060,7 +2262,7 @@
+ {
+ struct page *page = NULL;
+ char *s = page_getlink(dentry, &page);
+- int res = __vfs_follow_link(nd, s);
++ int res = __vfs_follow_link(nd, s, NULL);
+ if (page) {
+ kunmap(page);
+ page_cache_release(page);
+Index: linux-2.4.21-171/fs/namespace.c
+===================================================================
+--- linux-2.4.21-171.orig/fs/namespace.c 2004-02-24 13:42:29.000000000 -0500
++++ linux-2.4.21-171/fs/namespace.c 2004-04-03 16:09:50.000000000 -0500
+@@ -98,6 +98,7 @@
+ {
+ old_nd->dentry = mnt->mnt_mountpoint;
+ old_nd->mnt = mnt->mnt_parent;
++ UNPIN(old_nd->dentry, old_nd->mnt, 1);
+ mnt->mnt_parent = mnt;
+ mnt->mnt_mountpoint = mnt->mnt_root;
+ list_del_init(&mnt->mnt_child);
+@@ -109,6 +110,7 @@
+ {
+ mnt->mnt_parent = mntget(nd->mnt);
+ mnt->mnt_mountpoint = dget(nd->dentry);
++ PIN(nd->dentry, nd->mnt, 1);
+ list_add(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry));
+ list_add(&mnt->mnt_child, &nd->mnt->mnt_mounts);
+ nd->dentry->d_mounted++;
+@@ -488,14 +490,17 @@
+ {
+ struct nameidata old_nd;
+ struct vfsmount *mnt = NULL;
++ struct lookup_intent it = { .it_op = IT_GETATTR };
+ int err = mount_is_safe(nd);
+ if (err)
+ return err;
+ if (!old_name || !*old_name)
+ return -EINVAL;
+- err = path_lookup(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd);
+- if (err)
++ err = path_lookup_it(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd, &it);
++ if (err) {
++ intent_release(&it);
+ return err;
++ }
+
+ down_write(¤t->namespace->sem);
+ err = -EINVAL;
+@@ -518,6 +523,7 @@
+ }
+
+ up_write(¤t->namespace->sem);
++ intent_release(&it);
+ path_release(&old_nd);
+ return err;
+ }
+@@ -701,6 +707,7 @@
+ unsigned long flags, void *data_page)
+ {
+ struct nameidata nd;
++ struct lookup_intent it = { .it_op = IT_GETATTR };
+ int retval = 0;
+ int mnt_flags = 0;
+
+@@ -725,10 +732,11 @@
+ flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV);
+
+ /* ... and get the mountpoint */
+- retval = path_lookup(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd);
+- if (retval)
++ retval = path_lookup_it(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it);
++ if (retval) {
++ intent_release(&it);
+ return retval;
+-
++ }
+ if (flags & MS_REMOUNT)
+ retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
+ data_page);
+@@ -739,6 +747,8 @@
+ else
+ retval = do_add_mount(&nd, type_page, flags, mnt_flags,
+ dev_name, data_page);
++
++ intent_release(&it);
+ path_release(&nd);
+ return retval;
+ }
+@@ -904,6 +914,8 @@
+ {
+ struct vfsmount *tmp;
+ struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd;
++ struct lookup_intent new_it = { .it_op = IT_GETATTR };
++ struct lookup_intent old_it = { .it_op = IT_GETATTR };
+ int error;
+
+ if (!capable(CAP_SYS_ADMIN))
+@@ -911,14 +923,14 @@
+
+ lock_kernel();
+
+- error = __user_walk(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd);
++ error = __user_walk_it(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd, &new_it);
+ if (error)
+ goto out0;
+ error = -EINVAL;
+ if (!check_mnt(new_nd.mnt))
+ goto out1;
+
+- error = __user_walk(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd);
++ error = __user_walk_it(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd, &old_it);
+ if (error)
+ goto out1;
+
+@@ -973,8 +985,10 @@
+ up(&old_nd.dentry->d_inode->i_zombie);
+ up_write(¤t->namespace->sem);
+ path_release(&user_nd);
++ intent_release(&old_it);
+ path_release(&old_nd);
+ out1:
++ intent_release(&new_it);
+ path_release(&new_nd);
+ out0:
+ unlock_kernel();
+Index: linux-2.4.21-171/fs/open.c
+===================================================================
+--- linux-2.4.21-171.orig/fs/open.c 2004-02-24 13:42:29.000000000 -0500
++++ linux-2.4.21-171/fs/open.c 2004-04-03 16:09:50.000000000 -0500
+@@ -19,6 +19,8 @@
+ #include <asm/uaccess.h>
+
+ #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m))
++extern int path_walk_it(const char *name, struct nameidata *nd,
++ struct lookup_intent *it);
+
+ int vfs_statfs(struct super_block *sb, struct statfs *buf)
+ {
+@@ -95,9 +97,10 @@
+ write_unlock(&files->file_lock);
+ }
+
+-int do_truncate(struct dentry *dentry, loff_t length)
++int do_truncate(struct dentry *dentry, loff_t length, int called_from_open)
+ {
+ struct inode *inode = dentry->d_inode;
++ struct inode_operations *op = dentry->d_inode->i_op;
+ int error;
+ struct iattr newattrs;
+
+@@ -109,7 +112,13 @@
+ down(&inode->i_sem);
+ newattrs.ia_size = length;
+ newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
+- error = notify_change(dentry, &newattrs);
++ if (called_from_open)
++ newattrs.ia_valid |= ATTR_FROM_OPEN;
++ if (op->setattr_raw) {
++ newattrs.ia_valid |= ATTR_RAW;
++ error = op->setattr_raw(inode, &newattrs);
++ } else
++ error = notify_change(dentry, &newattrs);
+ up(&inode->i_sem);
+ up_write(&inode->i_alloc_sem);
+ return error;
+@@ -120,12 +129,13 @@
+ struct nameidata nd;
+ struct inode * inode;
+ int error;
++ struct lookup_intent it = { .it_op = IT_GETATTR };
+
+ error = -EINVAL;
+ if (length < 0) /* sorry, but loff_t says... */
+ goto out;
+
+- error = user_path_walk(path, &nd);
++ error = user_path_walk_it(path, &nd, &it);
+ if (error)
+ goto out;
+ inode = nd.dentry->d_inode;
+@@ -165,11 +175,13 @@
+ error = locks_verify_truncate(inode, NULL, length);
+ if (!error) {
+ DQUOT_INIT(inode);
+- error = do_truncate(nd.dentry, length);
++ intent_release(&it);
++ error = do_truncate(nd.dentry, length, 0);
+ }
+ put_write_access(inode);
+
+ dput_and_out:
++ intent_release(&it);
+ path_release(&nd);
+ out:
+ return error;
+@@ -217,7 +229,7 @@
+
+ error = locks_verify_truncate(inode, file, length);
+ if (!error)
+- error = do_truncate(dentry, length);
++ error = do_truncate(dentry, length, 0);
+ out_putf:
+ fput(file);
+ out:
+@@ -262,11 +274,13 @@
+ struct inode * inode;
+ struct iattr newattrs;
+
+- error = user_path_walk(filename, &nd);
++ error = user_path_walk_it(filename, &nd, NULL);
+ if (error)
+ goto out;
+ inode = nd.dentry->d_inode;
+
++ /* this is safe without a Lustre lock because it only depends
++ on the super block */
+ error = -EROFS;
+ if (IS_RDONLY(inode))
+ goto dput_and_out;
+@@ -281,11 +295,25 @@
+ goto dput_and_out;
+
+ newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
+- } else {
++ }
++
++ if (inode->i_op->setattr_raw) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++
++ newattrs.ia_valid |= ATTR_RAW;
++ error = op->setattr_raw(inode, &newattrs);
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto dput_and_out;
++ }
++
++ error = -EPERM;
++ if (!times) {
+ if (current->fsuid != inode->i_uid &&
+ (error = permission(inode,MAY_WRITE)) != 0)
+ goto dput_and_out;
+ }
++
+ error = notify_change(nd.dentry, &newattrs);
+ dput_and_out:
+ path_release(&nd);
+@@ -306,12 +334,14 @@
+ struct inode * inode;
+ struct iattr newattrs;
+
+- error = user_path_walk(filename, &nd);
++ error = user_path_walk_it(filename, &nd, NULL);
+
+ if (error)
+ goto out;
+ inode = nd.dentry->d_inode;
+
++ /* this is safe without a Lustre lock because it only depends
++ on the super block */
+ error = -EROFS;
+ if (IS_RDONLY(inode))
+ goto dput_and_out;
+@@ -326,7 +356,20 @@
+ newattrs.ia_atime = times[0].tv_sec;
+ newattrs.ia_mtime = times[1].tv_sec;
+ newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
+- } else {
++ }
++
++ if (inode->i_op->setattr_raw) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++
++ newattrs.ia_valid |= ATTR_RAW;
++ error = op->setattr_raw(inode, &newattrs);
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto dput_and_out;
++ }
++
++ error = -EPERM;
++ if (!utimes) {
+ if (current->fsuid != inode->i_uid &&
+ (error = permission(inode,MAY_WRITE)) != 0)
+ goto dput_and_out;
+@@ -349,6 +392,7 @@
+ int old_fsuid, old_fsgid;
+ kernel_cap_t old_cap;
+ int res;
++ struct lookup_intent it = { .it_op = IT_GETATTR };
+
+ if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */
+ return -EINVAL;
+@@ -366,13 +410,14 @@
+ else
+ current->cap_effective = current->cap_permitted;
+
+- res = user_path_walk(filename, &nd);
++ res = user_path_walk_it(filename, &nd, &it);
+ if (!res) {
+ res = permission(nd.dentry->d_inode, mode);
+ /* SuS v2 requires we report a read only fs too */
+ if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode)
+ && !special_file(nd.dentry->d_inode->i_mode))
+ res = -EROFS;
++ intent_release(&it);
+ path_release(&nd);
+ }
+
+@@ -387,8 +432,9 @@
+ {
+ int error;
+ struct nameidata nd;
++ struct lookup_intent it = { .it_op = IT_GETATTR };
+
+- error = __user_walk(filename,LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY,&nd);
++ error = __user_walk_it(filename,LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY,&nd, &it);
+ if (error)
+ goto out;
+
+@@ -399,6 +445,7 @@
+ set_fs_pwd(current->fs, nd.mnt, nd.dentry);
+
+ dput_and_out:
++ intent_release(&it);
+ path_release(&nd);
+ out:
+ return error;
+@@ -438,9 +485,10 @@
+ {
+ int error;
+ struct nameidata nd;
++ struct lookup_intent it = { .it_op = IT_GETATTR };
+
+- error = __user_walk(filename, LOOKUP_POSITIVE | LOOKUP_FOLLOW |
+- LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd);
++ error = __user_walk_it(filename, LOOKUP_POSITIVE | LOOKUP_FOLLOW |
++ LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd, &it);
+ if (error)
+ goto out;
+
+@@ -456,39 +504,56 @@
+ set_fs_altroot();
+ error = 0;
+ dput_and_out:
++ intent_release(&it);
+ path_release(&nd);
+ out:
+ return error;
+ }
+
+-asmlinkage long sys_fchmod(unsigned int fd, mode_t mode)
++int chmod_common(struct dentry *dentry, mode_t mode)
+ {
+- struct inode * inode;
+- struct dentry * dentry;
+- struct file * file;
+- int err = -EBADF;
++ struct inode *inode = dentry->d_inode;
+ struct iattr newattrs;
++ int err = -EROFS;
+
+- file = fget(fd);
+- if (!file)
++ if (IS_RDONLY(inode))
+ goto out;
+
+- dentry = file->f_dentry;
+- inode = dentry->d_inode;
++ if (inode->i_op->setattr_raw) {
++ newattrs.ia_mode = mode;
++ newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
++ newattrs.ia_valid |= ATTR_RAW;
++ err = inode->i_op->setattr_raw(inode, &newattrs);
++ /* the file system wants to use normal vfs path now */
++ if (err != -EOPNOTSUPP)
++ goto out;
++ }
+
+- err = -EROFS;
+- if (IS_RDONLY(inode))
+- goto out_putf;
+ err = -EPERM;
+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+- goto out_putf;
++ goto out;
++
+ if (mode == (mode_t) -1)
+ mode = inode->i_mode;
+ newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
+ newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
+ err = notify_change(dentry, &newattrs);
+
+-out_putf:
++out:
++ return err;
++}
++
++asmlinkage long sys_fchmod(unsigned int fd, mode_t mode)
++{
++ struct file * file;
++ int err = -EBADF;
++
++ file = fget(fd);
++ if (!file)
++ goto out;
++
++ err = chmod_common(file->f_dentry, mode);
++
+ fput(file);
+ out:
+ return err;
+@@ -497,30 +562,14 @@
+ asmlinkage long sys_chmod(const char * filename, mode_t mode)
+ {
+ struct nameidata nd;
+- struct inode * inode;
+ int error;
+- struct iattr newattrs;
+
+ error = user_path_walk(filename, &nd);
+ if (error)
+ goto out;
+- inode = nd.dentry->d_inode;
+-
+- error = -EROFS;
+- if (IS_RDONLY(inode))
+- goto dput_and_out;
+
+- error = -EPERM;
+- if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+- goto dput_and_out;
++ error = chmod_common(nd.dentry, mode);
+
+- if (mode == (mode_t) -1)
+- mode = inode->i_mode;
+- newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
+- newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
+- error = notify_change(nd.dentry, &newattrs);
+-
+-dput_and_out:
+ path_release(&nd);
+ out:
+ return error;
+@@ -540,6 +589,20 @@
+ error = -EROFS;
+ if (IS_RDONLY(inode))
+ goto out;
++
++ if (inode->i_op->setattr_raw) {
++ struct inode_operations *op = dentry->d_inode->i_op;
++
++ newattrs.ia_uid = user;
++ newattrs.ia_gid = group;
++ newattrs.ia_valid = ATTR_UID | ATTR_GID | ATTR_CTIME;
++ newattrs.ia_valid |= ATTR_RAW;
++ error = op->setattr_raw(inode, &newattrs);
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ return error;
++ }
++
+ error = -EPERM;
+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+ goto out;
+@@ -644,6 +707,7 @@
+ {
+ int namei_flags, error;
+ struct nameidata nd;
++ struct lookup_intent it = { .it_op = IT_OPEN };
+
+ namei_flags = flags;
+ if ((namei_flags+1) & O_ACCMODE)
+@@ -651,14 +715,15 @@
+ if (namei_flags & O_TRUNC)
+ namei_flags |= 2;
+
+- error = open_namei(filename, namei_flags, mode, &nd);
+- if (!error)
+- return dentry_open(nd.dentry, nd.mnt, flags);
++ error = open_namei_it(filename, namei_flags, mode, &nd, &it);
++ if (error)
++ return ERR_PTR(error);
+
+- return ERR_PTR(error);
++ return dentry_open_it(nd.dentry, nd.mnt, flags, &it);
+ }
+
+-struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
++struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt,
++ int flags, struct lookup_intent *it)
+ {
+ struct file * f;
+ struct inode *inode;
+@@ -686,7 +751,9 @@
+ file_move(f, &inode->i_sb->s_files);
+
+ if (f->f_op && f->f_op->open) {
++ f->f_it = it;
+ error = f->f_op->open(inode,f);
++ f->f_it = NULL;
+ if (error)
+ goto cleanup_all;
+ }
+@@ -698,6 +765,7 @@
+ !inode->i_mapping->a_ops->direct_IO))
+ goto cleanup_all;
+
++ intent_release(it);
+ return f;
+
+ cleanup_all:
+@@ -710,11 +778,17 @@
+ cleanup_file:
+ put_filp(f);
+ cleanup_dentry:
++ intent_release(it);
+ dput(dentry);
+ mntput(mnt);
+ return ERR_PTR(error);
+ }
+
++struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
++{
++ return dentry_open_it(dentry, mnt, flags, NULL); /* old entry point: same open, but with no lookup intent */
++}
++
+ /*
+ * Find an empty file descriptor entry, and mark it busy.
+ */
+Index: linux-2.4.21-171/fs/stat.c
+===================================================================
+--- linux-2.4.21-171.orig/fs/stat.c 2004-02-24 13:42:29.000000000 -0500
++++ linux-2.4.21-171/fs/stat.c 2004-04-03 16:09:50.000000000 -0500
+@@ -17,10 +17,14 @@
+ * Revalidate the inode. This is required for proper NFS attribute caching.
+ */
+ static __inline__ int
+-do_revalidate(struct dentry *dentry)
++do_revalidate(struct dentry *dentry, struct lookup_intent *it)
+ {
+ struct inode * inode = dentry->d_inode;
+- if (inode->i_op && inode->i_op->revalidate)
++ if (!inode)
++ return -ENOENT;
++ if (inode->i_op && inode->i_op->revalidate_it)
++ return inode->i_op->revalidate_it(dentry, it);
++ else if (inode->i_op && inode->i_op->revalidate)
+ return inode->i_op->revalidate(dentry);
+ return 0;
+ }
+@@ -141,13 +145,15 @@
+ asmlinkage long sys_stat(char * filename, struct __old_kernel_stat * statbuf)
+ {
+ struct nameidata nd;
++ struct lookup_intent it = { .it_op = IT_GETATTR };
+ int error;
+
+- error = user_path_walk(filename, &nd);
++ error = user_path_walk_it(filename, &nd, &it);
+ if (!error) {
+- error = do_revalidate(nd.dentry);
++ error = do_revalidate(nd.dentry, &it);
+ if (!error)
+ error = cp_old_stat(nd.dentry->d_inode, statbuf);
++ intent_release(&it);
+ path_release(&nd);
+ }
+ return error;
+@@ -157,13 +163,15 @@
+ asmlinkage long sys_newstat(char * filename, struct stat * statbuf)
+ {
+ struct nameidata nd;
++ struct lookup_intent it = { .it_op = IT_GETATTR };
+ int error;
+
+- error = user_path_walk(filename, &nd);
++ error = user_path_walk_it(filename, &nd, &it);
+ if (!error) {
+- error = do_revalidate(nd.dentry);
++ error = do_revalidate(nd.dentry, &it);
+ if (!error)
+ error = cp_new_stat(nd.dentry->d_inode, statbuf);
++ intent_release(&it);
+ path_release(&nd);
+ }
+ return error;
+@@ -178,13 +186,15 @@
+ asmlinkage long sys_lstat(char * filename, struct __old_kernel_stat * statbuf)
+ {
+ struct nameidata nd;
++ struct lookup_intent it = { .it_op = IT_GETATTR };
+ int error;
+
+- error = user_path_walk_link(filename, &nd);
++ error = user_path_walk_link_it(filename, &nd, &it);
+ if (!error) {
+- error = do_revalidate(nd.dentry);
++ error = do_revalidate(nd.dentry, &it);
+ if (!error)
+ error = cp_old_stat(nd.dentry->d_inode, statbuf);
++ intent_release(&it);
+ path_release(&nd);
+ }
+ return error;
+@@ -195,13 +205,15 @@
+ asmlinkage long sys_newlstat(char * filename, struct stat * statbuf)
+ {
+ struct nameidata nd;
++ struct lookup_intent it = { .it_op = IT_GETATTR };
+ int error;
+
+- error = user_path_walk_link(filename, &nd);
++ error = user_path_walk_link_it(filename, &nd, &it);
+ if (!error) {
+- error = do_revalidate(nd.dentry);
++ error = do_revalidate(nd.dentry, &it);
+ if (!error)
+ error = cp_new_stat(nd.dentry->d_inode, statbuf);
++ intent_release(&it);
+ path_release(&nd);
+ }
+ return error;
+@@ -222,7 +234,7 @@
+ if (f) {
+ struct dentry * dentry = f->f_dentry;
+
+- err = do_revalidate(dentry);
++ err = do_revalidate(dentry, NULL);
+ if (!err)
+ err = cp_old_stat(dentry->d_inode, statbuf);
+ fput(f);
+@@ -241,7 +253,7 @@
+ if (f) {
+ struct dentry * dentry = f->f_dentry;
+
+- err = do_revalidate(dentry);
++ err = do_revalidate(dentry, NULL);
+ if (!err)
+ err = cp_new_stat(dentry->d_inode, statbuf);
+ fput(f);
+@@ -263,7 +275,7 @@
+
+ error = -EINVAL;
+ if (inode->i_op && inode->i_op->readlink &&
+- !(error = do_revalidate(nd.dentry))) {
++ !(error = do_revalidate(nd.dentry, NULL))) {
+ UPDATE_ATIME(inode);
+ error = inode->i_op->readlink(nd.dentry, buf, bufsiz);
+ }
+@@ -339,12 +351,14 @@
+ {
+ struct nameidata nd;
+ int error;
++ struct lookup_intent it = { .it_op = IT_GETATTR };
+
+- error = user_path_walk(filename, &nd);
++ error = user_path_walk_it(filename, &nd, &it);
+ if (!error) {
+- error = do_revalidate(nd.dentry);
++ error = do_revalidate(nd.dentry, &it);
+ if (!error)
+ error = cp_new_stat64(nd.dentry->d_inode, statbuf);
++ intent_release(&it);
+ path_release(&nd);
+ }
+ return error;
+@@ -354,12 +368,14 @@
+ {
+ struct nameidata nd;
+ int error;
++ struct lookup_intent it = { .it_op = IT_GETATTR };
+
+- error = user_path_walk_link(filename, &nd);
++ error = user_path_walk_link_it(filename, &nd, &it);
+ if (!error) {
+- error = do_revalidate(nd.dentry);
++ error = do_revalidate(nd.dentry, &it);
+ if (!error)
+ error = cp_new_stat64(nd.dentry->d_inode, statbuf);
++ intent_release(&it);
+ path_release(&nd);
+ }
+ return error;
+@@ -374,7 +390,7 @@
+ if (f) {
+ struct dentry * dentry = f->f_dentry;
+
+- err = do_revalidate(dentry);
++ err = do_revalidate(dentry, NULL);
+ if (!err)
+ err = cp_new_stat64(dentry->d_inode, statbuf);
+ fput(f);
+Index: linux-2.4.21-171/include/linux/dcache.h
+===================================================================
+--- linux-2.4.21-171.orig/include/linux/dcache.h 2004-03-31 14:58:26.000000000 -0500
++++ linux-2.4.21-171/include/linux/dcache.h 2004-04-03 16:09:50.000000000 -0500
+@@ -7,6 +7,51 @@
+ #include <linux/gdb.h>
+ #include <linux/mount.h>
+ #include <linux/kernel.h>
++#include <linux/string.h>
++
++#define IT_OPEN 0x0001 /* IT_*: one bit per operation, used as lookup_intent.it_op */
++#define IT_CREAT 0x0002
++#define IT_READDIR 0x0004
++#define IT_GETATTR 0x0008
++#define IT_LOOKUP 0x0010
++#define IT_UNLINK 0x0020
++#define IT_GETXATTR 0x0040
++#define IT_EXEC 0x0080
++#define IT_PIN 0x0100
++
++#define IT_FL_LOCKED 0x0001 /* NOTE(review): IT_FL_* look like lookup_intent.it_flags bits - confirm */
++#define IT_FL_FOLLOWED 0x0002 /* set by vfs_follow_link */
++
++#define INTENT_MAGIC 0x19620323 /* stored in lookup_intent.it_magic by intent_init() */
++
++
++struct lustre_intent_data { /* embedded in struct lookup_intent as d.lustre */
++ int it_disposition;
++ int it_status;
++ __u64 it_lock_handle;
++ void *it_data;
++ int it_lock_mode;
++ int it_int_flags;
++};
++struct lookup_intent { /* handed to the *_it lookup/open/revalidate entry points */
++ int it_magic; /* INTENT_MAGIC once intent_init() has run */
++ void (*it_op_release)(struct lookup_intent *); /* NOTE(review): presumably called from intent_release() - confirm in fs/namei.c */
++ int it_op; /* one of the IT_* codes, e.g. IT_OPEN or IT_GETATTR */
++ int it_flags; /* the flags argument given to intent_init() */
++ int it_create_mode;
++ union {
++ struct lustre_intent_data lustre;
++ } d; /* union with a single member: the Lustre data */
++};
++
++static inline void intent_init(struct lookup_intent *it, int op, int flags)
++{
++ memset(it, 0, sizeof(struct lookup_intent)); /* every field starts out zero */
++ it->it_op = op; /* IT_* operation code */
++ it->it_flags = flags;
++ it->it_magic = INTENT_MAGIC; /* stamp the magic number */
++}
++
+
+ /*
+ * linux/include/linux/dcache.h
+@@ -94,8 +139,22 @@
+ int (*d_delete)(struct dentry *);
+ void (*d_release)(struct dentry *);
+ void (*d_iput)(struct dentry *, struct inode *);
++ int (*d_revalidate_it)(struct dentry *, int, struct lookup_intent *);
++ void (*d_pin)(struct dentry *, struct vfsmount * , int);
++ void (*d_unpin)(struct dentry *, struct vfsmount *, int);
+ };
+
++#define PIN(de,mnt,flag) do { if ((de) && (de)->d_op && (de)->d_op->d_pin) \
++ (de)->d_op->d_pin((de), (mnt), (flag)); } while (0) /* NULL-safe d_pin call; de is evaluated repeatedly, pass no side effects */
++#define UNPIN(de,mnt,flag) do { if ((de) && (de)->d_op && (de)->d_op->d_unpin) \
++ (de)->d_op->d_unpin((de), (mnt), (flag)); } while (0) /* NULL-safe d_unpin call; de is evaluated repeatedly, pass no side effects */
++
++
++/* defined in fs/namei.c */
++extern void intent_release(struct lookup_intent *it);
++/* defined in fs/dcache.c */
++extern void __d_rehash(struct dentry * entry, int lock);
++
+ /* the dentry parameter passed to d_hash and d_compare is the parent
+ * directory of the entries to be compared. It is used in case these
+ * functions need any directory specific information for determining
+@@ -127,6 +186,7 @@
+ * s_nfsd_free_path semaphore will be down
+ */
+ #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */
++#define DCACHE_LUSTRE_INVALID 0x0010 /* Lustre invalidated */
+
+ extern spinlock_t dcache_lock;
+
+Index: linux-2.4.21-171/include/linux/fs.h
+===================================================================
+--- linux-2.4.21-171.orig/include/linux/fs.h 2004-04-03 16:07:40.000000000 -0500
++++ linux-2.4.21-171/include/linux/fs.h 2004-04-03 16:09:50.000000000 -0500
+@@ -74,6 +74,7 @@
+
+ #define FMODE_READ 1
+ #define FMODE_WRITE 2
++#define FMODE_EXEC 4
+
+ #define READ 0
+ #define WRITE 1
+@@ -360,6 +361,9 @@
+ #define ATTR_MTIME_SET 256
+ #define ATTR_FORCE 512 /* Not a change, but a change it */
+ #define ATTR_ATTR_FLAG 1024
++#define ATTR_RAW 0x0800 /* file system, not vfs will massage attrs */
++#define ATTR_FROM_OPEN 0x1000 /* called from open path, ie O_TRUNC */
++#define ATTR_CTIME_SET 0x2000
+
+ /*
+ * This is the Inode Attributes structure, used for notify_change(). It
+@@ -504,6 +508,7 @@
+ struct pipe_inode_info *i_pipe;
+ struct block_device *i_bdev;
+ struct char_device *i_cdev;
++ void *i_filterdata;
+
+ unsigned long i_dnotify_mask; /* Directory notify events */
+ struct dnotify_struct *i_dnotify; /* for directory notifications */
+@@ -666,6 +671,7 @@
+
+ /* needed for tty driver, and maybe others */
+ void *private_data;
++ struct lookup_intent *f_it;
+ struct list_head f_ep_links;
+ spinlock_t f_ep_lock;
+ };
+@@ -795,6 +801,7 @@
+ struct qstr last;
+ unsigned int flags;
+ int last_type;
++ struct lookup_intent *intent;
+ };
+
+ /*
+@@ -916,7 +923,8 @@
+ extern int __vfs_rmdir(struct inode *, struct dentry *);
+ extern int vfs_rmdir(struct inode *, struct dentry *);
+ extern int vfs_unlink(struct inode *, struct dentry *);
+-extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
++int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
++ struct inode *new_dir, struct dentry *new_dentry);
+
+ /*
+ * File types
+@@ -991,21 +999,32 @@
+
+ struct inode_operations {
+ int (*create) (struct inode *,struct dentry *,int);
++ int (*create_it) (struct inode *,struct dentry *,int, struct lookup_intent *);
+ struct dentry * (*lookup) (struct inode *,struct dentry *);
++ struct dentry * (*lookup_it) (struct inode *,struct dentry *, struct lookup_intent *, int flags);
+ int (*link) (struct dentry *,struct inode *,struct dentry *);
++ int (*link_raw) (struct nameidata *,struct nameidata *);
+ int (*unlink) (struct inode *,struct dentry *);
++ int (*unlink_raw) (struct nameidata *);
+ int (*symlink) (struct inode *,struct dentry *,const char *);
++ int (*symlink_raw) (struct nameidata *,const char *);
+ int (*mkdir) (struct inode *,struct dentry *,int);
++ int (*mkdir_raw) (struct nameidata *,int);
+ int (*rmdir) (struct inode *,struct dentry *);
++ int (*rmdir_raw) (struct nameidata *);
+ int (*mknod) (struct inode *,struct dentry *,int,int);
++ int (*mknod_raw) (struct nameidata *,int,dev_t);
+ int (*rename) (struct inode *, struct dentry *,
+ struct inode *, struct dentry *);
++ int (*rename_raw) (struct nameidata *, struct nameidata *);
+ int (*readlink) (struct dentry *, char *,int);
+ int (*follow_link) (struct dentry *, struct nameidata *);
+ void (*truncate) (struct inode *);
+ int (*permission) (struct inode *, int);
+ int (*revalidate) (struct dentry *);
++ int (*revalidate_it) (struct dentry *, struct lookup_intent *);
+ int (*setattr) (struct dentry *, struct iattr *);
++ int (*setattr_raw) (struct inode *, struct iattr *);
+ int (*getattr) (struct dentry *, struct iattr *);
+ int (*setxattr) (struct dentry *, const char *, const void *, size_t, int);
+ ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
+@@ -1204,10 +1223,14 @@
+
+ asmlinkage long sys_open(const char *, int, int);
+ asmlinkage long sys_close(unsigned int); /* yes, it's really unsigned */
+-extern int do_truncate(struct dentry *, loff_t start);
++extern int do_truncate(struct dentry *, loff_t start, int called_from_open);
+
+ extern struct file *filp_open(const char *, int, int);
+ extern struct file * dentry_open(struct dentry *, struct vfsmount *, int);
++extern int open_namei_it(const char *filename, int namei_flags, int mode,
++ struct nameidata *nd, struct lookup_intent *it);
++extern struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt,
++ int flags, struct lookup_intent *it);
+ extern int filp_close(struct file *, fl_owner_t id);
+ extern char * getname(const char *);
+
+@@ -1503,6 +1526,7 @@
+ extern loff_t default_llseek(struct file *file, loff_t offset, int origin);
+
+ extern int FASTCALL(__user_walk(const char *, unsigned, struct nameidata *));
++extern int FASTCALL(__user_walk_it(const char *, unsigned, struct nameidata *, struct lookup_intent *it));
+ extern int FASTCALL(path_init(const char *, unsigned, struct nameidata *));
+ extern int FASTCALL(path_walk(const char *, struct nameidata *));
+ extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *));
+@@ -1515,6 +1539,8 @@
+ extern struct dentry * lookup_hash(struct qstr *, struct dentry *);
+ #define user_path_walk(name,nd) __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd)
+ #define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd)
++#define user_path_walk_it(name,nd,it) __user_walk_it(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd, it)
++#define user_path_walk_link_it(name,nd,it) __user_walk_it(name, LOOKUP_POSITIVE, nd, it)
+
+ extern void inode_init_once(struct inode *);
+ extern void _inode_init_once(struct inode *);
+@@ -1666,6 +1692,8 @@
+
+ extern int vfs_readlink(struct dentry *, char *, int, const char *);
+ extern int vfs_follow_link(struct nameidata *, const char *);
++extern int vfs_follow_link_it(struct nameidata *, const char *,
++ struct lookup_intent *it);
+ extern int page_readlink(struct dentry *, char *, int);
+ extern int page_follow_link(struct dentry *, struct nameidata *);
+ extern struct inode_operations page_symlink_inode_operations;
+Index: linux-2.4.21-171/include/linux/fs_struct.h
+===================================================================
+--- linux-2.4.21-171.orig/include/linux/fs_struct.h 2004-02-24 13:42:29.000000000 -0500
++++ linux-2.4.21-171/include/linux/fs_struct.h 2004-04-03 16:09:50.000000000 -0500
+@@ -34,10 +34,12 @@
+ write_lock(&fs->lock);
+ old_root = fs->root;
+ old_rootmnt = fs->rootmnt;
++ PIN(dentry, mnt, 1);
+ fs->rootmnt = mntget(mnt);
+ fs->root = dget(dentry);
+ write_unlock(&fs->lock);
+ if (old_root) {
++ UNPIN(old_root, old_rootmnt, 1);
+ dput(old_root);
+ mntput(old_rootmnt);
+ }
+@@ -57,10 +59,12 @@
+ write_lock(&fs->lock);
+ old_pwd = fs->pwd;
+ old_pwdmnt = fs->pwdmnt;
++ PIN(dentry, mnt, 0);
+ fs->pwdmnt = mntget(mnt);
+ fs->pwd = dget(dentry);
+ write_unlock(&fs->lock);
+ if (old_pwd) {
++ UNPIN(old_pwd, old_pwdmnt, 0);
+ dput(old_pwd);
+ mntput(old_pwdmnt);
+ }
+Index: linux-2.4.21-171/kernel/exit.c
+===================================================================
+--- linux-2.4.21-171.orig/kernel/exit.c 2004-02-24 14:03:21.000000000 -0500
++++ linux-2.4.21-171/kernel/exit.c 2004-04-03 16:09:50.000000000 -0500
+@@ -292,11 +292,14 @@
+ {
+ /* No need to hold fs->lock if we are killing it */
+ if (atomic_dec_and_test(&fs->count)) {
++ UNPIN(fs->pwd, fs->pwdmnt, 0);
++ UNPIN(fs->root, fs->rootmnt, 1);
+ dput(fs->root);
+ mntput(fs->rootmnt);
+ dput(fs->pwd);
+ mntput(fs->pwdmnt);
+ if (fs->altroot) {
++ UNPIN(fs->altroot, fs->altrootmnt, 1);
+ dput(fs->altroot);
+ mntput(fs->altrootmnt);
+ }
+Index: linux-2.4.21-171/kernel/fork.c
+===================================================================
+--- linux-2.4.21-171.orig/kernel/fork.c 2004-02-24 14:03:21.000000000 -0500
++++ linux-2.4.21-171/kernel/fork.c 2004-04-03 16:09:50.000000000 -0500
+@@ -467,10 +467,13 @@
+ fs->umask = old->umask;
+ read_lock(&old->lock);
+ fs->rootmnt = mntget(old->rootmnt);
++ PIN(old->pwd, old->pwdmnt, 0);
++ PIN(old->root, old->rootmnt, 1);
+ fs->root = dget(old->root);
+ fs->pwdmnt = mntget(old->pwdmnt);
+ fs->pwd = dget(old->pwd);
+ if (old->altroot) {
++ PIN(old->altroot, old->altrootmnt, 1);
+ fs->altrootmnt = mntget(old->altrootmnt);
+ fs->altroot = dget(old->altroot);
+ } else {
+Index: linux-2.4.21-171/kernel/ksyms.c
+===================================================================
+--- linux-2.4.21-171.orig/kernel/ksyms.c 2004-04-03 16:07:40.000000000 -0500
++++ linux-2.4.21-171/kernel/ksyms.c 2004-04-03 16:09:50.000000000 -0500
+@@ -334,6 +334,7 @@
+ EXPORT_SYMBOL(set_page_dirty);
+ EXPORT_SYMBOL(vfs_readlink);
+ EXPORT_SYMBOL(vfs_follow_link);
++EXPORT_SYMBOL(vfs_follow_link_it);
+ EXPORT_SYMBOL(page_readlink);
+ EXPORT_SYMBOL(page_follow_link);
+ EXPORT_SYMBOL(page_symlink_inode_operations);