Whamcloud - gitweb
b=16893,18503
[fs/lustre-release.git] / lustre / llite / statahead.c
index 77dd034..c853210 100644 (file)
@@ -1,22 +1,37 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- *  Copyright (c) 2007 Cluster File Systems, Inc.
+ * GPL HEADER START
  *
- *   This file is part of Lustre, http://www.lustre.org.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
  *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
  *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright  2008 Sun Microsystems, Inc. All rights reserved
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
  */
 
 #include <linux/fs.h>
@@ -47,10 +62,6 @@ enum {
         SA_ENTRY_STATED
 };
 
-struct dentry_operations ll_sai_d_ops = {
-        .d_release = ll_release,
-};
-
 static unsigned int sai_generation = 0;
 static spinlock_t sai_generation_lock = SPIN_LOCK_UNLOCKED;
 
@@ -81,7 +92,7 @@ static inline int sa_received_empty(struct ll_statahead_info *sai)
 
 static inline int sa_not_full(struct ll_statahead_info *sai)
 {
-        return sai->sai_index < sai->sai_hit + sai->sai_miss + sai->sai_max;
+        return (sai->sai_index < sai->sai_hit + sai->sai_miss + sai->sai_max);
 }
 
 static inline int sa_is_running(struct ll_statahead_info *sai)
@@ -117,7 +128,7 @@ static inline int sa_low_hit(struct ll_statahead_info *sai)
  * (3) drop dentry's ref count
  * (4) release request's ref count
  */
-static void ll_sai_entry_cleanup(struct ll_sai_entry *entry)
+static void ll_sai_entry_cleanup(struct ll_sai_entry *entry, int free)
 {
         struct ptlrpc_request  *req = entry->se_req;
         struct md_enqueue_info *minfo = entry->se_minfo;
@@ -136,7 +147,10 @@ static void ll_sai_entry_cleanup(struct ll_sai_entry *entry)
                 entry->se_req = NULL;
                 ptlrpc_req_finished(req);
         }
-        OBD_FREE_PTR(entry);
+        if (free) {
+                LASSERT(list_empty(&entry->se_list));
+                OBD_FREE_PTR(entry);
+        }
 
         EXIT;
 }
@@ -175,13 +189,17 @@ struct ll_statahead_info *ll_sai_get(struct ll_statahead_info *sai)
 static void ll_sai_put(struct ll_statahead_info *sai)
 {
         struct inode         *inode = sai->sai_inode;
-        struct ll_inode_info *lli = ll_i2info(inode);
+        struct ll_inode_info *lli;
         ENTRY;
 
+        LASSERT(inode != NULL);
+        lli = ll_i2info(inode);
         if (atomic_dec_and_lock(&sai->sai_refcount, &lli->lli_lock)) {
                 struct ll_sai_entry *entry, *next;
 
+                LASSERT(lli->lli_opendir_key == NULL);
                 lli->lli_sai = NULL;
+                lli->lli_opendir_pid = 0;
                 spin_unlock(&lli->lli_lock);
 
                 LASSERT(sa_is_stopped(sai));
@@ -194,20 +212,19 @@ static void ll_sai_put(struct ll_statahead_info *sai)
 
                 list_for_each_entry_safe(entry, next, &sai->sai_entries_sent,
                                          se_list) {
-                        list_del(&entry->se_list);
-                        ll_sai_entry_cleanup(entry);
+                        list_del_init(&entry->se_list);
+                        ll_sai_entry_cleanup(entry, 1);
                 }
                 list_for_each_entry_safe(entry, next, &sai->sai_entries_received,
                                          se_list) {
-                        list_del(&entry->se_list);
-                        ll_sai_entry_cleanup(entry);
+                        list_del_init(&entry->se_list);
+                        ll_sai_entry_cleanup(entry, 1);
                 }
                 list_for_each_entry_safe(entry, next, &sai->sai_entries_stated,
                                          se_list) {
-                        list_del(&entry->se_list);
-                        ll_sai_entry_cleanup(entry);
+                        list_del_init(&entry->se_list);
+                        ll_sai_entry_cleanup(entry, 1);
                 }
-                dput(sai->sai_first);
                 OBD_FREE_PTR(sai);
                 iput(inode);
         }
@@ -279,8 +296,7 @@ ll_sai_entry_set(struct ll_statahead_info *sai, unsigned int index, int stat,
         ENTRY;
 
         if (!list_empty(&sai->sai_entries_sent)) {
-                list_for_each_entry(entry, &sai->sai_entries_sent,
-                                    se_list) {
+                list_for_each_entry(entry, &sai->sai_entries_sent, se_list) {
                         if (entry->se_index == index) {
                                 entry->se_stat = stat;
                                 entry->se_req = ptlrpc_request_addref(req);
@@ -317,13 +333,15 @@ ll_sai_entry_to_stated(struct ll_statahead_info *sai, struct ll_sai_entry *entry
         struct ll_sai_entry  *se;
         ENTRY;
 
+        ll_sai_entry_cleanup(entry, 0);
+
         spin_lock(&lli->lli_lock);
         if (!list_empty(&entry->se_list))
                 list_del_init(&entry->se_list);
 
         if (unlikely(entry->se_index < sai->sai_index_next)) {
                 spin_unlock(&lli->lli_lock);
-                ll_sai_entry_cleanup(entry);
+                OBD_FREE_PTR(entry);
                 RETURN(0);
         }
 
@@ -355,6 +373,7 @@ static int do_statahead_interpret(struct ll_statahead_info *sai)
         struct dentry          *dentry;
         struct lookup_intent   *it;
         int                     rc = 0;
+        struct mdt_body        *body;
         ENTRY;
 
         spin_lock(&lli->lli_lock);
@@ -365,17 +384,21 @@ static int do_statahead_interpret(struct ll_statahead_info *sai)
         spin_unlock(&lli->lli_lock);
 
         if (unlikely(entry->se_index < sai->sai_index_next)) {
-                ll_sai_entry_cleanup(entry);
+                ll_sai_entry_cleanup(entry, 1);
                 RETURN(0);
         }
 
+        if (entry->se_stat != SA_ENTRY_STATED)
+                GOTO(out, rc = entry->se_stat);
+
         req = entry->se_req;
         minfo = entry->se_minfo;
         dentry = minfo->mi_dentry;
         it = &minfo->mi_it;
 
-        if (entry->se_stat != SA_ENTRY_STATED)
-                GOTO(out, rc = entry->se_stat);
+        body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
+        if (body == NULL)
+                GOTO(out, rc = -EFAULT);
 
         if (dentry->d_inode == NULL) {
                 /*
@@ -389,6 +412,13 @@ static int do_statahead_interpret(struct ll_statahead_info *sai)
 
                 LASSERT(fid_is_zero(&minfo->mi_data.op_fid2));
 
+                /*
+                 * XXX: No fid in reply, this is probably a cross-ref case.
+                 * SA can't handle it yet.
+                 */
+                if (body->valid & OBD_MD_MDS)
+                        GOTO(out, rc = -EAGAIN);
+
                 rc = ll_lookup_it_finish(req, it, &icbd);
                 if (!rc)
                         /*
@@ -406,10 +436,6 @@ static int do_statahead_interpret(struct ll_statahead_info *sai)
                 /*
                  * revalidate.
                  */
-                struct mdt_body *body;
-
-                body = lustre_msg_buf(req->rq_repmsg, DLM_REPLY_REC_OFF,
-                                      sizeof(*body));
                 if (!lu_fid_eq(&minfo->mi_data.op_fid2, &body->fid1)) {
                         ll_unhash_aliases(dentry->d_inode);
                         GOTO(out, rc = -EAGAIN);
@@ -421,6 +447,7 @@ static int do_statahead_interpret(struct ll_statahead_info *sai)
                         GOTO(out, rc);
                 }
 
+                spin_lock(&ll_lookup_lock);
                 spin_lock(&dcache_lock);
                 lock_dentry(dentry);
                 __d_drop(dentry);
@@ -430,21 +457,15 @@ static int do_statahead_interpret(struct ll_statahead_info *sai)
                 unlock_dentry(dentry);
                 d_rehash_cond(dentry, 0);
                 spin_unlock(&dcache_lock);
+                spin_unlock(&ll_lookup_lock);
 
                 ll_lookup_finish_locks(it, dentry);
         }
         EXIT;
 
 out:
-        if (likely(ll_sai_entry_to_stated(sai, entry))) {
-                entry->se_minfo = NULL;
-                entry->se_req = NULL;
+        if (likely(ll_sai_entry_to_stated(sai, entry)))
                 cfs_waitq_signal(&sai->sai_waitq);
-                ll_intent_release(it);
-                OBD_FREE_PTR(minfo);
-                dput(dentry);
-                ptlrpc_req_finished(req);
-        }
         return rc;
 }
 
@@ -472,7 +493,7 @@ static int ll_statahead_interpret(struct ptlrpc_request *req,
                 OBD_FREE_PTR(minfo);
                 RETURN(-ESTALE);
         } else {
-                sai = lli->lli_sai;
+                sai = ll_sai_get(lli->lli_sai);
                 if (rc || dir == NULL)
                         rc = -ESTALE;
 
@@ -491,8 +512,9 @@ static int ll_statahead_interpret(struct ptlrpc_request *req,
                                 list_del_init(&entry->se_list);
                         sai->sai_replied++;
                         spin_unlock(&lli->lli_lock);
-                        ll_sai_entry_cleanup(entry);
+                        ll_sai_entry_cleanup(entry, 1);
                 }
+                ll_sai_put(sai);
                 RETURN(rc);
         }
 }
@@ -639,18 +661,11 @@ static int do_sa_revalidate(struct dentry *dentry)
         RETURN(rc);
 }
 
-static inline void ll_name2qstr(struct qstr *this, const char *name, int namelen)
+static inline void ll_name2qstr(struct qstr *q, const char *name, int namelen)
 {
-        unsigned long hash = init_name_hash();
-        unsigned int  c;
-
-        this->name = name;
-        this->len  = namelen;
-        for (; namelen > 0; namelen--, name++) {
-                c = *(const unsigned char *)name;
-                hash = partial_name_hash(c, hash);
-        }
-        this->hash = end_name_hash(hash);
+        q->name = name;
+        q->len  = namelen;
+        q->hash = full_name_hash(name, namelen);
 }
 
 static int ll_statahead_one(struct dentry *parent, const char* entry_name,
@@ -726,8 +741,8 @@ static int ll_statahead_thread(void *arg)
         struct inode             *dir = parent->d_inode;
         struct ll_inode_info     *lli = ll_i2info(dir);
         struct ll_sb_info        *sbi = ll_i2sbi(dir);
-        struct ll_statahead_info *sai = ll_sai_get(lli->lli_sai);
-        struct ptlrpc_thread     *thread = &sai->sai_thread;
+        struct ll_statahead_info *sai;
+        struct ptlrpc_thread     *thread;
         struct page              *page;
         __u64                     pos = 0;
         int                       first = 0;
@@ -735,12 +750,23 @@ static int ll_statahead_thread(void *arg)
         struct ll_dir_chain       chain;
         ENTRY;
 
+        spin_lock(&lli->lli_lock);
+        if (unlikely(lli->lli_sai == NULL)) {
+                spin_unlock(&lli->lli_lock);
+                dput(parent);
+                RETURN(-EAGAIN);
+        } else {
+                sai = ll_sai_get(lli->lli_sai);
+                spin_unlock(&lli->lli_lock);
+        }
+
         {
                 char pname[16];
                 snprintf(pname, 15, "ll_sa_%u", sta->sta_pid);
                 cfs_daemonize(pname);
         }
 
+        thread = &sai->sai_thread;
         sbi->ll_sa_total++;
         spin_lock(&lli->lli_lock);
         thread->t_flags = SVC_RUNNING;
@@ -758,7 +784,7 @@ static int ll_statahead_thread(void *arg)
 
                 if (IS_ERR(page)) {
                         rc = PTR_ERR(page);
-                        CERROR("error reading dir "DFID" at %llu/%u: rc %d\n",
+                        CERROR("error reading dir "DFID" at "LPU64"/%u: rc %d\n",
                                PFID(ll_inode2fid(dir)), pos,
                                sai->sai_index, rc);
                         break;
@@ -882,22 +908,22 @@ out:
 void ll_stop_statahead(struct inode *inode, void *key)
 {
         struct ll_inode_info *lli = ll_i2info(inode);
-        struct ptlrpc_thread *thread;
+
+        if (unlikely(key == NULL))
+                return;
 
         spin_lock(&lli->lli_lock);
-        if (lli->lli_opendir_pid == 0 ||
-            unlikely(lli->lli_opendir_key != key)) {
+        if (lli->lli_opendir_key != key || lli->lli_opendir_pid == 0) {
                 spin_unlock(&lli->lli_lock);
                 return;
         }
 
         lli->lli_opendir_key = NULL;
-        lli->lli_opendir_pid = 0;
 
         if (lli->lli_sai) {
                 struct l_wait_info lwi = { 0 };
+                struct ptlrpc_thread *thread = &lli->lli_sai->sai_thread;
 
-                thread = &lli->lli_sai->sai_thread;
                 if (!sa_is_stopped(lli->lli_sai)) {
                         thread->t_flags = SVC_STOPPING;
                         spin_unlock(&lli->lli_lock);
@@ -918,9 +944,10 @@ void ll_stop_statahead(struct inode *inode, void *key)
                  * maybe inflight.
                  */
                 ll_sai_put(lli->lli_sai);
-                return;
+        } else {
+                lli->lli_opendir_pid = 0;
+                spin_unlock(&lli->lli_lock);
         }
-        spin_unlock(&lli->lli_lock);
 }
 
 enum {
@@ -957,7 +984,7 @@ static int is_first_dirent(struct inode *dir, struct dentry *dentry)
 
                 if (IS_ERR(page)) {
                         rc = PTR_ERR(page);
-                        CERROR("error reading dir "DFID" at %llu: rc %d\n",
+                        CERROR("error reading dir "DFID" at "LPU64": rc %d\n",
                                PFID(ll_inode2fid(dir)), pos, rc);
                         break;
                 }
@@ -999,7 +1026,7 @@ static int is_first_dirent(struct inode *dir, struct dentry *dentry)
                         }
 
                         if (target->len == namelen &&
-                            !strncmp(target->name, name, target->len))
+                            memcmp(target->name, name, namelen) == 0)
                                 rc = LS_FIRST_DE + dot_de;
                         else
                                 rc = LS_NONE_FIRST_DE;
@@ -1039,6 +1066,7 @@ out:
  * \retval 1       -- stat ahead thread process such dentry, for lookup, it hit
  * \retval -EEXIST -- stat ahead thread started, and this is the first dentry
  * \retval -EBADFD -- statahead thread exit and not dentry available
+ * \retval -EAGAIN -- caller should do the stat itself
  * \retval others  -- error
  */
 int do_statahead_enter(struct inode *dir, struct dentry **dentryp, int lookup)
@@ -1047,8 +1075,8 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentryp, int lookup)
         struct ll_inode_info     *lli = ll_i2info(dir);
         struct ll_statahead_info *sai = lli->lli_sai;
         struct ll_sa_thread_args  sta;
-        struct l_wait_info        lwi = { 0 };
-        int                       rc;
+        struct l_wait_info        lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
+        int                       rc = 0;
         ENTRY;
 
         LASSERT(lli->lli_opendir_pid == cfs_curproc_pid());
@@ -1058,16 +1086,6 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentryp, int lookup)
                              list_empty(&sai->sai_entries_stated)))
                         RETURN(-EBADFD);
 
-                /*
-                 * skip the first dentry.
-                 */
-                if (unlikely((*dentryp)->d_name.len ==
-                             sai->sai_first->d_name.len &&
-                             !strncmp((*dentryp)->d_name.name,
-                                      sai->sai_first->d_name.name,
-                                      sai->sai_first->d_name.len)))
-                        RETURN(-EEXIST);
-
                 if ((*dentryp)->d_name.name[0] == '.') {
                         if (likely(sai->sai_ls_all ||
                             sai->sai_miss_hidden >= sai->sai_skip_hidden)) {
@@ -1102,9 +1120,10 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentryp, int lookup)
                         /*
                          * thread started already, avoid double-stat.
                          */
-                        l_wait_event(sai->sai_waitq,
-                                     ll_sai_entry_stated(sai) || sa_is_stopped(sai),
-                                     &lwi);
+                        rc = l_wait_event(sai->sai_waitq,
+                                          ll_sai_entry_stated(sai) ||
+                                          sa_is_stopped(sai),
+                                          &lwi);
                 }
 
                 if (lookup) {
@@ -1114,7 +1133,8 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentryp, int lookup)
                                           &(*dentryp)->d_name);
                         if (result) {
                                 LASSERT(result != *dentryp);
-                                dput(*dentryp);
+                                /* BUG 16303: do not drop reference count for
+                                 * "*dentryp", VFS will do that by itself. */
                                 *dentryp = result;
                                 RETURN(1);
                         }
@@ -1122,7 +1142,7 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentryp, int lookup)
                 /*
                  * do nothing for revalidate.
                  */
-                RETURN(0);
+                RETURN(rc);
         }
 
          /*
@@ -1146,9 +1166,16 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentryp, int lookup)
         if (sai == NULL)
                 RETURN(-ENOMEM);
 
-        sai->sai_inode  = igrab(dir);
-        sai->sai_first = dget(*dentryp);
         sai->sai_ls_all = (rc == LS_FIRST_DOT_DE);
+        sai->sai_inode = igrab(dir);
+        if (unlikely(sai->sai_inode == NULL)) {
+                CWARN("Do not start stat ahead on dying inode "DFID" .\n",
+                      PFID(&lli->lli_fid));
+                OBD_FREE_PTR(sai);
+                RETURN(-ESTALE);
+        }
+
+        LASSERT(sai->sai_inode == (*dentryp)->d_parent->d_inode);
 
         sta.sta_parent = (*dentryp)->d_parent;
         sta.sta_pid    = cfs_curproc_pid();
@@ -1157,10 +1184,11 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentryp, int lookup)
         rc = cfs_kernel_thread(ll_statahead_thread, &sta, 0);
         if (rc < 0) {
                 CERROR("can't start ll_sa thread, rc: %d\n", rc);
+                lli->lli_opendir_key = NULL;
                 sai->sai_thread.t_flags = SVC_STOPPED;
                 ll_sai_put(sai);
                 LASSERT(lli->lli_sai == NULL);
-                RETURN(rc);
+                RETURN(-EAGAIN);
         }
 
         l_wait_event(sai->sai_thread.t_ctl_waitq, 
@@ -1177,20 +1205,16 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentryp, int lookup)
 /**
  * update hit/miss count.
  */
-int ll_statahead_exit(struct dentry *dentry, int result)
+void ll_statahead_exit(struct dentry *dentry, int result)
 {
-        struct dentry         *parent = dentry->d_parent;
-        struct ll_inode_info  *lli = ll_i2info(parent->d_inode);
-        struct ll_sb_info     *sbi = ll_i2sbi(parent->d_inode);
-        struct ll_dentry_data *ldd = ll_d2d(dentry);
+        struct dentry            *parent = dentry->d_parent;
+        struct ll_inode_info     *lli = ll_i2info(parent->d_inode);
+        struct ll_sb_info        *sbi = ll_i2sbi(parent->d_inode);
+        struct ll_statahead_info *sai = lli->lli_sai;
+        struct ll_dentry_data    *ldd = ll_d2d(dentry);
         ENTRY;
 
-        if (lli->lli_opendir_pid != cfs_curproc_pid())
-                RETURN(-EBADFD);
-
-        if (lli->lli_sai) {
-                struct ll_statahead_info *sai = lli->lli_sai;
-
+        if (lli->lli_opendir_pid == cfs_curproc_pid() && sai) {
                 if (result >= 1) {
                         sbi->ll_sa_hit++;
                         sai->sai_hit++;
@@ -1220,21 +1244,8 @@ int ll_statahead_exit(struct dentry *dentry, int result)
                 if (!sa_is_stopped(sai))
                         cfs_waitq_signal(&sai->sai_thread.t_ctl_waitq);
                 ll_sai_entry_fini(sai);
-
-                if (unlikely(ldd == NULL)) {
-                        ll_set_dd(dentry);
-                        ldd = ll_d2d(dentry);
-                        if (ldd != NULL && dentry->d_op == NULL) {
-                                lock_dentry(dentry);
-                                dentry->d_op = dentry->d_op ? : &ll_sai_d_ops;
-                                unlock_dentry(dentry);
-                        }
-                }
-
                 if (likely(ldd != NULL))
                         ldd->lld_sa_generation = sai->sai_generation;
-                else
-                        RETURN(-ENOMEM);
         }
-        RETURN(0);
+        EXIT;
 }