Whamcloud - gitweb
b=20339
[fs/lustre-release.git] / lustre / llite / statahead.c
index 9146b36..52ba057 100644 (file)
@@ -16,8 +16,8 @@
  * in the LICENSE file that accompanied this code).
  *
  * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see [sun.com URL with a
- * copy of GPLv2].
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
  *
  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  * CA 95054 USA or visit www.sun.com if you need additional information or
@@ -62,10 +62,6 @@ enum {
         SA_ENTRY_STATED
 };
 
-struct dentry_operations ll_sai_d_ops = {
-        .d_release = ll_release,
-};
-
 static unsigned int sai_generation = 0;
 static spinlock_t sai_generation_lock = SPIN_LOCK_UNLOCKED;
 
@@ -96,7 +92,7 @@ static inline int sa_received_empty(struct ll_statahead_info *sai)
 
 static inline int sa_not_full(struct ll_statahead_info *sai)
 {
-        return sai->sai_index < sai->sai_hit + sai->sai_miss + sai->sai_max;
+        return (sai->sai_index < sai->sai_hit + sai->sai_miss + sai->sai_max);
 }
 
 static inline int sa_is_running(struct ll_statahead_info *sai)
@@ -121,7 +117,7 @@ static inline int sa_is_stopped(struct ll_statahead_info *sai)
  */
 static inline int sa_low_hit(struct ll_statahead_info *sai)
 {
-        return ((sai->sai_hit < 4 * sai->sai_miss && sai->sai_hit > 7) ||
+        return ((sai->sai_hit > 7 && sai->sai_hit < 4 * sai->sai_miss) ||
                 (sai->sai_consecutive_miss > 8));
 }
 
@@ -132,26 +128,27 @@ static inline int sa_low_hit(struct ll_statahead_info *sai)
  * (3) drop dentry's ref count
  * (4) release request's ref count
  */
-static void ll_sai_entry_cleanup(struct ll_sai_entry *entry)
+static void ll_sai_entry_cleanup(struct ll_sai_entry *entry, int free)
 {
-        struct ptlrpc_request  *req = entry->se_req;
         struct md_enqueue_info *minfo = entry->se_minfo;
+        struct ptlrpc_request  *req = entry->se_req;
         ENTRY;
 
         if (minfo) {
-                struct dentry        *dentry = minfo->mi_dentry;
-                struct lookup_intent *it = &minfo->mi_it;
-
                 entry->se_minfo = NULL;
-                ll_intent_release(it);
+                ll_intent_release(&minfo->mi_it);
+                dput(minfo->mi_dentry);
+                iput(minfo->mi_dir);
                 OBD_FREE_PTR(minfo);
-                dput(dentry);
         }
         if (req) {
                 entry->se_req = NULL;
                 ptlrpc_req_finished(req);
         }
-        OBD_FREE_PTR(entry);
+        if (free) {
+                LASSERT(list_empty(&entry->se_list));
+                OBD_FREE_PTR(entry);
+        }
 
         EXIT;
 }
@@ -190,13 +187,28 @@ struct ll_statahead_info *ll_sai_get(struct ll_statahead_info *sai)
 static void ll_sai_put(struct ll_statahead_info *sai)
 {
         struct inode         *inode = sai->sai_inode;
-        struct ll_inode_info *lli = ll_i2info(inode);
+        struct ll_inode_info *lli;
         ENTRY;
 
-        if (atomic_dec_and_lock(&sai->sai_refcount, &lli->lli_lock)) {
+        LASSERT(inode != NULL);
+        lli = ll_i2info(inode);
+        LASSERT(lli->lli_sai == sai);
+
+        if (atomic_dec_and_test(&sai->sai_refcount)) {
                 struct ll_sai_entry *entry, *next;
 
+                spin_lock(&lli->lli_lock);
+                if (unlikely(atomic_read(&sai->sai_refcount) > 0)) {
+                        /* Race case: the interpret callback has just taken
+                         * a reference count. */
+                        spin_unlock(&lli->lli_lock);
+                        EXIT;
+                        return;
+                }
+
+                LASSERT(lli->lli_opendir_key == NULL);
                 lli->lli_sai = NULL;
+                lli->lli_opendir_pid = 0;
                 spin_unlock(&lli->lli_lock);
 
                 LASSERT(sa_is_stopped(sai));
@@ -209,22 +221,21 @@ static void ll_sai_put(struct ll_statahead_info *sai)
 
                 list_for_each_entry_safe(entry, next, &sai->sai_entries_sent,
                                          se_list) {
-                        list_del(&entry->se_list);
-                        ll_sai_entry_cleanup(entry);
+                        list_del_init(&entry->se_list);
+                        ll_sai_entry_cleanup(entry, 1);
                 }
                 list_for_each_entry_safe(entry, next, &sai->sai_entries_received,
                                          se_list) {
-                        list_del(&entry->se_list);
-                        ll_sai_entry_cleanup(entry);
+                        list_del_init(&entry->se_list);
+                        ll_sai_entry_cleanup(entry, 1);
                 }
                 list_for_each_entry_safe(entry, next, &sai->sai_entries_stated,
                                          se_list) {
-                        list_del(&entry->se_list);
-                        ll_sai_entry_cleanup(entry);
+                        list_del_init(&entry->se_list);
+                        ll_sai_entry_cleanup(entry, 1);
                 }
-                dput(sai->sai_first);
-                OBD_FREE_PTR(sai);
                 iput(inode);
+                OBD_FREE_PTR(sai);
         }
         EXIT;
 }
@@ -294,8 +305,7 @@ ll_sai_entry_set(struct ll_statahead_info *sai, unsigned int index, int stat,
         ENTRY;
 
         if (!list_empty(&sai->sai_entries_sent)) {
-                list_for_each_entry(entry, &sai->sai_entries_sent,
-                                    se_list) {
+                list_for_each_entry(entry, &sai->sai_entries_sent, se_list) {
                         if (entry->se_index == index) {
                                 entry->se_stat = stat;
                                 entry->se_req = ptlrpc_request_addref(req);
@@ -332,13 +342,15 @@ ll_sai_entry_to_stated(struct ll_statahead_info *sai, struct ll_sai_entry *entry
         struct ll_sai_entry  *se;
         ENTRY;
 
+        ll_sai_entry_cleanup(entry, 0);
+
         spin_lock(&lli->lli_lock);
         if (!list_empty(&entry->se_list))
                 list_del_init(&entry->se_list);
 
         if (unlikely(entry->se_index < sai->sai_index_next)) {
                 spin_unlock(&lli->lli_lock);
-                ll_sai_entry_cleanup(entry);
+                OBD_FREE_PTR(entry);
                 RETURN(0);
         }
 
@@ -367,9 +379,10 @@ static int do_statahead_interpret(struct ll_statahead_info *sai)
         struct ll_sai_entry    *entry;
         struct ptlrpc_request  *req;
         struct md_enqueue_info *minfo;
-        struct dentry          *dentry;
         struct lookup_intent   *it;
+        struct dentry          *dentry;
         int                     rc = 0;
+        struct mdt_body        *body;
         ENTRY;
 
         spin_lock(&lli->lli_lock);
@@ -380,17 +393,23 @@ static int do_statahead_interpret(struct ll_statahead_info *sai)
         spin_unlock(&lli->lli_lock);
 
         if (unlikely(entry->se_index < sai->sai_index_next)) {
-                ll_sai_entry_cleanup(entry);
+                CWARN("Found stale entry: [index %u] [next %u]\n",
+                      entry->se_index, sai->sai_index_next);
+                ll_sai_entry_cleanup(entry, 1);
                 RETURN(0);
         }
 
+        if (entry->se_stat != SA_ENTRY_STATED)
+                GOTO(out, rc = entry->se_stat);
+
         req = entry->se_req;
         minfo = entry->se_minfo;
-        dentry = minfo->mi_dentry;
         it = &minfo->mi_it;
+        dentry = minfo->mi_dentry;
 
-        if (entry->se_stat != SA_ENTRY_STATED)
-                GOTO(out, rc = entry->se_stat);
+        body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
+        if (body == NULL)
+                GOTO(out, rc = -EFAULT);
 
         if (dentry->d_inode == NULL) {
                 /*
@@ -398,12 +417,19 @@ static int do_statahead_interpret(struct ll_statahead_info *sai)
                  */
                 struct dentry    *save = dentry;
                 struct it_cb_data icbd = {
-                        .icbd_parent   = dentry->d_parent->d_inode,
+                        .icbd_parent   = minfo->mi_dir,
                         .icbd_childp   = &dentry
                 };
 
                 LASSERT(fid_is_zero(&minfo->mi_data.op_fid2));
 
+                /*
+                 * XXX: No fid in reply, this is probably a cross-ref case.
+                 * SA can't handle it yet.
+                 */
+                if (body->valid & OBD_MD_MDS)
+                        GOTO(out, rc = -EAGAIN);
+
                 rc = ll_lookup_it_finish(req, it, &icbd);
                 if (!rc)
                         /*
@@ -421,10 +447,6 @@ static int do_statahead_interpret(struct ll_statahead_info *sai)
                 /*
                  * revalidate.
                  */
-                struct mdt_body *body;
-
-                body = lustre_msg_buf(req->rq_repmsg, DLM_REPLY_REC_OFF,
-                                      sizeof(*body));
                 if (!lu_fid_eq(&minfo->mi_data.op_fid2, &body->fid1)) {
                         ll_unhash_aliases(dentry->d_inode);
                         GOTO(out, rc = -EAGAIN);
@@ -436,6 +458,7 @@ static int do_statahead_interpret(struct ll_statahead_info *sai)
                         GOTO(out, rc);
                 }
 
+                spin_lock(&ll_lookup_lock);
                 spin_lock(&dcache_lock);
                 lock_dentry(dentry);
                 __d_drop(dentry);
@@ -445,21 +468,15 @@ static int do_statahead_interpret(struct ll_statahead_info *sai)
                 unlock_dentry(dentry);
                 d_rehash_cond(dentry, 0);
                 spin_unlock(&dcache_lock);
+                spin_unlock(&ll_lookup_lock);
 
                 ll_lookup_finish_locks(it, dentry);
         }
         EXIT;
 
 out:
-        if (likely(ll_sai_entry_to_stated(sai, entry))) {
-                entry->se_minfo = NULL;
-                entry->se_req = NULL;
+        if (likely(ll_sai_entry_to_stated(sai, entry)))
                 cfs_waitq_signal(&sai->sai_waitq);
-                ll_intent_release(it);
-                OBD_FREE_PTR(minfo);
-                dput(dentry);
-                ptlrpc_req_finished(req);
-        }
         return rc;
 }
 
@@ -467,9 +484,9 @@ static int ll_statahead_interpret(struct ptlrpc_request *req,
                                   struct md_enqueue_info *minfo,
                                   int rc)
 {
-        struct dentry            *dentry = minfo->mi_dentry;
         struct lookup_intent     *it = &minfo->mi_it;
-        struct inode             *dir = dentry->d_parent->d_inode;
+        struct dentry            *dentry = minfo->mi_dentry;
+        struct inode             *dir = minfo->mi_dir;
         struct ll_inode_info     *lli = ll_i2info(dir);
         struct ll_statahead_info *sai;
         struct ll_sai_entry      *entry;
@@ -484,13 +501,11 @@ static int ll_statahead_interpret(struct ptlrpc_request *req,
                 spin_unlock(&lli->lli_lock);
                 ll_intent_release(it);
                 dput(dentry);
+                iput(dir);
                 OBD_FREE_PTR(minfo);
                 RETURN(-ESTALE);
         } else {
-                sai = lli->lli_sai;
-                if (rc || dir == NULL)
-                        rc = -ESTALE;
-
+                sai = ll_sai_get(lli->lli_sai);
                 entry = ll_sai_entry_set(sai,
                                          (unsigned int)(long)minfo->mi_cbdata,
                                          rc ? SA_ENTRY_UNSTATED :
@@ -506,8 +521,9 @@ static int ll_statahead_interpret(struct ptlrpc_request *req,
                                 list_del_init(&entry->se_list);
                         sai->sai_replied++;
                         spin_unlock(&lli->lli_lock);
-                        ll_sai_entry_cleanup(entry);
+                        ll_sai_entry_cleanup(entry, 1);
                 }
+                ll_sai_put(sai);
                 RETURN(rc);
         }
 }
@@ -516,6 +532,7 @@ static void sa_args_fini(struct md_enqueue_info *minfo,
                          struct ldlm_enqueue_info *einfo)
 {
         LASSERT(minfo && einfo);
+        iput(minfo->mi_dir);
         capa_put(minfo->mi_data.op_capa1);
         capa_put(minfo->mi_data.op_capa2);
         OBD_FREE_PTR(minfo);
@@ -562,6 +579,7 @@ static int sa_args_init(struct inode *dir, struct dentry *dentry,
 
         minfo->mi_it.it_op = IT_GETATTR;
         minfo->mi_dentry = dentry;
+        minfo->mi_dir = igrab(dir);
         minfo->mi_cb = ll_statahead_interpret;
         minfo->mi_generation = lli->lli_sai->sai_generation;
         minfo->mi_cbdata = (void *)(long)lli->lli_sai->sai_index;
@@ -613,10 +631,9 @@ static int do_sa_lookup(struct inode *dir, struct dentry *dentry)
  * \retval      0 -- will send stat-ahead request
  * \retval others -- prepare stat-ahead request failed
  */
-static int do_sa_revalidate(struct dentry *dentry)
+static int do_sa_revalidate(struct inode *dir, struct dentry *dentry)
 {
         struct inode             *inode = dentry->d_inode;
-        struct inode             *dir = dentry->d_parent->d_inode;
         struct lookup_intent      it = { .it_op = IT_GETATTR };
         struct md_enqueue_info   *minfo;
         struct ldlm_enqueue_info *einfo;
@@ -654,18 +671,11 @@ static int do_sa_revalidate(struct dentry *dentry)
         RETURN(rc);
 }
 
-static inline void ll_name2qstr(struct qstr *this, const char *name, int namelen)
+static inline void ll_name2qstr(struct qstr *q, const char *name, int namelen)
 {
-        unsigned long hash = init_name_hash();
-        unsigned int  c;
-
-        this->name = name;
-        this->len  = namelen;
-        for (; namelen > 0; namelen--, name++) {
-                c = *(const unsigned char *)name;
-                hash = partial_name_hash(c, hash);
-        }
-        this->hash = end_name_hash(hash);
+        q->name = name;
+        q->len  = namelen;
+        q->hash = full_name_hash(name, namelen);
 }
 
 static int ll_statahead_one(struct dentry *parent, const char* entry_name,
@@ -707,7 +717,7 @@ static int ll_statahead_one(struct dentry *parent, const char* entry_name,
                         GOTO(out, rc = -ENOMEM);
                 }
         } else {
-                rc = do_sa_revalidate(dentry);
+                rc = do_sa_revalidate(dir, dentry);
                 if (rc)
                         dput(dentry);
         }
@@ -729,15 +739,9 @@ out:
         return rc;
 }
 
-struct ll_sa_thread_args {
-        struct dentry   *sta_parent;
-        pid_t            sta_pid;
-};
-
 static int ll_statahead_thread(void *arg)
 {
-        struct ll_sa_thread_args *sta = arg;
-        struct dentry            *parent = dget(sta->sta_parent);
+        struct dentry            *parent = (struct dentry *)arg;
         struct inode             *dir = parent->d_inode;
         struct ll_inode_info     *lli = ll_i2info(dir);
         struct ll_sb_info        *sbi = ll_i2sbi(dir);
@@ -752,7 +756,7 @@ static int ll_statahead_thread(void *arg)
 
         {
                 char pname[16];
-                snprintf(pname, 15, "ll_sa_%u", sta->sta_pid);
+                snprintf(pname, 15, "ll_sa_%u", lli->lli_opendir_pid);
                 cfs_daemonize(pname);
         }
 
@@ -773,7 +777,7 @@ static int ll_statahead_thread(void *arg)
 
                 if (IS_ERR(page)) {
                         rc = PTR_ERR(page);
-                        CERROR("error reading dir "DFID" at %llu/%u: rc %d\n",
+                        CERROR("error reading dir "DFID" at "LPU64"/%u: rc %d\n",
                                PFID(ll_inode2fid(dir)), pos,
                                sai->sai_index, rc);
                         break;
@@ -897,22 +901,22 @@ out:
 void ll_stop_statahead(struct inode *inode, void *key)
 {
         struct ll_inode_info *lli = ll_i2info(inode);
-        struct ptlrpc_thread *thread;
+
+        if (unlikely(key == NULL))
+                return;
 
         spin_lock(&lli->lli_lock);
-        if (lli->lli_opendir_pid == 0 ||
-            unlikely(lli->lli_opendir_key != key)) {
+        if (lli->lli_opendir_key != key || lli->lli_opendir_pid == 0) {
                 spin_unlock(&lli->lli_lock);
                 return;
         }
 
         lli->lli_opendir_key = NULL;
-        lli->lli_opendir_pid = 0;
 
         if (lli->lli_sai) {
                 struct l_wait_info lwi = { 0 };
+                struct ptlrpc_thread *thread = &lli->lli_sai->sai_thread;
 
-                thread = &lli->lli_sai->sai_thread;
                 if (!sa_is_stopped(lli->lli_sai)) {
                         thread->t_flags = SVC_STOPPING;
                         spin_unlock(&lli->lli_lock);
@@ -933,9 +937,10 @@ void ll_stop_statahead(struct inode *inode, void *key)
                  * maybe inflight.
                  */
                 ll_sai_put(lli->lli_sai);
-                return;
+        } else {
+                lli->lli_opendir_pid = 0;
+                spin_unlock(&lli->lli_lock);
         }
-        spin_unlock(&lli->lli_lock);
 }
 
 enum {
@@ -972,7 +977,7 @@ static int is_first_dirent(struct inode *dir, struct dentry *dentry)
 
                 if (IS_ERR(page)) {
                         rc = PTR_ERR(page);
-                        CERROR("error reading dir "DFID" at %llu: rc %d\n",
+                        CERROR("error reading dir "DFID" at "LPU64": rc %d\n",
                                PFID(ll_inode2fid(dir)), pos, rc);
                         break;
                 }
@@ -1014,7 +1019,7 @@ static int is_first_dirent(struct inode *dir, struct dentry *dentry)
                         }
 
                         if (target->len == namelen &&
-                            !strncmp(target->name, name, target->len))
+                            memcmp(target->name, name, namelen) == 0)
                                 rc = LS_FIRST_DE + dot_de;
                         else
                                 rc = LS_NONE_FIRST_DE;
@@ -1054,35 +1059,30 @@ out:
  * \retval 1       -- stat ahead thread process such dentry, for lookup, it hit
  * \retval -EEXIST -- stat ahead thread started, and this is the first dentry
 * \retval -EBADFD -- statahead thread exited and no dentry is available
+ * \retval -EAGAIN -- the caller should do the stat by itself
  * \retval others  -- error
  */
 int do_statahead_enter(struct inode *dir, struct dentry **dentryp, int lookup)
 {
-        struct ll_sb_info        *sbi = ll_i2sbi(dir);
-        struct ll_inode_info     *lli = ll_i2info(dir);
-        struct ll_statahead_info *sai = lli->lli_sai;
-        struct ll_sa_thread_args  sta;
-        struct l_wait_info        lwi = { 0 };
-        int                       rc;
+        struct ll_inode_info     *lli;
+        struct ll_statahead_info *sai;
+        struct dentry            *parent;
+        struct l_wait_info        lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
+        int                       rc = 0;
         ENTRY;
 
+        LASSERT(dir != NULL);
+        lli = ll_i2info(dir);
         LASSERT(lli->lli_opendir_pid == cfs_curproc_pid());
+        sai = lli->lli_sai;
 
         if (sai) {
+                struct ll_sb_info *sbi;
+
                 if (unlikely(sa_is_stopped(sai) &&
                              list_empty(&sai->sai_entries_stated)))
                         RETURN(-EBADFD);
 
-                /*
-                 * skip the first dentry.
-                 */
-                if (unlikely((*dentryp)->d_name.len ==
-                             sai->sai_first->d_name.len &&
-                             !strncmp((*dentryp)->d_name.name,
-                                      sai->sai_first->d_name.name,
-                                      sai->sai_first->d_name.len)))
-                        RETURN(-EEXIST);
-
                 if ((*dentryp)->d_name.name[0] == '.') {
                         if (likely(sai->sai_ls_all ||
                             sai->sai_miss_hidden >= sai->sai_skip_hidden)) {
@@ -1110,6 +1110,7 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentryp, int lookup)
                         }
                 }
 
+                sbi = ll_i2sbi(dir);
                 if (ll_sai_entry_stated(sai)) {
                         sbi->ll_sa_cached++;
                 } else {
@@ -1117,9 +1118,10 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentryp, int lookup)
                         /*
                          * thread started already, avoid double-stat.
                          */
-                        l_wait_event(sai->sai_waitq,
-                                     ll_sai_entry_stated(sai) || sa_is_stopped(sai),
-                                     &lwi);
+                        rc = l_wait_event(sai->sai_waitq,
+                                          ll_sai_entry_stated(sai) ||
+                                          sa_is_stopped(sai),
+                                          &lwi);
                 }
 
                 if (lookup) {
@@ -1129,7 +1131,8 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentryp, int lookup)
                                           &(*dentryp)->d_name);
                         if (result) {
                                 LASSERT(result != *dentryp);
-                                dput(*dentryp);
+                                /* BUG 16303: do not drop reference count for
+                                 * "*dentryp", VFS will do that by itself. */
                                 *dentryp = result;
                                 RETURN(1);
                         }
@@ -1137,45 +1140,53 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentryp, int lookup)
                 /*
                  * do nothing for revalidate.
                  */
-                RETURN(0);
+                RETURN(rc);
         }
 
-         /*
-          * I am the "lli_opendir_pid" owner, only me can set "lli_sai".
-          */ 
-        LASSERT(lli->lli_sai == NULL);
-
+        /* I am the "lli_opendir_pid" owner; only I can set "lli_sai". */
         rc = is_first_dirent(dir, *dentryp);
-        if (rc == LS_NONE_FIRST_DE) {
-                /*
-                 * It is not "ls -{a}l" operation, no need statahead for it.
-                 */
-                spin_lock(&lli->lli_lock);
-                lli->lli_opendir_key = NULL;
-                lli->lli_opendir_pid = 0;
-                spin_unlock(&lli->lli_lock);
-                RETURN(-EBADF);
-        }
+        if (rc == LS_NONE_FIRST_DE)
+                /* Not an "ls -{a}l" operation, so statahead is not needed. */
+                GOTO(out, rc = -EAGAIN);
 
         sai = ll_sai_alloc();
         if (sai == NULL)
-                RETURN(-ENOMEM);
+                GOTO(out, rc = -ENOMEM);
 
-        sai->sai_inode  = igrab(dir);
-        sai->sai_first = dget(*dentryp);
         sai->sai_ls_all = (rc == LS_FIRST_DOT_DE);
+        sai->sai_inode = igrab(dir);
+        if (unlikely(sai->sai_inode == NULL)) {
+                CWARN("Do not start stat ahead on dying inode "DFID" .\n",
+                      PFID(&lli->lli_fid));
+                OBD_FREE_PTR(sai);
+                GOTO(out, rc = -ESTALE);
+        }
 
-        sta.sta_parent = (*dentryp)->d_parent;
-        sta.sta_pid    = cfs_curproc_pid();
+        /* get parent reference count here, and put it in ll_statahead_thread */
+        parent = dget((*dentryp)->d_parent);
+        if (unlikely(sai->sai_inode != parent->d_inode)) {
+                struct ll_inode_info *nlli = ll_i2info(parent->d_inode);
+
+                CWARN("Race condition, someone changed %.*s just now: "
+                      "old parent "DFID", new parent "DFID" .\n",
+                      (*dentryp)->d_name.len, (*dentryp)->d_name.name,
+                      PFID(&lli->lli_fid), PFID(&nlli->lli_fid));
+                dput(parent);
+                iput(sai->sai_inode);
+                OBD_FREE_PTR(sai);
+                RETURN(-EAGAIN);
+        }
 
         lli->lli_sai = sai;
-        rc = cfs_kernel_thread(ll_statahead_thread, &sta, 0);
+        rc = cfs_kernel_thread(ll_statahead_thread, parent, 0);
         if (rc < 0) {
                 CERROR("can't start ll_sa thread, rc: %d\n", rc);
+                dput(parent);
+                lli->lli_opendir_key = NULL;
                 sai->sai_thread.t_flags = SVC_STOPPED;
                 ll_sai_put(sai);
                 LASSERT(lli->lli_sai == NULL);
-                RETURN(rc);
+                RETURN(-EAGAIN);
         }
 
         l_wait_event(sai->sai_thread.t_ctl_waitq, 
@@ -1187,69 +1198,62 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentryp, int lookup)
          * lookup, and -EEXIST also indicates that this is the first dirent.
          */
         RETURN(-EEXIST);
+
+out:
+        spin_lock(&lli->lli_lock);
+        lli->lli_opendir_key = NULL;
+        lli->lli_opendir_pid = 0;
+        spin_unlock(&lli->lli_lock);
+        return rc;
 }
 
 /**
  * update hit/miss count.
  */
-int ll_statahead_exit(struct dentry *dentry, int result)
+void ll_statahead_exit(struct inode *dir, struct dentry *dentry, int result)
 {
-        struct dentry         *parent = dentry->d_parent;
-        struct ll_inode_info  *lli = ll_i2info(parent->d_inode);
-        struct ll_sb_info     *sbi = ll_i2sbi(parent->d_inode);
-        struct ll_dentry_data *ldd = ll_d2d(dentry);
+        struct ll_inode_info     *lli;
+        struct ll_statahead_info *sai;
+        struct ll_sb_info        *sbi;
+        struct ll_dentry_data    *ldd = ll_d2d(dentry);
         ENTRY;
 
-        if (lli->lli_opendir_pid != cfs_curproc_pid())
-                RETURN(-EBADFD);
-
-        if (lli->lli_sai) {
-                struct ll_statahead_info *sai = lli->lli_sai;
-
-                if (result >= 1) {
-                        sbi->ll_sa_hit++;
-                        sai->sai_hit++;
-                        sai->sai_consecutive_miss = 0;
-                        sai->sai_max = min(2 * sai->sai_max, sbi->ll_sa_max);
-                } else {
-                        sbi->ll_sa_miss++;
-                        sai->sai_miss++;
-                        sai->sai_consecutive_miss++;
-                        if (sa_low_hit(sai) && sa_is_running(sai)) {
-                                sbi->ll_sa_wrong++;
-                                CDEBUG(D_READA, "statahead for dir %.*s hit "
-                                       "ratio too low: hit/miss %u/%u, "
-                                       "sent/replied %u/%u. stopping statahead "
-                                       "thread: pid %d\n",
-                                       parent->d_name.len, parent->d_name.name,
-                                       sai->sai_hit, sai->sai_miss,
-                                       sai->sai_sent, sai->sai_replied,
-                                       cfs_curproc_pid());
-                                spin_lock(&lli->lli_lock);
-                                if (!sa_is_stopped(sai))
-                                        sai->sai_thread.t_flags = SVC_STOPPING;
-                                spin_unlock(&lli->lli_lock);
-                        }
+        LASSERT(dir != NULL);
+        lli = ll_i2info(dir);
+        LASSERT(lli->lli_opendir_pid == cfs_curproc_pid());
+        sai = lli->lli_sai;
+        LASSERT(sai != NULL);
+        sbi = ll_i2sbi(dir);
+
+        if (result >= 1) {
+                sbi->ll_sa_hit++;
+                sai->sai_hit++;
+                sai->sai_consecutive_miss = 0;
+                sai->sai_max = min(2 * sai->sai_max, sbi->ll_sa_max);
+        } else {
+                sbi->ll_sa_miss++;
+                sai->sai_miss++;
+                sai->sai_consecutive_miss++;
+                if (sa_low_hit(sai) && sa_is_running(sai)) {
+                        sbi->ll_sa_wrong++;
+                        CDEBUG(D_READA, "Statahead for dir "DFID" hit ratio "
+                               "too low: hit/miss %u/%u, sent/replied %u/%u, "
+                               "stopping statahead thread: pid %d\n",
+                               PFID(&lli->lli_fid), sai->sai_hit,
+                               sai->sai_miss, sai->sai_sent,
+                               sai->sai_replied, cfs_curproc_pid());
+                        spin_lock(&lli->lli_lock);
+                        if (!sa_is_stopped(sai))
+                                sai->sai_thread.t_flags = SVC_STOPPING;
+                        spin_unlock(&lli->lli_lock);
                 }
+        }
 
-                if (!sa_is_stopped(sai))
-                        cfs_waitq_signal(&sai->sai_thread.t_ctl_waitq);
-                ll_sai_entry_fini(sai);
-
-                if (unlikely(ldd == NULL)) {
-                        ll_set_dd(dentry);
-                        ldd = ll_d2d(dentry);
-                        if (ldd != NULL && dentry->d_op == NULL) {
-                                lock_dentry(dentry);
-                                dentry->d_op = dentry->d_op ? : &ll_sai_d_ops;
-                                unlock_dentry(dentry);
-                        }
-                }
+        if (!sa_is_stopped(sai))
+                cfs_waitq_signal(&sai->sai_thread.t_ctl_waitq);
+        ll_sai_entry_fini(sai);
+        if (likely(ldd != NULL))
+                ldd->lld_sa_generation = sai->sai_generation;
 
-                if (likely(ldd != NULL))
-                        ldd->lld_sa_generation = sai->sai_generation;
-                else
-                        RETURN(-ENOMEM);
-        }
-        RETURN(0);
+        EXIT;
 }