From: fanyong Date: Sat, 8 Aug 2009 12:40:59 +0000 (+0000) Subject: Branch b1_8 X-Git-Tag: v1_8_2_01~1^2~182 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=e292108b1b7bb2ac6184b5c15457fefd9d402566;p=fs%2Flustre-release.git Branch b1_8 b=18902 i=alexey.lyashkov i=vladimir.saveliev 1) do not reset statahead if a conflict is found at opendir 2) do not release "sai" if someone else still wants to use it (race condition) 3) patch from bug 20139 4) increase the timeout value for racer killing threads to handle the client-evicted case There are race conditions for statahead: 1) just before the statahead thread is triggered, the first item of the dentry may be moved to another directory, which causes the statahead thread to be triggered for a different directory, so the original "ls -l" thread, which is waiting for the expected directory, can never be woken up. 2) while some items are in statahead, such items may be moved to another directory; the interpret callback may then find a different dentry under the changed directory, leaving the original "ls -l" thread, which is waiting for the expected directory, unable to be woken up. 3) lli_fid should be set just after the vfs inode is created on the client side, to prevent the same inode from being created multiple times by the statahead thread and another stat thread. 
--- diff --git a/lustre/include/lustre_mds.h b/lustre/include/lustre_mds.h index 4808866..aa7d663 100644 --- a/lustre/include/lustre_mds.h +++ b/lustre/include/lustre_mds.h @@ -303,6 +303,7 @@ struct md_enqueue_info { struct lookup_intent mi_it; struct lustre_handle mi_lockh; struct dentry *mi_dentry; + struct inode *mi_dir; md_enqueue_cb_t mi_cb; unsigned int mi_generation; void *mi_cbdata; diff --git a/lustre/llite/dcache.c b/lustre/llite/dcache.c index 1d61bc5..8991242 100644 --- a/lustre/llite/dcache.c +++ b/lustre/llite/dcache.c @@ -344,6 +344,7 @@ int ll_revalidate_it(struct dentry *de, int lookup_flags, struct ptlrpc_request *req = NULL; struct lookup_intent lookup_it = { .it_op = IT_LOOKUP }; struct obd_export *exp; + struct inode *parent = de->d_parent->d_inode; int first = 0, rc; ENTRY; @@ -364,8 +365,7 @@ int ll_revalidate_it(struct dentry *de, int lookup_flags, RETURN(0); #endif - rc = ll_have_md_lock(de->d_parent->d_inode, - MDS_INODELOCK_UPDATE); + rc = ll_have_md_lock(parent, MDS_INODELOCK_UPDATE); GOTO(out_sa, rc); } @@ -391,7 +391,7 @@ int ll_revalidate_it(struct dentry *de, int lookup_flags, ll_frob_intent(&it, &lookup_it); LASSERT(it); - ll_prepare_mdc_op_data(&op_data, de->d_parent->d_inode, de->d_inode, + ll_prepare_mdc_op_data(&op_data, parent, de->d_inode, de->d_name.name, de->d_name.len, 0, NULL); if ((it->it_op == IT_OPEN) && de->d_inode) { @@ -439,7 +439,7 @@ int ll_revalidate_it(struct dentry *de, int lookup_flags, } if (it->it_op == IT_GETATTR) - first = ll_statahead_enter(de->d_parent->d_inode, &de, 0); + first = ll_statahead_enter(parent, &de, 0); do_lock: it->it_create_mode &= ~current->fs->umask; @@ -451,9 +451,9 @@ do_lock: /* If there are too many locks on client-side, then some * locks taken by statahead maybe dropped automatically * before the real "revalidate" using them. */ - ll_statahead_exit(de, req == NULL ? rc : 0); + ll_statahead_exit(parent, de, req == NULL ? 
rc : 0); else if (first == -EEXIST) - ll_statahead_mark(de); + ll_statahead_mark(parent, de); /* If req is NULL, then mdc_intent_lock only tried to do a lock match; * if all was well, it will return 1 if it found locks, 0 otherwise. */ @@ -545,7 +545,7 @@ do_lookup: it = &lookup_it; } /*do real lookup here */ - ll_prepare_mdc_op_data(&op_data, de->d_parent->d_inode, NULL, + ll_prepare_mdc_op_data(&op_data, parent, NULL, de->d_name.name, de->d_name.len, 0, NULL); rc = mdc_intent_lock(exp, &op_data, NULL, 0, it, 0, &req, ll_mdc_blocking_ast, 0); @@ -574,11 +574,11 @@ out_sa: * statahead windows; for rc == 0 case, the "lookup" will be done later. */ if (it && it->it_op == IT_GETATTR && rc == 1) { - first = ll_statahead_enter(de->d_parent->d_inode, &de, 0); + first = ll_statahead_enter(parent, &de, 0); if (!first) - ll_statahead_exit(de, 1); + ll_statahead_exit(parent, de, 1); else if (first == -EEXIST) - ll_statahead_mark(de); + ll_statahead_mark(parent, de); } return rc; diff --git a/lustre/llite/file.c b/lustre/llite/file.c index ce7cee3..4c238a0 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -427,29 +427,12 @@ int ll_file_open(struct inode *inode, struct file *file) RETURN(-ENOMEM); if (S_ISDIR(inode->i_mode)) { -again: spin_lock(&lli->lli_lock); if (lli->lli_opendir_key == NULL && lli->lli_opendir_pid == 0) { LASSERT(lli->lli_sai == NULL); lli->lli_opendir_key = fd; lli->lli_opendir_pid = cfs_curproc_pid(); opendir_set = 1; - } else if (unlikely(lli->lli_opendir_pid == cfs_curproc_pid() && - lli->lli_opendir_key != NULL)) { - /* Two cases for this: - * (1) The same process open such directory many times. - * (2) The old process opened the directory, and exited - * before its children processes. Then new process - * with the same pid opens such directory before the - * old process's children processes exit. - * reset stat ahead for such cases. 
*/ - spin_unlock(&lli->lli_lock); - CDEBUG(D_INFO, "Conflict statahead for %.*s %lu/%u" - " reset it.\n", file->f_dentry->d_name.len, - file->f_dentry->d_name.name, - inode->i_ino, inode->i_generation); - ll_stop_statahead(inode, lli->lli_opendir_key); - goto again; } spin_unlock(&lli->lli_lock); } diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index f70df3b..bab694a 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -1058,15 +1058,20 @@ struct ll_statahead_info { }; int do_statahead_enter(struct inode *dir, struct dentry **dentry, int lookup); -void ll_statahead_exit(struct dentry *dentry, int result); +void ll_statahead_exit(struct inode *dir, struct dentry *dentry, int result); void ll_stop_statahead(struct inode *inode, void *key); static inline -void ll_statahead_mark(struct dentry *dentry) +void ll_statahead_mark(struct inode *dir, struct dentry *dentry) { - struct ll_inode_info *lli = ll_i2info(dentry->d_parent->d_inode); + struct ll_inode_info *lli; struct ll_dentry_data *ldd = ll_d2d(dentry); + /* dentry has been move to other directory, no need mark */ + if (unlikely(dir != dentry->d_parent->d_inode)) + return; + + lli = ll_i2info(dir); /* not the same process, don't mark */ if (lli->lli_opendir_pid != cfs_curproc_pid()) return; @@ -1080,20 +1085,25 @@ void ll_statahead_mark(struct dentry *dentry) static inline int ll_statahead_enter(struct inode *dir, struct dentry **dentryp, int lookup) { - struct ll_sb_info *sbi = ll_i2sbi(dir); - struct ll_inode_info *lli = ll_i2info(dir); - struct ll_dentry_data *ldd = ll_d2d(*dentryp); + struct ll_inode_info *lli; + struct ll_sb_info *sbi; + struct ll_dentry_data *ldd = ll_d2d(*dentryp); - if (sbi->ll_sa_max == 0) - return -ENOTSUPP; + if (unlikely(dir == NULL)) + return -EAGAIN; + sbi = ll_i2sbi(dir); /* temporarily disable dir stat ahead in interoperability mode */ if (sbi->ll_mdc_exp->exp_connect_flags & OBD_CONNECT_FID) return -ENOTSUPP; + if 
(sbi->ll_sa_max == 0) + return -ENOTSUPP; + + lli = ll_i2info(dir); /* not the same process, don't statahead */ if (lli->lli_opendir_pid != cfs_curproc_pid()) - return -EBADF; + return -EAGAIN; /* * When "ls" a dentry, the system trigger more than once "revalidate" or diff --git a/lustre/llite/namei.c b/lustre/llite/namei.c index 97fe090..8730f1f 100644 --- a/lustre/llite/namei.c +++ b/lustre/llite/namei.c @@ -132,7 +132,6 @@ static int fid_set_inode(struct inode *inode, void *opaque) struct inode *ll_iget(struct super_block *sb, ino_t hash, struct lustre_md *md) { - struct ll_inode_info *lli; struct inode *inode; ENTRY; @@ -140,7 +139,6 @@ struct inode *ll_iget(struct super_block *sb, ino_t hash, inode = iget5_locked(sb, hash, fid_test_inode, fid_set_inode, md); if (inode) { - lli = ll_i2info(inode); if (inode->i_state & I_NEW) { ll_read_inode2(inode, md); unlock_new_inode(inode); @@ -575,7 +573,7 @@ static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry, if (it->it_op == IT_GETATTR) { first = ll_statahead_enter(parent, &dentry, 1); if (first >= 0) { - ll_statahead_exit(dentry, first); + ll_statahead_exit(parent, dentry, first); if (first == 1) RETURN(retval = dentry); } @@ -604,7 +602,7 @@ static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry, } if (first == -EEXIST) - ll_statahead_mark(dentry); + ll_statahead_mark(parent, dentry); if ((it->it_op & IT_OPEN) && dentry->d_inode && !S_ISREG(dentry->d_inode->i_mode) && diff --git a/lustre/llite/statahead.c b/lustre/llite/statahead.c index 9dd17af..685635f 100644 --- a/lustre/llite/statahead.c +++ b/lustre/llite/statahead.c @@ -117,7 +117,7 @@ static inline int sa_is_stopped(struct ll_statahead_info *sai) */ static inline int sa_low_hit(struct ll_statahead_info *sai) { - return ((sai->sai_hit < 4 * sai->sai_miss && sai->sai_hit > 7) || + return ((sai->sai_hit > 7 && sai->sai_hit < 4 * sai->sai_miss) || (sai->sai_consecutive_miss > 8)); } @@ -130,18 +130,16 @@ static 
inline int sa_low_hit(struct ll_statahead_info *sai) */ static void ll_sai_entry_cleanup(struct ll_sai_entry *entry, int free) { - struct ptlrpc_request *req = entry->se_req; struct md_enqueue_info *minfo = entry->se_minfo; + struct ptlrpc_request *req = entry->se_req; ENTRY; if (minfo) { - struct dentry *dentry = minfo->mi_dentry; - struct lookup_intent *it = &minfo->mi_it; - entry->se_minfo = NULL; - ll_intent_release(it); + ll_intent_release(&minfo->mi_it); + dput(minfo->mi_dentry); + iput(minfo->mi_dir); OBD_FREE_PTR(minfo); - dput(dentry); } if (req) { entry->se_req = NULL; @@ -194,9 +192,20 @@ static void ll_sai_put(struct ll_statahead_info *sai) LASSERT(inode != NULL); lli = ll_i2info(inode); - if (atomic_dec_and_lock(&sai->sai_refcount, &lli->lli_lock)) { + LASSERT(lli->lli_sai == sai); + + if (atomic_dec_and_test(&sai->sai_refcount)) { struct ll_sai_entry *entry, *next; + spin_lock(&lli->lli_lock); + if (unlikely(atomic_read(&sai->sai_refcount) > 0)) { + /* It is race case, the interpret callback just hold + * a reference count */ + spin_unlock(&lli->lli_lock); + EXIT; + return; + } + LASSERT(lli->lli_opendir_key == NULL); lli->lli_sai = NULL; lli->lli_opendir_pid = 0; @@ -225,8 +234,8 @@ static void ll_sai_put(struct ll_statahead_info *sai) list_del_init(&entry->se_list); ll_sai_entry_cleanup(entry, 1); } - OBD_FREE_PTR(sai); iput(inode); + OBD_FREE_PTR(sai); } EXIT; } @@ -370,8 +379,8 @@ static int do_statahead_interpret(struct ll_statahead_info *sai) struct ll_sai_entry *entry; struct ptlrpc_request *req; struct md_enqueue_info *minfo; - struct dentry *dentry; struct lookup_intent *it; + struct dentry *dentry; int rc = 0; ENTRY; @@ -383,6 +392,8 @@ static int do_statahead_interpret(struct ll_statahead_info *sai) spin_unlock(&lli->lli_lock); if (unlikely(entry->se_index < sai->sai_index_next)) { + CWARN("Found stale entry: [index %u] [next %u]\n", + entry->se_index, sai->sai_index_next); ll_sai_entry_cleanup(entry, 1); RETURN(0); } @@ -392,8 +403,8 @@ 
static int do_statahead_interpret(struct ll_statahead_info *sai) req = entry->se_req; minfo = entry->se_minfo; - dentry = minfo->mi_dentry; it = &minfo->mi_it; + dentry = minfo->mi_dentry; if (dentry->d_inode == NULL) { /* @@ -401,7 +412,7 @@ static int do_statahead_interpret(struct ll_statahead_info *sai) */ struct dentry *save = dentry; struct it_cb_data icbd = { - .icbd_parent = dentry->d_parent->d_inode, + .icbd_parent = minfo->mi_dir, .icbd_childp = &dentry }; @@ -465,9 +476,9 @@ static int ll_statahead_interpret(struct obd_export *exp, struct md_enqueue_info *minfo, int rc) { - struct dentry *dentry = minfo->mi_dentry; struct lookup_intent *it = &minfo->mi_it; - struct inode *dir = dentry->d_parent->d_inode; + struct dentry *dentry = minfo->mi_dentry; + struct inode *dir = minfo->mi_dir; struct ll_inode_info *lli = ll_i2info(dir); struct ll_statahead_info *sai; struct ll_sai_entry *entry; @@ -482,13 +493,11 @@ static int ll_statahead_interpret(struct obd_export *exp, spin_unlock(&lli->lli_lock); ll_intent_release(it); dput(dentry); + iput(dir); OBD_FREE_PTR(minfo); RETURN(-ESTALE); } else { sai = ll_sai_get(lli->lli_sai); - if (rc || dir == NULL) - rc = -ESTALE; - entry = ll_sai_entry_set(sai, (unsigned int)(long)minfo->mi_cbdata, rc ? 
SA_ENTRY_UNSTATED : @@ -515,6 +524,7 @@ static void sa_args_fini(struct md_enqueue_info *minfo, struct ldlm_enqueue_info *einfo) { LASSERT(minfo && einfo); + iput(minfo->mi_dir); OBD_FREE_PTR(minfo); OBD_FREE_PTR(einfo); } @@ -539,6 +549,7 @@ static int sa_args_prep(struct inode *dir, struct dentry *dentry, minfo->mi_it.it_op = IT_GETATTR; minfo->mi_dentry = dentry; + minfo->mi_dir = igrab(dir); minfo->mi_cb = ll_statahead_interpret; minfo->mi_generation = lli->lli_sai->sai_generation; minfo->mi_cbdata = (void *)(long)lli->lli_sai->sai_index; @@ -588,10 +599,9 @@ static int do_sa_lookup(struct inode *dir, struct dentry *dentry) * \retval 0 -- will send stat-ahead request * \retval others -- prepare stat-ahead request failed */ -static int do_sa_revalidate(struct dentry *dentry) +static int do_sa_revalidate(struct inode *dir, struct dentry *dentry) { struct inode *inode = dentry->d_inode; - struct inode *dir = dentry->d_parent->d_inode; struct ll_fid fid; struct lookup_intent it = { .it_op = IT_GETATTR }; struct md_enqueue_info *minfo; @@ -620,7 +630,7 @@ static int do_sa_revalidate(struct dentry *dentry) if (rc) RETURN(rc); - rc = ll_prepare_mdc_op_data(&minfo->mi_data, dentry->d_parent->d_inode, + rc = ll_prepare_mdc_op_data(&minfo->mi_data, dir, inode, dentry->d_name.name, dentry->d_name.len, 0, NULL); if (rc == 0) @@ -677,7 +687,7 @@ static int ll_statahead_one(struct dentry *parent, struct ll_dir_entry *de) GOTO(out, rc = -ENOMEM); } } else { - rc = do_sa_revalidate(dentry); + rc = do_sa_revalidate(dir, dentry); if (rc) dput(dentry); } @@ -699,42 +709,25 @@ out: return rc; } -struct ll_sa_thread_args { - struct dentry *sta_parent; - pid_t sta_pid; -}; - static int ll_statahead_thread(void *arg) { - struct ll_sa_thread_args *sta = arg; - struct dentry *parent = dget(sta->sta_parent); + struct dentry *parent = (struct dentry *)arg; struct inode *dir = parent->d_inode; struct ll_inode_info *lli = ll_i2info(dir); struct ll_sb_info *sbi = ll_i2sbi(dir); - struct 
ll_statahead_info *sai; - struct ptlrpc_thread *thread; + struct ll_statahead_info *sai = ll_sai_get(lli->lli_sai); + struct ptlrpc_thread *thread = &sai->sai_thread; unsigned long index = 0; int first = 0; int rc = 0; ENTRY; - spin_lock(&lli->lli_lock); - if (unlikely(lli->lli_sai == NULL)) { - spin_unlock(&lli->lli_lock); - dput(parent); - RETURN(-EAGAIN); - } else { - sai = ll_sai_get(lli->lli_sai); - spin_unlock(&lli->lli_lock); - } - { char pname[16]; - snprintf(pname, 15, "ll_sa_%u", sta->sta_pid); + snprintf(pname, 15, "ll_sa_%u", lli->lli_opendir_pid); cfs_daemonize(pname); } - thread = &sai->sai_thread; sbi->ll_sa_total++; spin_lock(&lli->lli_lock); thread->t_flags = SVC_RUNNING; @@ -1015,17 +1008,21 @@ static int is_first_dirent(struct inode *dir, struct dentry *dentry) */ int do_statahead_enter(struct inode *dir, struct dentry **dentryp, int lookup) { - struct ll_sb_info *sbi = ll_i2sbi(dir); - struct ll_inode_info *lli = ll_i2info(dir); - struct ll_statahead_info *sai = lli->lli_sai; - struct ll_sa_thread_args sta; + struct ll_inode_info *lli; + struct ll_statahead_info *sai; + struct dentry *parent; struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL); int rc = 0; ENTRY; + LASSERT(dir != NULL); + lli = ll_i2info(dir); LASSERT(lli->lli_opendir_pid == cfs_curproc_pid()); + sai = lli->lli_sai; if (sai) { + struct ll_sb_info *sbi; + if (unlikely(sa_is_stopped(sai) && list_empty(&sai->sai_entries_stated))) RETURN(-EBADFD); @@ -1057,6 +1054,7 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentryp, int lookup) } } + sbi = ll_i2sbi(dir); if (ll_sai_entry_stated(sai)) { sbi->ll_sa_cached++; } else { @@ -1089,26 +1087,15 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentryp, int lookup) RETURN(rc); } - /* - * I am the "lli_opendir_pid" owner, only me can set "lli_sai". - */ - LASSERT(lli->lli_sai == NULL); - + /* I am the "lli_opendir_pid" owner, only me can set "lli_sai". 
*/ rc = is_first_dirent(dir, *dentryp); - if (rc == LS_NONE_FIRST_DE) { - /* - * It is not "ls -{a}l" operation, no need statahead for it. - */ - spin_lock(&lli->lli_lock); - lli->lli_opendir_key = NULL; - lli->lli_opendir_pid = 0; - spin_unlock(&lli->lli_lock); - RETURN(-EBADF); - } + if (rc == LS_NONE_FIRST_DE) + /* It is not "ls -{a}l" operation, no need statahead for it. */ + GOTO(out, rc = -EAGAIN); sai = ll_sai_alloc(); if (sai == NULL) - RETURN(-ENOMEM); + GOTO(out, rc = -ENOMEM); sai->sai_ls_all = (rc == LS_FIRST_DOT_DE); sai->sai_inode = igrab(dir); @@ -1116,18 +1103,29 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentryp, int lookup) CWARN("Do not start stat ahead on dying inode %lu/%u.\n", dir->i_ino, dir->i_generation); OBD_FREE_PTR(sai); - RETURN(-ESTALE); + GOTO(out, rc = -ESTALE); } - LASSERT(sai->sai_inode == (*dentryp)->d_parent->d_inode); + /* get parent reference count here, and put it in ll_statahead_thread */ + parent = dget((*dentryp)->d_parent); + if (unlikely(sai->sai_inode != parent->d_inode)) { + struct ll_inode_info *nlli = ll_i2info(parent->d_inode); - sta.sta_parent = (*dentryp)->d_parent; - sta.sta_pid = cfs_curproc_pid(); + CWARN("Race condition, someone changed %.*s just now: " + "old parent "DFID", new parent "DFID" .\n", + (*dentryp)->d_name.len, (*dentryp)->d_name.name, + PFID(&lli->lli_fid), PFID(&nlli->lli_fid)); + dput(parent); + iput(sai->sai_inode); + OBD_FREE_PTR(sai); + RETURN(-EAGAIN); + } lli->lli_sai = sai; - rc = cfs_kernel_thread(ll_statahead_thread, &sta, 0); + rc = cfs_kernel_thread(ll_statahead_thread, parent, 0); if (rc < 0) { CERROR("can't start ll_sa thread, rc: %d\n", rc); + dput(parent); lli->lli_opendir_key = NULL; sai->sai_thread.t_flags = SVC_STOPPED; ll_sai_put(sai); @@ -1144,52 +1142,62 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentryp, int lookup) * lookup, and -EEXIST also indicates that this is the first dirent. 
*/ RETURN(-EEXIST); + +out: + spin_lock(&lli->lli_lock); + lli->lli_opendir_key = NULL; + lli->lli_opendir_pid = 0; + spin_unlock(&lli->lli_lock); + return rc; } /** * update hit/miss count. */ -void ll_statahead_exit(struct dentry *dentry, int result) +void ll_statahead_exit(struct inode *dir, struct dentry *dentry, int result) { - struct dentry *parent = dentry->d_parent; - struct ll_inode_info *lli = ll_i2info(parent->d_inode); - struct ll_sb_info *sbi = ll_i2sbi(parent->d_inode); - struct ll_statahead_info *sai = lli->lli_sai; + struct ll_inode_info *lli; + struct ll_statahead_info *sai; + struct ll_sb_info *sbi; struct ll_dentry_data *ldd = ll_d2d(dentry); ENTRY; - if (lli->lli_opendir_pid == cfs_curproc_pid() && sai) { - if (result >= 1) { - sbi->ll_sa_hit++; - sai->sai_hit++; - sai->sai_consecutive_miss = 0; - sai->sai_max = min(2 * sai->sai_max, sbi->ll_sa_max); - } else { - sbi->ll_sa_miss++; - sai->sai_miss++; - sai->sai_consecutive_miss++; - if (sa_low_hit(sai) && sa_is_running(sai)) { - sbi->ll_sa_wrong++; - CDEBUG(D_READA, "statahead for dir %.*s hit " - "ratio too low: hit/miss %u/%u, " - "sent/replied %u/%u. 
stopping statahead " - "thread: pid %d\n", - parent->d_name.len, parent->d_name.name, - sai->sai_hit, sai->sai_miss, - sai->sai_sent, sai->sai_replied, - cfs_curproc_pid()); - spin_lock(&lli->lli_lock); - if (!sa_is_stopped(sai)) - sai->sai_thread.t_flags = SVC_STOPPING; - spin_unlock(&lli->lli_lock); - } + LASSERT(dir != NULL); + lli = ll_i2info(dir); + LASSERT(lli->lli_opendir_pid == cfs_curproc_pid()); + sai = lli->lli_sai; + LASSERT(sai != NULL); + sbi = ll_i2sbi(dir); + + if (result >= 1) { + sbi->ll_sa_hit++; + sai->sai_hit++; + sai->sai_consecutive_miss = 0; + sai->sai_max = min(2 * sai->sai_max, sbi->ll_sa_max); + } else { + sbi->ll_sa_miss++; + sai->sai_miss++; + sai->sai_consecutive_miss++; + if (sa_low_hit(sai) && sa_is_running(sai)) { + sbi->ll_sa_wrong++; + CDEBUG(D_READA, "Statahead for dir "DFID" hit ratio " + "too low: hit/miss %u/%u, sent/replied %u/%u, " + "stopping statahead thread: pid %d\n", + PFID(&lli->lli_fid), sai->sai_hit, + sai->sai_miss, sai->sai_sent, + sai->sai_replied, cfs_curproc_pid()); + spin_lock(&lli->lli_lock); + if (!sa_is_stopped(sai)) + sai->sai_thread.t_flags = SVC_STOPPING; + spin_unlock(&lli->lli_lock); } - - if (!sa_is_stopped(sai)) - cfs_waitq_signal(&sai->sai_thread.t_ctl_waitq); - ll_sai_entry_fini(sai); - if (likely(ldd != NULL)) - ldd->lld_sa_generation = sai->sai_generation; } + + if (!sa_is_stopped(sai)) + cfs_waitq_signal(&sai->sai_thread.t_ctl_waitq); + ll_sai_entry_fini(sai); + if (likely(ldd != NULL)) + ldd->lld_sa_generation = sai->sai_generation; + EXIT; } diff --git a/lustre/tests/runracer b/lustre/tests/runracer index feca24b..b67a819 100644 --- a/lustre/tests/runracer +++ b/lustre/tests/runracer @@ -37,6 +37,7 @@ do_racer_cleanup () { local INTERVAL=5 local pids local rc=0 + local TMAX local RDIR=$1 @@ -45,11 +46,19 @@ do_racer_cleanup () { # Check if all processes are killed local clients=$CLIENTS + local num_clients=$(get_node_count ${clients//,/ }) + if at_is_enabled; then + TMAX=$(at_max_get mds) + 
else + TMAX=$(lctl get_param -n timeout) + fi + + [ $TMAX -gt $((num_clients * 60)) ] || TMAX=$((num_clients * 60)) # 1.Let chance to racer to kill all it's processes # FIXME: not sure how long does it take for racer to kill all processes # 80 is sometimes are enough for 2 clients; sometimes it takes more than 150 sec - while [ $WAIT -lt 90 ]; do + while [ $WAIT -lt $TMAX ]; do running=$(do_nodes $clients "ps uax | grep $RDIR " | egrep -v "(acceptance|grep|pdsh|bash)" || true) [ -z "$running" ] && rc=0 && break echo "clients $clients are still running the racer processes. Waited $WAIT secs" @@ -68,8 +77,8 @@ do_racer_cleanup () { echo "client $C still running racer processes after $WAIT seconds. Killing $pids" do_node $C "ps uax | grep $RDIR " | egrep -v "(acceptance|grep|PATH)" do_node $C kill -TERM $pids || true - # let processes to be killed - sleep 2 + # let processes to be killed, there maybe many threads to be killed, so give 20 sec gap + sleep 20 # 3. Check if the processes were killed # exit error if the processes still exist for pid in $pids; do