From c7139b228b5294b1e9f4f52a4c6565c06d17b4af Mon Sep 17 00:00:00 2001 From: Eric Mei Date: Tue, 21 Sep 2010 22:33:59 +0400 Subject: [PATCH] b=22781 fix the waiting time/race of identity upcall. r=vitaly.fertman r=di.wang --- lustre/lvfs/upcall_cache.c | 35 ++++++++++++++++++----------------- lustre/mdt/mdt_identity.c | 13 +++++++++---- lustre/utils/l_getidentity.c | 3 +-- 3 files changed, 28 insertions(+), 23 deletions(-) diff --git a/lustre/lvfs/upcall_cache.c b/lustre/lvfs/upcall_cache.c index 98e5b40..5e08659 100644 --- a/lustre/lvfs/upcall_cache.c +++ b/lustre/lvfs/upcall_cache.c @@ -144,7 +144,8 @@ static int check_unlink_entry(struct upcall_cache *cache, return 0; if (UC_CACHE_IS_ACQUIRING(entry)) { - if (cfs_time_before(jiffies, entry->ue_acquire_expire)) + if (entry->ue_acquire_expire == 0 || + cfs_time_before(jiffies, entry->ue_acquire_expire)) return 0; UC_CACHE_SET_EXPIRED(entry); @@ -191,7 +192,7 @@ find_again: } } - if (!found) { /* didn't find it */ + if (!found) { if (!new) { cfs_spin_unlock(&cache->uc_lock); new = alloc_entry(cache, key, args); @@ -217,46 +218,46 @@ find_again: if (UC_CACHE_IS_NEW(entry)) { UC_CACHE_SET_ACQUIRING(entry); UC_CACHE_CLEAR_NEW(entry); - entry->ue_acquire_expire = jiffies + cache->uc_acquire_expire; cfs_spin_unlock(&cache->uc_lock); rc = refresh_entry(cache, entry); cfs_spin_lock(&cache->uc_lock); + entry->ue_acquire_expire = jiffies + cache->uc_acquire_expire; if (rc < 0) { UC_CACHE_CLEAR_ACQUIRING(entry); UC_CACHE_SET_INVALID(entry); + cfs_waitq_broadcast(&entry->ue_waitq); if (unlikely(rc == -EREMCHG)) { put_entry(cache, entry); GOTO(out, entry = ERR_PTR(rc)); } } - /* fall through */ } - /* someone (and only one) is doing upcall upon - * this item, just wait it complete - */ + + /* someone (and only one) is doing upcall upon this item, + * wait for it to complete */ if (UC_CACHE_IS_ACQUIRING(entry)) { - unsigned long expiry = jiffies + cache->uc_acquire_expire; + long expiry = (entry == new) ? 
cache->uc_acquire_expire : + CFS_MAX_SCHEDULE_TIMEOUT; + long left; cfs_waitlink_init(&wait); cfs_waitq_add(&entry->ue_waitq, &wait); cfs_set_current_state(CFS_TASK_INTERRUPTIBLE); cfs_spin_unlock(&cache->uc_lock); - cfs_waitq_timedwait(&wait, CFS_TASK_INTERRUPTIBLE, - cache->uc_acquire_expire); + left = cfs_waitq_timedwait(&wait, CFS_TASK_INTERRUPTIBLE, + expiry); cfs_spin_lock(&cache->uc_lock); cfs_waitq_del(&entry->ue_waitq, &wait); if (UC_CACHE_IS_ACQUIRING(entry)) { /* we're interrupted or upcall failed in the middle */ - rc = cfs_time_before(jiffies, expiry) ? \ - -EINTR : -ETIMEDOUT; + rc = left > 0 ? -EINTR : -ETIMEDOUT; + CERROR("acquire for key "LPU64": error %d\n", + entry->ue_key, rc); put_entry(cache, entry); - CERROR("acquire timeout exceeded for key "LPU64 - "\n", entry->ue_key); GOTO(out, entry = ERR_PTR(rc)); } - /* fall through */ } /* invalid means error, don't need to try again */ @@ -464,8 +465,8 @@ struct upcall_cache *upcall_cache_init(const char *name, const char *upcall, strncpy(cache->uc_name, name, sizeof(cache->uc_name) - 1); /* upcall pathname proc tunable */ strncpy(cache->uc_upcall, upcall, sizeof(cache->uc_upcall) - 1); - cache->uc_entry_expire = 10 * 60 * CFS_HZ; - cache->uc_acquire_expire = 15 * CFS_HZ; + cache->uc_entry_expire = 20 * 60 * CFS_HZ; + cache->uc_acquire_expire = 30 * CFS_HZ; cache->uc_ops = ops; RETURN(cache); diff --git a/lustre/mdt/mdt_identity.c b/lustre/mdt/mdt_identity.c index 7b0a312..434ddbd 100644 --- a/lustre/mdt/mdt_identity.c +++ b/lustre/mdt/mdt_identity.c @@ -114,6 +114,7 @@ static int mdt_identity_do_upcall(struct upcall_cache *cache, [1] = "PATH=/sbin:/usr/sbin", [2] = NULL }; + struct timeval start, end; int size, rc; ENTRY; @@ -140,15 +141,19 @@ static int mdt_identity_do_upcall(struct upcall_cache *cache, CDEBUG(D_INFO, "The upcall is: '%s'\n", cache->uc_upcall); + cfs_gettimeofday(&start); rc = USERMODEHELPER(argv[0], argv, envp); + cfs_gettimeofday(&end); if (rc < 0) { CERROR("%s: error invoking 
upcall %s %s %s: rc %d; " - "check /proc/fs/lustre/mdt/%s/identity_upcall\n", + "check /proc/fs/lustre/mdt/%s/identity_upcall, " + "time %ldus\n", cache->uc_name, argv[0], argv[1], argv[2], rc, - cache->uc_name); + cache->uc_name, cfs_timeval_sub(&end, &start, NULL)); } else { - CDEBUG(D_HA, "%s: invoked upcall %s %s %s\n", cache->uc_name, - argv[0], argv[1], argv[2]); + CDEBUG(D_HA, "%s: invoked upcall %s %s %s, time %ldus\n", + cache->uc_name, argv[0], argv[1], argv[2], + cfs_timeval_sub(&end, &start, NULL)); rc = 0; } EXIT; diff --git a/lustre/utils/l_getidentity.c b/lustre/utils/l_getidentity.c index 2673412..ee4b645 100644 --- a/lustre/utils/l_getidentity.c +++ b/lustre/utils/l_getidentity.c @@ -89,11 +89,10 @@ static void errlog(const char *fmt, ...) { va_list args; - openlog(progname, LOG_PERROR, LOG_AUTHPRIV); + openlog(progname, LOG_PERROR | LOG_PID, LOG_AUTHPRIV); va_start(args, fmt); vsyslog(LOG_NOTICE, fmt, args); - fprintf(stderr, fmt, args); va_end(args); closelog(); -- 1.8.3.1