Whamcloud - gitweb
b=22781 fix the waiting time/race of identity upcall.
author Eric Mei <eric.mei@oracle.com>
Tue, 21 Sep 2010 18:33:59 +0000 (22:33 +0400)
committer Mikhail Pershin <tappro@sun.com>
Mon, 27 Sep 2010 13:06:05 +0000 (17:06 +0400)
r=vitaly.fertman
r=di.wang

lustre/lvfs/upcall_cache.c
lustre/mdt/mdt_identity.c
lustre/utils/l_getidentity.c

index 98e5b40..5e08659 100644 (file)
@@ -144,7 +144,8 @@ static int check_unlink_entry(struct upcall_cache *cache,
                 return 0;
 
         if (UC_CACHE_IS_ACQUIRING(entry)) {
-                if (cfs_time_before(jiffies, entry->ue_acquire_expire))
+                if (entry->ue_acquire_expire == 0 ||
+                    cfs_time_before(jiffies, entry->ue_acquire_expire))
                         return 0;
 
                 UC_CACHE_SET_EXPIRED(entry);
@@ -191,7 +192,7 @@ find_again:
                 }
         }
 
-        if (!found) { /* didn't find it */
+        if (!found) {
                 if (!new) {
                         cfs_spin_unlock(&cache->uc_lock);
                         new = alloc_entry(cache, key, args);
@@ -217,46 +218,46 @@ find_again:
         if (UC_CACHE_IS_NEW(entry)) {
                 UC_CACHE_SET_ACQUIRING(entry);
                 UC_CACHE_CLEAR_NEW(entry);
-                entry->ue_acquire_expire = jiffies + cache->uc_acquire_expire;
                 cfs_spin_unlock(&cache->uc_lock);
                 rc = refresh_entry(cache, entry);
                 cfs_spin_lock(&cache->uc_lock);
+                entry->ue_acquire_expire = jiffies + cache->uc_acquire_expire;
                 if (rc < 0) {
                         UC_CACHE_CLEAR_ACQUIRING(entry);
                         UC_CACHE_SET_INVALID(entry);
+                        cfs_waitq_broadcast(&entry->ue_waitq);
                         if (unlikely(rc == -EREMCHG)) {
                                 put_entry(cache, entry);
                                 GOTO(out, entry = ERR_PTR(rc));
                         }
                 }
-                /* fall through */
         }
-        /* someone (and only one) is doing upcall upon
-         * this item, just wait it complete
-         */
+
+        /* someone (and only one) is doing upcall upon this item,
+         * wait it to complete */
         if (UC_CACHE_IS_ACQUIRING(entry)) {
-                unsigned long expiry = jiffies + cache->uc_acquire_expire;
+                long expiry = (entry == new) ? cache->uc_acquire_expire :
+                                               CFS_MAX_SCHEDULE_TIMEOUT;
+                long left;
 
                 cfs_waitlink_init(&wait);
                 cfs_waitq_add(&entry->ue_waitq, &wait);
                 cfs_set_current_state(CFS_TASK_INTERRUPTIBLE);
                 cfs_spin_unlock(&cache->uc_lock);
 
-                cfs_waitq_timedwait(&wait, CFS_TASK_INTERRUPTIBLE, 
-                                    cache->uc_acquire_expire);
+                left = cfs_waitq_timedwait(&wait, CFS_TASK_INTERRUPTIBLE,
+                                           expiry);
 
                 cfs_spin_lock(&cache->uc_lock);
                 cfs_waitq_del(&entry->ue_waitq, &wait);
                 if (UC_CACHE_IS_ACQUIRING(entry)) {
                         /* we're interrupted or upcall failed in the middle */
-                        rc = cfs_time_before(jiffies, expiry) ? \
-                                -EINTR : -ETIMEDOUT;
+                        rc = left > 0 ? -EINTR : -ETIMEDOUT;
+                        CERROR("acquire for key "LPU64": error %d\n",
+                               entry->ue_key, rc);
                         put_entry(cache, entry);
-                        CERROR("acquire timeout exceeded for key "LPU64
-                               "\n", entry->ue_key);
                         GOTO(out, entry = ERR_PTR(rc));
                 }
-                /* fall through */
         }
 
         /* invalid means error, don't need to try again */
@@ -464,8 +465,8 @@ struct upcall_cache *upcall_cache_init(const char *name, const char *upcall,
         strncpy(cache->uc_name, name, sizeof(cache->uc_name) - 1);
         /* upcall pathname proc tunable */
         strncpy(cache->uc_upcall, upcall, sizeof(cache->uc_upcall) - 1);
-        cache->uc_entry_expire = 10 * 60 * CFS_HZ;
-        cache->uc_acquire_expire = 15 * CFS_HZ;
+        cache->uc_entry_expire = 20 * 60 * CFS_HZ;
+        cache->uc_acquire_expire = 30 * CFS_HZ;
         cache->uc_ops = ops;
 
         RETURN(cache);
index 7b0a312..434ddbd 100644 (file)
@@ -114,6 +114,7 @@ static int mdt_identity_do_upcall(struct upcall_cache *cache,
                   [1] = "PATH=/sbin:/usr/sbin",
                   [2] = NULL
         };
+        struct timeval start, end;
         int size, rc;
         ENTRY;
 
@@ -140,15 +141,19 @@ static int mdt_identity_do_upcall(struct upcall_cache *cache,
 
         CDEBUG(D_INFO, "The upcall is: '%s'\n", cache->uc_upcall);
 
+        cfs_gettimeofday(&start);
         rc = USERMODEHELPER(argv[0], argv, envp);
+        cfs_gettimeofday(&end);
         if (rc < 0) {
                 CERROR("%s: error invoking upcall %s %s %s: rc %d; "
-                       "check /proc/fs/lustre/mdt/%s/identity_upcall\n",
+                       "check /proc/fs/lustre/mdt/%s/identity_upcall, "
+                       "time %ldus\n",
                        cache->uc_name, argv[0], argv[1], argv[2], rc,
-                       cache->uc_name);
+                       cache->uc_name, cfs_timeval_sub(&end, &start, NULL));
         } else {
-                CDEBUG(D_HA, "%s: invoked upcall %s %s %s\n", cache->uc_name,
-                       argv[0], argv[1], argv[2]);
+                CDEBUG(D_HA, "%s: invoked upcall %s %s %s, time %ldus\n",
+                       cache->uc_name, argv[0], argv[1], argv[2],
+                       cfs_timeval_sub(&end, &start, NULL));
                 rc = 0;
         }
         EXIT;
index 2673412..ee4b645 100644 (file)
@@ -89,11 +89,10 @@ static void errlog(const char *fmt, ...)
 {
         va_list args;
 
-        openlog(progname, LOG_PERROR, LOG_AUTHPRIV);
+        openlog(progname, LOG_PERROR | LOG_PID, LOG_AUTHPRIV);
 
         va_start(args, fmt);
         vsyslog(LOG_NOTICE, fmt, args);
-        fprintf(stderr, fmt, args);
         va_end(args);
 
         closelog();