Whamcloud - gitweb
LU-3936 ldlm: ldlm_cancel_stale_locks()) ASSERTION( count > 0 ) failed
[fs/lustre-release.git] / lustre / ldlm / ldlm_pool.c
index 820a50d..734b330 100644 (file)
 /*
  * 50 ldlm locks for 1MB of RAM.
  */
-#define LDLM_POOL_HOST_L ((CFS_NUM_CACHEPAGES >> (20 - CFS_PAGE_SHIFT)) * 50)
+#define LDLM_POOL_HOST_L ((NUM_CACHEPAGES >> (20 - PAGE_CACHE_SHIFT)) * 50)
 
 /*
  * Maximal possible grant step plan in %.
@@ -518,8 +518,8 @@ static int ldlm_cli_pool_recalc(struct ldlm_pool *pl)
          * It may be called when SLV has changed much, this is why we do not
          * take into account pl->pl_recalc_time here.
          */
-        RETURN(ldlm_cancel_lru(ldlm_pl2ns(pl), 0, LDLM_SYNC, 
-                               LDLM_CANCEL_LRUR));
+       RETURN(ldlm_cancel_lru(ldlm_pl2ns(pl), 0, LCF_ASYNC,
+                              LDLM_CANCEL_LRUR));
 }
 
 /**
@@ -551,8 +551,8 @@ static int ldlm_cli_pool_shrink(struct ldlm_pool *pl,
        spin_unlock(&ns->ns_lock);
 
         if (nr) {
-                canceled = ldlm_cancel_lru(ns, nr, LDLM_ASYNC,
-                                           LDLM_CANCEL_SHRINK);
+               canceled = ldlm_cancel_lru(ns, nr, LCF_ASYNC,
+                                          LDLM_CANCEL_SHRINK);
         }
 #ifdef __KERNEL__
         /*
@@ -589,7 +589,6 @@ int ldlm_pool_recalc(struct ldlm_pool *pl)
                 goto recalc;
 
        spin_lock(&pl->pl_lock);
-        recalc_interval_sec = cfs_time_current_sec() - pl->pl_recalc_time;
         if (recalc_interval_sec > 0) {
                 /*
                  * Update pool statistics every 1s.
@@ -609,12 +608,12 @@ int ldlm_pool_recalc(struct ldlm_pool *pl)
                 count = pl->pl_ops->po_recalc(pl);
                 lprocfs_counter_add(pl->pl_stats, LDLM_POOL_RECALC_STAT,
                                     count);
-                return count;
         }
+       recalc_interval_sec = pl->pl_recalc_time - cfs_time_current_sec() +
+                             pl->pl_recalc_period;
 
-        return 0;
+        return recalc_interval_sec;
 }
-EXPORT_SYMBOL(ldlm_pool_recalc);
 
 /**
  * Pool shrink wrapper. Will call either client or server pool recalc callback
@@ -744,11 +743,13 @@ static int ldlm_pool_proc_init(struct ldlm_pool *pl)
         }
         pl->pl_proc_dir = lprocfs_register("pool", parent_ns_proc,
                                            NULL, NULL);
-        if (IS_ERR(pl->pl_proc_dir)) {
-                CERROR("LProcFS failed in ldlm-pool-init\n");
-                rc = PTR_ERR(pl->pl_proc_dir);
-                GOTO(out_free_name, rc);
-        }
+       if (IS_ERR(pl->pl_proc_dir)) {
+               rc = PTR_ERR(pl->pl_proc_dir);
+               pl->pl_proc_dir = NULL;
+               CERROR("%s: cannot create 'pool' proc entry: rc = %d\n",
+                      ldlm_ns_name(ns), rc);
+               GOTO(out_free_name, rc);
+       }
 
         var_name[MAX_STRING_SIZE] = '\0';
         memset(pool_vars, 0, sizeof(pool_vars));
@@ -1058,14 +1059,14 @@ __u32 ldlm_pool_get_lvf(struct ldlm_pool *pl)
 EXPORT_SYMBOL(ldlm_pool_get_lvf);
 
 #ifdef __KERNEL__
-static int ldlm_pool_granted(struct ldlm_pool *pl)
+static unsigned int ldlm_pool_granted(struct ldlm_pool *pl)
 {
         return cfs_atomic_read(&pl->pl_granted);
 }
 
 static struct ptlrpc_thread *ldlm_pools_thread;
-static struct cfs_shrinker *ldlm_pools_srv_shrinker;
-static struct cfs_shrinker *ldlm_pools_cli_shrinker;
+static struct shrinker *ldlm_pools_srv_shrinker;
+static struct shrinker *ldlm_pools_cli_shrinker;
 static struct completion ldlm_pools_comp;
 
 /*
@@ -1076,8 +1077,10 @@ static struct completion ldlm_pools_comp;
 static int ldlm_pools_shrink(ldlm_side_t client, int nr,
                              unsigned int gfp_mask)
 {
-        int total = 0, cached = 0, nr_ns;
+       unsigned int total = 0, cached = 0;
+       int nr_ns;
         struct ldlm_namespace *ns;
+       struct ldlm_namespace *ns_old = NULL; /* loop detection */
         void *cookie;
 
         if (client == LDLM_NAMESPACE_CLIENT && nr != 0 &&
@@ -1092,8 +1095,8 @@ static int ldlm_pools_shrink(ldlm_side_t client, int nr,
         /*
          * Find out how many resources we may release.
          */
-        for (nr_ns = cfs_atomic_read(ldlm_namespace_nr(client));
-             nr_ns > 0; nr_ns--)
+       for (nr_ns = ldlm_namespace_nr_read(client);
+            nr_ns > 0; nr_ns--)
         {
                mutex_lock(ldlm_namespace_lock(client));
                 if (cfs_list_empty(ldlm_namespace_list(client))) {
@@ -1102,8 +1105,23 @@ static int ldlm_pools_shrink(ldlm_side_t client, int nr,
                         return 0;
                 }
                 ns = ldlm_namespace_first_locked(client);
+
+               if (ns == ns_old) {
+                       mutex_unlock(ldlm_namespace_lock(client));
+                       break;
+               }
+
+               if (ldlm_ns_empty(ns)) {
+                       ldlm_namespace_move_to_inactive_locked(ns, client);
+                       mutex_unlock(ldlm_namespace_lock(client));
+                       continue;
+               }
+
+               if (ns_old == NULL)
+                       ns_old = ns;
+
                 ldlm_namespace_get(ns);
-                ldlm_namespace_move_locked(ns, client);
+                ldlm_namespace_move_to_active_locked(ns, client);
                mutex_unlock(ldlm_namespace_lock(client));
                 total += ldlm_pool_shrink(&ns->ns_pool, 0, gfp_mask);
                 ldlm_namespace_put(ns);
@@ -1117,10 +1135,11 @@ static int ldlm_pools_shrink(ldlm_side_t client, int nr,
         /*
          * Shrink at least ldlm_namespace_nr(client) namespaces.
          */
-        for (nr_ns = cfs_atomic_read(ldlm_namespace_nr(client));
-             nr_ns > 0; nr_ns--)
+       for (nr_ns = ldlm_namespace_nr_read(client) - nr_ns;
+            nr_ns > 0; nr_ns--)
         {
-                int cancel, nr_locks;
+               __u64 cancel;
+               unsigned int nr_locks;
 
                 /*
                  * Do not call shrink under ldlm_namespace_lock(client)
@@ -1138,12 +1157,13 @@ static int ldlm_pools_shrink(ldlm_side_t client, int nr,
                 }
                 ns = ldlm_namespace_first_locked(client);
                 ldlm_namespace_get(ns);
-                ldlm_namespace_move_locked(ns, client);
+                ldlm_namespace_move_to_active_locked(ns, client);
                mutex_unlock(ldlm_namespace_lock(client));
 
                 nr_locks = ldlm_pool_granted(&ns->ns_pool);
-                cancel = 1 + nr_locks * nr / total;
-                ldlm_pool_shrink(&ns->ns_pool, cancel, gfp_mask);
+               cancel = (__u64)nr_locks * nr;
+               do_div(cancel, total);
+               ldlm_pool_shrink(&ns->ns_pool, 1 + cancel, gfp_mask);
                 cached += ldlm_pool_granted(&ns->ns_pool);
                 ldlm_namespace_put(ns);
         }
@@ -1167,11 +1187,13 @@ static int ldlm_pools_cli_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask))
                                  shrink_param(sc, gfp_mask));
 }
 
-void ldlm_pools_recalc(ldlm_side_t client)
+int ldlm_pools_recalc(ldlm_side_t client)
 {
         __u32 nr_l = 0, nr_p = 0, l;
         struct ldlm_namespace *ns;
+        struct ldlm_namespace *ns_old = NULL;
         int nr, equal = 0;
+       int time = 50; /* seconds of sleep if no active namespaces */
 
         /*
          * No need to setup pool limit for client pools.
@@ -1229,16 +1251,14 @@ void ldlm_pools_recalc(ldlm_side_t client)
                                  * for _all_ pools.
                                  */
                                 l = LDLM_POOL_HOST_L /
-                                        cfs_atomic_read(
-                                                ldlm_namespace_nr(client));
+                                       ldlm_namespace_nr_read(client);
                         } else {
                                 /*
                                  * All the rest of greedy pools will have
                                  * all locks in equal parts.
                                  */
                                 l = (LDLM_POOL_HOST_L - nr_l) /
-                                        (cfs_atomic_read(
-                                                ldlm_namespace_nr(client)) -
+                                       (ldlm_namespace_nr_read(client) -
                                          nr_p);
                         }
                         ldlm_pool_setup(&ns->ns_pool, l);
@@ -1249,7 +1269,7 @@ void ldlm_pools_recalc(ldlm_side_t client)
         /*
          * Recalc at least ldlm_namespace_nr(client) namespaces.
          */
-        for (nr = cfs_atomic_read(ldlm_namespace_nr(client)); nr > 0; nr--) {
+       for (nr = ldlm_namespace_nr_read(client); nr > 0; nr--) {
                 int     skip;
                 /*
                  * Lock the list, get first @ns in the list, getref, move it
@@ -1265,6 +1285,30 @@ void ldlm_pools_recalc(ldlm_side_t client)
                }
                ns = ldlm_namespace_first_locked(client);
 
+               if (ns_old == ns) { /* Full pass complete */
+                       mutex_unlock(ldlm_namespace_lock(client));
+                       break;
+               }
+
+               /* We got an empty namespace, need to move it back to inactive
+                * list.
+                * The race with parallel resource creation is fine:
+                * - If they do namespace_get before our check, we fail the
+                *   check and they move this item to the end of the list anyway
+                * - If we do the check and then they do namespace_get, then
+                *   we move the namespace to inactive and they will move
+                *   it back to active (synchronised by the lock, so no clash
+                *   there).
+                */
+               if (ldlm_ns_empty(ns)) {
+                       ldlm_namespace_move_to_inactive_locked(ns, client);
+                       mutex_unlock(ldlm_namespace_lock(client));
+                       continue;
+               }
+
+               if (ns_old == NULL)
+                       ns_old = ns;
+
                spin_lock(&ns->ns_lock);
                /*
                 * skip ns which is being freed, and we don't want to increase
@@ -1278,48 +1322,52 @@ void ldlm_pools_recalc(ldlm_side_t client)
                }
                spin_unlock(&ns->ns_lock);
 
-               ldlm_namespace_move_locked(ns, client);
+               ldlm_namespace_move_to_active_locked(ns, client);
                mutex_unlock(ldlm_namespace_lock(client));
 
-                /*
-                 * After setup is done - recalc the pool.
-                 */
-                if (!skip) {
-                        ldlm_pool_recalc(&ns->ns_pool);
-                        ldlm_namespace_put(ns);
-                }
+               /*
+                * After setup is done - recalc the pool.
+                */
+               if (!skip) {
+                       int ttime = ldlm_pool_recalc(&ns->ns_pool);
+
+                       if (ttime < time)
+                               time = ttime;
+
+                       ldlm_namespace_put(ns);
+               }
         }
+       return time;
 }
 EXPORT_SYMBOL(ldlm_pools_recalc);
 
 static int ldlm_pools_thread_main(void *arg)
 {
-        struct ptlrpc_thread *thread = (struct ptlrpc_thread *)arg;
-        char *t_name = "ldlm_poold";
-        ENTRY;
+       struct ptlrpc_thread *thread = (struct ptlrpc_thread *)arg;
+       int s_time, c_time;
+       ENTRY;
 
-        cfs_daemonize(t_name);
-        thread_set_flags(thread, SVC_RUNNING);
-        cfs_waitq_signal(&thread->t_ctl_waitq);
+       thread_set_flags(thread, SVC_RUNNING);
+       wake_up(&thread->t_ctl_waitq);
 
-        CDEBUG(D_DLMTRACE, "%s: pool thread starting, process %d\n",
-               t_name, cfs_curproc_pid());
+       CDEBUG(D_DLMTRACE, "%s: pool thread starting, process %d\n",
+              "ldlm_poold", current_pid());
 
         while (1) {
                 struct l_wait_info lwi;
 
-                /*
-                 * Recal all pools on this tick.
-                 */
-                ldlm_pools_recalc(LDLM_NAMESPACE_SERVER);
-                ldlm_pools_recalc(LDLM_NAMESPACE_CLIENT);
+               /*
+                * Recalc all pools on this tick.
+                */
+               s_time = ldlm_pools_recalc(LDLM_NAMESPACE_SERVER);
+               c_time = ldlm_pools_recalc(LDLM_NAMESPACE_CLIENT);
 
-                /*
-                 * Wait until the next check time, or until we're
-                 * stopped.
-                 */
-                lwi = LWI_TIMEOUT(cfs_time_seconds(LDLM_POOLS_THREAD_PERIOD),
-                                  NULL, NULL);
+               /*
+                * Wait until the next check time, or until we're
+                * stopped.
+                */
+               lwi = LWI_TIMEOUT(cfs_time_seconds(min(s_time, c_time)),
+                                 NULL, NULL);
                 l_wait_event(thread->t_ctl_waitq,
                              thread_is_stopping(thread) ||
                              thread_is_event(thread),
@@ -1331,101 +1379,96 @@ static int ldlm_pools_thread_main(void *arg)
                         thread_test_and_clear_flags(thread, SVC_EVENT);
         }
 
-        thread_set_flags(thread, SVC_STOPPED);
-        cfs_waitq_signal(&thread->t_ctl_waitq);
+       thread_set_flags(thread, SVC_STOPPED);
+       wake_up(&thread->t_ctl_waitq);
 
-        CDEBUG(D_DLMTRACE, "%s: pool thread exiting, process %d\n",
-               t_name, cfs_curproc_pid());
+       CDEBUG(D_DLMTRACE, "%s: pool thread exiting, process %d\n",
+               "ldlm_poold", current_pid());
 
        complete_and_exit(&ldlm_pools_comp, 0);
 }
 
 static int ldlm_pools_thread_start(void)
 {
-        struct l_wait_info lwi = { 0 };
-        int rc;
-        ENTRY;
+       struct l_wait_info lwi = { 0 };
+       struct task_struct *task;
+       ENTRY;
 
-        if (ldlm_pools_thread != NULL)
-                RETURN(-EALREADY);
+       if (ldlm_pools_thread != NULL)
+               RETURN(-EALREADY);
 
-        OBD_ALLOC_PTR(ldlm_pools_thread);
-        if (ldlm_pools_thread == NULL)
-                RETURN(-ENOMEM);
+       OBD_ALLOC_PTR(ldlm_pools_thread);
+       if (ldlm_pools_thread == NULL)
+               RETURN(-ENOMEM);
 
        init_completion(&ldlm_pools_comp);
-        cfs_waitq_init(&ldlm_pools_thread->t_ctl_waitq);
-
-        /*
-         * CLONE_VM and CLONE_FILES just avoid a needless copy, because we
-         * just drop the VM and FILES in cfs_daemonize() right away.
-         */
-        rc = cfs_create_thread(ldlm_pools_thread_main, ldlm_pools_thread,
-                               CFS_DAEMON_FLAGS);
-        if (rc < 0) {
-                CERROR("Can't start pool thread, error %d\n",
-                       rc);
-                OBD_FREE(ldlm_pools_thread, sizeof(*ldlm_pools_thread));
-                ldlm_pools_thread = NULL;
-                RETURN(rc);
-        }
-        l_wait_event(ldlm_pools_thread->t_ctl_waitq,
-                     thread_is_running(ldlm_pools_thread), &lwi);
-        RETURN(0);
+       init_waitqueue_head(&ldlm_pools_thread->t_ctl_waitq);
+
+       task = kthread_run(ldlm_pools_thread_main, ldlm_pools_thread,
+                          "ldlm_poold");
+       if (IS_ERR(task)) {
+               CERROR("Can't start pool thread, error %ld\n", PTR_ERR(task));
+               OBD_FREE(ldlm_pools_thread, sizeof(*ldlm_pools_thread));
+               ldlm_pools_thread = NULL;
+               RETURN(PTR_ERR(task));
+       }
+       l_wait_event(ldlm_pools_thread->t_ctl_waitq,
+                    thread_is_running(ldlm_pools_thread), &lwi);
+       RETURN(0);
 }
 
 static void ldlm_pools_thread_stop(void)
 {
-        ENTRY;
+       ENTRY;
 
-        if (ldlm_pools_thread == NULL) {
-                EXIT;
-                return;
-        }
+       if (ldlm_pools_thread == NULL) {
+               EXIT;
+               return;
+       }
 
-        thread_set_flags(ldlm_pools_thread, SVC_STOPPING);
-        cfs_waitq_signal(&ldlm_pools_thread->t_ctl_waitq);
+       thread_set_flags(ldlm_pools_thread, SVC_STOPPING);
+       wake_up(&ldlm_pools_thread->t_ctl_waitq);
 
-        /*
-         * Make sure that pools thread is finished before freeing @thread.
-         * This fixes possible race and oops due to accessing freed memory
-         * in pools thread.
-         */
+       /*
+        * Make sure that pools thread is finished before freeing @thread.
+        * This fixes possible race and oops due to accessing freed memory
+        * in pools thread.
+        */
        wait_for_completion(&ldlm_pools_comp);
-        OBD_FREE_PTR(ldlm_pools_thread);
-        ldlm_pools_thread = NULL;
-        EXIT;
+       OBD_FREE_PTR(ldlm_pools_thread);
+       ldlm_pools_thread = NULL;
+       EXIT;
 }
 
 int ldlm_pools_init(void)
 {
-        int rc;
-        ENTRY;
+       int rc;
+       ENTRY;
 
-        rc = ldlm_pools_thread_start();
-        if (rc == 0) {
-                ldlm_pools_srv_shrinker =
-                        cfs_set_shrinker(CFS_DEFAULT_SEEKS,
-                                         ldlm_pools_srv_shrink);
-                ldlm_pools_cli_shrinker =
-                        cfs_set_shrinker(CFS_DEFAULT_SEEKS,
-                                         ldlm_pools_cli_shrink);
-        }
-        RETURN(rc);
+       rc = ldlm_pools_thread_start();
+       if (rc == 0) {
+               ldlm_pools_srv_shrinker =
+                       set_shrinker(DEFAULT_SEEKS,
+                                        ldlm_pools_srv_shrink);
+               ldlm_pools_cli_shrinker =
+                       set_shrinker(DEFAULT_SEEKS,
+                                        ldlm_pools_cli_shrink);
+       }
+       RETURN(rc);
 }
 EXPORT_SYMBOL(ldlm_pools_init);
 
 void ldlm_pools_fini(void)
 {
-        if (ldlm_pools_srv_shrinker != NULL) {
-                cfs_remove_shrinker(ldlm_pools_srv_shrinker);
-                ldlm_pools_srv_shrinker = NULL;
-        }
-        if (ldlm_pools_cli_shrinker != NULL) {
-                cfs_remove_shrinker(ldlm_pools_cli_shrinker);
-                ldlm_pools_cli_shrinker = NULL;
-        }
-        ldlm_pools_thread_stop();
+       if (ldlm_pools_srv_shrinker != NULL) {
+               remove_shrinker(ldlm_pools_srv_shrinker);
+               ldlm_pools_srv_shrinker = NULL;
+       }
+       if (ldlm_pools_cli_shrinker != NULL) {
+               remove_shrinker(ldlm_pools_cli_shrinker);
+               ldlm_pools_cli_shrinker = NULL;
+       }
+       ldlm_pools_thread_stop();
 }
 EXPORT_SYMBOL(ldlm_pools_fini);
 #endif /* __KERNEL__ */
@@ -1529,9 +1572,9 @@ void ldlm_pools_fini(void)
 }
 EXPORT_SYMBOL(ldlm_pools_fini);
 
-void ldlm_pools_recalc(ldlm_side_t client)
+int ldlm_pools_recalc(ldlm_side_t client)
 {
-        return;
+        return 0;
 }
 EXPORT_SYMBOL(ldlm_pools_recalc);
 #endif /* HAVE_LRU_RESIZE_SUPPORT */