Whamcloud - gitweb
LU-15283 quota: deadlock between reint & lquota_wb
[fs/lustre-release.git] / lustre / quota / qsd_reint.c
index b355eb8..666df96 100644 (file)
@@ -21,7 +21,7 @@
  * GPL HEADER END
  */
 /*
- * Copyright (c) 2012, 2016, Intel Corporation.
+ * Copyright (c) 2012, 2017, Intel Corporation.
  * Use is subject to license terms.
  *
  * Author: Johann Lombardi <johann.lombardi@intel.com>
@@ -89,6 +89,14 @@ static int qsd_reint_qid(const struct lu_env *env, struct qsd_qtype_info *qqi,
 
        rc = qsd_update_index(env, qqi, qid, global, 0, rec);
 out:
+
+       if (global && qid->qid_uid == 0) {
+               struct lquota_glb_rec *glb_rec = (struct lquota_glb_rec *)rec;
+               qsd_update_default_quota(qqi, glb_rec->qbr_hardlimit,
+                                        glb_rec->qbr_softlimit,
+                                        glb_rec->qbr_time);
+       }
+
        lqe_putref(lqe);
        RETURN(rc);
 }
@@ -199,7 +207,7 @@ static int qsd_reint_index(const struct lu_env *env, struct qsd_qtype_info *qqi,
        npages /= PAGE_SIZE;
 
        /* allocate pages for bulk index read */
-       OBD_ALLOC(pages, npages * sizeof(*pages));
+       OBD_ALLOC_PTR_ARRAY(pages, npages);
        if (pages == NULL)
                GOTO(out, rc = -ENOMEM);
        for (i = 0; i < npages; i++) {
@@ -277,7 +285,7 @@ out:
                for (i = 0; i < npages; i++)
                        if (pages[i] != NULL)
                                __free_page(pages[i]);
-               OBD_FREE(pages, npages * sizeof(*pages));
+               OBD_FREE_PTR_ARRAY(pages, npages);
        }
 
        /* Update index version */
@@ -401,17 +409,26 @@ static int qsd_started(struct qsd_instance *qsd)
        return started;
 }
 
+struct qsd_reint_args { /* start-up arguments for qsd_reint_main(); allocated by qsd_start_reint_thread(), freed by the thread on exit (or by the starter if kthread_create() fails) */
+       struct qsd_qtype_info   *qra_qqi; /* quota type info the reintegration thread operates on */
+       struct lu_env            qra_env; /* embedded env: lu_env_init(LCT_DT_THREAD) by the starter, lu_env_fini() by the thread */
+       struct completion       *qra_started; /* points at the starter's on-stack completion; the thread calls complete() on it once and the starter waits for that */
+};
+
+#ifndef TASK_IDLE /* NOTE(review): presumably for kernels whose headers predate TASK_IDLE - confirm */
+#define TASK_IDLE TASK_INTERRUPTIBLE /* fallback sleep state for the wait loops in qsd_reint_main() */
+#endif
+
 /*
  * Routine executed by the reintegration thread.
  */
-static int qsd_reint_main(void *args)
+static int qsd_reint_main(void *_args)
 {
-       struct lu_env           *env;
+       struct qsd_reint_args   *args = _args;
+       struct lu_env           *env = &args->qra_env;
        struct qsd_thread_info  *qti;
-       struct qsd_qtype_info   *qqi = (struct qsd_qtype_info *)args;
+       struct qsd_qtype_info   *qqi = args->qra_qqi;
        struct qsd_instance     *qsd = qqi->qqi_qsd;
-       struct ptlrpc_thread    *thread = &qqi->qqi_reint_thread;
-       struct l_wait_info       lwi = { 0 };
        int                      rc;
        ENTRY;
 
@@ -419,27 +436,19 @@ static int qsd_reint_main(void *args)
               qsd->qsd_svname, PFID(&qqi->qqi_fid));
 
        qqi_getref(qqi);
-       lu_ref_add(&qqi->qqi_reference, "reint_thread", thread);
-
-       thread_set_flags(thread, SVC_RUNNING);
-       wake_up(&thread->t_ctl_waitq);
-
-       OBD_ALLOC_PTR(env);
-       if (env == NULL)
-               GOTO(out, rc = -ENOMEM);
-
-       /* initialize environment */
-       rc = lu_env_init(env, LCT_DT_THREAD);
-       if (rc)
-               GOTO(out_env, rc);
+       lu_ref_add(&qqi->qqi_reference, "reint_thread", current);
        qti = qsd_info(env);
 
+       complete(args->qra_started);
+
        /* wait for the connection to master established */
-       l_wait_event(thread->t_ctl_waitq,
-                    qsd_connected(qsd) || !thread_is_running(thread), &lwi);
+       while (({set_current_state(TASK_IDLE);
+                !qsd_connected(qsd) && !kthread_should_stop(); }))
+               schedule();
+       __set_current_state(TASK_RUNNING);
 
        /* Step 1: enqueue global index lock */
-       if (!thread_is_running(thread))
+       if (kthread_should_stop())
                GOTO(out_env_init, rc = 0);
 
        LASSERT(qsd->qsd_exp != NULL);
@@ -476,7 +485,7 @@ static int qsd_reint_main(void *args)
        }
 
        /* Step 2: reintegrate global index */
-       if (!thread_is_running(thread))
+       if (kthread_should_stop())
                GOTO(out_lock, rc = 0);
 
        OBD_FAIL_TIMEOUT(OBD_FAIL_QUOTA_DELAY_REINT, 10);
@@ -493,7 +502,7 @@ static int qsd_reint_main(void *args)
        }
 
        /* Step 3: reintegrate slave index */
-       if (!thread_is_running(thread))
+       if (kthread_should_stop())
                GOTO(out_lock, rc = 0);
 
        if (qqi->qqi_slv_ver != qti->qti_slv_ver) {
@@ -508,10 +517,12 @@ static int qsd_reint_main(void *args)
        }
 
        /* wait for the qsd instance started (target recovery done) */
-       l_wait_event(thread->t_ctl_waitq,
-                    qsd_started(qsd) || !thread_is_running(thread), &lwi);
+       while (({set_current_state(TASK_IDLE);
+                !qsd_started(qsd) && !kthread_should_stop(); }))
+               schedule();
+       __set_current_state(TASK_RUNNING);
 
-       if (!thread_is_running(thread))
+       if (kthread_should_stop())
                GOTO(out_lock, rc = 0);
 
        /* Step 4: start reconciliation for each enforced ID */
@@ -525,17 +536,15 @@ out_lock:
        ldlm_lock_decref(&qqi->qqi_lockh, qsd_glb_einfo.ei_mode);
 out_env_init:
        lu_env_fini(env);
-out_env:
-       OBD_FREE_PTR(env);
-out:
+       OBD_FREE_PTR(args);
        write_lock(&qsd->qsd_lock);
        qqi->qqi_reint = 0;
        write_unlock(&qsd->qsd_lock);
 
-       thread_set_flags(thread, SVC_STOPPED);
-       wake_up(&thread->t_ctl_waitq);
+       if (xchg(&qqi->qqi_reint_task, NULL) == NULL)
+               wait_var_event(qqi, kthread_should_stop());
 
-       lu_ref_del(&qqi->qqi_reference, "reint_thread", thread);
+       lu_ref_del(&qqi->qqi_reference, "reint_thread", current);
        qqi_putref(qqi);
 
        return rc;
@@ -543,16 +552,11 @@ out:
 
 void qsd_stop_reint_thread(struct qsd_qtype_info *qqi)
 {
-       struct ptlrpc_thread    *thread = &qqi->qqi_reint_thread;
-       struct l_wait_info       lwi = { 0 };
-
-       if (!thread_is_stopped(thread)) {
-               thread_set_flags(thread, SVC_STOPPING);
-               wake_up(&thread->t_ctl_waitq);
+       struct task_struct *task; /* reint thread claimed from qqi_reint_task, if any */
 
-               l_wait_event(thread->t_ctl_waitq,
-                            thread_is_stopped(thread), &lwi);
-       }
+       task = xchg(&qqi->qqi_reint_task, NULL); /* atomically take the pointer and leave NULL; qsd_reint_main() does the same xchg on exit, so only one side ever sees the non-NULL value */
+       if (task) /* NULL here means there is no task pointer left for this caller to stop */
+               kthread_stop(task); /* qsd_reint_main() polls kthread_should_stop() in its wait loops and between steps */
 }
 
 static int qsd_entry_iter_cb(struct cfs_hash *hs, struct cfs_hash_bd *bd,
@@ -571,13 +575,13 @@ static int qsd_entry_iter_cb(struct cfs_hash *hs, struct cfs_hash_bd *bd,
        return 0;
 }
 
-static bool qsd_pending_updates(struct qsd_qtype_info *qqi)
+static bool qqi_reint_delayed(struct qsd_qtype_info *qqi)
 {
        struct qsd_instance     *qsd = qqi->qqi_qsd;
        struct qsd_upd_rec      *upd;
        struct lquota_entry     *lqe, *n;
        int                      dqacq = 0;
-       bool                     updates = false;
+       bool                     delay = false;
        ENTRY;
 
        /* any pending quota adjust? */
@@ -590,47 +594,61 @@ static bool qsd_pending_updates(struct qsd_qtype_info *qqi)
        }
        spin_unlock(&qsd->qsd_adjust_lock);
 
+       /* any pending quota request? */
+       cfs_hash_for_each_safe(qqi->qqi_site->lqs_hash, qsd_entry_iter_cb,
+                              &dqacq);
+       if (dqacq) {
+               CDEBUG(D_QUOTA, "%s: pending dqacq for type:%d.\n",
+                      qsd->qsd_svname, qqi->qqi_qtype);
+               GOTO(out, delay = true);
+       }
+
        /* any pending updates? */
-       read_lock(&qsd->qsd_lock);
+       write_lock(&qsd->qsd_lock);
+
+       /* check if the reintegration has already started or finished */
+       if ((qqi->qqi_glb_uptodate && qqi->qqi_slv_uptodate) ||
+            qqi->qqi_reint || qsd->qsd_stopping || qsd->qsd_updating)
+               GOTO(out_lock, delay = true);
+
+       /* there could be some unfinished global or index entry updates
+        * (very unlikely), to avoid them messing up with the reint
+        * procedure, we just return and try to re-start reint later. */
        list_for_each_entry(upd, &qsd->qsd_upd_list, qur_link) {
                if (upd->qur_qqi == qqi) {
-                       read_unlock(&qsd->qsd_lock);
                        CDEBUG(D_QUOTA, "%s: pending %s updates for type:%d.\n",
                               qsd->qsd_svname,
                               upd->qur_global ? "global" : "slave",
                               qqi->qqi_qtype);
-                       GOTO(out, updates = true);
+                       GOTO(out_lock, delay = true);
                }
        }
-       read_unlock(&qsd->qsd_lock);
+       qqi->qqi_reint = 1;
 
-       /* any pending quota request? */
-       cfs_hash_for_each_safe(qqi->qqi_site->lqs_hash, qsd_entry_iter_cb,
-                              &dqacq);
-       if (dqacq) {
-               CDEBUG(D_QUOTA, "%s: pending dqacq for type:%d.\n",
-                      qsd->qsd_svname, qqi->qqi_qtype);
-               updates = true;
-       }
        EXIT;
+out_lock:
+       write_unlock(&qsd->qsd_lock);
 out:
-       if (updates)
+       if (delay)
                CERROR("%s: Delaying reintegration for qtype:%d until pending "
                       "updates are flushed.\n",
                       qsd->qsd_svname, qqi->qqi_qtype);
-       return updates;
+       return delay;
 }
 
 int qsd_start_reint_thread(struct qsd_qtype_info *qqi)
 {
-       struct ptlrpc_thread    *thread = &qqi->qqi_reint_thread;
        struct qsd_instance     *qsd = qqi->qqi_qsd;
-       struct l_wait_info       lwi = { 0 };
        struct task_struct      *task;
+       struct qsd_reint_args   *args;
+       DECLARE_COMPLETION_ONSTACK(started);
        int                      rc;
-       char                    *name;
        ENTRY;
 
+       /* do not try to start a new thread as this can lead to a deadlock */
+       if (current->flags & (PF_MEMALLOC | PF_KSWAPD))
+               RETURN(0);
+
        if (qsd->qsd_dev->dd_rdonly)
                RETURN(0);
 
@@ -638,53 +656,41 @@ int qsd_start_reint_thread(struct qsd_qtype_info *qqi)
        if (!qsd_type_enabled(qsd, qqi->qqi_qtype))
                RETURN(0);
 
-       if (qsd->qsd_acct_failed)
+       if (qqi->qqi_acct_failed)
                /* no space accounting support, can't enable enforcement */
                RETURN(0);
 
-       /* check if the reintegration has already started or finished */
-       write_lock(&qsd->qsd_lock);
-
-       if ((qqi->qqi_glb_uptodate && qqi->qqi_slv_uptodate) ||
-            qqi->qqi_reint || qsd->qsd_stopping) {
-               write_unlock(&qsd->qsd_lock);
-               RETURN(0);
-       }
-       qqi->qqi_reint = 1;
-
-       write_unlock(&qsd->qsd_lock);
-
-       /* there could be some unfinished global or index entry updates
-        * (very unlikely), to avoid them messing up with the reint
-        * procedure, we just return and try to re-start reint later. */
-       if (qsd_pending_updates(qqi)) {
-               write_lock(&qsd->qsd_lock);
-               qqi->qqi_reint = 0;
-               write_unlock(&qsd->qsd_lock);
+       if (qqi_reint_delayed(qqi))
                RETURN(0);
-       }
 
-       OBD_ALLOC(name, MTI_NAME_MAXLEN);
-       if (name == NULL)
-               RETURN(-ENOMEM);
-
-       snprintf(name, MTI_NAME_MAXLEN, "qsd_reint_%d.%s",
-                qqi->qqi_qtype, qsd->qsd_svname);
+       OBD_ALLOC_PTR(args);
+       if (args == NULL)
+               GOTO(out, rc = -ENOMEM);
 
-       task = kthread_run(qsd_reint_main, qqi, name);
-       OBD_FREE(name, MTI_NAME_MAXLEN);
+       args->qra_started = &started;
+       args->qra_qqi = qqi;
+       /* initialize environment */
+       rc = lu_env_init(&args->qra_env, LCT_DT_THREAD);
+       if (rc)
+               GOTO(out_args, rc);
+       task = kthread_create(qsd_reint_main, args, "qsd_reint_%d.%s",
+                             qqi->qqi_qtype, qsd->qsd_svname);
 
        if (IS_ERR(task)) {
                rc = PTR_ERR(task);
-               thread_set_flags(thread, SVC_STOPPED);
+               lu_env_fini(&args->qra_env);
+out_args:
+               OBD_FREE_PTR(args);
+out:
                write_lock(&qsd->qsd_lock);
                qqi->qqi_reint = 0;
                write_unlock(&qsd->qsd_lock);
                RETURN(rc);
        }
 
-       l_wait_event(thread->t_ctl_waitq,
-                    thread_is_running(thread) || thread_is_stopped(thread),
-                    &lwi);
+       qqi->qqi_reint_task = task;
+       wake_up_process(task);
+       wait_for_completion(&started);
+
        RETURN(0);
 }