Whamcloud - gitweb
LU-16271 ptlrpc: fix eviction right after recovery
[fs/lustre-release.git] / lustre / quota / qsd_writeback.c
index 70d6a81..9f36287 100644 (file)
  * GPL HEADER END
  */
 /*
- * Copyright (c) 2011, 2012, Intel, Inc.
+ * Copyright (c) 2012, 2017, Intel Corporation.
  * Use is subject to license terms.
  *
  * Author: Johann Lombardi <johann.lombardi@intel.com>
  * Author: Niu    Yawei    <yawei.niu@intel.com>
  */
 
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
-
 #define DEBUG_SUBSYSTEM S_LQUOTA
 
+#include <linux/kthread.h>
 #include "qsd_internal.h"
 
-extern cfs_mem_cache_t *upd_kmem;
-
 /*
  * Allocate and fill a qsd_upd_rec structure to be processed by the writeback
  * thread.
@@ -58,14 +53,13 @@ static struct qsd_upd_rec *qsd_upd_alloc(struct qsd_qtype_info *qqi,
 {
        struct qsd_upd_rec      *upd;
 
-       OBD_SLAB_ALLOC_PTR_GFP(upd, upd_kmem, CFS_ALLOC_IO);
+       OBD_SLAB_ALLOC_PTR_GFP(upd, upd_kmem, GFP_NOFS);
        if (upd == NULL) {
-               CERROR("Failed to allocate upd");
                return NULL;
        }
 
        /* fill it */
-       CFS_INIT_LIST_HEAD(&upd->qur_link);
+       INIT_LIST_HEAD(&upd->qur_link);
        upd->qur_qqi = qqi;
        upd->qur_lqe = lqe;
        if (lqe)
@@ -91,22 +85,32 @@ static void qsd_upd_add(struct qsd_instance *qsd, struct qsd_upd_rec *upd)
        if (!qsd->qsd_stopping) {
                list_add_tail(&upd->qur_link, &qsd->qsd_upd_list);
                /* wake up the upd thread */
-               cfs_waitq_signal(&qsd->qsd_upd_thread.t_ctl_waitq);
+               if (qsd->qsd_upd_task)
+                       wake_up_process(qsd->qsd_upd_task);
        } else {
-               CWARN("%s: discard deferred update.\n", qsd->qsd_svname);
+               CWARN("%s: discard update.\n", qsd->qsd_svname);
                if (upd->qur_lqe)
-                       LQUOTA_WARN(upd->qur_lqe, "discard deferred update.");
+                       LQUOTA_WARN(upd->qur_lqe, "discard update.");
                qsd_upd_free(upd);
        }
 }
 
 /* must hold the qsd_lock */
-static void qsd_add_deferred(cfs_list_t *list, struct qsd_upd_rec *upd)
+static void qsd_add_deferred(struct qsd_instance *qsd, struct list_head *list,
+                            struct qsd_upd_rec *upd)
 {
        struct qsd_upd_rec      *tmp, *n;
 
+       if (qsd->qsd_stopping) {
+               CWARN("%s: discard deferred udpate.\n", qsd->qsd_svname);
+               if (upd->qur_lqe)
+                       LQUOTA_WARN(upd->qur_lqe, "discard deferred update.");
+               qsd_upd_free(upd);
+               return;
+       }
+
        /* Sort the updates in ascending order */
-       cfs_list_for_each_entry_safe_reverse(tmp, n, list, qur_link) {
+       list_for_each_entry_safe_reverse(tmp, n, list, qur_link) {
 
                /* There could be some legacy records which have duplicated
                 * version. Imagine following scenario: slave received global
@@ -116,26 +120,29 @@ static void qsd_add_deferred(cfs_list_t *list, struct qsd_upd_rec *upd)
                 * updates. We should just delete the legacy record in such
                 * case. */
                if (upd->qur_ver == tmp->qur_ver) {
-                       LASSERT(tmp->qur_lqe);
-                       LQUOTA_ERROR(tmp->qur_lqe, "Found a conflict record "
-                                    "with ver:"LPU64"", tmp->qur_ver);
-                       cfs_list_del_init(&tmp->qur_link);
+                       if (tmp->qur_lqe)
+                               LQUOTA_WARN(tmp->qur_lqe, "Found a conflict "
+                                           "record with ver:%llu",
+                                           tmp->qur_ver);
+                       else
+                               CWARN("%s: Found a conflict record with ver: "
+                                     "%llu\n", qsd->qsd_svname, tmp->qur_ver);
+
+                       list_del_init(&tmp->qur_link);
                        qsd_upd_free(tmp);
-               }
-
-               if (upd->qur_ver < tmp->qur_ver) {
+               } else if (upd->qur_ver < tmp->qur_ver) {
                        continue;
                } else {
-                       cfs_list_add_tail(&upd->qur_link, &tmp->qur_link);
+                       list_add_tail(&upd->qur_link, &tmp->qur_link);
                        return;
                }
        }
-       cfs_list_add(&upd->qur_link, list);
+       list_add(&upd->qur_link, list);
 }
 
 /* must hold the qsd_lock */
-static void qsd_kickoff_deferred(struct qsd_qtype_info *qqi, cfs_list_t *list,
-                                __u64 ver)
+static void qsd_kickoff_deferred(struct qsd_qtype_info *qqi,
+                                struct list_head *list, __u64 ver)
 {
        struct qsd_upd_rec      *upd, *tmp;
        ENTRY;
@@ -143,12 +150,12 @@ static void qsd_kickoff_deferred(struct qsd_qtype_info *qqi, cfs_list_t *list,
        /* Get the first update record in the list, which has the smallest
         * version, discard all records with versions not greater than the
         * current one */
-       cfs_list_for_each_entry_safe(upd, tmp, list, qur_link) {
+       list_for_each_entry_safe(upd, tmp, list, qur_link) {
                if (upd->qur_ver <= ver) {
                        /* drop this update */
-                       cfs_list_del_init(&upd->qur_link);
+                       list_del_init(&upd->qur_link);
                        CDEBUG(D_QUOTA, "%s: skipping deferred update ver:"
-                              LPU64"/"LPU64", global:%d, qid:"LPU64"\n",
+                              "%llu/%llu, global:%d, qid:%llu\n",
                               qqi->qqi_qsd->qsd_svname, upd->qur_ver, ver,
                               upd->qur_global, upd->qur_qid.qid_uid);
                        qsd_upd_free(upd);
@@ -158,15 +165,15 @@ static void qsd_kickoff_deferred(struct qsd_qtype_info *qqi, cfs_list_t *list,
        }
 
        /* No remaining deferred update */
-       if (cfs_list_empty(list))
+       if (list_empty(list))
                RETURN_EXIT;
 
        CDEBUG(D_QUOTA, "%s: found deferred update record. "
-              "version:"LPU64"/"LPU64", global:%d, qid:"LPU64"\n",
+              "version:%llu/%llu, global:%d, qid:%llu\n",
               qqi->qqi_qsd->qsd_svname, upd->qur_ver, ver,
               upd->qur_global, upd->qur_qid.qid_uid);
 
-       LASSERTF(upd->qur_ver > ver, "lur_ver:"LPU64", cur_ver:"LPU64"\n",
+       LASSERTF(upd->qur_ver > ver, "lur_ver:%llu, cur_ver:%llu\n",
                 upd->qur_ver, ver);
 
        /* Kick off the deferred update */
@@ -185,8 +192,8 @@ static void qsd_kickoff_deferred(struct qsd_qtype_info *qqi, cfs_list_t *list,
  */
 void qsd_bump_version(struct qsd_qtype_info *qqi, __u64 ver, bool global)
 {
-       cfs_list_t      *list;
-       __u64           *idx_ver;
+       struct list_head *list;
+       __u64            *idx_ver;
 
        idx_ver = global ? &qqi->qqi_glb_ver : &qqi->qqi_slv_ver;
        list    = global ? &qqi->qqi_deferred_glb : &qqi->qqi_deferred_slv;
@@ -209,7 +216,7 @@ void qsd_bump_version(struct qsd_qtype_info *qqi, __u64 ver, bool global)
  * \param  qid   - quota id
  * \param  rec   - global or slave record to be updated to disk
  * \param  ver   - new index file version
- * \param  global- ture : master record; false : slave record
+ * \param  global- true: master record; false: slave record
  */
 void qsd_upd_schedule(struct qsd_qtype_info *qqi, struct lquota_entry *lqe,
                      union lquota_id *qid, union lquota_rec *rec, __u64 ver,
@@ -220,7 +227,7 @@ void qsd_upd_schedule(struct qsd_qtype_info *qqi, struct lquota_entry *lqe,
        __u64                    cur_ver;
        ENTRY;
 
-       CDEBUG(D_QUOTA, "%s: schedule update. global:%s, version:"LPU64"\n",
+       CDEBUG(D_QUOTA, "%s: schedule update. global:%s, version:%llu\n",
               qsd->qsd_svname, global ? "true" : "false", ver);
 
        upd = qsd_upd_alloc(qqi, lqe, qid, rec, ver, global);
@@ -245,11 +252,11 @@ void qsd_upd_schedule(struct qsd_qtype_info *qqi, struct lquota_entry *lqe,
                        /* legitimate race between glimpse AST and
                         * reintegration */
                        CDEBUG(D_QUOTA, "%s: discarding glb update from glimpse"
-                              " ver:"LPU64" local ver:"LPU64"\n",
+                              " ver:%llu local ver:%llu\n",
                               qsd->qsd_svname, ver, cur_ver);
                else
-                       CERROR("%s: discard slv update, ver:"LPU64" local ver:"
-                              LPU64"\n", qsd->qsd_svname, ver, cur_ver);
+                       CERROR("%s: discard slv update, ver:%llu local ver:"
+                              "%llu\n", qsd->qsd_svname, ver, cur_ver);
                qsd_upd_free(upd);
        } else if ((ver == cur_ver + 1) && qqi->qqi_glb_uptodate &&
                   qqi->qqi_slv_uptodate) {
@@ -259,9 +266,9 @@ void qsd_upd_schedule(struct qsd_qtype_info *qqi, struct lquota_entry *lqe,
                /* Out of order update (the one with smaller version hasn't
                 * reached slave or hasn't been flushed to disk yet), or
                 * the reintegration is in progress. Defer the update. */
-               cfs_list_t *list = global ? &qqi->qqi_deferred_glb :
-                                           &qqi->qqi_deferred_slv;
-               qsd_add_deferred(list, upd);
+               struct list_head *list = global ? &qqi->qqi_deferred_glb :
+                                                 &qqi->qqi_deferred_slv;
+               qsd_add_deferred(qsd, list, upd);
        }
 
        write_unlock(&qsd->qsd_lock);
@@ -273,15 +280,60 @@ static int qsd_process_upd(const struct lu_env *env, struct qsd_upd_rec *upd)
 {
        struct lquota_entry     *lqe = upd->qur_lqe;
        struct qsd_qtype_info   *qqi = upd->qur_qqi;
+       struct qsd_instance     *qsd = qqi->qqi_qsd;
        int                      rc;
        ENTRY;
 
+       if (qsd->qsd_exclusive) { /* It could be deadlock running with reint */
+               read_lock(&qsd->qsd_lock);
+               rc = qqi->qqi_reint;
+               read_unlock(&qsd->qsd_lock);
+               if (rc)
+                       return 1;
+       }
+
+       if (upd->qur_global &&
+           (LQUOTA_FLAG(upd->qur_rec.lqr_glb_rec.qbr_time) &
+                                                       LQUOTA_FLAG_DELETED)) {
+               struct thandle          *th = NULL;
+               struct dt_object        *obj;
+
+               obj = qqi->qqi_glb_obj;
+
+               th = dt_trans_create(env, qqi->qqi_qsd->qsd_dev);
+               if (IS_ERR(th))
+                       RETURN(PTR_ERR(th));
+
+               rc = lquota_disk_declare_write(env, th, obj, &upd->qur_qid);
+               if (rc)
+                       GOTO(out_del, rc);
+
+               rc = dt_trans_start_local(env, qqi->qqi_qsd->qsd_dev, th);
+               if (rc)
+                       GOTO(out_del, rc);
+
+               rc = lquota_disk_delete(env, th, obj, upd->qur_qid.qid_uid,
+                                       NULL);
+               if (rc == -ENOENT)
+                       rc = 0;
+
+out_del:
+               dt_trans_stop(env, qqi->qqi_qsd->qsd_dev, th);
+               if (lqe != NULL)
+                       lqe_set_deleted(lqe);
+
+               qsd_bump_version(qqi, upd->qur_ver, true);
+               RETURN(rc);
+       }
+
        if (lqe == NULL) {
                lqe = lqe_locate(env, qqi->qqi_site, &upd->qur_qid);
                if (IS_ERR(lqe))
                        GOTO(out, rc = PTR_ERR(lqe));
        }
 
+       lqe->lqe_is_deleted = 0;
+
        /* The in-memory lqe update for slave index copy isn't deferred,
         * we shouldn't touch it here. */
        if (upd->qur_global) {
@@ -290,6 +342,11 @@ static int qsd_process_upd(const struct lu_env *env, struct qsd_upd_rec *upd)
                        GOTO(out, rc);
                /* refresh usage */
                qsd_refresh_usage(env, lqe);
+
+               spin_lock(&qsd->qsd_adjust_lock);
+               lqe->lqe_adjust_time = 0;
+               spin_unlock(&qsd->qsd_adjust_lock);
+
                /* Report usage asynchronously */
                rc = qsd_adjust(env, lqe);
                if (rc)
@@ -299,6 +356,21 @@ static int qsd_process_upd(const struct lu_env *env, struct qsd_upd_rec *upd)
        rc = qsd_update_index(env, qqi, &upd->qur_qid, upd->qur_global,
                              upd->qur_ver, &upd->qur_rec);
 out:
+       if (upd->qur_global && rc == 0 &&
+           upd->qur_rec.lqr_glb_rec.qbr_softlimit == 0 &&
+           upd->qur_rec.lqr_glb_rec.qbr_hardlimit == 0 &&
+           (LQUOTA_FLAG(upd->qur_rec.lqr_glb_rec.qbr_time) &
+                                                       LQUOTA_FLAG_DEFAULT)) {
+               lqe->lqe_is_default = true;
+               if (qqi->qqi_default_softlimit == 0 &&
+                   qqi->qqi_default_hardlimit == 0)
+                       lqe->lqe_enforced = false;
+               else
+                       lqe->lqe_enforced = true;
+
+               LQUOTA_DEBUG(lqe, "update to use default quota");
+       }
+
        if (lqe && !IS_ERR(lqe)) {
                lqe_putref(lqe);
                upd->qur_lqe = NULL;
@@ -311,71 +383,90 @@ void qsd_adjust_schedule(struct lquota_entry *lqe, bool defer, bool cancel)
        struct qsd_instance     *qsd = lqe2qqi(lqe)->qqi_qsd;
        bool                     added = false;
 
+       read_lock(&qsd->qsd_lock);
+       if (qsd->qsd_stopping) {
+               read_unlock(&qsd->qsd_lock);
+               return;
+       }
+       read_unlock(&qsd->qsd_lock);
+
        lqe_getref(lqe);
        spin_lock(&qsd->qsd_adjust_lock);
 
        /* the lqe is being queued for the per-ID lock cancel, we should
         * cancel the lock cancel and re-add it for quota adjust */
-       if (!cfs_list_empty(&lqe->lqe_link) &&
+       if (!list_empty(&lqe->lqe_link) &&
            lqe->lqe_adjust_time == 0) {
-               cfs_list_del_init(&lqe->lqe_link);
+               list_del_init(&lqe->lqe_link);
                lqe_putref(lqe);
        }
 
-       if (cfs_list_empty(&lqe->lqe_link)) {
-               if (cancel)
+       if (list_empty(&lqe->lqe_link)) {
+               if (!cancel) {
+                       lqe->lqe_adjust_time = ktime_get_seconds();
+                       if (defer)
+                               lqe->lqe_adjust_time += QSD_WB_INTERVAL;
+               } else {
                        lqe->lqe_adjust_time = 0;
-               else
-                       lqe->lqe_adjust_time = defer ?
-                               cfs_time_shift_64(QSD_WB_INTERVAL) :
-                               cfs_time_current_64();
-               /* lqe reference transfered to list */
+               }
+
+               /* lqe reference transferred to list */
                if (defer)
-                       cfs_list_add_tail(&lqe->lqe_link,
+                       list_add_tail(&lqe->lqe_link,
                                          &qsd->qsd_adjust_list);
                else
-                       cfs_list_add(&lqe->lqe_link, &qsd->qsd_adjust_list);
+                       list_add(&lqe->lqe_link, &qsd->qsd_adjust_list);
                added = true;
        }
        spin_unlock(&qsd->qsd_adjust_lock);
 
-       if (added)
-               cfs_waitq_signal(&qsd->qsd_upd_thread.t_ctl_waitq);
-       else
+       if (!added)
                lqe_putref(lqe);
+       else {
+               read_lock(&qsd->qsd_lock);
+               if (qsd->qsd_upd_task)
+                       wake_up_process(qsd->qsd_upd_task);
+               read_unlock(&qsd->qsd_lock);
+       }
 }
 
 /* Return true if there are pending writeback records or pending
  * adjust requests */
-static bool qsd_job_pending(struct qsd_instance *qsd, cfs_list_t *upd,
+static bool qsd_job_pending(struct qsd_instance *qsd, struct list_head *upd,
                            bool *uptodate)
 {
        bool    job_pending = false;
        int     qtype;
 
-       LASSERT(cfs_list_empty(upd));
+       LASSERT(list_empty(upd));
        *uptodate = true;
 
        spin_lock(&qsd->qsd_adjust_lock);
-       if (!cfs_list_empty(&qsd->qsd_adjust_list)) {
+       if (!list_empty(&qsd->qsd_adjust_list)) {
                struct lquota_entry *lqe;
-               lqe = cfs_list_entry(qsd->qsd_adjust_list.next,
+               lqe = list_entry(qsd->qsd_adjust_list.next,
                                     struct lquota_entry, lqe_link);
-               if (cfs_time_beforeq_64(lqe->lqe_adjust_time,
-                                       cfs_time_current_64()))
+               if (ktime_get_seconds() >= lqe->lqe_adjust_time)
                        job_pending = true;
        }
        spin_unlock(&qsd->qsd_adjust_lock);
 
        write_lock(&qsd->qsd_lock);
-       if (!cfs_list_empty(&qsd->qsd_upd_list)) {
-               cfs_list_splice_init(&qsd->qsd_upd_list, upd);
+       if (!list_empty(&qsd->qsd_upd_list)) {
+               list_splice_init(&qsd->qsd_upd_list, upd);
                job_pending = true;
        }
+       if (qsd->qsd_exclusive)
+               qsd->qsd_updating = job_pending;
 
-       for (qtype = USRQUOTA; qtype < MAXQUOTAS; qtype++) {
+       for (qtype = USRQUOTA; qtype < LL_MAXQUOTAS; qtype++) {
                struct qsd_qtype_info *qqi = qsd->qsd_type_array[qtype];
 
+               /* don't bother kicking off reintegration if space accounting
+                * failed to be enabled */
+               if (qqi->qqi_acct_failed)
+                       continue;
+
                if (!qsd_type_enabled(qsd, qtype))
                        continue;
 
@@ -390,64 +481,85 @@ static bool qsd_job_pending(struct qsd_instance *qsd, cfs_list_t *upd,
        return job_pending;
 }
 
-static int qsd_upd_thread(void *arg)
+struct qsd_upd_args {
+       struct qsd_instance     *qua_inst;
+       struct lu_env            qua_env;
+       struct completion       *qua_started;
+};
+
+#ifndef TASK_IDLE
+/* This identity is only safe inside kernel threads, or other places where
+ * all signals are disabled.  So it is placed here rather than in an include
+ * file.
+ * TASK_IDLE was added in v4.1-rc4-43-g80ed87c8a9ca so this can be removed
+ * when we no longer support kernels older than that.
+ */
+#define TASK_IDLE TASK_INTERRUPTIBLE
+#endif
+
+static int qsd_upd_thread(void *_args)
 {
-       struct qsd_instance     *qsd = (struct qsd_instance *)arg;
-       struct ptlrpc_thread    *thread = &qsd->qsd_upd_thread;
-       struct l_wait_info       lwi;
-       cfs_list_t               queue;
+       struct qsd_upd_args     *args = _args;
+       struct qsd_instance     *qsd = args->qua_inst;
+       LIST_HEAD(queue);
        struct qsd_upd_rec      *upd, *n;
-       char                     pname[MTI_NAME_MAXLEN];
-       struct lu_env           *env;
+       struct lu_env           *env = &args->qua_env;
        int                      qtype, rc = 0;
        bool                     uptodate;
-       struct lquota_entry     *lqe, *tmp;
-       __u64                    cur_time;
+       struct lquota_entry     *lqe;
+       time64_t cur_time;
        ENTRY;
 
-       OBD_ALLOC_PTR(env);
-       if (env == NULL)
-               RETURN(-ENOMEM);
-
-       rc = lu_env_init(env, LCT_DT_THREAD);
-       if (rc) {
-               CERROR("%s: Fail to init env.", qsd->qsd_svname);
-               OBD_FREE_PTR(env);
-               RETURN(rc);
-       }
-
-       snprintf(pname, MTI_NAME_MAXLEN, "lquota_wb_%s", qsd->qsd_svname);
-       cfs_daemonize(pname);
-
-       thread_set_flags(thread, SVC_RUNNING);
-       cfs_waitq_signal(&thread->t_ctl_waitq);
-
-       CFS_INIT_LIST_HEAD(&queue);
-       lwi = LWI_TIMEOUT(cfs_time_seconds(QSD_WB_INTERVAL), NULL, NULL);
-       while (1) {
-               l_wait_event(thread->t_ctl_waitq,
-                            qsd_job_pending(qsd, &queue, &uptodate) ||
-                            !thread_is_running(thread), &lwi);
-
-               cfs_list_for_each_entry_safe(upd, n, &queue, qur_link) {
-                       cfs_list_del_init(&upd->qur_link);
-                       qsd_process_upd(env, upd);
-                       qsd_upd_free(upd);
+       complete(args->qua_started);
+       while (({set_current_state(TASK_IDLE);
+                !kthread_should_stop(); })) {
+               int count = 0;
+
+               if (!qsd_job_pending(qsd, &queue, &uptodate))
+                       schedule_timeout(cfs_time_seconds(QSD_WB_INTERVAL));
+               __set_current_state(TASK_RUNNING);
+
+               while (1) {
+                       list_for_each_entry_safe(upd, n, &queue, qur_link) {
+                               if (qsd_process_upd(env, upd) <= 0) {
+                                       list_del_init(&upd->qur_link);
+                                       qsd_upd_free(upd);
+                               }
+                       }
+                       if (list_empty(&queue))
+                               break;
+                       count++;
+                       if (count % 7 == 0) {
+                               n = list_first_entry(&queue, struct qsd_upd_rec,
+                                                    qur_link);
+                               CWARN("%s: The reintegration thread [%d] "
+                                     "blocked more than %ld seconds\n",
+                                     n->qur_qqi->qqi_qsd->qsd_svname,
+                                     n->qur_qqi->qqi_qtype, count *
+                                     cfs_time_seconds(QSD_WB_INTERVAL) / 10);
+                       }
+                       schedule_timeout_interruptible(
+                               cfs_time_seconds(QSD_WB_INTERVAL) / 10);
+               }
+               if (qsd->qsd_exclusive) {
+                       write_lock(&qsd->qsd_lock);
+                       qsd->qsd_updating = false;
+                       write_unlock(&qsd->qsd_lock);
                }
 
                spin_lock(&qsd->qsd_adjust_lock);
-               cur_time = cfs_time_current_64();
-               cfs_list_for_each_entry_safe(lqe, tmp, &qsd->qsd_adjust_list,
-                                            lqe_link) {
+               cur_time = ktime_get_seconds();
+               while (!list_empty(&qsd->qsd_adjust_list)) {
+                       lqe = list_entry(qsd->qsd_adjust_list.next,
+                                        struct lquota_entry, lqe_link);
                        /* deferred items are sorted by time */
-                       if (!cfs_time_beforeq_64(lqe->lqe_adjust_time,
-                                                cur_time))
+                       if (lqe->lqe_adjust_time > cur_time)
                                break;
 
-                       cfs_list_del_init(&lqe->lqe_link);
+                       list_del_init(&lqe->lqe_link);
                        spin_unlock(&qsd->qsd_adjust_lock);
 
-                       if (thread_is_running(thread) && uptodate) {
+                       if (!kthread_should_stop() && uptodate) {
                                qsd_refresh_usage(env, lqe);
                                if (lqe->lqe_adjust_time == 0)
                                        qsd_id_lock_cancel(env, lqe);
@@ -460,47 +572,65 @@ static int qsd_upd_thread(void *arg)
                }
                spin_unlock(&qsd->qsd_adjust_lock);
 
-               if (!thread_is_running(thread))
-                       break;
-
-               if (uptodate)
+               if (uptodate || kthread_should_stop())
                        continue;
 
-               for (qtype = USRQUOTA; qtype < MAXQUOTAS; qtype++)
+               for (qtype = USRQUOTA; qtype < LL_MAXQUOTAS; qtype++)
                        qsd_start_reint_thread(qsd->qsd_type_array[qtype]);
        }
+       __set_current_state(TASK_RUNNING);
+
        lu_env_fini(env);
-       OBD_FREE_PTR(env);
-       thread_set_flags(thread, SVC_STOPPED);
-       cfs_waitq_signal(&thread->t_ctl_waitq);
+       OBD_FREE_PTR(args);
+
        RETURN(rc);
 }
 
 int qsd_start_upd_thread(struct qsd_instance *qsd)
 {
-       struct ptlrpc_thread    *thread = &qsd->qsd_upd_thread;
-       struct l_wait_info       lwi = { 0 };
-       int                      rc;
+       struct qsd_upd_args *args;
+       struct task_struct *task;
+       DECLARE_COMPLETION_ONSTACK(started);
+       int rc;
        ENTRY;
 
-       rc = cfs_create_thread(qsd_upd_thread, (void *)qsd, 0);
-       if (rc < 0) {
-               CERROR("Fail to start quota update thread. rc: %d\n", rc);
-               thread_set_flags(thread, SVC_STOPPED);
-               RETURN(rc);
+       OBD_ALLOC_PTR(args);
+       if (args == NULL)
+               RETURN(-ENOMEM);
+
+       rc = lu_env_init(&args->qua_env, LCT_DT_THREAD);
+       if (rc) {
+               CERROR("%s: cannot init env: rc = %d\n", qsd->qsd_svname, rc);
+               goto out_free;
        }
+       args->qua_inst = qsd;
+       args->qua_started = &started;
+
+       task = kthread_create(qsd_upd_thread, args,
+                             "lquota_wb_%s", qsd->qsd_svname);
+       if (IS_ERR(task)) {
+               rc = PTR_ERR(task);
+               CERROR("fail to start quota update thread: rc = %d\n", rc);
+               goto out_fini;
+       }
+       qsd->qsd_upd_task = task;
+       wake_up_process(task);
+       wait_for_completion(&started);
 
-       l_wait_event(thread->t_ctl_waitq,
-                    thread_is_running(thread) || thread_is_stopped(thread),
-                    &lwi);
        RETURN(0);
+
+out_fini:
+       lu_env_fini(&args->qua_env);
+out_free:
+       OBD_FREE_PTR(args);
+       RETURN(rc);
 }
 
 static void qsd_cleanup_deferred(struct qsd_instance *qsd)
 {
        int     qtype;
 
-       for (qtype = USRQUOTA; qtype < MAXQUOTAS; qtype++) {
+       for (qtype = USRQUOTA; qtype < LL_MAXQUOTAS; qtype++) {
                struct qsd_upd_rec      *upd, *tmp;
                struct qsd_qtype_info   *qqi = qsd->qsd_type_array[qtype];
 
@@ -508,19 +638,19 @@ static void qsd_cleanup_deferred(struct qsd_instance *qsd)
                        continue;
 
                write_lock(&qsd->qsd_lock);
-               cfs_list_for_each_entry_safe(upd, tmp, &qqi->qqi_deferred_glb,
-                                            qur_link) {
-                       CWARN("%s: Free global deferred upd: ID:"LPU64", "
-                             "ver:"LPU64"/"LPU64"\n", qsd->qsd_svname,
+               list_for_each_entry_safe(upd, tmp, &qqi->qqi_deferred_glb,
+                                        qur_link) {
+                       CWARN("%s: Free global deferred upd: ID:%llu, "
+                             "ver:%llu/%llu\n", qsd->qsd_svname,
                              upd->qur_qid.qid_uid, upd->qur_ver,
                              qqi->qqi_glb_ver);
                        list_del_init(&upd->qur_link);
                        qsd_upd_free(upd);
                }
-               cfs_list_for_each_entry_safe(upd, tmp, &qqi->qqi_deferred_slv,
-                                            qur_link) {
-                       CWARN("%s: Free slave deferred upd: ID:"LPU64", "
-                             "ver:"LPU64"/"LPU64"\n", qsd->qsd_svname,
+               list_for_each_entry_safe(upd, tmp, &qqi->qqi_deferred_slv,
+                                        qur_link) {
+                       CWARN("%s: Free slave deferred upd: ID:%llu, "
+                             "ver:%llu/%llu\n", qsd->qsd_svname,
                              upd->qur_qid.qid_uid, upd->qur_ver,
                              qqi->qqi_slv_ver);
                        list_del_init(&upd->qur_link);
@@ -535,10 +665,10 @@ static void qsd_cleanup_adjust(struct qsd_instance *qsd)
        struct lquota_entry     *lqe;
 
        spin_lock(&qsd->qsd_adjust_lock);
-       while (!cfs_list_empty(&qsd->qsd_adjust_list)) {
-               lqe = cfs_list_entry(qsd->qsd_adjust_list.next,
-                                    struct lquota_entry, lqe_link);
-               cfs_list_del_init(&lqe->lqe_link);
+       while (!list_empty(&qsd->qsd_adjust_list)) {
+               lqe = list_entry(qsd->qsd_adjust_list.next,
+                                struct lquota_entry, lqe_link);
+               list_del_init(&lqe->lqe_link);
                lqe_putref(lqe);
        }
        spin_unlock(&qsd->qsd_adjust_lock);
@@ -546,16 +676,15 @@ static void qsd_cleanup_adjust(struct qsd_instance *qsd)
 
 void qsd_stop_upd_thread(struct qsd_instance *qsd)
 {
-       struct ptlrpc_thread    *thread = &qsd->qsd_upd_thread;
-       struct l_wait_info       lwi    = { 0 };
+       struct task_struct *task;
 
-       if (!thread_is_stopped(thread)) {
-               thread_set_flags(thread, SVC_STOPPING);
-               cfs_waitq_signal(&thread->t_ctl_waitq);
+       write_lock(&qsd->qsd_lock);
+       task = qsd->qsd_upd_task;
+       qsd->qsd_upd_task = NULL;
+       write_unlock(&qsd->qsd_lock);
+       if (task)
+               kthread_stop(task);
 
-               l_wait_event(thread->t_ctl_waitq, thread_is_stopped(thread),
-                            &lwi);
-       }
        qsd_cleanup_deferred(qsd);
        qsd_cleanup_adjust(qsd);
 }