* GPL HEADER END
*/
/*
- * Copyright (c) 2011, 2012, Intel, Inc.
+ * Copyright (c) 2012, 2017, Intel Corporation.
* Use is subject to license terms.
*
* Author: Johann Lombardi <johann.lombardi@intel.com>
* Author: Niu Yawei <yawei.niu@intel.com>
*/
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
-
#define DEBUG_SUBSYSTEM S_LQUOTA
+#include <linux/kthread.h>
#include "qsd_internal.h"
-extern cfs_mem_cache_t *upd_kmem;
-
/*
* Allocate and fill an qsd_upd_rec structure to be processed by the writeback
* thread.
{
struct qsd_upd_rec *upd;
- OBD_SLAB_ALLOC_PTR_GFP(upd, upd_kmem, CFS_ALLOC_IO);
+ OBD_SLAB_ALLOC_PTR_GFP(upd, upd_kmem, GFP_NOFS);
if (upd == NULL) {
- CERROR("Failed to allocate upd");
return NULL;
}
/* fill it */
- CFS_INIT_LIST_HEAD(&upd->qur_link);
+ INIT_LIST_HEAD(&upd->qur_link);
upd->qur_qqi = qqi;
upd->qur_lqe = lqe;
if (lqe)
if (!qsd->qsd_stopping) {
list_add_tail(&upd->qur_link, &qsd->qsd_upd_list);
/* wake up the upd thread */
- cfs_waitq_signal(&qsd->qsd_upd_thread.t_ctl_waitq);
+ if (qsd->qsd_upd_task)
+ wake_up_process(qsd->qsd_upd_task);
} else {
- CWARN("%s: discard deferred update.\n", qsd->qsd_svname);
+ CWARN("%s: discard update.\n", qsd->qsd_svname);
if (upd->qur_lqe)
- LQUOTA_WARN(upd->qur_lqe, "discard deferred update.");
+ LQUOTA_WARN(upd->qur_lqe, "discard update.");
qsd_upd_free(upd);
}
}
/* must hold the qsd_lock */
-static void qsd_add_deferred(cfs_list_t *list, struct qsd_upd_rec *upd)
+static void qsd_add_deferred(struct qsd_instance *qsd, struct list_head *list,
+ struct qsd_upd_rec *upd)
{
struct qsd_upd_rec *tmp, *n;
+ if (qsd->qsd_stopping) {
+ CWARN("%s: discard deferred update.\n", qsd->qsd_svname);
+ if (upd->qur_lqe)
+ LQUOTA_WARN(upd->qur_lqe, "discard deferred update.");
+ qsd_upd_free(upd);
+ return;
+ }
+
/* Sort the updates in ascending order */
- cfs_list_for_each_entry_safe_reverse(tmp, n, list, qur_link) {
+ list_for_each_entry_safe_reverse(tmp, n, list, qur_link) {
/* There could be some legacy records which have duplicated
* version. Imagine following scenario: slave received global
* updates. We should just delete the legacy record in such
* case. */
if (upd->qur_ver == tmp->qur_ver) {
- LASSERT(tmp->qur_lqe);
- LQUOTA_ERROR(tmp->qur_lqe, "Found a conflict record "
- "with ver:"LPU64"", tmp->qur_ver);
- cfs_list_del_init(&tmp->qur_link);
+ if (tmp->qur_lqe)
+ LQUOTA_WARN(tmp->qur_lqe, "Found a conflict "
+ "record with ver:%llu",
+ tmp->qur_ver);
+ else
+ CWARN("%s: Found a conflict record with ver: "
+ "%llu\n", qsd->qsd_svname, tmp->qur_ver);
+
+ list_del_init(&tmp->qur_link);
qsd_upd_free(tmp);
- }
-
- if (upd->qur_ver < tmp->qur_ver) {
+ } else if (upd->qur_ver < tmp->qur_ver) {
continue;
} else {
- cfs_list_add_tail(&upd->qur_link, &tmp->qur_link);
+ list_add_tail(&upd->qur_link, &tmp->qur_link);
return;
}
}
- cfs_list_add(&upd->qur_link, list);
+ list_add(&upd->qur_link, list);
}
/* must hold the qsd_lock */
-static void qsd_kickoff_deferred(struct qsd_qtype_info *qqi, cfs_list_t *list,
- __u64 ver)
+static void qsd_kickoff_deferred(struct qsd_qtype_info *qqi,
+ struct list_head *list, __u64 ver)
{
struct qsd_upd_rec *upd, *tmp;
ENTRY;
/* Get the first update record in the list, which has the smallest
* version, discard all records with versions smaller than the current
* one */
- cfs_list_for_each_entry_safe(upd, tmp, list, qur_link) {
+ list_for_each_entry_safe(upd, tmp, list, qur_link) {
if (upd->qur_ver <= ver) {
/* drop this update */
- cfs_list_del_init(&upd->qur_link);
+ list_del_init(&upd->qur_link);
CDEBUG(D_QUOTA, "%s: skipping deferred update ver:"
- LPU64"/"LPU64", global:%d, qid:"LPU64"\n",
+ "%llu/%llu, global:%d, qid:%llu\n",
qqi->qqi_qsd->qsd_svname, upd->qur_ver, ver,
upd->qur_global, upd->qur_qid.qid_uid);
qsd_upd_free(upd);
}
/* No remaining deferred update */
- if (cfs_list_empty(list))
+ if (list_empty(list))
RETURN_EXIT;
CDEBUG(D_QUOTA, "%s: found deferred update record. "
- "version:"LPU64"/"LPU64", global:%d, qid:"LPU64"\n",
+ "version:%llu/%llu, global:%d, qid:%llu\n",
qqi->qqi_qsd->qsd_svname, upd->qur_ver, ver,
upd->qur_global, upd->qur_qid.qid_uid);
- LASSERTF(upd->qur_ver > ver, "lur_ver:"LPU64", cur_ver:"LPU64"\n",
+ LASSERTF(upd->qur_ver > ver, "lur_ver:%llu, cur_ver:%llu\n",
upd->qur_ver, ver);
/* Kick off the deferred udpate */
*/
void qsd_bump_version(struct qsd_qtype_info *qqi, __u64 ver, bool global)
{
- cfs_list_t *list;
- __u64 *idx_ver;
+ struct list_head *list;
+ __u64 *idx_ver;
idx_ver = global ? &qqi->qqi_glb_ver : &qqi->qqi_slv_ver;
list = global ? &qqi->qqi_deferred_glb : &qqi->qqi_deferred_slv;
* \param qid - quota id
* \param rec - global or slave record to be updated to disk
* \param ver - new index file version
- * \param global- ture : master record; false : slave record
+ * \param global- true: master record; false: slave record
*/
void qsd_upd_schedule(struct qsd_qtype_info *qqi, struct lquota_entry *lqe,
union lquota_id *qid, union lquota_rec *rec, __u64 ver,
__u64 cur_ver;
ENTRY;
- CDEBUG(D_QUOTA, "%s: schedule update. global:%s, version:"LPU64"\n",
+ CDEBUG(D_QUOTA, "%s: schedule update. global:%s, version:%llu\n",
qsd->qsd_svname, global ? "true" : "false", ver);
upd = qsd_upd_alloc(qqi, lqe, qid, rec, ver, global);
/* legitimate race between glimpse AST and
* reintegration */
CDEBUG(D_QUOTA, "%s: discarding glb update from glimpse"
- " ver:"LPU64" local ver:"LPU64"\n",
+ " ver:%llu local ver:%llu\n",
qsd->qsd_svname, ver, cur_ver);
else
- CERROR("%s: discard slv update, ver:"LPU64" local ver:"
- LPU64"\n", qsd->qsd_svname, ver, cur_ver);
+ CERROR("%s: discard slv update, ver:%llu local ver:"
+ "%llu\n", qsd->qsd_svname, ver, cur_ver);
qsd_upd_free(upd);
} else if ((ver == cur_ver + 1) && qqi->qqi_glb_uptodate &&
qqi->qqi_slv_uptodate) {
/* Out of order update (the one with smaller version hasn't
* reached slave or hasn't been flushed to disk yet), or
* the reintegration is in progress. Defer the update. */
- cfs_list_t *list = global ? &qqi->qqi_deferred_glb :
- &qqi->qqi_deferred_slv;
- qsd_add_deferred(list, upd);
+ struct list_head *list = global ? &qqi->qqi_deferred_glb :
+ &qqi->qqi_deferred_slv;
+ qsd_add_deferred(qsd, list, upd);
}
write_unlock(&qsd->qsd_lock);
GOTO(out, rc);
/* refresh usage */
qsd_refresh_usage(env, lqe);
+
+ spin_lock(&qqi->qqi_qsd->qsd_adjust_lock);
+ lqe->lqe_adjust_time = 0;
+ spin_unlock(&qqi->qqi_qsd->qsd_adjust_lock);
+
/* Report usage asynchronously */
rc = qsd_adjust(env, lqe);
if (rc)
rc = qsd_update_index(env, qqi, &upd->qur_qid, upd->qur_global,
upd->qur_ver, &upd->qur_rec);
out:
+ if (upd->qur_global && rc == 0 &&
+ upd->qur_rec.lqr_glb_rec.qbr_softlimit == 0 &&
+ upd->qur_rec.lqr_glb_rec.qbr_hardlimit == 0 &&
+ (LQUOTA_FLAG(upd->qur_rec.lqr_glb_rec.qbr_time) &
+ LQUOTA_FLAG_DEFAULT)) {
+ lqe->lqe_is_default = true;
+ if (qqi->qqi_default_softlimit == 0 &&
+ qqi->qqi_default_hardlimit == 0)
+ lqe->lqe_enforced = false;
+ else
+ lqe->lqe_enforced = true;
+
+ LQUOTA_DEBUG(lqe, "update to use default quota");
+ }
+
if (lqe && !IS_ERR(lqe)) {
lqe_putref(lqe);
upd->qur_lqe = NULL;
struct qsd_instance *qsd = lqe2qqi(lqe)->qqi_qsd;
bool added = false;
+ read_lock(&qsd->qsd_lock);
+ if (qsd->qsd_stopping) {
+ read_unlock(&qsd->qsd_lock);
+ return;
+ }
+ read_unlock(&qsd->qsd_lock);
+
lqe_getref(lqe);
spin_lock(&qsd->qsd_adjust_lock);
/* the lqe is being queued for the per-ID lock cancel, we should
* cancel the lock cancel and re-add it for quota adjust */
- if (!cfs_list_empty(&lqe->lqe_link) &&
+ if (!list_empty(&lqe->lqe_link) &&
lqe->lqe_adjust_time == 0) {
- cfs_list_del_init(&lqe->lqe_link);
+ list_del_init(&lqe->lqe_link);
lqe_putref(lqe);
}
- if (cfs_list_empty(&lqe->lqe_link)) {
- if (cancel)
+ if (list_empty(&lqe->lqe_link)) {
+ if (!cancel) {
+ lqe->lqe_adjust_time = ktime_get_seconds();
+ if (defer)
+ lqe->lqe_adjust_time += QSD_WB_INTERVAL;
+ } else {
lqe->lqe_adjust_time = 0;
- else
- lqe->lqe_adjust_time = defer ?
- cfs_time_shift_64(QSD_WB_INTERVAL) :
- cfs_time_current_64();
- /* lqe reference transfered to list */
+ }
+
+ /* lqe reference transferred to list */
if (defer)
- cfs_list_add_tail(&lqe->lqe_link,
+ list_add_tail(&lqe->lqe_link,
&qsd->qsd_adjust_list);
else
- cfs_list_add(&lqe->lqe_link, &qsd->qsd_adjust_list);
+ list_add(&lqe->lqe_link, &qsd->qsd_adjust_list);
added = true;
}
spin_unlock(&qsd->qsd_adjust_lock);
- if (added)
- cfs_waitq_signal(&qsd->qsd_upd_thread.t_ctl_waitq);
- else
+ if (!added)
lqe_putref(lqe);
+ else {
+ read_lock(&qsd->qsd_lock);
+ if (qsd->qsd_upd_task)
+ wake_up_process(qsd->qsd_upd_task);
+ read_unlock(&qsd->qsd_lock);
+ }
}
/* return true if there is pending writeback records or the pending
* adjust requests */
-static bool qsd_job_pending(struct qsd_instance *qsd, cfs_list_t *upd,
+static bool qsd_job_pending(struct qsd_instance *qsd, struct list_head *upd,
bool *uptodate)
{
bool job_pending = false;
int qtype;
- LASSERT(cfs_list_empty(upd));
+ LASSERT(list_empty(upd));
*uptodate = true;
spin_lock(&qsd->qsd_adjust_lock);
- if (!cfs_list_empty(&qsd->qsd_adjust_list)) {
+ if (!list_empty(&qsd->qsd_adjust_list)) {
struct lquota_entry *lqe;
- lqe = cfs_list_entry(qsd->qsd_adjust_list.next,
+ lqe = list_entry(qsd->qsd_adjust_list.next,
struct lquota_entry, lqe_link);
- if (cfs_time_beforeq_64(lqe->lqe_adjust_time,
- cfs_time_current_64()))
+ if (ktime_get_seconds() >= lqe->lqe_adjust_time)
job_pending = true;
}
spin_unlock(&qsd->qsd_adjust_lock);
write_lock(&qsd->qsd_lock);
- if (!cfs_list_empty(&qsd->qsd_upd_list)) {
- cfs_list_splice_init(&qsd->qsd_upd_list, upd);
+ if (!list_empty(&qsd->qsd_upd_list)) {
+ list_splice_init(&qsd->qsd_upd_list, upd);
job_pending = true;
}
- for (qtype = USRQUOTA; qtype < MAXQUOTAS; qtype++) {
+ for (qtype = USRQUOTA; qtype < LL_MAXQUOTAS; qtype++) {
struct qsd_qtype_info *qqi = qsd->qsd_type_array[qtype];
+ /* don't bother kicking off reintegration if space accounting
+ * failed to be enabled */
+ if (qqi->qqi_acct_failed)
+ continue;
+
if (!qsd_type_enabled(qsd, qtype))
continue;
return job_pending;
}
-static int qsd_upd_thread(void *arg)
+struct qsd_upd_args {
+ struct qsd_instance *qua_inst;
+ struct lu_env qua_env;
+ struct completion *qua_started;
+};
+
+#ifndef TASK_IDLE
+/* This identity is only safe inside kernel threads, or other places where
+ * all signals are disabled. So it is placed here rather than in an include
+ * file.
+ * TASK_IDLE was added in v4.1-rc4-43-g80ed87c8a9ca so this can be removed
+ * when we no longer support kernels older than that.
+ */
+#define TASK_IDLE TASK_INTERRUPTIBLE
+#endif
+
+static int qsd_upd_thread(void *_args)
{
- struct qsd_instance *qsd = (struct qsd_instance *)arg;
- struct ptlrpc_thread *thread = &qsd->qsd_upd_thread;
- struct l_wait_info lwi;
- cfs_list_t queue;
+ struct qsd_upd_args *args = _args;
+ struct qsd_instance *qsd = args->qua_inst;
+ LIST_HEAD(queue);
struct qsd_upd_rec *upd, *n;
- char pname[MTI_NAME_MAXLEN];
- struct lu_env *env;
+ struct lu_env *env = &args->qua_env;
int qtype, rc = 0;
bool uptodate;
- struct lquota_entry *lqe, *tmp;
- __u64 cur_time;
+ struct lquota_entry *lqe;
+ time64_t cur_time;
ENTRY;
- OBD_ALLOC_PTR(env);
- if (env == NULL)
- RETURN(-ENOMEM);
-
- rc = lu_env_init(env, LCT_DT_THREAD);
- if (rc) {
- CERROR("%s: Fail to init env.", qsd->qsd_svname);
- OBD_FREE_PTR(env);
- RETURN(rc);
- }
-
- snprintf(pname, MTI_NAME_MAXLEN, "lquota_wb_%s", qsd->qsd_svname);
- cfs_daemonize(pname);
-
- thread_set_flags(thread, SVC_RUNNING);
- cfs_waitq_signal(&thread->t_ctl_waitq);
+ complete(args->qua_started);
+ while (({set_current_state(TASK_IDLE);
+ !kthread_should_stop(); })) {
- CFS_INIT_LIST_HEAD(&queue);
- lwi = LWI_TIMEOUT(cfs_time_seconds(QSD_WB_INTERVAL), NULL, NULL);
- while (1) {
- l_wait_event(thread->t_ctl_waitq,
- qsd_job_pending(qsd, &queue, &uptodate) ||
- !thread_is_running(thread), &lwi);
+ if (!qsd_job_pending(qsd, &queue, &uptodate))
+ schedule_timeout(cfs_time_seconds(QSD_WB_INTERVAL));
+ __set_current_state(TASK_RUNNING);
- cfs_list_for_each_entry_safe(upd, n, &queue, qur_link) {
- cfs_list_del_init(&upd->qur_link);
+ list_for_each_entry_safe(upd, n, &queue, qur_link) {
+ list_del_init(&upd->qur_link);
qsd_process_upd(env, upd);
qsd_upd_free(upd);
}
spin_lock(&qsd->qsd_adjust_lock);
- cur_time = cfs_time_current_64();
- cfs_list_for_each_entry_safe(lqe, tmp, &qsd->qsd_adjust_list,
- lqe_link) {
+ cur_time = ktime_get_seconds();
+ while (!list_empty(&qsd->qsd_adjust_list)) {
+ lqe = list_entry(qsd->qsd_adjust_list.next,
+ struct lquota_entry, lqe_link);
/* deferred items are sorted by time */
- if (!cfs_time_beforeq_64(lqe->lqe_adjust_time,
- cur_time))
+ if (lqe->lqe_adjust_time > cur_time)
break;
- cfs_list_del_init(&lqe->lqe_link);
+ list_del_init(&lqe->lqe_link);
spin_unlock(&qsd->qsd_adjust_lock);
- if (thread_is_running(thread) && uptodate) {
+ if (!kthread_should_stop() && uptodate) {
qsd_refresh_usage(env, lqe);
if (lqe->lqe_adjust_time == 0)
qsd_id_lock_cancel(env, lqe);
}
spin_unlock(&qsd->qsd_adjust_lock);
- if (!thread_is_running(thread))
- break;
-
- if (uptodate)
+ if (uptodate || kthread_should_stop())
continue;
- for (qtype = USRQUOTA; qtype < MAXQUOTAS; qtype++)
+ for (qtype = USRQUOTA; qtype < LL_MAXQUOTAS; qtype++)
qsd_start_reint_thread(qsd->qsd_type_array[qtype]);
}
+ __set_current_state(TASK_RUNNING);
+
lu_env_fini(env);
- OBD_FREE_PTR(env);
- thread_set_flags(thread, SVC_STOPPED);
- cfs_waitq_signal(&thread->t_ctl_waitq);
+ OBD_FREE_PTR(args);
+
RETURN(rc);
}
int qsd_start_upd_thread(struct qsd_instance *qsd)
{
- struct ptlrpc_thread *thread = &qsd->qsd_upd_thread;
- struct l_wait_info lwi = { 0 };
- int rc;
+ struct qsd_upd_args *args;
+ struct task_struct *task;
+ DECLARE_COMPLETION_ONSTACK(started);
+ int rc;
ENTRY;
- rc = cfs_create_thread(qsd_upd_thread, (void *)qsd, 0);
- if (rc < 0) {
- CERROR("Fail to start quota update thread. rc: %d\n", rc);
- thread_set_flags(thread, SVC_STOPPED);
- RETURN(rc);
+ OBD_ALLOC_PTR(args);
+ if (args == NULL)
+ RETURN(-ENOMEM);
+
+ rc = lu_env_init(&args->qua_env, LCT_DT_THREAD);
+ if (rc) {
+ CERROR("%s: cannot init env: rc = %d\n", qsd->qsd_svname, rc);
+ goto out_free;
+ }
+ args->qua_inst = qsd;
+ args->qua_started = &started;
+
+ task = kthread_create(qsd_upd_thread, args,
+ "lquota_wb_%s", qsd->qsd_svname);
+ if (IS_ERR(task)) {
+ rc = PTR_ERR(task);
+ CERROR("fail to start quota update thread: rc = %d\n", rc);
+ goto out_fini;
}
+ qsd->qsd_upd_task = task;
+ wake_up_process(task);
+ wait_for_completion(&started);
- l_wait_event(thread->t_ctl_waitq,
- thread_is_running(thread) || thread_is_stopped(thread),
- &lwi);
RETURN(0);
+
+out_fini:
+ lu_env_fini(&args->qua_env);
+out_free:
+ OBD_FREE_PTR(args);
+ RETURN(rc);
}
static void qsd_cleanup_deferred(struct qsd_instance *qsd)
{
int qtype;
- for (qtype = USRQUOTA; qtype < MAXQUOTAS; qtype++) {
+ for (qtype = USRQUOTA; qtype < LL_MAXQUOTAS; qtype++) {
struct qsd_upd_rec *upd, *tmp;
struct qsd_qtype_info *qqi = qsd->qsd_type_array[qtype];
continue;
write_lock(&qsd->qsd_lock);
- cfs_list_for_each_entry_safe(upd, tmp, &qqi->qqi_deferred_glb,
- qur_link) {
- CWARN("%s: Free global deferred upd: ID:"LPU64", "
- "ver:"LPU64"/"LPU64"\n", qsd->qsd_svname,
+ list_for_each_entry_safe(upd, tmp, &qqi->qqi_deferred_glb,
+ qur_link) {
+ CWARN("%s: Free global deferred upd: ID:%llu, "
+ "ver:%llu/%llu\n", qsd->qsd_svname,
upd->qur_qid.qid_uid, upd->qur_ver,
qqi->qqi_glb_ver);
list_del_init(&upd->qur_link);
qsd_upd_free(upd);
}
- cfs_list_for_each_entry_safe(upd, tmp, &qqi->qqi_deferred_slv,
- qur_link) {
- CWARN("%s: Free slave deferred upd: ID:"LPU64", "
- "ver:"LPU64"/"LPU64"\n", qsd->qsd_svname,
+ list_for_each_entry_safe(upd, tmp, &qqi->qqi_deferred_slv,
+ qur_link) {
+ CWARN("%s: Free slave deferred upd: ID:%llu, "
+ "ver:%llu/%llu\n", qsd->qsd_svname,
upd->qur_qid.qid_uid, upd->qur_ver,
qqi->qqi_slv_ver);
list_del_init(&upd->qur_link);
struct lquota_entry *lqe;
spin_lock(&qsd->qsd_adjust_lock);
- while (!cfs_list_empty(&qsd->qsd_adjust_list)) {
- lqe = cfs_list_entry(qsd->qsd_adjust_list.next,
- struct lquota_entry, lqe_link);
- cfs_list_del_init(&lqe->lqe_link);
+ while (!list_empty(&qsd->qsd_adjust_list)) {
+ lqe = list_entry(qsd->qsd_adjust_list.next,
+ struct lquota_entry, lqe_link);
+ list_del_init(&lqe->lqe_link);
lqe_putref(lqe);
}
spin_unlock(&qsd->qsd_adjust_lock);
void qsd_stop_upd_thread(struct qsd_instance *qsd)
{
- struct ptlrpc_thread *thread = &qsd->qsd_upd_thread;
- struct l_wait_info lwi = { 0 };
+ struct task_struct *task;
- if (!thread_is_stopped(thread)) {
- thread_set_flags(thread, SVC_STOPPING);
- cfs_waitq_signal(&thread->t_ctl_waitq);
+ write_lock(&qsd->qsd_lock);
+ task = qsd->qsd_upd_task;
+ qsd->qsd_upd_task = NULL;
+ write_unlock(&qsd->qsd_lock);
+ if (task)
+ kthread_stop(task);
- l_wait_event(thread->t_ctl_waitq, thread_is_stopped(thread),
- &lwi);
- }
qsd_cleanup_deferred(qsd);
qsd_cleanup_adjust(qsd);
}