4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; if not, write to the
18 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 * Boston, MA 021110-1307, USA
24 * Copyright (c) 2012, 2017, Intel Corporation.
25 * Use is subject to license terms.
27 * Author: Johann Lombardi <johann.lombardi@intel.com>
28 * Author: Niu Yawei <yawei.niu@intel.com>
31 #define DEBUG_SUBSYSTEM S_LQUOTA
33 #include <linux/kthread.h>
34 #include "qsd_internal.h"
/*
 * Allocate and fill a qsd_upd_rec structure to be processed by the writeback
 * thread.
 *
 * \param qqi    - is the qsd_qtype_info structure relevant to the update
 * \param lqe    - is the lquota entry subject to the update
 * \param qid    - is the identifier subject to the update
 * \param rec    - is the record storing the new quota settings
 * \param ver    - is the version associated with the update
 * \param global - is a boolean set to true if this is an update of the global
 *                 index and false for a slave index.
 */
static struct qsd_upd_rec *qsd_upd_alloc(struct qsd_qtype_info *qqi,
					 struct lquota_entry *lqe,
					 union lquota_rec *rec, __u64 ver,
	struct qsd_upd_rec *upd;

	/* GFP_NOFS: this can run in a context where re-entering the
	 * filesystem for memory reclaim must be avoided */
	OBD_SLAB_ALLOC_PTR_GFP(upd, upd_kmem, GFP_NOFS);

	/* initialize the record so it can be linked on a writeback or
	 * deferred list later on */
	INIT_LIST_HEAD(&upd->qur_link);
	upd->qur_global = global;
/* Free an update record: drop the reference held on the attached lquota
 * entry (presumably taken when the record was allocated/queued — confirm
 * against qsd_upd_alloc) and return the record to the slab cache. */
static void qsd_upd_free(struct qsd_upd_rec *upd)
	lqe_putref(upd->qur_lqe);
	OBD_SLAB_FREE_PTR(upd, upd_kmem);
/* must hold the qsd_lock */
/* Queue an update record on the instance-wide writeback list and kick the
 * writeback thread; if the qsd instance is stopping, the update is discarded
 * with a warning instead of being queued. */
static void qsd_upd_add(struct qsd_instance *qsd, struct qsd_upd_rec *upd)
	if (!qsd->qsd_stopping) {
		list_add_tail(&upd->qur_link, &qsd->qsd_upd_list);
		/* wake up the upd thread */
		if (qsd->qsd_upd_task)
			wake_up_process(qsd->qsd_upd_task);
		/* instance shutting down: drop the update */
		CWARN("%s: discard update.\n", qsd->qsd_svname);
		LQUOTA_WARN(upd->qur_lqe, "discard update.");
98 /* must hold the qsd_lock */
99 static void qsd_add_deferred(struct qsd_instance *qsd, struct list_head *list,
100 struct qsd_upd_rec *upd)
102 struct qsd_upd_rec *tmp, *n;
104 if (qsd->qsd_stopping) {
105 CWARN("%s: discard deferred udpate.\n", qsd->qsd_svname);
107 LQUOTA_WARN(upd->qur_lqe, "discard deferred update.");
112 /* Sort the updates in ascending order */
113 list_for_each_entry_safe_reverse(tmp, n, list, qur_link) {
115 /* There could be some legacy records which have duplicated
116 * version. Imagine following scenario: slave received global
117 * glimpse and queued a record in the deferred list, then
118 * master crash and rollback to an ealier version, then the
119 * version of queued record will be conflicting with later
120 * updates. We should just delete the legacy record in such
122 if (upd->qur_ver == tmp->qur_ver) {
124 LQUOTA_WARN(tmp->qur_lqe, "Found a conflict "
125 "record with ver:%llu",
128 CWARN("%s: Found a conflict record with ver: "
129 "%llu\n", qsd->qsd_svname, tmp->qur_ver);
131 list_del_init(&tmp->qur_link);
133 } else if (upd->qur_ver < tmp->qur_ver) {
136 list_add_tail(&upd->qur_link, &tmp->qur_link);
140 list_add(&upd->qur_link, list);
/* must hold the qsd_lock */
/* Scan a deferred list (sorted in ascending version order): drop every record
 * whose version is already covered by \a ver, then, if the first remaining
 * record is exactly the next version (ver + 1), move it to the writeback list
 * via qsd_upd_add(). */
static void qsd_kickoff_deferred(struct qsd_qtype_info *qqi,
				 struct list_head *list, __u64 ver)
	struct qsd_upd_rec *upd, *tmp;

	/* Get the first update record in the list, which has the smallest
	 * version, discard all records with versions smaller than the current
	list_for_each_entry_safe(upd, tmp, list, qur_link) {
		if (upd->qur_ver <= ver) {
			/* drop this update */
			list_del_init(&upd->qur_link);
			CDEBUG(D_QUOTA, "%s: skipping deferred update ver:"
			       "%llu/%llu, global:%d, qid:%llu\n",
			       qqi->qqi_qsd->qsd_svname, upd->qur_ver, ver,
			       upd->qur_global, upd->qur_qid.qid_uid);

	/* No remaining deferred update */
	if (list_empty(list))

	CDEBUG(D_QUOTA, "%s: found deferred update record. "
	       "version:%llu/%llu, global:%d, qid:%llu\n",
	       qqi->qqi_qsd->qsd_svname, upd->qur_ver, ver,
	       upd->qur_global, upd->qur_qid.qid_uid);

	/* all records <= ver were dropped above, so this must hold */
	LASSERTF(upd->qur_ver > ver, "lur_ver:%llu, cur_ver:%llu\n",

	/* Kick off the deferred update */
	if (upd->qur_ver == ver + 1) {
		list_del_init(&upd->qur_link);
		qsd_upd_add(qqi->qqi_qsd, upd);
187 /* Bump version of global or slave index copy
189 * \param qqi - qsd_qtype_info
190 * \param ver - version to be bumped to
191 * \param global - global or slave index copy?
/* Bump the global or slave index version of \a qqi to \a ver, mark the
 * corresponding copy up-to-date and kick off any deferred updates that were
 * waiting for this version (see qsd_kickoff_deferred()). */
void qsd_bump_version(struct qsd_qtype_info *qqi, __u64 ver, bool global)
	struct list_head *list;

	/* pick version field and deferred list matching the index copy */
	idx_ver = global ? &qqi->qqi_glb_ver : &qqi->qqi_slv_ver;
	list = global ? &qqi->qqi_deferred_glb : &qqi->qqi_deferred_slv;

	write_lock(&qqi->qqi_qsd->qsd_lock);
	qqi->qqi_glb_uptodate = 1;
	qqi->qqi_slv_uptodate = 1;
	qsd_kickoff_deferred(qqi, list, ver);
	write_unlock(&qqi->qqi_qsd->qsd_lock);
212 * Schedule a commit of a lquota entry
214 * \param qqi - qsd_qtype_info
215 * \param lqe - lquota_entry
216 * \param qid - quota id
217 * \param rec - global or slave record to be updated to disk
218 * \param ver - new index file version
219 * \param global- true: master record; false: slave record
void qsd_upd_schedule(struct qsd_qtype_info *qqi, struct lquota_entry *lqe,
		      union lquota_id *qid, union lquota_rec *rec, __u64 ver,
	struct qsd_upd_rec *upd;
	struct qsd_instance *qsd = qqi->qqi_qsd;

	CDEBUG(D_QUOTA, "%s: schedule update. global:%s, version:%llu\n",
	       qsd->qsd_svname, global ? "true" : "false", ver);

	/* package the update in a qsd_upd_rec for the writeback thread */
	upd = qsd_upd_alloc(qqi, lqe, qid, rec, ver, global);

	/* If we don't want update index version, no need to sort the
	 * records in version order, just schedule the updates instantly. */
	write_lock(&qsd->qsd_lock);
	qsd_upd_add(qsd, upd);
	write_unlock(&qsd->qsd_lock);

	write_lock(&qsd->qsd_lock);

	/* current on-disk version of the index copy being updated */
	cur_ver = global ? qqi->qqi_glb_ver : qqi->qqi_slv_ver;

	if (ver <= cur_ver) {
		/* stale update: already covered by the current version */
		/* legitimate race between glimpse AST and
		CDEBUG(D_QUOTA, "%s: discarding glb update from glimpse"
		       " ver:%llu local ver:%llu\n",
		       qsd->qsd_svname, ver, cur_ver);
		/* a stale slave update is unexpected, hence the CERROR */
		CERROR("%s: discard slv update, ver:%llu local ver:"
		       "%llu\n", qsd->qsd_svname, ver, cur_ver);
	} else if ((ver == cur_ver + 1) && qqi->qqi_glb_uptodate &&
		   qqi->qqi_slv_uptodate) {
		/* In order update, and reintegration has been done. */
		qsd_upd_add(qsd, upd);
		/* Out of order update (the one with smaller version hasn't
		 * reached slave or hasn't been flushed to disk yet), or
		 * the reintegration is in progress. Defer the update. */
		struct list_head *list = global ? &qqi->qqi_deferred_glb :
						  &qqi->qqi_deferred_slv;
		qsd_add_deferred(qsd, list, upd);

	write_unlock(&qsd->qsd_lock);
/* Process one queued update record: apply it to the in-memory lquota entry
 * (for global updates), flush it to the on-disk index copy, and handle the
 * special DELETED / DEFAULT / RESET quota flags.  Returns <= 0 when the
 * record has been consumed (see caller in qsd_upd_thread). */
static int qsd_process_upd(const struct lu_env *env, struct qsd_upd_rec *upd)
	struct lquota_entry *lqe = upd->qur_lqe;
	struct qsd_qtype_info *qqi = upd->qur_qqi;
	struct qsd_instance *qsd = qqi->qqi_qsd;

	if (qsd->qsd_exclusive) { /* It could be deadlock running with reint */
		read_lock(&qsd->qsd_lock);
		read_unlock(&qsd->qsd_lock);

	/* global record flagged DELETED: remove the entry from the global
	 * index inside a local transaction instead of updating it */
	if (upd->qur_global &&
	    (LQUOTA_FLAG(upd->qur_rec.lqr_glb_rec.qbr_time) &
	     LQUOTA_FLAG_DELETED)) {
		struct thandle *th = NULL;
		struct dt_object *obj;

		obj = qqi->qqi_glb_obj;

		th = dt_trans_create(env, qqi->qqi_qsd->qsd_dev);

		rc = lquota_disk_declare_write(env, th, obj, &upd->qur_qid);

		rc = dt_trans_start_local(env, qqi->qqi_qsd->qsd_dev, th);

		rc = lquota_disk_delete(env, th, obj, upd->qur_qid.qid_uid,

		dt_trans_stop(env, qqi->qqi_qsd->qsd_dev, th);

		lqe_set_deleted(lqe);
		qsd_bump_version(qqi, upd->qur_ver, true);

	/* look up (or create) the in-memory entry for this quota ID */
	lqe = lqe_locate(env, qqi->qqi_site, &upd->qur_qid);
		GOTO(out, rc = PTR_ERR(lqe));

	lqe->lqe_is_reset = false;
	lqe->lqe_is_deleted = 0;

	/* The in-memory lqe update for slave index copy isn't deferred,
	 * we shouldn't touch it here. */
	if (upd->qur_global) {
		rc = qsd_update_lqe(env, lqe, upd->qur_global, &upd->qur_rec);

		qsd_refresh_usage(env, lqe);

		/* clear any pending deferred adjustment for this entry */
		spin_lock(&qsd->qsd_adjust_lock);
		lqe->lqe_adjust_time = 0;
		spin_unlock(&qsd->qsd_adjust_lock);

		/* Report usage asynchronously */
		rc = qsd_adjust(env, lqe);
			LQUOTA_ERROR(lqe, "failed to report usage, rc:%d", rc);

	/* flush the new quota settings to the on-disk index copy */
	rc = qsd_update_index(env, qqi, &upd->qur_qid, upd->qur_global,
			      upd->qur_ver, &upd->qur_rec);

	/* global record with no explicit limits and the DEFAULT flag set:
	 * this ID now follows the per-type default quota */
	if (upd->qur_global && rc == 0 &&
	    upd->qur_rec.lqr_glb_rec.qbr_softlimit == 0 &&
	    upd->qur_rec.lqr_glb_rec.qbr_hardlimit == 0 &&
	    (LQUOTA_FLAG(upd->qur_rec.lqr_glb_rec.qbr_time) &
	     LQUOTA_FLAG_DEFAULT)) {
		lqe->lqe_is_default = true;
		if (qqi->qqi_default_softlimit == 0 &&
		    qqi->qqi_default_hardlimit == 0)
			lqe->lqe_enforced = false;
			lqe->lqe_enforced = true;

		LQUOTA_DEBUG(lqe, "update to use default quota");

	/* global record flagged RESET: clear limits and granted space, and
	 * zero the slave record on disk as well */
	if (upd->qur_global && rc == 0 &&
	    (LQUOTA_FLAG(upd->qur_rec.lqr_glb_rec.qbr_time) &
	     LQUOTA_FLAG_RESET)) {
		struct lquota_slv_rec srec;

		lqe->lqe_granted = 0;
		lqe->lqe_softlimit = 0;
		lqe->lqe_hardlimit = 0;
		lqe->lqe_is_default = false;
		lqe->lqe_is_reset = true;

		memset(&srec, 0, sizeof(srec));
		rc = qsd_update_index(env, qqi, &upd->qur_qid, false, 0, &srec);

	if (lqe && !IS_ERR(lqe)) {
/* Schedule a quota adjustment (or a per-ID lock cancel) for \a lqe on the
 * qsd_adjust_list.  \a defer delays the adjustment by QSD_WB_INTERVAL and
 * \a cancel requests a lock cancel (lqe_adjust_time == 0 marks a cancel
 * request — see the list handling below and in qsd_upd_thread). */
void qsd_adjust_schedule(struct lquota_entry *lqe, bool defer, bool cancel)
	struct qsd_instance *qsd = lqe2qqi(lqe)->qqi_qsd;

	/* nothing to schedule once the instance is stopping */
	read_lock(&qsd->qsd_lock);
	if (qsd->qsd_stopping) {
		read_unlock(&qsd->qsd_lock);
	read_unlock(&qsd->qsd_lock);

	spin_lock(&qsd->qsd_adjust_lock);

	/* the lqe is being queued for the per-ID lock cancel, we should
	 * cancel the lock cancel and re-add it for quota adjust */
	if (!list_empty(&lqe->lqe_link) &&
	    lqe->lqe_adjust_time == 0) {
		list_del_init(&lqe->lqe_link);

	if (list_empty(&lqe->lqe_link)) {
			/* deferred adjustment: fires QSD_WB_INTERVAL from now */
			lqe->lqe_adjust_time = ktime_get_seconds();
				lqe->lqe_adjust_time += QSD_WB_INTERVAL;
			/* time 0 == lock cancel request */
			lqe->lqe_adjust_time = 0;

		/* lqe reference transferred to list */
			list_add_tail(&lqe->lqe_link,
				      &qsd->qsd_adjust_list);
			list_add(&lqe->lqe_link, &qsd->qsd_adjust_list);
	spin_unlock(&qsd->qsd_adjust_lock);

	/* wake the writeback thread so it notices the new entry */
	read_lock(&qsd->qsd_lock);
	if (qsd->qsd_upd_task)
		wake_up_process(qsd->qsd_upd_task);
	read_unlock(&qsd->qsd_lock);
/* return true if there are pending writeback records or the pending
/* Also splices the pending writeback records onto \a upd for the caller
 * (the writeback thread) to process outside of qsd_lock. */
static bool qsd_job_pending(struct qsd_instance *qsd, struct list_head *upd,
	bool job_pending = false;

	/* caller must hand in an empty list to splice into */
	LASSERT(list_empty(upd));

	/* check whether the head of the adjust list (sorted by time) is due */
	spin_lock(&qsd->qsd_adjust_lock);
	if (!list_empty(&qsd->qsd_adjust_list)) {
		struct lquota_entry *lqe;
		lqe = list_entry(qsd->qsd_adjust_list.next,
				 struct lquota_entry, lqe_link);
		if (ktime_get_seconds() >= lqe->lqe_adjust_time)
	spin_unlock(&qsd->qsd_adjust_lock);

	/* grab all queued writeback records in one go */
	write_lock(&qsd->qsd_lock);
	if (!list_empty(&qsd->qsd_upd_list)) {
		list_splice_init(&qsd->qsd_upd_list, upd);

	if (qsd->qsd_exclusive)
		qsd->qsd_updating = job_pending;

	for (qtype = USRQUOTA; qtype < LL_MAXQUOTAS; qtype++) {
		struct qsd_qtype_info *qqi = qsd->qsd_type_array[qtype];

		/* don't bother kicking off reintegration if space accounting
		 * failed to be enabled */
		if (qqi->qqi_acct_failed)

		if (!qsd_type_enabled(qsd, qtype))

		if ((!qqi->qqi_glb_uptodate || !qqi->qqi_slv_uptodate) &&
			/* global or slave index not up to date and reint
			 * thread not running */

	write_unlock(&qsd->qsd_lock);
/* Arguments handed to the writeback thread (qsd_upd_thread) at creation. */
struct qsd_upd_args {
	struct qsd_instance	*qua_inst;	/* qsd instance to serve */
	struct lu_env		 qua_env;	/* per-thread env (LCT_DT_THREAD) */
	struct completion	*qua_started;	/* completed once thread is up */
/* This identity is only safe inside kernel threads, or other places where
 * all signals are disabled. So it is placed here rather than in an include
 * file.
 * TASK_IDLE was added in v4.1-rc4-43-g80ed87c8a9ca so this can be removed
 * when we no longer support kernels older than that. */
#define TASK_IDLE TASK_INTERRUPTIBLE
/* Main loop of the quota writeback thread ("lquota_wb_<svname>"):
 * - drain the writeback list filled via qsd_upd_add(), processing each
 *   record with qsd_process_upd();
 * - walk the time-sorted adjust list and perform due quota adjustments or
 *   per-ID lock cancels;
 * - kick off reintegration threads while index copies are not up to date.
 * Sleeps in TASK_IDLE for QSD_WB_INTERVAL between iterations when there is
 * no pending work; exits when kthread_should_stop() becomes true. */
static int qsd_upd_thread(void *_args)
	struct qsd_upd_args *args = _args;
	struct qsd_instance *qsd = args->qua_inst;
	struct qsd_upd_rec *upd, *n;
	struct lu_env *env = &args->qua_env;
	struct lquota_entry *lqe;

	/* let qsd_start_upd_thread() know we are up and running */
	complete(args->qua_started);
	while (({set_current_state(TASK_IDLE);
		 !kthread_should_stop(); })) {

		if (!qsd_job_pending(qsd, &queue, &uptodate))
			schedule_timeout(cfs_time_seconds(QSD_WB_INTERVAL));
		__set_current_state(TASK_RUNNING);

		/* process records spliced onto the local queue; entries that
		 * return > 0 stay queued for a retry below */
		list_for_each_entry_safe(upd, n, &queue, qur_link) {
			if (qsd_process_upd(env, upd) <= 0) {
				list_del_init(&upd->qur_link);

		if (list_empty(&queue))

		/* periodically warn about records stuck behind a blocked
		 * reintegration */
		if (count % 7 == 0) {
			n = list_first_entry(&queue, struct qsd_upd_rec,
			CWARN("%s: The reintegration thread [%d] "
			      "blocked more than %ld seconds\n",
			      n->qur_qqi->qqi_qsd->qsd_svname,
			      n->qur_qqi->qqi_qtype, count *
			      cfs_time_seconds(QSD_WB_INTERVAL) / 10);
		schedule_timeout_interruptible(
			cfs_time_seconds(QSD_WB_INTERVAL) / 10);

		if (qsd->qsd_exclusive) {
			write_lock(&qsd->qsd_lock);
			qsd->qsd_updating = false;
			write_unlock(&qsd->qsd_lock);

		/* handle due entries on the adjust list */
		spin_lock(&qsd->qsd_adjust_lock);
		cur_time = ktime_get_seconds();
		while (!list_empty(&qsd->qsd_adjust_list)) {
			lqe = list_entry(qsd->qsd_adjust_list.next,
					 struct lquota_entry, lqe_link);
			/* deferred items are sorted by time */
			if (lqe->lqe_adjust_time > cur_time)

			list_del_init(&lqe->lqe_link);
			spin_unlock(&qsd->qsd_adjust_lock);

			if (!kthread_should_stop() && uptodate) {
				qsd_refresh_usage(env, lqe);
				/* adjust_time == 0 marks a lock cancel
				 * request (see qsd_adjust_schedule) */
				if (lqe->lqe_adjust_time == 0)
					qsd_id_lock_cancel(env, lqe);
					qsd_adjust(env, lqe);

			spin_lock(&qsd->qsd_adjust_lock);
		spin_unlock(&qsd->qsd_adjust_lock);

		if (uptodate || kthread_should_stop())

		/* index copies not up to date: (re)start reintegration */
		for (qtype = USRQUOTA; qtype < LL_MAXQUOTAS; qtype++)
			qsd_start_reint_thread(qsd->qsd_type_array[qtype]);

	__set_current_state(TASK_RUNNING);
/* Allocate the thread arguments, initialize a per-thread lu_env, spawn the
 * writeback thread and wait until it has signalled startup.  On failure the
 * env is torn down in the error path below. */
int qsd_start_upd_thread(struct qsd_instance *qsd)
	struct qsd_upd_args *args;
	struct task_struct *task;
	DECLARE_COMPLETION_ONSTACK(started);

	rc = lu_env_init(&args->qua_env, LCT_DT_THREAD);
		CERROR("%s: cannot init env: rc = %d\n", qsd->qsd_svname, rc);

	args->qua_inst = qsd;
	args->qua_started = &started;

	task = kthread_create(qsd_upd_thread, args,
			      "lquota_wb_%s", qsd->qsd_svname);
		CERROR("fail to start quota update thread: rc = %d\n", rc);

	/* publish the task before waking it so qsd_upd_add() can find it */
	qsd->qsd_upd_task = task;
	wake_up_process(task);
	/* don't return until the thread has completed its startup */
	wait_for_completion(&started);

	/* error path: undo the env initialization */
	lu_env_fini(&args->qua_env);
/* Drop all remaining deferred updates (global and slave lists) of every
 * quota type; called on shutdown, hence the warnings about freed records. */
static void qsd_cleanup_deferred(struct qsd_instance *qsd)
	for (qtype = USRQUOTA; qtype < LL_MAXQUOTAS; qtype++) {
		struct qsd_upd_rec *upd, *tmp;
		struct qsd_qtype_info *qqi = qsd->qsd_type_array[qtype];

		write_lock(&qsd->qsd_lock);
		/* flush the deferred global-index updates */
		list_for_each_entry_safe(upd, tmp, &qqi->qqi_deferred_glb,
			CWARN("%s: Free global deferred upd: ID:%llu, "
			      "ver:%llu/%llu\n", qsd->qsd_svname,
			      upd->qur_qid.qid_uid, upd->qur_ver,
			list_del_init(&upd->qur_link);
		/* flush the deferred slave-index updates */
		list_for_each_entry_safe(upd, tmp, &qqi->qqi_deferred_slv,
			CWARN("%s: Free slave deferred upd: ID:%llu, "
			      "ver:%llu/%llu\n", qsd->qsd_svname,
			      upd->qur_qid.qid_uid, upd->qur_ver,
			list_del_init(&upd->qur_link);
		write_unlock(&qsd->qsd_lock);
/* Empty the pending-adjustment list on shutdown, unlinking every queued
 * lquota entry. */
static void qsd_cleanup_adjust(struct qsd_instance *qsd)
	struct lquota_entry *lqe;

	spin_lock(&qsd->qsd_adjust_lock);
	while (!list_empty(&qsd->qsd_adjust_list)) {
		lqe = list_entry(qsd->qsd_adjust_list.next,
				 struct lquota_entry, lqe_link);
		list_del_init(&lqe->lqe_link);
	spin_unlock(&qsd->qsd_adjust_lock);
/* Stop the writeback thread and drain the leftover work lists.
 * qsd_upd_task is cleared under qsd_lock first so that concurrent
 * qsd_upd_add()/qsd_adjust_schedule() callers stop waking it. */
void qsd_stop_upd_thread(struct qsd_instance *qsd)
	struct task_struct *task;

	write_lock(&qsd->qsd_lock);
	task = qsd->qsd_upd_task;
	qsd->qsd_upd_task = NULL;
	write_unlock(&qsd->qsd_lock);

	/* discard whatever work is still queued */
	qsd_cleanup_deferred(qsd);
	qsd_cleanup_adjust(qsd);