Whamcloud - gitweb
6584df52d60b7e61953e53dd42968aa736221341
[fs/lustre-release.git] / lustre / quota / qsd_writeback.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; if not, write to the
18  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19  * Boston, MA 021110-1307, USA
20  *
21  * GPL HEADER END
22  */
23 /*
24  * Copyright (c) 2012, 2017, Intel Corporation.
25  * Use is subject to license terms.
26  *
27  * Author: Johann Lombardi <johann.lombardi@intel.com>
28  * Author: Niu    Yawei    <yawei.niu@intel.com>
29  */
30
31 #define DEBUG_SUBSYSTEM S_LQUOTA
32
33 #include <linux/kthread.h>
34 #include "qsd_internal.h"
35
36 /*
37  * Allocate and fill an qsd_upd_rec structure to be processed by the writeback
38  * thread.
39  *
40  * \param qqi - is the qsd_qtype_info structure relevant to the update
41  * \param lqe - is the lquota entry subject to the update
42  * \param qid - is the identifier subject to the update
43  * \param rec - is the record storing the new quota settings
44  * \param ver - is the version associated with the update
45  * \param global - is a boolean set to true if this is an update of the global
46  *                 index and false for a slave index.
47  */
48 static struct qsd_upd_rec *qsd_upd_alloc(struct qsd_qtype_info *qqi,
49                                          struct lquota_entry *lqe,
50                                          union lquota_id *qid,
51                                          union lquota_rec *rec, __u64 ver,
52                                          bool global)
53 {
54         struct qsd_upd_rec      *upd;
55
56         OBD_SLAB_ALLOC_PTR_GFP(upd, upd_kmem, GFP_NOFS);
57         if (upd == NULL) {
58                 return NULL;
59         }
60
61         /* fill it */
62         INIT_LIST_HEAD(&upd->qur_link);
63         upd->qur_qqi = qqi;
64         upd->qur_lqe = lqe;
65         if (lqe)
66                 lqe_getref(lqe);
67         upd->qur_qid    = *qid;
68         upd->qur_rec    = *rec;
69         upd->qur_ver    = ver;
70         upd->qur_global = global;
71
72         return upd;
73 }
74
75 static void qsd_upd_free(struct qsd_upd_rec *upd)
76 {
77         if (upd->qur_lqe)
78                 lqe_putref(upd->qur_lqe);
79         OBD_SLAB_FREE_PTR(upd, upd_kmem);
80 }
81
82 /* must hold the qsd_lock */
83 static void qsd_upd_add(struct qsd_instance *qsd, struct qsd_upd_rec *upd)
84 {
85         if (!qsd->qsd_stopping) {
86                 list_add_tail(&upd->qur_link, &qsd->qsd_upd_list);
87                 /* wake up the upd thread */
88                 if (qsd->qsd_upd_task)
89                         wake_up_process(qsd->qsd_upd_task);
90         } else {
91                 CWARN("%s: discard update.\n", qsd->qsd_svname);
92                 if (upd->qur_lqe)
93                         LQUOTA_WARN(upd->qur_lqe, "discard update.");
94                 qsd_upd_free(upd);
95         }
96 }
97
98 /* must hold the qsd_lock */
99 static void qsd_add_deferred(struct qsd_instance *qsd, struct list_head *list,
100                              struct qsd_upd_rec *upd)
101 {
102         struct qsd_upd_rec      *tmp, *n;
103
104         if (qsd->qsd_stopping) {
105                 CWARN("%s: discard deferred udpate.\n", qsd->qsd_svname);
106                 if (upd->qur_lqe)
107                         LQUOTA_WARN(upd->qur_lqe, "discard deferred update.");
108                 qsd_upd_free(upd);
109                 return;
110         }
111
112         /* Sort the updates in ascending order */
113         list_for_each_entry_safe_reverse(tmp, n, list, qur_link) {
114
115                 /* There could be some legacy records which have duplicated
116                  * version. Imagine following scenario: slave received global
117                  * glimpse and queued a record in the deferred list, then
118                  * master crash and rollback to an ealier version, then the
119                  * version of queued record will be conflicting with later
120                  * updates. We should just delete the legacy record in such
121                  * case. */
122                 if (upd->qur_ver == tmp->qur_ver) {
123                         if (tmp->qur_lqe)
124                                 LQUOTA_WARN(tmp->qur_lqe, "Found a conflict "
125                                             "record with ver:%llu",
126                                             tmp->qur_ver);
127                         else
128                                 CWARN("%s: Found a conflict record with ver: "
129                                       "%llu\n", qsd->qsd_svname, tmp->qur_ver);
130
131                         list_del_init(&tmp->qur_link);
132                         qsd_upd_free(tmp);
133                 } else if (upd->qur_ver < tmp->qur_ver) {
134                         continue;
135                 } else {
136                         list_add_tail(&upd->qur_link, &tmp->qur_link);
137                         return;
138                 }
139         }
140         list_add(&upd->qur_link, list);
141 }
142
143 /* must hold the qsd_lock */
144 static void qsd_kickoff_deferred(struct qsd_qtype_info *qqi,
145                                  struct list_head *list, __u64 ver)
146 {
147         struct qsd_upd_rec      *upd, *tmp;
148         ENTRY;
149
150         /* Get the first update record in the list, which has the smallest
151          * version, discard all records with versions smaller than the current
152          * one */
153         list_for_each_entry_safe(upd, tmp, list, qur_link) {
154                 if (upd->qur_ver <= ver) {
155                         /* drop this update */
156                         list_del_init(&upd->qur_link);
157                         CDEBUG(D_QUOTA, "%s: skipping deferred update ver:"
158                                "%llu/%llu, global:%d, qid:%llu\n",
159                                qqi->qqi_qsd->qsd_svname, upd->qur_ver, ver,
160                                upd->qur_global, upd->qur_qid.qid_uid);
161                         qsd_upd_free(upd);
162                 } else {
163                         break;
164                 }
165         }
166
167         /* No remaining deferred update */
168         if (list_empty(list))
169                 RETURN_EXIT;
170
171         CDEBUG(D_QUOTA, "%s: found deferred update record. "
172                "version:%llu/%llu, global:%d, qid:%llu\n",
173                qqi->qqi_qsd->qsd_svname, upd->qur_ver, ver,
174                upd->qur_global, upd->qur_qid.qid_uid);
175
176         LASSERTF(upd->qur_ver > ver, "lur_ver:%llu, cur_ver:%llu\n",
177                  upd->qur_ver, ver);
178
179         /* Kick off the deferred udpate */
180         if (upd->qur_ver == ver + 1) {
181                 list_del_init(&upd->qur_link);
182                 qsd_upd_add(qqi->qqi_qsd, upd);
183         }
184         EXIT;
185 }
186
187 /* Bump version of global or slave index copy
188  *
189  * \param qqi    - qsd_qtype_info
190  * \param ver    - version to be bumped to
191  * \param global - global or slave index copy?
192  */
193 void qsd_bump_version(struct qsd_qtype_info *qqi, __u64 ver, bool global)
194 {
195         struct list_head *list;
196         __u64            *idx_ver;
197
198         idx_ver = global ? &qqi->qqi_glb_ver : &qqi->qqi_slv_ver;
199         list    = global ? &qqi->qqi_deferred_glb : &qqi->qqi_deferred_slv;
200
201         write_lock(&qqi->qqi_qsd->qsd_lock);
202         *idx_ver = ver;
203         if (global)
204                 qqi->qqi_glb_uptodate = 1;
205         else
206                 qqi->qqi_slv_uptodate = 1;
207         qsd_kickoff_deferred(qqi, list, ver);
208         write_unlock(&qqi->qqi_qsd->qsd_lock);
209 }
210
211 /*
212  * Schedule a commit of a lquota entry
213  *
214  * \param  qqi   - qsd_qtype_info
215  * \param  lqe   - lquota_entry
216  * \param  qid   - quota id
217  * \param  rec   - global or slave record to be updated to disk
218  * \param  ver   - new index file version
219  * \param  global- true: master record; false: slave record
220  */
221 void qsd_upd_schedule(struct qsd_qtype_info *qqi, struct lquota_entry *lqe,
222                       union lquota_id *qid, union lquota_rec *rec, __u64 ver,
223                       bool global)
224 {
225         struct qsd_upd_rec      *upd;
226         struct qsd_instance     *qsd = qqi->qqi_qsd;
227         __u64                    cur_ver;
228         ENTRY;
229
230         CDEBUG(D_QUOTA, "%s: schedule update. global:%s, version:%llu\n",
231                qsd->qsd_svname, global ? "true" : "false", ver);
232
233         upd = qsd_upd_alloc(qqi, lqe, qid, rec, ver, global);
234         if (upd == NULL)
235                 RETURN_EXIT;
236
237         /* If we don't want update index version, no need to sort the
238          * records in version order, just schedule the updates instantly. */
239         if (ver == 0) {
240                 write_lock(&qsd->qsd_lock);
241                 qsd_upd_add(qsd, upd);
242                 write_unlock(&qsd->qsd_lock);
243                 RETURN_EXIT;
244         }
245
246         write_lock(&qsd->qsd_lock);
247
248         cur_ver = global ? qqi->qqi_glb_ver : qqi->qqi_slv_ver;
249
250         if (ver <= cur_ver) {
251                 if (global)
252                         /* legitimate race between glimpse AST and
253                          * reintegration */
254                         CDEBUG(D_QUOTA, "%s: discarding glb update from glimpse"
255                                " ver:%llu local ver:%llu\n",
256                                qsd->qsd_svname, ver, cur_ver);
257                 else
258                         CERROR("%s: discard slv update, ver:%llu local ver:"
259                                "%llu\n", qsd->qsd_svname, ver, cur_ver);
260                 qsd_upd_free(upd);
261         } else if ((ver == cur_ver + 1) && qqi->qqi_glb_uptodate &&
262                    qqi->qqi_slv_uptodate) {
263                 /* In order update, and reintegration has been done. */
264                 qsd_upd_add(qsd, upd);
265         } else {
266                 /* Out of order update (the one with smaller version hasn't
267                  * reached slave or hasn't been flushed to disk yet), or
268                  * the reintegration is in progress. Defer the update. */
269                 struct list_head *list = global ? &qqi->qqi_deferred_glb :
270                                                   &qqi->qqi_deferred_slv;
271                 qsd_add_deferred(qsd, list, upd);
272         }
273
274         write_unlock(&qsd->qsd_lock);
275
276         EXIT;
277 }
278
279 static int qsd_process_upd(const struct lu_env *env, struct qsd_upd_rec *upd)
280 {
281         struct lquota_entry     *lqe = upd->qur_lqe;
282         struct qsd_qtype_info   *qqi = upd->qur_qqi;
283         int                      rc;
284         ENTRY;
285
286         if (lqe == NULL) {
287                 lqe = lqe_locate(env, qqi->qqi_site, &upd->qur_qid);
288                 if (IS_ERR(lqe))
289                         GOTO(out, rc = PTR_ERR(lqe));
290         }
291
292         /* The in-memory lqe update for slave index copy isn't deferred,
293          * we shouldn't touch it here. */
294         if (upd->qur_global) {
295                 rc = qsd_update_lqe(env, lqe, upd->qur_global, &upd->qur_rec);
296                 if (rc)
297                         GOTO(out, rc);
298                 /* refresh usage */
299                 qsd_refresh_usage(env, lqe);
300
301                 spin_lock(&qqi->qqi_qsd->qsd_adjust_lock);
302                 lqe->lqe_adjust_time = 0;
303                 spin_unlock(&qqi->qqi_qsd->qsd_adjust_lock);
304
305                 /* Report usage asynchronously */
306                 rc = qsd_adjust(env, lqe);
307                 if (rc)
308                         LQUOTA_ERROR(lqe, "failed to report usage, rc:%d", rc);
309         }
310
311         rc = qsd_update_index(env, qqi, &upd->qur_qid, upd->qur_global,
312                               upd->qur_ver, &upd->qur_rec);
313 out:
314         if (upd->qur_global && rc == 0 &&
315             upd->qur_rec.lqr_glb_rec.qbr_softlimit == 0 &&
316             upd->qur_rec.lqr_glb_rec.qbr_hardlimit == 0 &&
317             (LQUOTA_FLAG(upd->qur_rec.lqr_glb_rec.qbr_time) &
318                                                         LQUOTA_FLAG_DEFAULT)) {
319                 lqe->lqe_is_default = true;
320                 if (qqi->qqi_default_softlimit == 0 &&
321                     qqi->qqi_default_hardlimit == 0)
322                         lqe->lqe_enforced = false;
323                 else
324                         lqe->lqe_enforced = true;
325
326                 LQUOTA_DEBUG(lqe, "update to use default quota");
327         }
328
329         if (lqe && !IS_ERR(lqe)) {
330                 lqe_putref(lqe);
331                 upd->qur_lqe = NULL;
332         }
333         RETURN(rc);
334 }
335
336 void qsd_adjust_schedule(struct lquota_entry *lqe, bool defer, bool cancel)
337 {
338         struct qsd_instance     *qsd = lqe2qqi(lqe)->qqi_qsd;
339         bool                     added = false;
340
341         read_lock(&qsd->qsd_lock);
342         if (qsd->qsd_stopping) {
343                 read_unlock(&qsd->qsd_lock);
344                 return;
345         }
346         read_unlock(&qsd->qsd_lock);
347
348         lqe_getref(lqe);
349         spin_lock(&qsd->qsd_adjust_lock);
350
351         /* the lqe is being queued for the per-ID lock cancel, we should
352          * cancel the lock cancel and re-add it for quota adjust */
353         if (!list_empty(&lqe->lqe_link) &&
354             lqe->lqe_adjust_time == 0) {
355                 list_del_init(&lqe->lqe_link);
356                 lqe_putref(lqe);
357         }
358
359         if (list_empty(&lqe->lqe_link)) {
360                 if (!cancel) {
361                         lqe->lqe_adjust_time = ktime_get_seconds();
362                         if (defer)
363                                 lqe->lqe_adjust_time += QSD_WB_INTERVAL;
364                 } else {
365                         lqe->lqe_adjust_time = 0;
366                 }
367
368                 /* lqe reference transferred to list */
369                 if (defer)
370                         list_add_tail(&lqe->lqe_link,
371                                           &qsd->qsd_adjust_list);
372                 else
373                         list_add(&lqe->lqe_link, &qsd->qsd_adjust_list);
374                 added = true;
375         }
376         spin_unlock(&qsd->qsd_adjust_lock);
377
378         if (!added)
379                 lqe_putref(lqe);
380         else {
381                 read_lock(&qsd->qsd_lock);
382                 if (qsd->qsd_upd_task)
383                         wake_up_process(qsd->qsd_upd_task);
384                 read_unlock(&qsd->qsd_lock);
385         }
386 }
387
388 /* return true if there is pending writeback records or the pending
389  * adjust requests */
390 static bool qsd_job_pending(struct qsd_instance *qsd, struct list_head *upd,
391                             bool *uptodate)
392 {
393         bool    job_pending = false;
394         int     qtype;
395
396         LASSERT(list_empty(upd));
397         *uptodate = true;
398
399         spin_lock(&qsd->qsd_adjust_lock);
400         if (!list_empty(&qsd->qsd_adjust_list)) {
401                 struct lquota_entry *lqe;
402                 lqe = list_entry(qsd->qsd_adjust_list.next,
403                                      struct lquota_entry, lqe_link);
404                 if (ktime_get_seconds() >= lqe->lqe_adjust_time)
405                         job_pending = true;
406         }
407         spin_unlock(&qsd->qsd_adjust_lock);
408
409         write_lock(&qsd->qsd_lock);
410         if (!list_empty(&qsd->qsd_upd_list)) {
411                 list_splice_init(&qsd->qsd_upd_list, upd);
412                 job_pending = true;
413         }
414
415         for (qtype = USRQUOTA; qtype < LL_MAXQUOTAS; qtype++) {
416                 struct qsd_qtype_info *qqi = qsd->qsd_type_array[qtype];
417
418                 /* don't bother kicking off reintegration if space accounting
419                  * failed to be enabled */
420                 if (qqi->qqi_acct_failed)
421                         continue;
422
423                 if (!qsd_type_enabled(qsd, qtype))
424                         continue;
425
426                 if ((!qqi->qqi_glb_uptodate || !qqi->qqi_slv_uptodate) &&
427                      !qqi->qqi_reint)
428                         /* global or slave index not up to date and reint
429                          * thread not running */
430                         *uptodate = false;
431         }
432
433         write_unlock(&qsd->qsd_lock);
434         return job_pending;
435 }
436
437 struct qsd_upd_args {
438         struct qsd_instance     *qua_inst;
439         struct lu_env            qua_env;
440         struct completion       *qua_started;
441 };
442
#if !defined(TASK_IDLE)
/*
 * Fallback for kernels that do not provide TASK_IDLE (it was added in
 * v4.1-rc4-43-g80ed87c8a9ca); this can go away once such kernels are no
 * longer supported.
 * Substituting TASK_INTERRUPTIBLE is only safe inside kernel threads, or
 * other places where all signals are disabled, which is why the definition
 * lives here rather than in an include file.
 */
#define TASK_IDLE TASK_INTERRUPTIBLE
#endif /* !TASK_IDLE */
452
453 static int qsd_upd_thread(void *_args)
454 {
455         struct qsd_upd_args     *args = _args;
456         struct qsd_instance     *qsd = args->qua_inst;
457         LIST_HEAD(queue);
458         struct qsd_upd_rec      *upd, *n;
459         struct lu_env           *env = &args->qua_env;
460         int                      qtype, rc = 0;
461         bool                     uptodate;
462         struct lquota_entry     *lqe;
463         time64_t cur_time;
464         ENTRY;
465
466         complete(args->qua_started);
467         while (({set_current_state(TASK_IDLE);
468                  !kthread_should_stop(); })) {
469
470                 if (!qsd_job_pending(qsd, &queue, &uptodate))
471                         schedule_timeout(cfs_time_seconds(QSD_WB_INTERVAL));
472                 __set_current_state(TASK_RUNNING);
473
474                 list_for_each_entry_safe(upd, n, &queue, qur_link) {
475                         list_del_init(&upd->qur_link);
476                         qsd_process_upd(env, upd);
477                         qsd_upd_free(upd);
478                 }
479
480                 spin_lock(&qsd->qsd_adjust_lock);
481                 cur_time = ktime_get_seconds();
482                 while (!list_empty(&qsd->qsd_adjust_list)) {
483                         lqe = list_entry(qsd->qsd_adjust_list.next,
484                                          struct lquota_entry, lqe_link);
485                         /* deferred items are sorted by time */
486                         if (lqe->lqe_adjust_time > cur_time)
487                                 break;
488
489                         list_del_init(&lqe->lqe_link);
490                         spin_unlock(&qsd->qsd_adjust_lock);
491
492                         if (!kthread_should_stop() && uptodate) {
493                                 qsd_refresh_usage(env, lqe);
494                                 if (lqe->lqe_adjust_time == 0)
495                                         qsd_id_lock_cancel(env, lqe);
496                                 else
497                                         qsd_adjust(env, lqe);
498                         }
499
500                         lqe_putref(lqe);
501                         spin_lock(&qsd->qsd_adjust_lock);
502                 }
503                 spin_unlock(&qsd->qsd_adjust_lock);
504
505                 if (uptodate || kthread_should_stop())
506                         continue;
507
508                 for (qtype = USRQUOTA; qtype < LL_MAXQUOTAS; qtype++)
509                         qsd_start_reint_thread(qsd->qsd_type_array[qtype]);
510         }
511         __set_current_state(TASK_RUNNING);
512
513         lu_env_fini(env);
514         OBD_FREE_PTR(args);
515
516         RETURN(rc);
517 }
518
519 int qsd_start_upd_thread(struct qsd_instance *qsd)
520 {
521         struct qsd_upd_args *args;
522         struct task_struct *task;
523         DECLARE_COMPLETION_ONSTACK(started);
524         int rc;
525         ENTRY;
526
527         OBD_ALLOC_PTR(args);
528         if (args == NULL)
529                 RETURN(-ENOMEM);
530
531         rc = lu_env_init(&args->qua_env, LCT_DT_THREAD);
532         if (rc) {
533                 CERROR("%s: cannot init env: rc = %d\n", qsd->qsd_svname, rc);
534                 goto out_free;
535         }
536         args->qua_inst = qsd;
537         args->qua_started = &started;
538
539         task = kthread_create(qsd_upd_thread, args,
540                               "lquota_wb_%s", qsd->qsd_svname);
541         if (IS_ERR(task)) {
542                 rc = PTR_ERR(task);
543                 CERROR("fail to start quota update thread: rc = %d\n", rc);
544                 goto out_fini;
545         }
546         qsd->qsd_upd_task = task;
547         wake_up_process(task);
548         wait_for_completion(&started);
549
550         RETURN(0);
551
552 out_fini:
553         lu_env_fini(&args->qua_env);
554 out_free:
555         OBD_FREE_PTR(args);
556         RETURN(rc);
557 }
558
559 static void qsd_cleanup_deferred(struct qsd_instance *qsd)
560 {
561         int     qtype;
562
563         for (qtype = USRQUOTA; qtype < LL_MAXQUOTAS; qtype++) {
564                 struct qsd_upd_rec      *upd, *tmp;
565                 struct qsd_qtype_info   *qqi = qsd->qsd_type_array[qtype];
566
567                 if (qqi == NULL)
568                         continue;
569
570                 write_lock(&qsd->qsd_lock);
571                 list_for_each_entry_safe(upd, tmp, &qqi->qqi_deferred_glb,
572                                          qur_link) {
573                         CWARN("%s: Free global deferred upd: ID:%llu, "
574                               "ver:%llu/%llu\n", qsd->qsd_svname,
575                               upd->qur_qid.qid_uid, upd->qur_ver,
576                               qqi->qqi_glb_ver);
577                         list_del_init(&upd->qur_link);
578                         qsd_upd_free(upd);
579                 }
580                 list_for_each_entry_safe(upd, tmp, &qqi->qqi_deferred_slv,
581                                          qur_link) {
582                         CWARN("%s: Free slave deferred upd: ID:%llu, "
583                               "ver:%llu/%llu\n", qsd->qsd_svname,
584                               upd->qur_qid.qid_uid, upd->qur_ver,
585                               qqi->qqi_slv_ver);
586                         list_del_init(&upd->qur_link);
587                         qsd_upd_free(upd);
588                 }
589                 write_unlock(&qsd->qsd_lock);
590         }
591 }
592
593 static void qsd_cleanup_adjust(struct qsd_instance *qsd)
594 {
595         struct lquota_entry     *lqe;
596
597         spin_lock(&qsd->qsd_adjust_lock);
598         while (!list_empty(&qsd->qsd_adjust_list)) {
599                 lqe = list_entry(qsd->qsd_adjust_list.next,
600                                  struct lquota_entry, lqe_link);
601                 list_del_init(&lqe->lqe_link);
602                 lqe_putref(lqe);
603         }
604         spin_unlock(&qsd->qsd_adjust_lock);
605 }
606
607 void qsd_stop_upd_thread(struct qsd_instance *qsd)
608 {
609         struct task_struct *task;
610
611         write_lock(&qsd->qsd_lock);
612         task = qsd->qsd_upd_task;
613         qsd->qsd_upd_task = NULL;
614         write_unlock(&qsd->qsd_lock);
615         if (task)
616                 kthread_stop(task);
617
618         qsd_cleanup_deferred(qsd);
619         qsd_cleanup_adjust(qsd);
620 }