Whamcloud - gitweb
LU-2237 tests: new test for re-recreating last_rcvd
[fs/lustre-release.git] / lustre / quota / qsd_writeback.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; if not, write to the
18  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19  * Boston, MA 021110-1307, USA
20  *
21  * GPL HEADER END
22  */
23 /*
24  * Copyright (c) 2011, 2012, Intel, Inc.
25  * Use is subject to license terms.
26  *
27  * Author: Johann Lombardi <johann.lombardi@intel.com>
28  * Author: Niu    Yawei    <yawei.niu@intel.com>
29  */
30
31 #ifndef EXPORT_SYMTAB
32 # define EXPORT_SYMTAB
33 #endif
34
35 #define DEBUG_SUBSYSTEM S_LQUOTA
36
37 #include "qsd_internal.h"
38
39 extern cfs_mem_cache_t *upd_kmem;
40
41 /*
42  * Allocate and fill an qsd_upd_rec structure to be processed by the writeback
43  * thread.
44  *
45  * \param qqi - is the qsd_qtype_info structure relevant to the update
46  * \param lqe - is the lquota entry subject to the update
47  * \param qid - is the identifier subject to the update
48  * \param rec - is the record storing the new quota settings
49  * \param ver - is the version associated with the update
50  * \param global - is a boolean set to true if this is an update of the global
51  *                 index and false for a slave index.
52  */
53 static struct qsd_upd_rec *qsd_upd_alloc(struct qsd_qtype_info *qqi,
54                                          struct lquota_entry *lqe,
55                                          union lquota_id *qid,
56                                          union lquota_rec *rec, __u64 ver,
57                                          bool global)
58 {
59         struct qsd_upd_rec      *upd;
60
61         OBD_SLAB_ALLOC_PTR_GFP(upd, upd_kmem, CFS_ALLOC_IO);
62         if (upd == NULL) {
63                 CERROR("Failed to allocate upd");
64                 return NULL;
65         }
66
67         /* fill it */
68         CFS_INIT_LIST_HEAD(&upd->qur_link);
69         upd->qur_qqi = qqi;
70         upd->qur_lqe = lqe;
71         if (lqe)
72                 lqe_getref(lqe);
73         upd->qur_qid    = *qid;
74         upd->qur_rec    = *rec;
75         upd->qur_ver    = ver;
76         upd->qur_global = global;
77
78         return upd;
79 }
80
81 static void qsd_upd_free(struct qsd_upd_rec *upd)
82 {
83         if (upd->qur_lqe)
84                 lqe_putref(upd->qur_lqe);
85         OBD_SLAB_FREE_PTR(upd, upd_kmem);
86 }
87
88 /* must hold the qsd_lock */
89 static void qsd_upd_add(struct qsd_instance *qsd, struct qsd_upd_rec *upd)
90 {
91         if (!qsd->qsd_stopping) {
92                 list_add_tail(&upd->qur_link, &qsd->qsd_upd_list);
93                 /* wake up the upd thread */
94                 cfs_waitq_signal(&qsd->qsd_upd_thread.t_ctl_waitq);
95         } else {
96                 CWARN("%s: discard deferred update.\n", qsd->qsd_svname);
97                 if (upd->qur_lqe)
98                         LQUOTA_WARN(upd->qur_lqe, "discard deferred update.");
99                 qsd_upd_free(upd);
100         }
101 }
102
103 /* must hold the qsd_lock */
104 static void qsd_add_deferred(cfs_list_t *list, struct qsd_upd_rec *upd)
105 {
106         struct qsd_upd_rec      *tmp, *n;
107
108         /* Sort the updates in ascending order */
109         cfs_list_for_each_entry_safe_reverse(tmp, n, list, qur_link) {
110
111                 /* There could be some legacy records which have duplicated
112                  * version. Imagine following scenario: slave received global
113                  * glimpse and queued a record in the deferred list, then
114                  * master crash and rollback to an ealier version, then the
115                  * version of queued record will be conflicting with later
116                  * updates. We should just delete the legacy record in such
117                  * case. */
118                 if (upd->qur_ver == tmp->qur_ver) {
119                         LASSERT(tmp->qur_lqe);
120                         LQUOTA_ERROR(tmp->qur_lqe, "Found a conflict record "
121                                      "with ver:"LPU64"", tmp->qur_ver);
122                         cfs_list_del_init(&tmp->qur_link);
123                         qsd_upd_free(tmp);
124                 }
125
126                 if (upd->qur_ver < tmp->qur_ver) {
127                         continue;
128                 } else {
129                         cfs_list_add_tail(&upd->qur_link, &tmp->qur_link);
130                         return;
131                 }
132         }
133         cfs_list_add(&upd->qur_link, list);
134 }
135
136 /* must hold the qsd_lock */
137 static void qsd_kickoff_deferred(struct qsd_qtype_info *qqi, cfs_list_t *list,
138                                  __u64 ver)
139 {
140         struct qsd_upd_rec      *upd, *tmp;
141         ENTRY;
142
143         /* Get the first update record in the list, which has the smallest
144          * version, discard all records with versions smaller than the current
145          * one */
146         cfs_list_for_each_entry_safe(upd, tmp, list, qur_link) {
147                 if (upd->qur_ver <= ver) {
148                         /* drop this update */
149                         cfs_list_del_init(&upd->qur_link);
150                         CDEBUG(D_QUOTA, "%s: skipping deferred update ver:"
151                                LPU64"/"LPU64", global:%d, qid:"LPU64"\n",
152                                qqi->qqi_qsd->qsd_svname, upd->qur_ver, ver,
153                                upd->qur_global, upd->qur_qid.qid_uid);
154                         qsd_upd_free(upd);
155                 } else {
156                         break;
157                 }
158         }
159
160         /* No remaining deferred update */
161         if (cfs_list_empty(list))
162                 RETURN_EXIT;
163
164         CDEBUG(D_QUOTA, "%s: found deferred update record. "
165                "version:"LPU64"/"LPU64", global:%d, qid:"LPU64"\n",
166                qqi->qqi_qsd->qsd_svname, upd->qur_ver, ver,
167                upd->qur_global, upd->qur_qid.qid_uid);
168
169         LASSERTF(upd->qur_ver > ver, "lur_ver:"LPU64", cur_ver:"LPU64"\n",
170                  upd->qur_ver, ver);
171
172         /* Kick off the deferred udpate */
173         if (upd->qur_ver == ver + 1) {
174                 list_del_init(&upd->qur_link);
175                 qsd_upd_add(qqi->qqi_qsd, upd);
176         }
177         EXIT;
178 }
179
180 /* Bump version of global or slave index copy
181  *
182  * \param qqi    - qsd_qtype_info
183  * \param ver    - version to be bumped to
184  * \param global - global or slave index copy?
185  */
186 void qsd_bump_version(struct qsd_qtype_info *qqi, __u64 ver, bool global)
187 {
188         cfs_list_t      *list;
189         __u64           *idx_ver;
190
191         idx_ver = global ? &qqi->qqi_glb_ver : &qqi->qqi_slv_ver;
192         list    = global ? &qqi->qqi_deferred_glb : &qqi->qqi_deferred_slv;
193
194         write_lock(&qqi->qqi_qsd->qsd_lock);
195         *idx_ver = ver;
196         if (global)
197                 qqi->qqi_glb_uptodate = 1;
198         else
199                 qqi->qqi_slv_uptodate = 1;
200         qsd_kickoff_deferred(qqi, list, ver);
201         write_unlock(&qqi->qqi_qsd->qsd_lock);
202 }
203
/*
 * Schedule a commit of a lquota entry
 *
 * \param  qqi   - qsd_qtype_info
 * \param  lqe   - lquota_entry
 * \param  qid   - quota id
 * \param  rec   - global or slave record to be updated to disk
 * \param  ver   - new index file version
 * \param  global- true : master record; false : slave record
 */
void qsd_upd_schedule(struct qsd_qtype_info *qqi, struct lquota_entry *lqe,
                      union lquota_id *qid, union lquota_rec *rec, __u64 ver,
                      bool global)
{
        struct qsd_upd_rec      *upd;
        struct qsd_instance     *qsd = qqi->qqi_qsd;
        __u64                    cur_ver;
        ENTRY;

        CDEBUG(D_QUOTA, "%s: schedule update. global:%s, version:"LPU64"\n",
               qsd->qsd_svname, global ? "true" : "false", ver);

        upd = qsd_upd_alloc(qqi, lqe, qid, rec, ver, global);
        if (upd == NULL)
                /* allocation failure: the update is silently dropped */
                RETURN_EXIT;

        /* If we don't want update index version, no need to sort the
         * records in version order, just schedule the updates instantly. */
        if (ver == 0) {
                write_lock(&qsd->qsd_lock);
                qsd_upd_add(qsd, upd);
                write_unlock(&qsd->qsd_lock);
                RETURN_EXIT;
        }

        write_lock(&qsd->qsd_lock);

        /* current on-disk version of the index copy this update targets */
        cur_ver = global ? qqi->qqi_glb_ver : qqi->qqi_slv_ver;

        if (ver <= cur_ver) {
                /* stale update: the index already caught up with this
                 * version, discard it */
                if (global)
                        /* legitimate race between glimpse AST and
                         * reintegration */
                        CDEBUG(D_QUOTA, "%s: discarding glb update from glimpse"
                               " ver:"LPU64" local ver:"LPU64"\n",
                               qsd->qsd_svname, ver, cur_ver);
                else
                        CERROR("%s: discard slv update, ver:"LPU64" local ver:"
                               LPU64"\n", qsd->qsd_svname, ver, cur_ver);
                qsd_upd_free(upd);
        } else if ((ver == cur_ver + 1) && qqi->qqi_glb_uptodate &&
                   qqi->qqi_slv_uptodate) {
                /* In order update, and reintegration has been done. */
                qsd_upd_add(qsd, upd);
        } else {
                /* Out of order update (the one with smaller version hasn't
                 * reached slave or hasn't been flushed to disk yet), or
                 * the reintegration is in progress. Defer the update. */
                cfs_list_t *list = global ? &qqi->qqi_deferred_glb :
                                            &qqi->qqi_deferred_slv;
                qsd_add_deferred(list, upd);
        }

        write_unlock(&qsd->qsd_lock);

        EXIT;
}
271
/*
 * Process one writeback record: for a global-index update, first apply the
 * new settings to the in-memory lquota entry, refresh its usage and report
 * it asynchronously; then, in all cases, flush the record to the on-disk
 * index copy.
 *
 * \param env - is the environment passed by the caller
 * \param upd - is the update record to be processed
 *
 * \retval 0 on success, negative errno on failure.
 */
static int qsd_process_upd(const struct lu_env *env, struct qsd_upd_rec *upd)
{
        struct lquota_entry     *lqe = upd->qur_lqe;
        struct qsd_qtype_info   *qqi = upd->qur_qqi;
        int                      rc;
        ENTRY;

        /* no lquota entry attached to the record, look it up by quota id */
        if (lqe == NULL) {
                lqe = lqe_locate(env, qqi->qqi_site, &upd->qur_qid);
                if (IS_ERR(lqe))
                        GOTO(out, rc = PTR_ERR(lqe));
        }

        /* The in-memory lqe update for slave index copy isn't deferred,
         * we shouldn't touch it here. */
        if (upd->qur_global) {
                rc = qsd_update_lqe(env, lqe, upd->qur_global, &upd->qur_rec);
                if (rc)
                        GOTO(out, rc);
                /* refresh usage */
                qsd_refresh_usage(env, lqe);
                /* Report usage asynchronously */
                rc = qsd_adjust(env, lqe);
                if (rc)
                        /* best-effort: log the failure but still flush the
                         * record to disk below */
                        LQUOTA_ERROR(lqe, "failed to report usage, rc:%d", rc);
        }

        /* write the new quota settings to the on-disk index copy */
        rc = qsd_update_index(env, qqi, &upd->qur_qid, upd->qur_global,
                              upd->qur_ver, &upd->qur_rec);
out:
        /* drop our reference; clear qur_lqe so that qsd_upd_free() won't
         * release it a second time */
        if (lqe && !IS_ERR(lqe)) {
                lqe_putref(lqe);
                upd->qur_lqe = NULL;
        }
        RETURN(rc);
}
308
/*
 * Queue a lquota entry on the adjust list for deferred processing by the
 * writeback thread.
 *
 * \param lqe    - is the lquota entry to be queued
 * \param defer  - is true to delay processing by QSD_WB_INTERVAL, false to
 *                 process it at the next thread wakeup
 * \param cancel - is true to request a per-ID lock cancel (marked by a zero
 *                 lqe_adjust_time) instead of a quota adjustment
 */
void qsd_adjust_schedule(struct lquota_entry *lqe, bool defer, bool cancel)
{
        struct qsd_instance     *qsd = lqe2qqi(lqe)->qqi_qsd;
        bool                     added = false;

        /* extra reference; transferred to the list if we queue the lqe,
         * dropped at the bottom otherwise */
        lqe_getref(lqe);
        spin_lock(&qsd->qsd_adjust_lock);

        /* the lqe is being queued for the per-ID lock cancel, we should
         * cancel the lock cancel and re-add it for quota adjust */
        if (!cfs_list_empty(&lqe->lqe_link) &&
            lqe->lqe_adjust_time == 0) {
                cfs_list_del_init(&lqe->lqe_link);
                /* drop the reference that was held by the list */
                lqe_putref(lqe);
        }

        if (cfs_list_empty(&lqe->lqe_link)) {
                if (cancel)
                        /* adjust_time == 0 marks a lock cancel request */
                        lqe->lqe_adjust_time = 0;
                else
                        lqe->lqe_adjust_time = defer ?
                                cfs_time_shift_64(QSD_WB_INTERVAL) :
                                cfs_time_current_64();
                /* lqe reference transferred to list */
                if (defer)
                        cfs_list_add_tail(&lqe->lqe_link,
                                          &qsd->qsd_adjust_list);
                else
                        cfs_list_add(&lqe->lqe_link, &qsd->qsd_adjust_list);
                added = true;
        }
        spin_unlock(&qsd->qsd_adjust_lock);

        if (added)
                /* wake up the writeback thread to process the request */
                cfs_waitq_signal(&qsd->qsd_upd_thread.t_ctl_waitq);
        else
                lqe_putref(lqe);
}
347
348 /* return true if there is pending writeback records or the pending
349  * adjust requests */
350 static bool qsd_job_pending(struct qsd_instance *qsd, cfs_list_t *upd,
351                             bool *uptodate)
352 {
353         bool    job_pending = false;
354         int     qtype;
355
356         LASSERT(cfs_list_empty(upd));
357         *uptodate = true;
358
359         spin_lock(&qsd->qsd_adjust_lock);
360         if (!cfs_list_empty(&qsd->qsd_adjust_list)) {
361                 struct lquota_entry *lqe;
362                 lqe = cfs_list_entry(qsd->qsd_adjust_list.next,
363                                      struct lquota_entry, lqe_link);
364                 if (cfs_time_beforeq_64(lqe->lqe_adjust_time,
365                                         cfs_time_current_64()))
366                         job_pending = true;
367         }
368         spin_unlock(&qsd->qsd_adjust_lock);
369
370         write_lock(&qsd->qsd_lock);
371         if (!cfs_list_empty(&qsd->qsd_upd_list)) {
372                 cfs_list_splice_init(&qsd->qsd_upd_list, upd);
373                 job_pending = true;
374         }
375
376         for (qtype = USRQUOTA; qtype < MAXQUOTAS; qtype++) {
377                 struct qsd_qtype_info *qqi = qsd->qsd_type_array[qtype];
378
379                 if (!qsd_type_enabled(qsd, qtype))
380                         continue;
381
382                 if ((!qqi->qqi_glb_uptodate || !qqi->qqi_slv_uptodate) &&
383                      !qqi->qqi_reint)
384                         /* global or slave index not up to date and reint
385                          * thread not running */
386                         *uptodate = false;
387         }
388
389         write_unlock(&qsd->qsd_lock);
390         return job_pending;
391 }
392
/*
 * Main loop of the quota writeback thread. It flushes queued writeback
 * records to disk, performs deferred quota adjustments / per-ID lock cancels
 * whose deadline has expired, and restarts reintegration when an index copy
 * is out of date.
 *
 * \param arg - is the qsd_instance this thread works for
 *
 * \retval 0 on normal shutdown, negative errno on setup failure.
 */
static int qsd_upd_thread(void *arg)
{
        struct qsd_instance     *qsd = (struct qsd_instance *)arg;
        struct ptlrpc_thread    *thread = &qsd->qsd_upd_thread;
        struct l_wait_info       lwi;
        cfs_list_t               queue;
        struct qsd_upd_rec      *upd, *n;
        char                     pname[MTI_NAME_MAXLEN];
        struct lu_env           *env;
        int                      qtype, rc = 0;
        bool                     uptodate;
        struct lquota_entry     *lqe, *tmp;
        __u64                    cur_time;
        ENTRY;

        OBD_ALLOC_PTR(env);
        if (env == NULL)
                RETURN(-ENOMEM);

        rc = lu_env_init(env, LCT_DT_THREAD);
        if (rc) {
                CERROR("%s: Fail to init env.", qsd->qsd_svname);
                OBD_FREE_PTR(env);
                RETURN(rc);
        }

        snprintf(pname, MTI_NAME_MAXLEN, "lquota_wb_%s", qsd->qsd_svname);
        cfs_daemonize(pname);

        /* tell qsd_start_upd_thread() that we are up and running */
        thread_set_flags(thread, SVC_RUNNING);
        cfs_waitq_signal(&thread->t_ctl_waitq);

        CFS_INIT_LIST_HEAD(&queue);
        /* wake up at least every QSD_WB_INTERVAL seconds even when idle */
        lwi = LWI_TIMEOUT(cfs_time_seconds(QSD_WB_INTERVAL), NULL, NULL);
        while (1) {
                l_wait_event(thread->t_ctl_waitq,
                             qsd_job_pending(qsd, &queue, &uptodate) ||
                             !thread_is_running(thread), &lwi);

                /* process the records spliced onto our private queue by
                 * qsd_job_pending() */
                cfs_list_for_each_entry_safe(upd, n, &queue, qur_link) {
                        cfs_list_del_init(&upd->qur_link);
                        qsd_process_upd(env, upd);
                        qsd_upd_free(upd);
                }

                spin_lock(&qsd->qsd_adjust_lock);
                cur_time = cfs_time_current_64();
                cfs_list_for_each_entry_safe(lqe, tmp, &qsd->qsd_adjust_list,
                                             lqe_link) {
                        /* deferred items are sorted by time */
                        if (!cfs_time_beforeq_64(lqe->lqe_adjust_time,
                                                 cur_time))
                                break;

                        /* remove the lqe from the list first, then drop the
                         * spinlock while doing the (potentially blocking)
                         * adjustment */
                        cfs_list_del_init(&lqe->lqe_link);
                        spin_unlock(&qsd->qsd_adjust_lock);

                        if (thread_is_running(thread) && uptodate) {
                                qsd_refresh_usage(env, lqe);
                                /* adjust_time == 0 marks a per-ID lock
                                 * cancel request, see qsd_adjust_schedule */
                                if (lqe->lqe_adjust_time == 0)
                                        qsd_id_lock_cancel(env, lqe);
                                else
                                        qsd_adjust(env, lqe);
                        }

                        /* drop the reference that was held by the list */
                        lqe_putref(lqe);
                        spin_lock(&qsd->qsd_adjust_lock);
                }
                spin_unlock(&qsd->qsd_adjust_lock);

                if (!thread_is_running(thread))
                        break;

                if (uptodate)
                        continue;

                /* some index copy isn't up to date and no reintegration is
                 * in progress: restart the reint threads */
                for (qtype = USRQUOTA; qtype < MAXQUOTAS; qtype++)
                        qsd_start_reint_thread(qsd->qsd_type_array[qtype]);
        }
        lu_env_fini(env);
        OBD_FREE_PTR(env);
        /* notify qsd_stop_upd_thread() that we are done */
        thread_set_flags(thread, SVC_STOPPED);
        cfs_waitq_signal(&thread->t_ctl_waitq);
        RETURN(rc);
}
478
479 int qsd_start_upd_thread(struct qsd_instance *qsd)
480 {
481         struct ptlrpc_thread    *thread = &qsd->qsd_upd_thread;
482         struct l_wait_info       lwi = { 0 };
483         int                      rc;
484         ENTRY;
485
486         rc = cfs_create_thread(qsd_upd_thread, (void *)qsd, 0);
487         if (rc < 0) {
488                 CERROR("Fail to start quota update thread. rc: %d\n", rc);
489                 thread_set_flags(thread, SVC_STOPPED);
490                 RETURN(rc);
491         }
492
493         l_wait_event(thread->t_ctl_waitq,
494                      thread_is_running(thread) || thread_is_stopped(thread),
495                      &lwi);
496         RETURN(0);
497 }
498
499 static void qsd_cleanup_deferred(struct qsd_instance *qsd)
500 {
501         int     qtype;
502
503         for (qtype = USRQUOTA; qtype < MAXQUOTAS; qtype++) {
504                 struct qsd_upd_rec      *upd, *tmp;
505                 struct qsd_qtype_info   *qqi = qsd->qsd_type_array[qtype];
506
507                 if (qqi == NULL)
508                         continue;
509
510                 write_lock(&qsd->qsd_lock);
511                 cfs_list_for_each_entry_safe(upd, tmp, &qqi->qqi_deferred_glb,
512                                              qur_link) {
513                         CWARN("%s: Free global deferred upd: ID:"LPU64", "
514                               "ver:"LPU64"/"LPU64"\n", qsd->qsd_svname,
515                               upd->qur_qid.qid_uid, upd->qur_ver,
516                               qqi->qqi_glb_ver);
517                         list_del_init(&upd->qur_link);
518                         qsd_upd_free(upd);
519                 }
520                 cfs_list_for_each_entry_safe(upd, tmp, &qqi->qqi_deferred_slv,
521                                              qur_link) {
522                         CWARN("%s: Free slave deferred upd: ID:"LPU64", "
523                               "ver:"LPU64"/"LPU64"\n", qsd->qsd_svname,
524                               upd->qur_qid.qid_uid, upd->qur_ver,
525                               qqi->qqi_slv_ver);
526                         list_del_init(&upd->qur_link);
527                         qsd_upd_free(upd);
528                 }
529                 write_unlock(&qsd->qsd_lock);
530         }
531 }
532
533 static void qsd_cleanup_adjust(struct qsd_instance *qsd)
534 {
535         struct lquota_entry     *lqe;
536
537         spin_lock(&qsd->qsd_adjust_lock);
538         while (!cfs_list_empty(&qsd->qsd_adjust_list)) {
539                 lqe = cfs_list_entry(qsd->qsd_adjust_list.next,
540                                      struct lquota_entry, lqe_link);
541                 cfs_list_del_init(&lqe->lqe_link);
542                 lqe_putref(lqe);
543         }
544         spin_unlock(&qsd->qsd_adjust_lock);
545 }
546
547 void qsd_stop_upd_thread(struct qsd_instance *qsd)
548 {
549         struct ptlrpc_thread    *thread = &qsd->qsd_upd_thread;
550         struct l_wait_info       lwi    = { 0 };
551
552         if (!thread_is_stopped(thread)) {
553                 thread_set_flags(thread, SVC_STOPPING);
554                 cfs_waitq_signal(&thread->t_ctl_waitq);
555
556                 l_wait_event(thread->t_ctl_waitq, thread_is_stopped(thread),
557                              &lwi);
558         }
559         qsd_cleanup_deferred(qsd);
560         qsd_cleanup_adjust(qsd);
561 }