4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; if not, write to the
18 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 * Boston, MA 02111-1307, USA
24 * Copyright (c) 2012, 2017, Intel Corporation.
25 * Use is subject to license terms.
27 * Author: Johann Lombardi <johann.lombardi@intel.com>
28 * Author: Niu Yawei <yawei.niu@intel.com>
31 #define DEBUG_SUBSYSTEM S_LQUOTA
33 #include <linux/kthread.h>
34 #include <lustre_swab.h>
35 #include "qsd_internal.h"
/*
 * Completion callback invoked when the enqueue of the global quota lock has
 * completed (passed to qsd_intent_lock() by qsd_reint_main()).  On success it
 * hands the slave index version reported by the master back to the caller,
 * records the FID of the slave index copy in \a qqi and caches the granted
 * lock handle for later release.
 *
 * \param env       - the environment passed by the caller
 * \param qqi       - quota-type info the lock was enqueued for
 * \param req_qbody - quota body that was sent in the request
 * \param rep_qbody - quota body returned by the master in the reply
 * \param lockh     - handle of the granted global quota lock
 * \param lvb       - lock value block carrying the global index version
 */
static void qsd_reint_completion(const struct lu_env *env,
				 struct qsd_qtype_info *qqi,
				 struct quota_body *req_qbody,
				 struct quota_body *rep_qbody,
				 struct lustre_handle *lockh,
				 struct lquota_lvb *lvb,
	struct qsd_instance *qsd = qqi->qqi_qsd;
	/* arg points at the caller's slave version variable (qti_slv_ver,
	 * cf. the qsd_intent_lock() call in qsd_reint_main()) */
	__u64 *slv_ver = (__u64 *)arg;

	/* enqueue failure: -EAGAIN is treated as a transient condition and
	 * logged at D_QUOTA instead of D_ERROR */
	CDEBUG_LIMIT(rc != -EAGAIN ? D_ERROR : D_QUOTA,
		     "%s: failed to enqueue global quota lock, glb fid:"
		     DFID", rc:%d\n", qsd->qsd_svname,
		     PFID(&req_qbody->qb_fid), rc);

	CDEBUG(D_QUOTA, "%s: global quota lock successfully acquired, glb "
	       "fid:"DFID", glb ver:%llu, slv fid:"DFID", slv ver:%llu\n",
	       qsd->qsd_svname, PFID(&req_qbody->qb_fid),
	       lvb->lvb_glb_ver, PFID(&rep_qbody->qb_slv_fid),
	       rep_qbody->qb_slv_ver);

	/* hand the slave index version back to the caller */
	*slv_ver = rep_qbody->qb_slv_ver;
	/* remember the FID of our slave index copy on the master */
	memcpy(&qqi->qqi_slv_fid, &rep_qbody->qb_slv_fid,
	       sizeof(struct lu_fid));
	/* cache the lock handle; a reference on it is dropped at the end of
	 * qsd_reint_main() via ldlm_lock_decref() */
	lustre_handle_copy(&qqi->qqi_lockh, lockh);
/*
 * Reintegrate a single quota ID: refresh the in-core lquota entry and the
 * local index copy from a record fetched from the quota master.
 *
 * \param env    - the environment passed by the caller
 * \param qqi    - quota-type info being reintegrated
 * \param global - true when \a rec comes from the global index, false when
 *                 it comes from the slave index
 * \param qid    - quota ID the record applies to
 * \param rec    - lquota_glb_rec (global) or lquota_slv_rec (slave) record
 */
static int qsd_reint_qid(const struct lu_env *env, struct qsd_qtype_info *qqi,
			 bool global, union lquota_id *qid, void *rec)
	struct lquota_entry *lqe;

	/* look up (or instantiate) the in-core entry for this ID */
	lqe = lqe_locate(env, qqi->qqi_site, qid);

	LQUOTA_DEBUG(lqe, "reintegrating entry");

	/* refresh the in-core entry from the record ... */
	rc = qsd_update_lqe(env, lqe, global, rec);

	/* ... then write the record to the local index copy */
	rc = qsd_update_index(env, qqi, qid, global, 0, rec);

	/* ID 0 of the global index carries the default quota limits */
	if (global && qid->qid_uid == 0) {
		struct lquota_glb_rec *glb_rec = (struct lquota_glb_rec *)rec;

		qsd_update_default_quota(qqi, glb_rec->qbr_hardlimit,
					 glb_rec->qbr_softlimit,
/*
 * Walk the pages filled by a bulk index read and reintegrate every quota
 * record they contain via qsd_reint_qid().
 *
 * \param env       - the environment passed by the caller
 * \param qqi       - quota-type info being reintegrated
 * \param ii        - index_info describing the transfer (key/record sizes)
 * \param global    - true for the global index, false for the slave index
 * \param npages    - number of pages to process
 * \param need_swab - true when the fetched records must be byte-swapped
 */
static int qsd_reint_entries(const struct lu_env *env,
			     struct qsd_qtype_info *qqi,
			     struct idx_info *ii, bool global,
			     unsigned int npages, bool need_swab)
	struct qsd_thread_info *qti = qsd_info(env);
	struct qsd_instance *qsd = qqi->qqi_qsd;
	union lquota_id *qid = &qti->qti_id;

	CDEBUG(D_QUOTA, "%s: processing %d pages for %s index\n",
	       qsd->qsd_svname, npages, global ? "global" : "slave");

	/* sanity check on the record size */
	if ((global && ii->ii_recsize != sizeof(struct lquota_glb_rec)) ||
	    (!global && ii->ii_recsize != sizeof(struct lquota_slv_rec))) {
		CERROR("%s: invalid record size (%d) for %s index\n",
		       qsd->qsd_svname, ii->ii_recsize,
		       global ? "global" : "slave");

	/* each entry is a 64-bit key immediately followed by the record */
	size = ii->ii_recsize + ii->ii_keysize;

	for (i = 0; i < npages; i++) {
		union lu_page *lip = kmap(pages[i]);

		/* iterate over the lu_pages packed in this kernel page */
		for (j = 0; j < LU_PAGE_COUNT; j++) {

			lustre_swab_lip_header(&lip->lp_idx);

			if (lip->lp_idx.lip_magic != LIP_MAGIC) {
				CERROR("%s: invalid magic (%x != %x) for page "
				       "%d/%d while transferring %s index\n",
				       qsd->qsd_svname, lip->lp_idx.lip_magic,
				       LIP_MAGIC, i + 1, npages,
				       global ? "global" : "slave");
				GOTO(out, rc = -EINVAL);

			CDEBUG(D_QUOTA, "%s: processing page %d/%d with %d "
			       "entries for %s index\n", qsd->qsd_svname, i + 1,
			       npages, lip->lp_idx.lip_nr,
			       global ? "global" : "slave");

			for (k = 0; k < lip->lp_idx.lip_nr; k++) {

				/* locate the k-th entry in this lu_page */
				entry = lip->lp_idx.lip_entries + k * size;
				memcpy(qid, entry, ii->ii_keysize); /* key */
				entry += ii->ii_keysize;            /* value */

				__swab64s(&qid->qid_uid);
				/* quota records only include 64-bit
				 * fields, so swab each __u64 in turn */
				while (offset < ii->ii_recsize) {
					offset += sizeof(__u64);

				rc = qsd_reint_qid(env, qqi, global, qid,
/*
 * Fetch one whole index copy (global or slave, selected by \a global) from
 * the quota master via bulk read requests, reintegrate all records found via
 * qsd_reint_entries() and finally persist the index version with
 * qsd_write_version().
 *
 * \param env    - the environment passed by the caller
 * \param qqi    - quota-type info being reintegrated
 * \param global - true to fetch the global index, false for the slave one
 */
static int qsd_reint_index(const struct lu_env *env, struct qsd_qtype_info *qqi,
	struct qsd_thread_info *qti = qsd_info(env);
	struct qsd_instance *qsd = qqi->qqi_qsd;
	struct idx_info *ii = &qti->qti_ii;
	struct page **pages = NULL;
	unsigned int npages, pg_cnt;
	__u64 start_hash = 0, ver = 0;
	bool need_swab = false;

	/* global and slave copies live under different FIDs */
	fid = global ? &qqi->qqi_fid : &qqi->qqi_slv_fid;

	/* let's do a 1MB bulk */
	npages = min_t(unsigned int, OFD_MAX_BRW_SIZE, 1 << 20);
	/* NOTE(review): as written this is a byte count, not a page count;
	 * the usual form shifts right by PAGE_SHIFT — confirm against the
	 * original source */

	/* allocate pages for bulk index read */
	OBD_ALLOC_PTR_ARRAY(pages, npages);
		GOTO(out, rc = -ENOMEM);
	for (i = 0; i < npages; i++) {
		pages[i] = alloc_page(GFP_NOFS);
		if (pages[i] == NULL)
			GOTO(out, rc = -ENOMEM);

	/* initialize index_info request with FID of global index */
	memset(ii, 0, sizeof(*ii));
	memcpy(&ii->ii_fid, fid, sizeof(*fid));
	ii->ii_magic = IDX_INFO_MAGIC;
	ii->ii_flags = II_FL_NOHASH;
	ii->ii_count = npages * LU_PAGE_COUNT;
	ii->ii_hash_start = start_hash;

	/* send bulk request to quota master to read global index */
	rc = qsd_fetch_index(env, qsd->qsd_exp, ii, npages, pages, &need_swab);
		CWARN("%s: failed to fetch index for "DFID". %d\n",
		      qsd->qsd_svname, PFID(fid), rc);

	/* various sanity checks */
	if (ii->ii_magic != IDX_INFO_MAGIC) {
		CERROR("%s: invalid magic in index transfer %x != %x\n",
		       qsd->qsd_svname, ii->ii_magic, IDX_INFO_MAGIC);
		GOTO(out, rc = -EPROTO);
	/* variable-size keys and non-unique keys aren't fatal, just warn */
	if ((ii->ii_flags & II_FL_VARKEY) != 0)
		CWARN("%s: II_FL_VARKEY is set on index transfer for fid "DFID
		      ", it shouldn't be\n", qsd->qsd_svname, PFID(fid));
	if ((ii->ii_flags & II_FL_NONUNQ) != 0)
		CWARN("%s: II_FL_NONUNQ is set on index transfer for fid "DFID
		      ", it shouldn't be\n", qsd->qsd_svname, PFID(fid));
	/* quota index keys are 64-bit IDs, anything else is a protocol bug */
	if (ii->ii_keysize != sizeof(__u64)) {
		CERROR("%s: invalid key size reported on index transfer for "
		       "fid "DFID", %u != %u\n", qsd->qsd_svname, PFID(fid),
		       ii->ii_keysize, (int)sizeof(__u64));
		GOTO(out, rc = -EPROTO);
	if (ii->ii_version == 0 && ii->ii_count != 0)
		CWARN("%s: index version for fid "DFID" is 0, but index isn't "
		      "empty (%d)\n", qsd->qsd_svname, PFID(fid), ii->ii_count);

	CDEBUG(D_QUOTA, "%s: reintegration process for fid "DFID" successfully "
	       "fetched %s index, count = %d\n", qsd->qsd_svname,
	       PFID(fid), global ? "global" : "slave", ii->ii_count);

	/* record version associated with the first bulk transfer */
	ver = ii->ii_version;

	/* how many pages did the master actually fill with records? */
	pg_cnt = (ii->ii_count + (LU_PAGE_COUNT) - 1);
	pg_cnt >>= PAGE_SHIFT - LU_PAGE_SHIFT;

	if (pg_cnt > npages) {
		CERROR("%s: master returned more pages than expected, %u > %u"
		       "\n", qsd->qsd_svname, pg_cnt, npages);

	rc = qsd_reint_entries(env, qqi, ii, global, pages, pg_cnt, need_swab);

	/* II_END_OFF marks the end of the index; otherwise resume from the
	 * hash returned by the master */
	if (ii->ii_hash_end != II_END_OFF) {
		start_hash = ii->ii_hash_end;

	/* release the bulk pages */
	for (i = 0; i < npages; i++)
		if (pages[i] != NULL)
			__free_page(pages[i]);
	OBD_FREE_PTR_ARRAY(pages, npages);

	/* Update index version */
	rc = qsd_write_version(env, qqi, ver, global);
		CERROR("%s: write version %llu to "DFID" failed : rc = %d\n",
		       qsd->qsd_svname, ver, PFID(fid), rc);
/*
 * Iterate over all IDs recorded in the on-disk global index copy and, for
 * each non-root ID, refresh the local usage and call qsd_adjust() so that
 * over- or under-acquired space is reported back to the master.
 *
 * \param env - the environment passed by the caller
 * \param qqi - quota-type info to reconcile
 */
static int qsd_reconciliation(const struct lu_env *env,
			      struct qsd_qtype_info *qqi)
	struct qsd_thread_info *qti = qsd_info(env);
	struct qsd_instance *qsd = qqi->qqi_qsd;
	const struct dt_it_ops *iops;
	struct lquota_entry *lqe;
	union lquota_id *qid = &qti->qti_id;

	LASSERT(qqi->qqi_glb_obj != NULL);
	iops = &qqi->qqi_glb_obj->do_index_ops->dio_it;

	/* set up an iterator over the global index copy */
	it = iops->init(env, qqi->qqi_glb_obj, 0);
		CWARN("%s: Initialize it for "DFID" failed. %ld\n",
		      qsd->qsd_svname, PFID(&qqi->qqi_fid), PTR_ERR(it));

	/* position the iterator on the first record */
	rc = iops->load(env, it, 0);
		CWARN("%s: Load first entry for "DFID" failed. %d\n",
		      qsd->qsd_svname, PFID(&qqi->qqi_fid), rc);
	} else if (rc == 0) {
		/* no exact match for key 0, move to the next record if any */
		rc = iops->next(env, it);
			GOTO(out, rc = (rc < 0) ? rc : 0);

	/* read the 64-bit quota ID used as index key */
	key = iops->key(env, it);
		CWARN("%s: Error key for "DFID". %ld\n",
		      qsd->qsd_svname, PFID(&qqi->qqi_fid),
		GOTO(out, rc = PTR_ERR(key));

	/* skip the root user/group */
	if (*((__u64 *)key) == 0)

	qid->qid_uid = *((__u64 *)key);

	/* look up (or instantiate) the in-core entry for this ID */
	lqe = lqe_locate(env, qqi->qqi_site, qid);
		CWARN("%s: failed to locate lqe. "DFID", %ld\n",
		      qsd->qsd_svname, PFID(&qqi->qqi_fid),
		GOTO(out, rc = PTR_ERR(lqe));

	/* refresh the disk usage of this ID */
	rc = qsd_refresh_usage(env, lqe);
		CWARN("%s: failed to get usage. "DFID", %d\n",
		      qsd->qsd_svname, PFID(&qqi->qqi_fid), rc);

	/* report/adjust quota space for this ID */
	rc = qsd_adjust(env, lqe);
		CWARN("%s: failed to report quota. "DFID", %d\n",
		      qsd->qsd_svname, PFID(&qqi->qqi_fid), rc);

	/* advance to the next ID in the index */
	rc = iops->next(env, it);
		CWARN("%s: failed to parse index, ->next error:%d "DFID
		      "\n", qsd->qsd_svname, rc, PFID(&qqi->qqi_fid));
/* Return 1 when the connection to the quota master has been established,
 * 0 otherwise.  qsd_exp_valid is read under qsd_lock. */
static int qsd_connected(struct qsd_instance *qsd)
	read_lock(&qsd->qsd_lock);
	connected = qsd->qsd_exp_valid ? 1 : 0;
	read_unlock(&qsd->qsd_lock);
/* Return 1 when the qsd instance has been started, 0 otherwise.
 * qsd_started is read under qsd_lock. */
static int qsd_started(struct qsd_instance *qsd)
	read_lock(&qsd->qsd_lock);
	started = qsd->qsd_started ? 1 : 0;
	read_unlock(&qsd->qsd_lock);
/* Arguments handed over to the reintegration kthread (qsd_reint_main()). */
struct qsd_reint_args {
	struct qsd_qtype_info	*qra_qqi;	/* quota type to reintegrate */
	struct lu_env		 qra_env;	/* env used by the thread */
	struct completion	*qra_started;	/* completed once the thread
						 * is up and running */

/* Sleep interruptibly where TASK_IDLE is used; presumably a fallback for
 * kernels that don't define TASK_IDLE — confirm this sits under an
 * #ifndef TASK_IDLE guard in the original source. */
#define TASK_IDLE TASK_INTERRUPTIBLE
/*
 * Routine executed by the reintegration thread.  It waits for the connection
 * to the quota master, enqueues (or reuses) the global quota lock, re-fetches
 * the global and slave index copies when their cached versions are stale,
 * then waits for the qsd instance to start and reconciles usage for all
 * enforced IDs.
 */
static int qsd_reint_main(void *_args)
	struct qsd_reint_args *args = _args;
	struct lu_env *env = &args->qra_env;
	struct qsd_thread_info *qti;
	struct qsd_qtype_info *qqi = args->qra_qqi;
	struct qsd_instance *qsd = qqi->qqi_qsd;

	CDEBUG(D_QUOTA, "%s: Starting reintegration thread for "DFID"\n",
	       qsd->qsd_svname, PFID(&qqi->qqi_fid));

	lu_ref_add(&qqi->qqi_reference, "reint_thread", current);

	/* unblock the caller waiting in qsd_start_reint_thread() */
	complete(args->qra_started);

	/* wait for the connection to master established */
	while (({set_current_state(TASK_IDLE);
		 !qsd_connected(qsd) && !kthread_should_stop(); }))

	__set_current_state(TASK_RUNNING);

	/* Step 1: enqueue global index lock */
	if (kthread_should_stop())
		GOTO(out_env_init, rc = 0);

	LASSERT(qsd->qsd_exp != NULL);
	/* at least one of the two copies must be stale, otherwise there was
	 * no reason to start this thread */
	LASSERT(qqi->qqi_glb_uptodate == 0 || qqi->qqi_slv_uptodate == 0);

	memset(&qti->qti_lvb, 0, sizeof(qti->qti_lvb));

	read_lock(&qsd->qsd_lock);
	/* check whether we already own a global quota lock for this type */
	if (lustre_handle_is_used(&qqi->qqi_lockh) &&
	    ldlm_lock_addref_try(&qqi->qqi_lockh, qsd_glb_einfo.ei_mode) == 0) {
		read_unlock(&qsd->qsd_lock);
		/* force refresh of global & slave index copy */
		qti->qti_lvb.lvb_glb_ver = ~0ULL;
		qti->qti_slv_ver = ~0ULL;
		/* no valid lock found, let's enqueue a new one */
		read_unlock(&qsd->qsd_lock);

		memset(&qti->qti_body, 0, sizeof(qti->qti_body));
		memcpy(&qti->qti_body.qb_fid, &qqi->qqi_fid,
		       sizeof(qqi->qqi_fid));

		/* enqueue the global quota lock; on success
		 * qsd_reint_completion() fills qti_lvb and qti_slv_ver */
		rc = qsd_intent_lock(env, qsd->qsd_exp, &qti->qti_body, true,
				     IT_QUOTA_CONN, qsd_reint_completion, qqi,
				     &qti->qti_lvb, (void *)&qti->qti_slv_ver);
			GOTO(out_env_init, rc);

	CDEBUG(D_QUOTA, "%s: glb_ver:%llu/%llu,slv_ver:%llu/"
	       "%llu\n", qsd->qsd_svname,
	       qti->qti_lvb.lvb_glb_ver, qqi->qqi_glb_ver,
	       qti->qti_slv_ver, qqi->qqi_slv_ver);

	/* Step 2: reintegrate global index */
	if (kthread_should_stop())
		GOTO(out_lock, rc = 0);

	/* fault-injection hook used by sanity-quota tests */
	CFS_FAIL_TIMEOUT(OBD_FAIL_QUOTA_DELAY_REINT, 10);

	/* only transfer the global index if the cached version is stale */
	if (qqi->qqi_glb_ver != qti->qti_lvb.lvb_glb_ver) {
		rc = qsd_reint_index(env, qqi, true);
			CWARN("%s: reint global for "DFID" failed. %d\n",
			      qsd->qsd_svname, PFID(&qqi->qqi_fid), rc);
		qsd_bump_version(qqi, qqi->qqi_glb_ver, true);

	/* Step 3: reintegrate slave index */
	if (kthread_should_stop())
		GOTO(out_lock, rc = 0);

	/* only transfer the slave index if the cached version is stale */
	if (qqi->qqi_slv_ver != qti->qti_slv_ver) {
		rc = qsd_reint_index(env, qqi, false);
			CWARN("%s: reintegration for "DFID" failed with %d\n",
			      qsd->qsd_svname, PFID(&qqi->qqi_slv_fid), rc);
		qsd_bump_version(qqi, qqi->qqi_slv_ver, false);

	/* wait for the qsd instance started (target recovery done) */
	while (({set_current_state(TASK_IDLE);
		 !qsd_started(qsd) && !kthread_should_stop(); }))

	__set_current_state(TASK_RUNNING);

	if (kthread_should_stop())
		GOTO(out_lock, rc = 0);

	/* Step 4: start reconciliation for each enforced ID */
	rc = qsd_reconciliation(env, qqi);
		CWARN("%s: reconciliation for "DFID" failed with %d\n",
		      qsd->qsd_svname, PFID(&qqi->qqi_slv_fid), rc);

	/* drop the reference taken on the global quota lock above */
	ldlm_lock_decref(&qqi->qqi_lockh, qsd_glb_einfo.ei_mode);

	write_lock(&qsd->qsd_lock);
	write_unlock(&qsd->qsd_lock);

	/* if qsd_stop_reint_thread() already claimed the task pointer via
	 * xchg(), park here until it issues kthread_stop() */
	if (xchg(&qqi->qqi_reint_task, NULL) == NULL)
		wait_var_event(qqi, kthread_should_stop());

	lu_ref_del(&qqi->qqi_reference, "reint_thread", current);
/*
 * Stop the reintegration thread of \a qqi, if one is running.  The xchg()
 * on qqi_reint_task guarantees that exactly one party — this function or the
 * thread itself at the end of qsd_reint_main() — takes ownership of the
 * task pointer.
 */
void qsd_stop_reint_thread(struct qsd_qtype_info *qqi)
	struct task_struct *task;

	/* atomically claim the task pointer */
	task = xchg(&qqi->qqi_reint_task, NULL);
/*
 * cfs_hash iterator callback: accumulate the number of pending quota
 * requests (lqe_pending_req) of each entry into the counter pointed to by
 * \a data.  Used by qqi_reint_delayed() to detect in-flight dqacq requests.
 */
static int qsd_entry_iter_cb(struct cfs_hash *hs, struct cfs_hash_bd *bd,
			     struct hlist_node *hnode, void *data)
	struct lquota_entry *lqe;
	int *pending = (int *)data;

	lqe = hlist_entry(hnode, struct lquota_entry, lqe_hash);
	/* the hash holds a reference on every entry it iterates */
	LASSERT(atomic_read(&lqe->lqe_ref) > 0);

	*pending += lqe->lqe_pending_req;
	lqe_read_unlock(lqe);
/*
 * Decide whether reintegration for \a qqi must be delayed: pending quota
 * adjusts, in-flight dqacq requests or unfinished index updates could
 * interfere with the reint procedure, and reint may also be already running
 * or unnecessary (both copies up-to-date) or the instance stopping.
 *
 * Returns true when reintegration must be delayed (caller retries later),
 * false when it is safe to start it now.
 */
static bool qqi_reint_delayed(struct qsd_qtype_info *qqi)
	struct qsd_instance *qsd = qqi->qqi_qsd;
	struct qsd_upd_rec *upd;
	struct lquota_entry *lqe, *n;

	/* any pending quota adjust? */
	spin_lock(&qsd->qsd_adjust_lock);
	list_for_each_entry_safe(lqe, n, &qsd->qsd_adjust_list, lqe_link) {
		if (lqe2qqi(lqe) == qqi) {
			list_del_init(&lqe->lqe_link);
	spin_unlock(&qsd->qsd_adjust_lock);

	/* any pending quota request? */
	cfs_hash_for_each_safe(qqi->qqi_site->lqs_hash, qsd_entry_iter_cb,
		CDEBUG(D_QUOTA, "%s: pending dqacq for type:%d.\n",
		       qsd->qsd_svname, qqi->qqi_qtype);
		GOTO(out, delay = true);

	/* any pending updates? */
	write_lock(&qsd->qsd_lock);

	/* check if the reintegration has already started or finished */
	if ((qqi->qqi_glb_uptodate && qqi->qqi_slv_uptodate) ||
	    qqi->qqi_reint || qsd->qsd_stopping || qsd->qsd_updating)
		GOTO(out_lock, delay = true);

	/* there could be some unfinished global or index entry updates
	 * (very unlikely), to avoid them messing up with the reint
	 * procedure, we just return and try to re-start reint later. */
	list_for_each_entry(upd, &qsd->qsd_upd_list, qur_link) {
		if (upd->qur_qqi == qqi) {
			CDEBUG(D_QUOTA, "%s: pending %s updates for type:%d.\n",
			       upd->qur_global ? "global" : "slave",
			GOTO(out_lock, delay = true);

	write_unlock(&qsd->qsd_lock);

	CERROR("%s: Delaying reintegration for qtype:%d until pending "
	       "updates are flushed.\n",
	       qsd->qsd_svname, qqi->qqi_qtype);
639 int qsd_start_reint_thread(struct qsd_qtype_info *qqi)
641 struct qsd_instance *qsd = qqi->qqi_qsd;
642 struct task_struct *task;
643 struct qsd_reint_args *args;
644 DECLARE_COMPLETION_ONSTACK(started);
648 /* do not try to start a new thread as this can lead to a deadlock */
649 if (current->flags & (PF_MEMALLOC | PF_KSWAPD))
652 if (qsd->qsd_dev->dd_rdonly)
655 /* don't bother to do reintegration when quota isn't enabled */
656 if (!qsd_type_enabled(qsd, qqi->qqi_qtype))
659 if (qqi->qqi_acct_failed)
660 /* no space accounting support, can't enable enforcement */
663 if (qqi_reint_delayed(qqi))
668 GOTO(out, rc = -ENOMEM);
670 args->qra_started = &started;
672 /* initialize environment */
673 rc = lu_env_init(&args->qra_env, LCT_DT_THREAD);
676 task = kthread_create(qsd_reint_main, args, "qsd_reint_%d.%s",
677 qqi->qqi_qtype, qsd->qsd_svname);
681 lu_env_fini(&args->qra_env);
685 write_lock(&qsd->qsd_lock);
687 write_unlock(&qsd->qsd_lock);
691 qqi->qqi_reint_task = task;
692 wake_up_process(task);
693 wait_for_completion(&started);