4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; if not, write to the
18 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 * Boston, MA 021110-1307, USA
24 * Copyright (c) 2012, 2013, Intel Corporation.
25 * Use is subject to license terms.
27 * Author: Johann Lombardi <johann.lombardi@intel.com>
28 * Author: Niu Yawei <yawei.niu@intel.com>
32 # define EXPORT_SYMTAB
35 #define DEBUG_SUBSYSTEM S_LQUOTA
37 #include "qsd_internal.h"
40 * Completion function invoked when the global quota lock enqueue has completed
/*
 * Copies the result of a global quota lock enqueue into qqi.
 *
 * A failed enqueue is logged with the global index FID taken from
 * req_qbody, at D_QUOTA level when rc is -EAGAIN and at D_ERROR level
 * otherwise. On the success path the slave index version from rep_qbody is
 * stored through arg (used as a __u64 pointer), and the slave FID and the
 * lock handle are copied into qqi.
 *
 * NOTE(review): the test that separates the failure path from the success
 * path is not visible here -- presumably a check on rc; confirm.
 */
42 static void qsd_reint_completion(const struct lu_env *env,
43 struct qsd_qtype_info *qqi,
44 struct quota_body *req_qbody,
45 struct quota_body *rep_qbody,
46 struct lustre_handle *lockh,
47 struct lquota_lvb *lvb,
50 struct qsd_instance *qsd = qqi->qqi_qsd;
/* arg is the caller-supplied location that receives the slave version */
51 __u64 *slv_ver = (__u64 *)arg;
/* -EAGAIN is only reported at D_QUOTA level, any other rc at D_ERROR */
55 CDEBUG_LIMIT(rc != -EAGAIN ? D_ERROR : D_QUOTA,
56 "%s: failed to enqueue global quota lock, glb fid:"
57 DFID", rc:%d\n", qsd->qsd_svname,
58 PFID(&req_qbody->qb_fid), rc);
62 CDEBUG(D_QUOTA, "%s: global quota lock successfully acquired, glb "
63 "fid:"DFID", glb ver:"LPU64", slv fid:"DFID", slv ver:"LPU64"\n",
64 qsd->qsd_svname, PFID(&req_qbody->qb_fid),
65 lvb->lvb_glb_ver, PFID(&rep_qbody->qb_slv_fid),
66 rep_qbody->qb_slv_ver);
/* publish the slave version, then remember slave FID and lock handle */
68 *slv_ver = rep_qbody->qb_slv_ver;
69 memcpy(&qqi->qqi_slv_fid, &rep_qbody->qb_slv_fid,
70 sizeof(struct lu_fid));
71 lustre_handle_copy(&qqi->qqi_lockh, lockh);
/*
 * Reintegrates a single quota ID.
 *
 * Looks up the lquota entry for qid in qqi->qqi_site, updates that entry
 * from rec with qsd_update_lqe(), then hands the same record to
 * qsd_update_index(). The global flag is forwarded unchanged to both calls.
 *
 * NOTE(review): the error checks between these calls and the release of the
 * located entry are not visible here; confirm against the full function.
 */
75 static int qsd_reint_qid(const struct lu_env *env, struct qsd_qtype_info *qqi,
76 bool global, union lquota_id *qid, void *rec)
78 struct lquota_entry *lqe;
82 lqe = lqe_locate(env, qqi->qqi_site, qid);
86 LQUOTA_DEBUG(lqe, "reintegrating entry");
88 rc = qsd_update_lqe(env, lqe, global, rec);
/* fifth argument is 0 -- presumably the version to record; confirm */
92 rc = qsd_update_index(env, qqi, qid, global, 0, rec);
/*
 * Walks the npages bulk pages of an index transfer and reintegrates every
 * entry found in them through qsd_reint_qid().
 *
 * ii provides the key and record sizes of the transferred index; the record
 * size must match struct lquota_glb_rec when global is true and
 * struct lquota_slv_rec otherwise. Each page must carry LIP_MAGIC in its
 * header, otherwise the walk stops with -EINVAL.
 *
 * NOTE(review): need_swab presumably gates the byte-swapping calls in the
 * body -- the conditions around them are not visible here; confirm.
 */
98 static int qsd_reint_entries(const struct lu_env *env,
99 struct qsd_qtype_info *qqi,
100 struct idx_info *ii, bool global,
102 unsigned int npages, bool need_swab)
104 struct qsd_thread_info *qti = qsd_info(env);
105 struct qsd_instance *qsd = qqi->qqi_qsd;
/* the per-thread ID buffer is reused for every entry */
106 union lquota_id *qid = &qti->qti_id;
111 CDEBUG(D_QUOTA, "%s: processing %d pages for %s index\n",
112 qsd->qsd_svname, npages, global ? "global" : "slave");
114 /* sanity check on the record size */
115 if ((global && ii->ii_recsize != sizeof(struct lquota_glb_rec)) ||
116 (!global && ii->ii_recsize != sizeof(struct lquota_slv_rec))) {
117 CERROR("%s: invalid record size (%d) for %s index\n",
118 qsd->qsd_svname, ii->ii_recsize,
119 global ? "global" : "slave");
/* an entry is a key immediately followed by its record */
123 size = ii->ii_recsize + ii->ii_keysize;
125 for (i = 0; i < npages; i++) {
126 union lu_page *lip = kmap(pages[i]);
/* the inner loop runs LU_PAGE_COUNT times per mapped page */
128 for (j = 0; j < LU_PAGE_COUNT; j++) {
131 lustre_swab_lip_header(&lip->lp_idx);
/* refuse a page whose header does not carry the expected magic */
133 if (lip->lp_idx.lip_magic != LIP_MAGIC) {
134 CERROR("%s: invalid magic (%x != %x) for page "
135 "%d/%d while transferring %s index\n",
136 qsd->qsd_svname, lip->lp_idx.lip_magic,
137 LIP_MAGIC, i + 1, npages,
138 global ? "global" : "slave");
139 GOTO(out, rc = -EINVAL);
142 CDEBUG(D_QUOTA, "%s: processing page %d/%d with %d "
143 "entries for %s index\n", qsd->qsd_svname, i + 1,
144 npages, lip->lp_idx.lip_nr,
145 global ? "global" : "slave");
/* lip_nr is the number of entries held by this page header */
147 for (k = 0; k < lip->lp_idx.lip_nr; k++) {
/* locate entry k, copy its key into qid, then step to its record */
150 entry = lip->lp_idx.lip_entries + k * size;
151 memcpy(qid, entry, ii->ii_keysize); /* key */
152 entry += ii->ii_keysize; /* value */
158 __swab64s(&qid->qid_uid);
159 /* quota records only include 64-bit
161 while (offset < ii->ii_recsize) {
164 offset += sizeof(__u64);
168 rc = qsd_reint_qid(env, qqi, global, qid,
/*
 * Fetches one index from the quota master with a bulk transfer and
 * reintegrates its entries, then writes the index version.
 *
 * The index is the one identified by qqi->qqi_fid when global is true and
 * by qqi->qqi_slv_fid otherwise. The reply is sanity-checked (magic, key
 * size, number of pages) before the pages are handed to
 * qsd_reint_entries(). The version recorded from the reply is what gets
 * passed to qsd_write_version().
 *
 * NOTE(review): the transfer presumably repeats from ii_hash_end until
 * II_END_OFF is returned -- the jump back is not visible here; confirm.
 */
183 static int qsd_reint_index(const struct lu_env *env, struct qsd_qtype_info *qqi,
186 struct qsd_thread_info *qti = qsd_info(env);
187 struct qsd_instance *qsd = qqi->qqi_qsd;
188 struct idx_info *ii = &qti->qti_ii;
190 struct page **pages = NULL;
191 unsigned int npages, pg_cnt;
192 __u64 start_hash = 0, ver = 0;
193 bool need_swab = false;
/* pick the FID of the index to transfer */
197 fid = global ? &qqi->qqi_fid : &qqi->qqi_slv_fid;
199 /* let's do a 1MB bulk */
/* byte size is capped by OFD_MAX_BRW_SIZE, then converted to pages */
200 npages = min_t(unsigned int, OFD_MAX_BRW_SIZE, 1 << 20);
201 npages /= PAGE_CACHE_SIZE;
203 /* allocate pages for bulk index read */
204 OBD_ALLOC(pages, npages * sizeof(*pages));
206 GOTO(out, rc = -ENOMEM);
207 for (i = 0; i < npages; i++) {
208 pages[i] = alloc_page(GFP_IOFS);
209 if (pages[i] == NULL)
210 GOTO(out, rc = -ENOMEM);
214 /* initialize index_info request with FID of global index */
/* note: fid is the slave index FID when global is false */
215 memset(ii, 0, sizeof(*ii));
216 memcpy(&ii->ii_fid, fid, sizeof(*fid));
217 ii->ii_magic = IDX_INFO_MAGIC;
218 ii->ii_flags = II_FL_NOHASH;
219 ii->ii_count = npages * LU_PAGE_COUNT;
220 ii->ii_hash_start = start_hash;
222 /* send bulk request to quota master to read global index */
223 rc = qsd_fetch_index(env, qsd->qsd_exp, ii, npages, pages, &need_swab);
225 CWARN("%s: failed to fetch index for "DFID". %d\n",
226 qsd->qsd_svname, PFID(fid), rc);
230 /* various sanity checks */
231 if (ii->ii_magic != IDX_INFO_MAGIC) {
232 CERROR("%s: invalid magic in index transfer %x != %x\n",
233 qsd->qsd_svname, ii->ii_magic, IDX_INFO_MAGIC);
234 GOTO(out, rc = -EPROTO);
/* unexpected flags are only warned about, the transfer goes on */
236 if ((ii->ii_flags & II_FL_VARKEY) != 0)
237 CWARN("%s: II_FL_VARKEY is set on index transfer for fid "DFID
238 ", it shouldn't be\n", qsd->qsd_svname, PFID(fid));
239 if ((ii->ii_flags & II_FL_NONUNQ) != 0)
240 CWARN("%s: II_FL_NONUNQ is set on index transfer for fid "DFID
241 ", it shouldn't be\n", qsd->qsd_svname, PFID(fid));
/* keys must be exactly one __u64, anything else is a protocol error */
242 if (ii->ii_keysize != sizeof(__u64)) {
243 CERROR("%s: invalid key size reported on index transfer for "
244 "fid "DFID", %u != %u\n", qsd->qsd_svname, PFID(fid),
245 ii->ii_keysize, (int)sizeof(__u64));
246 GOTO(out, rc = -EPROTO);
/* version 0 with a non-zero count is only warned about */
248 if (ii->ii_version == 0 && ii->ii_count != 0)
249 CWARN("%s: index version for fid "DFID" is 0, but index isn't "
250 "empty (%d)\n", qsd->qsd_svname, PFID(fid), ii->ii_count);
252 CDEBUG(D_QUOTA, "%s: reintegration process for fid "DFID" successfully "
253 "fetched %s index, count = %d\n", qsd->qsd_svname,
254 PFID(fid), global ? "global" : "slave", ii->ii_count);
257 /* record version associated with the first bulk transfer */
258 ver = ii->ii_version;
/*
 * Presumably rounds the returned ii_count up to whole pages; this assumes
 * LU_PAGE_COUNT == 1 << (PAGE_CACHE_SHIFT - LU_PAGE_SHIFT) -- TODO confirm.
 */
260 pg_cnt = (ii->ii_count + (LU_PAGE_COUNT) - 1);
261 pg_cnt >>= PAGE_CACHE_SHIFT - LU_PAGE_SHIFT;
/* the reply must not describe more pages than were allocated above */
263 if (pg_cnt > npages) {
264 CERROR("%s: master returned more pages than expected, %u > %u"
265 "\n", qsd->qsd_svname, pg_cnt, npages);
269 rc = qsd_reint_entries(env, qqi, ii, global, pages, pg_cnt, need_swab);
/* remember where a following transfer should start from */
273 if (ii->ii_hash_end != II_END_OFF) {
274 start_hash = ii->ii_hash_end;
/* release every allocated bulk page, then the page array itself */
279 for (i = 0; i < npages; i++)
280 if (pages[i] != NULL)
281 __free_page(pages[i]);
282 OBD_FREE(pages, npages * sizeof(*pages));
285 /* Update index version */
287 rc = qsd_write_version(env, qqi, ver, global);
289 CERROR("%s: write version "LPU64" to "DFID" failed. "
290 "%d\n", qsd->qsd_svname, ver, PFID(fid), rc);
/*
 * Iterates over the index object qqi->qqi_glb_obj and, for every key other
 * than 0, locates the matching lquota entry, refreshes its usage with
 * qsd_refresh_usage() and then calls qsd_adjust() on it.
 *
 * Failures of the iterator, of the entry lookup or of the usage refresh are
 * reported with CWARN.
 *
 * NOTE(review): the loop construct, the release of the located entry and the
 * iterator teardown are not visible here; confirm against the full function.
 */
296 static int qsd_reconciliation(const struct lu_env *env,
297 struct qsd_qtype_info *qqi)
299 struct qsd_thread_info *qti = qsd_info(env);
300 struct qsd_instance *qsd = qqi->qqi_qsd;
301 const struct dt_it_ops *iops;
304 struct lquota_entry *lqe;
/* the per-thread ID buffer is reused for every key */
305 union lquota_id *qid = &qti->qti_id;
309 LASSERT(qqi->qqi_glb_obj != NULL);
310 iops = &qqi->qqi_glb_obj->do_index_ops->dio_it;
/* create an iterator over the index object */
312 it = iops->init(env, qqi->qqi_glb_obj, 0, BYPASS_CAPA);
314 CWARN("%s: Initialize it for "DFID" failed. %ld\n",
315 qsd->qsd_svname, PFID(&qqi->qqi_fid), PTR_ERR(it));
/* load the first entry (third argument 0) */
319 rc = iops->load(env, it, 0);
321 CWARN("%s: Load first entry for "DFID" failed. %d\n",
322 qsd->qsd_svname, PFID(&qqi->qqi_fid), rc);
/* load() returned 0: advance once; the exit maps results >= 0 to 0 */
324 } else if (rc == 0) {
325 rc = iops->next(env, it);
327 GOTO(out, rc = (rc < 0) ? rc : 0);
331 key = iops->key(env, it);
333 CWARN("%s: Error key for "DFID". %ld\n",
334 qsd->qsd_svname, PFID(&qqi->qqi_fid),
336 GOTO(out, rc = PTR_ERR(key));
339 /* skip the root user/group */
340 if (*((__u64 *)key) == 0)
/* the key is read as a __u64 and used as the quota ID */
343 qid->qid_uid = *((__u64 *)key);
345 lqe = lqe_locate(env, qqi->qqi_site, qid);
347 CWARN("%s: failed to locate lqe. "DFID", %ld\n",
348 qsd->qsd_svname, PFID(&qqi->qqi_fid),
350 GOTO(out, rc = PTR_ERR(lqe));
353 rc = qsd_refresh_usage(env, lqe);
355 CWARN("%s: failed to get usage. "DFID", %d\n",
356 qsd->qsd_svname, PFID(&qqi->qqi_fid), rc);
361 rc = qsd_adjust(env, lqe);
364 CWARN("%s: failed to report quota. "DFID", %d\n",
365 qsd->qsd_svname, PFID(&qqi->qqi_fid), rc);
/* move on to the following index entry */
369 rc = iops->next(env, it);
371 CWARN("%s: failed to parse index, ->next error:%d "DFID
372 "\n", qsd->qsd_svname, rc, PFID(&qqi->qqi_fid));
/*
 * Samples qsd->qsd_exp_valid under the read side of qsd->qsd_lock and
 * normalizes it to 1 or 0.
 * NOTE(review): the return statement is not visible here -- presumably the
 * sampled value is returned; confirm.
 */
384 static int qsd_connected(struct qsd_instance *qsd)
388 read_lock(&qsd->qsd_lock);
389 connected = qsd->qsd_exp_valid ? 1 : 0;
390 read_unlock(&qsd->qsd_lock);
/*
 * Samples qsd->qsd_started under the read side of qsd->qsd_lock and
 * normalizes it to 1 or 0.
 * NOTE(review): the return statement is not visible here -- presumably the
 * sampled value is returned; confirm.
 */
395 static int qsd_started(struct qsd_instance *qsd)
399 read_lock(&qsd->qsd_lock);
400 started = qsd->qsd_started ? 1 : 0;
401 read_unlock(&qsd->qsd_lock);
407 * Routine executed by the reintegration thread.
/*
 * Thread body: args is the struct qsd_qtype_info to reintegrate.
 *
 * After flagging itself SVC_RUNNING the thread waits for the connection to
 * the master, then goes through four steps, leaving early whenever the
 * thread is no longer flagged as running:
 *   1. reuse the global quota lock already referenced by qqi_lockh, or
 *      enqueue a new one with qsd_intent_lock();
 *   2. refetch the global index if its local version differs;
 *   3. refetch the slave index if its local version differs;
 *   4. once the qsd instance is started, run qsd_reconciliation().
 * On the way out it releases the lock reference, flags itself SVC_STOPPED
 * and wakes up the waiters on t_ctl_waitq.
 *
 * NOTE(review): the cleanup labels and several guards are not visible here;
 * confirm the exact exit paths against the full function.
 */
409 static int qsd_reint_main(void *args)
412 struct qsd_thread_info *qti;
413 struct qsd_qtype_info *qqi = (struct qsd_qtype_info *)args;
414 struct qsd_instance *qsd = qqi->qqi_qsd;
415 struct ptlrpc_thread *thread = &qqi->qqi_reint_thread;
416 struct l_wait_info lwi = { 0 };
420 CDEBUG(D_QUOTA, "%s: Starting reintegration thread for "DFID"\n",
421 qsd->qsd_svname, PFID(&qqi->qqi_fid));
424 lu_ref_add(&qqi->qqi_reference, "reint_thread", thread);
/* flag the thread as running and wake up waiters on t_ctl_waitq */
426 thread_set_flags(thread, SVC_RUNNING);
427 cfs_waitq_signal(&thread->t_ctl_waitq);
431 GOTO(out, rc = -ENOMEM);
433 /* initialize environment */
434 rc = lu_env_init(env, LCT_DT_THREAD);
439 /* wait for the connection to master established */
440 l_wait_event(thread->t_ctl_waitq,
441 qsd_connected(qsd) || !thread_is_running(thread), &lwi);
443 /* Step 1: enqueue global index lock */
444 if (!thread_is_running(thread))
445 GOTO(out_env_init, rc = 0);
/* reintegration only makes sense if at least one copy is out of date */
447 LASSERT(qsd->qsd_exp != NULL);
448 LASSERT(qqi->qqi_glb_uptodate == 0 || qqi->qqi_slv_uptodate == 0);
450 memset(&qti->qti_lvb, 0, sizeof(qti->qti_lvb));
452 read_lock(&qsd->qsd_lock);
453 /* check whether we already own a global quota lock for this type */
454 if (lustre_handle_is_used(&qqi->qqi_lockh) &&
455 ldlm_lock_addref_try(&qqi->qqi_lockh, qsd_glb_einfo.ei_mode) == 0) {
456 read_unlock(&qsd->qsd_lock);
457 /* force refresh of global & slave index copy */
/* ~0ULL presumably never equals a stored version -- TODO confirm */
458 qti->qti_lvb.lvb_glb_ver = ~0ULL;
459 qti->qti_slv_ver = ~0ULL;
461 /* no valid lock found, let's enqueue a new one */
462 read_unlock(&qsd->qsd_lock);
/* the enqueue request only carries the FID of the global index */
464 memset(&qti->qti_body, 0, sizeof(qti->qti_body));
465 memcpy(&qti->qti_body.qb_fid, &qqi->qqi_fid,
466 sizeof(qqi->qqi_fid));
/* qsd_reint_completion() receives qqi and &qti->qti_slv_ver as arg */
468 rc = qsd_intent_lock(env, qsd->qsd_exp, &qti->qti_body, true,
469 IT_QUOTA_CONN, qsd_reint_completion, qqi,
470 &qti->qti_lvb, (void *)&qti->qti_slv_ver);
472 GOTO(out_env_init, rc);
474 CDEBUG(D_QUOTA, "%s: glb_ver:"LPU64"/"LPU64",slv_ver:"LPU64"/"
475 LPU64"\n", qsd->qsd_svname,
476 qti->qti_lvb.lvb_glb_ver, qqi->qqi_glb_ver,
477 qti->qti_slv_ver, qqi->qqi_slv_ver);
480 /* Step 2: reintegrate global index */
481 if (!thread_is_running(thread))
482 GOTO(out_lock, rc = 0);
/* presumably a fault-injection delay used by tests -- TODO confirm */
484 OBD_FAIL_TIMEOUT(OBD_FAIL_QUOTA_DELAY_REINT, 10);
/* refetch only when the local version differs from the one in qti_lvb */
486 if (qqi->qqi_glb_ver != qti->qti_lvb.lvb_glb_ver) {
487 rc = qsd_reint_index(env, qqi, true);
489 CWARN("%s: reint global for "DFID" failed. %d\n",
490 qsd->qsd_svname, PFID(&qqi->qqi_fid), rc);
494 qsd_bump_version(qqi, qqi->qqi_glb_ver, true);
497 /* Step 3: reintegrate slave index */
498 if (!thread_is_running(thread))
499 GOTO(out_lock, rc = 0);
/* refetch only when the local version differs from qti_slv_ver */
501 if (qqi->qqi_slv_ver != qti->qti_slv_ver) {
502 rc = qsd_reint_index(env, qqi, false);
504 CWARN("%s: Reint slave for "DFID" failed. %d\n",
505 qsd->qsd_svname, PFID(&qqi->qqi_slv_fid), rc);
509 qsd_bump_version(qqi, qqi->qqi_slv_ver, false);
512 /* wait for the qsd instance started (target recovery done) */
513 l_wait_event(thread->t_ctl_waitq,
514 qsd_started(qsd) || !thread_is_running(thread), &lwi);
516 if (!thread_is_running(thread))
517 GOTO(out_lock, rc = 0);
519 /* Step 4: start reconciliation for each enforced ID */
/*
 * NOTE(review): the warning below prints qti->qti_fid, while every other
 * message in this function prints qqi->qqi_fid or qqi->qqi_slv_fid and
 * nothing visible here assigns qti_fid -- presumably &qqi->qqi_fid was
 * intended; confirm.
 */
520 rc = qsd_reconciliation(env, qqi);
522 CWARN("%s: reconciliation failed. "DFID", %d\n",
523 qsd->qsd_svname, PFID(&qti->qti_fid), rc);
/* release the reference held on the global quota lock */
527 ldlm_lock_decref(&qqi->qqi_lockh, qsd_glb_einfo.ei_mode);
/* NOTE(review): the statement run under this write lock is not visible */
533 write_lock(&qsd->qsd_lock);
535 write_unlock(&qsd->qsd_lock);
538 lu_ref_del(&qqi->qqi_reference, "reint_thread", thread);
/* flag the thread as stopped; qsd_stop_reint_thread() waits for this */
540 thread_set_flags(thread, SVC_STOPPED);
541 cfs_waitq_signal(&thread->t_ctl_waitq);
/*
 * Asks the reintegration thread of qqi to stop and waits until it has.
 *
 * Nothing is done when the thread is already flagged as stopped. Otherwise
 * the thread is flagged SVC_STOPPING, waiters on its control wait queue are
 * woken up, and the caller blocks until thread_is_stopped() holds.
 */
545 void qsd_stop_reint_thread(struct qsd_qtype_info *qqi)
547 struct ptlrpc_thread *thread = &qqi->qqi_reint_thread;
548 struct l_wait_info lwi = { 0 };
550 if (!thread_is_stopped(thread)) {
/* request the stop, then kick the thread out of any wait it is in */
551 thread_set_flags(thread, SVC_STOPPING);
552 cfs_waitq_signal(&thread->t_ctl_waitq);
554 l_wait_event(thread->t_ctl_waitq,
555 thread_is_stopped(thread), &lwi);
/*
 * Hash iteration callback: adds the lqe_pending_req value of the lquota
 * entry behind hnode to the int counter that data points to.
 *
 * NOTE(review): the lock acquisition matching lqe_read_unlock() and the
 * return value are not visible here; confirm against the full function.
 */
559 static int qsd_entry_iter_cb(cfs_hash_t *hs, cfs_hash_bd_t *bd,
560 cfs_hlist_node_t *hnode, void *data)
562 struct lquota_entry *lqe;
563 int *pending = (int *)data;
/* recover the entry from its embedded hash node */
565 lqe = cfs_hlist_entry(hnode, struct lquota_entry, lqe_hash);
566 LASSERT(atomic_read(&lqe->lqe_ref) > 0);
569 *pending += lqe->lqe_pending_req;
570 lqe_read_unlock(lqe);
/*
 * Checks whether work is still outstanding for qqi, in three passes:
 *   - entries of this qqi are unlinked from qsd->qsd_adjust_list;
 *   - qsd->qsd_upd_list is scanned for an update record owned by this qqi,
 *     which sets the result to true;
 *   - the lquota entry hash of qqi->qqi_site is walked with
 *     qsd_entry_iter_cb(), which sums lqe_pending_req.
 *
 * NOTE(review): the guards around the last two messages and the return
 * statement are not visible here -- presumably the boolean "updates" is
 * returned; confirm.
 */
575 static bool qsd_pending_updates(struct qsd_qtype_info *qqi)
577 struct qsd_instance *qsd = qqi->qqi_qsd;
578 struct qsd_upd_rec *upd;
579 struct lquota_entry *lqe, *n;
581 bool updates = false;
584 /* any pending quota adjust? */
585 spin_lock(&qsd->qsd_adjust_lock);
/* the _safe iterator is needed because entries are unlinked in the loop */
586 cfs_list_for_each_entry_safe(lqe, n, &qsd->qsd_adjust_list, lqe_link) {
587 if (lqe2qqi(lqe) == qqi) {
588 cfs_list_del_init(&lqe->lqe_link);
592 spin_unlock(&qsd->qsd_adjust_lock);
594 /* any pending updates? */
595 read_lock(&qsd->qsd_lock);
596 cfs_list_for_each_entry(upd, &qsd->qsd_upd_list, qur_link) {
597 if (upd->qur_qqi == qqi) {
/* the lock is dropped here because this path jumps out of the loop */
598 read_unlock(&qsd->qsd_lock);
599 CDEBUG(D_QUOTA, "%s: pending %s updates for type:%d.\n",
601 upd->qur_global ? "global" : "slave",
603 GOTO(out, updates = true);
606 read_unlock(&qsd->qsd_lock);
608 /* any pending quota request? */
609 cfs_hash_for_each_safe(qqi->qqi_site->lqs_hash, qsd_entry_iter_cb,
612 CDEBUG(D_QUOTA, "%s: pending dqacq for type:%d.\n",
613 qsd->qsd_svname, qqi->qqi_qtype);
619 CERROR("%s: Delaying reintegration for qtype:%d until pending "
620 "updates are flushed.\n",
621 qsd->qsd_svname, qqi->qqi_qtype);
625 int qsd_start_reint_thread(struct qsd_qtype_info *qqi)
627 struct ptlrpc_thread *thread = &qqi->qqi_reint_thread;
628 struct qsd_instance *qsd = qqi->qqi_qsd;
629 struct l_wait_info lwi = { 0 };
634 /* don't bother to do reintegration when quota isn't enabled */
635 if (!qsd_type_enabled(qsd, qqi->qqi_qtype))
638 if (qsd->qsd_acct_failed)
639 /* no space accounting support, can't enable enforcement */
642 /* check if the reintegration has already started or finished */
643 write_lock(&qsd->qsd_lock);
645 if ((qqi->qqi_glb_uptodate && qqi->qqi_slv_uptodate) ||
646 qqi->qqi_reint || qsd->qsd_stopping) {
647 write_unlock(&qsd->qsd_lock);
652 write_unlock(&qsd->qsd_lock);
654 /* there could be some unfinished global or index entry updates
655 * (very unlikely), to avoid them messing up with the reint
656 * procedure, we just return and try to re-start reint later. */
657 if (qsd_pending_updates(qqi)) {
658 write_lock(&qsd->qsd_lock);
660 write_unlock(&qsd->qsd_lock);
664 OBD_ALLOC(name, MTI_NAME_MAXLEN);
668 snprintf(name, MTI_NAME_MAXLEN, "qsd_reint_%d.%s",
669 qqi->qqi_qtype, qsd->qsd_svname);
671 rc = PTR_ERR(kthread_run(qsd_reint_main, (void *)qqi, name));
672 OBD_FREE(name, MTI_NAME_MAXLEN);
674 if (IS_ERR_VALUE(rc)) {
675 thread_set_flags(thread, SVC_STOPPED);
676 write_lock(&qsd->qsd_lock);
678 write_unlock(&qsd->qsd_lock);
682 l_wait_event(thread->t_ctl_waitq,
683 thread_is_running(thread) || thread_is_stopped(thread),