Whamcloud - gitweb
040b88518d5df44d6120a6009e767cecd6e4f4ed
[fs/lustre-release.git] / lustre / quota / qsd_reint.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; if not, write to the
18  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19  * Boston, MA 021110-1307, USA
20  *
21  * GPL HEADER END
22  */
23 /*
24  * Copyright (c) 2011, 2012, Intel, Inc.
25  * Use is subject to license terms.
26  *
27  * Author: Johann Lombardi <johann.lombardi@intel.com>
28  * Author: Niu    Yawei    <yawei.niu@intel.com>
29  */
30
31 #ifndef EXPORT_SYMTAB
32 # define EXPORT_SYMTAB
33 #endif
34
35 #define DEBUG_SUBSYSTEM S_LQUOTA
36
37 #include "qsd_internal.h"
38
39 /*
40  * Completion function invoked when the global quota lock enqueue has completed
41  */
42 static void qsd_reint_completion(const struct lu_env *env,
43                                  struct qsd_qtype_info *qqi,
44                                  struct quota_body *req_qbody,
45                                  struct quota_body *rep_qbody,
46                                  struct lustre_handle *lockh,
47                                  union ldlm_wire_lvb *lvb,
48                                  void *arg, int rc)
49 {
50         struct qsd_instance     *qsd = qqi->qqi_qsd;
51         __u64                   *slv_ver = (__u64 *)arg;
52         ENTRY;
53
54         if (rc) {
55                 CERROR("%s: failed to enqueue global quota lock, glb "
56                        "fid:"DFID", rc:%d\n", qsd->qsd_svname,
57                        PFID(&req_qbody->qb_fid), rc);
58                 RETURN_EXIT;
59         }
60
61         CDEBUG(D_QUOTA, "%s: global quota lock successfully acquired, glb "
62                "fid:"DFID", glb ver:"LPU64", slv fid:"DFID", slv ver:"LPU64"\n",
63                qsd->qsd_svname, PFID(&req_qbody->qb_fid),
64                lvb->l_lquota.lvb_glb_ver, PFID(&rep_qbody->qb_slv_fid),
65                rep_qbody->qb_slv_ver);
66
67         *slv_ver = rep_qbody->qb_slv_ver;
68         memcpy(&qqi->qqi_slv_fid, &rep_qbody->qb_slv_fid,
69                sizeof(struct lu_fid));
70         lustre_handle_copy(&qqi->qqi_lockh, lockh);
71         EXIT;
72 }
73
74 static int qsd_reint_qid(const struct lu_env *env, struct qsd_qtype_info *qqi,
75                          bool global, union lquota_id *qid, void *rec)
76 {
77         struct lquota_entry     *lqe;
78         int                      rc;
79         ENTRY;
80
81         lqe = lqe_locate(env, qqi->qqi_site, qid);
82         if (IS_ERR(lqe))
83                 RETURN(PTR_ERR(lqe));
84
85         rc = qsd_update_lqe(env, lqe, global, rec);
86         if (rc)
87                 GOTO(out, rc);
88
89         rc = qsd_update_index(env, qqi, qid, global, 0, rec);
90 out:
91         lqe_putref(lqe);
92         RETURN(rc);
93 }
94
95 static int qsd_reint_entries(const struct lu_env *env,
96                              struct qsd_qtype_info *qqi,
97                              struct idx_info *ii, bool global,
98                              cfs_page_t **pages,
99                              unsigned int npages, bool need_swab)
100 {
101         struct qsd_thread_info  *qti = qsd_info(env);
102         union lquota_id         *qid = &qti->qti_id;
103         int                      i, j, k, size;
104         int                      rc = 0;
105         ENTRY;
106
107         /* sanity check on the record size */
108         if ((global && ii->ii_recsize != sizeof(struct lquota_glb_rec)) ||
109             (!global && ii->ii_recsize != sizeof(struct lquota_slv_rec))) {
110                 CERROR("Invalid record size:%d, global:%s\n",
111                        ii->ii_recsize, global ? "true" : "false");
112                 RETURN(-EINVAL);
113         }
114
115         size = ii->ii_recsize + ii->ii_keysize + sizeof(__u64);
116
117         for (i = 0; i < npages; i++) {
118                 union lu_page   *lip = cfs_kmap(pages[i]);
119
120                 for (j = 0; j < LU_PAGE_COUNT; j++) {
121                         for (k = 0; k < lip->lp_idx.lip_nr; k++) {
122                                 char *entry;
123
124                                 entry = lip->lp_idx.lip_entries + k * size;
125                                 memcpy(qid, entry, ii->ii_keysize); /* key */
126                                 entry += ii->ii_keysize;            /* value */
127
128                                 if (need_swab) {
129                                         int offset = 0;
130
131                                         /* swab key */
132                                         __swab64s(&qid->qid_uid);
133                                         /* quota records only include 64-bit
134                                          * fields */
135                                         while (offset < ii->ii_recsize) {
136                                                 __swab64s((__u64 *)
137                                                               (entry + offset));
138                                                 offset += sizeof(__u64);
139                                         }
140                                 }
141
142                                 rc = qsd_reint_qid(env, qqi, global, qid,
143                                                    (void *)entry);
144                                 if (rc)
145                                         GOTO(out, rc);
146                         }
147                         lip++;
148                 }
149 out:
150                 cfs_kunmap(pages[i]);
151                 if (rc)
152                         break;
153         }
154         RETURN(rc);
155 }
156
157 static int qsd_reint_index(const struct lu_env *env, struct qsd_qtype_info *qqi,
158                            bool global)
159 {
160         struct qsd_thread_info  *qti = qsd_info(env);
161         struct qsd_instance     *qsd = qqi->qqi_qsd;
162         struct idx_info         *ii = &qti->qti_ii;
163         struct lu_fid           *fid;
164         cfs_page_t              **pages = NULL;
165         unsigned int             npages, pg_cnt;
166         __u64                    start_hash = 0, ver = 0;
167         bool                     need_swab = false;
168         int                      i, rc;
169         ENTRY;
170
171         fid = global ? &qqi->qqi_fid : &qqi->qqi_slv_fid;
172
173         /* let's do a 1MB bulk */
174         npages = min_t(unsigned int, PTLRPC_MAX_BRW_SIZE, 1 << 20);
175         npages /= CFS_PAGE_SIZE;
176
177         /* allocate pages for bulk index read */
178         OBD_ALLOC(pages, npages * sizeof(*pages));
179         if (pages == NULL)
180                 GOTO(out, rc = -ENOMEM);
181         for (i = 0; i < npages; i++) {
182                 pages[i] = cfs_alloc_page(CFS_ALLOC_STD);
183                 if (pages[i] == NULL)
184                         GOTO(out, rc = -ENOMEM);
185         }
186
187 repeat:
188         /* initialize index_info request with FID of global index */
189         memset(ii, 0, sizeof(*ii));
190         memcpy(&ii->ii_fid, fid, sizeof(*fid));
191         ii->ii_magic = IDX_INFO_MAGIC;
192         ii->ii_flags = II_FL_NOHASH;
193         ii->ii_count = npages * LU_PAGE_COUNT;
194         ii->ii_hash_start = start_hash;
195
196         /* send bulk request to quota master to read global index */
197         rc = qsd_fetch_index(env, qsd->qsd_exp, ii, npages, pages, &need_swab);
198         if (rc) {
199                 CWARN("%s: failed to fetch index for "DFID". %d\n",
200                       qsd->qsd_svname, PFID(fid), rc);
201                 GOTO(out, rc);
202         }
203
204         /* various sanity checks */
205         if (ii->ii_magic != IDX_INFO_MAGIC) {
206                 CERROR("%s: invalid magic in index transfer %x != %x\n",
207                        qsd->qsd_svname, ii->ii_magic, IDX_INFO_MAGIC);
208                 GOTO(out, rc = -EPROTO);
209         }
210         if ((ii->ii_flags & II_FL_VARKEY) != 0)
211                 CWARN("%s: II_FL_VARKEY is set on index transfer for fid "DFID
212                       ", it shouldn't be\n", qsd->qsd_svname, PFID(fid));
213         if ((ii->ii_flags & II_FL_NONUNQ) != 0)
214                 CWARN("%s: II_FL_NONUNQ is set on index transfer for fid "DFID
215                       ", it shouldn't be\n", qsd->qsd_svname, PFID(fid));
216         if (ii->ii_keysize != sizeof(__u64)) {
217                 CERROR("%s: invalid key size reported on index transfer for "
218                        "fid "DFID", %u != %u\n", qsd->qsd_svname, PFID(fid),
219                        ii->ii_keysize, (int)sizeof(__u64));
220                 GOTO(out, rc = -EPROTO);
221         }
222         if (ii->ii_version == 0 && ii->ii_count != 0)
223                 CWARN("%s: index version for fid "DFID" is 0, but index isn't "
224                       "empty (%d)\n", qsd->qsd_svname, PFID(fid), ii->ii_count);
225
226         CDEBUG(D_QUOTA, "%s: reintegration process for fid "DFID" successfully "
227                "fetched %s index, count = %d\n", qsd->qsd_svname,
228                PFID(fid), global ? "global" : "slave", ii->ii_count);
229
230         if (start_hash == 0)
231                 /* record version associated with the first bulk transfer */
232                 ver = ii->ii_version;
233
234         pg_cnt = (ii->ii_count + (LU_PAGE_COUNT) - 1);
235         pg_cnt >>= CFS_PAGE_SHIFT - LU_PAGE_SHIFT;
236
237         if (pg_cnt > npages) {
238                 CERROR("%s: master returned more pages than expected, %u > %u"
239                        "\n", qsd->qsd_svname, pg_cnt, npages);
240                 pg_cnt = npages;
241         }
242
243         rc = qsd_reint_entries(env, qqi, ii, global, pages, pg_cnt, need_swab);
244         if (rc)
245                 GOTO(out, rc);
246
247         if (ii->ii_hash_end != II_END_OFF) {
248                 start_hash = ii->ii_hash_end;
249                 goto repeat;
250         }
251 out:
252         if (pages != NULL) {
253                 for (i = 0; i < npages; i++)
254                         if (pages[i] != NULL)
255                                 cfs_free_page(pages[i]);
256                 OBD_FREE(pages, npages * sizeof(*pages));
257         }
258
259         /* Update index version */
260         if (rc == 0) {
261                 rc = qsd_write_version(env, qqi, ver, global);
262                 if (rc)
263                         CERROR("%s: write version "LPU64" to "DFID" failed. "
264                                "%d\n", qsd->qsd_svname, ver, PFID(fid), rc);
265         }
266
267         RETURN(rc);
268 }
269
270 static int qsd_reconciliation(const struct lu_env *env,
271                               struct qsd_qtype_info *qqi)
272 {
273         struct qsd_thread_info  *qti = qsd_info(env);
274         struct qsd_instance     *qsd = qqi->qqi_qsd;
275         const struct dt_it_ops  *iops;
276         struct dt_it            *it;
277         struct dt_key           *key;
278         struct lquota_entry     *lqe;
279         union lquota_id         *qid = &qti->qti_id;
280         int                      rc;
281         ENTRY;
282
283         LASSERT(qqi->qqi_glb_obj != NULL);
284         iops = &qqi->qqi_glb_obj->do_index_ops->dio_it;
285
286         it = iops->init(env, qqi->qqi_glb_obj, 0, BYPASS_CAPA);
287         if (IS_ERR(it)) {
288                 CWARN("%s: Initialize it for "DFID" failed. %ld\n",
289                       qsd->qsd_svname, PFID(&qqi->qqi_fid), PTR_ERR(it));
290                 RETURN(PTR_ERR(it));
291         }
292
293         rc = iops->load(env, it, 0);
294         if (rc < 0) {
295                 CWARN("%s: Load first entry for "DFID" failed. %d\n",
296                       qsd->qsd_svname, PFID(&qqi->qqi_fid), rc);
297                 GOTO(out, rc);
298         } else if (rc == 0) {
299                 rc = iops->next(env, it);
300                 if (rc != 0)
301                         GOTO(out, rc = (rc < 0) ? rc : 0);
302         }
303
304         do {
305                 key = iops->key(env, it);
306                 if (IS_ERR(key)) {
307                         CWARN("%s: Error key for "DFID". %ld\n",
308                               qsd->qsd_svname, PFID(&qqi->qqi_fid),
309                               PTR_ERR(key));
310                         GOTO(out, rc = PTR_ERR(key));
311                 }
312
313                 /* skip the root user/group */
314                 if (*((__u64 *)key) == 0)
315                         goto next;
316
317                 qid->qid_uid = *((__u64 *)key);
318
319                 lqe = lqe_locate(env, qqi->qqi_site, qid);
320                 if (IS_ERR(lqe)) {
321                         CWARN("%s: Fail to locate lqe. "DFID", %ld\n",
322                               qsd->qsd_svname, PFID(&qqi->qqi_fid),
323                               PTR_ERR(lqe));
324                         GOTO(out, rc = PTR_ERR(lqe));
325                 }
326
327                 if (!lqe->lqe_enforced) {
328                         lqe_putref(lqe);
329                         goto next;
330                 }
331
332                 rc = qsd_refresh_usage(env, lqe);
333                 if (rc) {
334                         CWARN("%s: Fail to get usage. "DFID", %d\n",
335                               qsd->qsd_svname, PFID(&qqi->qqi_fid), rc);
336                         lqe_putref(lqe);
337                         GOTO(out, rc);
338                 }
339
340                 rc = qsd_dqacq(env, lqe, QSD_REP);
341                 lqe_putref(lqe);
342
343                 if (rc) {
344                         CWARN("%s: Fail to report quota. "DFID", %d\n",
345                               qsd->qsd_svname, PFID(&qqi->qqi_fid), rc);
346                         GOTO(out, rc);
347                 }
348 next:
349                 rc = iops->next(env, it);
350                 if (rc < 0)
351                         CWARN("%s: Error next "DFID". %d\n", qsd->qsd_svname,
352                               PFID(&qqi->qqi_fid), rc);
353         } while (rc == 0);
354
355         /* reach the end */
356         if (rc > 0)
357                 rc = 0;
358 out:
359         iops->put(env, it);
360         iops->fini(env, it);
361         RETURN(rc);
362 }
363
364 static int qsd_connected(struct qsd_instance *qsd)
365 {
366         int     connected;
367
368         cfs_read_lock(&qsd->qsd_lock);
369         connected = qsd->qsd_exp_valid ? 1 : 0;
370         cfs_read_unlock(&qsd->qsd_lock);
371
372         return connected;
373 }
374
375 static int qsd_started(struct qsd_instance *qsd)
376 {
377         int     started;
378
379         cfs_read_lock(&qsd->qsd_lock);
380         started = qsd->qsd_started ? 1 : 0;
381         cfs_read_unlock(&qsd->qsd_lock);
382
383         return started;
384 }
385
386 /*
387  * Routine executed by the reintegration thread.
388  */
389 static int qsd_reint_main(void *args)
390 {
391         struct lu_env           *env;
392         struct qsd_thread_info  *qti;
393         struct qsd_qtype_info   *qqi = (struct qsd_qtype_info *)args;
394         struct qsd_instance     *qsd = qqi->qqi_qsd;
395         struct ptlrpc_thread    *thread = &qqi->qqi_reint_thread;
396         struct l_wait_info       lwi = { 0 };
397         int                      rc;
398         ENTRY;
399
400         cfs_daemonize("qsd_reint");
401
402         CDEBUG(D_QUOTA, "%s: Starting reintegration thread for "DFID"\n",
403                qsd->qsd_svname, PFID(&qqi->qqi_fid));
404
405         qqi_getref(qqi);
406         lu_ref_add(&qqi->qqi_reference, "reint_thread", thread);
407
408         thread_set_flags(thread, SVC_RUNNING);
409         cfs_waitq_signal(&thread->t_ctl_waitq);
410
411         OBD_ALLOC_PTR(env);
412         if (env == NULL)
413                 GOTO(out, rc = -ENOMEM);
414
415         /* initialize environment */
416         rc = lu_env_init(env, LCT_DT_THREAD);
417         if (rc)
418                 GOTO(out_env, rc);
419         qti = qsd_info(env);
420
421         /* wait for the connection to master established */
422         l_wait_event(thread->t_ctl_waitq,
423                      qsd_connected(qsd) || !thread_is_running(thread), &lwi);
424
425         /* Step 1: enqueue global index lock */
426         if (!thread_is_running(thread))
427                 GOTO(out_env_init, rc = 0);
428
429         LASSERT(qsd->qsd_exp != NULL);
430         LASSERT(qqi->qqi_glb_uptodate == 0 || qqi->qqi_slv_uptodate == 0);
431
432         memset(&qti->qti_lvb, 0, sizeof(qti->qti_lvb));
433
434         cfs_read_lock(&qsd->qsd_lock);
435         /* check whether we already own a global quota lock for this type */
436         if (lustre_handle_is_used(&qqi->qqi_lockh) &&
437             ldlm_lock_addref_try(&qqi->qqi_lockh, qsd_glb_einfo.ei_mode) == 0) {
438                 cfs_read_unlock(&qsd->qsd_lock);
439                 /* force refresh of global & slave index copy */
440                 qti->qti_lvb.l_lquota.lvb_glb_ver = ~0ULL;
441                 qti->qti_slv_ver = ~0ULL;
442         } else {
443                 /* no valid lock found, let's enqueue a new one */
444                 cfs_read_unlock(&qsd->qsd_lock);
445
446                 memset(&qti->qti_body, 0, sizeof(qti->qti_body));
447                 memcpy(&qti->qti_body.qb_fid, &qqi->qqi_fid,
448                        sizeof(qqi->qqi_fid));
449
450                 rc = qsd_intent_lock(env, qsd->qsd_exp, &qti->qti_body, true,
451                                      IT_QUOTA_CONN, qsd_reint_completion, qqi,
452                                      &qti->qti_lvb, (void *)&qti->qti_slv_ver);
453                 if (rc)
454                         GOTO(out_env_init, rc);
455
456                 CDEBUG(D_QUOTA, "%s: glb_ver:"LPU64"/"LPU64",slv_ver:"LPU64"/"
457                        LPU64"\n", qsd->qsd_svname,
458                        qti->qti_lvb.l_lquota.lvb_glb_ver, qqi->qqi_glb_ver,
459                        qti->qti_slv_ver, qqi->qqi_slv_ver);
460         }
461
462         /* Step 2: reintegrate global index */
463         if (!thread_is_running(thread))
464                 GOTO(out_lock, rc = 0);
465
466         OBD_FAIL_TIMEOUT(OBD_FAIL_QUOTA_DELAY_REINT, 10);
467
468         if (qqi->qqi_glb_ver != qti->qti_lvb.l_lquota.lvb_glb_ver) {
469                 rc = qsd_reint_index(env, qqi, true);
470                 if (rc) {
471                         CWARN("%s: reint global for "DFID" failed. %d\n",
472                               qsd->qsd_svname, PFID(&qqi->qqi_fid), rc);
473                         GOTO(out_lock, rc);
474                 }
475         } else {
476                 qsd_bump_version(qqi, qqi->qqi_glb_ver, true);
477         }
478
479         /* Step 3: reintegrate slave index */
480         if (!thread_is_running(thread))
481                 GOTO(out_lock, rc = 0);
482
483         if (qqi->qqi_slv_ver != qti->qti_slv_ver) {
484                 rc = qsd_reint_index(env, qqi, false);
485                 if (rc) {
486                         CWARN("%s: Reint slave for "DFID" failed. %d\n",
487                               qsd->qsd_svname, PFID(&qqi->qqi_slv_fid), rc);
488                         GOTO(out_lock, rc);
489                 }
490         } else {
491                 qsd_bump_version(qqi, qqi->qqi_slv_ver, false);
492         }
493
494         /* wait for the connection to master established */
495         l_wait_event(thread->t_ctl_waitq,
496                      qsd_started(qsd) || !thread_is_running(thread), &lwi);
497
498         if (!thread_is_running(thread))
499                 GOTO(out_lock, rc = 0);
500
501         /* Step 4: start reconciliation for each enforced ID */
502         rc = qsd_reconciliation(env, qqi);
503         if (rc)
504                 CWARN("%s: reconciliation failed. "DFID", %d\n",
505                       qsd->qsd_svname, PFID(&qti->qti_fid), rc);
506
507         EXIT;
508 out_lock:
509         ldlm_lock_decref(&qqi->qqi_lockh, qsd_glb_einfo.ei_mode);
510 out_env_init:
511         lu_env_fini(env);
512 out_env:
513         OBD_FREE_PTR(env);
514 out:
515         cfs_write_lock(&qsd->qsd_lock);
516         qqi->qqi_reint = 0;
517         cfs_write_unlock(&qsd->qsd_lock);
518
519         qqi_putref(qqi);
520         lu_ref_del(&qqi->qqi_reference, "reint_thread", thread);
521
522         thread_set_flags(thread, SVC_STOPPED);
523         cfs_waitq_signal(&thread->t_ctl_waitq);
524         return rc;
525 }
526
527 void qsd_stop_reint_thread(struct qsd_qtype_info *qqi)
528 {
529         struct ptlrpc_thread    *thread = &qqi->qqi_reint_thread;
530         struct l_wait_info       lwi = { 0 };
531
532         if (!thread_is_stopped(thread)) {
533                 thread_set_flags(thread, SVC_STOPPING);
534                 cfs_waitq_signal(&thread->t_ctl_waitq);
535
536                 l_wait_event(thread->t_ctl_waitq,
537                              thread_is_stopped(thread), &lwi);
538         }
539 }
540
541 int qsd_start_reint_thread(struct qsd_qtype_info *qqi)
542 {
543         struct ptlrpc_thread    *thread = &qqi->qqi_reint_thread;
544         struct qsd_instance     *qsd = qqi->qqi_qsd;
545         struct l_wait_info      lwi = { 0 };
546         int                     rc;
547         ENTRY;
548
549         /* don't bother to do reintegration when quota isn't enabled */
550         if (!qsd_type_enabled(qqi->qqi_qsd, qqi->qqi_qtype))
551                 RETURN(0);
552
553         /* check if the reintegration has already started or finished */
554         cfs_write_lock(&qsd->qsd_lock);
555
556         if ((qqi->qqi_glb_uptodate && qqi->qqi_slv_uptodate) ||
557              qqi->qqi_reint || qsd->qsd_stopping) {
558                 cfs_write_unlock(&qsd->qsd_lock);
559                 RETURN(0);
560         }
561         qqi->qqi_reint = 1;
562
563         cfs_write_unlock(&qsd->qsd_lock);
564
565         rc = cfs_create_thread(qsd_reint_main, (void *)qqi, 0);
566         if (rc < 0) {
567                 thread_set_flags(thread, SVC_STOPPED);
568                 cfs_write_lock(&qsd->qsd_lock);
569                 qqi->qqi_reint = 0;
570                 cfs_write_unlock(&qsd->qsd_lock);
571                 RETURN(rc);
572         }
573
574         l_wait_event(thread->t_ctl_waitq,
575                      thread_is_running(thread) || thread_is_stopped(thread),
576                      &lwi);
577         RETURN(0);
578 }