Whamcloud - gitweb
LU-17705 ptlrpc: replace synchronize_rcu() with rcu_barrier()
[fs/lustre-release.git] / lustre / quota / qsd_lib.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; if not, write to the
18  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19  * Boston, MA 021110-1307, USA
20  *
21  * GPL HEADER END
22  */
23 /*
24  * Copyright (c) 2012, 2017, Intel Corporation.
25  * Use is subject to license terms.
26  *
27  * Author: Johann Lombardi <johann.lombardi@intel.com>
28  * Author: Niu    Yawei    <yawei.niu@intel.com>
29  */
30
31 /*
32  * Quota Slave Driver (QSD) management.
33  *
34  * The quota slave feature is implemented under the form of a library called
35  * QSD. Each OSD device should create a QSD instance via qsd_init() which will
36  * be used to manage quota enforcement for this device. This implies:
37  * - completing the reintegration procedure with the quota master (aka QMT, see
38  *   qmt_dev.c) to retrieve the latest quota settings and space distribution.
39  * - managing quota locks in order to be notified of configuration changes.
40  * - acquiring space from the QMT when quota space for a given user/group is
41  *   close to exhaustion.
42  * - allocating quota space to service threads for local request processing.
43  *
 * Once the QSD instance is created, the OSD device should invoke qsd_start()
45  * when recovery is completed. This notifies the QSD that we are about to
46  * process new requests on which quota should be strictly enforced.
47  * Then, qsd_op_begin/end can be used to reserve/release/pre-acquire quota space
48  * for/after each operation until shutdown where the QSD instance should be
49  * freed via qsd_fini().
50  */
51
52 #define DEBUG_SUBSYSTEM S_LQUOTA
53
54 #include <obd_class.h>
55 #include "qsd_internal.h"
56
/* kmem cache backing struct qsd_upd_rec objects, described in qsd_caches[] */
struct kmem_cache *upd_kmem;

/*
 * Descriptors of the kmem caches owned by the QSD library.
 * NOTE(review): the trailing entry with a NULL ckd_cache presumably acts as
 * the array terminator for the cache init/fini helpers - confirm.
 */
struct lu_kmem_descr qsd_caches[] = {
        {
                .ckd_cache = &upd_kmem,
                .ckd_name  = "upd_kmem",
                .ckd_size  = sizeof(struct qsd_upd_rec)
        },
        {
                .ckd_cache = NULL
        }
};

/* define qsd thread key */
LU_KEY_INIT_FINI(qsd, struct qsd_thread_info);
LU_CONTEXT_KEY_DEFINE(qsd, LCT_MD_THREAD | LCT_MG_THREAD | LCT_DT_THREAD | LCT_LOCAL);
LU_KEY_INIT_GENERIC(qsd);
74
75 /* some procfs helpers */
76 static int qsd_state_seq_show(struct seq_file *m, void *data)
77 {
78         struct qsd_instance     *qsd = m->private;
79         char                     enabled[5];
80
81         LASSERT(qsd != NULL);
82
83         memset(enabled, 0, sizeof(enabled));
84         if (qsd_type_enabled(qsd, USRQUOTA))
85                 strcat(enabled, "u");
86         if (qsd_type_enabled(qsd, GRPQUOTA))
87                 strcat(enabled, "g");
88         if (qsd_type_enabled(qsd, PRJQUOTA))
89                 strncat(enabled, "p", 1);
90         if (strlen(enabled) == 0)
91                 strcat(enabled, "none");
92
93         /* TODO: further pool ID should be removed or
94          * replaced with pool Name */
95         seq_printf(m, "target name:    %s\n"
96                    "pool ID:        %d\n"
97                    "type:           %s\n"
98                    "quota enabled:  %s\n"
99                    "conn to master: %s\n",
100                    qsd->qsd_svname, 0,
101                    qsd->qsd_is_md ? "md" : "dt", enabled,
102                    qsd->qsd_exp_valid ? "setup" : "not setup yet");
103
104         if (qsd->qsd_prepared) {
105                 memset(enabled, 0, sizeof(enabled));
106                 if (qsd->qsd_type_array[USRQUOTA]->qqi_acct_obj != NULL)
107                         strcat(enabled, "u");
108                 if (qsd->qsd_type_array[GRPQUOTA]->qqi_acct_obj != NULL)
109                         strcat(enabled, "g");
110                 if (qsd->qsd_type_array[PRJQUOTA]->qqi_acct_obj != NULL)
111                         strncat(enabled, "p", 1);
112                 if (strlen(enabled) == 0)
113                         strcat(enabled, "none");
114                 seq_printf(m, "space acct:     %s\n"
115                            "user uptodate:  glb[%d],slv[%d],reint[%d]\n"
116                            "group uptodate: glb[%d],slv[%d],reint[%d]\n"
117                            "project uptodate: glb[%d],slv[%d],reint[%d]\n",
118                            enabled,
119                            qsd->qsd_type_array[USRQUOTA]->qqi_glb_uptodate,
120                            qsd->qsd_type_array[USRQUOTA]->qqi_slv_uptodate,
121                            qsd->qsd_type_array[USRQUOTA]->qqi_reint,
122                            qsd->qsd_type_array[GRPQUOTA]->qqi_glb_uptodate,
123                            qsd->qsd_type_array[GRPQUOTA]->qqi_slv_uptodate,
124                            qsd->qsd_type_array[GRPQUOTA]->qqi_reint,
125                            qsd->qsd_type_array[PRJQUOTA]->qqi_glb_uptodate,
126                            qsd->qsd_type_array[PRJQUOTA]->qqi_slv_uptodate,
127                            qsd->qsd_type_array[PRJQUOTA]->qqi_reint);
128         }
129         return 0;
130 }
131 LPROC_SEQ_FOPS_RO(qsd_state);
132
133 static int qsd_enabled_seq_show(struct seq_file *m, void *data)
134 {
135         struct qsd_instance *qsd = m->private;
136         char enabled[5] = "";
137         int types = 0;
138
139         LASSERT(qsd != NULL);
140
141         memset(enabled, 0, sizeof(enabled));
142         if (qsd_type_enabled(qsd, USRQUOTA))
143                 enabled[types++] = 'u';
144         if (qsd_type_enabled(qsd, GRPQUOTA))
145                 enabled[types++] = 'g';
146         if (qsd_type_enabled(qsd, PRJQUOTA))
147                 enabled[types++] = 'p';
148         if (!types)
149                 strncpy(enabled, "none", 4);
150
151         seq_printf(m, "%s\n", enabled);
152         return 0;
153 }
154
155 static ssize_t qsd_enabled_seq_write(struct file *file,
156                                      const char __user *buffer,
157                                      size_t count, loff_t *off)
158 {
159         struct seq_file *m = file->private_data;
160         struct qsd_instance *qsd = m->private;
161         char fsname[LUSTRE_MAXFSNAME + 1];
162         int enabled = 0;
163         char valstr[5];
164         int pool, rc;
165
166         if (count > 4)
167                 return -E2BIG;
168
169         if (copy_from_user(valstr, buffer, count))
170                 GOTO(out, count = -EFAULT);
171
172         valstr[sizeof(valstr) - 1] = 0;
173         if (strchr(valstr, 'u'))
174                 enabled |= BIT(USRQUOTA);
175         if (strchr(valstr, 'g'))
176                 enabled |= BIT(GRPQUOTA);
177         if (strchr(valstr, 'p'))
178                 enabled |= BIT(PRJQUOTA);
179
180         if (enabled == 0 && strcmp(valstr, "none"))
181                 GOTO(out, count = -EINVAL);
182
183         if (qsd->qsd_is_md)
184                 pool = LQUOTA_RES_MD;
185         else
186                 pool = LQUOTA_RES_DT;
187
188         if (server_name2fsname(qsd->qsd_svname, fsname, NULL))
189                 GOTO(out, count = -EINVAL);
190
191         rc = qsd_config(valstr, fsname, pool);
192         if (rc)
193                 count = rc;
194 out:
195         return count;
196 }
197 LPROC_SEQ_FOPS(qsd_enabled);
198
199 /* force reintegration procedure to be executed.
200  * Used for test/debugging purpose */
201 static ssize_t
202 lprocfs_force_reint_seq_write(struct file *file, const char __user *buffer,
203                                 size_t count, loff_t *off)
204 {
205         struct seq_file     *m = file->private_data;
206         struct qsd_instance *qsd = m->private;
207         int                  rc = 0, qtype;
208
209         LASSERT(qsd != NULL);
210
211         write_lock(&qsd->qsd_lock);
212         if (qsd->qsd_stopping) {
213                 /* don't mess up with shutdown procedure, it is already
214                  * complicated enough */
215                 rc = -ESHUTDOWN;
216         } else if (!qsd->qsd_prepared) {
217                 rc = -EAGAIN;
218         } else {
219                 /* mark all indexes as stale */
220                 for (qtype = USRQUOTA; qtype < LL_MAXQUOTAS; qtype++) {
221                         qsd->qsd_type_array[qtype]->qqi_glb_uptodate = false;
222                         qsd->qsd_type_array[qtype]->qqi_slv_uptodate = false;
223                 }
224         }
225         write_unlock(&qsd->qsd_lock);
226
227         if (rc)
228                 return rc;
229
230         /* kick off reintegration */
231         for (qtype = USRQUOTA; qtype < LL_MAXQUOTAS; qtype++) {
232                 rc = qsd_start_reint_thread(qsd->qsd_type_array[qtype]);
233                 if (rc)
234                         break;
235         }
236         return rc == 0 ? count : rc;
237 }
238 LPROC_SEQ_FOPS_WR_ONLY(qsd, force_reint);
239
240 static int qsd_timeout_seq_show(struct seq_file *m, void *data)
241 {
242         struct qsd_instance *qsd = m->private;
243         LASSERT(qsd != NULL);
244
245         seq_printf(m, "%d\n", qsd_wait_timeout(qsd));
246         return 0;
247 }
248
249 static ssize_t
250 qsd_timeout_seq_write(struct file *file, const char __user *buffer,
251                         size_t count, loff_t *off)
252 {
253         struct seq_file *m = file->private_data;
254         struct qsd_instance *qsd = m->private;
255         time64_t timeout;
256         int rc;
257
258         LASSERT(qsd != NULL);
259         rc = kstrtoll_from_user(buffer, count, 0, &timeout);
260         if (rc)
261                 return rc;
262
263         if (timeout < 0)
264                 return -EINVAL;
265
266         qsd->qsd_timeout = timeout;
267         return count;
268 }
269 LPROC_SEQ_FOPS(qsd_timeout);
270
271 static int qsd_root_prj_enable_seq_show(struct seq_file *m, void *data)
272 {
273         struct qsd_instance *qsd = m->private;
274
275         LASSERT(qsd != NULL);
276         seq_printf(m, "%d\n", qsd->qsd_root_prj_enable);
277         return 0;
278 }
279
280 static ssize_t
281 qsd_root_prj_enable_seq_write(struct file *file, const char __user *buffer,
282                         size_t count, loff_t *off)
283 {
284         struct seq_file *m = file->private_data;
285         struct qsd_instance *qsd = m->private;
286         bool enable;
287         int rc;
288
289         LASSERT(qsd != NULL);
290         rc = kstrtobool_from_user(buffer, count, &enable);
291         if (rc)
292                 return rc;
293
294         qsd->qsd_root_prj_enable = enable;
295         return count;
296 }
297 LPROC_SEQ_FOPS(qsd_root_prj_enable);
298
299 static struct lprocfs_vars lprocfs_quota_qsd_vars[] = {
300         { .name =       "info",
301           .fops =       &qsd_state_fops         },
302         { .name =       "enabled",
303           .fops =       &qsd_enabled_fops       },
304         { .name =       "force_reint",
305           .fops =       &qsd_force_reint_fops   },
306         { .name =       "timeout",
307           .fops =       &qsd_timeout_fops       },
308         { .name =       "root_prj_enable",
309           .fops =       &qsd_root_prj_enable_fops       },
310         { NULL }
311 };
312
313 /*
314  * Callback function invoked by the OSP layer when the connection to the master
315  * has been set up.
316  *
317  * \param data - is a pointer to the qsd_instance
318  *
319  * \retval - 0 on success, appropriate error on failure
320  */
321 static int qsd_conn_callback(void *data)
322 {
323         struct qsd_instance *qsd = (struct qsd_instance *)data;
324         int                  type;
325         ENTRY;
326
327         /* qsd_exp should now be valid */
328         LASSERT(qsd->qsd_exp);
329
330         qsd->qsd_ns = class_exp2obd(qsd->qsd_exp)->obd_namespace;
331
332         write_lock(&qsd->qsd_lock);
333         /* notify that qsd_exp is now valid */
334         qsd->qsd_exp_valid = true;
335         write_unlock(&qsd->qsd_lock);
336
337         /* Now that the connection to master is setup, we can initiate the
338          * reintegration procedure for quota types which are enabled.
339          * It is worth noting that, if the qsd_instance hasn't been started
340          * already, then we can only complete the first two steps of the
341          * reintegration procedure (i.e. global lock enqueue and slave
342          * index transfer) since the space usage reconciliation (i.e.
343          * step 3) will have to wait for qsd_start() to be called */
344         for (type = USRQUOTA; type < LL_MAXQUOTAS; type++) {
345                 struct qsd_qtype_info *qqi = qsd->qsd_type_array[type];
346                 struct task_struct *t;
347
348                 /* qqi_reint_task can be set to NULL at any time,
349                  * so we need to be careful.
350                  */
351                 rcu_read_lock();
352                 t = rcu_dereference(qqi->qqi_reint_task);
353                 if (t)
354                         wake_up_process(t);
355                 rcu_read_unlock();
356         }
357
358         RETURN(0);
359 }
360
361 /*
362  * Release qsd_qtype_info structure which contains data associated with a
363  * given quota type. This releases the accounting objects.
364  * It's called on OSD cleanup when the qsd instance is released.
365  *
366  * \param env - is the environment passed by the caller
367  * \param qsd - is the qsd instance managing the qsd_qtype_info structure
368  *              to be released
369  * \param qtype - is the quota type to be shutdown
370  */
371 static void qsd_qtype_fini(const struct lu_env *env, struct qsd_instance *qsd,
372                            int qtype)
373 {
374         struct qsd_qtype_info   *qqi;
375         int repeat = 0;
376         ENTRY;
377
378         if (qsd->qsd_type_array[qtype] == NULL)
379                 RETURN_EXIT;
380         qqi = qsd->qsd_type_array[qtype];
381         qsd->qsd_type_array[qtype] = NULL;
382
383         /* all deferred work lists should be empty */
384         LASSERT(list_empty(&qqi->qqi_deferred_glb));
385         LASSERT(list_empty(&qqi->qqi_deferred_slv));
386
387         /* shutdown lquota site */
388         if (qqi->qqi_site != NULL && !IS_ERR(qqi->qqi_site)) {
389                 lquota_site_free(env, qqi->qqi_site);
390                 qqi->qqi_site = NULL;
391         }
392
393         /* The qqi may still be holding by global locks which are being
394          * canceled asynchronously (LU-4365), see the following steps:
395          *
396          * - On server umount, we try to clear all quota locks first by
397          *   disconnecting LWP (which will invalidate import and cleanup
398          *   all locks on it), however, if quota reint process is holding
399          *   the global lock for reintegration at that time, global lock
400          *   will fail to be cleared on LWP disconnection.
401          *
402          * - Umount process goes on and stops reint process, the global
403          *   lock will be dropped on reint process exit, however, the lock
404          *   cancel in done in asynchronous way, so the
405          *   qsd_glb_blocking_ast() might haven't been called yet when we
406          *   get here.
407          */
408         while (atomic_read(&qqi->qqi_ref) > 1) {
409                 CDEBUG(D_QUOTA, "qqi reference count %u, repeat: %d\n",
410                        atomic_read(&qqi->qqi_ref), repeat);
411                 repeat++;
412                 schedule_timeout_interruptible(cfs_time_seconds(1));
413         }
414
415         /* by now, all qqi users should have gone away */
416         LASSERT(atomic_read(&qqi->qqi_ref) == 1);
417         lu_ref_fini(&qqi->qqi_reference);
418
419         /* release accounting object */
420         if (qqi->qqi_acct_obj != NULL && !IS_ERR(qqi->qqi_acct_obj)) {
421                 dt_object_put(env, qqi->qqi_acct_obj);
422                 qqi->qqi_acct_obj = NULL;
423         }
424
425         /* release slv index */
426         if (qqi->qqi_slv_obj != NULL && !IS_ERR(qqi->qqi_slv_obj)) {
427                 dt_object_put(env, qqi->qqi_slv_obj);
428                 qqi->qqi_slv_obj = NULL;
429                 qqi->qqi_slv_ver = 0;
430         }
431
432         /* release global index */
433         if (qqi->qqi_glb_obj != NULL && !IS_ERR(qqi->qqi_glb_obj)) {
434                 dt_object_put(env, qqi->qqi_glb_obj);
435                 qqi->qqi_glb_obj = NULL;
436                 qqi->qqi_glb_ver = 0;
437         }
438
439         OBD_FREE_PTR(qqi);
440         EXIT;
441 }
442
/*
 * Return the name of the procfs file exporting the accounting object of
 * quota type \a qtype.
 *
 * \param qtype - is the quota type
 *
 * \retval - a string literal for user, group and project quota; for any
 *           other value, a pointer to a static buffer which is overwritten
 *           by the next call with an unknown type
 */
static const char *qtype2acct_name(int qtype)
{
        static char unknown[24];

        if (qtype == USRQUOTA)
                return "acct_user";
        if (qtype == GRPQUOTA)
                return "acct_group";
        if (qtype == PRJQUOTA)
                return "acct_project";

        snprintf(unknown, sizeof(unknown), "acct_unknown_%u", qtype);
        return unknown;
}
459
/*
 * Return the name of the procfs file exporting the global index copy of
 * quota type \a qtype.
 *
 * \param qtype - is the quota type
 *
 * \retval - a string literal for user, group and project quota; for any
 *           other value, a pointer to a static buffer which is overwritten
 *           by the next call with an unknown type
 */
static const char *qtype2glb_name(int qtype)
{
        /* "limit_unknown_" (14 characters) + up to 10 digits + NUL */
        static char unknown[32];

        switch (qtype) {
        case USRQUOTA:
                return "limit_user";
        case GRPQUOTA:
                return "limit_group";
        case PRJQUOTA:
                return "limit_project";
        }

        /* use the "limit_" prefix here too, so that the fallback name can
         * never collide with the one built by qtype2acct_name() */
        snprintf(unknown, sizeof(unknown), "limit_unknown_%u",
                 (unsigned int)qtype);
        return unknown;
}
476
477 /*
478  * Allocate and initialize a qsd_qtype_info structure for quota type \qtype.
479  * This opens the accounting object and initializes the proc file.
480  * It's called on OSD start when the qsd_prepare() is invoked on the qsd
481  * instance.
482  *
483  * \param env  - the environment passed by the caller
484  * \param qsd  - is the qsd instance which will be in charge of the new
485  *               qsd_qtype_info instance.
486  * \param qtype - is quota type to set up
487  *
488  * \retval - 0 on success and qsd->qsd_type_array[qtype] is allocated,
489  *           appropriate error on failure
490  */
491 static int qsd_qtype_init(const struct lu_env *env, struct qsd_instance *qsd,
492                           int qtype)
493 {
494         struct qsd_qtype_info   *qqi;
495         int                      rc;
496         struct obd_uuid          uuid;
497         ENTRY;
498
499         LASSERT(qsd->qsd_type_array[qtype] == NULL);
500
501         /* allocate structure for this quota type */
502         OBD_ALLOC_PTR(qqi);
503         if (qqi == NULL)
504                 RETURN(-ENOMEM);
505         qsd->qsd_type_array[qtype] = qqi;
506         atomic_set(&qqi->qqi_ref, 1); /* referenced from qsd */
507
508         /* set backpointer and other parameters */
509         qqi->qqi_qsd   = qsd;
510         qqi->qqi_qtype = qtype;
511         lu_ref_init(&qqi->qqi_reference);
512         qqi->qqi_glb_uptodate = false;
513         qqi->qqi_slv_uptodate = false;
514         qqi->qqi_reint        = false;
515         INIT_LIST_HEAD(&qqi->qqi_deferred_glb);
516         INIT_LIST_HEAD(&qqi->qqi_deferred_slv);
517         lquota_generate_fid(&qqi->qqi_fid, QSD_RES_TYPE(qsd), qtype);
518
519         /* open accounting object */
520         LASSERT(qqi->qqi_acct_obj == NULL);
521         qqi->qqi_acct_obj = acct_obj_lookup(env, qsd->qsd_dev, qtype);
522         if (IS_ERR(qqi->qqi_acct_obj)) {
523                 CDEBUG(D_QUOTA, "%s: no %s space accounting support: rc = %ld\n",
524                        qsd->qsd_svname, qtype_name(qtype),
525                        PTR_ERR(qqi->qqi_acct_obj));
526                 qqi->qqi_acct_obj = NULL;
527                 qqi->qqi_acct_failed = true;
528         }
529
530         /* open global index copy */
531         LASSERT(qqi->qqi_glb_obj == NULL);
532         qqi->qqi_glb_obj = lquota_disk_glb_find_create(env, qsd->qsd_dev,
533                                                        qsd->qsd_root,
534                                                        &qqi->qqi_fid, true);
535         if (IS_ERR(qqi->qqi_glb_obj)) {
536                 CERROR("%s: can't open global index copy "DFID" %ld\n",
537                        qsd->qsd_svname, PFID(&qqi->qqi_fid),
538                        PTR_ERR(qqi->qqi_glb_obj));
539                 GOTO(out, rc = PTR_ERR(qqi->qqi_glb_obj));
540         }
541         qqi->qqi_glb_ver = dt_version_get(env, qqi->qqi_glb_obj);
542
543         /* open slave index copy */
544         LASSERT(qqi->qqi_slv_obj == NULL);
545         obd_str2uuid(&uuid, qsd->qsd_svname);
546         qqi->qqi_slv_obj = lquota_disk_slv_find_create(env, qsd->qsd_dev,
547                                                        qsd->qsd_root,
548                                                        &qqi->qqi_fid, &uuid,
549                                                        true);
550         if (IS_ERR(qqi->qqi_slv_obj)) {
551                 CERROR("%s: can't open slave index copy "DFID" %ld\n",
552                        qsd->qsd_svname, PFID(&qqi->qqi_fid),
553                        PTR_ERR(qqi->qqi_slv_obj));
554                 GOTO(out, rc = PTR_ERR(qqi->qqi_slv_obj));
555         }
556         qqi->qqi_slv_ver = dt_version_get(env, qqi->qqi_slv_obj);
557
558         /* allocate site */
559         qqi->qqi_site = lquota_site_alloc(env, qqi, false, qtype, &qsd_lqe_ops);
560         if (IS_ERR(qqi->qqi_site)) {
561                 CERROR("%s: can't allocate site "DFID" %ld\n", qsd->qsd_svname,
562                        PFID(&qqi->qqi_fid), PTR_ERR(qqi->qqi_site));
563                 GOTO(out, rc = PTR_ERR(qqi->qqi_site));
564         }
565
566         /* register proc entry for accounting & global index copy objects */
567         rc = lprocfs_seq_create(qsd->qsd_proc, qtype2acct_name(qtype),
568                                 0444, &lprocfs_quota_seq_fops,
569                                 qqi->qqi_acct_obj);
570         if (rc) {
571                 CERROR("%s: can't add procfs entry for accounting file %d\n",
572                        qsd->qsd_svname, rc);
573                 GOTO(out, rc);
574         }
575
576         rc = lprocfs_seq_create(qsd->qsd_proc, qtype2glb_name(qtype),
577                                 0444, &lprocfs_quota_seq_fops,
578                                 qqi->qqi_glb_obj);
579         if (rc) {
580                 CERROR("%s: can't add procfs entry for global index copy %d\n",
581                        qsd->qsd_svname, rc);
582                 GOTO(out, rc);
583         }
584         EXIT;
585 out:
586         if (rc)
587                 qsd_qtype_fini(env, qsd, qtype);
588         return rc;
589 }
590
591 /*
592  * Release a qsd_instance. Companion of qsd_init(). This releases all data
593  * structures associated with the quota slave (on-disk objects, lquota entry
594  * tables, ...).
595  * This function should be called when the OSD is shutting down.
596  *
597  * \param env - is the environment passed by the caller
598  * \param qsd - is the qsd instance to shutdown
599  */
600 void qsd_fini(const struct lu_env *env, struct qsd_instance *qsd)
601 {
602         int     qtype;
603         ENTRY;
604
605         if (unlikely(qsd == NULL))
606                 RETURN_EXIT;
607
608         CDEBUG(D_QUOTA, "%s: initiating QSD shutdown\n", qsd->qsd_svname);
609         write_lock(&qsd->qsd_lock);
610         qsd->qsd_stopping = true;
611         write_unlock(&qsd->qsd_lock);
612
613         /* remove qsd proc entry */
614         if (qsd->qsd_proc != NULL) {
615                 lprocfs_remove(&qsd->qsd_proc);
616                 qsd->qsd_proc = NULL;
617         }
618
619         /* stop the writeback thread */
620         qsd_stop_upd_thread(qsd);
621
622         /* shutdown the reintegration threads */
623         for (qtype = USRQUOTA; qtype < LL_MAXQUOTAS; qtype++) {
624                 if (qsd->qsd_type_array[qtype] == NULL)
625                         continue;
626                 qsd_stop_reint_thread(qsd->qsd_type_array[qtype]);
627         }
628
629         if (qsd->qsd_ns != NULL) {
630                 qsd->qsd_ns = NULL;
631         }
632
633         /* release per-filesystem information */
634         if (qsd->qsd_fsinfo != NULL) {
635                 mutex_lock(&qsd->qsd_fsinfo->qfs_mutex);
636                 /* remove from the list of fsinfo */
637                 list_del_init(&qsd->qsd_link);
638                 mutex_unlock(&qsd->qsd_fsinfo->qfs_mutex);
639                 qsd_put_fsinfo(qsd->qsd_fsinfo);
640                 qsd->qsd_fsinfo = NULL;
641         }
642
643         /* free per-quota type data */
644         for (qtype = USRQUOTA; qtype < LL_MAXQUOTAS; qtype++)
645                 qsd_qtype_fini(env, qsd, qtype);
646
647         /* deregister connection to the quota master */
648         qsd->qsd_exp_valid = false;
649         lustre_deregister_lwp_item(&qsd->qsd_exp);
650
651         /* release quota root directory */
652         if (qsd->qsd_root != NULL) {
653                 dt_object_put(env, qsd->qsd_root);
654                 qsd->qsd_root = NULL;
655         }
656
657         /* release reference on dt_device */
658         if (qsd->qsd_dev != NULL) {
659                 lu_ref_del(&qsd->qsd_dev->dd_lu_dev.ld_reference, "qsd", qsd);
660                 lu_device_put(&qsd->qsd_dev->dd_lu_dev);
661                 qsd->qsd_dev = NULL;
662         }
663
664         CDEBUG(D_QUOTA, "%s: QSD shutdown completed\n", qsd->qsd_svname);
665         OBD_FREE_PTR(qsd);
666         EXIT;
667 }
668 EXPORT_SYMBOL(qsd_fini);
669
670 /*
671  * Create a new qsd_instance to be associated with backend osd device
672  * identified by \dev.
673  *
674  * \param env    - the environment passed by the caller
675  * \param svname - is the service name of the OSD device creating this instance
676  * \param dev    - is the dt_device where to store quota index files
677  * \param osd_proc - is the procfs parent directory where to create procfs file
678  *                   related to this new qsd instance
679  *
680  * \retval - pointer to new qsd_instance associated with dev \dev on success,
681  *           appropriate error on failure
682  */
683 struct qsd_instance *qsd_init(const struct lu_env *env, char *svname,
684                               struct dt_device *dev,
685                               struct proc_dir_entry *osd_proc,
686                               bool is_md, bool excl)
687 {
688         struct qsd_thread_info  *qti = qsd_info(env);
689         struct qsd_instance     *qsd;
690         int                      rc, type, idx;
691         ENTRY;
692
693         /* only configure qsd for MDT & OST */
694         type = server_name2index(svname, &idx, NULL);
695         if (type != LDD_F_SV_TYPE_MDT && type != LDD_F_SV_TYPE_OST)
696                 RETURN(NULL);
697
698         /* allocate qsd instance */
699         OBD_ALLOC_PTR(qsd);
700         if (qsd == NULL)
701                 RETURN(ERR_PTR(-ENOMEM));
702
703         /* generic initializations */
704         rwlock_init(&qsd->qsd_lock);
705         INIT_LIST_HEAD(&qsd->qsd_link);
706         INIT_LIST_HEAD(&qsd->qsd_upd_list);
707         spin_lock_init(&qsd->qsd_adjust_lock);
708         INIT_LIST_HEAD(&qsd->qsd_adjust_list);
709         qsd->qsd_prepared = false;
710         qsd->qsd_started = false;
711         qsd->qsd_is_md = is_md;
712         qsd->qsd_updating = false;
713         qsd->qsd_exclusive = excl;
714
715         /* copy service name */
716         rc = strscpy(qsd->qsd_svname, svname, sizeof(qsd->qsd_svname));
717         if (rc < 0)
718                 GOTO(out, rc);
719
720         /* grab reference on osd device */
721         lu_device_get(&dev->dd_lu_dev);
722         lu_ref_add(&dev->dd_lu_dev.ld_reference, "qsd", qsd);
723         qsd->qsd_dev = dev;
724
725         /* get fsname from svname */
726         rc = server_name2fsname(svname, qti->qti_buf, NULL);
727         if (rc) {
728                 CERROR("%s: fail to extract filesystem name\n", svname);
729                 GOTO(out, rc);
730         }
731
732         /* look up quota setting for the filesystem the target belongs to */
733         qsd->qsd_fsinfo = qsd_get_fsinfo(qti->qti_buf, 1);
734         if (qsd->qsd_fsinfo == NULL) {
735                 CERROR("%s: failed to locate filesystem information\n", svname);
736                 GOTO(out, rc = -EINVAL);
737         }
738
739         /* add in the list of lquota_fsinfo */
740         mutex_lock(&qsd->qsd_fsinfo->qfs_mutex);
741         list_add_tail(&qsd->qsd_link, &qsd->qsd_fsinfo->qfs_qsd_list);
742         mutex_unlock(&qsd->qsd_fsinfo->qfs_mutex);
743
744         /* register procfs directory */
745         if (qsd->qsd_is_md)
746                 qsd->qsd_proc = lprocfs_register(QSD_DIR_MD, osd_proc,
747                                                  lprocfs_quota_qsd_vars, qsd);
748         else
749                 qsd->qsd_proc = lprocfs_register(QSD_DIR_DT, osd_proc,
750                                                  lprocfs_quota_qsd_vars, qsd);
751
752         if (type == LDD_F_SV_TYPE_MDT && qsd->qsd_is_md)
753                 lprocfs_add_symlink(QSD_DIR, osd_proc, "./%s", QSD_DIR_MD);
754         else if (type == LDD_F_SV_TYPE_OST && !qsd->qsd_is_md)
755                 lprocfs_add_symlink(QSD_DIR, osd_proc, "./%s", QSD_DIR_DT);
756
757         if (IS_ERR(qsd->qsd_proc)) {
758                 rc = PTR_ERR(qsd->qsd_proc);
759                 qsd->qsd_proc = NULL;
760                 CERROR("%s: fail to create quota slave proc entry (%d)\n",
761                        svname, rc);
762                 GOTO(out, rc);
763         }
764         EXIT;
765 out:
766         if (rc) {
767                 qsd_fini(env, qsd);
768                 return ERR_PTR(rc);
769         }
770         RETURN(qsd);
771 }
772 EXPORT_SYMBOL(qsd_init);
773
774 /*
775  * Initialize on-disk structures in order to manage quota enforcement for
776  * the target associated with the qsd instance \qsd and starts the reintegration
777  * procedure for each quota type as soon as possible.
778  * The last step of the reintegration will be completed once qsd_start() is
779  * called, at which points the space reconciliation with the master will be
780  * executed.
781  * This function must be called when the server stack is fully configured,
782  * typically when ->ldo_prepare is called across the stack.
783  *
784  * \param env - the environment passed by the caller
785  * \param qsd - is qsd_instance to prepare
786  *
787  * \retval - 0 on success, appropriate error on failure
788  */
789 int qsd_prepare(const struct lu_env *env, struct qsd_instance *qsd)
790 {
791         struct qsd_thread_info  *qti = qsd_info(env);
792         int                      qtype, rc = 0;
793         ENTRY;
794
795         if (unlikely(qsd == NULL))
796                 RETURN(0);
797
798         read_lock(&qsd->qsd_lock);
799         if (qsd->qsd_prepared) {
800                 CERROR("%s: qsd instance already prepared\n", qsd->qsd_svname);
801                 rc = -EALREADY;
802         }
803         read_unlock(&qsd->qsd_lock);
804         if (rc)
805                 RETURN(rc);
806
807         /* Record whether this qsd instance is managing quota enforcement for a
808          * MDT (i.e. inode quota) or OST (block quota) */
809         if (qsd->qsd_is_md)
810                 qsd->qsd_sync_threshold = LQUOTA_LEAST_QUNIT(LQUOTA_RES_MD);
811         else
812                 qsd->qsd_sync_threshold = LQUOTA_LEAST_QUNIT(LQUOTA_RES_DT);
813
814         /* look-up on-disk directory for the quota slave */
815         qsd->qsd_root = lquota_disk_dir_find_create(env, qsd->qsd_dev, NULL,
816                                                     QSD_DIR);
817         if (IS_ERR(qsd->qsd_root)) {
818                 rc = PTR_ERR(qsd->qsd_root);
819                 qsd->qsd_root = NULL;
820                 CERROR("%s: failed to create quota slave root dir (%d)\n",
821                        qsd->qsd_svname, rc);
822                 RETURN(rc);
823         }
824
825         /* initialize per-quota type data */
826         for (qtype = USRQUOTA; qtype < LL_MAXQUOTAS; qtype++) {
827                 rc = qsd_qtype_init(env, qsd, qtype);
828                 if (rc)
829                         RETURN(rc);
830         }
831
832         /* pools successfully setup, mark the qsd as prepared */
833         write_lock(&qsd->qsd_lock);
834         qsd->qsd_prepared = true;
835         write_unlock(&qsd->qsd_lock);
836
837         if (qsd->qsd_dev->dd_rdonly)
838                 RETURN(0);
839
840         /* start reintegration thread for each type, if required */
841         for (qtype = USRQUOTA; qtype < LL_MAXQUOTAS; qtype++) {
842                 struct qsd_qtype_info   *qqi = qsd->qsd_type_array[qtype];
843
844                 if (qsd_type_enabled(qsd, qtype) &&
845                     qqi->qqi_acct_failed) {
846                         LCONSOLE_ERROR("%s: can't enable quota enforcement "
847                                        "since space accounting isn't functional"
848                                        ". Please run tunefs.lustre --quota on "
849                                        "an unmounted filesystem if not done "
850                                        "already\n", qsd->qsd_svname);
851                         continue;
852                 }
853
854                 rc = qsd_start_reint_thread(qqi);
855                 if (rc) {
856                         CERROR("%s: failed to start reint thread for type %s: rc = %d\n",
857                                 qsd->qsd_svname, qtype_name(qtype), rc);
858                         RETURN(rc);
859                 }
860         }
861
862         /* start writeback thread */
863         rc = qsd_start_upd_thread(qsd);
864         if (rc) {
865                 CERROR("%s: failed to start writeback thread (%d)\n",
866                        qsd->qsd_svname, rc);
867                 RETURN(rc);
868         }
869
870         /* generate osp name */
871         rc = tgt_name2lwp_name(qsd->qsd_svname, qti->qti_buf,
872                                MTI_NAME_MAXLEN, 0);
873         if (rc) {
874                 CERROR("%s: failed to generate ospname (%d)\n",
875                        qsd->qsd_svname, rc);
876                 RETURN(rc);
877         }
878
879         /* the connection callback will start the reintegration
880          * procedure if quota is enabled */
881         rc = lustre_register_lwp_item(qti->qti_buf, &qsd->qsd_exp,
882                                       qsd_conn_callback, (void *)qsd);
883         if (rc) {
884                 CERROR("%s: fail to get connection to master (%d)\n",
885                        qsd->qsd_svname, rc);
886                 RETURN(rc);
887         }
888
889         RETURN(0);
890 }
891 EXPORT_SYMBOL(qsd_prepare);
892
893 /*
894  * Start a qsd instance. This will complete the last step of the reintegration
895  * procedure as soon as possible (provided that the master is reachable).
896  * This should be called when recovery has been completed and quota should now
897  * be enforced on every operations.
898  *
899  * \param env - the environment passed by the caller
900  * \param qsd - is the qsd instance associated with the osd device to start
901  */
902 int qsd_start(const struct lu_env *env, struct qsd_instance *qsd)
903 {
904         int     type, rc = 0;
905         ENTRY;
906
907         if (unlikely(qsd == NULL))
908                 RETURN(0);
909
910         write_lock(&qsd->qsd_lock);
911         if (!qsd->qsd_prepared) {
912                 CERROR("%s: can't start qsd instance since it wasn't properly "
913                        "initialized\n", qsd->qsd_svname);
914                 rc = -EFAULT;
915         } else if (qsd->qsd_started) {
916                 CERROR("%s: qsd instance already started\n", qsd->qsd_svname);
917                 rc = -EALREADY;
918         } else {
919                 /* notify that the qsd_instance is now started */
920                 qsd->qsd_started = true;
921         }
922         write_unlock(&qsd->qsd_lock);
923
924         if (rc)
925                 RETURN(rc);
926
927         /* Trigger the 3rd step of reintegration: If usage > granted, acquire
928          * up to usage; If usage < granted, release down to usage.  */
929         for (type = USRQUOTA; type < LL_MAXQUOTAS; type++) {
930                 struct qsd_qtype_info   *qqi = qsd->qsd_type_array[type];
931                 struct task_struct *t;
932
933                 /* qqi_reint_task can be set to NULL at any time,
934                  * so we need to be careful.
935                  */
936                 rcu_read_lock();
937                 t = rcu_dereference(qqi->qqi_reint_task);
938                 if (t)
939                         wake_up_process(t);
940                 rcu_read_unlock();
941         }
942
943         RETURN(rc);
944 }
945 EXPORT_SYMBOL(qsd_start);
946
947 void lustre_register_quota_process_config(int (*qpc)(struct lustre_cfg *lcfg));
948
949 /*
950  * Global initialization performed at module load time
951  */
952 int qsd_glb_init(void)
953 {
954         int     rc;
955
956         rc = lu_kmem_init(qsd_caches);
957         if (rc)
958                 return rc;
959
960         qsd_key_init_generic(&qsd_thread_key, NULL);
961         lu_context_key_register(&qsd_thread_key);
962         lustre_register_quota_process_config(qsd_process_config);
963
964         return 0;
965 }
966
967 /*
968  * Companion of qsd_glb_init() called at module unload time
969  */
970 void qsd_glb_fini(void)
971 {
972         lustre_register_quota_process_config(NULL);
973         lu_kmem_fini(qsd_caches);
974         lu_context_key_degister(&qsd_thread_key);
975 }