Whamcloud - gitweb
LU-4017 quota: add setting/getting project id function
[fs/lustre-release.git] / lustre / quota / qsd_lib.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; if not, write to the
18  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19  * Boston, MA 021110-1307, USA
20  *
21  * GPL HEADER END
22  */
23 /*
24  * Copyright (c) 2012, 2016, Intel Corporation.
25  * Use is subject to license terms.
26  *
27  * Author: Johann Lombardi <johann.lombardi@intel.com>
28  * Author: Niu    Yawei    <yawei.niu@intel.com>
29  */
30
31 /*
32  * Quota Slave Driver (QSD) management.
33  *
34  * The quota slave feature is implemented under the form of a library called
35  * QSD. Each OSD device should create a QSD instance via qsd_init() which will
36  * be used to manage quota enforcement for this device. This implies:
37  * - completing the reintegration procedure with the quota master (aka QMT, see
38  *   qmt_dev.c) to retrieve the latest quota settings and space distribution.
39  * - managing quota locks in order to be notified of configuration changes.
40  * - acquiring space from the QMT when quota space for a given user/group is
41  *   close to exhaustion.
42  * - allocating quota space to service threads for local request processing.
43  *
 * Once the QSD instance has been created, the OSD device should invoke
 * qsd_start() when recovery is completed. This notifies the QSD that we are
 * about to process new requests on which quota should be strictly enforced.
 * Then, qsd_op_begin/end can be used to reserve/release/pre-acquire quota
 * space for/after each operation until shutdown, at which point the QSD
 * instance should be freed via qsd_fini().
 */
50  */
51
52 #define DEBUG_SUBSYSTEM S_LQUOTA
53
54 #include <obd_class.h>
55 #include "qsd_internal.h"
56
57 struct kmem_cache *upd_kmem;
58
59 struct lu_kmem_descr qsd_caches[] = {
60         {
61                 .ckd_cache = &upd_kmem,
62                 .ckd_name  = "upd_kmem",
63                 .ckd_size  = sizeof(struct qsd_upd_rec)
64         },
65         {
66                 .ckd_cache = NULL
67         }
68 };
69
70 /* define qsd thread key */
71 LU_KEY_INIT_FINI(qsd, struct qsd_thread_info);
72 LU_CONTEXT_KEY_DEFINE(qsd, LCT_MD_THREAD | LCT_DT_THREAD | LCT_LOCAL);
73 LU_KEY_INIT_GENERIC(qsd);
74
75 /* some procfs helpers */
76 static int qsd_state_seq_show(struct seq_file *m, void *data)
77 {
78         struct qsd_instance     *qsd = m->private;
79         char                     enabled[5];
80
81         LASSERT(qsd != NULL);
82
83         memset(enabled, 0, sizeof(enabled));
84         if (qsd_type_enabled(qsd, USRQUOTA))
85                 strcat(enabled, "u");
86         if (qsd_type_enabled(qsd, GRPQUOTA))
87                 strcat(enabled, "g");
88         if (strlen(enabled) == 0)
89                 strcat(enabled, "none");
90
91         seq_printf(m, "target name:    %s\n"
92                    "pool ID:        %d\n"
93                    "type:           %s\n"
94                    "quota enabled:  %s\n"
95                    "conn to master: %s\n",
96                    qsd->qsd_svname, qsd->qsd_pool_id,
97                    qsd->qsd_is_md ? "md" : "dt", enabled,
98                    qsd->qsd_exp_valid ? "setup" : "not setup yet");
99
100         if (qsd->qsd_prepared) {
101                 memset(enabled, 0, sizeof(enabled));
102                 if (qsd->qsd_type_array[USRQUOTA]->qqi_acct_obj != NULL)
103                         strcat(enabled, "u");
104                 if (qsd->qsd_type_array[GRPQUOTA]->qqi_acct_obj != NULL)
105                         strcat(enabled, "g");
106                 if (strlen(enabled) == 0)
107                         strcat(enabled, "none");
108                 seq_printf(m, "space acct:     %s\n"
109                            "user uptodate:  glb[%d],slv[%d],reint[%d]\n"
110                            "group uptodate: glb[%d],slv[%d],reint[%d]\n",
111                            enabled,
112                            qsd->qsd_type_array[USRQUOTA]->qqi_glb_uptodate,
113                            qsd->qsd_type_array[USRQUOTA]->qqi_slv_uptodate,
114                            qsd->qsd_type_array[USRQUOTA]->qqi_reint,
115                            qsd->qsd_type_array[GRPQUOTA]->qqi_glb_uptodate,
116                            qsd->qsd_type_array[GRPQUOTA]->qqi_slv_uptodate,
117                            qsd->qsd_type_array[GRPQUOTA]->qqi_reint);
118         }
119         return 0;
120 }
121 LPROC_SEQ_FOPS_RO(qsd_state);
122
123 static int qsd_enabled_seq_show(struct seq_file *m, void *data)
124 {
125         struct qsd_instance     *qsd = m->private;
126         char                     enabled[5];
127
128         LASSERT(qsd != NULL);
129
130         memset(enabled, 0, sizeof(enabled));
131         if (qsd_type_enabled(qsd, USRQUOTA))
132                 strcat(enabled, "u");
133         if (qsd_type_enabled(qsd, GRPQUOTA))
134                 strcat(enabled, "g");
135         if (strlen(enabled) == 0)
136                 strcat(enabled, "none");
137
138         seq_printf(m, "%s\n", enabled);
139         return 0;
140 }
141 LPROC_SEQ_FOPS_RO(qsd_enabled);
142
143 /* force reintegration procedure to be executed.
144  * Used for test/debugging purpose */
145 static ssize_t
146 lprocfs_force_reint_seq_write(struct file *file, const char __user *buffer,
147                                 size_t count, loff_t *off)
148 {
149         struct qsd_instance *qsd = ((struct seq_file *)file->private_data)->private;
150         int                  rc = 0, qtype;
151
152         LASSERT(qsd != NULL);
153
154         write_lock(&qsd->qsd_lock);
155         if (qsd->qsd_stopping) {
156                 /* don't mess up with shutdown procedure, it is already
157                  * complicated enough */
158                 rc = -ESHUTDOWN;
159         } else if (!qsd->qsd_prepared) {
160                 rc = -EAGAIN;
161         } else {
162                 /* mark all indexes as stale */
163                 for (qtype = USRQUOTA; qtype < LL_MAXQUOTAS; qtype++) {
164                         qsd->qsd_type_array[qtype]->qqi_glb_uptodate = false;
165                         qsd->qsd_type_array[qtype]->qqi_slv_uptodate = false;
166                 }
167         }
168         write_unlock(&qsd->qsd_lock);
169
170         if (rc)
171                 return rc;
172
173         /* kick off reintegration */
174         for (qtype = USRQUOTA; qtype < LL_MAXQUOTAS; qtype++) {
175                 rc = qsd_start_reint_thread(qsd->qsd_type_array[qtype]);
176                 if (rc)
177                         break;
178         }
179         return rc == 0 ? count : rc;
180 }
181 LPROC_SEQ_FOPS_WO_TYPE(qsd, force_reint);
182
183 static int qsd_timeout_seq_show(struct seq_file *m, void *data)
184 {
185         struct qsd_instance *qsd = m->private;
186         LASSERT(qsd != NULL);
187
188         seq_printf(m, "%d\n", qsd_wait_timeout(qsd));
189         return 0;
190 }
191
192 static ssize_t
193 qsd_timeout_seq_write(struct file *file, const char __user *buffer,
194                         size_t count, loff_t *off)
195 {
196         struct qsd_instance *qsd = ((struct seq_file *)file->private_data)->private;
197         int rc;
198         __s64 timeout;
199         LASSERT(qsd != NULL);
200
201         rc = lprocfs_str_to_s64(buffer, count, &timeout);
202         if (rc)
203                 return rc;
204         if (timeout < 0 || timeout > INT_MAX)
205                 return -EINVAL;
206
207         qsd->qsd_timeout = timeout;
208         return count;
209 }
210 LPROC_SEQ_FOPS(qsd_timeout);
211
212 static struct lprocfs_vars lprocfs_quota_qsd_vars[] = {
213         { .name =       "info",
214           .fops =       &qsd_state_fops         },
215         { .name =       "enabled",
216           .fops =       &qsd_enabled_fops       },
217         { .name =       "force_reint",
218           .fops =       &qsd_force_reint_fops   },
219         { .name =       "timeout",
220           .fops =       &qsd_timeout_fops       },
221         { NULL }
222 };
223
224 /*
225  * Callback function invoked by the OSP layer when the connection to the master
226  * has been set up.
227  *
228  * \param data - is a pointer to the qsd_instance
229  *
230  * \retval - 0 on success, appropriate error on failure
231  */
232 static int qsd_conn_callback(void *data)
233 {
234         struct qsd_instance *qsd = (struct qsd_instance *)data;
235         int                  type;
236         ENTRY;
237
238         /* qsd_exp should now be valid */
239         LASSERT(qsd->qsd_exp);
240
241         qsd->qsd_ns = class_exp2obd(qsd->qsd_exp)->obd_namespace;
242
243         write_lock(&qsd->qsd_lock);
244         /* notify that qsd_exp is now valid */
245         qsd->qsd_exp_valid = true;
246         write_unlock(&qsd->qsd_lock);
247
248         /* Now that the connection to master is setup, we can initiate the
249          * reintegration procedure for quota types which are enabled.
250          * It is worth noting that, if the qsd_instance hasn't been started
251          * already, then we can only complete the first two steps of the
252          * reintegration procedure (i.e. global lock enqueue and slave
253          * index transfer) since the space usage reconciliation (i.e.
254          * step 3) will have to wait for qsd_start() to be called */
255         for (type = USRQUOTA; type < LL_MAXQUOTAS; type++) {
256                 struct qsd_qtype_info *qqi = qsd->qsd_type_array[type];
257                 wake_up(&qqi->qqi_reint_thread.t_ctl_waitq);
258         }
259
260         RETURN(0);
261 }
262
263 /*
264  * Release qsd_qtype_info structure which contains data associated with a
265  * given quota type. This releases the accounting objects.
266  * It's called on OSD cleanup when the qsd instance is released.
267  *
268  * \param env - is the environment passed by the caller
269  * \param qsd - is the qsd instance managing the qsd_qtype_info structure
270  *              to be released
271  * \param qtype - is the quota type to be shutdown
272  */
273 static void qsd_qtype_fini(const struct lu_env *env, struct qsd_instance *qsd,
274                            int qtype)
275 {
276         struct qsd_qtype_info   *qqi;
277         int repeat = 0;
278         ENTRY;
279
280         if (qsd->qsd_type_array[qtype] == NULL)
281                 RETURN_EXIT;
282         qqi = qsd->qsd_type_array[qtype];
283         qsd->qsd_type_array[qtype] = NULL;
284
285         /* all deferred work lists should be empty */
286         LASSERT(list_empty(&qqi->qqi_deferred_glb));
287         LASSERT(list_empty(&qqi->qqi_deferred_slv));
288
289         /* shutdown lquota site */
290         if (qqi->qqi_site != NULL && !IS_ERR(qqi->qqi_site)) {
291                 lquota_site_free(env, qqi->qqi_site);
292                 qqi->qqi_site = NULL;
293         }
294
295         /* The qqi may still be holding by global locks which are being
296          * canceled asynchronously (LU-4365), see the following steps:
297          *
298          * - On server umount, we try to clear all quota locks first by
299          *   disconnecting LWP (which will invalidate import and cleanup
300          *   all locks on it), however, if quota reint process is holding
301          *   the global lock for reintegration at that time, global lock
302          *   will fail to be cleared on LWP disconnection.
303          *
304          * - Umount process goes on and stops reint process, the global
305          *   lock will be dropped on reint process exit, however, the lock
306          *   cancel in done in asynchronous way, so the
307          *   qsd_glb_blocking_ast() might haven't been called yet when we
308          *   get here.
309          */
310         while (atomic_read(&qqi->qqi_ref) > 1) {
311                 CDEBUG(D_QUOTA, "qqi reference count %u, repeat: %d\n",
312                        atomic_read(&qqi->qqi_ref), repeat);
313                 repeat++;
314                 set_current_state(TASK_INTERRUPTIBLE);
315                 schedule_timeout(cfs_time_seconds(1));
316         }
317
318         /* by now, all qqi users should have gone away */
319         LASSERT(atomic_read(&qqi->qqi_ref) == 1);
320         lu_ref_fini(&qqi->qqi_reference);
321
322         /* release accounting object */
323         if (qqi->qqi_acct_obj != NULL && !IS_ERR(qqi->qqi_acct_obj)) {
324                 dt_object_put(env, qqi->qqi_acct_obj);
325                 qqi->qqi_acct_obj = NULL;
326         }
327
328         /* release slv index */
329         if (qqi->qqi_slv_obj != NULL && !IS_ERR(qqi->qqi_slv_obj)) {
330                 dt_object_put(env, qqi->qqi_slv_obj);
331                 qqi->qqi_slv_obj = NULL;
332                 qqi->qqi_slv_ver = 0;
333         }
334
335         /* release global index */
336         if (qqi->qqi_glb_obj != NULL && !IS_ERR(qqi->qqi_glb_obj)) {
337                 dt_object_put(env, qqi->qqi_glb_obj);
338                 qqi->qqi_glb_obj = NULL;
339                 qqi->qqi_glb_ver = 0;
340         }
341
342         OBD_FREE_PTR(qqi);
343         EXIT;
344 }
345
346 static const char *qtype2acct_name(int qtype)
347 {
348         switch (qtype) {
349         case USRQUOTA:
350                 return "acct_user";
351         case GRPQUOTA:
352                 return "acct_group";
353         case PRJQUOTA:
354                 return "acct_project";
355         }
356
357         LASSERTF(0, "invalid quota type: %d", qtype);
358         return NULL;
359 }
360
361 static const char *qtype2glb_name(int qtype)
362 {
363         switch (qtype) {
364         case USRQUOTA:
365                 return "limit_user";
366         case GRPQUOTA:
367                 return "limit_group";
368         case PRJQUOTA:
369                 return "limit_project";
370         }
371
372         LASSERTF(0, "invalid quota type: %d", qtype);
373         return NULL;
374 }
375
376 /*
377  * Allocate and initialize a qsd_qtype_info structure for quota type \qtype.
378  * This opens the accounting object and initializes the proc file.
379  * It's called on OSD start when the qsd_prepare() is invoked on the qsd
380  * instance.
381  *
382  * \param env  - the environment passed by the caller
383  * \param qsd  - is the qsd instance which will be in charge of the new
384  *               qsd_qtype_info instance.
385  * \param qtype - is quota type to set up
386  *
387  * \retval - 0 on success and qsd->qsd_type_array[qtype] is allocated,
388  *           appropriate error on failure
389  */
390 static int qsd_qtype_init(const struct lu_env *env, struct qsd_instance *qsd,
391                           int qtype)
392 {
393         struct qsd_qtype_info   *qqi;
394         int                      rc;
395         struct obd_uuid          uuid;
396         ENTRY;
397
398         LASSERT(qsd->qsd_type_array[qtype] == NULL);
399
400         /* allocate structure for this quota type */
401         OBD_ALLOC_PTR(qqi);
402         if (qqi == NULL)
403                 RETURN(-ENOMEM);
404         qsd->qsd_type_array[qtype] = qqi;
405         atomic_set(&qqi->qqi_ref, 1); /* referenced from qsd */
406
407         /* set backpointer and other parameters */
408         qqi->qqi_qsd   = qsd;
409         qqi->qqi_qtype = qtype;
410         lu_ref_init(&qqi->qqi_reference);
411         qqi->qqi_glb_uptodate = false;
412         qqi->qqi_slv_uptodate = false;
413         qqi->qqi_reint        = false;
414         init_waitqueue_head(&qqi->qqi_reint_thread.t_ctl_waitq);
415         thread_set_flags(&qqi->qqi_reint_thread, SVC_STOPPED);
416         INIT_LIST_HEAD(&qqi->qqi_deferred_glb);
417         INIT_LIST_HEAD(&qqi->qqi_deferred_slv);
418         lquota_generate_fid(&qqi->qqi_fid, qsd->qsd_pool_id,
419                             QSD_RES_TYPE(qsd), qtype);
420
421         /* open accounting object */
422         LASSERT(qqi->qqi_acct_obj == NULL);
423         qqi->qqi_acct_obj = acct_obj_lookup(env, qsd->qsd_dev, qtype);
424         if (IS_ERR(qqi->qqi_acct_obj)) {
425                 CDEBUG(D_QUOTA, "%s: no %s space accounting support: rc = %ld\n",
426                        qsd->qsd_svname, qtype_name(qtype),
427                        PTR_ERR(qqi->qqi_acct_obj));
428                 qqi->qqi_acct_obj = NULL;
429                 qqi->qqi_acct_failed = true;
430         }
431
432         /* open global index copy */
433         LASSERT(qqi->qqi_glb_obj == NULL);
434         qqi->qqi_glb_obj = lquota_disk_glb_find_create(env, qsd->qsd_dev,
435                                                        qsd->qsd_root,
436                                                        &qqi->qqi_fid, true);
437         if (IS_ERR(qqi->qqi_glb_obj)) {
438                 CERROR("%s: can't open global index copy "DFID" %ld\n",
439                        qsd->qsd_svname, PFID(&qqi->qqi_fid),
440                        PTR_ERR(qqi->qqi_glb_obj));
441                 GOTO(out, rc = PTR_ERR(qqi->qqi_glb_obj));
442         }
443         qqi->qqi_glb_ver = dt_version_get(env, qqi->qqi_glb_obj);
444
445         /* open slave index copy */
446         LASSERT(qqi->qqi_slv_obj == NULL);
447         obd_str2uuid(&uuid, qsd->qsd_svname);
448         qqi->qqi_slv_obj = lquota_disk_slv_find_create(env, qsd->qsd_dev,
449                                                        qsd->qsd_root,
450                                                        &qqi->qqi_fid, &uuid,
451                                                        true);
452         if (IS_ERR(qqi->qqi_slv_obj)) {
453                 CERROR("%s: can't open slave index copy "DFID" %ld\n",
454                        qsd->qsd_svname, PFID(&qqi->qqi_fid),
455                        PTR_ERR(qqi->qqi_slv_obj));
456                 GOTO(out, rc = PTR_ERR(qqi->qqi_slv_obj));
457         }
458         qqi->qqi_slv_ver = dt_version_get(env, qqi->qqi_slv_obj);
459
460         /* allocate site */
461         qqi->qqi_site = lquota_site_alloc(env, qqi, false, qtype, &qsd_lqe_ops);
462         if (IS_ERR(qqi->qqi_site)) {
463                 CERROR("%s: can't allocate site "DFID" %ld\n", qsd->qsd_svname,
464                        PFID(&qqi->qqi_fid), PTR_ERR(qqi->qqi_site));
465                 GOTO(out, rc = PTR_ERR(qqi->qqi_site));
466         }
467
468         /* register proc entry for accounting & global index copy objects */
469         rc = lprocfs_seq_create(qsd->qsd_proc, qtype2acct_name(qtype),
470                                 0444, &lprocfs_quota_seq_fops,
471                                 qqi->qqi_acct_obj);
472         if (rc) {
473                 CERROR("%s: can't add procfs entry for accounting file %d\n",
474                        qsd->qsd_svname, rc);
475                 GOTO(out, rc);
476         }
477
478         rc = lprocfs_seq_create(qsd->qsd_proc, qtype2glb_name(qtype),
479                                 0444, &lprocfs_quota_seq_fops,
480                                 qqi->qqi_glb_obj);
481         if (rc) {
482                 CERROR("%s: can't add procfs entry for global index copy %d\n",
483                        qsd->qsd_svname, rc);
484                 GOTO(out, rc);
485         }
486         EXIT;
487 out:
488         if (rc)
489                 qsd_qtype_fini(env, qsd, qtype);
490         return rc;
491 }
492
493 /*
494  * Release a qsd_instance. Companion of qsd_init(). This releases all data
495  * structures associated with the quota slave (on-disk objects, lquota entry
496  * tables, ...).
497  * This function should be called when the OSD is shutting down.
498  *
499  * \param env - is the environment passed by the caller
500  * \param qsd - is the qsd instance to shutdown
501  */
502 void qsd_fini(const struct lu_env *env, struct qsd_instance *qsd)
503 {
504         int     qtype;
505         ENTRY;
506
507         if (unlikely(qsd == NULL))
508                 RETURN_EXIT;
509
510         CDEBUG(D_QUOTA, "%s: initiating QSD shutdown\n", qsd->qsd_svname);
511         write_lock(&qsd->qsd_lock);
512         qsd->qsd_stopping = true;
513         write_unlock(&qsd->qsd_lock);
514
515         /* remove qsd proc entry */
516         if (qsd->qsd_proc != NULL) {
517                 lprocfs_remove(&qsd->qsd_proc);
518                 qsd->qsd_proc = NULL;
519         }
520
521         /* stop the writeback thread */
522         qsd_stop_upd_thread(qsd);
523
524         /* shutdown the reintegration threads */
525         for (qtype = USRQUOTA; qtype < LL_MAXQUOTAS; qtype++) {
526                 if (qsd->qsd_type_array[qtype] == NULL)
527                         continue;
528                 qsd_stop_reint_thread(qsd->qsd_type_array[qtype]);
529         }
530
531         if (qsd->qsd_ns != NULL) {
532                 qsd->qsd_ns = NULL;
533         }
534
535         /* free per-quota type data */
536         for (qtype = USRQUOTA; qtype < LL_MAXQUOTAS; qtype++)
537                 qsd_qtype_fini(env, qsd, qtype);
538
539         if (qsd->qsd_exp) {
540                 /* deregister connection to the quota master */
541                 qsd->qsd_exp_valid = false;
542                 lustre_deregister_lwp_item(&qsd->qsd_exp);
543         }
544
545         /* release per-filesystem information */
546         if (qsd->qsd_fsinfo != NULL) {
547                 mutex_lock(&qsd->qsd_fsinfo->qfs_mutex);
548                 /* remove from the list of fsinfo */
549                 list_del_init(&qsd->qsd_link);
550                 mutex_unlock(&qsd->qsd_fsinfo->qfs_mutex);
551                 qsd_put_fsinfo(qsd->qsd_fsinfo);
552                 qsd->qsd_fsinfo = NULL;
553         }
554
555         /* release quota root directory */
556         if (qsd->qsd_root != NULL) {
557                 dt_object_put(env, qsd->qsd_root);
558                 qsd->qsd_root = NULL;
559         }
560
561         /* release reference on dt_device */
562         if (qsd->qsd_dev != NULL) {
563                 lu_ref_del(&qsd->qsd_dev->dd_lu_dev.ld_reference, "qsd", qsd);
564                 lu_device_put(&qsd->qsd_dev->dd_lu_dev);
565                 qsd->qsd_dev = NULL;
566         }
567
568         CDEBUG(D_QUOTA, "%s: QSD shutdown completed\n", qsd->qsd_svname);
569         OBD_FREE_PTR(qsd);
570         EXIT;
571 }
572 EXPORT_SYMBOL(qsd_fini);
573
574 /*
575  * Create a new qsd_instance to be associated with backend osd device
576  * identified by \dev.
577  *
578  * \param env    - the environment passed by the caller
579  * \param svname - is the service name of the OSD device creating this instance
580  * \param dev    - is the dt_device where to store quota index files
581  * \param osd_proc - is the procfs parent directory where to create procfs file
582  *                   related to this new qsd instance
583  *
584  * \retval - pointer to new qsd_instance associated with dev \dev on success,
585  *           appropriate error on failure
586  */
587 struct qsd_instance *qsd_init(const struct lu_env *env, char *svname,
588                               struct dt_device *dev,
589                               struct proc_dir_entry *osd_proc)
590 {
591         struct qsd_thread_info  *qti = qsd_info(env);
592         struct qsd_instance     *qsd;
593         int                      rc, type, idx;
594         ENTRY;
595
596         /* only configure qsd for MDT & OST */
597         type = server_name2index(svname, &idx, NULL);
598         if (type != LDD_F_SV_TYPE_MDT && type != LDD_F_SV_TYPE_OST)
599                 RETURN(NULL);
600
601         /* allocate qsd instance */
602         OBD_ALLOC_PTR(qsd);
603         if (qsd == NULL)
604                 RETURN(ERR_PTR(-ENOMEM));
605
606         /* generic initializations */
607         rwlock_init(&qsd->qsd_lock);
608         INIT_LIST_HEAD(&qsd->qsd_link);
609         thread_set_flags(&qsd->qsd_upd_thread, SVC_STOPPED);
610         init_waitqueue_head(&qsd->qsd_upd_thread.t_ctl_waitq);
611         INIT_LIST_HEAD(&qsd->qsd_upd_list);
612         spin_lock_init(&qsd->qsd_adjust_lock);
613         INIT_LIST_HEAD(&qsd->qsd_adjust_list);
614         qsd->qsd_prepared = false;
615         qsd->qsd_started = false;
616
617         /* copy service name */
618         if (strlcpy(qsd->qsd_svname, svname, sizeof(qsd->qsd_svname))
619             >= sizeof(qsd->qsd_svname))
620                 GOTO(out, rc = -E2BIG);
621
622         /* grab reference on osd device */
623         lu_device_get(&dev->dd_lu_dev);
624         lu_ref_add(&dev->dd_lu_dev.ld_reference, "qsd", qsd);
625         qsd->qsd_dev = dev;
626
627         /* we only support pool ID 0 (default data or metadata pool) for the
628          * time being. A different pool ID could be assigned to this target via
629          * the configuration log in the future */
630         qsd->qsd_pool_id  = 0;
631
632         /* get fsname from svname */
633         rc = server_name2fsname(svname, qti->qti_buf, NULL);
634         if (rc) {
635                 CERROR("%s: fail to extract filesystem name\n", svname);
636                 GOTO(out, rc);
637         }
638
639         /* look up quota setting for the filesystem the target belongs to */
640         qsd->qsd_fsinfo = qsd_get_fsinfo(qti->qti_buf, 1);
641         if (qsd->qsd_fsinfo == NULL) {
642                 CERROR("%s: failed to locate filesystem information\n", svname);
643                 GOTO(out, rc = -EINVAL);
644         }
645
646         /* add in the list of lquota_fsinfo */
647         mutex_lock(&qsd->qsd_fsinfo->qfs_mutex);
648         list_add_tail(&qsd->qsd_link, &qsd->qsd_fsinfo->qfs_qsd_list);
649         mutex_unlock(&qsd->qsd_fsinfo->qfs_mutex);
650
651         /* register procfs directory */
652         qsd->qsd_proc = lprocfs_register(QSD_DIR, osd_proc,
653                                          lprocfs_quota_qsd_vars, qsd);
654         if (IS_ERR(qsd->qsd_proc)) {
655                 rc = PTR_ERR(qsd->qsd_proc);
656                 qsd->qsd_proc = NULL;
657                 CERROR("%s: fail to create quota slave proc entry (%d)\n",
658                        svname, rc);
659                 GOTO(out, rc);
660         }
661         EXIT;
662 out:
663         if (rc) {
664                 qsd_fini(env, qsd);
665                 return ERR_PTR(rc);
666         }
667         RETURN(qsd);
668 }
669 EXPORT_SYMBOL(qsd_init);
670
671 /*
672  * Initialize on-disk structures in order to manage quota enforcement for
673  * the target associated with the qsd instance \qsd and starts the reintegration
674  * procedure for each quota type as soon as possible.
675  * The last step of the reintegration will be completed once qsd_start() is
676  * called, at which points the space reconciliation with the master will be
677  * executed.
678  * This function must be called when the server stack is fully configured,
679  * typically when ->ldo_prepare is called across the stack.
680  *
681  * \param env - the environment passed by the caller
682  * \param qsd - is qsd_instance to prepare
683  *
684  * \retval - 0 on success, appropriate error on failure
685  */
686 int qsd_prepare(const struct lu_env *env, struct qsd_instance *qsd)
687 {
688         struct qsd_thread_info  *qti = qsd_info(env);
689         int                      qtype, rc = 0;
690         ENTRY;
691
692         if (unlikely(qsd == NULL))
693                 RETURN(0);
694
695         read_lock(&qsd->qsd_lock);
696         if (qsd->qsd_prepared) {
697                 CERROR("%s: qsd instance already prepared\n", qsd->qsd_svname);
698                 rc = -EALREADY;
699         }
700         read_unlock(&qsd->qsd_lock);
701         if (rc)
702                 RETURN(rc);
703
704         /* Record whether this qsd instance is managing quota enforcement for a
705          * MDT (i.e. inode quota) or OST (block quota) */
706         if (lu_device_is_md(qsd->qsd_dev->dd_lu_dev.ld_site->ls_top_dev)) {
707                 qsd->qsd_is_md = true;
708                 qsd->qsd_sync_threshold = LQUOTA_LEAST_QUNIT(LQUOTA_RES_MD);
709         } else {
710                 qsd->qsd_sync_threshold = LQUOTA_LEAST_QUNIT(LQUOTA_RES_DT);
711         }
712
713         /* look-up on-disk directory for the quota slave */
714         qsd->qsd_root = lquota_disk_dir_find_create(env, qsd->qsd_dev, NULL,
715                                                     QSD_DIR);
716         if (IS_ERR(qsd->qsd_root)) {
717                 rc = PTR_ERR(qsd->qsd_root);
718                 qsd->qsd_root = NULL;
719                 CERROR("%s: failed to create quota slave root dir (%d)\n",
720                        qsd->qsd_svname, rc);
721                 RETURN(rc);
722         }
723
724         /* initialize per-quota type data */
725         for (qtype = USRQUOTA; qtype < LL_MAXQUOTAS; qtype++) {
726                 rc = qsd_qtype_init(env, qsd, qtype);
727                 if (rc)
728                         RETURN(rc);
729         }
730
731         /* pools successfully setup, mark the qsd as prepared */
732         write_lock(&qsd->qsd_lock);
733         qsd->qsd_prepared = true;
734         write_unlock(&qsd->qsd_lock);
735
736         if (qsd->qsd_dev->dd_rdonly)
737                 RETURN(0);
738
739         /* start reintegration thread for each type, if required */
740         for (qtype = USRQUOTA; qtype < LL_MAXQUOTAS; qtype++) {
741                 struct qsd_qtype_info   *qqi = qsd->qsd_type_array[qtype];
742
743                 if (qsd_type_enabled(qsd, qtype) &&
744                     qqi->qqi_acct_failed) {
745                         LCONSOLE_ERROR("%s: can't enable quota enforcement "
746                                        "since space accounting isn't functional"
747                                        ". Please run tunefs.lustre --quota on "
748                                        "an unmounted filesystem if not done "
749                                        "already\n", qsd->qsd_svname);
750                         break;
751                 }
752
753                 rc = qsd_start_reint_thread(qqi);
754                 if (rc) {
755                         CERROR("%s: failed to start reint thread for type %s: rc = %d\n",
756                                 qsd->qsd_svname, qtype_name(qtype), rc);
757                         RETURN(rc);
758                 }
759         }
760
761         /* start writeback thread */
762         rc = qsd_start_upd_thread(qsd);
763         if (rc) {
764                 CERROR("%s: failed to start writeback thread (%d)\n",
765                        qsd->qsd_svname, rc);
766                 RETURN(rc);
767         }
768
769         /* generate osp name */
770         rc = tgt_name2lwp_name(qsd->qsd_svname, qti->qti_buf,
771                                MTI_NAME_MAXLEN, 0);
772         if (rc) {
773                 CERROR("%s: failed to generate ospname (%d)\n",
774                        qsd->qsd_svname, rc);
775                 RETURN(rc);
776         }
777
778         /* the connection callback will start the reintegration
779          * procedure if quota is enabled */
780         rc = lustre_register_lwp_item(qti->qti_buf, &qsd->qsd_exp,
781                                       qsd_conn_callback, (void *)qsd);
782         if (rc) {
783                 CERROR("%s: fail to get connection to master (%d)\n",
784                        qsd->qsd_svname, rc);
785                 RETURN(rc);
786         }
787
788         RETURN(0);
789 }
790 EXPORT_SYMBOL(qsd_prepare);
791
792 /*
793  * Start a qsd instance. This will complete the last step of the reintegration
794  * procedure as soon as possible (provided that the master is reachable).
795  * This should be called when recovery has been completed and quota should now
796  * be enforced on every operations.
797  *
798  * \param env - the environment passed by the caller
799  * \param qsd - is the qsd instance associated with the osd device to start
800  */
801 int qsd_start(const struct lu_env *env, struct qsd_instance *qsd)
802 {
803         int     type, rc = 0;
804         ENTRY;
805
806         if (unlikely(qsd == NULL))
807                 RETURN(0);
808
809         write_lock(&qsd->qsd_lock);
810         if (!qsd->qsd_prepared) {
811                 CERROR("%s: can't start qsd instance since it wasn't properly "
812                        "initialized\n", qsd->qsd_svname);
813                 rc = -EFAULT;
814         } else if (qsd->qsd_started) {
815                 CERROR("%s: qsd instance already started\n", qsd->qsd_svname);
816                 rc = -EALREADY;
817         } else {
818                 /* notify that the qsd_instance is now started */
819                 qsd->qsd_started = true;
820         }
821         write_unlock(&qsd->qsd_lock);
822
823         if (rc)
824                 RETURN(rc);
825
826         /* Trigger the 3rd step of reintegration: If usage > granted, acquire
827          * up to usage; If usage < granted, release down to usage.  */
828         for (type = USRQUOTA; type < LL_MAXQUOTAS; type++) {
829                 struct qsd_qtype_info   *qqi = qsd->qsd_type_array[type];
830                 wake_up(&qqi->qqi_reint_thread.t_ctl_waitq);
831         }
832
833         RETURN(rc);
834 }
835 EXPORT_SYMBOL(qsd_start);
836
837 void lustre_register_quota_process_config(int (*qpc)(struct lustre_cfg *lcfg));
838
839 /*
840  * Global initialization performed at module load time
841  */
842 int qsd_glb_init(void)
843 {
844         int     rc;
845
846         rc = lu_kmem_init(qsd_caches);
847         if (rc)
848                 return rc;
849
850         qsd_key_init_generic(&qsd_thread_key, NULL);
851         lu_context_key_register(&qsd_thread_key);
852         lustre_register_quota_process_config(qsd_process_config);
853
854         return 0;
855 }
856
857 /*
858  * Companion of qsd_glb_init() called at module unload time
859  */
860 void qsd_glb_fini(void)
861 {
862         lustre_register_quota_process_config(NULL);
863         lu_kmem_fini(qsd_caches);
864         lu_context_key_degister(&qsd_thread_key);
865 }