Whamcloud - gitweb
990cfee719a1fedb59b1575f696c0f87b84f0053
[fs/lustre-release.git] / lustre / quota / qsd_lib.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; if not, write to the
18  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19  * Boston, MA 021110-1307, USA
20  *
21  * GPL HEADER END
22  */
23 /*
24  * Copyright (c) 2012, 2013, Intel Corporation.
25  * Use is subject to license terms.
26  *
27  * Author: Johann Lombardi <johann.lombardi@intel.com>
28  * Author: Niu    Yawei    <yawei.niu@intel.com>
29  */
30
31 /*
32  * Quota Slave Driver (QSD) management.
33  *
34  * The quota slave feature is implemented under the form of a library called
35  * QSD. Each OSD device should create a QSD instance via qsd_init() which will
36  * be used to manage quota enforcement for this device. This implies:
37  * - completing the reintegration procedure with the quota master (aka QMT, see
38  *   qmt_dev.c) to retrieve the latest quota settings and space distribution.
39  * - managing quota locks in order to be notified of configuration changes.
40  * - acquiring space from the QMT when quota space for a given user/group is
41  *   close to exhaustion.
42  * - allocating quota space to service threads for local request processing.
43  *
 * Once the QSD instance is created, the OSD device should invoke qsd_start()
45  * when recovery is completed. This notifies the QSD that we are about to
46  * process new requests on which quota should be strictly enforced.
47  * Then, qsd_op_begin/end can be used to reserve/release/pre-acquire quota space
48  * for/after each operation until shutdown where the QSD instance should be
49  * freed via qsd_fini().
50  */
51
52 #define DEBUG_SUBSYSTEM S_LQUOTA
53
54 #include <obd_class.h>
55 #include "qsd_internal.h"
56
57 struct kmem_cache *upd_kmem;
58
59 struct lu_kmem_descr qsd_caches[] = {
60         {
61                 .ckd_cache = &upd_kmem,
62                 .ckd_name  = "upd_kmem",
63                 .ckd_size  = sizeof(struct qsd_upd_rec)
64         },
65         {
66                 .ckd_cache = NULL
67         }
68 };
69
/*
 * Define the qsd thread key, i.e. the per-context slot associated with
 * struct qsd_thread_info. These macro invocations provide qsd_thread_key and
 * qsd_key_init_generic(), which qsd_glb_init() uses to register the key.
 * NOTE(review): the LCT_* tags presumably select the kinds of lu_context
 * (MD threads, DT threads, local contexts) the key is attached to - confirm
 * against lu_object.h.
 */
LU_KEY_INIT_FINI(qsd, struct qsd_thread_info);
LU_CONTEXT_KEY_DEFINE(qsd, LCT_MD_THREAD | LCT_DT_THREAD | LCT_LOCAL);
LU_KEY_INIT_GENERIC(qsd);
74
75 /* some procfs helpers */
76 static int qsd_state_seq_show(struct seq_file *m, void *data)
77 {
78         struct qsd_instance     *qsd = m->private;
79         char                     enabled[5];
80         int                      rc;
81
82         LASSERT(qsd != NULL);
83
84         memset(enabled, 0, sizeof(enabled));
85         if (qsd_type_enabled(qsd, USRQUOTA))
86                 strcat(enabled, "u");
87         if (qsd_type_enabled(qsd, GRPQUOTA))
88                 strcat(enabled, "g");
89         if (strlen(enabled) == 0)
90                 strcat(enabled, "none");
91
92         rc = seq_printf(m, "target name:    %s\n"
93                         "pool ID:        %d\n"
94                         "type:           %s\n"
95                         "quota enabled:  %s\n"
96                         "conn to master: %s\n",
97                         qsd->qsd_svname, qsd->qsd_pool_id,
98                         qsd->qsd_is_md ? "md" : "dt", enabled,
99                         qsd->qsd_exp_valid ? "setup" : "not setup yet");
100
101         if (qsd->qsd_prepared) {
102                 memset(enabled, 0, sizeof(enabled));
103                 if (qsd->qsd_type_array[USRQUOTA]->qqi_acct_obj != NULL)
104                         strcat(enabled, "u");
105                 if (qsd->qsd_type_array[GRPQUOTA]->qqi_acct_obj != NULL)
106                         strcat(enabled, "g");
107                 if (strlen(enabled) == 0)
108                         strcat(enabled, "none");
109                 rc += seq_printf(m, "space acct:     %s\n"
110                                 "user uptodate:  glb[%d],slv[%d],reint[%d]\n"
111                                 "group uptodate: glb[%d],slv[%d],reint[%d]\n",
112                                 enabled,
113                                 qsd->qsd_type_array[USRQUOTA]->qqi_glb_uptodate,
114                                 qsd->qsd_type_array[USRQUOTA]->qqi_slv_uptodate,
115                                 qsd->qsd_type_array[USRQUOTA]->qqi_reint,
116                                 qsd->qsd_type_array[GRPQUOTA]->qqi_glb_uptodate,
117                                 qsd->qsd_type_array[GRPQUOTA]->qqi_slv_uptodate,
118                                 qsd->qsd_type_array[GRPQUOTA]->qqi_reint);
119         }
120         return rc;
121 }
122 LPROC_SEQ_FOPS_RO(qsd_state);
123
124 static int qsd_enabled_seq_show(struct seq_file *m, void *data)
125 {
126         struct qsd_instance     *qsd = m->private;
127         char                     enabled[5];
128
129         LASSERT(qsd != NULL);
130
131         memset(enabled, 0, sizeof(enabled));
132         if (qsd_type_enabled(qsd, USRQUOTA))
133                 strcat(enabled, "u");
134         if (qsd_type_enabled(qsd, GRPQUOTA))
135                 strcat(enabled, "g");
136         if (strlen(enabled) == 0)
137                 strcat(enabled, "none");
138
139         return seq_printf(m, "%s\n", enabled);
140 }
141 LPROC_SEQ_FOPS_RO(qsd_enabled);
142
143 /* force reintegration procedure to be executed.
144  * Used for test/debugging purpose */
145 static ssize_t
146 lprocfs_force_reint_seq_write(struct file *file, const char *buffer,
147                                 size_t count, loff_t *off)
148 {
149         struct qsd_instance *qsd = ((struct seq_file *)file->private_data)->private;
150         int                  rc = 0, qtype;
151
152         LASSERT(qsd != NULL);
153
154         write_lock(&qsd->qsd_lock);
155         if (qsd->qsd_stopping) {
156                 /* don't mess up with shutdown procedure, it is already
157                  * complicated enough */
158                 rc = -ESHUTDOWN;
159         } else if (!qsd->qsd_prepared) {
160                 rc = -EAGAIN;
161         } else {
162                 /* mark all indexes as stale */
163                 for (qtype = USRQUOTA; qtype < MAXQUOTAS; qtype++) {
164                         qsd->qsd_type_array[qtype]->qqi_glb_uptodate = false;
165                         qsd->qsd_type_array[qtype]->qqi_slv_uptodate = false;
166                 }
167         }
168         write_unlock(&qsd->qsd_lock);
169
170         if (rc)
171                 return rc;
172
173         /* kick off reintegration */
174         for (qtype = USRQUOTA; qtype < MAXQUOTAS; qtype++) {
175                 rc = qsd_start_reint_thread(qsd->qsd_type_array[qtype]);
176                 if (rc)
177                         break;
178         }
179         return rc == 0 ? count : rc;
180 }
181 LPROC_SEQ_FOPS_WO_TYPE(qsd, force_reint);
182
183 static int qsd_timeout_seq_show(struct seq_file *m, void *data)
184 {
185         struct qsd_instance *qsd = m->private;
186         LASSERT(qsd != NULL);
187
188         return seq_printf(m, "%d\n", qsd_wait_timeout(qsd));
189 }
190
191 static ssize_t
192 qsd_timeout_seq_write(struct file *file, const char *buffer,
193                         size_t count, loff_t *off)
194 {
195         struct qsd_instance *qsd = ((struct seq_file *)file->private_data)->private;
196         int                  timeout, rc;
197         LASSERT(qsd != NULL);
198
199         rc = lprocfs_write_helper(buffer, count, &timeout);
200         if (rc)
201                 return rc;
202         if (timeout < 0)
203                 return -EINVAL;
204
205         qsd->qsd_timeout = timeout;
206         return count;
207 }
208 LPROC_SEQ_FOPS(qsd_timeout);
209
/*
 * Proc files exported by each qsd instance. The table is passed to
 * lprocfs_seq_register() by qsd_init(), with the qsd_instance as private
 * data, and is terminated by a NULL entry.
 */
static struct lprocfs_seq_vars lprocfs_quota_qsd_vars[] = {
        /* read-only summary of the qsd state, see qsd_state_seq_show() */
        { "info",               &qsd_state_fops         },
        /* read-only list of enforced quota types */
        { "enabled",            &qsd_enabled_fops       },
        /* write-only trigger to restart reintegration */
        { "force_reint",        &qsd_force_reint_fops   },
        /* read/write timeout tunable */
        { "timeout",            &qsd_timeout_fops       },
        { NULL }
};
217
218 /*
219  * Callback function invoked by the OSP layer when the connection to the master
220  * has been set up.
221  *
222  * \param data - is a pointer to the qsd_instance
223  *
224  * \retval - 0 on success, appropriate error on failure
225  */
226 static int qsd_conn_callback(void *data)
227 {
228         struct qsd_instance *qsd = (struct qsd_instance *)data;
229         int                  type;
230         ENTRY;
231
232         /* qsd_exp should now be valid */
233         LASSERT(qsd->qsd_exp);
234
235         qsd->qsd_ns = class_exp2obd(qsd->qsd_exp)->obd_namespace;
236
237         write_lock(&qsd->qsd_lock);
238         /* notify that qsd_exp is now valid */
239         qsd->qsd_exp_valid = true;
240         write_unlock(&qsd->qsd_lock);
241
242         /* Now that the connection to master is setup, we can initiate the
243          * reintegration procedure for quota types which are enabled.
244          * It is worth noting that, if the qsd_instance hasn't been started
245          * already, then we can only complete the first two steps of the
246          * reintegration procedure (i.e. global lock enqueue and slave
247          * index transfer) since the space usage reconciliation (i.e.
248          * step 3) will have to wait for qsd_start() to be called */
249         for (type = USRQUOTA; type < MAXQUOTAS; type++) {
250                 struct qsd_qtype_info *qqi = qsd->qsd_type_array[type];
251                 wake_up(&qqi->qqi_reint_thread.t_ctl_waitq);
252         }
253
254         RETURN(0);
255 }
256
257 /*
258  * Release qsd_qtype_info structure which contains data associated with a
259  * given quota type. This releases the accounting objects.
260  * It's called on OSD cleanup when the qsd instance is released.
261  *
262  * \param env - is the environment passed by the caller
263  * \param qsd - is the qsd instance managing the qsd_qtype_info structure
264  *              to be released
265  * \param qtype - is the quota type to be shutdown
266  */
267 static void qsd_qtype_fini(const struct lu_env *env, struct qsd_instance *qsd,
268                            int qtype)
269 {
270         struct qsd_qtype_info   *qqi;
271         int repeat = 0;
272         ENTRY;
273
274         if (qsd->qsd_type_array[qtype] == NULL)
275                 RETURN_EXIT;
276         qqi = qsd->qsd_type_array[qtype];
277         qsd->qsd_type_array[qtype] = NULL;
278
279         /* all deferred work lists should be empty */
280         LASSERT(cfs_list_empty(&qqi->qqi_deferred_glb));
281         LASSERT(cfs_list_empty(&qqi->qqi_deferred_slv));
282
283         /* shutdown lquota site */
284         if (qqi->qqi_site != NULL && !IS_ERR(qqi->qqi_site)) {
285                 lquota_site_free(env, qqi->qqi_site);
286                 qqi->qqi_site = NULL;
287         }
288
289         /* The qqi may still be holding by global locks which are being
290          * canceled asynchronously (LU-4365), see the following steps:
291          *
292          * - On server umount, we try to clear all quota locks first by
293          *   disconnecting LWP (which will invalidate import and cleanup
294          *   all locks on it), however, if quota reint process is holding
295          *   the global lock for reintegration at that time, global lock
296          *   will fail to be cleared on LWP disconnection.
297          *
298          * - Umount process goes on and stops reint process, the global
299          *   lock will be dropped on reint process exit, however, the lock
300          *   cancel in done in asynchronous way, so the
301          *   qsd_glb_blocking_ast() might haven't been called yet when we
302          *   get here.
303          */
304         while (cfs_atomic_read(&qqi->qqi_ref) > 1) {
305                 CDEBUG(D_QUOTA, "qqi reference count %u, repeat: %d\n",
306                        cfs_atomic_read(&qqi->qqi_ref), repeat);
307                 repeat++;
308                 schedule_timeout_and_set_state(TASK_INTERRUPTIBLE,
309                                                 cfs_time_seconds(1));
310         }
311
312         /* by now, all qqi users should have gone away */
313         LASSERT(cfs_atomic_read(&qqi->qqi_ref) == 1);
314         lu_ref_fini(&qqi->qqi_reference);
315
316         /* release accounting object */
317         if (qqi->qqi_acct_obj != NULL && !IS_ERR(qqi->qqi_acct_obj)) {
318                 lu_object_put(env, &qqi->qqi_acct_obj->do_lu);
319                 qqi->qqi_acct_obj = NULL;
320         }
321
322         /* release slv index */
323         if (qqi->qqi_slv_obj != NULL && !IS_ERR(qqi->qqi_slv_obj)) {
324                 lu_object_put(env, &qqi->qqi_slv_obj->do_lu);
325                 qqi->qqi_slv_obj = NULL;
326                 qqi->qqi_slv_ver = 0;
327         }
328
329         /* release global index */
330         if (qqi->qqi_glb_obj != NULL && !IS_ERR(qqi->qqi_glb_obj)) {
331                 lu_object_put(env, &qqi->qqi_glb_obj->do_lu);
332                 qqi->qqi_glb_obj = NULL;
333                 qqi->qqi_glb_ver = 0;
334         }
335
336         OBD_FREE_PTR(qqi);
337         EXIT;
338 }
339
340 /*
341  * Allocate and initialize a qsd_qtype_info structure for quota type \qtype.
342  * This opens the accounting object and initializes the proc file.
343  * It's called on OSD start when the qsd_prepare() is invoked on the qsd
344  * instance.
345  *
346  * \param env  - the environment passed by the caller
347  * \param qsd  - is the qsd instance which will be in charge of the new
348  *               qsd_qtype_info instance.
349  * \param qtype - is quota type to set up
350  *
351  * \retval - 0 on success and qsd->qsd_type_array[qtype] is allocated,
352  *           appropriate error on failure
353  */
354 static int qsd_qtype_init(const struct lu_env *env, struct qsd_instance *qsd,
355                           int qtype)
356 {
357         struct qsd_qtype_info   *qqi;
358         int                      rc;
359         struct obd_uuid          uuid;
360         ENTRY;
361
362         LASSERT(qsd->qsd_type_array[qtype] == NULL);
363
364         /* allocate structure for this quota type */
365         OBD_ALLOC_PTR(qqi);
366         if (qqi == NULL)
367                 RETURN(-ENOMEM);
368         qsd->qsd_type_array[qtype] = qqi;
369         cfs_atomic_set(&qqi->qqi_ref, 1); /* referenced from qsd */
370
371         /* set backpointer and other parameters */
372         qqi->qqi_qsd   = qsd;
373         qqi->qqi_qtype = qtype;
374         lu_ref_init(&qqi->qqi_reference);
375         lquota_generate_fid(&qqi->qqi_fid, qsd->qsd_pool_id, QSD_RES_TYPE(qsd),
376                             qtype);
377         qqi->qqi_glb_uptodate = false;
378         qqi->qqi_slv_uptodate = false;
379         qqi->qqi_reint        = false;
380         init_waitqueue_head(&qqi->qqi_reint_thread.t_ctl_waitq);
381         thread_set_flags(&qqi->qqi_reint_thread, SVC_STOPPED);
382         CFS_INIT_LIST_HEAD(&qqi->qqi_deferred_glb);
383         CFS_INIT_LIST_HEAD(&qqi->qqi_deferred_slv);
384
385         /* open accounting object */
386         LASSERT(qqi->qqi_acct_obj == NULL);
387         qqi->qqi_acct_obj = acct_obj_lookup(env, qsd->qsd_dev, qtype);
388         if (IS_ERR(qqi->qqi_acct_obj)) {
389                 CDEBUG(D_QUOTA, "%s: no %s space accounting support rc:%ld\n",
390                        qsd->qsd_svname, QTYPE_NAME(qtype),
391                        PTR_ERR(qqi->qqi_acct_obj));
392                 qqi->qqi_acct_obj = NULL;
393                 qsd->qsd_acct_failed = true;
394         }
395
396         /* open global index copy */
397         LASSERT(qqi->qqi_glb_obj == NULL);
398         qqi->qqi_glb_obj = lquota_disk_glb_find_create(env, qsd->qsd_dev,
399                                                        qsd->qsd_root,
400                                                        &qqi->qqi_fid, true);
401         if (IS_ERR(qqi->qqi_glb_obj)) {
402                 CERROR("%s: can't open global index copy "DFID" %ld\n",
403                        qsd->qsd_svname, PFID(&qqi->qqi_fid),
404                        PTR_ERR(qqi->qqi_glb_obj));
405                 GOTO(out, rc = PTR_ERR(qqi->qqi_glb_obj));
406         }
407         qqi->qqi_glb_ver = dt_version_get(env, qqi->qqi_glb_obj);
408
409         /* open slave index copy */
410         LASSERT(qqi->qqi_slv_obj == NULL);
411         obd_str2uuid(&uuid, qsd->qsd_svname);
412         qqi->qqi_slv_obj = lquota_disk_slv_find_create(env, qsd->qsd_dev,
413                                                        qsd->qsd_root,
414                                                        &qqi->qqi_fid, &uuid,
415                                                        true);
416         if (IS_ERR(qqi->qqi_slv_obj)) {
417                 CERROR("%s: can't open slave index copy "DFID" %ld\n",
418                        qsd->qsd_svname, PFID(&qqi->qqi_fid),
419                        PTR_ERR(qqi->qqi_slv_obj));
420                 GOTO(out, rc = PTR_ERR(qqi->qqi_slv_obj));
421         }
422         qqi->qqi_slv_ver = dt_version_get(env, qqi->qqi_slv_obj);
423
424         /* allocate site */
425         qqi->qqi_site = lquota_site_alloc(env, qqi, false, qtype, &qsd_lqe_ops);
426         if (IS_ERR(qqi->qqi_site)) {
427                 CERROR("%s: can't allocate site "DFID" %ld\n", qsd->qsd_svname,
428                        PFID(&qqi->qqi_fid), PTR_ERR(qqi->qqi_site));
429                 GOTO(out, rc = PTR_ERR(qqi->qqi_site));
430         }
431
432         /* register proc entry for accounting & global index copy objects */
433         rc = lprocfs_seq_create(qsd->qsd_proc,
434                                 qtype == USRQUOTA ? "acct_user" : "acct_group",
435                                 0444, &lprocfs_quota_seq_fops,
436                                 qqi->qqi_acct_obj);
437         if (rc) {
438                 CERROR("%s: can't add procfs entry for accounting file %d\n",
439                        qsd->qsd_svname, rc);
440                 GOTO(out, rc);
441         }
442
443         rc = lprocfs_seq_create(qsd->qsd_proc,
444                                 qtype == USRQUOTA ? "limit_user" : "limit_group",
445                                 0444, &lprocfs_quota_seq_fops,
446                                 qqi->qqi_glb_obj);
447         if (rc) {
448                 CERROR("%s: can't add procfs entry for global index copy %d\n",
449                        qsd->qsd_svname, rc);
450                 GOTO(out, rc);
451         }
452         EXIT;
453 out:
454         if (rc)
455                 qsd_qtype_fini(env, qsd, qtype);
456         return rc;
457 }
458
459 /*
460  * Release a qsd_instance. Companion of qsd_init(). This releases all data
461  * structures associated with the quota slave (on-disk objects, lquota entry
462  * tables, ...).
463  * This function should be called when the OSD is shutting down.
464  *
465  * \param env - is the environment passed by the caller
466  * \param qsd - is the qsd instance to shutdown
467  */
468 void qsd_fini(const struct lu_env *env, struct qsd_instance *qsd)
469 {
470         int     qtype;
471         ENTRY;
472
473         if (unlikely(qsd == NULL))
474                 RETURN_EXIT;
475
476         CDEBUG(D_QUOTA, "%s: initiating QSD shutdown\n", qsd->qsd_svname);
477         write_lock(&qsd->qsd_lock);
478         qsd->qsd_stopping = true;
479         write_unlock(&qsd->qsd_lock);
480
481         /* remove qsd proc entry */
482         if (qsd->qsd_proc != NULL) {
483                 lprocfs_remove(&qsd->qsd_proc);
484                 qsd->qsd_proc = NULL;
485         }
486
487         /* stop the writeback thread */
488         qsd_stop_upd_thread(qsd);
489
490         /* shutdown the reintegration threads */
491         for (qtype = USRQUOTA; qtype < MAXQUOTAS; qtype++) {
492                 if (qsd->qsd_type_array[qtype] == NULL)
493                         continue;
494                 qsd_stop_reint_thread(qsd->qsd_type_array[qtype]);
495         }
496
497         if (qsd->qsd_ns != NULL) {
498                 qsd->qsd_ns = NULL;
499         }
500
501         /* free per-quota type data */
502         for (qtype = USRQUOTA; qtype < MAXQUOTAS; qtype++)
503                 qsd_qtype_fini(env, qsd, qtype);
504
505         /* deregister connection to the quota master */
506         qsd->qsd_exp_valid = false;
507         lustre_deregister_lwp_item(&qsd->qsd_exp);
508
509         /* release per-filesystem information */
510         if (qsd->qsd_fsinfo != NULL) {
511                 down(&qsd->qsd_fsinfo->qfs_sem);
512                 /* remove from the list of fsinfo */
513                 cfs_list_del_init(&qsd->qsd_link);
514                 up(&qsd->qsd_fsinfo->qfs_sem);
515                 qsd_put_fsinfo(qsd->qsd_fsinfo);
516                 qsd->qsd_fsinfo = NULL;
517         }
518
519         /* release quota root directory */
520         if (qsd->qsd_root != NULL) {
521                 lu_object_put(env, &qsd->qsd_root->do_lu);
522                 qsd->qsd_root = NULL;
523         }
524
525         /* release reference on dt_device */
526         if (qsd->qsd_dev != NULL) {
527                 lu_ref_del(&qsd->qsd_dev->dd_lu_dev.ld_reference, "qsd", qsd);
528                 lu_device_put(&qsd->qsd_dev->dd_lu_dev);
529                 qsd->qsd_dev = NULL;
530         }
531
532         CDEBUG(D_QUOTA, "%s: QSD shutdown completed\n", qsd->qsd_svname);
533         OBD_FREE_PTR(qsd);
534         EXIT;
535 }
536 EXPORT_SYMBOL(qsd_fini);
537
538 /*
539  * Create a new qsd_instance to be associated with backend osd device
540  * identified by \dev.
541  *
542  * \param env    - the environment passed by the caller
543  * \param svname - is the service name of the OSD device creating this instance
544  * \param dev    - is the dt_device where to store quota index files
545  * \param osd_proc - is the procfs parent directory where to create procfs file
546  *                   related to this new qsd instance
547  *
548  * \retval - pointer to new qsd_instance associated with dev \dev on success,
549  *           appropriate error on failure
550  */
551 struct qsd_instance *qsd_init(const struct lu_env *env, char *svname,
552                               struct dt_device *dev,
553                               cfs_proc_dir_entry_t *osd_proc)
554 {
555         struct qsd_thread_info  *qti = qsd_info(env);
556         struct qsd_instance     *qsd;
557         int                      rc, type, idx;
558         ENTRY;
559
560         /* only configure qsd for MDT & OST */
561         type = server_name2index(svname, &idx, NULL);
562         if (type != LDD_F_SV_TYPE_MDT && type != LDD_F_SV_TYPE_OST)
563                 RETURN(NULL);
564
565         /* allocate qsd instance */
566         OBD_ALLOC_PTR(qsd);
567         if (qsd == NULL)
568                 RETURN(ERR_PTR(-ENOMEM));
569
570         /* generic initializations */
571         rwlock_init(&qsd->qsd_lock);
572         CFS_INIT_LIST_HEAD(&qsd->qsd_link);
573         thread_set_flags(&qsd->qsd_upd_thread, SVC_STOPPED);
574         init_waitqueue_head(&qsd->qsd_upd_thread.t_ctl_waitq);
575         CFS_INIT_LIST_HEAD(&qsd->qsd_upd_list);
576         spin_lock_init(&qsd->qsd_adjust_lock);
577         CFS_INIT_LIST_HEAD(&qsd->qsd_adjust_list);
578         qsd->qsd_prepared = false;
579         qsd->qsd_started = false;
580
581         /* copy service name */
582         if (strlcpy(qsd->qsd_svname, svname, sizeof(qsd->qsd_svname))
583             >= sizeof(qsd->qsd_svname))
584                 GOTO(out, rc = -E2BIG);
585
586         /* grab reference on osd device */
587         lu_device_get(&dev->dd_lu_dev);
588         lu_ref_add(&dev->dd_lu_dev.ld_reference, "qsd", qsd);
589         qsd->qsd_dev = dev;
590
591         /* we only support pool ID 0 (default data or metadata pool) for the
592          * time being. A different pool ID could be assigned to this target via
593          * the configuration log in the future */
594         qsd->qsd_pool_id  = 0;
595
596         /* get fsname from svname */
597         rc = server_name2fsname(svname, qti->qti_buf, NULL);
598         if (rc) {
599                 CERROR("%s: fail to extract filesystem name\n", svname);
600                 GOTO(out, rc);
601         }
602
603         /* look up quota setting for the filesystem the target belongs to */
604         qsd->qsd_fsinfo = qsd_get_fsinfo(qti->qti_buf, 1);
605         if (qsd->qsd_fsinfo == NULL) {
606                 CERROR("%s: failed to locate filesystem information\n", svname);
607                 GOTO(out, rc = -EINVAL);
608         }
609
610         /* add in the list of lquota_fsinfo */
611         down(&qsd->qsd_fsinfo->qfs_sem);
612         list_add_tail(&qsd->qsd_link, &qsd->qsd_fsinfo->qfs_qsd_list);
613         up(&qsd->qsd_fsinfo->qfs_sem);
614
615         /* register procfs directory */
616         qsd->qsd_proc = lprocfs_seq_register(QSD_DIR, osd_proc,
617                                                 lprocfs_quota_qsd_vars, qsd);
618         if (IS_ERR(qsd->qsd_proc)) {
619                 rc = PTR_ERR(qsd->qsd_proc);
620                 qsd->qsd_proc = NULL;
621                 CERROR("%s: fail to create quota slave proc entry (%d)\n",
622                        svname, rc);
623                 GOTO(out, rc);
624         }
625         EXIT;
626 out:
627         if (rc) {
628                 qsd_fini(env, qsd);
629                 return ERR_PTR(rc);
630         }
631         RETURN(qsd);
632 }
633 EXPORT_SYMBOL(qsd_init);
634
635 /*
636  * Initialize on-disk structures in order to manage quota enforcement for
637  * the target associated with the qsd instance \qsd and starts the reintegration
638  * procedure for each quota type as soon as possible.
639  * The last step of the reintegration will be completed once qsd_start() is
640  * called, at which points the space reconciliation with the master will be
641  * executed.
642  * This function must be called when the server stack is fully configured,
643  * typically when ->ldo_prepare is called across the stack.
644  *
645  * \param env - the environment passed by the caller
646  * \param qsd - is qsd_instance to prepare
647  *
648  * \retval - 0 on success, appropriate error on failure
649  */
650 int qsd_prepare(const struct lu_env *env, struct qsd_instance *qsd)
651 {
652         struct qsd_thread_info  *qti = qsd_info(env);
653         int                      qtype, rc = 0;
654         ENTRY;
655
656         if (unlikely(qsd == NULL))
657                 RETURN(0);
658
659         read_lock(&qsd->qsd_lock);
660         if (qsd->qsd_prepared) {
661                 CERROR("%s: qsd instance already prepared\n", qsd->qsd_svname);
662                 rc = -EALREADY;
663         }
664         read_unlock(&qsd->qsd_lock);
665         if (rc)
666                 RETURN(rc);
667
668         /* Record whether this qsd instance is managing quota enforcement for a
669          * MDT (i.e. inode quota) or OST (block quota) */
670         if (lu_device_is_md(qsd->qsd_dev->dd_lu_dev.ld_site->ls_top_dev)) {
671                 qsd->qsd_is_md = true;
672                 qsd->qsd_sync_threshold = LQUOTA_LEAST_QUNIT(LQUOTA_RES_MD);
673         } else {
674                 qsd->qsd_sync_threshold = LQUOTA_LEAST_QUNIT(LQUOTA_RES_DT);
675         }
676
677         /* look-up on-disk directory for the quota slave */
678         qsd->qsd_root = lquota_disk_dir_find_create(env, qsd->qsd_dev, NULL,
679                                                     QSD_DIR);
680         if (IS_ERR(qsd->qsd_root)) {
681                 rc = PTR_ERR(qsd->qsd_root);
682                 qsd->qsd_root = NULL;
683                 CERROR("%s: failed to create quota slave root dir (%d)\n",
684                        qsd->qsd_svname, rc);
685                 RETURN(rc);
686         }
687
688         /* initialize per-quota type data */
689         for (qtype = USRQUOTA; qtype < MAXQUOTAS; qtype++) {
690                 rc = qsd_qtype_init(env, qsd, qtype);
691                 if (rc)
692                         RETURN(rc);
693         }
694
695         /* pools successfully setup, mark the qsd as prepared */
696         write_lock(&qsd->qsd_lock);
697         qsd->qsd_prepared = true;
698         write_unlock(&qsd->qsd_lock);
699
700         /* start reintegration thread for each type, if required */
701         for (qtype = USRQUOTA; qtype < MAXQUOTAS; qtype++) {
702                 struct qsd_qtype_info   *qqi = qsd->qsd_type_array[qtype];
703
704                 if (qsd_type_enabled(qsd, qtype) && qsd->qsd_acct_failed) {
705                         LCONSOLE_ERROR("%s: can't enable quota enforcement "
706                                        "since space accounting isn't functional"
707                                        ". Please run tunefs.lustre --quota on "
708                                        "an unmounted filesystem if not done "
709                                        "already\n", qsd->qsd_svname);
710                         break;
711                 }
712
713                 rc = qsd_start_reint_thread(qqi);
714                 if (rc) {
715                         CERROR("%s: failed to start reint thread for type %s "
716                                "(%d)\n", qsd->qsd_svname, QTYPE_NAME(qtype),
717                                rc);
718                         RETURN(rc);
719                 }
720         }
721
722         /* start writeback thread */
723         rc = qsd_start_upd_thread(qsd);
724         if (rc) {
725                 CERROR("%s: failed to start writeback thread (%d)\n",
726                        qsd->qsd_svname, rc);
727                 RETURN(rc);
728         }
729
730         /* generate osp name */
731         rc = tgt_name2lwp_name(qsd->qsd_svname, qti->qti_buf,
732                                MTI_NAME_MAXLEN, 0);
733         if (rc) {
734                 CERROR("%s: failed to generate ospname (%d)\n",
735                        qsd->qsd_svname, rc);
736                 RETURN(rc);
737         }
738
739         /* the connection callback will start the reintegration
740          * procedure if quota is enabled */
741         rc = lustre_register_lwp_item(qti->qti_buf, &qsd->qsd_exp,
742                                       qsd_conn_callback, (void *)qsd);
743         if (rc) {
744                 CERROR("%s: fail to get connection to master (%d)\n",
745                        qsd->qsd_svname, rc);
746                 RETURN(rc);
747         }
748
749         RETURN(0);
750 }
751 EXPORT_SYMBOL(qsd_prepare);
752
753 /*
754  * Start a qsd instance. This will complete the last step of the reintegration
755  * procedure as soon as possible (provided that the master is reachable).
756  * This should be called when recovery has been completed and quota should now
757  * be enforced on every operations.
758  *
759  * \param env - the environment passed by the caller
760  * \param qsd - is the qsd instance associated with the osd device to start
761  */
762 int qsd_start(const struct lu_env *env, struct qsd_instance *qsd)
763 {
764         int     type, rc = 0;
765         ENTRY;
766
767         if (unlikely(qsd == NULL))
768                 RETURN(0);
769
770         write_lock(&qsd->qsd_lock);
771         if (!qsd->qsd_prepared) {
772                 CERROR("%s: can't start qsd instance since it wasn't properly "
773                        "initialized\n", qsd->qsd_svname);
774                 rc = -EFAULT;
775         } else if (qsd->qsd_started) {
776                 CERROR("%s: qsd instance already started\n", qsd->qsd_svname);
777                 rc = -EALREADY;
778         } else {
779                 /* notify that the qsd_instance is now started */
780                 qsd->qsd_started = true;
781         }
782         write_unlock(&qsd->qsd_lock);
783
784         if (rc)
785                 RETURN(rc);
786
787         /* Trigger the 3rd step of reintegration: If usage > granted, acquire
788          * up to usage; If usage < granted, release down to usage.  */
789         for (type = USRQUOTA; type < MAXQUOTAS; type++) {
790                 struct qsd_qtype_info   *qqi = qsd->qsd_type_array[type];
791                 wake_up(&qqi->qqi_reint_thread.t_ctl_waitq);
792         }
793
794         RETURN(rc);
795 }
796 EXPORT_SYMBOL(qsd_start);
797
798 void lustre_register_quota_process_config(int (*qpc)(struct lustre_cfg *lcfg));
799
800 /*
801  * Global initialization performed at module load time
802  */
803 int qsd_glb_init(void)
804 {
805         int     rc;
806
807         rc = lu_kmem_init(qsd_caches);
808         if (rc)
809                 return rc;
810
811         qsd_key_init_generic(&qsd_thread_key, NULL);
812         lu_context_key_register(&qsd_thread_key);
813         lustre_register_quota_process_config(qsd_process_config);
814
815         return 0;
816 }
817
/*
 * Companion of qsd_glb_init() called at module unload time: remove the
 * quota configuration handler, destroy the slab caches described by
 * qsd_caches and deregister the qsd thread key.
 */
void qsd_glb_fini(void)
{
        /* passing NULL removes the handler installed by qsd_glb_init() */
        lustre_register_quota_process_config(NULL);
        lu_kmem_fini(qsd_caches);
        lu_context_key_degister(&qsd_thread_key);
}