4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; if not, write to the
18 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 * Boston, MA 021110-1307, USA
24 * Copyright (c) 2012, 2017, Intel Corporation.
25 * Use is subject to license terms.
27 * Author: Johann Lombardi <johann.lombardi@intel.com>
28 * Author: Niu Yawei <yawei.niu@intel.com>
32 * Management of the device associated with a Quota Master Target (QMT).
34 * The QMT holds the cluster wide quota limits. It stores the quota settings
35 * ({hard,soft} limit & grace time) in a global index file and is in charge
36 * of allocating quota space to slaves while guaranteeing that the overall
37 * limits aren't exceeded. The QMT also maintains one index per slave (in fact,
38 * one per slave per quota type) used to track how much space is allocated
39 * to a given slave. Now that the QMT is aware of the quota space distribution
40 * among slaves, it can afford to efficiently rebalance quota space from one
41 * slave to another. Slaves are asked to release quota space via glimpse
42 * callbacks sent on DLM locks which are granted to slaves when the latter
43 * acquire quota space.
45 * The QMT device is currently set up by the MDT and should probably be moved
46 * to a separate target in the future. Meanwhile, the MDT forwards all quota
47 * requests to the QMT via a list of request handlers (see struct qmt_handlers
48 * in lustre_quota.h). The QMT also borrows the LDLM namespace from the MDT.
50 * To bring up a QMT device, the following steps must be completed:
52 * - call ->ldto_device_alloc to allocate the QMT device and perform basic
53 * initialization like connecting to the backend OSD device or setting up the
54 * default pools and the QMT procfs directory.
56 * - the MDT can then connect to the QMT instance via legacy obd_connect path.
58 * - once the MDT stack has been fully configured, ->ldo_prepare must be called
59 * to configure on-disk objects associated with this master target.
61 * To shutdown a QMT device, the MDT just has to disconnect from the QMT.
63 * The qmt_device_type structure is registered when the lquota module is
64 * loaded and all the steps described above are automatically done when the MDT
65 * sets up the Quota Master Target via calls to class_attach/class_setup, see
66 * mdt_quota_init() for more details.
69 #define DEBUG_SUBSYSTEM S_LQUOTA
71 #include <obd_class.h>
72 #include <lprocfs_status.h>
73 #include <lustre_disk.h>
74 #include "qmt_internal.h"
76 static const struct lu_device_operations qmt_lu_ops;
79 * Release quota master target and all data structures associated with this
81 * Called on MDT0 cleanup.
83 * \param env - is the environment passed by the caller
84 * \param ld - is the lu_device associated with the qmt device to be released
86 * \retval - NULL on success (backend OSD device is managed by the main stack),
87 * appropriate error on failure
89 static struct lu_device *qmt_device_fini(const struct lu_env *env,
92 struct qmt_device *qmt = lu2qmt_dev(ld);
97 CDEBUG(D_QUOTA, "%s: initiating QMT shutdown\n", qmt->qmt_svname);
98 qmt->qmt_stopping = true;
100 if (qmt_lvbo_free_wq) {
101 destroy_workqueue(qmt_lvbo_free_wq);
102 qmt_lvbo_free_wq = NULL;
105 /* kill pool instances, if any */
106 qmt_pool_fini(env, qmt);
108 /* remove qmt proc entry */
109 if (qmt->qmt_proc != NULL && !IS_ERR(qmt->qmt_proc)) {
110 lprocfs_remove(&qmt->qmt_proc);
111 qmt->qmt_proc = NULL;
114 /* stop rebalance thread */
115 if (!qmt->qmt_child->dd_rdonly)
116 qmt_stop_reba_thread(qmt);
119 dt_object_put(env, qmt->qmt_root);
120 qmt->qmt_root = NULL;
123 /* disconnect from OSD */
124 if (qmt->qmt_child_exp != NULL) {
125 obd_disconnect(qmt->qmt_child_exp);
126 qmt->qmt_child_exp = NULL;
127 qmt->qmt_child = NULL;
130 /* clear references to MDT namespace */
131 ld->ld_obd->obd_namespace = NULL;
138 * Connect a quota master to the backend OSD device.
140 * \param env - is the environment passed by the caller
141 * \param qmt - is the quota master target to be connected
142 * \param cfg - is the configuration log record from which we need to extract
143 * the service name of the backend OSD device to connect to.
145 * \retval - 0 on success, appropriate error on failure
147 static int qmt_connect_to_osd(const struct lu_env *env, struct qmt_device *qmt,
148 struct lustre_cfg *cfg)
150 struct obd_connect_data *data = NULL;
151 struct obd_device *obd;
152 struct lu_device *ld = qmt2lu_dev(qmt);
156 LASSERT(qmt->qmt_child_exp == NULL);
160 GOTO(out, rc = -ENOMEM);
162 /* look-up OBD device associated with the backend OSD device.
163 * The MDT is kind enough to pass the OBD name in QMT configuration */
164 obd = class_name2obd(lustre_cfg_string(cfg, 3));
166 CERROR("%s: can't locate backend osd device: %s\n",
167 qmt->qmt_svname, lustre_cfg_string(cfg, 3));
168 GOTO(out, rc = -ENOTCONN);
171 data->ocd_connect_flags = OBD_CONNECT_VERSION;
172 data->ocd_version = LUSTRE_VERSION_CODE;
174 /* connect to OSD device */
175 rc = obd_connect(NULL, &qmt->qmt_child_exp, obd, &obd->obd_uuid, data,
178 CERROR("%s: cannot connect to osd dev %s (%d)\n",
179 qmt->qmt_svname, obd->obd_name, rc);
183 /* initialize site (although it isn't used anywhere) and lu_device
184 * pointer to next device */
185 qmt->qmt_child = lu2dt_dev(qmt->qmt_child_exp->exp_obd->obd_lu_dev);
186 ld->ld_site = qmt->qmt_child_exp->exp_obd->obd_lu_dev->ld_site;
195 * Initialize quota master target device. This includes connecting to
196 * the backend OSD device, initializing the pool configuration and creating the
197 * root procfs directory dedicated to this quota target.
198 * The rest of the initialization is done when the stack is fully configured
199 * (i.e. when ->ldo_prepare is called across the stack).
201 * This function is called on MDT0 setup.
203 * \param env - is the environment passed by the caller
204 * \param qmt - is the quota master target to be initialized
205 * \param ldt - is the device type structure associated with the qmt device
206 * \param cfg - is the configuration record used to configure the qmt device
208 * \retval - 0 on success, appropriate error on failure
210 static int qmt_device_init0(const struct lu_env *env, struct qmt_device *qmt,
211 struct lu_device_type *ldt, struct lustre_cfg *cfg)
213 struct lu_device *ld = qmt2lu_dev(qmt);
214 struct obd_device *obd, *mdt_obd;
215 struct obd_type *type;
216 char *svname = lustre_cfg_string(cfg, 0);
223 /* record who i am, it might be useful ... */
224 rc = strscpy(qmt->qmt_svname, svname, sizeof(qmt->qmt_svname));
228 /* look-up the obd_device associated with the qmt */
229 obd = class_name2obd(qmt->qmt_svname);
233 /* reference each other */
234 obd->obd_lu_dev = ld;
237 /* look-up the parent MDT to steal its ldlm namespace ... */
238 mdt_obd = class_name2obd(lustre_cfg_string(cfg, 2));
242 /* borrow MDT namespace. kind of a hack until we have our own namespace
243 * & service threads */
244 LASSERT(mdt_obd->obd_namespace != NULL);
245 obd->obd_namespace = mdt_obd->obd_namespace;
246 qmt->qmt_ns = obd->obd_namespace;
248 /* connect to backend osd device */
249 rc = qmt_connect_to_osd(env, qmt, cfg);
253 /* set up and start rebalance thread */
254 INIT_LIST_HEAD(&qmt->qmt_reba_list);
255 spin_lock_init(&qmt->qmt_reba_lock);
256 if (!qmt->qmt_child->dd_rdonly) {
257 rc = qmt_start_reba_thread(qmt);
259 CERROR("%s: failed to start rebalance thread (%d)\n",
260 qmt->qmt_svname, rc);
265 /* at the moment there is no linkage between lu_type and obd_type, so
266 * we lookup obd_type this way */
267 type = class_search_type(LUSTRE_QMT_NAME);
268 LASSERT(type != NULL);
270 /* put reference taken by class_search_type */
271 kobject_put(&type->typ_kobj);
273 /* register proc directory associated with this qmt */
274 qmt->qmt_proc = lprocfs_register(qmt->qmt_svname, type->typ_procroot,
276 if (IS_ERR(qmt->qmt_proc)) {
277 rc = PTR_ERR(qmt->qmt_proc);
278 CERROR("%s: failed to create qmt proc entry (%d)\n",
279 qmt->qmt_svname, rc);
283 /* initialize pool configuration */
284 rc = qmt_pool_init(env, qmt);
288 qmt_lvbo_free_wq = alloc_workqueue("qmt_lvbo_free", WQ_UNBOUND, 0);
289 if (!qmt_lvbo_free_wq) {
291 CERROR("%s: failed to start qmt_lvbo_free workqueue: rc = %d\n",
292 qmt->qmt_svname, rc);
299 qmt_device_fini(env, ld);
304 * Free quota master target device. Companion of qmt_device_alloc()
306 * \param env - is the environment passed by the caller
307 * \param ld - is the lu_device associated with the qmt dev to be freed
309 * \retval - NULL on success (backend OSD device is managed by the main stack),
310 * appropriate error on failure
312 static struct lu_device *qmt_device_free(const struct lu_env *env,
313 struct lu_device *ld)
315 struct qmt_device *qmt = lu2qmt_dev(ld);
318 LASSERT(qmt != NULL);
326 * Allocate quota master target and initialize it.
328 * \param env - is the environment passed by the caller
329 * \param ldt - is the device type structure associated with the qmt
330 * \param cfg - is the configuration record used to configure the qmt
332 * \retval - lu_device structure associated with the qmt on success,
333 * appropriate error on failure
335 static struct lu_device *qmt_device_alloc(const struct lu_env *env,
336 struct lu_device_type *ldt,
337 struct lustre_cfg *cfg)
339 struct qmt_device *qmt;
340 struct lu_device *ld;
344 /* allocate qmt device */
347 RETURN(ERR_PTR(-ENOMEM));
349 /* configure lu/dt_device */
350 ld = qmt2lu_dev(qmt);
351 dt_device_init(&qmt->qmt_dt_dev, ldt);
352 ld->ld_ops = &qmt_lu_ops;
354 /* initialize qmt device */
355 rc = qmt_device_init0(env, qmt, ldt, cfg);
357 qmt_device_free(env, ld);
364 LU_KEY_INIT_FINI(qmt, struct qmt_thread_info);
365 LU_TYPE_INIT_FINI(qmt, &qmt_thread_key);
366 LU_CONTEXT_KEY_DEFINE(qmt, LCT_MD_THREAD);
369 * lu device type operations associated with the master target.
371 static const struct lu_device_type_operations qmt_device_type_ops = {
372 .ldto_init = qmt_type_init,
373 .ldto_fini = qmt_type_fini,
375 .ldto_start = qmt_type_start,
376 .ldto_stop = qmt_type_stop,
378 .ldto_device_alloc = qmt_device_alloc,
379 .ldto_device_free = qmt_device_free,
381 .ldto_device_fini = qmt_device_fini,
385 * lu device type structure associated with the master target.
386 * MDT0 uses this structure to configure the qmt.
388 static struct lu_device_type qmt_device_type = {
389 .ldt_tags = LU_DEVICE_DT,
390 .ldt_name = LUSTRE_QMT_NAME,
391 .ldt_ops = &qmt_device_type_ops,
392 .ldt_ctx_tags = LCT_MD_THREAD,
396 * obd_connect handler used by the MDT to connect to the master target.
398 static int qmt_device_obd_connect(const struct lu_env *env,
399 struct obd_export **exp,
400 struct obd_device *obd,
401 struct obd_uuid *cluuid,
402 struct obd_connect_data *data,
405 struct lustre_handle conn;
409 rc = class_connect(&conn, obd, cluuid);
413 *exp = class_conn2export(&conn);
418 * obd_disconnect handler used by the MDT to disconnect from the master target.
419 * We trigger cleanup on disconnect since it means that the MDT is about to
422 static int qmt_device_obd_disconnect(struct obd_export *exp)
424 struct obd_device *obd = exp->exp_obd;
428 rc = class_disconnect(exp);
432 rc = class_manual_cleanup(obd);
437 * obd device operations associated with the master target.
439 static const struct obd_ops qmt_obd_ops = {
440 .o_owner = THIS_MODULE,
441 .o_connect = qmt_device_obd_connect,
442 .o_disconnect = qmt_device_obd_disconnect,
443 .o_pool_new = qmt_pool_new,
444 .o_pool_rem = qmt_pool_rem,
445 .o_pool_add = qmt_pool_add,
446 .o_pool_del = qmt_pool_del,
450 * Called when the MDS is fully configured. We use it to set up local objects
451 * associated with the quota master target.
453 * \param env - is the environment passed by the caller
454 * \param parent - is the lu_device of the parent, that's to say the mdt
455 * \param ld - is the lu_device associated with the master target
457 * \retval - 0 on success, appropriate error on failure
459 static int qmt_device_prepare(const struct lu_env *env,
460 struct lu_device *parent,
461 struct lu_device *ld)
463 struct qmt_device *qmt = lu2qmt_dev(ld);
464 struct dt_object *qmt_root;
468 /* initialize quota master root directory where all index files will be
470 qmt_root = lquota_disk_dir_find_create(env, qmt->qmt_child, NULL,
472 if (IS_ERR(qmt_root)) {
473 rc = PTR_ERR(qmt_root);
474 CERROR("%s: failed to create master quota directory (%d)\n",
475 qmt->qmt_svname, rc);
479 qmt->qmt_root = qmt_root;
480 /* initialize on-disk indexes associated with each pool */
481 rc = qmt_pool_prepare(env, qmt, qmt_root, NULL);
486 * lu device operations for the quota master target
488 static const struct lu_device_operations qmt_lu_ops = {
489 .ldo_prepare = qmt_device_prepare,
490 .ldo_process_config = NULL, /* to be defined for dynamic pool
494 /* global variable initialization called when the lquota module is loaded */
495 int qmt_glb_init(void)
500 rc = class_register_type(&qmt_obd_ops, NULL, true,
501 LUSTRE_QMT_NAME, &qmt_device_type);
505 /* called when the lquota module is about to be unloaded */
506 void qmt_glb_fini(void)
508 class_unregister_type(LUSTRE_QMT_NAME);