4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; if not, write to the
18 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA
24 * Copyright (c) 2012, 2017, Intel Corporation.
25 * Use is subject to license terms.
28 #ifndef _QMT_INTERNAL_H
29 #define _QMT_INTERNAL_H
31 #include "lquota_internal.h"
33 extern struct workqueue_struct *qmt_lvbo_free_wq;
36 * The Quota Master Target Device.
37 * The qmt is responsible for:
38 * - all interactions with MDT0 (provide request handlers, share ldlm namespace,
39 * manage ldlm lvbo, ...)
40 * - all quota lock management (i.e. global quota locks as well as per-ID locks)
41 * - manage the quota pool configuration
43 * That's the structure MDT0 connects to in mdt_quota_init().
46 /* Super-class. dt_device/lu_device for this master target */
47 struct dt_device qmt_dt_dev;
49 /* service name of this qmt */
50 char qmt_svname[MAX_OBD_NAME];
51 /* root directory for this qmt */
52 struct dt_object *qmt_root;
54 /* Reference to the next device in the side stack
55 * The child device is actually the OSD device where we store the quota
57 struct obd_export *qmt_child_exp;
58 struct dt_device *qmt_child;
60 /* pointer to ldlm namespace to be used for quota locks */
61 struct ldlm_namespace *qmt_ns;
63 /* List of pools managed by this master target */
64 struct list_head qmt_pool_list;
65 /* rw semaphore to protect pool list */
66 struct rw_semaphore qmt_pool_lock;
68 /* procfs root directory for this qmt */
69 struct proc_dir_entry *qmt_proc;
71 /* dedicated thread in charge of space rebalancing */
72 struct task_struct *qmt_reba_task;
74 /* list of lqe entry which need space rebalancing */
75 struct list_head qmt_reba_list;
77 /* lock protecting rebalancing list */
78 spinlock_t qmt_reba_lock;
80 unsigned long qmt_stopping:1; /* qmt is stopping */
/* maximum pool-name length, including the trailing NUL */
#define QPI_MAXNAME	(LOV_MAXPOOLNAME + 1)

/* true when @qpi is the global pool (named GLB_POOL_NAME).
 * Note: @qpi is parenthesized to stay safe with non-trivial arguments. */
#define qmt_pool_global(qpi) \
	(!strncmp((qpi)->qpi_name, GLB_POOL_NAME, \
		  strlen(GLB_POOL_NAME) + 1) ? true : false)
89 /* Draft for mdt pools */
91 struct lu_tgt_pool osts;
/* Since DOM support, data resources can exist
 * on both MDT and OST targets. */
enum {
	QMT_STYPE_MDT,
	QMT_STYPE_OST,
	QMT_STYPE_CNT
};

/* true when a data (DT) quota resource is served by an MDT-type slave (DOM).
 * Arguments are parenthesized for macro safety. */
#define qmt_dom(rtype, stype) \
	(((rtype) == LQUOTA_RES_DT && \
	  (stype) == QMT_STYPE_MDT) ? true : false)
/* bit offsets for qmt_pool_info::qpi_flags */
enum {
	/* set while recalc_thread is working */
	QPI_FLAG_RECALC_OFFSET,
	QPI_FLAG_STATE_INITED,
};
113 * Per-pool quota information.
114 * The qmt creates one such structure for each pool
115 * with quota enforced. All the structures are kept in a list.
116 * We currently only support the default data pool and default metadata pool.
118 struct qmt_pool_info {
119 /* chained list of all pools managed by the same qmt */
120 struct list_head qpi_linkage;
122 /* Could be LQUOTA_RES_MD or LQUOTA_RES_DT */
124 char qpi_name[QPI_MAXNAME];
126 union qmt_sarray qpi_sarr;
127 /* recalculation thread pointer */
128 struct task_struct *qpi_recalc_task;
129 /* rw semaphore to avoid acquire/release during
130 * pool recalculation. */
131 struct rw_semaphore qpi_recalc_sem;
132 unsigned long qpi_flags;
134 /* track users of this pool instance */
137 /* back pointer to master target
138 * immutable after creation. */
139 struct qmt_device *qpi_qmt;
141 /* pointer to dt object associated with global indexes for both user
143 struct dt_object *qpi_glb_obj[LL_MAXQUOTAS];
145 /* A pool supports two different quota types: user and group quota.
146 * Each quota type has its own global index and lquota_entry hash table.
148 struct lquota_site *qpi_site[LL_MAXQUOTAS];
150 /* number of slaves registered for each quota types */
151 int qpi_slv_nr[QMT_STYPE_CNT][LL_MAXQUOTAS];
153 /* reference on lqe (ID 0) storing grace time. */
154 struct lquota_entry *qpi_grace_lqe[LL_MAXQUOTAS];
156 /* procfs root directory for this pool */
157 struct proc_dir_entry *qpi_proc;
159 /* pool directory where all indexes related to this pool instance are
161 struct dt_object *qpi_root;
163 /* Global quota parameters which apply to all quota type */
164 /* the least value of qunit */
165 unsigned long qpi_least_qunit;
167 /* Least value of qunit when soft limit is exceeded.
169 * When soft limit is exceeded, qunit will be shrinked to least_qunit
170 * (1M for block limit), that results in significant write performance
171 * drop since the client will turn to sync write from now on.
173 * To retain the write performance in an acceptable level, we choose
174 * to sacrifice grace time accuracy a bit and use a larger least_qunit
175 * when soft limit is exceeded. It's (qpi_least_qunit * 4) by default,
176 * and user may enlarge it via procfs to get even better performance
177 * (with the cost of losing more grace time accuracy).
179 * See qmt_calc_softlimit().
181 unsigned long qpi_soft_least_qunit;
184 static inline int qpi_slv_nr(struct qmt_pool_info *pool, int qtype)
188 for (i = 0; i < QMT_STYPE_CNT; i++)
189 sum += pool->qpi_slv_nr[i][qtype];
194 static inline int qpi_slv_nr_by_rtype(struct qmt_pool_info *pool, int qtype)
196 if (pool->qpi_rtype == LQUOTA_RES_DT)
197 /* Here should be qpi_slv_nr() if MDTs will be added
199 return pool->qpi_slv_nr[QMT_STYPE_OST][qtype];
201 return pool->qpi_slv_nr[QMT_STYPE_MDT][qtype];
204 * Helper routines and prototypes
207 /* helper routine to find qmt_pool_info associated a lquota_entry */
208 static inline struct qmt_pool_info *lqe2qpi(struct lquota_entry *lqe)
210 LASSERT(lqe_is_master(lqe));
211 return (struct qmt_pool_info *)lqe->lqe_site->lqs_parent;
214 /* return true if someone holds either a read or write lock on the lqe */
215 static inline bool lqe_is_locked(struct lquota_entry *lqe)
217 LASSERT(lqe_is_master(lqe));
218 if (down_write_trylock(&lqe->lqe_sem) == 0)
220 lqe_write_unlock(lqe);
224 /* value to be restored if someone wrong happens during lqe writeback */
225 struct qmt_lqe_restore {
233 #define QMT_MAX_POOL_NUM 16
234 /* Common data shared by qmt handlers */
235 struct qmt_thread_info {
236 union lquota_rec qti_rec;
237 union lquota_id qti_id;
238 char qti_buf[MTI_NAME_MAXLEN];
239 struct lu_fid qti_fid;
240 struct ldlm_res_id qti_resid;
241 union ldlm_gl_desc qti_gl_desc;
242 struct quota_body qti_body;
244 struct qmt_lqe_restore qti_lqes_rstr_small[QMT_MAX_POOL_NUM];
245 struct qmt_lqe_restore *qti_lqes_rstr;
248 struct qmt_pool_info *qti_pools_small[QMT_MAX_POOL_NUM];
249 /* Pointer to an array of qpis in case when
250 * qti_pools_cnt > QMT_MAX_POOL_NUM. */
251 struct qmt_pool_info **qti_pools;
253 /* The number of pools in qti_pools */
255 /* Maximum number of elements in qti_pools array.
256 * By default it is QMT_MAX_POOL_NUM. */
258 int qti_glbl_lqe_idx;
259 /* The same is for lqe ... */
261 struct lquota_entry *qti_lqes_small[QMT_MAX_POOL_NUM];
262 /* Pointer to an array of lqes in case when
263 * qti_lqes_cnt > QMT_MAX_POOL_NUM. */
264 struct lquota_entry **qti_lqes;
266 /* The number of lqes in qti_lqes */
268 /* Maximum number of elements in qti_lqes array.
269 * By default it is QMT_MAX_POOL_NUM. */
273 extern struct lu_context_key qmt_thread_key;
275 /* helper function to extract qmt_thread_info from current environment */
277 struct qmt_thread_info *qmt_info(const struct lu_env *env)
279 return lu_env_info(env, &qmt_thread_key);
/* Accessors over the per-env lqe array (one lqe per pool the quota ID
 * belongs to); a non-zero qti_lqes_num doubles as the "inited" flag. */
#define qti_lqes_num(env)	(qmt_info(env)->qti_lqes_num)
#define qti_lqes_inited(env)	(qmt_info(env)->qti_lqes_num)
#define qti_lqes_cnt(env)	(qmt_info(env)->qti_lqes_cnt)
#define qti_glbl_lqe_idx(env)	(qmt_info(env)->qti_glbl_lqe_idx)
/* use the embedded small arrays until capacity exceeds QMT_MAX_POOL_NUM,
 * then switch to the heap-allocated ones */
#define qti_lqes(env)		(qti_lqes_num(env) > QMT_MAX_POOL_NUM ? \
				 qmt_info(env)->qti_lqes : \
				 qmt_info(env)->qti_lqes_small)
#define qti_lqes_rstr(env)	(qti_lqes_num(env) > QMT_MAX_POOL_NUM ? \
				 qmt_info(env)->qti_lqes_rstr : \
				 qmt_info(env)->qti_lqes_rstr_small)
/* the global pool's lqe and per-index field shortcuts */
#define qti_lqes_glbl(env)	(qti_lqes(env)[qti_glbl_lqe_idx(env)])
#define qti_lqe_hard(env, i)	(qti_lqes(env)[i]->lqe_hardlimit)
#define qti_lqe_soft(env, i)	(qti_lqes(env)[i]->lqe_softlimit)
#define qti_lqe_granted(env, i)	(qti_lqes(env)[i]->lqe_granted)
#define qti_lqe_qunit(env, i)	(qti_lqes(env)[i]->lqe_qunit)
298 /* helper routine to convert a lu_device into a qmt_device */
299 static inline struct qmt_device *lu2qmt_dev(struct lu_device *ld)
301 return container_of_safe(lu2dt_dev(ld), struct qmt_device, qmt_dt_dev);
304 /* helper routine to convert a qmt_device into lu_device */
305 static inline struct lu_device *qmt2lu_dev(struct qmt_device *qmt)
307 return &qmt->qmt_dt_dev.dd_lu_dev;
310 #define LQE_ROOT(lqe) (lqe2qpi(lqe)->qpi_root)
311 #define LQE_GLB_OBJ(lqe) (lqe2qpi(lqe)->qpi_glb_obj[lqe_qtype(lqe)])
313 /* helper function returning grace time to use for a given lquota entry */
314 static inline __u64 qmt_lqe_grace(struct lquota_entry *lqe)
316 struct qmt_pool_info *pool = lqe2qpi(lqe);
317 struct lquota_entry *grace_lqe;
319 grace_lqe = pool->qpi_grace_lqe[lqe_qtype(lqe)];
320 LASSERT(grace_lqe != NULL);
322 return grace_lqe->lqe_gracetime;
325 static inline void qmt_restore(struct lquota_entry *lqe,
326 struct qmt_lqe_restore *restore)
328 lqe->lqe_hardlimit = restore->qlr_hardlimit;
329 lqe->lqe_softlimit = restore->qlr_softlimit;
330 lqe->lqe_gracetime = restore->qlr_gracetime;
331 lqe->lqe_granted = restore->qlr_granted;
332 lqe->lqe_qunit = restore->qlr_qunit;
335 static inline void qmt_restore_lqes(const struct lu_env *env)
339 for (i = 0; i < qti_lqes_cnt(env); i++)
340 qmt_restore(qti_lqes(env)[i], &qti_lqes_rstr(env)[i]);
/* account @cnt units as granted: bump both the master's total and the
 * slave's share. do/while(0) makes the macro statement-safe. */
#define QMT_GRANT(lqe, slv, cnt)             \
	do {                                 \
		(lqe)->lqe_granted += (cnt); \
		(slv) += (cnt);              \
	} while (0)
/* release @cnt units: shrink both the master's total and the slave's share */
#define QMT_REL(lqe, slv, cnt)               \
	do {                                 \
		(lqe)->lqe_granted -= (cnt); \
		(slv) -= (cnt);              \
	} while (0)
354 /* helper routine returning true when reached hardlimit */
355 static inline bool qmt_hard_exhausted(struct lquota_entry *lqe)
357 if (lqe->lqe_hardlimit != 0 && lqe->lqe_granted >= lqe->lqe_hardlimit)
362 /* helper routine returning true when reached softlimit */
363 static inline bool qmt_soft_exhausted(struct lquota_entry *lqe, __u64 now)
365 if (lqe->lqe_softlimit != 0 && lqe->lqe_granted > lqe->lqe_softlimit &&
366 lqe->lqe_gracetime != 0 && now >= lqe->lqe_gracetime)
371 /* helper routine returning true when the id has run out of quota space:
372 * - reached hardlimit
374 * - reached softlimit and grace time expired already */
375 static inline bool qmt_space_exhausted(struct lquota_entry *lqe, __u64 now)
377 return (qmt_hard_exhausted(lqe) || qmt_soft_exhausted(lqe, now));
380 static inline bool qmt_space_exhausted_lqes(const struct lu_env *env, __u64 now)
382 bool exhausted = false;
385 for (i = 0; i < qti_lqes_cnt(env) && !exhausted; i++)
386 exhausted |= qmt_space_exhausted(qti_lqes(env)[i], now);
391 /* helper routine clearing the default quota setting */
392 static inline void qmt_lqe_clear_default(struct lquota_entry *lqe)
394 lqe->lqe_is_default = false;
395 lqe->lqe_gracetime &= ~((__u64)LQUOTA_FLAG_DEFAULT <<
399 /* number of seconds to wait for slaves to release quota space after
401 #define QMT_REBA_TIMEOUT 2
405 void qmt_pool_free(const struct lu_env *, struct qmt_pool_info *);
407 * Reference counter management for qmt_pool_info structures
409 static inline void qpi_getref(struct qmt_pool_info *pool)
411 atomic_inc(&pool->qpi_ref);
414 static inline void qpi_putref(const struct lu_env *env,
415 struct qmt_pool_info *pool)
417 LASSERT(atomic_read(&pool->qpi_ref) > 0);
418 if (atomic_dec_and_test(&pool->qpi_ref))
419 qmt_pool_free(env, pool);
423 void qmt_pool_fini(const struct lu_env *, struct qmt_device *);
424 int qmt_pool_init(const struct lu_env *, struct qmt_device *);
425 int qmt_pool_prepare(const struct lu_env *, struct qmt_device *,
426 struct dt_object *, char *);
427 int qmt_pool_new_conn(const struct lu_env *, struct qmt_device *,
428 struct lu_fid *, struct lu_fid *, __u64 *,
431 #define GLB_POOL_NAME "0x0"
432 #define qmt_pool_lookup_glb(env, qmt, type) \
433 qmt_pool_lookup(env, qmt, type, NULL, -1, false)
434 #define qmt_pool_lookup_name(env, qmt, type, name) \
435 qmt_pool_lookup(env, qmt, type, name, -1, false)
 * Until MDT pools are implemented, all MDTs belong to the
 * global pool, thus lookup lqes only in the global pool for the
442 #define qmt_pool_lookup_arr(env, qmt, type, idx, stype) \
443 qmt_pool_lookup(env, qmt, type, NULL, \
444 qmt_dom(type, stype) ? -1 : idx, true)
445 struct qmt_pool_info *qmt_pool_lookup(const struct lu_env *env,
446 struct qmt_device *qmt,
451 struct lquota_entry *qmt_pool_lqe_lookup(const struct lu_env *,
452 struct qmt_device *, int, int,
453 union lquota_id *, char *);
454 int qmt_pool_lqes_lookup(const struct lu_env *, struct qmt_device *, int,
455 int, int, union lquota_id *, char *, int);
456 int qmt_pool_lqes_lookup_spec(const struct lu_env *env, struct qmt_device *qmt,
457 int rtype, int qtype, union lquota_id *qid);
458 void qmt_lqes_sort(const struct lu_env *env);
459 int qmt_pool_new(struct obd_device *obd, char *poolname);
460 int qmt_pool_add(struct obd_device *obd, char *poolname, char *ostname);
461 int qmt_pool_rem(struct obd_device *obd, char *poolname, char *ostname);
462 int qmt_pool_del(struct obd_device *obd, char *poolname);
464 #define qmt_sarr_read_down(qpi) down_read(&qpi->qpi_sarr.osts.op_rw_sem)
465 #define qmt_sarr_read_up(qpi) up_read(&qpi->qpi_sarr.osts.op_rw_sem)
466 #define qmt_sarr_write_down(qpi) down_write(&qpi->qpi_sarr.osts.op_rw_sem)
467 #define qmt_sarr_write_up(qpi) up_write(&qpi->qpi_sarr.osts.op_rw_sem)
468 int qmt_sarr_get_idx(struct qmt_pool_info *qpi, int arr_idx);
469 unsigned int qmt_sarr_count(struct qmt_pool_info *qpi);
472 extern const struct lquota_entry_operations qmt_lqe_ops;
473 int qmt_lqe_set_default(const struct lu_env *env, struct qmt_pool_info *pool,
474 struct lquota_entry *lqe, bool create_record);
475 struct thandle *qmt_trans_start_with_slv(const struct lu_env *,
476 struct lquota_entry *,
479 struct thandle *qmt_trans_start(const struct lu_env *, struct lquota_entry *);
480 int qmt_glb_write_lqes(const struct lu_env *, struct thandle *, __u32, __u64 *);
481 int qmt_glb_write(const struct lu_env *, struct thandle *,
482 struct lquota_entry *, __u32, __u64 *);
483 int qmt_slv_write(const struct lu_env *, struct thandle *,
484 struct lquota_entry *, struct dt_object *, __u32, __u64 *,
486 int qmt_slv_read(const struct lu_env *, union lquota_id *,
487 struct dt_object *, __u64 *);
488 int qmt_validate_limits(struct lquota_entry *, __u64, __u64);
489 bool qmt_adjust_qunit(const struct lu_env *, struct lquota_entry *);
490 bool qmt_adjust_edquot(struct lquota_entry *, __u64);
/* Convenience wrappers around qmt_adjust_edquot_qunit_notify():
 * - qmt_adjust_edquot_notify(): adjust edquot only, no target index (-1)
 * - qmt_adjust_notify_nu(): adjust both edquot and qunit for slave @idx
 * NOTE(review): trailing argument lines reconstructed — verify against
 * the qmt_adjust_edquot_qunit_notify() definition. */
#define qmt_adjust_edquot_notify(env, qmt, now, qb_flags) \
	qmt_adjust_edquot_qunit_notify(env, qmt, now, true, \
				       false, qb_flags, -1)
#define qmt_adjust_notify_nu(env, qmt, now, qb_flags, idx) \
	qmt_adjust_edquot_qunit_notify(env, qmt, now, true, \
				       true, qb_flags, idx)
498 bool qmt_adjust_edquot_qunit_notify(const struct lu_env *, struct qmt_device *,
499 __u64, bool, bool, __u32, int idx);
500 bool qmt_revalidate(const struct lu_env *, struct lquota_entry *);
501 void qmt_revalidate_lqes(const struct lu_env *, struct qmt_device *, __u32);
502 __u64 qmt_alloc_expand(struct lquota_entry *, __u64, __u64);
504 void qti_lqes_init(const struct lu_env *env);
505 int qti_lqes_add(const struct lu_env *env, struct lquota_entry *lqe);
506 void qti_lqes_del(const struct lu_env *env, int index);
507 void qti_lqes_fini(const struct lu_env *env);
508 __u64 qti_lqes_min_qunit(const struct lu_env *env);
509 int qti_lqes_edquot(const struct lu_env *env);
510 int qti_lqes_restore_init(const struct lu_env *env);
511 void qti_lqes_restore_fini(const struct lu_env *env);
512 void qti_lqes_write_lock(const struct lu_env *env);
513 void qti_lqes_write_unlock(const struct lu_env *env);
515 int qmt_map_lge_idx(struct lqe_glbl_data *lgd, int ostidx);
516 struct lqe_glbl_data *qmt_alloc_lqe_gd(struct qmt_pool_info *, int);
517 void qmt_free_lqe_gd(struct lqe_glbl_data *);
518 void qmt_setup_lqe_gd(const struct lu_env *, struct qmt_device *,
519 struct lquota_entry *, struct lqe_glbl_data *, int);
520 #define qmt_seed_glbe_edquot(env, lqeg) \
521 qmt_seed_glbe_all(env, lqeg, false, true, false)
522 #define qmt_seed_glbe_qunit(env, lqeg) \
523 qmt_seed_glbe_all(env, lqeg, true, false, false)
524 #define qmt_seed_glbe(env, lqeg, pool_locked) \
525 qmt_seed_glbe_all(env, lqeg, true, true, pool_locked)
526 void qmt_seed_glbe_all(const struct lu_env *, struct lqe_glbl_data *,
530 int qmt_set_with_lqe(const struct lu_env *env, struct qmt_device *qmt,
531 struct lquota_entry *lqe, __u64 hard, __u64 soft,
532 __u64 time, __u32 valid, bool is_default, bool is_updated);
533 int qmt_dqacq0(const struct lu_env *, struct qmt_device *, struct obd_uuid *,
534 __u32, __u64, __u64, struct quota_body *, int);
535 int qmt_uuid2idx(struct obd_uuid *, int *);
538 int qmt_intent_policy(const struct lu_env *, struct lu_device *,
539 struct ptlrpc_request *, struct ldlm_lock **, int);
540 int qmt_lvbo_init(struct lu_device *, struct ldlm_resource *);
541 int qmt_lvbo_update(struct lu_device *, struct ldlm_resource *,
542 struct ptlrpc_request *, int);
543 int qmt_lvbo_size(struct lu_device *, struct ldlm_lock *);
544 int qmt_lvbo_fill(struct lu_device *, struct ldlm_lock *, void *, int);
545 int qmt_lvbo_free(struct lu_device *, struct ldlm_resource *);
546 int qmt_start_reba_thread(struct qmt_device *);
547 void qmt_stop_reba_thread(struct qmt_device *);
548 void qmt_glb_lock_notify(const struct lu_env *, struct lquota_entry *, __u64);
549 void qmt_id_lock_notify(struct qmt_device *, struct lquota_entry *);
550 #endif /* _QMT_INTERNAL_H */