Whamcloud - gitweb
LU-4017 quota: cleanup to improve quota codes
[fs/lustre-release.git] / lustre / osd-ldiskfs / osd_quota.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; if not, write to the
18  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19  * Boston, MA 021110-1307, USA
20  *
21  * GPL HEADER END
22  */
23 /*
24  * Copyright (c) 2012, 2015, Intel Corporation.
25  * Use is subject to license terms.
26  *
27  * Author: Johann Lombardi <johann@whamcloud.com>
28  * Author: Niu    Yawei    <niu@whamcloud.com>
29  */
30
31 #include <lustre_quota.h>
32 #include "osd_internal.h"
33
34 /**
35  * Helpers function to find out the quota type (USRQUOTA/GRPQUOTA) of a
36  * given object
37  */
38 static inline int fid2type(const struct lu_fid *fid)
39 {
40         LASSERT(fid_is_acct(fid));
41         switch (fid_oid(fid)) {
42         case ACCT_USER_OID:
43                 return USRQUOTA;
44         case ACCT_GROUP_OID:
45                 return GRPQUOTA;
46         case ACCT_PROJECT_OID:
47                 return PRJQUOTA;
48         }
49
50         LASSERTF(0, "invalid fid for quota type: %u", fid_oid(fid));
51         return USRQUOTA;
52 }
53
54 /**
55  * Space Accounting Management
56  */
57
58 /**
59  * Look up an accounting object based on its fid.
60  *
61  * \param info - is the osd thread info passed by the caller
62  * \param osd  - is the osd device
63  * \param fid  - is the fid of the accounting object we want to look up
64  * \param id   - is the osd_inode_id struct to fill with the inode number of
65  *               the quota file if the lookup is successful
66  */
67 int osd_acct_obj_lookup(struct osd_thread_info *info, struct osd_device *osd,
68                         const struct lu_fid *fid, struct osd_inode_id *id)
69 {
70         struct super_block *sb = osd_sb(osd);
71
72         ENTRY;
73         LASSERT(fid_is_acct(fid));
74
75         if (!LDISKFS_HAS_RO_COMPAT_FEATURE(sb,
76                                            LDISKFS_FEATURE_RO_COMPAT_QUOTA))
77                 RETURN(-ENOENT);
78
79         id->oii_gen = OSD_OII_NOGEN;
80         switch (fid2type(fid)) {
81         case USRQUOTA:
82                 id->oii_ino =
83                         le32_to_cpu(LDISKFS_SB(sb)->s_es->s_usr_quota_inum);
84                 break;
85         case GRPQUOTA:
86                 id->oii_ino =
87                         le32_to_cpu(LDISKFS_SB(sb)->s_es->s_grp_quota_inum);
88                 break;
89         case PRJQUOTA:
90  #ifdef HAVE_PROJECT_QUOTA
91                 if (LDISKFS_HAS_RO_COMPAT_FEATURE(sb,
92                                         LDISKFS_FEATURE_RO_COMPAT_PROJECT))
93                         id->oii_ino =
94                                 le32_to_cpu(LDISKFS_SB(sb)->s_es->s_prj_quota_inum);
95                 else
96  #endif
97                         RETURN(-ENOENT);
98                 break;
99         }
100         if (!ldiskfs_valid_inum(sb, id->oii_ino))
101                 RETURN(-ENOENT);
102         RETURN(0);
103 }
104
105 /**
106  * Return space usage (#blocks & #inodes) consumed by a given uid or gid.
107  *
108  * \param env   - is the environment passed by the caller
109  * \param dtobj - is the accounting object
110  * \param dtrec - is the record to fill with space usage information
111  * \param dtkey - is the id of the user or group for which we would
112  *                like to access disk usage.
113  *
114  * \retval +ve - success : exact match
115  * \retval -ve - failure
116  */
117 static int osd_acct_index_lookup(const struct lu_env *env,
118                                  struct dt_object *dtobj,
119                                  struct dt_rec *dtrec,
120                                  const struct dt_key *dtkey)
121 {
122         struct osd_thread_info  *info = osd_oti_get(env);
123 #if defined(HAVE_DQUOT_QC_DQBLK)
124         struct qc_dqblk         *dqblk = &info->oti_qdq;
125 #elif defined(HAVE_DQUOT_FS_DISK_QUOTA)
126         struct fs_disk_quota    *dqblk = &info->oti_fdq;
127 #else
128         struct if_dqblk         *dqblk = &info->oti_dqblk;
129 #endif
130         struct super_block      *sb = osd_sb(osd_obj2dev(osd_dt_obj(dtobj)));
131         struct lquota_acct_rec  *rec = (struct lquota_acct_rec *)dtrec;
132         __u64                    id = *((__u64 *)dtkey);
133         int                      rc;
134 #ifdef HAVE_DQUOT_KQID
135         struct kqid              qid;
136 #endif
137         int type;
138
139         ENTRY;
140
141         type = fid2type(lu_object_fid(&dtobj->do_lu));
142         memset(dqblk, 0, sizeof(*dqblk));
143 #ifdef HAVE_DQUOT_KQID
144         qid = make_kqid(&init_user_ns, type, id);
145         rc = sb->s_qcop->get_dqblk(sb, qid, dqblk);
146 #else
147         rc = sb->s_qcop->get_dqblk(sb, type, (qid_t) id, dqblk);
148 #endif
149         if (rc)
150                 RETURN(rc);
151 #if defined(HAVE_DQUOT_QC_DQBLK)
152         rec->bspace = dqblk->d_space;
153         rec->ispace = dqblk->d_ino_count;
154 #elif defined(HAVE_DQUOT_FS_DISK_QUOTA)
155         rec->bspace = dqblk->d_bcount;
156         rec->ispace = dqblk->d_icount;
157 #else
158         rec->bspace = dqblk->dqb_curspace;
159         rec->ispace = dqblk->dqb_curinodes;
160 #endif
161         RETURN(+1);
162 }
163
/*
 * Log a failure to read quota information.
 *
 * \param it - osd quota iterator; the device name is taken from the device
 *             of the object it iterates over
 * \param rc - error code to report
 *
 * The body is wrapped in do { } while (0) and carries no trailing semicolon,
 * so "QUOTA_IT_READ_ERROR(it, rc);" is exactly one statement and is safe in
 * an unbraced if/else. Both arguments are parenthesized so that arbitrary
 * expressions can be passed.
 */
#define QUOTA_IT_READ_ERROR(it, rc)                                    \
do {                                                                   \
        CERROR("%s: Error while trying to read quota information, "    \
               "failed with %d\n",                                     \
               osd_dev((it)->oiq_obj->oo_dt.do_lu.lo_dev)->od_svname,  \
               (rc));                                                  \
} while (0)

169 /**
170  * Initialize osd Iterator for given osd index object.
171  *
172  * \param  dt    - osd index object
173  * \param  attr  - not used
174  */
175 static struct dt_it *osd_it_acct_init(const struct lu_env *env,
176                                       struct dt_object *dt,
177                                       __u32 attr)
178 {
179         struct osd_it_quota     *it;
180         struct lu_object        *lo = &dt->do_lu;
181         struct osd_object       *obj = osd_dt_obj(dt);
182
183         ENTRY;
184
185         LASSERT(lu_object_exists(lo));
186
187         OBD_ALLOC_PTR(it);
188         if (it == NULL)
189                 RETURN(ERR_PTR(-ENOMEM));
190
191         lu_object_get(lo);
192         it->oiq_obj = obj;
193         INIT_LIST_HEAD(&it->oiq_list);
194
195         /* LUSTRE_DQTREEOFF is the initial offset where the tree can be found */
196         it->oiq_blk[0] = LUSTRE_DQTREEOFF;
197
198         /* NB: we don't need to store the tree depth since it is always
199          * equal to LUSTRE_DQTREEDEPTH - 1 (root has depth = 0) for a leaf
200          * block. */
201         RETURN((struct dt_it *)it);
202 }
203
204 /**
205  * Free given iterator.
206  *
207  * \param  di   - osd iterator
208  */
209 static void osd_it_acct_fini(const struct lu_env *env, struct dt_it *di)
210 {
211         struct osd_it_quota *it = (struct osd_it_quota *)di;
212         struct osd_quota_leaf *leaf, *tmp;
213         ENTRY;
214
215         osd_object_put(env, it->oiq_obj);
216
217         list_for_each_entry_safe(leaf, tmp, &it->oiq_list, oql_link) {
218                 list_del_init(&leaf->oql_link);
219                 OBD_FREE_PTR(leaf);
220         }
221
222         OBD_FREE_PTR(it);
223
224         EXIT;
225 }
226
227 /**
228  * Move Iterator to record specified by \a key, if the \a key isn't found,
229  * move to the first valid record.
230  *
231  * \param  di   - osd iterator
232  * \param  key  - uid or gid
233  *
234  * \retval +ve  - di points to the first valid record
235  * \retval  +1  - di points to exact matched key
236  * \retval -ve  - failure
237  */
238 static int osd_it_acct_get(const struct lu_env *env, struct dt_it *di,
239                            const struct dt_key *key)
240 {
241         struct osd_it_quota     *it = (struct osd_it_quota *)di;
242         const struct lu_fid     *fid =
243                                 lu_object_fid(&it->oiq_obj->oo_dt.do_lu);
244         int                      type;
245         qid_t                    dqid = *(qid_t *)key;
246         loff_t                   offset;
247         int                      rc;
248
249         ENTRY;
250         type = fid2type(fid);
251
252         offset = find_tree_dqentry(env, it->oiq_obj, type, dqid,
253                                    LUSTRE_DQTREEOFF, 0, it);
254         if (offset > 0) { /* Found */
255                 RETURN(+1);
256         } else if (offset < 0) { /* Error */
257                 QUOTA_IT_READ_ERROR(it, (int)offset);
258                 RETURN((int)offset);
259         }
260
261         /* The @key is not found, move to the first valid entry */
262         rc = walk_tree_dqentry(env, it->oiq_obj, type, it->oiq_blk[0], 0,
263                                0, it);
264         if (rc == 0)
265                 rc = 1;
266         else if (rc > 0)
267                 rc = -ENOENT;
268
269         RETURN(rc);
270 }
271
/**
 * Release Iterator.
 *
 * Nothing is held between get and put for accounting iterators, so this is
 * a no-op.
 *
 * \param  env  - environment passed by the caller
 * \param  di   - osd iterator
 */
static void osd_it_acct_put(const struct lu_env *env, struct dt_it *di)
{
        /* intentionally empty */
}
281
282 static int osd_it_add_processed(struct osd_it_quota *it, int depth)
283 {
284         struct osd_quota_leaf *leaf;
285
286         OBD_ALLOC_PTR(leaf);
287         if (leaf == NULL)
288                 RETURN(-ENOMEM);
289         INIT_LIST_HEAD(&leaf->oql_link);
290         leaf->oql_blk = it->oiq_blk[depth];
291         list_add_tail(&leaf->oql_link, &it->oiq_list);
292         RETURN(0);
293 }
294
295 /**
296  * Move on to the next valid entry.
297  *
298  * \param  di   - osd iterator
299  *
300  * \retval +ve  - iterator reached the end
301  * \retval   0  - iterator has not reached the end yet
302  * \retval -ve  - unexpected failure
303  */
304 static int osd_it_acct_next(const struct lu_env *env, struct dt_it *di)
305 {
306         struct osd_it_quota     *it = (struct osd_it_quota *)di;
307         const struct lu_fid     *fid =
308                                 lu_object_fid(&it->oiq_obj->oo_dt.do_lu);
309         int                      type;
310         int                      depth, rc;
311         uint                     index;
312
313         ENTRY;
314
315         type = fid2type(fid);
316
317         /* Let's first check if there are any remaining valid entry in the
318          * current leaf block. Start with the next entry after the current one.
319          */
320         depth = LUSTRE_DQTREEDEPTH;
321         index = it->oiq_index[depth];
322         if (++index < LUSTRE_DQSTRINBLK) {
323                 /* Search for the next valid entry from current index */
324                 rc = walk_block_dqentry(env, it->oiq_obj, type,
325                                         it->oiq_blk[depth], index, it);
326                 if (rc < 0) {
327                         QUOTA_IT_READ_ERROR(it, rc);
328                         RETURN(rc);
329                 } else if (rc == 0) {
330                         /* Found on entry, @it is already updated to the
331                          * new position in walk_block_dqentry(). */
332                         RETURN(0);
333                 } else {
334                         rc = osd_it_add_processed(it, depth);
335                         if (rc)
336                                 RETURN(rc);
337                 }
338         } else {
339                 rc = osd_it_add_processed(it, depth);
340                 if (rc)
341                         RETURN(rc);
342         }
343         rc = 1;
344
345         /* We have consumed all the entries of the current leaf block, move on
346          * to the next one. */
347         depth--;
348
349         /* We keep searching as long as walk_tree_dqentry() returns +1
350          * (= no valid entry found). */
351         for (; depth >= 0 && rc > 0; depth--) {
352                 index = it->oiq_index[depth];
353                 if (++index > 0xff)
354                         continue;
355                 rc = walk_tree_dqentry(env, it->oiq_obj, type,
356                                        it->oiq_blk[depth], depth, index, it);
357         }
358
359         if (rc < 0)
360                 QUOTA_IT_READ_ERROR(it, rc);
361         RETURN(rc);
362 }
363
364 /**
365  * Return pointer to the key under iterator.
366  *
367  * \param  di   - osd iterator
368  */
369 static struct dt_key *osd_it_acct_key(const struct lu_env *env,
370                                       const struct dt_it *di)
371 {
372         struct osd_it_quota *it = (struct osd_it_quota *)di;
373
374         ENTRY;
375         RETURN((struct dt_key *)&it->oiq_id);
376 }
377
378 /**
379  * Return size of key under iterator (in bytes)
380  *
381  * \param  di   - osd iterator
382  */
383 static int osd_it_acct_key_size(const struct lu_env *env,
384                                 const struct dt_it *di)
385 {
386         struct osd_it_quota *it = (struct osd_it_quota *)di;
387
388         ENTRY;
389         RETURN((int)sizeof(it->oiq_id));
390 }
391
392 /**
393  * Return pointer to the record under iterator.
394  *
395  * \param  di    - osd iterator
396  * \param  attr  - not used
397  */
398 static int osd_it_acct_rec(const struct lu_env *env,
399                            const struct dt_it *di,
400                            struct dt_rec *dtrec, __u32 attr)
401 {
402         struct osd_it_quota     *it = (struct osd_it_quota *)di;
403         const struct dt_key     *key = osd_it_acct_key(env, di);
404         int                      rc;
405
406         ENTRY;
407
408         rc = osd_acct_index_lookup(env, &it->oiq_obj->oo_dt, dtrec, key);
409         RETURN(rc > 0 ? 0 : rc);
410 }
411
412 /**
413  * Returns cookie for current Iterator position.
414  *
415  * \param  di    - osd iterator
416  */
417 static __u64 osd_it_acct_store(const struct lu_env *env,
418                                const struct dt_it *di)
419 {
420         struct osd_it_quota *it = (struct osd_it_quota *)di;
421
422         ENTRY;
423         RETURN(it->oiq_id);
424 }
425
426 /**
427  * Restore iterator from cookie. if the \a hash isn't found,
428  * restore the first valid record.
429  *
430  * \param  di    - osd iterator
431  * \param  hash  - iterator location cookie
432  *
433  * \retval +ve   - di points to the first valid record
434  * \retval  +1   - di points to exact matched hash
435  * \retval -ve   - failure
436  */
437 static int osd_it_acct_load(const struct lu_env *env,
438                             const struct dt_it *di, __u64 hash)
439 {
440         ENTRY;
441         RETURN(osd_it_acct_get(env, (struct dt_it *)di,
442                                (const struct dt_key *)&hash));
443 }
444
445 /**
446  * Index and Iterator operations for accounting objects
447  */
448 const struct dt_index_operations osd_acct_index_ops = {
449         .dio_lookup     = osd_acct_index_lookup,
450         .dio_it         = {
451                 .init           = osd_it_acct_init,
452                 .fini           = osd_it_acct_fini,
453                 .get            = osd_it_acct_get,
454                 .put            = osd_it_acct_put,
455                 .next           = osd_it_acct_next,
456                 .key            = osd_it_acct_key,
457                 .key_size       = osd_it_acct_key_size,
458                 .rec            = osd_it_acct_rec,
459                 .store          = osd_it_acct_store,
460                 .load           = osd_it_acct_load
461         }
462 };
463
464 static inline void osd_quota_swab(char *ptr, size_t size)
465 {
466         int offset;
467
468         LASSERT((size & (sizeof(__u64) - 1)) == 0);
469
470         for (offset = 0; offset < size; offset += sizeof(__u64))
471              __swab64s((__u64 *)(ptr + offset));
472 }
473
/**
 * Prepare a quota record for being written.
 *
 * On big-endian builds the record is copied into \a quota_rec (using the
 * record size from the object's iam descriptor), every 64-bit word of the
 * copy is byte-swapped, and the copy is returned. On other builds \a rec is
 * returned untouched and \a quota_rec is not used.
 *
 * \param obj       - osd object the record belongs to
 * \param rec       - record to pack
 * \param quota_rec - scratch buffer receiving the swapped copy
 *
 * \retval pointer to the record to use
 */
const struct dt_rec *osd_quota_pack(struct osd_object *obj,
                                    const struct dt_rec *rec,
                                    union lquota_rec *quota_rec)
{
#ifndef __BIG_ENDIAN
        return rec;
#else
        struct iam_descr        *desc;

        LASSERT(obj->oo_dir != NULL);
        desc = obj->oo_dir->od_container.ic_descr;

        /* swap a private copy so that the caller's record stays intact */
        memcpy(quota_rec, rec, desc->id_rec_size);
        osd_quota_swab((char *)quota_rec, desc->id_rec_size);

        return (const struct dt_rec *)quota_rec;
#endif
}
492
/**
 * Convert a quota record after it has been read.
 *
 * On big-endian builds every 64-bit word of \a rec is byte-swapped in place
 * (the record size comes from the object's iam descriptor). On other builds
 * nothing is done.
 *
 * \param obj - osd object the record belongs to
 * \param rec - record to unpack; modified in place despite the const
 *              qualifier
 */
void osd_quota_unpack(struct osd_object *obj, const struct dt_rec *rec)
{
#ifdef __BIG_ENDIAN
        struct iam_descr *desc;

        LASSERT(obj->oo_dir != NULL);
        desc = obj->oo_dir->od_container.ic_descr;

        osd_quota_swab((char *)rec, desc->id_rec_size);
#endif
}
506
507 static inline int osd_qid_type(struct osd_thandle *oh, int i)
508 {
509         return oh->ot_id_types[i];
510 }
511
512 /**
513  * Reserve journal credits for quota files update first, then call
514  * ->op_begin() to perform quota enforcement.
515  *
516  * \param  env     - the environment passed by the caller
517  * \param  oh      - osd transaction handle
518  * \param  qi      - quota id & space required for this operation
519  * \param  obj     - osd object, could be NULL when it's under create
520  * \param  enforce - whether to perform quota enforcement
521  * \param  flags   - if the operation is write, return no user quota, no
522  *                   group quota, or sync commit flags to the caller
523  *
524  * \retval 0       - success
525  * \retval -ve     - failure
526  */
527 int osd_declare_qid(const struct lu_env *env, struct osd_thandle *oh,
528                     struct lquota_id_info *qi, struct osd_object *obj,
529                     bool enforce, int *flags)
530 {
531         struct osd_device       *dev;
532         struct qsd_instance     *qsd;
533         struct inode            *inode = NULL;
534         int                      i, rc = 0, crd;
535         bool                     found = false;
536         ENTRY;
537
538         LASSERT(oh != NULL);
539         LASSERTF(oh->ot_id_cnt <= OSD_MAX_UGID_CNT, "count=%d\n",
540                  oh->ot_id_cnt);
541
542         dev = osd_dt_dev(oh->ot_super.th_dev);
543         LASSERT(dev != NULL);
544
545         qsd = dev->od_quota_slave;
546
547         for (i = 0; i < oh->ot_id_cnt; i++) {
548                 if (oh->ot_id_array[i] == qi->lqi_id.qid_uid &&
549                     oh->ot_id_types[i] == qi->lqi_type) {
550                         found = true;
551                         break;
552                 }
553         }
554
555         if (!found) {
556                 /* we need to account for credits for this new ID */
557                 if (i >= OSD_MAX_UGID_CNT) {
558                         CERROR("Too many(%d) trans qids!\n", i + 1);
559                         RETURN(-EOVERFLOW);
560                 }
561
562                 if (obj != NULL)
563                         inode = obj->oo_inode;
564
565                 /* root ID entry should be always present in the quota file */
566                 if (qi->lqi_id.qid_uid == 0) {
567                         crd = 1;
568                 } else {
569                         /* used space for this ID could be dropped to zero,
570                          * reserve extra credits for removing ID entry from
571                          * the quota file */
572                         if (qi->lqi_space < 0)
573                                 crd = LDISKFS_QUOTA_DEL_BLOCKS(osd_sb(dev));
574                         /* reserve credits for adding ID entry to the quota
575                          * file if the i_dquot isn't initialized yet. */
576                         else if (inode == NULL ||
577 #ifdef HAVE_EXT4_INFO_DQUOT
578                                  LDISKFS_I(inode)->i_dquot[qi->lqi_type] == NULL)
579 #else
580                                  inode->i_dquot[qi->lqi_type] == NULL)
581 #endif
582                                 crd = LDISKFS_QUOTA_INIT_BLOCKS(osd_sb(dev));
583                         else
584                                 crd = 1;
585                 }
586
587                 osd_trans_declare_op(env, oh, OSD_OT_QUOTA, crd);
588
589                 oh->ot_id_array[i] = qi->lqi_id.qid_uid;
590                 oh->ot_id_types[i] = qi->lqi_type;
591                 oh->ot_id_cnt++;
592         }
593
594         if (unlikely(qsd == NULL))
595                 /* quota slave instance hasn't been allocated yet */
596                 RETURN(0);
597
598         /* check quota */
599         if (enforce)
600                 rc = qsd_op_begin(env, qsd, oh->ot_quota_trans, qi, flags);
601         RETURN(rc);
602 }
603
604 /**
605  * Wrapper for osd_declare_qid()
606  *
607  * \param  env    - the environment passed by the caller
608  * \param  uid    - user id of the inode
609  * \param  gid    - group id of the inode
610  * \param  space  - how many blocks/inodes will be consumed/released
611  * \param  oh     - osd transaction handle
612  * \param  obj    - osd object, could be NULL when it's under create
613  * \param  flags  - if the operation is write, return no user quota, no
614  *                  group quota, or sync commit flags to the caller
615  * \param osd_qid_flags - indicate this is a inode/block accounting
616  *                      and whether changes are performed by root user
617  *
618  * \retval 0      - success
619  * \retval -ve    - failure
620  */
621 int osd_declare_inode_qid(const struct lu_env *env, qid_t uid, qid_t gid,
622                           __u32 projid, long long space, struct osd_thandle *oh,
623                           struct osd_object *obj, int *flags,
624                           enum osd_qid_declare_flags osd_qid_declare_flags)
625 {
626         struct osd_thread_info  *info = osd_oti_get(env);
627         struct lquota_id_info   *qi = &info->oti_qi;
628         int rcu, rcg, rcp; /* user & group & project rc */
629         int force = osd_qid_declare_flags & OSD_QID_FORCE;
630         ENTRY;
631
632         /* let's start with user quota */
633         qi->lqi_id.qid_uid = uid;
634         qi->lqi_type       = USRQUOTA;
635         qi->lqi_space      = space;
636         qi->lqi_is_blk     = !!(osd_qid_declare_flags & OSD_QID_BLK);
637         rcu = osd_declare_qid(env, oh, qi, obj, true, flags);
638
639         if (force && (rcu == -EDQUOT || rcu == -EINPROGRESS))
640                 /* ignore EDQUOT & EINPROGRESS when changes are done by root */
641                 rcu = 0;
642
643         /* For non-fatal error, we want to continue to get the noquota flags
644          * for group id. This is only for commit write, which has @flags passed
645          * in. See osd_declare_write_commit().
646          * When force is set to true, we also want to proceed with the gid */
647         if (rcu && (rcu != -EDQUOT || flags == NULL))
648                 RETURN(rcu);
649
650         /* and now group quota */
651         qi->lqi_id.qid_gid = gid;
652         qi->lqi_type       = GRPQUOTA;
653         rcg = osd_declare_qid(env, oh, qi, obj, true, flags);
654
655         if (force && (rcg == -EDQUOT || rcg == -EINPROGRESS))
656                 /* as before, ignore EDQUOT & EINPROGRESS for root */
657                 rcg = 0;
658         if (rcg && (rcg != -EDQUOT || flags == NULL))
659                 RETURN(rcg);
660
661         /* and now project quota */
662         qi->lqi_id.qid_gid = projid;
663         qi->lqi_type       = PRJQUOTA;
664         rcp = osd_declare_qid(env, oh, qi, obj, true, flags);
665
666         if (force && (rcp == -EDQUOT || rcp == -EINPROGRESS))
667                 /* as before, ignore EDQUOT & EINPROGRESS for root */
668                 rcp = 0;
669
670         if (rcu)
671                 RETURN(rcu);
672         if (rcg)
673                 RETURN(rcg);
674         if (rcp)
675                 RETURN(rcp);
676
677         RETURN(0);
678 }