Whamcloud - gitweb
LU-957 scrub: Ancillary work for LFSCK/OI scrub
[fs/lustre-release.git] / lustre / osd-ldiskfs / osd_oi.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  *
30  * Copyright (c) 2012, Whamcloud, Inc.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/osd-ldiskfs/osd_oi.c
37  *
38  * Object Index.
39  *
40  * Author: Nikita Danilov <nikita@clusterfs.com>
41  */
42
43 /*
44  * oi uses two mechanisms to implement fid->cookie mapping:
45  *
46  *     - persistent index, where cookie is a record and fid is a key, and
47  *
48  *     - algorithmic mapping for "igif" fids.
49  *
50  */
51
52 #define DEBUG_SUBSYSTEM S_MDS
53
54 #include <linux/module.h>
55
56 /* LUSTRE_VERSION_CODE */
57 #include <lustre_ver.h>
58 /*
59  * struct OBD_{ALLOC,FREE}*()
60  * OBD_FAIL_CHECK
61  */
62 #include <obd.h>
63 #include <obd_support.h>
64
65 /* fid_cpu_to_be() */
66 #include <lustre_fid.h>
67
68 #include "osd_oi.h"
69 /* osd_lookup(), struct osd_thread_info */
70 #include "osd_internal.h"
71 #include "osd_igif.h"
72 #include "dt_object.h"
73
74 #define OSD_OI_FID_NR         (1UL << OSD_OI_FID_OID_BITS)
75 #define OSD_OI_FID_NR_MAX     (1UL << OSD_OI_FID_OID_BITS_MAX)
76
77 static unsigned int osd_oi_count = OSD_OI_FID_NR;
78 CFS_MODULE_PARM(osd_oi_count, "i", int, 0444,
79                 "Number of Object Index containers to be created, "
80                 "it's only valid for new filesystem.");
81
82 /** to serialize concurrent OI index initialization */
83 static cfs_mutex_t oi_init_lock;
84
85 static struct dt_index_features oi_feat = {
86         .dif_flags       = DT_IND_UPDATE,
87         .dif_recsize_min = sizeof(struct osd_inode_id),
88         .dif_recsize_max = sizeof(struct osd_inode_id),
89         .dif_ptrsize     = 4
90 };
91
92 #define OSD_OI_NAME_BASE        "oi.16"
93
94 static void osd_oi_table_put(struct osd_thread_info *info,
95                              struct osd_oi **oi_table, unsigned oi_count)
96 {
97         struct iam_container *bag;
98         int                   i;
99
100         for (i = 0; i < oi_count; i++) {
101                 LASSERT(oi_table[i] != NULL);
102                 LASSERT(oi_table[i]->oi_inode != NULL);
103
104                 bag = &(oi_table[i]->oi_dir.od_container);
105                 if (bag->ic_object == oi_table[i]->oi_inode)
106                         iam_container_fini(bag);
107                 iput(oi_table[i]->oi_inode);
108                 oi_table[i]->oi_inode = NULL;
109                 OBD_FREE_PTR(oi_table[i]);
110         }
111 }
112
113 static int osd_oi_index_create_one(struct osd_thread_info *info,
114                                    struct osd_device *osd, const char *name,
115                                    struct dt_index_features *feat)
116 {
117         const struct lu_env             *env = info->oti_env;
118         struct osd_inode_id             *id  = &info->oti_id;
119         struct buffer_head              *bh;
120         struct inode                    *inode;
121         struct ldiskfs_dir_entry_2      *de;
122         struct dentry                   *dentry;
123         struct inode                    *dir;
124         handle_t                        *jh;
125         int                              rc;
126
127         dentry = osd_child_dentry_by_inode(env, osd_sb(osd)->s_root->d_inode,
128                                            name, strlen(name));
129         dir = osd_sb(osd)->s_root->d_inode;
130         bh = osd_ldiskfs_find_entry(dir, dentry, &de, NULL);
131         if (bh) {
132                 brelse(bh);
133
134                 id->oii_ino = le32_to_cpu(de->inode);
135                 id->oii_gen = OSD_OII_NOGEN;
136
137                 inode = osd_iget(info, osd, id);
138                 if (!IS_ERR(inode)) {
139                         iput(inode);
140                         RETURN(-EEXIST);
141                 }
142                 RETURN(PTR_ERR(inode));
143         }
144
145         jh = ldiskfs_journal_start_sb(osd_sb(osd), 100);
146         LASSERT(!IS_ERR(jh));
147
148         inode = ldiskfs_create_inode(jh, osd_sb(osd)->s_root->d_inode,
149                                      (S_IFREG | S_IRUGO | S_IWUSR));
150         LASSERT(!IS_ERR(inode));
151
152         if (feat->dif_flags & DT_IND_VARKEY)
153                 rc = iam_lvar_create(inode, feat->dif_keysize_max,
154                                      feat->dif_ptrsize, feat->dif_recsize_max,
155                                      jh);
156         else
157                 rc = iam_lfix_create(inode, feat->dif_keysize_max,
158                                      feat->dif_ptrsize, feat->dif_recsize_max,
159                                      jh);
160
161         dentry = osd_child_dentry_by_inode(env, osd_sb(osd)->s_root->d_inode,
162                                            name, strlen(name));
163         rc = osd_ldiskfs_add_entry(jh, dentry, inode, NULL);
164         LASSERT(rc == 0);
165
166         ldiskfs_journal_stop(jh);
167         iput(inode);
168
169         return rc;
170 }
171
172 static struct inode *osd_oi_index_open(struct osd_thread_info *info,
173                                        struct osd_device *osd,
174                                        const char *name,
175                                        struct dt_index_features *f,
176                                        bool create)
177 {
178         struct dentry *dentry;
179         struct inode  *inode;
180         int            rc;
181
182         dentry = ll_lookup_one_len(name, osd_sb(osd)->s_root, strlen(name));
183         if (IS_ERR(dentry))
184                 return (void *) dentry;
185
186         if (dentry->d_inode) {
187                 LASSERT(!is_bad_inode(dentry->d_inode));
188                 inode = dentry->d_inode;
189                 atomic_inc(&inode->i_count);
190                 dput(dentry);
191                 return inode;
192         }
193
194         /* create */
195         dput(dentry);
196         shrink_dcache_parent(osd_sb(osd)->s_root);
197         if (!create)
198                 return ERR_PTR(-ENOENT);
199
200         rc = osd_oi_index_create_one(info, osd, name, f);
201         if (rc)
202                 RETURN(ERR_PTR(rc));
203
204         dentry = ll_lookup_one_len(name, osd_sb(osd)->s_root, strlen(name));
205         if (IS_ERR(dentry))
206                 return (void *) dentry;
207
208         if (dentry->d_inode) {
209                 LASSERT(!is_bad_inode(dentry->d_inode));
210                 inode = dentry->d_inode;
211                 atomic_inc(&inode->i_count);
212                 dput(dentry);
213                 return inode;
214         }
215
216         return ERR_PTR(-ENOENT);
217 }
218
219 /**
220  * Open an OI(Ojbect Index) container.
221  *
222  * \param       name    Name of OI container
223  * \param       objp    Pointer of returned OI
224  *
225  * \retval      0       success
226  * \retval      -ve     failure
227  */
228 static int osd_oi_open(struct osd_thread_info *info, struct osd_device *osd,
229                        char *name, struct osd_oi **oi_slot, bool create)
230 {
231         struct osd_directory *dir;
232         struct iam_container *bag;
233         struct inode         *inode;
234         struct osd_oi        *oi;
235         int                   rc;
236
237         ENTRY;
238
239         oi_feat.dif_keysize_min = sizeof(struct lu_fid);
240         oi_feat.dif_keysize_max = sizeof(struct lu_fid);
241
242         inode = osd_oi_index_open(info, osd, name, &oi_feat, create);
243         if (IS_ERR(inode))
244                 RETURN(PTR_ERR(inode));
245
246         OBD_ALLOC_PTR(oi);
247         if (oi == NULL)
248                 GOTO(out_inode, rc = -ENOMEM);
249
250         oi->oi_inode = inode;
251         dir = &oi->oi_dir;
252
253         bag = &dir->od_container;
254         rc = iam_container_init(bag, &dir->od_descr, inode);
255         if (rc < 0)
256                 GOTO(out_free, rc);
257
258         rc = iam_container_setup(bag);
259         if (rc < 0)
260                 GOTO(out_container, rc);
261
262         *oi_slot = oi;
263         RETURN(0);
264
265 out_container:
266         iam_container_fini(bag);
267 out_free:
268         OBD_FREE_PTR(oi);
269 out_inode:
270         iput(inode);
271         return rc;
272 }
273
274 /**
275  * Open OI(Object Index) table.
276  * If \a oi_count is zero, which means caller doesn't know how many OIs there
277  * will be, this function can either return 0 for new filesystem, or number
278  * of OIs on existed filesystem.
279  *
280  * If \a oi_count is non-zero, which means caller does know number of OIs on
281  * filesystem, this function should return the exactly same number on
282  * success, or error code in failure.
283  *
284  * \param     oi_count  Number of expected OI containers
285  * \param     create    Create OIs if doesn't exist
286  *
287  * \retval    +ve       number of opened OI containers
288  * \retval      0       no OI containers found
289  * \retval    -ve       failure
290  */
291 static int
292 osd_oi_table_open(struct osd_thread_info *info, struct osd_device *osd,
293                   struct osd_oi **oi_table, unsigned oi_count, bool create)
294 {
295         struct dt_device *dev = &osd->od_dt_dev;
296         int               count = 0;
297         int               rc = 0;
298         int               i;
299
300         /* NB: oi_count != 0 means that we have already created/known all OIs
301          * and have known exact number of OIs. */
302         LASSERT(oi_count <= OSD_OI_FID_NR_MAX);
303
304         for (i = 0; i < (oi_count != 0 ? oi_count : OSD_OI_FID_NR_MAX); i++) {
305                 char name[12];
306
307                 sprintf(name, "%s.%d", OSD_OI_NAME_BASE, i);
308                 rc = osd_oi_open(info, osd, name, &oi_table[i], create);
309                 if (rc == 0) {
310                         count++;
311                         continue;
312                 }
313
314                 if (rc == -ENOENT && oi_count == 0)
315                         return count;
316
317                 CERROR("%s: can't open %s: rc = %d\n",
318                        dev->dd_lu_dev.ld_obd->obd_name, name, rc);
319                 if (oi_count > 0) {
320                         CERROR("%s: expect to open total %d OI files.\n",
321                                dev->dd_lu_dev.ld_obd->obd_name, oi_count);
322                 }
323                 break;
324         }
325
326         if (rc < 0) {
327                 osd_oi_table_put(info, oi_table, count);
328                 return rc;
329         }
330
331         return count;
332 }
333
334 int osd_oi_init(struct osd_thread_info *info, struct osd_device *osd)
335 {
336         struct dt_device *dev = &osd->od_dt_dev;
337         struct osd_oi   **oi;
338         int               rc;
339
340         OBD_ALLOC(oi, sizeof(*oi) * OSD_OI_FID_NR_MAX);
341         if (oi == NULL)
342                 return -ENOMEM;
343
344         cfs_mutex_lock(&oi_init_lock);
345         /* try to open existing multiple OIs first */
346         rc = osd_oi_table_open(info, osd, oi, 0, false);
347         if (rc != 0)
348                 goto out;
349
350         /* if previous failed then try found single OI from old filesystem */
351         rc = osd_oi_open(info, osd, OSD_OI_NAME_BASE, &oi[0], false);
352         if (rc == 0) { /* found single OI from old filesystem */
353                 rc = 1;
354                 goto out;
355         } else if (rc != -ENOENT) {
356                 CERROR("%s: can't open %s: rc = %d\n",
357                        dev->dd_lu_dev.ld_obd->obd_name, OSD_OI_NAME_BASE, rc);
358                 goto out;
359         }
360
361         /* No OIs exist, new filesystem, create OI objects */
362         rc = osd_oi_table_open(info, osd, oi, osd_oi_count, true);
363         LASSERT(ergo(rc >= 0, rc == osd_oi_count));
364 out:
365         if (rc < 0) {
366                 OBD_FREE(oi, sizeof(*oi) * OSD_OI_FID_NR_MAX);
367         } else {
368                 LASSERT((rc & (rc - 1)) == 0);
369                 osd->od_oi_table = oi;
370                 osd->od_oi_count = rc;
371                 rc = 0;
372         }
373
374         cfs_mutex_unlock(&oi_init_lock);
375         return rc;
376 }
377
378 void osd_oi_fini(struct osd_thread_info *info, struct osd_device *osd)
379 {
380         osd_oi_table_put(info, osd->od_oi_table, osd->od_oi_count);
381
382         OBD_FREE(osd->od_oi_table,
383                  sizeof(*(osd->od_oi_table)) * OSD_OI_FID_NR_MAX);
384         osd->od_oi_table = NULL;
385 }
386
387 static inline int fid_is_fs_root(const struct lu_fid *fid)
388 {
389         /* Map root inode to special local object FID */
390         return (unlikely(fid_seq(fid) == FID_SEQ_LOCAL_FILE &&
391                          fid_oid(fid) == OSD_FS_ROOT_OID));
392 }
393
394 static int osd_oi_iam_lookup(struct osd_thread_info *oti,
395                              struct osd_oi *oi, struct dt_rec *rec,
396                              const struct dt_key *key)
397 {
398         struct iam_container  *bag;
399         struct iam_iterator   *it = &oti->oti_idx_it;
400         struct iam_rec        *iam_rec;
401         struct iam_path_descr *ipd;
402         int                    rc;
403         ENTRY;
404
405         LASSERT(oi);
406         LASSERT(oi->oi_inode);
407
408         bag = &oi->oi_dir.od_container;
409         ipd = osd_idx_ipd_get(oti->oti_env, bag);
410         if (IS_ERR(ipd))
411                 RETURN(-ENOMEM);
412
413         /* got ipd now we can start iterator. */
414         iam_it_init(it, bag, 0, ipd);
415
416         rc = iam_it_get(it, (struct iam_key *)key);
417         if (rc >= 0) {
418                 if (S_ISDIR(oi->oi_inode->i_mode))
419                         iam_rec = (struct iam_rec *)oti->oti_ldp;
420                 else
421                         iam_rec = (struct iam_rec *)rec;
422
423                 iam_reccpy(&it->ii_path.ip_leaf, (struct iam_rec *)iam_rec);
424                 if (S_ISDIR(oi->oi_inode->i_mode))
425                         osd_fid_unpack((struct lu_fid *)rec,
426                                        (struct osd_fid_pack *)iam_rec);
427         }
428         iam_it_put(it);
429         iam_it_fini(it);
430         osd_ipd_put(oti->oti_env, bag, ipd);
431
432         LINVRNT(osd_invariant(obj));
433
434         RETURN(rc);
435 }
436
437 int osd_oi_lookup(struct osd_thread_info *info, struct osd_device *osd,
438                   const struct lu_fid *fid, struct osd_inode_id *id)
439 {
440         struct lu_fid       *oi_fid = &info->oti_fid;
441         const struct dt_key *key;
442         int                  rc = 0;
443
444         if (fid_is_idif(fid) || fid_seq(fid) == FID_SEQ_LLOG) {
445                 /* old OSD obj id */
446                 rc = osd_compat_objid_lookup(info, osd, fid, id);
447         } else if (fid_is_igif(fid)) {
448                 lu_igif_to_id(fid, id);
449                 rc = 0;
450         } else if (fid_is_fs_root(fid)) {
451                 struct inode *inode = osd_sb(osd)->s_root->d_inode;
452
453                 id->oii_ino = inode->i_ino;
454                 id->oii_gen = inode->i_generation;
455         } else {
456                 if (unlikely(fid_seq(fid) == FID_SEQ_LOCAL_FILE))
457                         return osd_compat_spec_lookup(info, osd, fid, id);
458
459                 fid_cpu_to_be(oi_fid, fid);
460                 key = (struct dt_key *)oi_fid;
461
462                 rc = osd_oi_iam_lookup(info, osd_fid2oi(osd, fid),
463                                        (struct dt_rec *)id, key);
464
465                 if (rc > 0) {
466                         id->oii_ino = be32_to_cpu(id->oii_ino);
467                         id->oii_gen = be32_to_cpu(id->oii_gen);
468                         rc = 0;
469                 } else if (rc == 0) {
470                         rc = -ENOENT;
471                 }
472         }
473         return rc;
474 }
475
476 static int osd_oi_iam_insert(struct osd_thread_info *oti, struct osd_oi *oi,
477                              const struct dt_rec *rec, const struct dt_key *key,
478                              struct thandle *th, int ignore_quota)
479 {
480         struct iam_container  *bag;
481         struct iam_rec        *iam_rec = (struct iam_rec *)oti->oti_ldp;
482         struct iam_path_descr *ipd;
483         struct osd_thandle    *oh;
484         int                    rc;
485 #ifdef HAVE_QUOTA_SUPPORT
486         cfs_cap_t              save    = cfs_curproc_cap_pack();
487 #endif
488         ENTRY;
489
490         LASSERT(oi);
491         LASSERT(oi->oi_inode);
492
493         bag = &oi->oi_dir.od_container;
494         ipd = osd_idx_ipd_get(oti->oti_env, bag);
495         if (unlikely(ipd == NULL))
496                 RETURN(-ENOMEM);
497
498         oh = container_of0(th, struct osd_thandle, ot_super);
499         LASSERT(oh->ot_handle != NULL);
500         LASSERT(oh->ot_handle->h_transaction != NULL);
501 #ifdef HAVE_QUOTA_SUPPORT
502         if (ignore_quota)
503                 cfs_cap_raise(CFS_CAP_SYS_RESOURCE);
504         else
505                 cfs_cap_lower(CFS_CAP_SYS_RESOURCE);
506 #endif
507         if (S_ISDIR(oi->oi_inode->i_mode))
508                 osd_fid_pack((struct osd_fid_pack *)iam_rec, rec,
509                              &oti->oti_fid);
510         else
511                 iam_rec = (struct iam_rec *) rec;
512         rc = iam_insert(oh->ot_handle, bag, (const struct iam_key *)key,
513                         iam_rec, ipd);
514 #ifdef HAVE_QUOTA_SUPPORT
515         cfs_curproc_cap_unpack(save);
516 #endif
517         osd_ipd_put(oti->oti_env, bag, ipd);
518         LINVRNT(osd_invariant(obj));
519         RETURN(rc);
520 }
521
522 int osd_oi_insert(struct osd_thread_info *info, struct osd_device *osd,
523                   const struct lu_fid *fid, const struct osd_inode_id *id0,
524                   struct thandle *th, int ignore_quota)
525 {
526         struct lu_fid       *oi_fid = &info->oti_fid;
527         struct osd_inode_id *id;
528         const struct dt_key *key;
529
530         if (fid_is_igif(fid) || unlikely(fid_seq(fid) == FID_SEQ_DOT_LUSTRE))
531                 return 0;
532
533         if (fid_is_idif(fid) || fid_seq(fid) == FID_SEQ_LLOG)
534                 return osd_compat_objid_insert(info, osd, fid, id0, th);
535
536         /* Server mount should not depends on OI files */
537         if (unlikely(fid_seq(fid) == FID_SEQ_LOCAL_FILE))
538                 return osd_compat_spec_insert(info, osd, fid, id0, th);
539
540         fid_cpu_to_be(oi_fid, fid);
541         key = (struct dt_key *)oi_fid;
542
543         id  = &info->oti_id;
544         id->oii_ino = cpu_to_be32(id0->oii_ino);
545         id->oii_gen = cpu_to_be32(id0->oii_gen);
546
547         return osd_oi_iam_insert(info, osd_fid2oi(osd, fid),
548                                  (struct dt_rec *)id, key, th, ignore_quota);
549 }
550
551 static int osd_oi_iam_delete(struct osd_thread_info *oti, struct osd_oi *oi,
552                              const struct dt_key *key, struct thandle *handle)
553 {
554         struct iam_container  *bag;
555         struct iam_path_descr *ipd;
556         struct osd_thandle    *oh;
557         int                    rc;
558         ENTRY;
559
560         LASSERT(oi);
561
562         bag = &oi->oi_dir.od_container;
563         ipd = osd_idx_ipd_get(oti->oti_env, bag);
564         if (unlikely(ipd == NULL))
565                 RETURN(-ENOMEM);
566
567         oh = container_of0(handle, struct osd_thandle, ot_super);
568         LASSERT(oh->ot_handle != NULL);
569         LASSERT(oh->ot_handle->h_transaction != NULL);
570
571         rc = iam_delete(oh->ot_handle, bag, (const struct iam_key *)key, ipd);
572         osd_ipd_put(oti->oti_env, bag, ipd);
573         LINVRNT(osd_invariant(obj));
574         RETURN(rc);
575 }
576
577 int osd_oi_delete(struct osd_thread_info *info,
578                   struct osd_device *osd, const struct lu_fid *fid,
579                   struct thandle *th)
580 {
581         struct lu_fid       *oi_fid = &info->oti_fid;
582         const struct dt_key *key;
583
584         LASSERT(fid_seq(fid) != FID_SEQ_LOCAL_FILE);
585
586         if (fid_is_idif(fid) || fid_seq(fid) == FID_SEQ_LLOG)
587                 return osd_compat_objid_delete(info, osd, fid, th);
588
589         fid_cpu_to_be(oi_fid, fid);
590         key = (struct dt_key *)oi_fid;
591
592         return osd_oi_iam_delete(info, osd_fid2oi(osd, fid), key, th);
593 }
594
595 int osd_oi_mod_init()
596 {
597         if (osd_oi_count == 0 || osd_oi_count > OSD_OI_FID_NR_MAX)
598                 osd_oi_count = OSD_OI_FID_NR;
599
600         if ((osd_oi_count & (osd_oi_count - 1)) != 0) {
601                 LCONSOLE_WARN("Round up oi_count %d to power2 %d\n",
602                               osd_oi_count, size_roundup_power2(osd_oi_count));
603                 osd_oi_count = size_roundup_power2(osd_oi_count);
604         }
605
606         cfs_mutex_init(&oi_init_lock);
607         return 0;
608 }