Whamcloud - gitweb
587a8dc8e24d8787bd01dcd8bb525eda090927eb
[fs/lustre-release.git] / lustre / osd-ldiskfs / osd_oi.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  *
30  * Copyright (c) 2012, Whamcloud, Inc.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/osd/osd_oi.c
37  *
38  * Object Index.
39  *
40  * Author: Nikita Danilov <nikita@clusterfs.com>
41  */
42
43 /*
44  * oi uses two mechanisms to implement fid->cookie mapping:
45  *
46  *     - persistent index, where cookie is a record and fid is a key, and
47  *
48  *     - algorithmic mapping for "igif" fids.
49  *
50  */
51
52 #define DEBUG_SUBSYSTEM S_MDS
53
54 #include <linux/module.h>
55
56 /* LUSTRE_VERSION_CODE */
57 #include <lustre_ver.h>
58 /*
59  * struct OBD_{ALLOC,FREE}*()
60  * OBD_FAIL_CHECK
61  */
62 #include <obd.h>
63 #include <obd_support.h>
64
65 /* fid_cpu_to_be() */
66 #include <lustre_fid.h>
67
68 #include "osd_oi.h"
69 /* osd_lookup(), struct osd_thread_info */
70 #include "osd_internal.h"
71 #include "osd_igif.h"
72 #include "dt_object.h"
73
/*
 * Default (OSD_OI_FID_NR) and maximum (OSD_OI_FID_NR_MAX) number of OI
 * containers.  Both are powers of two derived from the OID bit widths.
 */
74 #define OSD_OI_FID_NR         (1UL << OSD_OI_FID_OID_BITS)
75 #define OSD_OI_FID_NR_MAX     (1UL << OSD_OI_FID_OID_BITS_MAX)
76
/*
 * Number of OI containers to create on a new filesystem.  The value is
 * sanitized (range check, round up to a power of two) by osd_oi_mod_init().
 * NOTE(review): the variable is unsigned int while the module parameter is
 * declared with type int -- confirm this mismatch is intended.
 */
77 static unsigned int osd_oi_count = OSD_OI_FID_NR;
78 CFS_MODULE_PARM(osd_oi_count, "i", int, 0444,
79                 "Number of Object Index containers to be created, "
80                 "it's only valid for new filesystem.");
81
82 /** to serialize concurrent OI index initialization */
83 static cfs_mutex_t oi_init_lock;
84
/*
 * Index features shared by all OI containers: fixed-size records holding a
 * struct osd_inode_id.  The key sizes (dif_keysize_min/max) are not set
 * here; osd_oi_open() fills them with sizeof(struct lu_fid) before use.
 */
85 static struct dt_index_features oi_feat = {
86         .dif_flags       = DT_IND_UPDATE,
87         .dif_recsize_min = sizeof(struct osd_inode_id),
88         .dif_recsize_max = sizeof(struct osd_inode_id),
89         .dif_ptrsize     = 4
90 };
91
/*
 * Name of the single OI file used by old filesystems; also the prefix of
 * the per-container files, which are named "<base>.<index>".
 */
92 #define OSD_OI_NAME_BASE        "oi.16"
93
94 static void osd_oi_table_put(struct osd_thread_info *info,
95                              struct osd_oi **oi_table, unsigned oi_count)
96 {
97         struct iam_container *bag;
98         int                   i;
99
100         for (i = 0; i < oi_count; i++) {
101                 LASSERT(oi_table[i] != NULL);
102                 LASSERT(oi_table[i]->oi_inode != NULL);
103
104                 bag = &(oi_table[i]->oi_dir.od_container);
105                 if (bag->ic_object == oi_table[i]->oi_inode)
106                         iam_container_fini(bag);
107                 iput(oi_table[i]->oi_inode);
108                 oi_table[i]->oi_inode = NULL;
109                 OBD_FREE_PTR(oi_table[i]);
110         }
111 }
112
113 static int osd_oi_index_create_one(struct osd_thread_info *info,
114                                    struct osd_device *osd, const char *name,
115                                    struct dt_index_features *feat)
116 {
117         const struct lu_env             *env = info->oti_env;
118         struct osd_inode_id             *id  = &info->oti_id;
119         struct buffer_head              *bh;
120         struct inode                    *inode;
121         struct ldiskfs_dir_entry_2      *de;
122         struct dentry                   *dentry;
123         struct inode                    *dir;
124         handle_t                        *jh;
125         int                              rc;
126
127         dentry = osd_child_dentry_by_inode(env, osd_sb(osd)->s_root->d_inode,
128                                            name, strlen(name));
129         dir = osd_sb(osd)->s_root->d_inode;
130         bh = osd_ldiskfs_find_entry(dir, dentry, &de, NULL);
131         if (bh) {
132                 brelse(bh);
133
134                 id->oii_ino = le32_to_cpu(de->inode);
135                 id->oii_gen = OSD_OII_NOGEN;
136
137                 inode = osd_iget(info, osd, id);
138                 if (!IS_ERR(inode)) {
139                         iput(inode);
140                         RETURN(-EEXIST);
141                 }
142                 RETURN(PTR_ERR(inode));
143         }
144
145         jh = ldiskfs_journal_start_sb(osd_sb(osd), 100);
146         LASSERT(!IS_ERR(jh));
147
148         inode = ldiskfs_create_inode(jh, osd_sb(osd)->s_root->d_inode,
149                                      (S_IFREG | S_IRUGO | S_IWUSR));
150         LASSERT(!IS_ERR(inode));
151
152         if (feat->dif_flags & DT_IND_VARKEY)
153                 rc = iam_lvar_create(inode, feat->dif_keysize_max,
154                                      feat->dif_ptrsize, feat->dif_recsize_max,
155                                      jh);
156         else
157                 rc = iam_lfix_create(inode, feat->dif_keysize_max,
158                                      feat->dif_ptrsize, feat->dif_recsize_max,
159                                      jh);
160
161         dentry = osd_child_dentry_by_inode(env, osd_sb(osd)->s_root->d_inode,
162                                            name, strlen(name));
163         rc = osd_ldiskfs_add_entry(jh, dentry, inode, NULL);
164         LASSERT(rc == 0);
165
166         ldiskfs_journal_stop(jh);
167         iput(inode);
168
169         return rc;
170 }
171
172 static struct inode *osd_oi_index_open(struct osd_thread_info *info,
173                                        struct osd_device *osd,
174                                        const char *name,
175                                        struct dt_index_features *f,
176                                        bool create)
177 {
178         struct dentry *dentry;
179         struct inode  *inode;
180         int            rc;
181
182         dentry = ll_lookup_one_len(name, osd_sb(osd)->s_root, strlen(name));
183         if (IS_ERR(dentry))
184                 return (void *) dentry;
185
186         if (dentry->d_inode) {
187                 LASSERT(!is_bad_inode(dentry->d_inode));
188                 inode = dentry->d_inode;
189                 atomic_inc(&inode->i_count);
190                 dput(dentry);
191                 return inode;
192         }
193
194         /* create */
195         dput(dentry);
196         shrink_dcache_parent(osd_sb(osd)->s_root);
197         if (!create)
198                 return ERR_PTR(-ENOENT);
199
200         rc = osd_oi_index_create_one(info, osd, name, f);
201         if (rc)
202                 RETURN(ERR_PTR(rc));
203
204         dentry = ll_lookup_one_len(name, osd_sb(osd)->s_root, strlen(name));
205         if (IS_ERR(dentry))
206                 return (void *) dentry;
207
208         if (dentry->d_inode) {
209                 LASSERT(!is_bad_inode(dentry->d_inode));
210                 inode = dentry->d_inode;
211                 atomic_inc(&inode->i_count);
212                 dput(dentry);
213                 return inode;
214         }
215
216         return ERR_PTR(-ENOENT);
217 }
218
219 /**
220  * Open an OI(Ojbect Index) container.
221  *
222  * \param       name    Name of OI container
223  * \param       objp    Pointer of returned OI
224  *
225  * \retval      0       success
226  * \retval      -ve     failure
227  */
228 static int osd_oi_open(struct osd_thread_info *info, struct osd_device *osd,
229                        char *name, struct osd_oi **oi_slot, bool create)
230 {
231         struct osd_directory *dir;
232         struct iam_container *bag;
233         struct inode         *inode;
234         struct osd_oi        *oi;
235         int                   rc;
236
237         ENTRY;
238
239         oi_feat.dif_keysize_min = sizeof(struct lu_fid);
240         oi_feat.dif_keysize_max = sizeof(struct lu_fid);
241
242         inode = osd_oi_index_open(info, osd, name, &oi_feat, create);
243         if (IS_ERR(inode))
244                 RETURN(PTR_ERR(inode));
245
246         OBD_ALLOC_PTR(oi);
247         if (oi == NULL)
248                 GOTO(out_inode, rc = -ENOMEM);
249
250         oi->oi_inode = inode;
251         dir = &oi->oi_dir;
252
253         bag = &dir->od_container;
254         rc = iam_container_init(bag, &dir->od_descr, inode);
255         if (rc < 0)
256                 GOTO(out_free, rc);
257
258         rc = iam_container_setup(bag);
259         if (rc < 0)
260                 GOTO(out_container, rc);
261
262         *oi_slot = oi;
263         RETURN(0);
264
265 out_container:
266         iam_container_fini(bag);
267 out_free:
268         OBD_FREE_PTR(oi);
269 out_inode:
270         iput(inode);
271         return rc;
272 }
273
274 /**
275  * Open OI(Object Index) table.
276  * If \a oi_count is zero, which means caller doesn't know how many OIs there
277  * will be, this function can either return 0 for new filesystem, or number
278  * of OIs on existed filesystem.
279  *
280  * If \a oi_count is non-zero, which means caller does know number of OIs on
281  * filesystem, this function should return the exactly same number on
282  * success, or error code in failure.
283  *
284  * \param     oi_count  Number of expected OI containers
285  * \param     create    Create OIs if doesn't exist
286  *
287  * \retval    +ve       number of opened OI containers
288  * \retval      0       no OI containers found
289  * \retval    -ve       failure
290  */
291 static int
292 osd_oi_table_open(struct osd_thread_info *info, struct osd_device *osd,
293                   struct osd_oi **oi_table, unsigned oi_count, bool create)
294 {
295         struct dt_device *dev = &osd->od_dt_dev;
296         int               count = 0;
297         int               rc = 0;
298         int               i;
299
300         /* NB: oi_count != 0 means that we have already created/known all OIs
301          * and have known exact number of OIs. */
302         LASSERT(oi_count <= OSD_OI_FID_NR_MAX);
303
304         for (i = 0; i < (oi_count != 0 ? oi_count : OSD_OI_FID_NR_MAX); i++) {
305                 char name[12];
306
307                 sprintf(name, "%s.%d", OSD_OI_NAME_BASE, i);
308                 rc = osd_oi_open(info, osd, name, &oi_table[i], create);
309                 if (rc == 0) {
310                         count++;
311                         continue;
312                 }
313
314                 if (rc == -ENOENT && oi_count == 0)
315                         return count;
316
317                 CERROR("%s: can't open %s: rc = %d\n",
318                        dev->dd_lu_dev.ld_obd->obd_name, name, rc);
319                 if (oi_count > 0) {
320                         CERROR("%s: expect to open total %d OI files.\n",
321                                dev->dd_lu_dev.ld_obd->obd_name, oi_count);
322                 }
323                 break;
324         }
325
326         if (rc < 0) {
327                 osd_oi_table_put(info, oi_table, count);
328                 return rc;
329         }
330
331         return count;
332 }
333
334 int osd_oi_init(struct osd_thread_info *info, struct osd_device *osd)
335 {
336         struct dt_device *dev = &osd->od_dt_dev;
337         struct osd_oi   **oi;
338         int               rc;
339
340         OBD_ALLOC(oi, sizeof(*oi) * OSD_OI_FID_NR_MAX);
341         if (oi == NULL)
342                 return -ENOMEM;
343
344         cfs_mutex_lock(&oi_init_lock);
345         /* try to open existing multiple OIs first */
346         rc = osd_oi_table_open(info, osd, oi, 0, false);
347         if (rc != 0)
348                 goto out;
349
350         /* if previous failed then try found single OI from old filesystem */
351         rc = osd_oi_open(info, osd, OSD_OI_NAME_BASE, &oi[0], false);
352         if (rc == 0) { /* found single OI from old filesystem */
353                 rc = 1;
354                 goto out;
355         } else if (rc != -ENOENT) {
356                 CERROR("%s: can't open %s: rc = %d\n",
357                        dev->dd_lu_dev.ld_obd->obd_name, OSD_OI_NAME_BASE, rc);
358                 goto out;
359         }
360
361         /* No OIs exist, new filesystem, create OI objects */
362         rc = osd_oi_table_open(info, osd, oi, osd_oi_count, true);
363         LASSERT(ergo(rc >= 0, rc == osd_oi_count));
364 out:
365         if (rc < 0) {
366                 OBD_FREE(oi, sizeof(*oi) * OSD_OI_FID_NR_MAX);
367         } else {
368                 LASSERT((rc & (rc - 1)) == 0);
369                 osd->od_oi_table = oi;
370                 osd->od_oi_count = rc;
371                 rc = 0;
372         }
373
374         cfs_mutex_unlock(&oi_init_lock);
375         return rc;
376 }
377
378 void osd_oi_fini(struct osd_thread_info *info, struct osd_device *osd)
379 {
380         osd_oi_table_put(info, osd->od_oi_table, osd->od_oi_count);
381
382         OBD_FREE(osd->od_oi_table,
383                  sizeof(*(osd->od_oi_table)) * OSD_OI_FID_NR_MAX);
384         osd->od_oi_table = NULL;
385 }
386
387 static inline int fid_is_fs_root(const struct lu_fid *fid)
388 {
389         /* Map root inode to special local object FID */
390         return (unlikely(fid_seq(fid) == FID_SEQ_LOCAL_FILE &&
391                          fid_oid(fid) == OSD_FS_ROOT_OID));
392 }
393
394 static int osd_oi_iam_lookup(struct osd_thread_info *oti,
395                              struct osd_oi *oi, struct dt_rec *rec,
396                              const struct dt_key *key)
397 {
398         struct iam_container  *bag;
399         struct iam_iterator   *it = &oti->oti_idx_it;
400         struct iam_rec        *iam_rec;
401         struct iam_path_descr *ipd;
402         int                    rc;
403         ENTRY;
404
405         LASSERT(oi);
406         LASSERT(oi->oi_inode);
407
408         bag = &oi->oi_dir.od_container;
409         ipd = osd_idx_ipd_get(oti->oti_env, bag);
410         if (IS_ERR(ipd))
411                 RETURN(-ENOMEM);
412
413         /* got ipd now we can start iterator. */
414         iam_it_init(it, bag, 0, ipd);
415
416         rc = iam_it_get(it, (struct iam_key *)key);
417         if (rc >= 0) {
418                 if (S_ISDIR(oi->oi_inode->i_mode))
419                         iam_rec = (struct iam_rec *)oti->oti_ldp;
420                 else
421                         iam_rec = (struct iam_rec *)rec;
422
423                 iam_reccpy(&it->ii_path.ip_leaf, (struct iam_rec *)iam_rec);
424                 if (S_ISDIR(oi->oi_inode->i_mode))
425                         osd_fid_unpack((struct lu_fid *)rec,
426                                        (struct osd_fid_pack *)iam_rec);
427         }
428         iam_it_put(it);
429         iam_it_fini(it);
430         osd_ipd_put(oti->oti_env, bag, ipd);
431
432         LINVRNT(osd_invariant(obj));
433
434         RETURN(rc);
435 }
436
437 int osd_oi_lookup(struct osd_thread_info *info, struct osd_device *osd,
438                   const struct lu_fid *fid, struct osd_inode_id *id)
439 {
440         struct lu_fid       *oi_fid = &info->oti_fid;
441         const struct dt_key *key;
442         int                  rc = 0;
443
444         if (fid_is_idif(fid) || fid_seq(fid) == FID_SEQ_LLOG) {
445                 /* old OSD obj id */
446                 rc = osd_compat_objid_lookup(info, osd, fid, id);
447         } else if (fid_is_igif(fid)) {
448                 lu_igif_to_id(fid, id);
449                 rc = 0;
450         } else if (fid_is_fs_root(fid)) {
451                 struct inode *inode = osd_sb(osd)->s_root->d_inode;
452
453                 id->oii_ino = inode->i_ino;
454                 id->oii_gen = inode->i_generation;
455         } else {
456                 if (unlikely(fid_seq(fid) == FID_SEQ_LOCAL_FILE)) {
457                         rc = osd_compat_spec_lookup(info, osd, fid, id);
458                         if (rc == 0 || rc != -ERESTART)
459                                 goto out;
460                 }
461
462                 fid_cpu_to_be(oi_fid, fid);
463                 key = (struct dt_key *)oi_fid;
464
465                 rc = osd_oi_iam_lookup(info, osd_fid2oi(osd, fid),
466                                        (struct dt_rec *)id, key);
467
468                 if (rc > 0) {
469                         id->oii_ino = be32_to_cpu(id->oii_ino);
470                         id->oii_gen = be32_to_cpu(id->oii_gen);
471                         rc = 0;
472                 } else if (rc == 0) {
473                         rc = -ENOENT;
474                 }
475         }
476
477 out:
478         return rc;
479 }
480
481 static int osd_oi_iam_insert(struct osd_thread_info *oti, struct osd_oi *oi,
482                              const struct dt_rec *rec, const struct dt_key *key,
483                              struct thandle *th, int ignore_quota)
484 {
485         struct iam_container  *bag;
486         struct iam_rec        *iam_rec = (struct iam_rec *)oti->oti_ldp;
487         struct iam_path_descr *ipd;
488         struct osd_thandle    *oh;
489         int                    rc;
490 #ifdef HAVE_QUOTA_SUPPORT
491         cfs_cap_t              save    = cfs_curproc_cap_pack();
492 #endif
493         ENTRY;
494
495         LASSERT(oi);
496         LASSERT(oi->oi_inode);
497
498         bag = &oi->oi_dir.od_container;
499         ipd = osd_idx_ipd_get(oti->oti_env, bag);
500         if (unlikely(ipd == NULL))
501                 RETURN(-ENOMEM);
502
503         oh = container_of0(th, struct osd_thandle, ot_super);
504         LASSERT(oh->ot_handle != NULL);
505         LASSERT(oh->ot_handle->h_transaction != NULL);
506 #ifdef HAVE_QUOTA_SUPPORT
507         if (ignore_quota)
508                 cfs_cap_raise(CFS_CAP_SYS_RESOURCE);
509         else
510                 cfs_cap_lower(CFS_CAP_SYS_RESOURCE);
511 #endif
512         if (S_ISDIR(oi->oi_inode->i_mode))
513                 osd_fid_pack((struct osd_fid_pack *)iam_rec, rec,
514                              &oti->oti_fid);
515         else
516                 iam_rec = (struct iam_rec *) rec;
517         rc = iam_insert(oh->ot_handle, bag, (const struct iam_key *)key,
518                         iam_rec, ipd);
519 #ifdef HAVE_QUOTA_SUPPORT
520         cfs_curproc_cap_unpack(save);
521 #endif
522         osd_ipd_put(oti->oti_env, bag, ipd);
523         LINVRNT(osd_invariant(obj));
524         RETURN(rc);
525 }
526
527 int osd_oi_insert(struct osd_thread_info *info, struct osd_device *osd,
528                   const struct lu_fid *fid, const struct osd_inode_id *id0,
529                   struct thandle *th, int ignore_quota)
530 {
531         struct lu_fid       *oi_fid = &info->oti_fid;
532         struct osd_inode_id *id;
533         const struct dt_key *key;
534
535         if (fid_is_igif(fid))
536                 return 0;
537
538         if (fid_is_idif(fid) || fid_seq(fid) == FID_SEQ_LLOG)
539                 return osd_compat_objid_insert(info, osd, fid, id0, th);
540
541         /* notice we don't return immediately, but continue to get into OI */
542         if (unlikely(fid_seq(fid) == FID_SEQ_LOCAL_FILE))
543                 osd_compat_spec_insert(info, osd, fid, id0, th);
544
545         fid_cpu_to_be(oi_fid, fid);
546         key = (struct dt_key *)oi_fid;
547
548         id  = &info->oti_id;
549         id->oii_ino = cpu_to_be32(id0->oii_ino);
550         id->oii_gen = cpu_to_be32(id0->oii_gen);
551
552         return osd_oi_iam_insert(info, osd_fid2oi(osd, fid),
553                                  (struct dt_rec *)id, key, th, ignore_quota);
554 }
555
556 static int osd_oi_iam_delete(struct osd_thread_info *oti, struct osd_oi *oi,
557                              const struct dt_key *key, struct thandle *handle)
558 {
559         struct iam_container  *bag;
560         struct iam_path_descr *ipd;
561         struct osd_thandle    *oh;
562         int                    rc;
563         ENTRY;
564
565         LASSERT(oi);
566
567         bag = &oi->oi_dir.od_container;
568         ipd = osd_idx_ipd_get(oti->oti_env, bag);
569         if (unlikely(ipd == NULL))
570                 RETURN(-ENOMEM);
571
572         oh = container_of0(handle, struct osd_thandle, ot_super);
573         LASSERT(oh->ot_handle != NULL);
574         LASSERT(oh->ot_handle->h_transaction != NULL);
575
576         rc = iam_delete(oh->ot_handle, bag, (const struct iam_key *)key, ipd);
577         osd_ipd_put(oti->oti_env, bag, ipd);
578         LINVRNT(osd_invariant(obj));
579         RETURN(rc);
580 }
581
582 int osd_oi_delete(struct osd_thread_info *info,
583                   struct osd_device *osd, const struct lu_fid *fid,
584                   struct thandle *th)
585 {
586         struct lu_fid       *oi_fid = &info->oti_fid;
587         const struct dt_key *key;
588
589         if (!fid_is_norm(fid))
590                 return 0;
591
592         LASSERT(fid_seq(fid) != FID_SEQ_LOCAL_FILE);
593
594         if (fid_is_idif(fid) || fid_seq(fid) == FID_SEQ_LLOG)
595                 return osd_compat_objid_delete(info, osd, fid, th);
596
597         fid_cpu_to_be(oi_fid, fid);
598         key = (struct dt_key *)oi_fid;
599
600         return osd_oi_iam_delete(info, osd_fid2oi(osd, fid), key, th);
601 }
602
603 int osd_oi_mod_init()
604 {
605         if (osd_oi_count == 0 || osd_oi_count > OSD_OI_FID_NR_MAX)
606                 osd_oi_count = OSD_OI_FID_NR;
607
608         if ((osd_oi_count & (osd_oi_count - 1)) != 0) {
609                 LCONSOLE_WARN("Round up oi_count %d to power2 %d\n",
610                               osd_oi_count, size_roundup_power2(osd_oi_count));
611                 osd_oi_count = size_roundup_power2(osd_oi_count);
612         }
613
614         cfs_mutex_init(&oi_init_lock);
615         return 0;
616 }