Whamcloud - gitweb
LU-17744 ldiskfs: mballoc stats fixes
[fs/lustre-release.git] / lustre / osd-ldiskfs / osd_compat.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2012, 2017, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  *
31  * lustre/osd/osd_compat.c
32  *
33  * on-disk structure for managing /O
34  *
35  * Author: Alex Zhuravlev <bzzz@whamcloud.com>
36  */
37
38 /* prerequisite for linux/xattr.h */
39 #include <linux/types.h>
40 /* prerequisite for linux/xattr.h */
41 #include <linux/fs.h>
42 /* XATTR_{REPLACE,CREATE} */
43 #include <linux/xattr.h>
44
45 /*
46  * struct OBD_{ALLOC,FREE}*()
47  */
48 #include <obd_support.h>
49 #include <libcfs/libcfs.h>
50
51 #include "osd_internal.h"
52 #include "osd_oi.h"
53
/*
 * When the build does not define HAVE_INODE_LOCK_SHARED, map the shared
 * inode lock helpers onto the plain inode_lock()/inode_unlock() pair.
 */
#ifndef HAVE_INODE_LOCK_SHARED
# define inode_lock_shared(dir)         inode_lock(dir)
# define inode_unlock_shared(dir)       inode_unlock(dir)
#endif /* !HAVE_INODE_LOCK_SHARED */
58
59 static void osd_push_ctxt(const struct osd_device *dev,
60                           struct lvfs_run_ctxt *newctxt,
61                           struct lvfs_run_ctxt *save)
62 {
63         OBD_SET_CTXT_MAGIC(newctxt);
64         newctxt->pwdmnt = dev->od_mnt;
65         newctxt->pwd = dev->od_mnt->mnt_root;
66         newctxt->umask = current_umask();
67         newctxt->dt = NULL;
68
69         push_ctxt(save, newctxt);
70 }
71
72 struct dentry *osd_lookup_one_len_common(struct osd_device *dev,
73                                          const char *name,
74                                          struct dentry *base, int len,
75                                          enum oi_check_flags flags)
76 {
77         struct dentry *dchild;
78
79         /*
80          * We can't use inode_is_locked() directly since we can't know
81          * if the current thread context took the lock earlier or if
82          * another thread context took the lock. OI_LOCKED tells us
83          * if the current thread context has already taken the lock.
84          */
85         if (!(flags & OI_LOCKED)) {
86                 /* If another thread took this lock already we will
87                  * just have to wait until the other thread is done.
88                  */
89                 inode_lock(base->d_inode);
90                 dchild = lookup_one_len(name, base, len);
91                 inode_unlock(base->d_inode);
92         } else {
93                 /* This thread context already has taken the lock.
94                  * Other threads will have to wait until we are done.
95                  */
96                 dchild = lookup_one_len(name, base, len);
97         }
98         if (IS_ERR(dchild))
99                 return dchild;
100
101         if (dchild->d_inode && unlikely(is_bad_inode(dchild->d_inode))) {
102                 CERROR("%s: bad inode returned %lu/%u: rc = -ENOENT\n",
103                        osd_name(dev), dchild->d_inode->i_ino,
104                        dchild->d_inode->i_generation);
105                 dput(dchild);
106                 dchild = ERR_PTR(-ENOENT);
107         }
108
109         return dchild;
110 }
111
112 /**
113  * osd_lookup_one_len_unlocked
114  *
115  * @dev:        obd device we are searching
116  * @name:       pathname component to lookup
117  * @base:       base directory to lookup from
118  * @len:        maximum length @len should be interpreted to
119  *
120  * Unlike osd_lookup_one_len, this should be called without the parent
121  * i_mutex held, and will take the i_mutex itself.
122  */
123 struct dentry *osd_lookup_one_len_unlocked(struct osd_device *dev,
124                                            const char *name,
125                                            struct dentry *base, int len)
126 {
127         return osd_lookup_one_len_common(dev, name, base, len, ~OI_LOCKED);
128 }
129
130 /**
131  * osd_lookup_one_len - lookup single pathname component
132  *
133  * @dev:        obd device we are searching
134  * @name:       pathname component to lookup
135  * @base:       base directory to lookup from
136  * @len:        maximum length @len should be interpreted to
137  *
138  * The caller must hold inode lock
139  */
140 struct dentry *osd_lookup_one_len(struct osd_device *dev, const char *name,
141                                   struct dentry *base, int len)
142 {
143         return osd_lookup_one_len_common(dev, name, base, len, OI_LOCKED);
144 }
145
146 /* utility to make a directory */
147 static struct dentry *
148 simple_mkdir(const struct lu_env *env, struct osd_device *osd,
149              struct dentry *dir, const struct lu_fid *fid,
150              const char *name, __u32 compat, int mode, bool *created)
151 {
152         struct osd_thread_info *info = osd_oti_get(env);
153         struct lu_fid *tfid = &info->oti_fid3;
154         struct inode *inode;
155         struct dentry *dchild;
156         int err = 0;
157
158         ENTRY;
159
160         // ASSERT_KERNEL_CTXT("kernel doing mkdir outside kernel context\n");
161         CDEBUG(D_INODE, "creating directory %.*s\n", (int)strlen(name), name);
162         dchild = osd_lookup_one_len_unlocked(osd, name, dir, strlen(name));
163         if (IS_ERR(dchild))
164                 RETURN(dchild);
165
166         inode = dchild->d_inode;
167         if (inode) {
168                 struct lustre_mdt_attrs *lma = &info->oti_ost_attrs.loa_lma;
169                 int old_mode = inode->i_mode;
170
171                 if (created)
172                         *created = false;
173
174                 if (!S_ISDIR(old_mode)) {
175                         CERROR("found %s (%lu/%u) is mode %o\n", name,
176                                inode->i_ino, inode->i_generation, old_mode);
177                         GOTO(out_err, err = -ENOTDIR);
178                 }
179
180                 if (unlikely(osd->od_dt_dev.dd_rdonly))
181                         RETURN(dchild);
182
183                 /* Fixup directory permissions if necessary */
184                 if ((old_mode & S_IALLUGO) != (mode & S_IALLUGO)) {
185                         CDEBUG(D_CONFIG,
186                                "fixing permissions on %s from %o to %o\n",
187                                name, old_mode, mode);
188                         inode->i_mode = (mode & S_IALLUGO) |
189                                         (old_mode & ~S_IALLUGO);
190                         mark_inode_dirty(inode);
191                 }
192
193                 err = osd_get_lma(info, inode, &info->oti_obj_dentry,
194                                   &info->oti_ost_attrs);
195                 if (err == -ENODATA)
196                         goto set_fid;
197
198                 if (err)
199                         GOTO(out_err, err);
200
201                 if ((fid && !lu_fid_eq(fid, &lma->lma_self_fid)) ||
202                     lma->lma_compat != compat)
203                         goto set_fid;
204
205                 RETURN(dchild);
206         }
207
208         err = vfs_mkdir(&nop_mnt_idmap, dir->d_inode, dchild, mode);
209         if (err)
210                 GOTO(out_err, err);
211
212         inode = dchild->d_inode;
213         if (created)
214                 *created = true;
215
216 set_fid:
217         if (fid)
218                 *tfid = *fid;
219         else
220                 lu_igif_build(tfid, inode->i_ino, inode->i_generation);
221         err = osd_ea_fid_set(info, inode, tfid, compat, 0);
222         if (err)
223                 GOTO(out_err, err);
224
225         RETURN(dchild);
226
227 out_err:
228         dput(dchild);
229         return ERR_PTR(err);
230 }
231
232 static int osd_last_rcvd_subdir_count(struct osd_device *osd)
233 {
234         struct lr_server_data lsd;
235         struct dentry *dlast;
236         loff_t off;
237         int rc = 0;
238         int count = OBJ_SUBDIR_COUNT;
239
240         ENTRY;
241
242         dlast = osd_lookup_one_len_unlocked(osd, LAST_RCVD, osd_sb(osd)->s_root,
243                                             strlen(LAST_RCVD));
244         if (IS_ERR(dlast))
245                 return PTR_ERR(dlast);
246         else if (dlast->d_inode == NULL)
247                 goto out;
248
249         off = 0;
250         rc = osd_ldiskfs_read(dlast->d_inode, &lsd, sizeof(lsd), &off);
251         if (rc == sizeof(lsd)) {
252                 CDEBUG(D_INFO,
253                       "read last_rcvd header, uuid = %s, subdir count = %d\n",
254                       lsd.lsd_uuid, lsd.lsd_subdir_count);
255                 if (le16_to_cpu(lsd.lsd_subdir_count) > 0)
256                         count = le16_to_cpu(lsd.lsd_subdir_count);
257         } else if (rc != 0) {
258                 CERROR("Can't read last_rcvd file, rc = %d\n", rc);
259                 if (rc > 0)
260                         rc = -EFAULT;
261                 dput(dlast);
262                 return rc;
263         }
264 out:
265         dput(dlast);
266         LASSERT(count > 0);
267         return count;
268 }
269
270 static int osd_mdt_init(const struct lu_env *env, struct osd_device *dev)
271 {
272         struct lvfs_run_ctxt new;
273         struct lvfs_run_ctxt save;
274         struct dentry *parent;
275         struct osd_mdobj_map *omm;
276         struct dentry *d;
277         struct osd_thread_info *info = osd_oti_get(env);
278         struct lu_fid *fid = &info->oti_fid3;
279         int rc = 0;
280
281         ENTRY;
282
283         OBD_ALLOC_PTR(dev->od_mdt_map);
284         if (dev->od_mdt_map == NULL)
285                 RETURN(-ENOMEM);
286
287         omm = dev->od_mdt_map;
288
289         parent = osd_sb(dev)->s_root;
290         osd_push_ctxt(dev, &new, &save);
291
292         lu_local_obj_fid(fid, REMOTE_PARENT_DIR_OID);
293         d = simple_mkdir(env, dev, parent, fid, REMOTE_PARENT_DIR,
294                          LMAC_NOT_IN_OI, 0755, NULL);
295         if (IS_ERR(d))
296                 GOTO(cleanup, rc = PTR_ERR(d));
297
298         omm->omm_remote_parent = d;
299
300         GOTO(cleanup, rc = 0);
301
302 cleanup:
303         pop_ctxt(&save, &new);
304         if (rc) {
305                 if (omm->omm_remote_parent != NULL)
306                         dput(omm->omm_remote_parent);
307                 OBD_FREE_PTR(omm);
308                 dev->od_mdt_map = NULL;
309         }
310         return rc;
311 }
312
313 static void osd_mdt_fini(struct osd_device *osd)
314 {
315         struct osd_mdobj_map *omm = osd->od_mdt_map;
316
317         if (omm == NULL)
318                 return;
319
320         if (omm->omm_remote_parent)
321                 dput(omm->omm_remote_parent);
322
323         OBD_FREE_PTR(omm);
324         osd->od_ost_map = NULL;
325 }
326
327 int osd_add_to_remote_parent(const struct lu_env *env, struct osd_device *osd,
328                              struct osd_object *obj, struct osd_thandle *oh)
329 {
330         struct osd_mdobj_map *omm = osd->od_mdt_map;
331         struct osd_thread_info *oti = osd_oti_get(env);
332         struct lustre_mdt_attrs *lma = &oti->oti_ost_attrs.loa_lma;
333         char *name = oti->oti_name;
334         struct osd_thread_info *info = osd_oti_get(env);
335         struct dentry *dentry;
336         struct dentry *parent;
337         int rc;
338
339         if (CFS_FAIL_CHECK(OBD_FAIL_LFSCK_NO_AGENTENT))
340                 RETURN(0);
341
342         /*
343          * Set REMOTE_PARENT in lma, so other process like unlink or lfsck
344          * can identify this object quickly
345          */
346         rc = osd_get_lma(oti, obj->oo_inode, &oti->oti_obj_dentry,
347                          &oti->oti_ost_attrs);
348         if (rc)
349                 RETURN(rc);
350
351         lma->lma_incompat |= LMAI_REMOTE_PARENT;
352         lustre_lma_swab(lma);
353         rc = __osd_xattr_set(oti, obj->oo_inode, XATTR_NAME_LMA, lma,
354                              sizeof(*lma), XATTR_REPLACE);
355         if (rc)
356                 RETURN(rc);
357
358         parent = omm->omm_remote_parent;
359         sprintf(name, DFID_NOBRACE, PFID(lu_object_fid(&obj->oo_dt.do_lu)));
360         dentry = osd_child_dentry_by_inode(env, parent->d_inode,
361                                            name, strlen(name));
362         inode_lock(parent->d_inode);
363         rc = osd_ldiskfs_add_entry(info, osd, oh->ot_handle, dentry,
364                                    obj->oo_inode, NULL);
365         if (!rc && S_ISDIR(obj->oo_inode->i_mode))
366                 osd_ldiskfs_inc_count(oh->ot_handle, parent->d_inode);
367         else if (unlikely(rc == -EEXIST))
368                 rc = 0;
369         if (!rc)
370                 lu_object_set_agent_entry(&obj->oo_dt.do_lu);
371         CDEBUG(D_INODE, "%s: create agent entry for %s: rc = %d\n",
372                osd_name(osd), name, rc);
373         mark_inode_dirty(parent->d_inode);
374         inode_unlock(parent->d_inode);
375         RETURN(rc);
376 }
377
378 int osd_delete_from_remote_parent(const struct lu_env *env,
379                                   struct osd_device *osd,
380                                   struct osd_object *obj,
381                                   struct osd_thandle *oh, bool destroy)
382 {
383         struct osd_mdobj_map *omm = osd->od_mdt_map;
384         struct osd_thread_info *oti = osd_oti_get(env);
385         struct lustre_mdt_attrs *lma = &oti->oti_ost_attrs.loa_lma;
386         char *name = oti->oti_name;
387         struct dentry *dentry;
388         struct dentry *parent;
389         struct ldiskfs_dir_entry_2 *de;
390         struct buffer_head *bh;
391         int rc;
392
393         parent = omm->omm_remote_parent;
394         sprintf(name, DFID_NOBRACE, PFID(lu_object_fid(&obj->oo_dt.do_lu)));
395         dentry = osd_child_dentry_by_inode(env, parent->d_inode,
396                                            name, strlen(name));
397         inode_lock(parent->d_inode);
398         bh = osd_ldiskfs_find_entry(parent->d_inode, &dentry->d_name, &de,
399                                     NULL, NULL);
400         if (IS_ERR(bh)) {
401                 inode_unlock(parent->d_inode);
402                 rc = PTR_ERR(bh);
403                 if (unlikely(rc == -ENOENT))
404                         rc = 0;
405         } else {
406                 rc = ldiskfs_delete_entry(oh->ot_handle, parent->d_inode,
407                                           de, bh);
408                 if (!rc && S_ISDIR(obj->oo_inode->i_mode))
409                         osd_ldiskfs_dec_count(oh->ot_handle, parent->d_inode);
410                 mark_inode_dirty(parent->d_inode);
411                 inode_unlock(parent->d_inode);
412                 brelse(bh);
413                 CDEBUG(D_INODE, "%s: remove agent entry for %s: rc = %d\n",
414                        osd_name(osd), name, rc);
415         }
416
417         if (destroy || rc) {
418                 if (!rc)
419                         lu_object_clear_agent_entry(&obj->oo_dt.do_lu);
420
421                 RETURN(rc);
422         }
423
424         rc = osd_get_lma(oti, obj->oo_inode, &oti->oti_obj_dentry,
425                          &oti->oti_ost_attrs);
426         if (rc)
427                 RETURN(rc);
428
429         /* Get rid of REMOTE_PARENT flag from incompat */
430         lma->lma_incompat &= ~LMAI_REMOTE_PARENT;
431         lustre_lma_swab(lma);
432         rc = __osd_xattr_set(oti, obj->oo_inode, XATTR_NAME_LMA, lma,
433                              sizeof(*lma), XATTR_REPLACE);
434         if (!rc)
435                 lu_object_clear_agent_entry(&obj->oo_dt.do_lu);
436         RETURN(rc);
437 }
438
439 int osd_lookup_in_remote_parent(struct osd_thread_info *oti,
440                                 struct osd_device *osd,
441                                 const struct lu_fid *fid,
442                                 struct osd_inode_id *id)
443 {
444         struct osd_mdobj_map *omm = osd->od_mdt_map;
445         char *name = oti->oti_name;
446         struct dentry *parent;
447         struct dentry *dentry;
448         struct ldiskfs_dir_entry_2 *de;
449         struct buffer_head *bh;
450         int rc;
451
452         ENTRY;
453
454         if (unlikely(osd->od_is_ost))
455                 RETURN(-ENOENT);
456
457         parent = omm->omm_remote_parent;
458         sprintf(name, DFID_NOBRACE, PFID(fid));
459         dentry = osd_child_dentry_by_inode(oti->oti_env, parent->d_inode,
460                                            name, strlen(name));
461         inode_lock(parent->d_inode);
462         bh = osd_ldiskfs_find_entry(parent->d_inode, &dentry->d_name, &de,
463                                     NULL, NULL);
464         if (IS_ERR(bh)) {
465                 rc = PTR_ERR(bh);
466         } else {
467                 struct inode *inode;
468
469                 osd_id_gen(id, le32_to_cpu(de->inode), OSD_OII_NOGEN);
470                 brelse(bh);
471                 inode = osd_iget(oti, osd, id);
472                 if (IS_ERR(inode)) {
473                         rc = PTR_ERR(inode);
474                         if (rc == -ESTALE)
475                                 rc = -ENOENT;
476                 } else {
477                         iput(inode);
478                         rc = 0;
479                 }
480         }
481         inode_unlock(parent->d_inode);
482         if (rc == 0)
483                 osd_add_oi_cache(oti, osd, id, fid);
484         RETURN(rc);
485 }
486
487 /*
488  * directory structure on legacy OST:
489  *
490  * O/<seq>/d0-31/<objid>
491  * O/<seq>/LAST_ID
492  * last_rcvd
493  * LAST_GROUP
494  * CONFIGS
495  *
496  */
497 static int osd_ost_init(const struct lu_env *env, struct osd_device *dev)
498 {
499         struct lvfs_run_ctxt new;
500         struct lvfs_run_ctxt save;
501         struct dentry *d;
502         int rc;
503         bool created = false;
504
505         ENTRY;
506
507         OBD_ALLOC_PTR(dev->od_ost_map);
508         if (dev->od_ost_map == NULL)
509                 RETURN(-ENOMEM);
510
511         /* to get subdir count from last_rcvd */
512         rc = osd_last_rcvd_subdir_count(dev);
513         if (rc < 0)
514                 GOTO(cleanup_alloc, rc);
515
516         dev->od_ost_map->om_subdir_count = rc;
517         INIT_LIST_HEAD(&dev->od_ost_map->om_seq_list);
518         rwlock_init(&dev->od_ost_map->om_seq_list_lock);
519         mutex_init(&dev->od_ost_map->om_dir_init_mutex);
520
521         osd_push_ctxt(dev, &new, &save);
522         d = simple_mkdir(env, dev, osd_sb(dev)->s_root, NULL, "O",
523                          LMAC_NOT_IN_OI | LMAC_FID_ON_OST, 0755, &created);
524         if (IS_ERR(d))
525                 GOTO(cleanup_ctxt, rc = PTR_ERR(d));
526
527         if (created)
528                 /* It is quite probably that the device is new formatted. */
529                 dev->od_maybe_new = 1;
530
531         dev->od_ost_map->om_root = d;
532
533         pop_ctxt(&save, &new);
534         RETURN(0);
535
536 cleanup_ctxt:
537         pop_ctxt(&save, &new);
538 cleanup_alloc:
539         OBD_FREE_PTR(dev->od_ost_map);
540         return rc;
541 }
542
543 static void osd_seq_free(struct osd_obj_seq *osd_seq)
544 {
545         int j;
546
547         if (osd_seq->oos_dirs) {
548                 for (j = 0; j < osd_seq->oos_subdir_count; j++) {
549                         if (osd_seq->oos_dirs[j])
550                                 dput(osd_seq->oos_dirs[j]);
551                 }
552                 OBD_FREE_PTR_ARRAY(osd_seq->oos_dirs,
553                                    osd_seq->oos_subdir_count);
554         }
555
556         if (osd_seq->oos_root)
557                 dput(osd_seq->oos_root);
558
559         OBD_FREE_PTR(osd_seq);
560 }
561
562 static void osd_ost_fini(struct osd_device *osd)
563 {
564         struct osd_obj_seq *osd_seq;
565         struct osd_obj_seq *tmp;
566         struct osd_obj_map *map = osd->od_ost_map;
567
568         ENTRY;
569
570         if (map == NULL)
571                 return;
572
573         write_lock(&map->om_seq_list_lock);
574         list_for_each_entry_safe(osd_seq, tmp, &map->om_seq_list,
575                                  oos_seq_list) {
576                 list_del_init(&osd_seq->oos_seq_list);
577                 write_unlock(&map->om_seq_list_lock);
578                 osd_seq_free(osd_seq);
579                 write_lock(&map->om_seq_list_lock);
580         }
581         write_unlock(&map->om_seq_list_lock);
582         if (map->om_root)
583                 dput(map->om_root);
584         OBD_FREE_PTR(map);
585         osd->od_ost_map = NULL;
586         EXIT;
587 }
588
589 static int osd_index_backup_dir_init(const struct lu_env *env,
590                                      struct osd_device *dev)
591 {
592         struct lu_fid *fid = &osd_oti_get(env)->oti_fid;
593         struct lvfs_run_ctxt new;
594         struct lvfs_run_ctxt save;
595         struct dentry *dentry;
596         int rc = 0;
597
598         ENTRY;
599
600         lu_local_obj_fid(fid, INDEX_BACKUP_OID);
601         osd_push_ctxt(dev, &new, &save);
602         dentry = simple_mkdir(env, dev, osd_sb(dev)->s_root, fid,
603                               INDEX_BACKUP_DIR, LMAC_NOT_IN_OI, 0755, NULL);
604         if (IS_ERR(dentry)) {
605                 rc = PTR_ERR(dentry);
606         } else {
607                 dev->od_index_backup_inode = igrab(dentry->d_inode);
608                 dput(dentry);
609         }
610         pop_ctxt(&save, &new);
611
612         RETURN(rc);
613 }
614
615 static void osd_index_backup_dir_fini(struct osd_device *dev)
616 {
617         iput(dev->od_index_backup_inode);
618         dev->od_index_backup_inode = NULL;
619 }
620
621 int osd_obj_map_init(const struct lu_env *env, struct osd_device *dev)
622 {
623         int rc;
624         bool mdt_init = false;
625
626         ENTRY;
627
628         rc = osd_ost_init(env, dev);
629         if (rc)
630                 RETURN(rc);
631
632         if (!dev->od_is_ost) {
633                 rc = osd_mdt_init(env, dev);
634                 if (rc) {
635                         osd_ost_fini(dev);
636                         RETURN(rc);
637                 }
638
639                 mdt_init = true;
640         }
641
642         rc = osd_index_backup_dir_init(env, dev);
643         if (rc) {
644                 osd_ost_fini(dev);
645                 if (mdt_init)
646                         osd_mdt_fini(dev);
647         }
648
649         RETURN(rc);
650 }
651
652 static struct osd_obj_seq *osd_seq_find_locked(struct osd_obj_map *map, u64 seq)
653 {
654         struct osd_obj_seq *osd_seq;
655
656         list_for_each_entry(osd_seq, &map->om_seq_list, oos_seq_list) {
657                 if (osd_seq->oos_seq == seq)
658                         return osd_seq;
659         }
660         return NULL;
661 }
662
663 static struct osd_obj_seq *osd_seq_find(struct osd_obj_map *map, u64 seq)
664 {
665         struct osd_obj_seq *osd_seq;
666
667         read_lock(&map->om_seq_list_lock);
668         osd_seq = osd_seq_find_locked(map, seq);
669         read_unlock(&map->om_seq_list_lock);
670         return osd_seq;
671 }
672
/*
 * Release everything osd_obj_map_init() set up: the index backup
 * directory inode first, then the OST map, then the MDT map.
 */
void osd_obj_map_fini(struct osd_device *dev)
{
        osd_index_backup_dir_fini(dev);

        osd_ost_fini(dev);
        osd_mdt_fini(dev);
}
679
680 /**
681  * Update the specified OI mapping.
682  *
683  * \retval   1, changed nothing
684  * \retval   0, changed successfully
685  * \retval -ve, on error
686  */
687 static int osd_obj_update_entry(struct osd_thread_info *info,
688                                 struct osd_device *osd,
689                                 struct dentry *dir, const char *name,
690                                 const struct lu_fid *fid,
691                                 const struct osd_inode_id *id,
692                                 handle_t *th)
693 {
694         struct inode *parent = dir->d_inode;
695         struct dentry *child;
696         struct ldiskfs_dir_entry_2 *de;
697         struct buffer_head *bh;
698         struct inode *inode;
699         struct dentry *dentry = &info->oti_obj_dentry;
700         struct osd_inode_id *oi_id = &info->oti_id3;
701         struct lustre_mdt_attrs *lma = &info->oti_ost_attrs.loa_lma;
702         struct lu_fid *oi_fid = &lma->lma_self_fid;
703         int rc;
704
705         ENTRY;
706
707         LASSERT(th != NULL);
708         LASSERT(th->h_transaction != NULL);
709
710         child = &info->oti_child_dentry;
711         child->d_parent = dir;
712         child->d_name.hash = 0;
713         child->d_name.name = name;
714         child->d_name.len = strlen(name);
715
716         dquot_initialize(parent);
717         inode_lock(parent);
718         bh = osd_ldiskfs_find_entry(parent, &child->d_name, &de, NULL, NULL);
719         if (IS_ERR(bh))
720                 GOTO(out, rc = PTR_ERR(bh));
721
722         if (le32_to_cpu(de->inode) == id->oii_ino)
723                 GOTO(out, rc = 1);
724
725         osd_id_gen(oi_id, le32_to_cpu(de->inode), OSD_OII_NOGEN);
726         inode = osd_iget(info, osd, oi_id);
727         if (IS_ERR(inode)) {
728                 rc = PTR_ERR(inode);
729                 if (rc == -ENOENT || rc == -ESTALE)
730                         goto update;
731                 GOTO(out, rc);
732         }
733
734         rc = osd_get_lma(info, inode, dentry, &info->oti_ost_attrs);
735         if (rc == -ENODATA) {
736                 rc = osd_get_idif(info, inode, dentry, oi_fid);
737                 if (rc > 0 || rc == -ENODATA) {
738                         oi_fid = NULL;
739                         rc = 0;
740                 }
741         }
742         iput(inode);
743
744         if (rc != 0)
745                 GOTO(out, rc);
746
747         /*
748          * If the OST-object has neither FID-in-LMA nor FID-in-ff, it is
749          * either a crashed object or a uninitialized one. Replace it.
750          */
751         if (oi_fid != NULL && lu_fid_eq(fid, oi_fid)) {
752                 CERROR("%s: the FID "DFID" is used by two objects: "
753                        "%u/%u %u/%u\n", osd_name(osd), PFID(fid),
754                        oi_id->oii_ino, oi_id->oii_gen,
755                        id->oii_ino, id->oii_gen);
756                 GOTO(out, rc = -EEXIST);
757         }
758
759         if (fid_is_idif(fid) && oi_fid != NULL && fid_is_idif(oi_fid)) {
760                 __u32 idx1 = fid_idif_ost_idx(fid);
761                 __u32 idx2 = fid_idif_ost_idx(oi_fid);
762                 struct ost_id *ostid = &info->oti_ostid;
763                 struct lu_fid *tfid = &info->oti_fid3;
764
765                 LASSERTF(idx1 == 0 || idx1 == osd->od_index,
766                          "invalid given FID "DFID", not match the "
767                          "device index %u\n", PFID(fid), osd->od_index);
768
769                 if (idx1 != idx2) {
770                         if (idx1 == 0 && idx2 == osd->od_index) {
771                                 fid_to_ostid(fid, ostid);
772                                 ostid_to_fid(tfid, ostid, idx2);
773                                 if (lu_fid_eq(tfid, oi_fid)) {
774                                         CERROR("%s: the FID "DFID" is used by "
775                                                "two objects(2): %u/%u %u/%u\n",
776                                                osd_name(osd), PFID(fid),
777                                                oi_id->oii_ino, oi_id->oii_gen,
778                                                id->oii_ino, id->oii_gen);
779
780                                         GOTO(out, rc = -EEXIST);
781                                 }
782                         } else if (idx2 == 0 && idx1 == osd->od_index) {
783                                 fid_to_ostid(oi_fid, ostid);
784                                 ostid_to_fid(tfid, ostid, idx1);
785                                 if (lu_fid_eq(tfid, fid)) {
786                                         CERROR("%s: the FID "DFID" is used by "
787                                                "two objects(2): %u/%u %u/%u\n",
788                                                osd_name(osd), PFID(fid),
789                                                oi_id->oii_ino, oi_id->oii_gen,
790                                                id->oii_ino, id->oii_gen);
791
792                                         GOTO(out, rc = -EEXIST);
793                                 }
794                         }
795                 }
796         }
797
798 update:
799         /*
800          * There may be temporary inconsistency: On one hand, the new
801          * object may be referenced by multiple entries, which is out
802          * of our control unless we traverse the whole /O completely,
803          * which is non-flat order and inefficient, should be avoided;
804          * On the other hand, the old object may become orphan if it
805          * is still valid. Since it was referenced by an invalid entry,
806          * making it as invisible temporary may be not worse. OI scrub
807          * will process it later.
808          */
809         rc = osd_ldiskfs_journal_get_write_access(th, parent->i_sb, bh,
810                                                   LDISKFS_JTR_NONE);
811         if (rc != 0)
812                 GOTO(out, rc);
813
814         de->inode = cpu_to_le32(id->oii_ino);
815         rc = ldiskfs_handle_dirty_metadata(th, NULL, bh);
816
817         GOTO(out, rc);
818
819 out:
820         if (!IS_ERR(bh))
821                 brelse(bh);
822         inode_unlock(parent);
823         return rc;
824 }
825
826 int osd_obj_del_entry(struct osd_thread_info *info, struct osd_device *osd,
827                       struct dentry *dird, char *name, int namelen,
828                       handle_t *th)
829 {
830         struct ldiskfs_dir_entry_2 *de;
831         struct buffer_head *bh;
832         struct dentry *child;
833         struct inode *dir = dird->d_inode;
834         int rc;
835
836         ENTRY;
837
838         LASSERT(th != NULL);
839         LASSERT(th->h_transaction != NULL);
840
841         child = &info->oti_child_dentry;
842         child->d_name.hash = 0;
843         child->d_name.name = name;
844         child->d_name.len = namelen;
845         child->d_parent = dird;
846         child->d_inode = NULL;
847
848         dquot_initialize(dir);
849         inode_lock(dir);
850         bh = osd_ldiskfs_find_entry(dir, &child->d_name, &de, NULL, NULL);
851         if (IS_ERR(bh)) {
852                 rc = PTR_ERR(bh);
853         } else {
854                 rc = ldiskfs_delete_entry(th, dir, de, bh);
855                 brelse(bh);
856         }
857         inode_unlock(dir);
858
859         RETURN(rc);
860 }
861
862 static int osd_obj_add_entry(struct osd_thread_info *info,
863                              struct osd_device *osd,
864                              struct dentry *dir, char *name,
865                              const struct osd_inode_id *id,
866                              handle_t *th)
867 {
868         struct dentry *child;
869         struct inode *inode;
870         int rc;
871
872         ENTRY;
873
874         if (CFS_FAIL_CHECK(OBD_FAIL_OSD_COMPAT_NO_ENTRY))
875                 RETURN(0);
876
877         LASSERT(th != NULL);
878         LASSERT(th->h_transaction != NULL);
879
880         inode = info->oti_inode;
881         if (unlikely(inode == NULL)) {
882                 struct ldiskfs_inode_info *lii;
883
884                 OBD_ALLOC_PTR(lii);
885                 if (lii == NULL)
886                         RETURN(-ENOMEM);
887                 inode = info->oti_inode = &lii->vfs_inode;
888         }
889
890         inode->i_sb = osd_sb(osd);
891         osd_id_to_inode(inode, id);
892         inode->i_mode = S_IFREG; /* for type in ldiskfs dir entry */
893
894         child = &info->oti_child_dentry;
895         child->d_name.hash = 0;
896         child->d_name.name = name;
897         child->d_name.len = strlen(name);
898         child->d_parent = dir;
899         child->d_inode = inode;
900
901         if (CFS_FAIL_CHECK(OBD_FAIL_OSD_COMPAT_INVALID_ENTRY))
902                 inode->i_ino++;
903
904         dquot_initialize(dir->d_inode);
905         inode_lock(dir->d_inode);
906         rc = osd_ldiskfs_add_entry(info, osd, th, child, inode, NULL);
907         inode_unlock(dir->d_inode);
908
909         RETURN(rc);
910 }
911
912 /**
913  * Use %llu for legacy OST sequences, but use %llx for new
914  * sequences names, so that the O/{seq}/dN/{oid} more closely
915  * follows the DFID/PFID format. This makes it easier to map from
916  * debug messages to objects in the future, and the legacy space
917  * of FID_SEQ_OST_MDT0 will be unused in the future.
918  **/
919 static inline void osd_seq_name(char *seq_name, size_t name_size, u64 seq)
920 {
921         snprintf(seq_name, name_size,
922                  (fid_seq_is_rsvd(seq) ||
923                   fid_seq_is_mdt0(seq)) ? "%llu" : "%llx",
924                  fid_seq_is_idif(seq) ? 0 : seq);
925 }
926
927 /* external locking is required */
928 static int osd_seq_load_locked(struct osd_thread_info *info,
929                                struct osd_device *osd,
930                                struct osd_obj_seq *osd_seq)
931 {
932         struct osd_obj_map *map = osd->od_ost_map;
933         struct dentry *seq_dir;
934         int rc = 0;
935         int i;
936         char dir_name[32];
937
938         ENTRY;
939
940         if (osd_seq->oos_root != NULL)
941                 RETURN(0);
942
943         LASSERT(map);
944         LASSERT(map->om_root);
945
946         osd_seq_name(dir_name, sizeof(dir_name), osd_seq->oos_seq);
947
948         seq_dir = simple_mkdir(info->oti_env, osd, map->om_root, NULL, dir_name,
949                                LMAC_NOT_IN_OI | LMAC_FID_ON_OST, 0755, NULL);
950         if (IS_ERR(seq_dir))
951                 GOTO(out_err, rc = PTR_ERR(seq_dir));
952         else if (seq_dir->d_inode == NULL)
953                 GOTO(out_put, rc = -EFAULT);
954
955         osd_seq->oos_root = seq_dir;
956
957         LASSERT(osd_seq->oos_dirs == NULL);
958         OBD_ALLOC_PTR_ARRAY(osd_seq->oos_dirs, osd_seq->oos_subdir_count);
959         if (osd_seq->oos_dirs == NULL)
960                 GOTO(out_put, rc = -ENOMEM);
961
962         for (i = 0; i < osd_seq->oos_subdir_count; i++) {
963                 struct dentry   *dir;
964
965                 snprintf(dir_name, sizeof(dir_name), "d%u", i);
966                 dir = simple_mkdir(info->oti_env, osd, osd_seq->oos_root, NULL,
967                                    dir_name, LMAC_NOT_IN_OI | LMAC_FID_ON_OST,
968                                    0700, NULL);
969                 if (IS_ERR(dir)) {
970                         GOTO(out_free, rc = PTR_ERR(dir));
971                 } else if (dir->d_inode == NULL) {
972                         dput(dir);
973                         GOTO(out_free, rc = -EFAULT);
974                 }
975
976                 osd_seq->oos_dirs[i] = dir;
977         }
978
979         if (rc != 0) {
980 out_free:
981                 for (i = 0; i < osd_seq->oos_subdir_count; i++) {
982                         if (osd_seq->oos_dirs[i] != NULL)
983                                 dput(osd_seq->oos_dirs[i]);
984                 }
985                 OBD_FREE_PTR_ARRAY(osd_seq->oos_dirs,
986                                    osd_seq->oos_subdir_count);
987 out_put:
988                 dput(seq_dir);
989                 osd_seq->oos_root = NULL;
990         }
991 out_err:
992         RETURN(rc);
993 }
994
995 struct osd_obj_seq *osd_seq_load(struct osd_thread_info *info,
996                                  struct osd_device *osd, u64 seq)
997 {
998         struct osd_obj_map *map;
999         struct osd_obj_seq *osd_seq;
1000         int rc = 0;
1001
1002         ENTRY;
1003
1004         map = osd->od_ost_map;
1005         LASSERT(map);
1006         LASSERT(map->om_root);
1007
1008         osd_seq = osd_seq_find(map, seq);
1009         if (likely(osd_seq != NULL))
1010                 RETURN(osd_seq);
1011
1012         /* Serializing init process */
1013         mutex_lock(&map->om_dir_init_mutex);
1014
1015         /* Check whether the seq has been added */
1016         read_lock(&map->om_seq_list_lock);
1017         osd_seq = osd_seq_find_locked(map, seq);
1018         if (osd_seq != NULL) {
1019                 read_unlock(&map->om_seq_list_lock);
1020                 GOTO(cleanup, rc = 0);
1021         }
1022         read_unlock(&map->om_seq_list_lock);
1023
1024         OBD_ALLOC_PTR(osd_seq);
1025         if (osd_seq == NULL)
1026                 GOTO(cleanup, rc = -ENOMEM);
1027
1028         INIT_LIST_HEAD(&osd_seq->oos_seq_list);
1029         osd_seq->oos_seq = seq;
1030         /*
1031          * Init subdir count to be 32, but each seq can have
1032          * different subdir count
1033          */
1034         osd_seq->oos_subdir_count = map->om_subdir_count;
1035         rc = osd_seq_load_locked(info, osd, osd_seq);
1036         if (rc != 0)
1037                 GOTO(cleanup, rc);
1038
1039         write_lock(&map->om_seq_list_lock);
1040         list_add(&osd_seq->oos_seq_list, &map->om_seq_list);
1041         write_unlock(&map->om_seq_list_lock);
1042
1043 cleanup:
1044         mutex_unlock(&map->om_dir_init_mutex);
1045         if (rc != 0) {
1046                 if (osd_seq != NULL)
1047                         OBD_FREE_PTR(osd_seq);
1048                 RETURN(ERR_PTR(rc));
1049         }
1050
1051         RETURN(osd_seq);
1052 }
1053
1054 int osd_obj_map_lookup(struct osd_thread_info *info, struct osd_device *dev,
1055                        const struct lu_fid *fid, struct osd_inode_id *id)
1056 {
1057         struct osd_obj_map *map;
1058         struct osd_obj_seq *osd_seq;
1059         struct dentry *d_seq;
1060         struct dentry *child;
1061         struct ost_id *ostid = &info->oti_ostid;
1062         int dirn;
1063         char name[32];
1064         struct ldiskfs_dir_entry_2 *de;
1065         struct buffer_head *bh;
1066         struct inode *dir;
1067         struct inode *inode;
1068
1069         ENTRY;
1070
1071         /* on the very first lookup we find and open directories */
1072         map = dev->od_ost_map;
1073         LASSERT(map);
1074         LASSERT(map->om_root);
1075
1076         fid_to_ostid(fid, ostid);
1077         osd_seq = osd_seq_load(info, dev, ostid_seq(ostid));
1078         if (IS_ERR(osd_seq))
1079                 RETURN(PTR_ERR(osd_seq));
1080
1081         dirn = ostid_id(ostid) & (osd_seq->oos_subdir_count - 1);
1082         d_seq = osd_seq->oos_dirs[dirn];
1083         LASSERT(d_seq);
1084
1085         osd_oid_name(name, sizeof(name), fid, ostid_id(ostid));
1086
1087         child = &info->oti_child_dentry;
1088         child->d_parent = d_seq;
1089         child->d_name.hash = 0;
1090         child->d_name.name = name;
1091         /* XXX: we can use rc from sprintf() instead of strlen() */
1092         child->d_name.len = strlen(name);
1093
1094         dir = d_seq->d_inode;
1095         inode_lock_shared(dir);
1096         bh = osd_ldiskfs_find_entry(dir, &child->d_name, &de, NULL, NULL);
1097         inode_unlock_shared(dir);
1098
1099         if (IS_ERR(bh))
1100                 RETURN(PTR_ERR(bh));
1101
1102         osd_id_gen(id, le32_to_cpu(de->inode), OSD_OII_NOGEN);
1103         brelse(bh);
1104
1105         inode = osd_iget(info, dev, id);
1106         if (IS_ERR(inode)) {
1107                 int rc = PTR_ERR(inode);
1108
1109                 RETURN(rc == -ENOENT ? -ESTALE : rc);
1110         }
1111
1112         iput(inode);
1113         RETURN(0);
1114 }
1115
1116 int osd_obj_map_insert(struct osd_thread_info *info,
1117                        struct osd_device *osd,
1118                        const struct lu_fid *fid,
1119                        const struct osd_inode_id *id,
1120                        handle_t *th)
1121 {
1122         struct osd_obj_map *map;
1123         struct osd_obj_seq *osd_seq;
1124         struct dentry *d;
1125         struct ost_id *ostid = &info->oti_ostid;
1126         u64 oid;
1127         int dirn, rc = 0;
1128         char name[32];
1129
1130         ENTRY;
1131
1132         map = osd->od_ost_map;
1133         LASSERT(map);
1134
1135         /* map fid to seq:objid */
1136         fid_to_ostid(fid, ostid);
1137
1138         oid = ostid_id(ostid);
1139         osd_seq = osd_seq_load(info, osd, ostid_seq(ostid));
1140         if (IS_ERR(osd_seq))
1141                 RETURN(PTR_ERR(osd_seq));
1142
1143         dirn = oid & (osd_seq->oos_subdir_count - 1);
1144         d = osd_seq->oos_dirs[dirn];
1145         LASSERT(d);
1146
1147         osd_oid_name(name, sizeof(name), fid, oid);
1148
1149 again:
1150         rc = osd_obj_add_entry(info, osd, d, name, id, th);
1151         if (rc == -EEXIST) {
1152                 rc = osd_obj_update_entry(info, osd, d, name, fid, id, th);
1153                 if (unlikely(rc == -ENOENT))
1154                         goto again;
1155
1156                 if (unlikely(rc == 1))
1157                         RETURN(0);
1158         }
1159
1160         RETURN(rc);
1161 }
1162
1163 int osd_obj_map_delete(struct osd_thread_info *info, struct osd_device *osd,
1164                        const struct lu_fid *fid, handle_t *th)
1165 {
1166         struct osd_obj_map *map;
1167         struct osd_obj_seq *osd_seq;
1168         struct dentry *d;
1169         struct ost_id *ostid = &info->oti_ostid;
1170         int dirn, rc = 0;
1171         char name[32];
1172
1173         ENTRY;
1174
1175         map = osd->od_ost_map;
1176         LASSERT(map);
1177
1178         /* map fid to seq:objid */
1179         fid_to_ostid(fid, ostid);
1180
1181         osd_seq = osd_seq_load(info, osd, ostid_seq(ostid));
1182         if (IS_ERR(osd_seq))
1183                 GOTO(cleanup, rc = PTR_ERR(osd_seq));
1184
1185         dirn = ostid_id(ostid) & (osd_seq->oos_subdir_count - 1);
1186         d = osd_seq->oos_dirs[dirn];
1187         LASSERT(d);
1188
1189         osd_oid_name(name, sizeof(name), fid, ostid_id(ostid));
1190         rc = osd_obj_del_entry(info, osd, d, name, strlen(name), th);
1191 cleanup:
1192         RETURN(rc);
1193 }
1194
1195 int osd_obj_map_update(struct osd_thread_info *info,
1196                        struct osd_device *osd,
1197                        const struct lu_fid *fid,
1198                        const struct osd_inode_id *id,
1199                        handle_t *th)
1200 {
1201         struct osd_obj_seq *osd_seq;
1202         struct dentry *d;
1203         struct ost_id *ostid = &info->oti_ostid;
1204         int dirn, rc = 0;
1205         char name[32];
1206
1207         ENTRY;
1208
1209         fid_to_ostid(fid, ostid);
1210         osd_seq = osd_seq_load(info, osd, ostid_seq(ostid));
1211         if (IS_ERR(osd_seq))
1212                 RETURN(PTR_ERR(osd_seq));
1213
1214         dirn = ostid_id(ostid) & (osd_seq->oos_subdir_count - 1);
1215         d = osd_seq->oos_dirs[dirn];
1216         LASSERT(d);
1217
1218         osd_oid_name(name, sizeof(name), fid, ostid_id(ostid));
1219         rc = osd_obj_update_entry(info, osd, d, name, fid, id, th);
1220
1221         RETURN(rc);
1222 }
1223
1224 int osd_obj_map_recover(struct osd_thread_info *info,
1225                         struct osd_device *osd,
1226                         struct inode *src_parent,
1227                         struct dentry *src_child,
1228                         const struct lu_fid *fid)
1229 {
1230         struct osd_obj_seq *osd_seq;
1231         struct dentry *tgt_parent;
1232         struct dentry *tgt_child = &info->oti_child_dentry;
1233         struct inode *dir;
1234         struct inode *inode = src_child->d_inode;
1235         struct ost_id *ostid = &info->oti_ostid;
1236         handle_t *jh;
1237         struct ldiskfs_dir_entry_2 *de;
1238         struct buffer_head *bh;
1239         char name[32];
1240         int dirn;
1241         int rc = 0;
1242
1243         ENTRY;
1244
1245         if (fid_is_last_id(fid)) {
1246                 osd_seq = osd_seq_load(info, osd, fid_seq(fid));
1247                 if (IS_ERR(osd_seq))
1248                         RETURN(PTR_ERR(osd_seq));
1249
1250                 tgt_parent = osd_seq->oos_root;
1251                 tgt_child->d_name.name = "LAST_ID";
1252                 tgt_child->d_name.len = strlen("LAST_ID");
1253         } else {
1254                 fid_to_ostid(fid, ostid);
1255                 osd_seq = osd_seq_load(info, osd, ostid_seq(ostid));
1256                 if (IS_ERR(osd_seq))
1257                         RETURN(PTR_ERR(osd_seq));
1258
1259                 dirn = ostid_id(ostid) & (osd_seq->oos_subdir_count - 1);
1260                 tgt_parent = osd_seq->oos_dirs[dirn];
1261                 osd_oid_name(name, sizeof(name), fid, ostid_id(ostid));
1262                 tgt_child->d_name.name = name;
1263                 tgt_child->d_name.len = strlen(name);
1264         }
1265         LASSERT(tgt_parent != NULL);
1266
1267         dir = tgt_parent->d_inode;
1268         tgt_child->d_name.hash = 0;
1269         tgt_child->d_parent = tgt_parent;
1270         tgt_child->d_inode = inode;
1271
1272         /* The non-initialized src_child may be destroyed. */
1273         jh = osd_journal_start_sb(osd_sb(osd), LDISKFS_HT_MISC,
1274                                 osd_dto_credits_noquota[DTO_INDEX_DELETE] +
1275                                 osd_dto_credits_noquota[DTO_INDEX_INSERT] +
1276                                 osd_dto_credits_noquota[DTO_OBJECT_DELETE]);
1277         if (IS_ERR(jh))
1278                 RETURN(PTR_ERR(jh));
1279
1280         dquot_initialize(src_parent);
1281         dquot_initialize(dir);
1282
1283         inode_lock(dir);
1284         bh = osd_ldiskfs_find_entry(dir, &tgt_child->d_name, &de, NULL, NULL);
1285         if (!IS_ERR(bh)) {
1286                 /*
1287                  * XXX: If some other object occupied the same slot. And If such
1288                  *      inode is zero-sized and with SUID+SGID, then means it is
1289                  *      a new created one. Maybe we can remove it and insert the
1290                  *      original one back to the /O/<seq>/d<x>. But there are
1291                  *      something to be considered:
1292                  *
1293                  *      1) The OST-object under /lost+found has crashed LMA.
1294                  *         So it should not conflict with the current one.
1295                  *
1296                  *      2) There are race conditions that: someone may just want
1297                  *         to modify the current one. Even if the OI scrub takes
1298                  *         the object lock when remove the current one, it still
1299                  *         cause the modification to be lost becasue the target
1300                  *         has been removed when the RPC service thread waiting
1301                  *         for the lock.
1302                  *
1303                  *      So keep it there before we have suitable solution.
1304                  */
1305                 brelse(bh);
1306                 inode_unlock(dir);
1307                 ldiskfs_journal_stop(jh);
1308
1309                 rc = -EEXIST;
1310                 /* If the src object has never been modified, then remove it. */
1311                 if (inode->i_size == 0 && inode->i_mode & S_ISUID &&
1312                     inode->i_mode & S_ISGID) {
1313                         rc = vfs_unlink(&nop_mnt_idmap, src_parent, src_child);
1314                         if (unlikely(rc == -ENOENT))
1315                                 rc = 0;
1316                 }
1317                 if (rc)
1318                         RETURN(rc);
1319
1320                 jh = osd_journal_start_sb(osd_sb(osd), LDISKFS_HT_MISC,
1321                                 osd_dto_credits_noquota[DTO_INDEX_DELETE] +
1322                                 osd_dto_credits_noquota[DTO_INDEX_INSERT] +
1323                                 osd_dto_credits_noquota[DTO_OBJECT_DELETE]);
1324                 if (IS_ERR(jh))
1325                         RETURN(PTR_ERR(jh));
1326                 inode_lock(dir);
1327         }
1328
1329         bh = osd_ldiskfs_find_entry(src_parent, &src_child->d_name, &de,
1330                                     NULL, NULL);
1331         if (unlikely(IS_ERR(bh)))
1332                 GOTO(unlock, rc = PTR_ERR(bh));
1333
1334         rc = ldiskfs_delete_entry(jh, src_parent, de, bh);
1335         brelse(bh);
1336         if (rc != 0)
1337                 GOTO(unlock, rc);
1338
1339         rc = osd_ldiskfs_add_entry(info, osd, jh, tgt_child, inode, NULL);
1340
1341         GOTO(unlock, rc);
1342
1343 unlock:
1344         inode_unlock(dir);
1345         ldiskfs_journal_stop(jh);
1346         return rc;
1347 }
1348
1349 static struct dentry *
1350 osd_object_spec_find(struct osd_thread_info *info, struct osd_device *osd,
1351                      const struct lu_fid *fid, char **name)
1352 {
1353         struct dentry *root = ERR_PTR(-ENOENT);
1354
1355         if (fid_is_last_id(fid)) {
1356                 struct osd_obj_seq *osd_seq;
1357
1358                 /* on creation of LAST_ID we create O/<seq> hierarchy */
1359                 osd_seq = osd_seq_load(info, osd, fid_seq(fid));
1360                 if (IS_ERR(osd_seq))
1361                         RETURN((struct dentry *)osd_seq);
1362
1363                 *name = "LAST_ID";
1364                 root = osd_seq->oos_root;
1365         } else {
1366                 *name = osd_lf_fid2name(fid);
1367                 if (*name == NULL)
1368                         CWARN("UNKNOWN COMPAT FID "DFID"\n", PFID(fid));
1369                 else if ((*name)[0])
1370                         root = osd_sb(osd)->s_root;
1371         }
1372
1373         return root;
1374 }
1375
1376 int osd_obj_spec_update(struct osd_thread_info *info, struct osd_device *osd,
1377                         const struct lu_fid *fid, const struct osd_inode_id *id,
1378                         handle_t *th)
1379 {
1380         struct dentry *root;
1381         char *name = NULL;
1382         int rc;
1383
1384         ENTRY;
1385
1386         root = osd_object_spec_find(info, osd, fid, &name);
1387         if (!IS_ERR(root)) {
1388                 rc = osd_obj_update_entry(info, osd, root, name, fid, id, th);
1389         } else {
1390                 rc = PTR_ERR(root);
1391                 if (rc == -ENOENT)
1392                         rc = 1;
1393         }
1394
1395         RETURN(rc);
1396 }
1397
1398 int osd_obj_spec_insert(struct osd_thread_info *info, struct osd_device *osd,
1399                         const struct lu_fid *fid, const struct osd_inode_id *id,
1400                         handle_t *th)
1401 {
1402         struct dentry *root;
1403         char *name = NULL;
1404         int rc;
1405
1406         ENTRY;
1407
1408         root = osd_object_spec_find(info, osd, fid, &name);
1409         if (!IS_ERR(root)) {
1410                 rc = osd_obj_add_entry(info, osd, root, name, id, th);
1411         } else {
1412                 rc = PTR_ERR(root);
1413                 if (rc == -ENOENT)
1414                         rc = 0;
1415         }
1416
1417         RETURN(rc);
1418 }
1419
1420 int osd_obj_spec_lookup(struct osd_thread_info *info, struct osd_device *osd,
1421                         const struct lu_fid *fid, struct osd_inode_id *id,
1422                         enum oi_check_flags flags)
1423 {
1424         struct dentry *root;
1425         struct dentry *dentry;
1426         struct inode *inode;
1427         char *name = NULL;
1428         int rc = -ENOENT;
1429
1430         ENTRY;
1431
1432         if (fid_is_last_id(fid)) {
1433                 struct osd_obj_seq *osd_seq;
1434
1435                 osd_seq = osd_seq_load(info, osd, fid_seq(fid));
1436                 if (IS_ERR(osd_seq))
1437                         RETURN(PTR_ERR(osd_seq));
1438                 root = osd_seq->oos_root;
1439                 name = "LAST_ID";
1440         } else {
1441                 root = osd_sb(osd)->s_root;
1442                 name = osd_lf_fid2name(fid);
1443                 if (name == NULL || strlen(name) == 0)
1444                         RETURN(-ENOENT);
1445         }
1446
1447         dentry = osd_lookup_one_len_common(osd, name, root, strlen(name),
1448                                            flags);
1449         if (!IS_ERR(dentry)) {
1450                 inode = dentry->d_inode;
1451                 if (inode) {
1452                         if (is_bad_inode(inode)) {
1453                                 rc = -EIO;
1454                         } else {
1455                                 osd_id_gen(id, inode->i_ino,
1456                                            inode->i_generation);
1457                                 rc = 0;
1458                         }
1459                 }
1460                 /*
1461                  * if dentry is accessible after osd_compat_spec_insert it
1462                  * will still contain NULL inode, so don't keep it in cache
1463                  */
1464                 d_invalidate(dentry);
1465                 dput(dentry);
1466         }
1467
1468         RETURN(rc);
1469 }