Whamcloud - gitweb
LU-10308 misc: update Intel copyright messages for 2017
[fs/lustre-release.git] / lustre / osd-ldiskfs / osd_compat.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2012, 2017, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  * Lustre is a trademark of Sun Microsystems, Inc.
31  *
32  * lustre/osd/osd_compat.c
33  *
34  * on-disk structure for managing /O
35  *
36  * Author: Alex Zhuravlev <bzzz@whamcloud.com>
37  */
38
39 /* prerequisite for linux/xattr.h */
40 #include <linux/types.h>
41 /* prerequisite for linux/xattr.h */
42 #include <linux/fs.h>
43 /* XATTR_{REPLACE,CREATE} */
44 #include <linux/xattr.h>
45
46 /*
47  * struct OBD_{ALLOC,FREE}*()
48  * OBD_FAIL_CHECK
49  */
50 #include <obd_support.h>
51
52 #include "osd_internal.h"
53 #include "osd_oi.h"
54
55 static void osd_push_ctxt(const struct osd_device *dev,
56                           struct lvfs_run_ctxt *newctxt,
57                           struct lvfs_run_ctxt *save)
58 {
59         OBD_SET_CTXT_MAGIC(newctxt);
60         newctxt->pwdmnt = dev->od_mnt;
61         newctxt->pwd = dev->od_mnt->mnt_root;
62         newctxt->fs = get_ds();
63         newctxt->umask = current_umask();
64         newctxt->dt = NULL;
65
66         push_ctxt(save, newctxt);
67 }
68
69 /* utility to make a directory */
70 static struct dentry *simple_mkdir(struct dentry *dir, struct vfsmount *mnt,
71                                    const char *name, int mode, int fix)
72 {
73         struct dentry *dchild;
74         int err = 0;
75         ENTRY;
76
77         // ASSERT_KERNEL_CTXT("kernel doing mkdir outside kernel context\n");
78         CDEBUG(D_INODE, "creating directory %.*s\n", (int)strlen(name), name);
79         dchild = ll_lookup_one_len(name, dir, strlen(name));
80         if (IS_ERR(dchild))
81                 GOTO(out_up, dchild);
82
83         if (dchild->d_inode) {
84                 int old_mode = dchild->d_inode->i_mode;
85                 if (!S_ISDIR(old_mode)) {
86                         CERROR("found %s (%lu/%u) is mode %o\n", name,
87                                dchild->d_inode->i_ino,
88                                dchild->d_inode->i_generation, old_mode);
89                         GOTO(out_err, err = -ENOTDIR);
90                 }
91
92                 /* Fixup directory permissions if necessary */
93                 if (fix && (old_mode & S_IALLUGO) != (mode & S_IALLUGO)) {
94                         CDEBUG(D_CONFIG,
95                                "fixing permissions on %s from %o to %o\n",
96                                name, old_mode, mode);
97                         dchild->d_inode->i_mode = (mode & S_IALLUGO) |
98                                                   (old_mode & ~S_IALLUGO);
99                         mark_inode_dirty(dchild->d_inode);
100                 }
101                 GOTO(out_up, dchild);
102         }
103
104         err = vfs_mkdir(dir->d_inode, dchild, mode);
105         if (err)
106                 GOTO(out_err, err);
107
108         RETURN(dchild);
109
110 out_err:
111         dput(dchild);
112         dchild = ERR_PTR(err);
113 out_up:
114         return dchild;
115 }
116
117 static int osd_last_rcvd_subdir_count(struct osd_device *osd)
118 {
119         struct lr_server_data lsd;
120         struct dentry        *dlast;
121         loff_t                off;
122         int                   rc = 0;
123         int                   count = OBJ_SUBDIR_COUNT;
124
125         ENTRY;
126
127         dlast = ll_lookup_one_len(LAST_RCVD, osd_sb(osd)->s_root,
128                                   strlen(LAST_RCVD));
129         if (IS_ERR(dlast))
130                 return PTR_ERR(dlast);
131         else if (dlast->d_inode == NULL)
132                 goto out;
133
134         off = 0;
135         rc = osd_ldiskfs_read(dlast->d_inode, &lsd, sizeof(lsd), &off);
136         if (rc == sizeof(lsd)) {
137                 CDEBUG(D_INFO, "read last_rcvd header, uuid = %s, "
138                        "subdir count = %d\n", lsd.lsd_uuid,
139                        lsd.lsd_subdir_count);
140                 if (le16_to_cpu(lsd.lsd_subdir_count) > 0)
141                         count = le16_to_cpu(lsd.lsd_subdir_count);
142         } else if (rc != 0) {
143                 CERROR("Can't read last_rcvd file, rc = %d\n", rc);
144                 if (rc > 0)
145                         rc = -EFAULT;
146                 dput(dlast);
147                 return rc;
148         }
149 out:
150         dput(dlast);
151         LASSERT(count > 0);
152         return count;
153 }
154
155 static int osd_mdt_init(const struct lu_env *env, struct osd_device *dev)
156 {
157         struct lvfs_run_ctxt    new;
158         struct lvfs_run_ctxt    save;
159         struct dentry           *parent;
160         struct osd_mdobj_map    *omm;
161         struct dentry           *d;
162         struct osd_thread_info  *info = osd_oti_get(env);
163         struct lu_fid           *fid = &info->oti_fid3;
164         int                     rc = 0;
165         ENTRY;
166
167         OBD_ALLOC_PTR(dev->od_mdt_map);
168         if (dev->od_mdt_map == NULL)
169                 RETURN(-ENOMEM);
170
171         omm = dev->od_mdt_map;
172
173         parent = osd_sb(dev)->s_root;
174         osd_push_ctxt(dev, &new, &save);
175
176         d = simple_mkdir(parent, dev->od_mnt, REMOTE_PARENT_DIR,
177                          0755, 1);
178         if (IS_ERR(d))
179                 GOTO(cleanup, rc = PTR_ERR(d));
180
181         omm->omm_remote_parent = d;
182
183         /* Set LMA for remote parent inode */
184         lu_local_obj_fid(fid, REMOTE_PARENT_DIR_OID);
185         rc = osd_ea_fid_set(info, d->d_inode, fid, LMAC_NOT_IN_OI, 0);
186
187         GOTO(cleanup, rc);
188
189 cleanup:
190         pop_ctxt(&save, &new);
191         if (rc) {
192                 if (omm->omm_remote_parent != NULL)
193                         dput(omm->omm_remote_parent);
194                 OBD_FREE_PTR(omm);
195                 dev->od_mdt_map = NULL;
196         }
197         return rc;
198 }
199
200 static void osd_mdt_fini(struct osd_device *osd)
201 {
202         struct osd_mdobj_map *omm = osd->od_mdt_map;
203
204         if (omm == NULL)
205                 return;
206
207         if (omm->omm_remote_parent)
208                 dput(omm->omm_remote_parent);
209
210         OBD_FREE_PTR(omm);
211         osd->od_ost_map = NULL;
212 }
213
214 int osd_add_to_remote_parent(const struct lu_env *env, struct osd_device *osd,
215                              struct osd_object *obj, struct osd_thandle *oh)
216 {
217         struct osd_mdobj_map    *omm = osd->od_mdt_map;
218         struct osd_thread_info  *oti = osd_oti_get(env);
219         struct lustre_mdt_attrs *lma = &oti->oti_ost_attrs.loa_lma;
220         char                    *name = oti->oti_name;
221         struct osd_thread_info  *info = osd_oti_get(env);
222         struct dentry           *dentry;
223         struct dentry           *parent;
224         int                     rc;
225
226         if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_NO_AGENTENT))
227                 RETURN(0);
228
229         /* Set REMOTE_PARENT in lma, so other process like unlink or lfsck
230          * can identify this object quickly */
231         rc = osd_get_lma(oti, obj->oo_inode, &oti->oti_obj_dentry,
232                          &oti->oti_ost_attrs);
233         if (rc)
234                 RETURN(rc);
235
236         lma->lma_incompat |= LMAI_REMOTE_PARENT;
237         lustre_lma_swab(lma);
238         rc = __osd_xattr_set(oti, obj->oo_inode, XATTR_NAME_LMA, lma,
239                              sizeof(*lma), XATTR_REPLACE);
240         if (rc)
241                 RETURN(rc);
242
243         parent = omm->omm_remote_parent;
244         sprintf(name, DFID_NOBRACE, PFID(lu_object_fid(&obj->oo_dt.do_lu)));
245         dentry = osd_child_dentry_by_inode(env, parent->d_inode,
246                                            name, strlen(name));
247         mutex_lock(&parent->d_inode->i_mutex);
248         rc = osd_ldiskfs_add_entry(info, osd, oh->ot_handle, dentry,
249                                    obj->oo_inode, NULL);
250         if (!rc && S_ISDIR(obj->oo_inode->i_mode))
251                 ldiskfs_inc_count(oh->ot_handle, parent->d_inode);
252         else if (unlikely(rc == -EEXIST))
253                 rc = 0;
254         if (!rc)
255                 lu_object_set_agent_entry(&obj->oo_dt.do_lu);
256         CDEBUG(D_INODE, "%s: create agent entry for %s: rc = %d\n",
257                osd_name(osd), name, rc);
258         mark_inode_dirty(parent->d_inode);
259         mutex_unlock(&parent->d_inode->i_mutex);
260         RETURN(rc);
261 }
262
263 int osd_delete_from_remote_parent(const struct lu_env *env,
264                                   struct osd_device *osd,
265                                   struct osd_object *obj,
266                                   struct osd_thandle *oh, bool destroy)
267 {
268         struct osd_mdobj_map       *omm = osd->od_mdt_map;
269         struct osd_thread_info     *oti = osd_oti_get(env);
270         struct lustre_mdt_attrs    *lma = &oti->oti_ost_attrs.loa_lma;
271         char                       *name = oti->oti_name;
272         struct dentry              *dentry;
273         struct dentry              *parent;
274         struct ldiskfs_dir_entry_2 *de;
275         struct buffer_head         *bh;
276         int                        rc;
277
278         parent = omm->omm_remote_parent;
279         sprintf(name, DFID_NOBRACE, PFID(lu_object_fid(&obj->oo_dt.do_lu)));
280         dentry = osd_child_dentry_by_inode(env, parent->d_inode,
281                                            name, strlen(name));
282         mutex_lock(&parent->d_inode->i_mutex);
283         bh = osd_ldiskfs_find_entry(parent->d_inode, &dentry->d_name, &de,
284                                     NULL, NULL);
285         if (IS_ERR(bh)) {
286                 mutex_unlock(&parent->d_inode->i_mutex);
287                 rc = PTR_ERR(bh);
288                 if (unlikely(rc == -ENOENT))
289                         rc = 0;
290         } else {
291                 rc = ldiskfs_delete_entry(oh->ot_handle, parent->d_inode,
292                                           de, bh);
293                 if (!rc && S_ISDIR(obj->oo_inode->i_mode))
294                         ldiskfs_dec_count(oh->ot_handle, parent->d_inode);
295                 mark_inode_dirty(parent->d_inode);
296                 mutex_unlock(&parent->d_inode->i_mutex);
297                 brelse(bh);
298                 CDEBUG(D_INODE, "%s: remove agent entry for %s: rc = %d\n",
299                        osd_name(osd), name, rc);
300         }
301
302         if (destroy || rc) {
303                 if (!rc)
304                         lu_object_clear_agent_entry(&obj->oo_dt.do_lu);
305
306                 RETURN(rc);
307         }
308
309         rc = osd_get_lma(oti, obj->oo_inode, &oti->oti_obj_dentry,
310                          &oti->oti_ost_attrs);
311         if (rc)
312                 RETURN(rc);
313
314         /* Get rid of REMOTE_PARENT flag from incompat */
315         lma->lma_incompat &= ~LMAI_REMOTE_PARENT;
316         lustre_lma_swab(lma);
317         rc = __osd_xattr_set(oti, obj->oo_inode, XATTR_NAME_LMA, lma,
318                              sizeof(*lma), XATTR_REPLACE);
319         if (!rc)
320                 lu_object_clear_agent_entry(&obj->oo_dt.do_lu);
321         RETURN(rc);
322 }
323
324 int osd_lookup_in_remote_parent(struct osd_thread_info *oti,
325                                 struct osd_device *osd,
326                                 const struct lu_fid *fid,
327                                 struct osd_inode_id *id)
328 {
329         struct osd_mdobj_map        *omm = osd->od_mdt_map;
330         char                        *name = oti->oti_name;
331         struct dentry               *parent;
332         struct dentry               *dentry;
333         struct ldiskfs_dir_entry_2 *de;
334         struct buffer_head         *bh;
335         int                         rc;
336         ENTRY;
337
338         parent = omm->omm_remote_parent;
339         sprintf(name, DFID_NOBRACE, PFID(fid));
340         dentry = osd_child_dentry_by_inode(oti->oti_env, parent->d_inode,
341                                            name, strlen(name));
342         mutex_lock(&parent->d_inode->i_mutex);
343         bh = osd_ldiskfs_find_entry(parent->d_inode, &dentry->d_name, &de,
344                                     NULL, NULL);
345         if (IS_ERR(bh)) {
346                 rc = PTR_ERR(bh);
347         } else {
348                 struct inode *inode;
349
350                 osd_id_gen(id, le32_to_cpu(de->inode), OSD_OII_NOGEN);
351                 brelse(bh);
352                 inode = osd_iget(oti, osd, id);
353                 if (IS_ERR(inode)) {
354                         rc = PTR_ERR(inode);
355                         if (rc == -ESTALE)
356                                 rc = -ENOENT;
357                 } else {
358                         iput(inode);
359                         rc = 0;
360                 }
361         }
362         mutex_unlock(&parent->d_inode->i_mutex);
363         if (rc == 0)
364                 osd_add_oi_cache(oti, osd, id, fid);
365         RETURN(rc);
366 }
367
368 /*
369  * directory structure on legacy OST:
370  *
371  * O/<seq>/d0-31/<objid>
372  * O/<seq>/LAST_ID
373  * last_rcvd
374  * LAST_GROUP
375  * CONFIGS
376  *
377  */
378 static int osd_ost_init(const struct lu_env *env, struct osd_device *dev)
379 {
380         struct lvfs_run_ctxt     new;
381         struct lvfs_run_ctxt     save;
382         struct dentry           *rootd = osd_sb(dev)->s_root;
383         struct dentry           *d;
384         struct osd_thread_info  *info = osd_oti_get(env);
385         struct inode            *inode;
386         struct lu_fid           *fid = &info->oti_fid3;
387         int                      rc;
388         ENTRY;
389
390         OBD_ALLOC_PTR(dev->od_ost_map);
391         if (dev->od_ost_map == NULL)
392                 RETURN(-ENOMEM);
393
394         /* to get subdir count from last_rcvd */
395         rc = osd_last_rcvd_subdir_count(dev);
396         if (rc < 0)
397                 GOTO(cleanup_alloc, rc);
398
399         dev->od_ost_map->om_subdir_count = rc;
400         rc = 0;
401
402         INIT_LIST_HEAD(&dev->od_ost_map->om_seq_list);
403         rwlock_init(&dev->od_ost_map->om_seq_list_lock);
404         mutex_init(&dev->od_ost_map->om_dir_init_mutex);
405
406         osd_push_ctxt(dev, &new, &save);
407
408         d = ll_lookup_one_len("O", rootd, strlen("O"));
409         if (IS_ERR(d))
410                 GOTO(cleanup_ctxt, rc = PTR_ERR(d));
411         if (d->d_inode == NULL) {
412                 dput(d);
413                 /* The lookup() may be called again inside simple_mkdir().
414                  * Since the repeated lookup() only be called for "/O" at
415                  * mount time, it will not affect the whole performance. */
416                 d = simple_mkdir(rootd, dev->od_mnt, "O", 0755, 1);
417                 if (IS_ERR(d))
418                         GOTO(cleanup_ctxt, rc = PTR_ERR(d));
419
420                 /* It is quite probably that the device is new formatted. */
421                 dev->od_maybe_new = 1;
422         }
423
424         inode = d->d_inode;
425         dev->od_ost_map->om_root = d;
426
427         /* 'What the @fid is' is not imporatant, because the object
428          * has no OI mapping, and only is visible inside the OSD.*/
429         lu_igif_build(fid, inode->i_ino, inode->i_generation);
430         rc = osd_ea_fid_set(info, inode, fid,
431                     LMAC_NOT_IN_OI | LMAC_FID_ON_OST, 0);
432         if (rc)
433                 GOTO(cleanup_dentry, rc);
434
435         pop_ctxt(&save, &new);
436         RETURN(0);
437
438 cleanup_dentry:
439         dput(d);
440 cleanup_ctxt:
441         pop_ctxt(&save, &new);
442 cleanup_alloc:
443         OBD_FREE_PTR(dev->od_ost_map);
444         return rc;
445 }
446
447 static void osd_seq_free(struct osd_obj_seq *osd_seq)
448 {
449         int j;
450
451         if (osd_seq->oos_dirs) {
452                 for (j = 0; j < osd_seq->oos_subdir_count; j++) {
453                         if (osd_seq->oos_dirs[j])
454                                 dput(osd_seq->oos_dirs[j]);
455                 }
456                 OBD_FREE(osd_seq->oos_dirs,
457                          sizeof(struct dentry *) * osd_seq->oos_subdir_count);
458         }
459
460         if (osd_seq->oos_root)
461                 dput(osd_seq->oos_root);
462
463         OBD_FREE_PTR(osd_seq);
464 }
465
466 static void osd_ost_fini(struct osd_device *osd)
467 {
468         struct osd_obj_seq    *osd_seq;
469         struct osd_obj_seq    *tmp;
470         struct osd_obj_map    *map = osd->od_ost_map;
471         ENTRY;
472
473         if (map == NULL)
474                 return;
475
476         write_lock(&map->om_seq_list_lock);
477         list_for_each_entry_safe(osd_seq, tmp, &map->om_seq_list,
478                                  oos_seq_list) {
479                 list_del_init(&osd_seq->oos_seq_list);
480                 write_unlock(&map->om_seq_list_lock);
481                 osd_seq_free(osd_seq);
482                 write_lock(&map->om_seq_list_lock);
483         }
484         write_unlock(&map->om_seq_list_lock);
485         if (map->om_root)
486                 dput(map->om_root);
487         OBD_FREE_PTR(map);
488         osd->od_ost_map = NULL;
489         EXIT;
490 }
491
492 int osd_obj_map_init(const struct lu_env *env, struct osd_device *dev)
493 {
494         int rc;
495         ENTRY;
496
497         /* prepare structures for OST */
498         rc = osd_ost_init(env, dev);
499         if (rc)
500                 RETURN(rc);
501
502         /* prepare structures for MDS */
503         rc = osd_mdt_init(env, dev);
504         if (rc)
505                 osd_ost_fini(dev);
506
507         RETURN(rc);
508 }
509
510 static struct osd_obj_seq *osd_seq_find_locked(struct osd_obj_map *map, u64 seq)
511 {
512         struct osd_obj_seq *osd_seq;
513
514         list_for_each_entry(osd_seq, &map->om_seq_list, oos_seq_list) {
515                 if (osd_seq->oos_seq == seq)
516                         return osd_seq;
517         }
518         return NULL;
519 }
520
521 static struct osd_obj_seq *osd_seq_find(struct osd_obj_map *map, u64 seq)
522 {
523         struct osd_obj_seq *osd_seq;
524
525         read_lock(&map->om_seq_list_lock);
526         osd_seq = osd_seq_find_locked(map, seq);
527         read_unlock(&map->om_seq_list_lock);
528         return osd_seq;
529 }
530
/*
 * Counterpart of osd_obj_map_init(): release the OST object map, then
 * the MDT object map of @dev.
 */
void osd_obj_map_fini(struct osd_device *dev)
{
        /* OST side first, then MDT side */
        osd_ost_fini(dev);
        osd_mdt_fini(dev);
}
536
537 /**
538  * Update the specified OI mapping.
539  *
540  * \retval   1, changed nothing
541  * \retval   0, changed successfully
542  * \retval -ve, on error
543  */
544 static int osd_obj_update_entry(struct osd_thread_info *info,
545                                 struct osd_device *osd,
546                                 struct dentry *dir, const char *name,
547                                 const struct lu_fid *fid,
548                                 const struct osd_inode_id *id,
549                                 handle_t *th)
550 {
551         struct inode               *parent = dir->d_inode;
552         struct dentry              *child;
553         struct ldiskfs_dir_entry_2 *de;
554         struct buffer_head         *bh;
555         struct inode               *inode;
556         struct dentry              *dentry = &info->oti_obj_dentry;
557         struct osd_inode_id        *oi_id  = &info->oti_id3;
558         struct lustre_mdt_attrs    *lma    = &info->oti_ost_attrs.loa_lma;
559         struct lu_fid              *oi_fid = &lma->lma_self_fid;
560         int                         rc;
561         ENTRY;
562
563         LASSERT(th != NULL);
564         LASSERT(th->h_transaction != NULL);
565
566         child = &info->oti_child_dentry;
567         child->d_parent = dir;
568         child->d_name.hash = 0;
569         child->d_name.name = name;
570         child->d_name.len = strlen(name);
571
572         ll_vfs_dq_init(parent);
573         mutex_lock(&parent->i_mutex);
574         bh = osd_ldiskfs_find_entry(parent, &child->d_name, &de, NULL, NULL);
575         if (IS_ERR(bh))
576                 GOTO(out, rc = PTR_ERR(bh));
577
578         if (le32_to_cpu(de->inode) == id->oii_ino)
579                 GOTO(out, rc = 1);
580
581         osd_id_gen(oi_id, le32_to_cpu(de->inode), OSD_OII_NOGEN);
582         inode = osd_iget(info, osd, oi_id);
583         if (IS_ERR(inode)) {
584                 rc = PTR_ERR(inode);
585                 if (rc == -ENOENT || rc == -ESTALE)
586                         goto update;
587                 GOTO(out, rc);
588         }
589
590         /* The EA inode should NOT be in OI, old OI scrub may added
591          * such OI mapping by wrong, replace it. */
592         if (unlikely(osd_is_ea_inode(inode))) {
593                 iput(inode);
594                 goto update;
595         }
596
597         rc = osd_get_lma(info, inode, dentry, &info->oti_ost_attrs);
598         if (rc == -ENODATA) {
599                 rc = osd_get_idif(info, inode, dentry, oi_fid);
600                 if (rc > 0 || rc == -ENODATA) {
601                         oi_fid = NULL;
602                         rc = 0;
603                 }
604         }
605         iput(inode);
606
607         if (rc != 0)
608                 GOTO(out, rc);
609
610         /* If the OST-object has neither FID-in-LMA nor FID-in-ff, it is
611          * either a crashed object or a uninitialized one. Replace it. */
612         if (oi_fid != NULL && lu_fid_eq(fid, oi_fid)) {
613                 CERROR("%s: the FID "DFID" is used by two objects: "
614                        "%u/%u %u/%u\n", osd_name(osd), PFID(fid),
615                        oi_id->oii_ino, oi_id->oii_gen,
616                        id->oii_ino, id->oii_gen);
617                 GOTO(out, rc = -EEXIST);
618         }
619
620         if (fid_is_idif(fid) && oi_fid != NULL && fid_is_idif(oi_fid)) {
621                 __u32 idx1 = fid_idif_ost_idx(fid);
622                 __u32 idx2 = fid_idif_ost_idx(oi_fid);
623                 struct ost_id *ostid = &info->oti_ostid;
624                 struct lu_fid *tfid = &info->oti_fid3;
625
626                 LASSERTF(idx1 == 0 || idx1 == osd->od_index,
627                          "invalid given FID "DFID", not match the "
628                          "device index %u\n", PFID(fid), osd->od_index);
629
630                 if (idx1 != idx2) {
631                         if (idx1 == 0 && idx2 == osd->od_index) {
632                                 fid_to_ostid(fid, ostid);
633                                 ostid_to_fid(tfid, ostid, idx2);
634                                 if (lu_fid_eq(tfid, oi_fid)) {
635                                         CERROR("%s: the FID "DFID" is used by "
636                                                "two objects(2): %u/%u %u/%u\n",
637                                                osd_name(osd), PFID(fid),
638                                                oi_id->oii_ino, oi_id->oii_gen,
639                                                id->oii_ino, id->oii_gen);
640
641                                         GOTO(out, rc = -EEXIST);
642                                 }
643                         } else if (idx2 == 0 && idx1 == osd->od_index) {
644                                 fid_to_ostid(oi_fid, ostid);
645                                 ostid_to_fid(tfid, ostid, idx1);
646                                 if (lu_fid_eq(tfid, fid)) {
647                                         CERROR("%s: the FID "DFID" is used by "
648                                                "two objects(2): %u/%u %u/%u\n",
649                                                osd_name(osd), PFID(fid),
650                                                oi_id->oii_ino, oi_id->oii_gen,
651                                                id->oii_ino, id->oii_gen);
652
653                                         GOTO(out, rc = -EEXIST);
654                                 }
655                         }
656                 }
657         }
658
659 update:
660         /* There may be temporary inconsistency: On one hand, the new
661          * object may be referenced by multiple entries, which is out
662          * of our control unless we traverse the whole /O completely,
663          * which is non-flat order and inefficient, should be avoided;
664          * On the other hand, the old object may become orphan if it
665          * is still valid. Since it was referenced by an invalid entry,
666          * making it as invisible temporary may be not worse. OI scrub
667          * will process it later. */
668         rc = ldiskfs_journal_get_write_access(th, bh);
669         if (rc != 0)
670                 GOTO(out, rc);
671
672         de->inode = cpu_to_le32(id->oii_ino);
673         rc = ldiskfs_handle_dirty_metadata(th, NULL, bh);
674
675         GOTO(out, rc);
676
677 out:
678         if (!IS_ERR(bh))
679                 brelse(bh);
680         mutex_unlock(&parent->i_mutex);
681         return rc;
682 }
683
684 static int osd_obj_del_entry(struct osd_thread_info *info,
685                              struct osd_device *osd,
686                              struct dentry *dird, char *name,
687                              handle_t *th)
688 {
689         struct ldiskfs_dir_entry_2 *de;
690         struct buffer_head         *bh;
691         struct dentry              *child;
692         struct inode               *dir = dird->d_inode;
693         int                         rc;
694         ENTRY;
695
696         LASSERT(th != NULL);
697         LASSERT(th->h_transaction != NULL);
698
699
700         child = &info->oti_child_dentry;
701         child->d_name.hash = 0;
702         child->d_name.name = name;
703         child->d_name.len = strlen(name);
704         child->d_parent = dird;
705         child->d_inode = NULL;
706
707         ll_vfs_dq_init(dir);
708         mutex_lock(&dir->i_mutex);
709         bh = osd_ldiskfs_find_entry(dir, &child->d_name, &de, NULL, NULL);
710         if (IS_ERR(bh)) {
711                 rc = PTR_ERR(bh);
712         } else {
713                 rc = ldiskfs_delete_entry(th, dir, de, bh);
714                 brelse(bh);
715         }
716         mutex_unlock(&dir->i_mutex);
717
718         RETURN(rc);
719 }
720
721 static int osd_obj_add_entry(struct osd_thread_info *info,
722                              struct osd_device *osd,
723                              struct dentry *dir, char *name,
724                              const struct osd_inode_id *id,
725                              handle_t *th)
726 {
727         struct dentry *child;
728         struct inode *inode;
729         int rc;
730
731         ENTRY;
732
733         if (OBD_FAIL_CHECK(OBD_FAIL_OSD_COMPAT_NO_ENTRY))
734                 RETURN(0);
735
736         LASSERT(th != NULL);
737         LASSERT(th->h_transaction != NULL);
738
739         inode = info->oti_inode;
740         if (unlikely(inode == NULL)) {
741                 struct ldiskfs_inode_info *lii;
742                 OBD_ALLOC_PTR(lii);
743                 if (lii == NULL)
744                         RETURN(-ENOMEM);
745                 inode = info->oti_inode = &lii->vfs_inode;
746         }
747
748         inode->i_sb = osd_sb(osd);
749         osd_id_to_inode(inode, id);
750         inode->i_mode = S_IFREG; /* for type in ldiskfs dir entry */
751
752         child = &info->oti_child_dentry;
753         child->d_name.hash = 0;
754         child->d_name.name = name;
755         child->d_name.len = strlen(name);
756         child->d_parent = dir;
757         child->d_inode = inode;
758
759         if (OBD_FAIL_CHECK(OBD_FAIL_OSD_COMPAT_INVALID_ENTRY))
760                 inode->i_ino++;
761
762         ll_vfs_dq_init(dir->d_inode);
763         mutex_lock(&dir->d_inode->i_mutex);
764         rc = osd_ldiskfs_add_entry(info, osd, th, child, inode, NULL);
765         mutex_unlock(&dir->d_inode->i_mutex);
766
767         RETURN(rc);
768 }
769
770 /**
771  * Use %llu for legacy OST sequences, but use %llx for new
772  * sequences names, so that the O/{seq}/dN/{oid} more closely
773  * follows the DFID/PFID format. This makes it easier to map from
774  * debug messages to objects in the future, and the legacy space
775  * of FID_SEQ_OST_MDT0 will be unused in the future.
776  **/
777 static inline void osd_seq_name(char *seq_name, size_t name_size, u64 seq)
778 {
779         snprintf(seq_name, name_size,
780                  (fid_seq_is_rsvd(seq) ||
781                   fid_seq_is_mdt0(seq)) ? "%llu" : "%llx",
782                  fid_seq_is_idif(seq) ? 0 : seq);
783 }
784
785 static inline void osd_oid_name(char *name, size_t name_size,
786                                 const struct lu_fid *fid, u64 id)
787 {
788         snprintf(name, name_size,
789                  (fid_seq_is_rsvd(fid_seq(fid)) ||
790                   fid_seq_is_mdt0(fid_seq(fid)) ||
791                   fid_seq_is_idif(fid_seq(fid))) ? "%llu" : "%llx", id);
792 }
793
794 /* external locking is required */
795 static int osd_seq_load_locked(struct osd_thread_info *info,
796                                struct osd_device *osd,
797                                struct osd_obj_seq *osd_seq)
798 {
799         struct osd_obj_map  *map = osd->od_ost_map;
800         struct dentry       *seq_dir;
801         struct inode        *inode;
802         struct lu_fid       *fid = &info->oti_fid3;
803         int                 rc = 0;
804         int                 i;
805         char                dir_name[32];
806         ENTRY;
807
808         if (osd_seq->oos_root != NULL)
809                 RETURN(0);
810
811         LASSERT(map);
812         LASSERT(map->om_root);
813
814         osd_seq_name(dir_name, sizeof(dir_name), osd_seq->oos_seq);
815
816         seq_dir = simple_mkdir(map->om_root, osd->od_mnt, dir_name, 0755, 1);
817         if (IS_ERR(seq_dir))
818                 GOTO(out_err, rc = PTR_ERR(seq_dir));
819         else if (seq_dir->d_inode == NULL)
820                 GOTO(out_put, rc = -EFAULT);
821
822         inode = seq_dir->d_inode;
823         osd_seq->oos_root = seq_dir;
824
825         /* 'What the @fid is' is not imporatant, because the object
826          * has no OI mapping, and only is visible inside the OSD.*/
827         lu_igif_build(fid, inode->i_ino, inode->i_generation);
828         rc = osd_ea_fid_set(info, inode, fid,
829                             LMAC_NOT_IN_OI | LMAC_FID_ON_OST, 0);
830         if (rc != 0)
831                 GOTO(out_put, rc);
832
833         LASSERT(osd_seq->oos_dirs == NULL);
834         OBD_ALLOC(osd_seq->oos_dirs,
835                   sizeof(seq_dir) * osd_seq->oos_subdir_count);
836         if (osd_seq->oos_dirs == NULL)
837                 GOTO(out_put, rc = -ENOMEM);
838
839         for (i = 0; i < osd_seq->oos_subdir_count; i++) {
840                 struct dentry   *dir;
841
842                 snprintf(dir_name, sizeof(dir_name), "d%u", i);
843                 dir = simple_mkdir(osd_seq->oos_root, osd->od_mnt, dir_name,
844                                    0700, 1);
845                 if (IS_ERR(dir)) {
846                         GOTO(out_free, rc = PTR_ERR(dir));
847                 } else if (dir->d_inode == NULL) {
848                         dput(dir);
849                         GOTO(out_free, rc = -EFAULT);
850                 }
851
852                 inode = dir->d_inode;
853                 osd_seq->oos_dirs[i] = dir;
854
855                 /* 'What the @fid is' is not imporatant, because the object
856                  * has no OI mapping, and only is visible inside the OSD.*/
857                 lu_igif_build(fid, inode->i_ino, inode->i_generation);
858                 rc = osd_ea_fid_set(info, inode, fid,
859                                     LMAC_NOT_IN_OI | LMAC_FID_ON_OST, 0);
860                 if (rc != 0)
861                         GOTO(out_free, rc);
862         }
863
864         if (rc != 0) {
865 out_free:
866                 for (i = 0; i < osd_seq->oos_subdir_count; i++) {
867                         if (osd_seq->oos_dirs[i] != NULL)
868                                 dput(osd_seq->oos_dirs[i]);
869                 }
870                 OBD_FREE(osd_seq->oos_dirs,
871                          sizeof(seq_dir) * osd_seq->oos_subdir_count);
872 out_put:
873                 dput(seq_dir);
874                 osd_seq->oos_root = NULL;
875         }
876 out_err:
877         RETURN(rc);
878 }
879
880 static struct osd_obj_seq *osd_seq_load(struct osd_thread_info *info,
881                                         struct osd_device *osd, u64 seq)
882 {
883         struct osd_obj_map      *map;
884         struct osd_obj_seq      *osd_seq;
885         int                     rc = 0;
886         ENTRY;
887
888         map = osd->od_ost_map;
889         LASSERT(map);
890         LASSERT(map->om_root);
891
892         osd_seq = osd_seq_find(map, seq);
893         if (likely(osd_seq != NULL))
894                 RETURN(osd_seq);
895
896         /* Serializing init process */
897         mutex_lock(&map->om_dir_init_mutex);
898
899         /* Check whether the seq has been added */
900         read_lock(&map->om_seq_list_lock);
901         osd_seq = osd_seq_find_locked(map, seq);
902         if (osd_seq != NULL) {
903                 read_unlock(&map->om_seq_list_lock);
904                 GOTO(cleanup, rc = 0);
905         }
906         read_unlock(&map->om_seq_list_lock);
907
908         OBD_ALLOC_PTR(osd_seq);
909         if (osd_seq == NULL)
910                 GOTO(cleanup, rc = -ENOMEM);
911
912         INIT_LIST_HEAD(&osd_seq->oos_seq_list);
913         osd_seq->oos_seq = seq;
914         /* Init subdir count to be 32, but each seq can have
915          * different subdir count */
916         osd_seq->oos_subdir_count = map->om_subdir_count;
917         rc = osd_seq_load_locked(info, osd, osd_seq);
918         if (rc != 0)
919                 GOTO(cleanup, rc);
920
921         write_lock(&map->om_seq_list_lock);
922         list_add(&osd_seq->oos_seq_list, &map->om_seq_list);
923         write_unlock(&map->om_seq_list_lock);
924
925 cleanup:
926         mutex_unlock(&map->om_dir_init_mutex);
927         if (rc != 0) {
928                 if (osd_seq != NULL)
929                         OBD_FREE_PTR(osd_seq);
930                 RETURN(ERR_PTR(rc));
931         }
932
933         RETURN(osd_seq);
934 }
935
936 int osd_obj_map_lookup(struct osd_thread_info *info, struct osd_device *dev,
937                        const struct lu_fid *fid, struct osd_inode_id *id)
938 {
939         struct osd_obj_map              *map;
940         struct osd_obj_seq              *osd_seq;
941         struct dentry                   *d_seq;
942         struct dentry                   *child;
943         struct ost_id                   *ostid = &info->oti_ostid;
944         int                             dirn;
945         char                            name[32];
946         struct ldiskfs_dir_entry_2      *de;
947         struct buffer_head              *bh;
948         struct inode                    *dir;
949         struct inode                    *inode;
950         ENTRY;
951
952         /* on the very first lookup we find and open directories */
953
954         map = dev->od_ost_map;
955         LASSERT(map);
956         LASSERT(map->om_root);
957
958         fid_to_ostid(fid, ostid);
959         osd_seq = osd_seq_load(info, dev, ostid_seq(ostid));
960         if (IS_ERR(osd_seq))
961                 RETURN(PTR_ERR(osd_seq));
962
963         dirn = ostid_id(ostid) & (osd_seq->oos_subdir_count - 1);
964         d_seq = osd_seq->oos_dirs[dirn];
965         LASSERT(d_seq);
966
967         osd_oid_name(name, sizeof(name), fid, ostid_id(ostid));
968
969         child = &info->oti_child_dentry;
970         child->d_parent = d_seq;
971         child->d_name.hash = 0;
972         child->d_name.name = name;
973         /* XXX: we can use rc from sprintf() instead of strlen() */
974         child->d_name.len = strlen(name);
975
976         dir = d_seq->d_inode;
977         mutex_lock(&dir->i_mutex);
978         bh = osd_ldiskfs_find_entry(dir, &child->d_name, &de, NULL, NULL);
979         mutex_unlock(&dir->i_mutex);
980
981         if (IS_ERR(bh))
982                 RETURN(PTR_ERR(bh));
983
984         osd_id_gen(id, le32_to_cpu(de->inode), OSD_OII_NOGEN);
985         brelse(bh);
986
987         inode = osd_iget(info, dev, id);
988         if (IS_ERR(inode)) {
989                 int rc = PTR_ERR(inode);
990
991                 RETURN(rc == -ENOENT ? -ESTALE : rc);
992         }
993
994         iput(inode);
995         RETURN(0);
996 }
997
998 int osd_obj_map_insert(struct osd_thread_info *info,
999                        struct osd_device *osd,
1000                        const struct lu_fid *fid,
1001                        const struct osd_inode_id *id,
1002                        handle_t *th)
1003 {
1004         struct osd_obj_map      *map;
1005         struct osd_obj_seq      *osd_seq;
1006         struct dentry           *d;
1007         struct ost_id           *ostid = &info->oti_ostid;
1008         u64                      oid;
1009         int                     dirn, rc = 0;
1010         char                    name[32];
1011         ENTRY;
1012
1013         map = osd->od_ost_map;
1014         LASSERT(map);
1015
1016         /* map fid to seq:objid */
1017         fid_to_ostid(fid, ostid);
1018
1019         oid = ostid_id(ostid);
1020         osd_seq = osd_seq_load(info, osd, ostid_seq(ostid));
1021         if (IS_ERR(osd_seq))
1022                 RETURN(PTR_ERR(osd_seq));
1023
1024         dirn = oid & (osd_seq->oos_subdir_count - 1);
1025         d = osd_seq->oos_dirs[dirn];
1026         LASSERT(d);
1027
1028         osd_oid_name(name, sizeof(name), fid, oid);
1029
1030 again:
1031         rc = osd_obj_add_entry(info, osd, d, name, id, th);
1032         if (rc == -EEXIST) {
1033                 rc = osd_obj_update_entry(info, osd, d, name, fid, id, th);
1034                 if (unlikely(rc == -ENOENT))
1035                         goto again;
1036
1037                 if (unlikely(rc == 1))
1038                         RETURN(0);
1039         }
1040
1041         RETURN(rc);
1042 }
1043
1044 int osd_obj_map_delete(struct osd_thread_info *info, struct osd_device *osd,
1045                        const struct lu_fid *fid, handle_t *th)
1046 {
1047         struct osd_obj_map      *map;
1048         struct osd_obj_seq      *osd_seq;
1049         struct dentry           *d;
1050         struct ost_id           *ostid = &info->oti_ostid;
1051         int                     dirn, rc = 0;
1052         char                    name[32];
1053         ENTRY;
1054
1055         map = osd->od_ost_map;
1056         LASSERT(map);
1057
1058         /* map fid to seq:objid */
1059         fid_to_ostid(fid, ostid);
1060
1061         osd_seq = osd_seq_load(info, osd, ostid_seq(ostid));
1062         if (IS_ERR(osd_seq))
1063                 GOTO(cleanup, rc = PTR_ERR(osd_seq));
1064
1065         dirn = ostid_id(ostid) & (osd_seq->oos_subdir_count - 1);
1066         d = osd_seq->oos_dirs[dirn];
1067         LASSERT(d);
1068
1069         osd_oid_name(name, sizeof(name), fid, ostid_id(ostid));
1070         rc = osd_obj_del_entry(info, osd, d, name, th);
1071 cleanup:
1072         RETURN(rc);
1073 }
1074
1075 int osd_obj_map_update(struct osd_thread_info *info,
1076                        struct osd_device *osd,
1077                        const struct lu_fid *fid,
1078                        const struct osd_inode_id *id,
1079                        handle_t *th)
1080 {
1081         struct osd_obj_seq      *osd_seq;
1082         struct dentry           *d;
1083         struct ost_id           *ostid = &info->oti_ostid;
1084         int                     dirn, rc = 0;
1085         char                    name[32];
1086         ENTRY;
1087
1088         fid_to_ostid(fid, ostid);
1089         osd_seq = osd_seq_load(info, osd, ostid_seq(ostid));
1090         if (IS_ERR(osd_seq))
1091                 RETURN(PTR_ERR(osd_seq));
1092
1093         dirn = ostid_id(ostid) & (osd_seq->oos_subdir_count - 1);
1094         d = osd_seq->oos_dirs[dirn];
1095         LASSERT(d);
1096
1097         osd_oid_name(name, sizeof(name), fid, ostid_id(ostid));
1098         rc = osd_obj_update_entry(info, osd, d, name, fid, id, th);
1099
1100         RETURN(rc);
1101 }
1102
1103 int osd_obj_map_recover(struct osd_thread_info *info,
1104                         struct osd_device *osd,
1105                         struct inode *src_parent,
1106                         struct dentry *src_child,
1107                         const struct lu_fid *fid)
1108 {
1109         struct osd_obj_seq         *osd_seq;
1110         struct dentry              *tgt_parent;
1111         struct dentry              *tgt_child = &info->oti_child_dentry;
1112         struct inode               *dir;
1113         struct inode               *inode     = src_child->d_inode;
1114         struct ost_id              *ostid     = &info->oti_ostid;
1115         handle_t                   *jh;
1116         struct ldiskfs_dir_entry_2 *de;
1117         struct buffer_head         *bh;
1118         char                        name[32];
1119         int                         dirn;
1120         int                         rc        = 0;
1121         ENTRY;
1122
1123         if (fid_is_last_id(fid)) {
1124                 osd_seq = osd_seq_load(info, osd, fid_seq(fid));
1125                 if (IS_ERR(osd_seq))
1126                         RETURN(PTR_ERR(osd_seq));
1127
1128                 tgt_parent = osd_seq->oos_root;
1129                 tgt_child->d_name.name = "LAST_ID";
1130                 tgt_child->d_name.len = strlen("LAST_ID");
1131         } else {
1132                 fid_to_ostid(fid, ostid);
1133                 osd_seq = osd_seq_load(info, osd, ostid_seq(ostid));
1134                 if (IS_ERR(osd_seq))
1135                         RETURN(PTR_ERR(osd_seq));
1136
1137                 dirn = ostid_id(ostid) & (osd_seq->oos_subdir_count - 1);
1138                 tgt_parent = osd_seq->oos_dirs[dirn];
1139                 osd_oid_name(name, sizeof(name), fid, ostid_id(ostid));
1140                 tgt_child->d_name.name = name;
1141                 tgt_child->d_name.len = strlen(name);
1142         }
1143         LASSERT(tgt_parent != NULL);
1144
1145         dir = tgt_parent->d_inode;
1146         tgt_child->d_name.hash = 0;
1147         tgt_child->d_parent = tgt_parent;
1148         tgt_child->d_inode = inode;
1149
1150         /* The non-initialized src_child may be destroyed. */
1151         jh = osd_journal_start_sb(osd_sb(osd), LDISKFS_HT_MISC,
1152                                 osd_dto_credits_noquota[DTO_INDEX_DELETE] +
1153                                 osd_dto_credits_noquota[DTO_INDEX_INSERT] +
1154                                 osd_dto_credits_noquota[DTO_OBJECT_DELETE]);
1155         if (IS_ERR(jh))
1156                 RETURN(PTR_ERR(jh));
1157
1158         ll_vfs_dq_init(src_parent);
1159         ll_vfs_dq_init(dir);
1160
1161         mutex_lock(&src_parent->i_mutex);
1162         mutex_lock(&dir->i_mutex);
1163         bh = osd_ldiskfs_find_entry(dir, &tgt_child->d_name, &de, NULL, NULL);
1164         if (!IS_ERR(bh)) {
1165                 /* XXX: If some other object occupied the same slot. And If such
1166                  *      inode is zero-sized and with SUID+SGID, then means it is
1167                  *      a new created one. Maybe we can remove it and insert the
1168                  *      original one back to the /O/<seq>/d<x>. But there are
1169                  *      something to be considered:
1170                  *
1171                  *      1) The OST-object under /lost+found has crashed LMA.
1172                  *         So it should not conflict with the current one.
1173                  *
1174                  *      2) There are race conditions that: someone may just want
1175                  *         to modify the current one. Even if the OI scrub takes
1176                  *         the object lock when remove the current one, it still
1177                  *         cause the modification to be lost becasue the target
1178                  *         has been removed when the RPC service thread waiting
1179                  *         for the lock.
1180                  *
1181                  *      So keep it there before we have suitable solution. */
1182                 brelse(bh);
1183                 mutex_unlock(&dir->i_mutex);
1184                 mutex_unlock(&src_parent->i_mutex);
1185                 ldiskfs_journal_stop(jh);
1186
1187                 rc = -EEXIST;
1188                 /* If the src object has never been modified, then remove it. */
1189                 if (inode->i_size == 0 && inode->i_mode & S_ISUID &&
1190                     inode->i_mode & S_ISGID) {
1191                         rc = ll_vfs_unlink(src_parent, src_child);
1192                         if (unlikely(rc == -ENOENT))
1193                                 rc = 0;
1194                 }
1195                 RETURN(rc);
1196         }
1197
1198         bh = osd_ldiskfs_find_entry(src_parent, &src_child->d_name, &de,
1199                                     NULL, NULL);
1200         if (unlikely(IS_ERR(bh)))
1201                 GOTO(unlock, rc = PTR_ERR(bh));
1202
1203         rc = ldiskfs_delete_entry(jh, src_parent, de, bh);
1204         brelse(bh);
1205         if (rc != 0)
1206                 GOTO(unlock, rc);
1207
1208         rc = osd_ldiskfs_add_entry(info, osd, jh, tgt_child, inode, NULL);
1209
1210         GOTO(unlock, rc);
1211
1212 unlock:
1213         mutex_unlock(&dir->i_mutex);
1214         mutex_unlock(&src_parent->i_mutex);
1215         ldiskfs_journal_stop(jh);
1216         return rc;
1217 }
1218
1219 static struct dentry *
1220 osd_object_spec_find(struct osd_thread_info *info, struct osd_device *osd,
1221                      const struct lu_fid *fid, char **name)
1222 {
1223         struct dentry *root = ERR_PTR(-ENOENT);
1224
1225         if (fid_is_last_id(fid)) {
1226                 struct osd_obj_seq *osd_seq;
1227
1228                 /* on creation of LAST_ID we create O/<seq> hierarchy */
1229                 osd_seq = osd_seq_load(info, osd, fid_seq(fid));
1230                 if (IS_ERR(osd_seq))
1231                         RETURN((struct dentry *)osd_seq);
1232
1233                 *name = "LAST_ID";
1234                 root = osd_seq->oos_root;
1235         } else {
1236                 *name = osd_lf_fid2name(fid);
1237                 if (*name == NULL)
1238                         CWARN("UNKNOWN COMPAT FID "DFID"\n", PFID(fid));
1239                 else if ((*name)[0])
1240                         root = osd_sb(osd)->s_root;
1241         }
1242
1243         return root;
1244 }
1245
1246 int osd_obj_spec_update(struct osd_thread_info *info, struct osd_device *osd,
1247                         const struct lu_fid *fid, const struct osd_inode_id *id,
1248                         handle_t *th)
1249 {
1250         struct dentry   *root;
1251         char            *name = NULL;
1252         int              rc;
1253         ENTRY;
1254
1255         root = osd_object_spec_find(info, osd, fid, &name);
1256         if (!IS_ERR(root)) {
1257                 rc = osd_obj_update_entry(info, osd, root, name, fid, id, th);
1258         } else {
1259                 rc = PTR_ERR(root);
1260                 if (rc == -ENOENT)
1261                         rc = 1;
1262         }
1263
1264         RETURN(rc);
1265 }
1266
1267 int osd_obj_spec_insert(struct osd_thread_info *info, struct osd_device *osd,
1268                         const struct lu_fid *fid, const struct osd_inode_id *id,
1269                         handle_t *th)
1270 {
1271         struct dentry   *root;
1272         char            *name = NULL;
1273         int              rc;
1274         ENTRY;
1275
1276         root = osd_object_spec_find(info, osd, fid, &name);
1277         if (!IS_ERR(root)) {
1278                 rc = osd_obj_add_entry(info, osd, root, name, id, th);
1279         } else {
1280                 rc = PTR_ERR(root);
1281                 if (rc == -ENOENT)
1282                         rc = 0;
1283         }
1284
1285         RETURN(rc);
1286 }
1287
1288 int osd_obj_spec_lookup(struct osd_thread_info *info, struct osd_device *osd,
1289                         const struct lu_fid *fid, struct osd_inode_id *id)
1290 {
1291         struct dentry   *root;
1292         struct dentry   *dentry;
1293         struct inode    *inode;
1294         char            *name = NULL;
1295         int             rc = -ENOENT;
1296         ENTRY;
1297
1298         if (fid_is_last_id(fid)) {
1299                 struct osd_obj_seq *osd_seq;
1300
1301                 osd_seq = osd_seq_load(info, osd, fid_seq(fid));
1302                 if (IS_ERR(osd_seq))
1303                         RETURN(PTR_ERR(osd_seq));
1304                 root = osd_seq->oos_root;
1305                 name = "LAST_ID";
1306         } else {
1307                 root = osd_sb(osd)->s_root;
1308                 name = osd_lf_fid2name(fid);
1309                 if (name == NULL || strlen(name) == 0)
1310                         RETURN(-ENOENT);
1311         }
1312
1313         dentry = ll_lookup_one_len(name, root, strlen(name));
1314         if (!IS_ERR(dentry)) {
1315                 inode = dentry->d_inode;
1316                 if (inode) {
1317                         if (is_bad_inode(inode)) {
1318                                 rc = -EIO;
1319                         } else {
1320                                 osd_id_gen(id, inode->i_ino,
1321                                            inode->i_generation);
1322                                 rc = 0;
1323                         }
1324                 }
1325                 /* if dentry is accessible after osd_compat_spec_insert it
1326                  * will still contain NULL inode, so don't keep it in cache */
1327                 d_invalidate(dentry);
1328                 dput(dentry);
1329         }
1330
1331         RETURN(rc);
1332 }