Whamcloud - gitweb
LU-6142 tests: Fix style issues for chownmany.c
[fs/lustre-release.git] / lustre / osd-ldiskfs / osd_compat.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2012, 2017, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  * Lustre is a trademark of Sun Microsystems, Inc.
31  *
32  * lustre/osd/osd_compat.c
33  *
34  * on-disk structure for managing /O
35  *
36  * Author: Alex Zhuravlev <bzzz@whamcloud.com>
37  */
38
39 /* prerequisite for linux/xattr.h */
40 #include <linux/types.h>
41 /* prerequisite for linux/xattr.h */
42 #include <linux/fs.h>
43 /* XATTR_{REPLACE,CREATE} */
44 #include <linux/xattr.h>
45
46 /*
47  * struct OBD_{ALLOC,FREE}*()
48  * OBD_FAIL_CHECK
49  */
50 #include <obd_support.h>
51
52 #include "osd_internal.h"
53 #include "osd_oi.h"
54
55 static void osd_push_ctxt(const struct osd_device *dev,
56                           struct lvfs_run_ctxt *newctxt,
57                           struct lvfs_run_ctxt *save)
58 {
59         OBD_SET_CTXT_MAGIC(newctxt);
60         newctxt->pwdmnt = dev->od_mnt;
61         newctxt->pwd = dev->od_mnt->mnt_root;
62         newctxt->fs = KERNEL_DS;
63         newctxt->umask = current_umask();
64         newctxt->dt = NULL;
65
66         push_ctxt(save, newctxt);
67 }
68
69 struct dentry *osd_lookup_one_len_common(struct osd_device *dev,
70                                          const char *name,
71                                          struct dentry *base, int len,
72                                          enum oi_check_flags flags)
73 {
74         struct dentry *dchild;
75
76         /*
77          * We can't use inode_is_locked() directly since we can't know
78          * if the current thread context took the lock earlier or if
79          * another thread context took the lock. OI_LOCKED tells us
80          * if the current thread context has already taken the lock.
81          */
82         if (!(flags & OI_LOCKED)) {
83                 /* If another thread took this lock already we will
84                  * just have to wait until the other thread is done.
85                  */
86                 inode_lock(base->d_inode);
87                 dchild = lookup_one_len(name, base, len);
88                 inode_unlock(base->d_inode);
89         } else {
90                 /* This thread context already has taken the lock.
91                  * Other threads will have to wait until we are done.
92                  */
93                 dchild = lookup_one_len(name, base, len);
94         }
95         if (IS_ERR(dchild))
96                 return dchild;
97
98         if (dchild->d_inode && unlikely(is_bad_inode(dchild->d_inode))) {
99                 CERROR("%s: bad inode returned %lu/%u: rc = -ENOENT\n",
100                        osd_name(dev), dchild->d_inode->i_ino,
101                        dchild->d_inode->i_generation);
102                 dput(dchild);
103                 dchild = ERR_PTR(-ENOENT);
104         }
105
106         return dchild;
107 }
108
/**
 * osd_lookup_one_len_unlocked - lookup single pathname component, taking
 * the parent inode lock internally
 *
 * @dev:	obd device we are searching
 * @name:	pathname component to lookup
 * @base:	base directory to lookup from
 * @len:	length of @name, passed through to lookup_one_len()
 *
 * Unlike osd_lookup_one_len(), this must be called without the inode lock
 * of @base held; osd_lookup_one_len_common() takes and releases it around
 * the lookup.
 */
struct dentry *osd_lookup_one_len_unlocked(struct osd_device *dev,
					   const char *name,
					   struct dentry *base, int len)
{
	/*
	 * Pass "no flags" rather than ~OI_LOCKED: the latter only worked
	 * because the common helper tests OI_LOCKED alone, and it would
	 * silently switch on every other oi_check_flags bit.
	 */
	return osd_lookup_one_len_common(dev, name, base, len, 0);
}
126
127 /**
128  * osd_lookup_one_len - lookup single pathname component
129  *
130  * @dev:        obd device we are searching
131  * @name:       pathname component to lookup
132  * @base:       base directory to lookup from
133  * @len:        maximum length @len should be interpreted to
134  *
135  * The caller must hold inode lock
136  */
137 struct dentry *osd_lookup_one_len(struct osd_device *dev, const char *name,
138                                   struct dentry *base, int len)
139 {
140         return osd_lookup_one_len_common(dev, name, base, len, OI_LOCKED);
141 }
142
143 /* utility to make a directory */
144 static struct dentry *
145 simple_mkdir(const struct lu_env *env, struct osd_device *osd,
146              struct dentry *dir, const struct lu_fid *fid,
147              const char *name, __u32 compat, int mode, bool *created)
148 {
149         struct osd_thread_info *info = osd_oti_get(env);
150         struct lu_fid *tfid = &info->oti_fid3;
151         struct inode *inode;
152         struct dentry *dchild;
153         int err = 0;
154
155         ENTRY;
156
157         // ASSERT_KERNEL_CTXT("kernel doing mkdir outside kernel context\n");
158         CDEBUG(D_INODE, "creating directory %.*s\n", (int)strlen(name), name);
159         dchild = osd_lookup_one_len_unlocked(osd, name, dir, strlen(name));
160         if (IS_ERR(dchild))
161                 RETURN(dchild);
162
163         inode = dchild->d_inode;
164         if (inode) {
165                 struct lustre_mdt_attrs *lma = &info->oti_ost_attrs.loa_lma;
166                 int old_mode = inode->i_mode;
167
168                 if (created)
169                         *created = false;
170
171                 if (!S_ISDIR(old_mode)) {
172                         CERROR("found %s (%lu/%u) is mode %o\n", name,
173                                inode->i_ino, inode->i_generation, old_mode);
174                         GOTO(out_err, err = -ENOTDIR);
175                 }
176
177                 if (unlikely(osd->od_dt_dev.dd_rdonly))
178                         RETURN(dchild);
179
180                 /* Fixup directory permissions if necessary */
181                 if ((old_mode & S_IALLUGO) != (mode & S_IALLUGO)) {
182                         CDEBUG(D_CONFIG,
183                                "fixing permissions on %s from %o to %o\n",
184                                name, old_mode, mode);
185                         inode->i_mode = (mode & S_IALLUGO) |
186                                         (old_mode & ~S_IALLUGO);
187                         mark_inode_dirty(inode);
188                 }
189
190                 err = osd_get_lma(info, inode, &info->oti_obj_dentry,
191                                   &info->oti_ost_attrs);
192                 if (err == -ENODATA)
193                         goto set_fid;
194
195                 if (err)
196                         GOTO(out_err, err);
197
198                 if ((fid && !lu_fid_eq(fid, &lma->lma_self_fid)) ||
199                     lma->lma_compat != compat)
200                         goto set_fid;
201
202                 RETURN(dchild);
203         }
204
205         err = vfs_mkdir(dir->d_inode, dchild, mode);
206         if (err)
207                 GOTO(out_err, err);
208
209         inode = dchild->d_inode;
210         if (created)
211                 *created = true;
212
213 set_fid:
214         if (fid)
215                 *tfid = *fid;
216         else
217                 lu_igif_build(tfid, inode->i_ino, inode->i_generation);
218         err = osd_ea_fid_set(info, inode, tfid, compat, 0);
219         if (err)
220                 GOTO(out_err, err);
221
222         RETURN(dchild);
223
224 out_err:
225         dput(dchild);
226         return ERR_PTR(err);
227 }
228
229 static int osd_last_rcvd_subdir_count(struct osd_device *osd)
230 {
231         struct lr_server_data lsd;
232         struct dentry *dlast;
233         loff_t off;
234         int rc = 0;
235         int count = OBJ_SUBDIR_COUNT;
236
237         ENTRY;
238
239         dlast = osd_lookup_one_len_unlocked(osd, LAST_RCVD, osd_sb(osd)->s_root,
240                                             strlen(LAST_RCVD));
241         if (IS_ERR(dlast))
242                 return PTR_ERR(dlast);
243         else if (dlast->d_inode == NULL)
244                 goto out;
245
246         off = 0;
247         rc = osd_ldiskfs_read(dlast->d_inode, &lsd, sizeof(lsd), &off);
248         if (rc == sizeof(lsd)) {
249                 CDEBUG(D_INFO,
250                       "read last_rcvd header, uuid = %s, subdir count = %d\n",
251                       lsd.lsd_uuid, lsd.lsd_subdir_count);
252                 if (le16_to_cpu(lsd.lsd_subdir_count) > 0)
253                         count = le16_to_cpu(lsd.lsd_subdir_count);
254         } else if (rc != 0) {
255                 CERROR("Can't read last_rcvd file, rc = %d\n", rc);
256                 if (rc > 0)
257                         rc = -EFAULT;
258                 dput(dlast);
259                 return rc;
260         }
261 out:
262         dput(dlast);
263         LASSERT(count > 0);
264         return count;
265 }
266
267 static int osd_mdt_init(const struct lu_env *env, struct osd_device *dev)
268 {
269         struct lvfs_run_ctxt new;
270         struct lvfs_run_ctxt save;
271         struct dentry *parent;
272         struct osd_mdobj_map *omm;
273         struct dentry *d;
274         struct osd_thread_info *info = osd_oti_get(env);
275         struct lu_fid *fid = &info->oti_fid3;
276         int rc = 0;
277
278         ENTRY;
279
280         OBD_ALLOC_PTR(dev->od_mdt_map);
281         if (dev->od_mdt_map == NULL)
282                 RETURN(-ENOMEM);
283
284         omm = dev->od_mdt_map;
285
286         parent = osd_sb(dev)->s_root;
287         osd_push_ctxt(dev, &new, &save);
288
289         lu_local_obj_fid(fid, REMOTE_PARENT_DIR_OID);
290         d = simple_mkdir(env, dev, parent, fid, REMOTE_PARENT_DIR,
291                          LMAC_NOT_IN_OI, 0755, NULL);
292         if (IS_ERR(d))
293                 GOTO(cleanup, rc = PTR_ERR(d));
294
295         omm->omm_remote_parent = d;
296
297         GOTO(cleanup, rc = 0);
298
299 cleanup:
300         pop_ctxt(&save, &new);
301         if (rc) {
302                 if (omm->omm_remote_parent != NULL)
303                         dput(omm->omm_remote_parent);
304                 OBD_FREE_PTR(omm);
305                 dev->od_mdt_map = NULL;
306         }
307         return rc;
308 }
309
310 static void osd_mdt_fini(struct osd_device *osd)
311 {
312         struct osd_mdobj_map *omm = osd->od_mdt_map;
313
314         if (omm == NULL)
315                 return;
316
317         if (omm->omm_remote_parent)
318                 dput(omm->omm_remote_parent);
319
320         OBD_FREE_PTR(omm);
321         osd->od_ost_map = NULL;
322 }
323
324 int osd_add_to_remote_parent(const struct lu_env *env, struct osd_device *osd,
325                              struct osd_object *obj, struct osd_thandle *oh)
326 {
327         struct osd_mdobj_map *omm = osd->od_mdt_map;
328         struct osd_thread_info *oti = osd_oti_get(env);
329         struct lustre_mdt_attrs *lma = &oti->oti_ost_attrs.loa_lma;
330         char *name = oti->oti_name;
331         struct osd_thread_info *info = osd_oti_get(env);
332         struct dentry *dentry;
333         struct dentry *parent;
334         int rc;
335
336         if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_NO_AGENTENT))
337                 RETURN(0);
338
339         /*
340          * Set REMOTE_PARENT in lma, so other process like unlink or lfsck
341          * can identify this object quickly
342          */
343         rc = osd_get_lma(oti, obj->oo_inode, &oti->oti_obj_dentry,
344                          &oti->oti_ost_attrs);
345         if (rc)
346                 RETURN(rc);
347
348         lma->lma_incompat |= LMAI_REMOTE_PARENT;
349         lustre_lma_swab(lma);
350         rc = __osd_xattr_set(oti, obj->oo_inode, XATTR_NAME_LMA, lma,
351                              sizeof(*lma), XATTR_REPLACE);
352         if (rc)
353                 RETURN(rc);
354
355         parent = omm->omm_remote_parent;
356         sprintf(name, DFID_NOBRACE, PFID(lu_object_fid(&obj->oo_dt.do_lu)));
357         dentry = osd_child_dentry_by_inode(env, parent->d_inode,
358                                            name, strlen(name));
359         inode_lock(parent->d_inode);
360         rc = osd_ldiskfs_add_entry(info, osd, oh->ot_handle, dentry,
361                                    obj->oo_inode, NULL);
362         if (!rc && S_ISDIR(obj->oo_inode->i_mode))
363                 ldiskfs_inc_count(oh->ot_handle, parent->d_inode);
364         else if (unlikely(rc == -EEXIST))
365                 rc = 0;
366         if (!rc)
367                 lu_object_set_agent_entry(&obj->oo_dt.do_lu);
368         CDEBUG(D_INODE, "%s: create agent entry for %s: rc = %d\n",
369                osd_name(osd), name, rc);
370         mark_inode_dirty(parent->d_inode);
371         inode_unlock(parent->d_inode);
372         RETURN(rc);
373 }
374
375 int osd_delete_from_remote_parent(const struct lu_env *env,
376                                   struct osd_device *osd,
377                                   struct osd_object *obj,
378                                   struct osd_thandle *oh, bool destroy)
379 {
380         struct osd_mdobj_map *omm = osd->od_mdt_map;
381         struct osd_thread_info *oti = osd_oti_get(env);
382         struct lustre_mdt_attrs *lma = &oti->oti_ost_attrs.loa_lma;
383         char *name = oti->oti_name;
384         struct dentry *dentry;
385         struct dentry *parent;
386         struct ldiskfs_dir_entry_2 *de;
387         struct buffer_head *bh;
388         int rc;
389
390         parent = omm->omm_remote_parent;
391         sprintf(name, DFID_NOBRACE, PFID(lu_object_fid(&obj->oo_dt.do_lu)));
392         dentry = osd_child_dentry_by_inode(env, parent->d_inode,
393                                            name, strlen(name));
394         inode_lock(parent->d_inode);
395         bh = osd_ldiskfs_find_entry(parent->d_inode, &dentry->d_name, &de,
396                                     NULL, NULL);
397         if (IS_ERR(bh)) {
398                 inode_unlock(parent->d_inode);
399                 rc = PTR_ERR(bh);
400                 if (unlikely(rc == -ENOENT))
401                         rc = 0;
402         } else {
403                 rc = ldiskfs_delete_entry(oh->ot_handle, parent->d_inode,
404                                           de, bh);
405                 if (!rc && S_ISDIR(obj->oo_inode->i_mode))
406                         ldiskfs_dec_count(oh->ot_handle, parent->d_inode);
407                 mark_inode_dirty(parent->d_inode);
408                 inode_unlock(parent->d_inode);
409                 brelse(bh);
410                 CDEBUG(D_INODE, "%s: remove agent entry for %s: rc = %d\n",
411                        osd_name(osd), name, rc);
412         }
413
414         if (destroy || rc) {
415                 if (!rc)
416                         lu_object_clear_agent_entry(&obj->oo_dt.do_lu);
417
418                 RETURN(rc);
419         }
420
421         rc = osd_get_lma(oti, obj->oo_inode, &oti->oti_obj_dentry,
422                          &oti->oti_ost_attrs);
423         if (rc)
424                 RETURN(rc);
425
426         /* Get rid of REMOTE_PARENT flag from incompat */
427         lma->lma_incompat &= ~LMAI_REMOTE_PARENT;
428         lustre_lma_swab(lma);
429         rc = __osd_xattr_set(oti, obj->oo_inode, XATTR_NAME_LMA, lma,
430                              sizeof(*lma), XATTR_REPLACE);
431         if (!rc)
432                 lu_object_clear_agent_entry(&obj->oo_dt.do_lu);
433         RETURN(rc);
434 }
435
436 int osd_lookup_in_remote_parent(struct osd_thread_info *oti,
437                                 struct osd_device *osd,
438                                 const struct lu_fid *fid,
439                                 struct osd_inode_id *id)
440 {
441         struct osd_mdobj_map *omm = osd->od_mdt_map;
442         char *name = oti->oti_name;
443         struct dentry *parent;
444         struct dentry *dentry;
445         struct ldiskfs_dir_entry_2 *de;
446         struct buffer_head *bh;
447         int rc;
448
449         ENTRY;
450
451         if (unlikely(osd->od_is_ost))
452                 RETURN(-ENOENT);
453
454         parent = omm->omm_remote_parent;
455         sprintf(name, DFID_NOBRACE, PFID(fid));
456         dentry = osd_child_dentry_by_inode(oti->oti_env, parent->d_inode,
457                                            name, strlen(name));
458         inode_lock(parent->d_inode);
459         bh = osd_ldiskfs_find_entry(parent->d_inode, &dentry->d_name, &de,
460                                     NULL, NULL);
461         if (IS_ERR(bh)) {
462                 rc = PTR_ERR(bh);
463         } else {
464                 struct inode *inode;
465
466                 osd_id_gen(id, le32_to_cpu(de->inode), OSD_OII_NOGEN);
467                 brelse(bh);
468                 inode = osd_iget(oti, osd, id);
469                 if (IS_ERR(inode)) {
470                         rc = PTR_ERR(inode);
471                         if (rc == -ESTALE)
472                                 rc = -ENOENT;
473                 } else {
474                         iput(inode);
475                         rc = 0;
476                 }
477         }
478         inode_unlock(parent->d_inode);
479         if (rc == 0)
480                 osd_add_oi_cache(oti, osd, id, fid);
481         RETURN(rc);
482 }
483
484 /*
485  * directory structure on legacy OST:
486  *
487  * O/<seq>/d0-31/<objid>
488  * O/<seq>/LAST_ID
489  * last_rcvd
490  * LAST_GROUP
491  * CONFIGS
492  *
493  */
494 static int osd_ost_init(const struct lu_env *env, struct osd_device *dev)
495 {
496         struct lvfs_run_ctxt new;
497         struct lvfs_run_ctxt save;
498         struct dentry *d;
499         int rc;
500         bool created = false;
501
502         ENTRY;
503
504         OBD_ALLOC_PTR(dev->od_ost_map);
505         if (dev->od_ost_map == NULL)
506                 RETURN(-ENOMEM);
507
508         /* to get subdir count from last_rcvd */
509         rc = osd_last_rcvd_subdir_count(dev);
510         if (rc < 0)
511                 GOTO(cleanup_alloc, rc);
512
513         dev->od_ost_map->om_subdir_count = rc;
514         INIT_LIST_HEAD(&dev->od_ost_map->om_seq_list);
515         rwlock_init(&dev->od_ost_map->om_seq_list_lock);
516         mutex_init(&dev->od_ost_map->om_dir_init_mutex);
517
518         osd_push_ctxt(dev, &new, &save);
519         d = simple_mkdir(env, dev, osd_sb(dev)->s_root, NULL, "O",
520                          LMAC_NOT_IN_OI | LMAC_FID_ON_OST, 0755, &created);
521         if (IS_ERR(d))
522                 GOTO(cleanup_ctxt, rc = PTR_ERR(d));
523
524         if (created)
525                 /* It is quite probably that the device is new formatted. */
526                 dev->od_maybe_new = 1;
527
528         dev->od_ost_map->om_root = d;
529
530         pop_ctxt(&save, &new);
531         RETURN(0);
532
533 cleanup_ctxt:
534         pop_ctxt(&save, &new);
535 cleanup_alloc:
536         OBD_FREE_PTR(dev->od_ost_map);
537         return rc;
538 }
539
540 static void osd_seq_free(struct osd_obj_seq *osd_seq)
541 {
542         int j;
543
544         if (osd_seq->oos_dirs) {
545                 for (j = 0; j < osd_seq->oos_subdir_count; j++) {
546                         if (osd_seq->oos_dirs[j])
547                                 dput(osd_seq->oos_dirs[j]);
548                 }
549                 OBD_FREE(osd_seq->oos_dirs,
550                          sizeof(struct dentry *) * osd_seq->oos_subdir_count);
551         }
552
553         if (osd_seq->oos_root)
554                 dput(osd_seq->oos_root);
555
556         OBD_FREE_PTR(osd_seq);
557 }
558
559 static void osd_ost_fini(struct osd_device *osd)
560 {
561         struct osd_obj_seq *osd_seq;
562         struct osd_obj_seq *tmp;
563         struct osd_obj_map *map = osd->od_ost_map;
564
565         ENTRY;
566
567         if (map == NULL)
568                 return;
569
570         write_lock(&map->om_seq_list_lock);
571         list_for_each_entry_safe(osd_seq, tmp, &map->om_seq_list,
572                                  oos_seq_list) {
573                 list_del_init(&osd_seq->oos_seq_list);
574                 write_unlock(&map->om_seq_list_lock);
575                 osd_seq_free(osd_seq);
576                 write_lock(&map->om_seq_list_lock);
577         }
578         write_unlock(&map->om_seq_list_lock);
579         if (map->om_root)
580                 dput(map->om_root);
581         OBD_FREE_PTR(map);
582         osd->od_ost_map = NULL;
583         EXIT;
584 }
585
586 static int osd_index_backup_dir_init(const struct lu_env *env,
587                                      struct osd_device *dev)
588 {
589         struct lu_fid *fid = &osd_oti_get(env)->oti_fid;
590         struct lvfs_run_ctxt new;
591         struct lvfs_run_ctxt save;
592         struct dentry *dentry;
593         int rc = 0;
594
595         ENTRY;
596
597         lu_local_obj_fid(fid, INDEX_BACKUP_OID);
598         osd_push_ctxt(dev, &new, &save);
599         dentry = simple_mkdir(env, dev, osd_sb(dev)->s_root, fid,
600                               INDEX_BACKUP_DIR, LMAC_NOT_IN_OI, 0755, NULL);
601         if (IS_ERR(dentry)) {
602                 rc = PTR_ERR(dentry);
603         } else {
604                 dev->od_index_backup_inode = igrab(dentry->d_inode);
605                 dput(dentry);
606         }
607         pop_ctxt(&save, &new);
608
609         RETURN(rc);
610 }
611
612 static void osd_index_backup_dir_fini(struct osd_device *dev)
613 {
614         if (dev->od_index_backup_inode) {
615                 iput(dev->od_index_backup_inode);
616                 dev->od_index_backup_inode = NULL;
617         }
618 }
619
620 int osd_obj_map_init(const struct lu_env *env, struct osd_device *dev)
621 {
622         int rc;
623         bool mdt_init = false;
624
625         ENTRY;
626
627         rc = osd_ost_init(env, dev);
628         if (rc)
629                 RETURN(rc);
630
631         if (!dev->od_is_ost) {
632                 rc = osd_mdt_init(env, dev);
633                 if (rc) {
634                         osd_ost_fini(dev);
635                         RETURN(rc);
636                 }
637
638                 mdt_init = true;
639         }
640
641         rc = osd_index_backup_dir_init(env, dev);
642         if (rc) {
643                 osd_ost_fini(dev);
644                 if (mdt_init)
645                         osd_mdt_fini(dev);
646         }
647
648         RETURN(rc);
649 }
650
651 static struct osd_obj_seq *osd_seq_find_locked(struct osd_obj_map *map, u64 seq)
652 {
653         struct osd_obj_seq *osd_seq;
654
655         list_for_each_entry(osd_seq, &map->om_seq_list, oos_seq_list) {
656                 if (osd_seq->oos_seq == seq)
657                         return osd_seq;
658         }
659         return NULL;
660 }
661
662 static struct osd_obj_seq *osd_seq_find(struct osd_obj_map *map, u64 seq)
663 {
664         struct osd_obj_seq *osd_seq;
665
666         read_lock(&map->om_seq_list_lock);
667         osd_seq = osd_seq_find_locked(map, seq);
668         read_unlock(&map->om_seq_list_lock);
669         return osd_seq;
670 }
671
/*
 * Release everything osd_obj_map_init() set up for @dev: the index backup
 * directory inode first, then the OST map, then the MDT map.
 */
void osd_obj_map_fini(struct osd_device *dev)
{
	osd_index_backup_dir_fini(dev);

	osd_ost_fini(dev);
	osd_mdt_fini(dev);
}
678
679 /**
680  * Update the specified OI mapping.
681  *
682  * \retval   1, changed nothing
683  * \retval   0, changed successfully
684  * \retval -ve, on error
685  */
686 static int osd_obj_update_entry(struct osd_thread_info *info,
687                                 struct osd_device *osd,
688                                 struct dentry *dir, const char *name,
689                                 const struct lu_fid *fid,
690                                 const struct osd_inode_id *id,
691                                 handle_t *th)
692 {
693         struct inode *parent = dir->d_inode;
694         struct dentry *child;
695         struct ldiskfs_dir_entry_2 *de;
696         struct buffer_head *bh;
697         struct inode *inode;
698         struct dentry *dentry = &info->oti_obj_dentry;
699         struct osd_inode_id *oi_id = &info->oti_id3;
700         struct lustre_mdt_attrs *lma = &info->oti_ost_attrs.loa_lma;
701         struct lu_fid *oi_fid = &lma->lma_self_fid;
702         int rc;
703
704         ENTRY;
705
706         LASSERT(th != NULL);
707         LASSERT(th->h_transaction != NULL);
708
709         child = &info->oti_child_dentry;
710         child->d_parent = dir;
711         child->d_name.hash = 0;
712         child->d_name.name = name;
713         child->d_name.len = strlen(name);
714
715         dquot_initialize(parent);
716         inode_lock(parent);
717         bh = osd_ldiskfs_find_entry(parent, &child->d_name, &de, NULL, NULL);
718         if (IS_ERR(bh))
719                 GOTO(out, rc = PTR_ERR(bh));
720
721         if (le32_to_cpu(de->inode) == id->oii_ino)
722                 GOTO(out, rc = 1);
723
724         osd_id_gen(oi_id, le32_to_cpu(de->inode), OSD_OII_NOGEN);
725         inode = osd_iget(info, osd, oi_id);
726         if (IS_ERR(inode)) {
727                 rc = PTR_ERR(inode);
728                 if (rc == -ENOENT || rc == -ESTALE)
729                         goto update;
730                 GOTO(out, rc);
731         }
732
733         /*
734          * The EA inode should NOT be in OI, old OI scrub may added
735          * such OI mapping by wrong, replace it.
736          */
737         if (unlikely(osd_is_ea_inode(inode))) {
738                 iput(inode);
739                 goto update;
740         }
741
742         rc = osd_get_lma(info, inode, dentry, &info->oti_ost_attrs);
743         if (rc == -ENODATA) {
744                 rc = osd_get_idif(info, inode, dentry, oi_fid);
745                 if (rc > 0 || rc == -ENODATA) {
746                         oi_fid = NULL;
747                         rc = 0;
748                 }
749         }
750         iput(inode);
751
752         if (rc != 0)
753                 GOTO(out, rc);
754
755         /*
756          * If the OST-object has neither FID-in-LMA nor FID-in-ff, it is
757          * either a crashed object or a uninitialized one. Replace it.
758          */
759         if (oi_fid != NULL && lu_fid_eq(fid, oi_fid)) {
760                 CERROR("%s: the FID "DFID" is used by two objects: "
761                        "%u/%u %u/%u\n", osd_name(osd), PFID(fid),
762                        oi_id->oii_ino, oi_id->oii_gen,
763                        id->oii_ino, id->oii_gen);
764                 GOTO(out, rc = -EEXIST);
765         }
766
767         if (fid_is_idif(fid) && oi_fid != NULL && fid_is_idif(oi_fid)) {
768                 __u32 idx1 = fid_idif_ost_idx(fid);
769                 __u32 idx2 = fid_idif_ost_idx(oi_fid);
770                 struct ost_id *ostid = &info->oti_ostid;
771                 struct lu_fid *tfid = &info->oti_fid3;
772
773                 LASSERTF(idx1 == 0 || idx1 == osd->od_index,
774                          "invalid given FID "DFID", not match the "
775                          "device index %u\n", PFID(fid), osd->od_index);
776
777                 if (idx1 != idx2) {
778                         if (idx1 == 0 && idx2 == osd->od_index) {
779                                 fid_to_ostid(fid, ostid);
780                                 ostid_to_fid(tfid, ostid, idx2);
781                                 if (lu_fid_eq(tfid, oi_fid)) {
782                                         CERROR("%s: the FID "DFID" is used by "
783                                                "two objects(2): %u/%u %u/%u\n",
784                                                osd_name(osd), PFID(fid),
785                                                oi_id->oii_ino, oi_id->oii_gen,
786                                                id->oii_ino, id->oii_gen);
787
788                                         GOTO(out, rc = -EEXIST);
789                                 }
790                         } else if (idx2 == 0 && idx1 == osd->od_index) {
791                                 fid_to_ostid(oi_fid, ostid);
792                                 ostid_to_fid(tfid, ostid, idx1);
793                                 if (lu_fid_eq(tfid, fid)) {
794                                         CERROR("%s: the FID "DFID" is used by "
795                                                "two objects(2): %u/%u %u/%u\n",
796                                                osd_name(osd), PFID(fid),
797                                                oi_id->oii_ino, oi_id->oii_gen,
798                                                id->oii_ino, id->oii_gen);
799
800                                         GOTO(out, rc = -EEXIST);
801                                 }
802                         }
803                 }
804         }
805
806 update:
807         /*
808          * There may be temporary inconsistency: On one hand, the new
809          * object may be referenced by multiple entries, which is out
810          * of our control unless we traverse the whole /O completely,
811          * which is non-flat order and inefficient, should be avoided;
812          * On the other hand, the old object may become orphan if it
813          * is still valid. Since it was referenced by an invalid entry,
814          * making it as invisible temporary may be not worse. OI scrub
815          * will process it later.
816          */
817         rc = ldiskfs_journal_get_write_access(th, bh);
818         if (rc != 0)
819                 GOTO(out, rc);
820
821         de->inode = cpu_to_le32(id->oii_ino);
822         rc = ldiskfs_handle_dirty_metadata(th, NULL, bh);
823
824         GOTO(out, rc);
825
826 out:
827         if (!IS_ERR(bh))
828                 brelse(bh);
829         inode_unlock(parent);
830         return rc;
831 }
832
833 static int osd_obj_del_entry(struct osd_thread_info *info,
834                              struct osd_device *osd,
835                              struct dentry *dird, char *name,
836                              handle_t *th)
837 {
838         struct ldiskfs_dir_entry_2 *de;
839         struct buffer_head *bh;
840         struct dentry *child;
841         struct inode *dir = dird->d_inode;
842         int rc;
843
844         ENTRY;
845
846         LASSERT(th != NULL);
847         LASSERT(th->h_transaction != NULL);
848
849         child = &info->oti_child_dentry;
850         child->d_name.hash = 0;
851         child->d_name.name = name;
852         child->d_name.len = strlen(name);
853         child->d_parent = dird;
854         child->d_inode = NULL;
855
856         dquot_initialize(dir);
857         inode_lock(dir);
858         bh = osd_ldiskfs_find_entry(dir, &child->d_name, &de, NULL, NULL);
859         if (IS_ERR(bh)) {
860                 rc = PTR_ERR(bh);
861         } else {
862                 rc = ldiskfs_delete_entry(th, dir, de, bh);
863                 brelse(bh);
864         }
865         inode_unlock(dir);
866
867         RETURN(rc);
868 }
869
870 static int osd_obj_add_entry(struct osd_thread_info *info,
871                              struct osd_device *osd,
872                              struct dentry *dir, char *name,
873                              const struct osd_inode_id *id,
874                              handle_t *th)
875 {
876         struct dentry *child;
877         struct inode *inode;
878         int rc;
879
880         ENTRY;
881
882         if (OBD_FAIL_CHECK(OBD_FAIL_OSD_COMPAT_NO_ENTRY))
883                 RETURN(0);
884
885         LASSERT(th != NULL);
886         LASSERT(th->h_transaction != NULL);
887
888         inode = info->oti_inode;
889         if (unlikely(inode == NULL)) {
890                 struct ldiskfs_inode_info *lii;
891
892                 OBD_ALLOC_PTR(lii);
893                 if (lii == NULL)
894                         RETURN(-ENOMEM);
895                 inode = info->oti_inode = &lii->vfs_inode;
896         }
897
898         inode->i_sb = osd_sb(osd);
899         osd_id_to_inode(inode, id);
900         inode->i_mode = S_IFREG; /* for type in ldiskfs dir entry */
901
902         child = &info->oti_child_dentry;
903         child->d_name.hash = 0;
904         child->d_name.name = name;
905         child->d_name.len = strlen(name);
906         child->d_parent = dir;
907         child->d_inode = inode;
908
909         if (OBD_FAIL_CHECK(OBD_FAIL_OSD_COMPAT_INVALID_ENTRY))
910                 inode->i_ino++;
911
912         dquot_initialize(dir->d_inode);
913         inode_lock(dir->d_inode);
914         rc = osd_ldiskfs_add_entry(info, osd, th, child, inode, NULL);
915         inode_unlock(dir->d_inode);
916
917         RETURN(rc);
918 }
919
920 /**
921  * Use %llu for legacy OST sequences, but use %llx for new
922  * sequences names, so that the O/{seq}/dN/{oid} more closely
923  * follows the DFID/PFID format. This makes it easier to map from
924  * debug messages to objects in the future, and the legacy space
925  * of FID_SEQ_OST_MDT0 will be unused in the future.
926  **/
927 static inline void osd_seq_name(char *seq_name, size_t name_size, u64 seq)
928 {
929         snprintf(seq_name, name_size,
930                  (fid_seq_is_rsvd(seq) ||
931                   fid_seq_is_mdt0(seq)) ? "%llu" : "%llx",
932                  fid_seq_is_idif(seq) ? 0 : seq);
933 }
934
935 static inline void osd_oid_name(char *name, size_t name_size,
936                                 const struct lu_fid *fid, u64 id)
937 {
938         snprintf(name, name_size,
939                  (fid_seq_is_rsvd(fid_seq(fid)) ||
940                   fid_seq_is_mdt0(fid_seq(fid)) ||
941                   fid_seq_is_idif(fid_seq(fid))) ? "%llu" : "%llx", id);
942 }
943
944 /* external locking is required */
945 static int osd_seq_load_locked(struct osd_thread_info *info,
946                                struct osd_device *osd,
947                                struct osd_obj_seq *osd_seq)
948 {
949         struct osd_obj_map *map = osd->od_ost_map;
950         struct dentry *seq_dir;
951         int rc = 0;
952         int i;
953         char dir_name[32];
954
955         ENTRY;
956
957         if (osd_seq->oos_root != NULL)
958                 RETURN(0);
959
960         LASSERT(map);
961         LASSERT(map->om_root);
962
963         osd_seq_name(dir_name, sizeof(dir_name), osd_seq->oos_seq);
964
965         seq_dir = simple_mkdir(info->oti_env, osd, map->om_root, NULL, dir_name,
966                                LMAC_NOT_IN_OI | LMAC_FID_ON_OST, 0755, NULL);
967         if (IS_ERR(seq_dir))
968                 GOTO(out_err, rc = PTR_ERR(seq_dir));
969         else if (seq_dir->d_inode == NULL)
970                 GOTO(out_put, rc = -EFAULT);
971
972         osd_seq->oos_root = seq_dir;
973
974         LASSERT(osd_seq->oos_dirs == NULL);
975         OBD_ALLOC(osd_seq->oos_dirs,
976                   sizeof(seq_dir) * osd_seq->oos_subdir_count);
977         if (osd_seq->oos_dirs == NULL)
978                 GOTO(out_put, rc = -ENOMEM);
979
980         for (i = 0; i < osd_seq->oos_subdir_count; i++) {
981                 struct dentry   *dir;
982
983                 snprintf(dir_name, sizeof(dir_name), "d%u", i);
984                 dir = simple_mkdir(info->oti_env, osd, osd_seq->oos_root, NULL,
985                                    dir_name, LMAC_NOT_IN_OI | LMAC_FID_ON_OST,
986                                    0700, NULL);
987                 if (IS_ERR(dir)) {
988                         GOTO(out_free, rc = PTR_ERR(dir));
989                 } else if (dir->d_inode == NULL) {
990                         dput(dir);
991                         GOTO(out_free, rc = -EFAULT);
992                 }
993
994                 osd_seq->oos_dirs[i] = dir;
995         }
996
997         if (rc != 0) {
998 out_free:
999                 for (i = 0; i < osd_seq->oos_subdir_count; i++) {
1000                         if (osd_seq->oos_dirs[i] != NULL)
1001                                 dput(osd_seq->oos_dirs[i]);
1002                 }
1003                 OBD_FREE(osd_seq->oos_dirs,
1004                          sizeof(seq_dir) * osd_seq->oos_subdir_count);
1005 out_put:
1006                 dput(seq_dir);
1007                 osd_seq->oos_root = NULL;
1008         }
1009 out_err:
1010         RETURN(rc);
1011 }
1012
1013 static struct osd_obj_seq *osd_seq_load(struct osd_thread_info *info,
1014                                         struct osd_device *osd, u64 seq)
1015 {
1016         struct osd_obj_map *map;
1017         struct osd_obj_seq *osd_seq;
1018         int rc = 0;
1019
1020         ENTRY;
1021
1022         map = osd->od_ost_map;
1023         LASSERT(map);
1024         LASSERT(map->om_root);
1025
1026         osd_seq = osd_seq_find(map, seq);
1027         if (likely(osd_seq != NULL))
1028                 RETURN(osd_seq);
1029
1030         /* Serializing init process */
1031         mutex_lock(&map->om_dir_init_mutex);
1032
1033         /* Check whether the seq has been added */
1034         read_lock(&map->om_seq_list_lock);
1035         osd_seq = osd_seq_find_locked(map, seq);
1036         if (osd_seq != NULL) {
1037                 read_unlock(&map->om_seq_list_lock);
1038                 GOTO(cleanup, rc = 0);
1039         }
1040         read_unlock(&map->om_seq_list_lock);
1041
1042         OBD_ALLOC_PTR(osd_seq);
1043         if (osd_seq == NULL)
1044                 GOTO(cleanup, rc = -ENOMEM);
1045
1046         INIT_LIST_HEAD(&osd_seq->oos_seq_list);
1047         osd_seq->oos_seq = seq;
1048         /*
1049          * Init subdir count to be 32, but each seq can have
1050          * different subdir count
1051          */
1052         osd_seq->oos_subdir_count = map->om_subdir_count;
1053         rc = osd_seq_load_locked(info, osd, osd_seq);
1054         if (rc != 0)
1055                 GOTO(cleanup, rc);
1056
1057         write_lock(&map->om_seq_list_lock);
1058         list_add(&osd_seq->oos_seq_list, &map->om_seq_list);
1059         write_unlock(&map->om_seq_list_lock);
1060
1061 cleanup:
1062         mutex_unlock(&map->om_dir_init_mutex);
1063         if (rc != 0) {
1064                 if (osd_seq != NULL)
1065                         OBD_FREE_PTR(osd_seq);
1066                 RETURN(ERR_PTR(rc));
1067         }
1068
1069         RETURN(osd_seq);
1070 }
1071
1072 int osd_obj_map_lookup(struct osd_thread_info *info, struct osd_device *dev,
1073                        const struct lu_fid *fid, struct osd_inode_id *id)
1074 {
1075         struct osd_obj_map *map;
1076         struct osd_obj_seq *osd_seq;
1077         struct dentry *d_seq;
1078         struct dentry *child;
1079         struct ost_id *ostid = &info->oti_ostid;
1080         int dirn;
1081         char name[32];
1082         struct ldiskfs_dir_entry_2 *de;
1083         struct buffer_head *bh;
1084         struct inode *dir;
1085         struct inode *inode;
1086
1087         ENTRY;
1088
1089         /* on the very first lookup we find and open directories */
1090         map = dev->od_ost_map;
1091         LASSERT(map);
1092         LASSERT(map->om_root);
1093
1094         fid_to_ostid(fid, ostid);
1095         osd_seq = osd_seq_load(info, dev, ostid_seq(ostid));
1096         if (IS_ERR(osd_seq))
1097                 RETURN(PTR_ERR(osd_seq));
1098
1099         dirn = ostid_id(ostid) & (osd_seq->oos_subdir_count - 1);
1100         d_seq = osd_seq->oos_dirs[dirn];
1101         LASSERT(d_seq);
1102
1103         osd_oid_name(name, sizeof(name), fid, ostid_id(ostid));
1104
1105         child = &info->oti_child_dentry;
1106         child->d_parent = d_seq;
1107         child->d_name.hash = 0;
1108         child->d_name.name = name;
1109         /* XXX: we can use rc from sprintf() instead of strlen() */
1110         child->d_name.len = strlen(name);
1111
1112         dir = d_seq->d_inode;
1113         inode_lock(dir);
1114         bh = osd_ldiskfs_find_entry(dir, &child->d_name, &de, NULL, NULL);
1115         inode_unlock(dir);
1116
1117         if (IS_ERR(bh))
1118                 RETURN(PTR_ERR(bh));
1119
1120         osd_id_gen(id, le32_to_cpu(de->inode), OSD_OII_NOGEN);
1121         brelse(bh);
1122
1123         inode = osd_iget(info, dev, id);
1124         if (IS_ERR(inode)) {
1125                 int rc = PTR_ERR(inode);
1126
1127                 RETURN(rc == -ENOENT ? -ESTALE : rc);
1128         }
1129
1130         iput(inode);
1131         RETURN(0);
1132 }
1133
1134 int osd_obj_map_insert(struct osd_thread_info *info,
1135                        struct osd_device *osd,
1136                        const struct lu_fid *fid,
1137                        const struct osd_inode_id *id,
1138                        handle_t *th)
1139 {
1140         struct osd_obj_map *map;
1141         struct osd_obj_seq *osd_seq;
1142         struct dentry *d;
1143         struct ost_id *ostid = &info->oti_ostid;
1144         u64 oid;
1145         int dirn, rc = 0;
1146         char name[32];
1147
1148         ENTRY;
1149
1150         map = osd->od_ost_map;
1151         LASSERT(map);
1152
1153         /* map fid to seq:objid */
1154         fid_to_ostid(fid, ostid);
1155
1156         oid = ostid_id(ostid);
1157         osd_seq = osd_seq_load(info, osd, ostid_seq(ostid));
1158         if (IS_ERR(osd_seq))
1159                 RETURN(PTR_ERR(osd_seq));
1160
1161         dirn = oid & (osd_seq->oos_subdir_count - 1);
1162         d = osd_seq->oos_dirs[dirn];
1163         LASSERT(d);
1164
1165         osd_oid_name(name, sizeof(name), fid, oid);
1166
1167 again:
1168         rc = osd_obj_add_entry(info, osd, d, name, id, th);
1169         if (rc == -EEXIST) {
1170                 rc = osd_obj_update_entry(info, osd, d, name, fid, id, th);
1171                 if (unlikely(rc == -ENOENT))
1172                         goto again;
1173
1174                 if (unlikely(rc == 1))
1175                         RETURN(0);
1176         }
1177
1178         RETURN(rc);
1179 }
1180
1181 int osd_obj_map_delete(struct osd_thread_info *info, struct osd_device *osd,
1182                        const struct lu_fid *fid, handle_t *th)
1183 {
1184         struct osd_obj_map *map;
1185         struct osd_obj_seq *osd_seq;
1186         struct dentry *d;
1187         struct ost_id *ostid = &info->oti_ostid;
1188         int dirn, rc = 0;
1189         char name[32];
1190
1191         ENTRY;
1192
1193         map = osd->od_ost_map;
1194         LASSERT(map);
1195
1196         /* map fid to seq:objid */
1197         fid_to_ostid(fid, ostid);
1198
1199         osd_seq = osd_seq_load(info, osd, ostid_seq(ostid));
1200         if (IS_ERR(osd_seq))
1201                 GOTO(cleanup, rc = PTR_ERR(osd_seq));
1202
1203         dirn = ostid_id(ostid) & (osd_seq->oos_subdir_count - 1);
1204         d = osd_seq->oos_dirs[dirn];
1205         LASSERT(d);
1206
1207         osd_oid_name(name, sizeof(name), fid, ostid_id(ostid));
1208         rc = osd_obj_del_entry(info, osd, d, name, th);
1209 cleanup:
1210         RETURN(rc);
1211 }
1212
1213 int osd_obj_map_update(struct osd_thread_info *info,
1214                        struct osd_device *osd,
1215                        const struct lu_fid *fid,
1216                        const struct osd_inode_id *id,
1217                        handle_t *th)
1218 {
1219         struct osd_obj_seq *osd_seq;
1220         struct dentry *d;
1221         struct ost_id *ostid = &info->oti_ostid;
1222         int dirn, rc = 0;
1223         char name[32];
1224
1225         ENTRY;
1226
1227         fid_to_ostid(fid, ostid);
1228         osd_seq = osd_seq_load(info, osd, ostid_seq(ostid));
1229         if (IS_ERR(osd_seq))
1230                 RETURN(PTR_ERR(osd_seq));
1231
1232         dirn = ostid_id(ostid) & (osd_seq->oos_subdir_count - 1);
1233         d = osd_seq->oos_dirs[dirn];
1234         LASSERT(d);
1235
1236         osd_oid_name(name, sizeof(name), fid, ostid_id(ostid));
1237         rc = osd_obj_update_entry(info, osd, d, name, fid, id, th);
1238
1239         RETURN(rc);
1240 }
1241
1242 int osd_obj_map_recover(struct osd_thread_info *info,
1243                         struct osd_device *osd,
1244                         struct inode *src_parent,
1245                         struct dentry *src_child,
1246                         const struct lu_fid *fid)
1247 {
1248         struct osd_obj_seq *osd_seq;
1249         struct dentry *tgt_parent;
1250         struct dentry *tgt_child = &info->oti_child_dentry;
1251         struct inode *dir;
1252         struct inode *inode = src_child->d_inode;
1253         struct ost_id *ostid = &info->oti_ostid;
1254         handle_t *jh;
1255         struct ldiskfs_dir_entry_2 *de;
1256         struct buffer_head *bh;
1257         char name[32];
1258         int dirn;
1259         int rc = 0;
1260
1261         ENTRY;
1262
1263         if (fid_is_last_id(fid)) {
1264                 osd_seq = osd_seq_load(info, osd, fid_seq(fid));
1265                 if (IS_ERR(osd_seq))
1266                         RETURN(PTR_ERR(osd_seq));
1267
1268                 tgt_parent = osd_seq->oos_root;
1269                 tgt_child->d_name.name = "LAST_ID";
1270                 tgt_child->d_name.len = strlen("LAST_ID");
1271         } else {
1272                 fid_to_ostid(fid, ostid);
1273                 osd_seq = osd_seq_load(info, osd, ostid_seq(ostid));
1274                 if (IS_ERR(osd_seq))
1275                         RETURN(PTR_ERR(osd_seq));
1276
1277                 dirn = ostid_id(ostid) & (osd_seq->oos_subdir_count - 1);
1278                 tgt_parent = osd_seq->oos_dirs[dirn];
1279                 osd_oid_name(name, sizeof(name), fid, ostid_id(ostid));
1280                 tgt_child->d_name.name = name;
1281                 tgt_child->d_name.len = strlen(name);
1282         }
1283         LASSERT(tgt_parent != NULL);
1284
1285         dir = tgt_parent->d_inode;
1286         tgt_child->d_name.hash = 0;
1287         tgt_child->d_parent = tgt_parent;
1288         tgt_child->d_inode = inode;
1289
1290         /* The non-initialized src_child may be destroyed. */
1291         jh = osd_journal_start_sb(osd_sb(osd), LDISKFS_HT_MISC,
1292                                 osd_dto_credits_noquota[DTO_INDEX_DELETE] +
1293                                 osd_dto_credits_noquota[DTO_INDEX_INSERT] +
1294                                 osd_dto_credits_noquota[DTO_OBJECT_DELETE]);
1295         if (IS_ERR(jh))
1296                 RETURN(PTR_ERR(jh));
1297
1298         dquot_initialize(src_parent);
1299         dquot_initialize(dir);
1300
1301         inode_lock(src_parent);
1302         inode_lock(dir);
1303         bh = osd_ldiskfs_find_entry(dir, &tgt_child->d_name, &de, NULL, NULL);
1304         if (!IS_ERR(bh)) {
1305                 /*
1306                  * XXX: If some other object occupied the same slot. And If such
1307                  *      inode is zero-sized and with SUID+SGID, then means it is
1308                  *      a new created one. Maybe we can remove it and insert the
1309                  *      original one back to the /O/<seq>/d<x>. But there are
1310                  *      something to be considered:
1311                  *
1312                  *      1) The OST-object under /lost+found has crashed LMA.
1313                  *         So it should not conflict with the current one.
1314                  *
1315                  *      2) There are race conditions that: someone may just want
1316                  *         to modify the current one. Even if the OI scrub takes
1317                  *         the object lock when remove the current one, it still
1318                  *         cause the modification to be lost becasue the target
1319                  *         has been removed when the RPC service thread waiting
1320                  *         for the lock.
1321                  *
1322                  *      So keep it there before we have suitable solution.
1323                  */
1324                 brelse(bh);
1325                 inode_unlock(dir);
1326                 inode_unlock(src_parent);
1327                 ldiskfs_journal_stop(jh);
1328
1329                 rc = -EEXIST;
1330                 /* If the src object has never been modified, then remove it. */
1331                 if (inode->i_size == 0 && inode->i_mode & S_ISUID &&
1332                     inode->i_mode & S_ISGID) {
1333                         rc = ll_vfs_unlink(src_parent, src_child);
1334                         if (unlikely(rc == -ENOENT))
1335                                 rc = 0;
1336                 }
1337                 if (rc)
1338                         RETURN(rc);
1339         }
1340
1341         bh = osd_ldiskfs_find_entry(src_parent, &src_child->d_name, &de,
1342                                     NULL, NULL);
1343         if (unlikely(IS_ERR(bh)))
1344                 GOTO(unlock, rc = PTR_ERR(bh));
1345
1346         rc = ldiskfs_delete_entry(jh, src_parent, de, bh);
1347         brelse(bh);
1348         if (rc != 0)
1349                 GOTO(unlock, rc);
1350
1351         rc = osd_ldiskfs_add_entry(info, osd, jh, tgt_child, inode, NULL);
1352
1353         GOTO(unlock, rc);
1354
1355 unlock:
1356         inode_unlock(dir);
1357         inode_unlock(src_parent);
1358         ldiskfs_journal_stop(jh);
1359         return rc;
1360 }
1361
1362 static struct dentry *
1363 osd_object_spec_find(struct osd_thread_info *info, struct osd_device *osd,
1364                      const struct lu_fid *fid, char **name)
1365 {
1366         struct dentry *root = ERR_PTR(-ENOENT);
1367
1368         if (fid_is_last_id(fid)) {
1369                 struct osd_obj_seq *osd_seq;
1370
1371                 /* on creation of LAST_ID we create O/<seq> hierarchy */
1372                 osd_seq = osd_seq_load(info, osd, fid_seq(fid));
1373                 if (IS_ERR(osd_seq))
1374                         RETURN((struct dentry *)osd_seq);
1375
1376                 *name = "LAST_ID";
1377                 root = osd_seq->oos_root;
1378         } else {
1379                 *name = osd_lf_fid2name(fid);
1380                 if (*name == NULL)
1381                         CWARN("UNKNOWN COMPAT FID "DFID"\n", PFID(fid));
1382                 else if ((*name)[0])
1383                         root = osd_sb(osd)->s_root;
1384         }
1385
1386         return root;
1387 }
1388
1389 int osd_obj_spec_update(struct osd_thread_info *info, struct osd_device *osd,
1390                         const struct lu_fid *fid, const struct osd_inode_id *id,
1391                         handle_t *th)
1392 {
1393         struct dentry *root;
1394         char *name = NULL;
1395         int rc;
1396
1397         ENTRY;
1398
1399         root = osd_object_spec_find(info, osd, fid, &name);
1400         if (!IS_ERR(root)) {
1401                 rc = osd_obj_update_entry(info, osd, root, name, fid, id, th);
1402         } else {
1403                 rc = PTR_ERR(root);
1404                 if (rc == -ENOENT)
1405                         rc = 1;
1406         }
1407
1408         RETURN(rc);
1409 }
1410
1411 int osd_obj_spec_insert(struct osd_thread_info *info, struct osd_device *osd,
1412                         const struct lu_fid *fid, const struct osd_inode_id *id,
1413                         handle_t *th)
1414 {
1415         struct dentry *root;
1416         char *name = NULL;
1417         int rc;
1418
1419         ENTRY;
1420
1421         root = osd_object_spec_find(info, osd, fid, &name);
1422         if (!IS_ERR(root)) {
1423                 rc = osd_obj_add_entry(info, osd, root, name, id, th);
1424         } else {
1425                 rc = PTR_ERR(root);
1426                 if (rc == -ENOENT)
1427                         rc = 0;
1428         }
1429
1430         RETURN(rc);
1431 }
1432
1433 int osd_obj_spec_lookup(struct osd_thread_info *info, struct osd_device *osd,
1434                         const struct lu_fid *fid, struct osd_inode_id *id)
1435 {
1436         struct dentry *root;
1437         struct dentry *dentry;
1438         struct inode *inode;
1439         char *name = NULL;
1440         int rc = -ENOENT;
1441
1442         ENTRY;
1443
1444         if (fid_is_last_id(fid)) {
1445                 struct osd_obj_seq *osd_seq;
1446
1447                 osd_seq = osd_seq_load(info, osd, fid_seq(fid));
1448                 if (IS_ERR(osd_seq))
1449                         RETURN(PTR_ERR(osd_seq));
1450                 root = osd_seq->oos_root;
1451                 name = "LAST_ID";
1452         } else {
1453                 root = osd_sb(osd)->s_root;
1454                 name = osd_lf_fid2name(fid);
1455                 if (name == NULL || strlen(name) == 0)
1456                         RETURN(-ENOENT);
1457         }
1458
1459         dentry = osd_lookup_one_len_unlocked(osd, name, root, strlen(name));
1460         if (!IS_ERR(dentry)) {
1461                 inode = dentry->d_inode;
1462                 if (inode) {
1463                         if (is_bad_inode(inode)) {
1464                                 rc = -EIO;
1465                         } else {
1466                                 osd_id_gen(id, inode->i_ino,
1467                                            inode->i_generation);
1468                                 rc = 0;
1469                         }
1470                 }
1471                 /*
1472                  * if dentry is accessible after osd_compat_spec_insert it
1473                  * will still contain NULL inode, so don't keep it in cache
1474                  */
1475                 d_invalidate(dentry);
1476                 dput(dentry);
1477         }
1478
1479         RETURN(rc);
1480 }
1481
1482 #ifndef HAVE_BIO_INTEGRITY_ENABLED
1483 bool bio_integrity_enabled(struct bio *bio)
1484 {
1485         struct blk_integrity *bi = blk_get_integrity(bio_get_disk(bio));
1486
1487         if (bio_op(bio) != REQ_OP_READ && bio_op(bio) != REQ_OP_WRITE)
1488                 return false;
1489
1490         if (!bio_sectors(bio))
1491                 return false;
1492
1493          /* Already protected? */
1494         if (bio_integrity(bio))
1495                 return false;
1496
1497         if (bi == NULL)
1498                 return false;
1499
1500         if (bio_data_dir(bio) == READ && bi->profile->verify_fn != NULL &&
1501             (bi->flags & BLK_INTEGRITY_VERIFY))
1502                 return true;
1503
1504         if (bio_data_dir(bio) == WRITE && bi->profile->generate_fn != NULL &&
1505             (bi->flags & BLK_INTEGRITY_GENERATE))
1506                 return true;
1507
1508         return false;
1509 }
1510 #endif