Whamcloud - gitweb
LU-13581 build: xarray and lockdep_is_held const clash
[fs/lustre-release.git] / lustre / osd-ldiskfs / osd_compat.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2012, 2017, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  * Lustre is a trademark of Sun Microsystems, Inc.
31  *
32  * lustre/osd-ldiskfs/osd_compat.c
33  *
34  * on-disk structure for managing /O
35  *
36  * Author: Alex Zhuravlev <bzzz@whamcloud.com>
37  */
38
39 /* prerequisite for linux/xattr.h */
40 #include <linux/types.h>
41 /* prerequisite for linux/xattr.h */
42 #include <linux/fs.h>
43 /* XATTR_{REPLACE,CREATE} */
44 #include <linux/xattr.h>
45
46 /*
47  * OBD_{ALLOC,FREE}*()
48  * OBD_FAIL_CHECK
49  */
50 #include <obd_support.h>
51
52 #include "osd_internal.h"
53 #include "osd_oi.h"
54
55 static void osd_push_ctxt(const struct osd_device *dev,
56                           struct lvfs_run_ctxt *newctxt,
57                           struct lvfs_run_ctxt *save)
58 {
59         OBD_SET_CTXT_MAGIC(newctxt);
60         newctxt->pwdmnt = dev->od_mnt;
61         newctxt->pwd = dev->od_mnt->mnt_root;
62         newctxt->fs = KERNEL_DS;
63         newctxt->umask = current_umask();
64         newctxt->dt = NULL;
65
66         push_ctxt(save, newctxt);
67 }
68
69 struct dentry *osd_lookup_one_len_common(struct osd_device *dev,
70                                          const char *name,
71                                          struct dentry *base, int len,
72                                          enum oi_check_flags flags)
73 {
74         struct dentry *dchild;
75
76         /*
77          * We can't use inode_is_locked() directly since we can't know
78          * if the current thread context took the lock earlier or if
79          * another thread context took the lock. OI_LOCKED tells us
80          * if the current thread context has already taken the lock.
81          */
82         if (!(flags & OI_LOCKED)) {
83                 /* If another thread took this lock already we will
84                  * just have to wait until the other thread is done.
85                  */
86                 inode_lock(base->d_inode);
87                 dchild = lookup_one_len(name, base, len);
88                 inode_unlock(base->d_inode);
89         } else {
90                 /* This thread context already has taken the lock.
91                  * Other threads will have to wait until we are done.
92                  */
93                 dchild = lookup_one_len(name, base, len);
94         }
95         if (IS_ERR(dchild))
96                 return dchild;
97
98         if (dchild->d_inode && unlikely(is_bad_inode(dchild->d_inode))) {
99                 CERROR("%s: bad inode returned %lu/%u: rc = -ENOENT\n",
100                        osd_name(dev), dchild->d_inode->i_ino,
101                        dchild->d_inode->i_generation);
102                 dput(dchild);
103                 dchild = ERR_PTR(-ENOENT);
104         }
105
106         return dchild;
107 }
108
109 /**
110  * osd_lookup_one_len_unlocked
111  *
112  * @dev:        obd device we are searching
113  * @name:       pathname component to lookup
114  * @base:       base directory to lookup from
115  * @len:        maximum length @len should be interpreted to
116  *
117  * Unlike osd_lookup_one_len, this should be called without the parent
118  * i_mutex held, and will take the i_mutex itself.
119  */
120 struct dentry *osd_lookup_one_len_unlocked(struct osd_device *dev,
121                                            const char *name,
122                                            struct dentry *base, int len)
123 {
124         return osd_lookup_one_len_common(dev, name, base, len, ~OI_LOCKED);
125 }
126
127 /**
128  * osd_lookup_one_len - lookup single pathname component
129  *
130  * @dev:        obd device we are searching
131  * @name:       pathname component to lookup
132  * @base:       base directory to lookup from
133  * @len:        maximum length @len should be interpreted to
134  *
135  * The caller must hold inode lock
136  */
137 struct dentry *osd_lookup_one_len(struct osd_device *dev, const char *name,
138                                   struct dentry *base, int len)
139 {
140         return osd_lookup_one_len_common(dev, name, base, len, OI_LOCKED);
141 }
142
143 /* utility to make a directory */
144 static struct dentry *
145 simple_mkdir(const struct lu_env *env, struct osd_device *osd,
146              struct dentry *dir, const struct lu_fid *fid,
147              const char *name, __u32 compat, int mode, bool *created)
148 {
149         struct osd_thread_info *info = osd_oti_get(env);
150         struct lu_fid *tfid = &info->oti_fid3;
151         struct inode *inode;
152         struct dentry *dchild;
153         int err = 0;
154
155         ENTRY;
156
157         // ASSERT_KERNEL_CTXT("kernel doing mkdir outside kernel context\n");
158         CDEBUG(D_INODE, "creating directory %.*s\n", (int)strlen(name), name);
159         dchild = osd_lookup_one_len_unlocked(osd, name, dir, strlen(name));
160         if (IS_ERR(dchild))
161                 RETURN(dchild);
162
163         inode = dchild->d_inode;
164         if (inode) {
165                 struct lustre_mdt_attrs *lma = &info->oti_ost_attrs.loa_lma;
166                 int old_mode = inode->i_mode;
167
168                 if (created)
169                         *created = false;
170
171                 if (!S_ISDIR(old_mode)) {
172                         CERROR("found %s (%lu/%u) is mode %o\n", name,
173                                inode->i_ino, inode->i_generation, old_mode);
174                         GOTO(out_err, err = -ENOTDIR);
175                 }
176
177                 if (unlikely(osd->od_dt_dev.dd_rdonly))
178                         RETURN(dchild);
179
180                 /* Fixup directory permissions if necessary */
181                 if ((old_mode & S_IALLUGO) != (mode & S_IALLUGO)) {
182                         CDEBUG(D_CONFIG,
183                                "fixing permissions on %s from %o to %o\n",
184                                name, old_mode, mode);
185                         inode->i_mode = (mode & S_IALLUGO) |
186                                         (old_mode & ~S_IALLUGO);
187                         mark_inode_dirty(inode);
188                 }
189
190                 err = osd_get_lma(info, inode, &info->oti_obj_dentry,
191                                   &info->oti_ost_attrs);
192                 if (err == -ENODATA)
193                         goto set_fid;
194
195                 if (err)
196                         GOTO(out_err, err);
197
198                 if ((fid && !lu_fid_eq(fid, &lma->lma_self_fid)) ||
199                     lma->lma_compat != compat)
200                         goto set_fid;
201
202                 RETURN(dchild);
203         }
204
205         err = vfs_mkdir(dir->d_inode, dchild, mode);
206         if (err)
207                 GOTO(out_err, err);
208
209         inode = dchild->d_inode;
210         if (created)
211                 *created = true;
212
213 set_fid:
214         if (fid)
215                 *tfid = *fid;
216         else
217                 lu_igif_build(tfid, inode->i_ino, inode->i_generation);
218         err = osd_ea_fid_set(info, inode, tfid, compat, 0);
219         if (err)
220                 GOTO(out_err, err);
221
222         RETURN(dchild);
223
224 out_err:
225         dput(dchild);
226         return ERR_PTR(err);
227 }
228
229 static int osd_last_rcvd_subdir_count(struct osd_device *osd)
230 {
231         struct lr_server_data lsd;
232         struct dentry *dlast;
233         loff_t off;
234         int rc = 0;
235         int count = OBJ_SUBDIR_COUNT;
236
237         ENTRY;
238
239         dlast = osd_lookup_one_len_unlocked(osd, LAST_RCVD, osd_sb(osd)->s_root,
240                                             strlen(LAST_RCVD));
241         if (IS_ERR(dlast))
242                 return PTR_ERR(dlast);
243         else if (dlast->d_inode == NULL)
244                 goto out;
245
246         off = 0;
247         rc = osd_ldiskfs_read(dlast->d_inode, &lsd, sizeof(lsd), &off);
248         if (rc == sizeof(lsd)) {
249                 CDEBUG(D_INFO,
250                       "read last_rcvd header, uuid = %s, subdir count = %d\n",
251                       lsd.lsd_uuid, lsd.lsd_subdir_count);
252                 if (le16_to_cpu(lsd.lsd_subdir_count) > 0)
253                         count = le16_to_cpu(lsd.lsd_subdir_count);
254         } else if (rc != 0) {
255                 CERROR("Can't read last_rcvd file, rc = %d\n", rc);
256                 if (rc > 0)
257                         rc = -EFAULT;
258                 dput(dlast);
259                 return rc;
260         }
261 out:
262         dput(dlast);
263         LASSERT(count > 0);
264         return count;
265 }
266
267 static int osd_mdt_init(const struct lu_env *env, struct osd_device *dev)
268 {
269         struct lvfs_run_ctxt new;
270         struct lvfs_run_ctxt save;
271         struct dentry *parent;
272         struct osd_mdobj_map *omm;
273         struct dentry *d;
274         struct osd_thread_info *info = osd_oti_get(env);
275         struct lu_fid *fid = &info->oti_fid3;
276         int rc = 0;
277
278         ENTRY;
279
280         OBD_ALLOC_PTR(dev->od_mdt_map);
281         if (dev->od_mdt_map == NULL)
282                 RETURN(-ENOMEM);
283
284         omm = dev->od_mdt_map;
285
286         parent = osd_sb(dev)->s_root;
287         osd_push_ctxt(dev, &new, &save);
288
289         lu_local_obj_fid(fid, REMOTE_PARENT_DIR_OID);
290         d = simple_mkdir(env, dev, parent, fid, REMOTE_PARENT_DIR,
291                          LMAC_NOT_IN_OI, 0755, NULL);
292         if (IS_ERR(d))
293                 GOTO(cleanup, rc = PTR_ERR(d));
294
295         omm->omm_remote_parent = d;
296
297         GOTO(cleanup, rc = 0);
298
299 cleanup:
300         pop_ctxt(&save, &new);
301         if (rc) {
302                 if (omm->omm_remote_parent != NULL)
303                         dput(omm->omm_remote_parent);
304                 OBD_FREE_PTR(omm);
305                 dev->od_mdt_map = NULL;
306         }
307         return rc;
308 }
309
310 static void osd_mdt_fini(struct osd_device *osd)
311 {
312         struct osd_mdobj_map *omm = osd->od_mdt_map;
313
314         if (omm == NULL)
315                 return;
316
317         if (omm->omm_remote_parent)
318                 dput(omm->omm_remote_parent);
319
320         OBD_FREE_PTR(omm);
321         osd->od_ost_map = NULL;
322 }
323
324 int osd_add_to_remote_parent(const struct lu_env *env, struct osd_device *osd,
325                              struct osd_object *obj, struct osd_thandle *oh)
326 {
327         struct osd_mdobj_map *omm = osd->od_mdt_map;
328         struct osd_thread_info *oti = osd_oti_get(env);
329         struct lustre_mdt_attrs *lma = &oti->oti_ost_attrs.loa_lma;
330         char *name = oti->oti_name;
331         struct osd_thread_info *info = osd_oti_get(env);
332         struct dentry *dentry;
333         struct dentry *parent;
334         int rc;
335
336         if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_NO_AGENTENT))
337                 RETURN(0);
338
339         /*
340          * Set REMOTE_PARENT in lma, so other process like unlink or lfsck
341          * can identify this object quickly
342          */
343         rc = osd_get_lma(oti, obj->oo_inode, &oti->oti_obj_dentry,
344                          &oti->oti_ost_attrs);
345         if (rc)
346                 RETURN(rc);
347
348         lma->lma_incompat |= LMAI_REMOTE_PARENT;
349         lustre_lma_swab(lma);
350         rc = __osd_xattr_set(oti, obj->oo_inode, XATTR_NAME_LMA, lma,
351                              sizeof(*lma), XATTR_REPLACE);
352         if (rc)
353                 RETURN(rc);
354
355         parent = omm->omm_remote_parent;
356         sprintf(name, DFID_NOBRACE, PFID(lu_object_fid(&obj->oo_dt.do_lu)));
357         dentry = osd_child_dentry_by_inode(env, parent->d_inode,
358                                            name, strlen(name));
359         inode_lock(parent->d_inode);
360         rc = osd_ldiskfs_add_entry(info, osd, oh->ot_handle, dentry,
361                                    obj->oo_inode, NULL);
362         if (!rc && S_ISDIR(obj->oo_inode->i_mode))
363                 ldiskfs_inc_count(oh->ot_handle, parent->d_inode);
364         else if (unlikely(rc == -EEXIST))
365                 rc = 0;
366         if (!rc)
367                 lu_object_set_agent_entry(&obj->oo_dt.do_lu);
368         CDEBUG(D_INODE, "%s: create agent entry for %s: rc = %d\n",
369                osd_name(osd), name, rc);
370         mark_inode_dirty(parent->d_inode);
371         inode_unlock(parent->d_inode);
372         RETURN(rc);
373 }
374
375 int osd_delete_from_remote_parent(const struct lu_env *env,
376                                   struct osd_device *osd,
377                                   struct osd_object *obj,
378                                   struct osd_thandle *oh, bool destroy)
379 {
380         struct osd_mdobj_map *omm = osd->od_mdt_map;
381         struct osd_thread_info *oti = osd_oti_get(env);
382         struct lustre_mdt_attrs *lma = &oti->oti_ost_attrs.loa_lma;
383         char *name = oti->oti_name;
384         struct dentry *dentry;
385         struct dentry *parent;
386         struct ldiskfs_dir_entry_2 *de;
387         struct buffer_head *bh;
388         int rc;
389
390         parent = omm->omm_remote_parent;
391         sprintf(name, DFID_NOBRACE, PFID(lu_object_fid(&obj->oo_dt.do_lu)));
392         dentry = osd_child_dentry_by_inode(env, parent->d_inode,
393                                            name, strlen(name));
394         inode_lock(parent->d_inode);
395         bh = osd_ldiskfs_find_entry(parent->d_inode, &dentry->d_name, &de,
396                                     NULL, NULL);
397         if (IS_ERR(bh)) {
398                 inode_unlock(parent->d_inode);
399                 rc = PTR_ERR(bh);
400                 if (unlikely(rc == -ENOENT))
401                         rc = 0;
402         } else {
403                 rc = ldiskfs_delete_entry(oh->ot_handle, parent->d_inode,
404                                           de, bh);
405                 if (!rc && S_ISDIR(obj->oo_inode->i_mode))
406                         ldiskfs_dec_count(oh->ot_handle, parent->d_inode);
407                 mark_inode_dirty(parent->d_inode);
408                 inode_unlock(parent->d_inode);
409                 brelse(bh);
410                 CDEBUG(D_INODE, "%s: remove agent entry for %s: rc = %d\n",
411                        osd_name(osd), name, rc);
412         }
413
414         if (destroy || rc) {
415                 if (!rc)
416                         lu_object_clear_agent_entry(&obj->oo_dt.do_lu);
417
418                 RETURN(rc);
419         }
420
421         rc = osd_get_lma(oti, obj->oo_inode, &oti->oti_obj_dentry,
422                          &oti->oti_ost_attrs);
423         if (rc)
424                 RETURN(rc);
425
426         /* Get rid of REMOTE_PARENT flag from incompat */
427         lma->lma_incompat &= ~LMAI_REMOTE_PARENT;
428         lustre_lma_swab(lma);
429         rc = __osd_xattr_set(oti, obj->oo_inode, XATTR_NAME_LMA, lma,
430                              sizeof(*lma), XATTR_REPLACE);
431         if (!rc)
432                 lu_object_clear_agent_entry(&obj->oo_dt.do_lu);
433         RETURN(rc);
434 }
435
436 int osd_lookup_in_remote_parent(struct osd_thread_info *oti,
437                                 struct osd_device *osd,
438                                 const struct lu_fid *fid,
439                                 struct osd_inode_id *id)
440 {
441         struct osd_mdobj_map *omm = osd->od_mdt_map;
442         char *name = oti->oti_name;
443         struct dentry *parent;
444         struct dentry *dentry;
445         struct ldiskfs_dir_entry_2 *de;
446         struct buffer_head *bh;
447         int rc;
448
449         ENTRY;
450
451         if (unlikely(osd->od_is_ost))
452                 RETURN(-ENOENT);
453
454         parent = omm->omm_remote_parent;
455         sprintf(name, DFID_NOBRACE, PFID(fid));
456         dentry = osd_child_dentry_by_inode(oti->oti_env, parent->d_inode,
457                                            name, strlen(name));
458         inode_lock(parent->d_inode);
459         bh = osd_ldiskfs_find_entry(parent->d_inode, &dentry->d_name, &de,
460                                     NULL, NULL);
461         if (IS_ERR(bh)) {
462                 rc = PTR_ERR(bh);
463         } else {
464                 struct inode *inode;
465
466                 osd_id_gen(id, le32_to_cpu(de->inode), OSD_OII_NOGEN);
467                 brelse(bh);
468                 inode = osd_iget(oti, osd, id);
469                 if (IS_ERR(inode)) {
470                         rc = PTR_ERR(inode);
471                         if (rc == -ESTALE)
472                                 rc = -ENOENT;
473                 } else {
474                         iput(inode);
475                         rc = 0;
476                 }
477         }
478         inode_unlock(parent->d_inode);
479         if (rc == 0)
480                 osd_add_oi_cache(oti, osd, id, fid);
481         RETURN(rc);
482 }
483
484 /*
485  * directory structure on legacy OST:
486  *
487  * O/<seq>/d0-31/<objid>
488  * O/<seq>/LAST_ID
489  * last_rcvd
490  * LAST_GROUP
491  * CONFIGS
492  *
493  */
494 static int osd_ost_init(const struct lu_env *env, struct osd_device *dev)
495 {
496         struct lvfs_run_ctxt new;
497         struct lvfs_run_ctxt save;
498         struct dentry *d;
499         int rc;
500         bool created = false;
501
502         ENTRY;
503
504         OBD_ALLOC_PTR(dev->od_ost_map);
505         if (dev->od_ost_map == NULL)
506                 RETURN(-ENOMEM);
507
508         /* to get subdir count from last_rcvd */
509         rc = osd_last_rcvd_subdir_count(dev);
510         if (rc < 0)
511                 GOTO(cleanup_alloc, rc);
512
513         dev->od_ost_map->om_subdir_count = rc;
514         INIT_LIST_HEAD(&dev->od_ost_map->om_seq_list);
515         rwlock_init(&dev->od_ost_map->om_seq_list_lock);
516         mutex_init(&dev->od_ost_map->om_dir_init_mutex);
517
518         osd_push_ctxt(dev, &new, &save);
519         d = simple_mkdir(env, dev, osd_sb(dev)->s_root, NULL, "O",
520                          LMAC_NOT_IN_OI | LMAC_FID_ON_OST, 0755, &created);
521         if (IS_ERR(d))
522                 GOTO(cleanup_ctxt, rc = PTR_ERR(d));
523
524         if (created)
525                 /* It is quite probably that the device is new formatted. */
526                 dev->od_maybe_new = 1;
527
528         dev->od_ost_map->om_root = d;
529
530         pop_ctxt(&save, &new);
531         RETURN(0);
532
533 cleanup_ctxt:
534         pop_ctxt(&save, &new);
535 cleanup_alloc:
536         OBD_FREE_PTR(dev->od_ost_map);
537         return rc;
538 }
539
540 static void osd_seq_free(struct osd_obj_seq *osd_seq)
541 {
542         int j;
543
544         if (osd_seq->oos_dirs) {
545                 for (j = 0; j < osd_seq->oos_subdir_count; j++) {
546                         if (osd_seq->oos_dirs[j])
547                                 dput(osd_seq->oos_dirs[j]);
548                 }
549                 OBD_FREE_PTR_ARRAY(osd_seq->oos_dirs,
550                                    osd_seq->oos_subdir_count);
551         }
552
553         if (osd_seq->oos_root)
554                 dput(osd_seq->oos_root);
555
556         OBD_FREE_PTR(osd_seq);
557 }
558
559 static void osd_ost_fini(struct osd_device *osd)
560 {
561         struct osd_obj_seq *osd_seq;
562         struct osd_obj_seq *tmp;
563         struct osd_obj_map *map = osd->od_ost_map;
564
565         ENTRY;
566
567         if (map == NULL)
568                 return;
569
570         write_lock(&map->om_seq_list_lock);
571         list_for_each_entry_safe(osd_seq, tmp, &map->om_seq_list,
572                                  oos_seq_list) {
573                 list_del_init(&osd_seq->oos_seq_list);
574                 write_unlock(&map->om_seq_list_lock);
575                 osd_seq_free(osd_seq);
576                 write_lock(&map->om_seq_list_lock);
577         }
578         write_unlock(&map->om_seq_list_lock);
579         if (map->om_root)
580                 dput(map->om_root);
581         OBD_FREE_PTR(map);
582         osd->od_ost_map = NULL;
583         EXIT;
584 }
585
586 static int osd_index_backup_dir_init(const struct lu_env *env,
587                                      struct osd_device *dev)
588 {
589         struct lu_fid *fid = &osd_oti_get(env)->oti_fid;
590         struct lvfs_run_ctxt new;
591         struct lvfs_run_ctxt save;
592         struct dentry *dentry;
593         int rc = 0;
594
595         ENTRY;
596
597         lu_local_obj_fid(fid, INDEX_BACKUP_OID);
598         osd_push_ctxt(dev, &new, &save);
599         dentry = simple_mkdir(env, dev, osd_sb(dev)->s_root, fid,
600                               INDEX_BACKUP_DIR, LMAC_NOT_IN_OI, 0755, NULL);
601         if (IS_ERR(dentry)) {
602                 rc = PTR_ERR(dentry);
603         } else {
604                 dev->od_index_backup_inode = igrab(dentry->d_inode);
605                 dput(dentry);
606         }
607         pop_ctxt(&save, &new);
608
609         RETURN(rc);
610 }
611
612 static void osd_index_backup_dir_fini(struct osd_device *dev)
613 {
614         if (dev->od_index_backup_inode) {
615                 iput(dev->od_index_backup_inode);
616                 dev->od_index_backup_inode = NULL;
617         }
618 }
619
620 int osd_obj_map_init(const struct lu_env *env, struct osd_device *dev)
621 {
622         int rc;
623         bool mdt_init = false;
624
625         ENTRY;
626
627         rc = osd_ost_init(env, dev);
628         if (rc)
629                 RETURN(rc);
630
631         if (!dev->od_is_ost) {
632                 rc = osd_mdt_init(env, dev);
633                 if (rc) {
634                         osd_ost_fini(dev);
635                         RETURN(rc);
636                 }
637
638                 mdt_init = true;
639         }
640
641         rc = osd_index_backup_dir_init(env, dev);
642         if (rc) {
643                 osd_ost_fini(dev);
644                 if (mdt_init)
645                         osd_mdt_fini(dev);
646         }
647
648         RETURN(rc);
649 }
650
651 static struct osd_obj_seq *osd_seq_find_locked(struct osd_obj_map *map, u64 seq)
652 {
653         struct osd_obj_seq *osd_seq;
654
655         list_for_each_entry(osd_seq, &map->om_seq_list, oos_seq_list) {
656                 if (osd_seq->oos_seq == seq)
657                         return osd_seq;
658         }
659         return NULL;
660 }
661
662 static struct osd_obj_seq *osd_seq_find(struct osd_obj_map *map, u64 seq)
663 {
664         struct osd_obj_seq *osd_seq;
665
666         read_lock(&map->om_seq_list_lock);
667         osd_seq = osd_seq_find_locked(map, seq);
668         read_unlock(&map->om_seq_list_lock);
669         return osd_seq;
670 }
671
/*
 * Tear down what osd_obj_map_init() set up for @dev: the index backup
 * directory inode first, then the OST map, then the MDT map.
 */
void osd_obj_map_fini(struct osd_device *dev)
{
	osd_index_backup_dir_fini(dev);

	/* each helper is a no-op when its part was never initialised */
	osd_ost_fini(dev);
	osd_mdt_fini(dev);
}
678
679 /**
680  * Update the specified OI mapping.
681  *
682  * \retval   1, changed nothing
683  * \retval   0, changed successfully
684  * \retval -ve, on error
685  */
686 static int osd_obj_update_entry(struct osd_thread_info *info,
687                                 struct osd_device *osd,
688                                 struct dentry *dir, const char *name,
689                                 const struct lu_fid *fid,
690                                 const struct osd_inode_id *id,
691                                 handle_t *th)
692 {
693         struct inode *parent = dir->d_inode;
694         struct dentry *child;
695         struct ldiskfs_dir_entry_2 *de;
696         struct buffer_head *bh;
697         struct inode *inode;
698         struct dentry *dentry = &info->oti_obj_dentry;
699         struct osd_inode_id *oi_id = &info->oti_id3;
700         struct lustre_mdt_attrs *lma = &info->oti_ost_attrs.loa_lma;
701         struct lu_fid *oi_fid = &lma->lma_self_fid;
702         int rc;
703
704         ENTRY;
705
706         LASSERT(th != NULL);
707         LASSERT(th->h_transaction != NULL);
708
709         child = &info->oti_child_dentry;
710         child->d_parent = dir;
711         child->d_name.hash = 0;
712         child->d_name.name = name;
713         child->d_name.len = strlen(name);
714
715         dquot_initialize(parent);
716         inode_lock(parent);
717         bh = osd_ldiskfs_find_entry(parent, &child->d_name, &de, NULL, NULL);
718         if (IS_ERR(bh))
719                 GOTO(out, rc = PTR_ERR(bh));
720
721         if (le32_to_cpu(de->inode) == id->oii_ino)
722                 GOTO(out, rc = 1);
723
724         osd_id_gen(oi_id, le32_to_cpu(de->inode), OSD_OII_NOGEN);
725         inode = osd_iget(info, osd, oi_id);
726         if (IS_ERR(inode)) {
727                 rc = PTR_ERR(inode);
728                 if (rc == -ENOENT || rc == -ESTALE)
729                         goto update;
730                 GOTO(out, rc);
731         }
732
733         /*
734          * The EA inode should NOT be in OI, old OI scrub may added
735          * such OI mapping by wrong, replace it.
736          */
737         if (unlikely(osd_is_ea_inode(inode))) {
738                 iput(inode);
739                 goto update;
740         }
741
742         rc = osd_get_lma(info, inode, dentry, &info->oti_ost_attrs);
743         if (rc == -ENODATA) {
744                 rc = osd_get_idif(info, inode, dentry, oi_fid);
745                 if (rc > 0 || rc == -ENODATA) {
746                         oi_fid = NULL;
747                         rc = 0;
748                 }
749         }
750         iput(inode);
751
752         if (rc != 0)
753                 GOTO(out, rc);
754
755         /*
756          * If the OST-object has neither FID-in-LMA nor FID-in-ff, it is
757          * either a crashed object or a uninitialized one. Replace it.
758          */
759         if (oi_fid != NULL && lu_fid_eq(fid, oi_fid)) {
760                 CERROR("%s: the FID "DFID" is used by two objects: "
761                        "%u/%u %u/%u\n", osd_name(osd), PFID(fid),
762                        oi_id->oii_ino, oi_id->oii_gen,
763                        id->oii_ino, id->oii_gen);
764                 GOTO(out, rc = -EEXIST);
765         }
766
767         if (fid_is_idif(fid) && oi_fid != NULL && fid_is_idif(oi_fid)) {
768                 __u32 idx1 = fid_idif_ost_idx(fid);
769                 __u32 idx2 = fid_idif_ost_idx(oi_fid);
770                 struct ost_id *ostid = &info->oti_ostid;
771                 struct lu_fid *tfid = &info->oti_fid3;
772
773                 LASSERTF(idx1 == 0 || idx1 == osd->od_index,
774                          "invalid given FID "DFID", not match the "
775                          "device index %u\n", PFID(fid), osd->od_index);
776
777                 if (idx1 != idx2) {
778                         if (idx1 == 0 && idx2 == osd->od_index) {
779                                 fid_to_ostid(fid, ostid);
780                                 ostid_to_fid(tfid, ostid, idx2);
781                                 if (lu_fid_eq(tfid, oi_fid)) {
782                                         CERROR("%s: the FID "DFID" is used by "
783                                                "two objects(2): %u/%u %u/%u\n",
784                                                osd_name(osd), PFID(fid),
785                                                oi_id->oii_ino, oi_id->oii_gen,
786                                                id->oii_ino, id->oii_gen);
787
788                                         GOTO(out, rc = -EEXIST);
789                                 }
790                         } else if (idx2 == 0 && idx1 == osd->od_index) {
791                                 fid_to_ostid(oi_fid, ostid);
792                                 ostid_to_fid(tfid, ostid, idx1);
793                                 if (lu_fid_eq(tfid, fid)) {
794                                         CERROR("%s: the FID "DFID" is used by "
795                                                "two objects(2): %u/%u %u/%u\n",
796                                                osd_name(osd), PFID(fid),
797                                                oi_id->oii_ino, oi_id->oii_gen,
798                                                id->oii_ino, id->oii_gen);
799
800                                         GOTO(out, rc = -EEXIST);
801                                 }
802                         }
803                 }
804         }
805
806 update:
807         /*
808          * There may be temporary inconsistency: On one hand, the new
809          * object may be referenced by multiple entries, which is out
810          * of our control unless we traverse the whole /O completely,
811          * which is non-flat order and inefficient, should be avoided;
812          * On the other hand, the old object may become orphan if it
813          * is still valid. Since it was referenced by an invalid entry,
814          * making it as invisible temporary may be not worse. OI scrub
815          * will process it later.
816          */
817         rc = ldiskfs_journal_get_write_access(th, bh);
818         if (rc != 0)
819                 GOTO(out, rc);
820
821         de->inode = cpu_to_le32(id->oii_ino);
822         rc = ldiskfs_handle_dirty_metadata(th, NULL, bh);
823
824         GOTO(out, rc);
825
826 out:
827         if (!IS_ERR(bh))
828                 brelse(bh);
829         inode_unlock(parent);
830         return rc;
831 }
832
833 static int osd_obj_del_entry(struct osd_thread_info *info,
834                              struct osd_device *osd,
835                              struct dentry *dird, char *name,
836                              handle_t *th)
837 {
838         struct ldiskfs_dir_entry_2 *de;
839         struct buffer_head *bh;
840         struct dentry *child;
841         struct inode *dir = dird->d_inode;
842         int rc;
843
844         ENTRY;
845
846         LASSERT(th != NULL);
847         LASSERT(th->h_transaction != NULL);
848
849         child = &info->oti_child_dentry;
850         child->d_name.hash = 0;
851         child->d_name.name = name;
852         child->d_name.len = strlen(name);
853         child->d_parent = dird;
854         child->d_inode = NULL;
855
856         dquot_initialize(dir);
857         inode_lock(dir);
858         bh = osd_ldiskfs_find_entry(dir, &child->d_name, &de, NULL, NULL);
859         if (IS_ERR(bh)) {
860                 rc = PTR_ERR(bh);
861         } else {
862                 rc = ldiskfs_delete_entry(th, dir, de, bh);
863                 brelse(bh);
864         }
865         inode_unlock(dir);
866
867         RETURN(rc);
868 }
869
870 static int osd_obj_add_entry(struct osd_thread_info *info,
871                              struct osd_device *osd,
872                              struct dentry *dir, char *name,
873                              const struct osd_inode_id *id,
874                              handle_t *th)
875 {
876         struct dentry *child;
877         struct inode *inode;
878         int rc;
879
880         ENTRY;
881
882         if (OBD_FAIL_CHECK(OBD_FAIL_OSD_COMPAT_NO_ENTRY))
883                 RETURN(0);
884
885         LASSERT(th != NULL);
886         LASSERT(th->h_transaction != NULL);
887
888         inode = info->oti_inode;
889         if (unlikely(inode == NULL)) {
890                 struct ldiskfs_inode_info *lii;
891
892                 OBD_ALLOC_PTR(lii);
893                 if (lii == NULL)
894                         RETURN(-ENOMEM);
895                 inode = info->oti_inode = &lii->vfs_inode;
896         }
897
898         inode->i_sb = osd_sb(osd);
899         osd_id_to_inode(inode, id);
900         inode->i_mode = S_IFREG; /* for type in ldiskfs dir entry */
901
902         child = &info->oti_child_dentry;
903         child->d_name.hash = 0;
904         child->d_name.name = name;
905         child->d_name.len = strlen(name);
906         child->d_parent = dir;
907         child->d_inode = inode;
908
909         if (OBD_FAIL_CHECK(OBD_FAIL_OSD_COMPAT_INVALID_ENTRY))
910                 inode->i_ino++;
911
912         dquot_initialize(dir->d_inode);
913         inode_lock(dir->d_inode);
914         rc = osd_ldiskfs_add_entry(info, osd, th, child, inode, NULL);
915         inode_unlock(dir->d_inode);
916
917         RETURN(rc);
918 }
919
920 /**
921  * Use %llu for legacy OST sequences, but use %llx for new
922  * sequences names, so that the O/{seq}/dN/{oid} more closely
923  * follows the DFID/PFID format. This makes it easier to map from
924  * debug messages to objects in the future, and the legacy space
925  * of FID_SEQ_OST_MDT0 will be unused in the future.
926  **/
927 static inline void osd_seq_name(char *seq_name, size_t name_size, u64 seq)
928 {
929         snprintf(seq_name, name_size,
930                  (fid_seq_is_rsvd(seq) ||
931                   fid_seq_is_mdt0(seq)) ? "%llu" : "%llx",
932                  fid_seq_is_idif(seq) ? 0 : seq);
933 }
934
935 static inline void osd_oid_name(char *name, size_t name_size,
936                                 const struct lu_fid *fid, u64 id)
937 {
938         snprintf(name, name_size,
939                  (fid_seq_is_rsvd(fid_seq(fid)) ||
940                   fid_seq_is_mdt0(fid_seq(fid)) ||
941                   fid_seq_is_idif(fid_seq(fid))) ? "%llu" : "%llx", id);
942 }
943
944 /* external locking is required */
945 static int osd_seq_load_locked(struct osd_thread_info *info,
946                                struct osd_device *osd,
947                                struct osd_obj_seq *osd_seq)
948 {
949         struct osd_obj_map *map = osd->od_ost_map;
950         struct dentry *seq_dir;
951         int rc = 0;
952         int i;
953         char dir_name[32];
954
955         ENTRY;
956
957         if (osd_seq->oos_root != NULL)
958                 RETURN(0);
959
960         LASSERT(map);
961         LASSERT(map->om_root);
962
963         osd_seq_name(dir_name, sizeof(dir_name), osd_seq->oos_seq);
964
965         seq_dir = simple_mkdir(info->oti_env, osd, map->om_root, NULL, dir_name,
966                                LMAC_NOT_IN_OI | LMAC_FID_ON_OST, 0755, NULL);
967         if (IS_ERR(seq_dir))
968                 GOTO(out_err, rc = PTR_ERR(seq_dir));
969         else if (seq_dir->d_inode == NULL)
970                 GOTO(out_put, rc = -EFAULT);
971
972         osd_seq->oos_root = seq_dir;
973
974         LASSERT(osd_seq->oos_dirs == NULL);
975         OBD_ALLOC_PTR_ARRAY(osd_seq->oos_dirs, osd_seq->oos_subdir_count);
976         if (osd_seq->oos_dirs == NULL)
977                 GOTO(out_put, rc = -ENOMEM);
978
979         for (i = 0; i < osd_seq->oos_subdir_count; i++) {
980                 struct dentry   *dir;
981
982                 snprintf(dir_name, sizeof(dir_name), "d%u", i);
983                 dir = simple_mkdir(info->oti_env, osd, osd_seq->oos_root, NULL,
984                                    dir_name, LMAC_NOT_IN_OI | LMAC_FID_ON_OST,
985                                    0700, NULL);
986                 if (IS_ERR(dir)) {
987                         GOTO(out_free, rc = PTR_ERR(dir));
988                 } else if (dir->d_inode == NULL) {
989                         dput(dir);
990                         GOTO(out_free, rc = -EFAULT);
991                 }
992
993                 osd_seq->oos_dirs[i] = dir;
994         }
995
996         if (rc != 0) {
997 out_free:
998                 for (i = 0; i < osd_seq->oos_subdir_count; i++) {
999                         if (osd_seq->oos_dirs[i] != NULL)
1000                                 dput(osd_seq->oos_dirs[i]);
1001                 }
1002                 OBD_FREE_PTR_ARRAY(osd_seq->oos_dirs,
1003                                    osd_seq->oos_subdir_count);
1004 out_put:
1005                 dput(seq_dir);
1006                 osd_seq->oos_root = NULL;
1007         }
1008 out_err:
1009         RETURN(rc);
1010 }
1011
1012 static struct osd_obj_seq *osd_seq_load(struct osd_thread_info *info,
1013                                         struct osd_device *osd, u64 seq)
1014 {
1015         struct osd_obj_map *map;
1016         struct osd_obj_seq *osd_seq;
1017         int rc = 0;
1018
1019         ENTRY;
1020
1021         map = osd->od_ost_map;
1022         LASSERT(map);
1023         LASSERT(map->om_root);
1024
1025         osd_seq = osd_seq_find(map, seq);
1026         if (likely(osd_seq != NULL))
1027                 RETURN(osd_seq);
1028
1029         /* Serializing init process */
1030         mutex_lock(&map->om_dir_init_mutex);
1031
1032         /* Check whether the seq has been added */
1033         read_lock(&map->om_seq_list_lock);
1034         osd_seq = osd_seq_find_locked(map, seq);
1035         if (osd_seq != NULL) {
1036                 read_unlock(&map->om_seq_list_lock);
1037                 GOTO(cleanup, rc = 0);
1038         }
1039         read_unlock(&map->om_seq_list_lock);
1040
1041         OBD_ALLOC_PTR(osd_seq);
1042         if (osd_seq == NULL)
1043                 GOTO(cleanup, rc = -ENOMEM);
1044
1045         INIT_LIST_HEAD(&osd_seq->oos_seq_list);
1046         osd_seq->oos_seq = seq;
1047         /*
1048          * Init subdir count to be 32, but each seq can have
1049          * different subdir count
1050          */
1051         osd_seq->oos_subdir_count = map->om_subdir_count;
1052         rc = osd_seq_load_locked(info, osd, osd_seq);
1053         if (rc != 0)
1054                 GOTO(cleanup, rc);
1055
1056         write_lock(&map->om_seq_list_lock);
1057         list_add(&osd_seq->oos_seq_list, &map->om_seq_list);
1058         write_unlock(&map->om_seq_list_lock);
1059
1060 cleanup:
1061         mutex_unlock(&map->om_dir_init_mutex);
1062         if (rc != 0) {
1063                 if (osd_seq != NULL)
1064                         OBD_FREE_PTR(osd_seq);
1065                 RETURN(ERR_PTR(rc));
1066         }
1067
1068         RETURN(osd_seq);
1069 }
1070
1071 int osd_obj_map_lookup(struct osd_thread_info *info, struct osd_device *dev,
1072                        const struct lu_fid *fid, struct osd_inode_id *id)
1073 {
1074         struct osd_obj_map *map;
1075         struct osd_obj_seq *osd_seq;
1076         struct dentry *d_seq;
1077         struct dentry *child;
1078         struct ost_id *ostid = &info->oti_ostid;
1079         int dirn;
1080         char name[32];
1081         struct ldiskfs_dir_entry_2 *de;
1082         struct buffer_head *bh;
1083         struct inode *dir;
1084         struct inode *inode;
1085
1086         ENTRY;
1087
1088         /* on the very first lookup we find and open directories */
1089         map = dev->od_ost_map;
1090         LASSERT(map);
1091         LASSERT(map->om_root);
1092
1093         fid_to_ostid(fid, ostid);
1094         osd_seq = osd_seq_load(info, dev, ostid_seq(ostid));
1095         if (IS_ERR(osd_seq))
1096                 RETURN(PTR_ERR(osd_seq));
1097
1098         dirn = ostid_id(ostid) & (osd_seq->oos_subdir_count - 1);
1099         d_seq = osd_seq->oos_dirs[dirn];
1100         LASSERT(d_seq);
1101
1102         osd_oid_name(name, sizeof(name), fid, ostid_id(ostid));
1103
1104         child = &info->oti_child_dentry;
1105         child->d_parent = d_seq;
1106         child->d_name.hash = 0;
1107         child->d_name.name = name;
1108         /* XXX: we can use rc from sprintf() instead of strlen() */
1109         child->d_name.len = strlen(name);
1110
1111         dir = d_seq->d_inode;
1112         inode_lock(dir);
1113         bh = osd_ldiskfs_find_entry(dir, &child->d_name, &de, NULL, NULL);
1114         inode_unlock(dir);
1115
1116         if (IS_ERR(bh))
1117                 RETURN(PTR_ERR(bh));
1118
1119         osd_id_gen(id, le32_to_cpu(de->inode), OSD_OII_NOGEN);
1120         brelse(bh);
1121
1122         inode = osd_iget(info, dev, id);
1123         if (IS_ERR(inode)) {
1124                 int rc = PTR_ERR(inode);
1125
1126                 RETURN(rc == -ENOENT ? -ESTALE : rc);
1127         }
1128
1129         iput(inode);
1130         RETURN(0);
1131 }
1132
1133 int osd_obj_map_insert(struct osd_thread_info *info,
1134                        struct osd_device *osd,
1135                        const struct lu_fid *fid,
1136                        const struct osd_inode_id *id,
1137                        handle_t *th)
1138 {
1139         struct osd_obj_map *map;
1140         struct osd_obj_seq *osd_seq;
1141         struct dentry *d;
1142         struct ost_id *ostid = &info->oti_ostid;
1143         u64 oid;
1144         int dirn, rc = 0;
1145         char name[32];
1146
1147         ENTRY;
1148
1149         map = osd->od_ost_map;
1150         LASSERT(map);
1151
1152         /* map fid to seq:objid */
1153         fid_to_ostid(fid, ostid);
1154
1155         oid = ostid_id(ostid);
1156         osd_seq = osd_seq_load(info, osd, ostid_seq(ostid));
1157         if (IS_ERR(osd_seq))
1158                 RETURN(PTR_ERR(osd_seq));
1159
1160         dirn = oid & (osd_seq->oos_subdir_count - 1);
1161         d = osd_seq->oos_dirs[dirn];
1162         LASSERT(d);
1163
1164         osd_oid_name(name, sizeof(name), fid, oid);
1165
1166 again:
1167         rc = osd_obj_add_entry(info, osd, d, name, id, th);
1168         if (rc == -EEXIST) {
1169                 rc = osd_obj_update_entry(info, osd, d, name, fid, id, th);
1170                 if (unlikely(rc == -ENOENT))
1171                         goto again;
1172
1173                 if (unlikely(rc == 1))
1174                         RETURN(0);
1175         }
1176
1177         RETURN(rc);
1178 }
1179
1180 int osd_obj_map_delete(struct osd_thread_info *info, struct osd_device *osd,
1181                        const struct lu_fid *fid, handle_t *th)
1182 {
1183         struct osd_obj_map *map;
1184         struct osd_obj_seq *osd_seq;
1185         struct dentry *d;
1186         struct ost_id *ostid = &info->oti_ostid;
1187         int dirn, rc = 0;
1188         char name[32];
1189
1190         ENTRY;
1191
1192         map = osd->od_ost_map;
1193         LASSERT(map);
1194
1195         /* map fid to seq:objid */
1196         fid_to_ostid(fid, ostid);
1197
1198         osd_seq = osd_seq_load(info, osd, ostid_seq(ostid));
1199         if (IS_ERR(osd_seq))
1200                 GOTO(cleanup, rc = PTR_ERR(osd_seq));
1201
1202         dirn = ostid_id(ostid) & (osd_seq->oos_subdir_count - 1);
1203         d = osd_seq->oos_dirs[dirn];
1204         LASSERT(d);
1205
1206         osd_oid_name(name, sizeof(name), fid, ostid_id(ostid));
1207         rc = osd_obj_del_entry(info, osd, d, name, th);
1208 cleanup:
1209         RETURN(rc);
1210 }
1211
1212 int osd_obj_map_update(struct osd_thread_info *info,
1213                        struct osd_device *osd,
1214                        const struct lu_fid *fid,
1215                        const struct osd_inode_id *id,
1216                        handle_t *th)
1217 {
1218         struct osd_obj_seq *osd_seq;
1219         struct dentry *d;
1220         struct ost_id *ostid = &info->oti_ostid;
1221         int dirn, rc = 0;
1222         char name[32];
1223
1224         ENTRY;
1225
1226         fid_to_ostid(fid, ostid);
1227         osd_seq = osd_seq_load(info, osd, ostid_seq(ostid));
1228         if (IS_ERR(osd_seq))
1229                 RETURN(PTR_ERR(osd_seq));
1230
1231         dirn = ostid_id(ostid) & (osd_seq->oos_subdir_count - 1);
1232         d = osd_seq->oos_dirs[dirn];
1233         LASSERT(d);
1234
1235         osd_oid_name(name, sizeof(name), fid, ostid_id(ostid));
1236         rc = osd_obj_update_entry(info, osd, d, name, fid, id, th);
1237
1238         RETURN(rc);
1239 }
1240
1241 int osd_obj_map_recover(struct osd_thread_info *info,
1242                         struct osd_device *osd,
1243                         struct inode *src_parent,
1244                         struct dentry *src_child,
1245                         const struct lu_fid *fid)
1246 {
1247         struct osd_obj_seq *osd_seq;
1248         struct dentry *tgt_parent;
1249         struct dentry *tgt_child = &info->oti_child_dentry;
1250         struct inode *dir;
1251         struct inode *inode = src_child->d_inode;
1252         struct ost_id *ostid = &info->oti_ostid;
1253         handle_t *jh;
1254         struct ldiskfs_dir_entry_2 *de;
1255         struct buffer_head *bh;
1256         char name[32];
1257         int dirn;
1258         int rc = 0;
1259
1260         ENTRY;
1261
1262         if (fid_is_last_id(fid)) {
1263                 osd_seq = osd_seq_load(info, osd, fid_seq(fid));
1264                 if (IS_ERR(osd_seq))
1265                         RETURN(PTR_ERR(osd_seq));
1266
1267                 tgt_parent = osd_seq->oos_root;
1268                 tgt_child->d_name.name = "LAST_ID";
1269                 tgt_child->d_name.len = strlen("LAST_ID");
1270         } else {
1271                 fid_to_ostid(fid, ostid);
1272                 osd_seq = osd_seq_load(info, osd, ostid_seq(ostid));
1273                 if (IS_ERR(osd_seq))
1274                         RETURN(PTR_ERR(osd_seq));
1275
1276                 dirn = ostid_id(ostid) & (osd_seq->oos_subdir_count - 1);
1277                 tgt_parent = osd_seq->oos_dirs[dirn];
1278                 osd_oid_name(name, sizeof(name), fid, ostid_id(ostid));
1279                 tgt_child->d_name.name = name;
1280                 tgt_child->d_name.len = strlen(name);
1281         }
1282         LASSERT(tgt_parent != NULL);
1283
1284         dir = tgt_parent->d_inode;
1285         tgt_child->d_name.hash = 0;
1286         tgt_child->d_parent = tgt_parent;
1287         tgt_child->d_inode = inode;
1288
1289         /* The non-initialized src_child may be destroyed. */
1290         jh = osd_journal_start_sb(osd_sb(osd), LDISKFS_HT_MISC,
1291                                 osd_dto_credits_noquota[DTO_INDEX_DELETE] +
1292                                 osd_dto_credits_noquota[DTO_INDEX_INSERT] +
1293                                 osd_dto_credits_noquota[DTO_OBJECT_DELETE]);
1294         if (IS_ERR(jh))
1295                 RETURN(PTR_ERR(jh));
1296
1297         dquot_initialize(src_parent);
1298         dquot_initialize(dir);
1299
1300         inode_lock(dir);
1301         bh = osd_ldiskfs_find_entry(dir, &tgt_child->d_name, &de, NULL, NULL);
1302         if (!IS_ERR(bh)) {
1303                 /*
1304                  * XXX: If some other object occupied the same slot. And If such
1305                  *      inode is zero-sized and with SUID+SGID, then means it is
1306                  *      a new created one. Maybe we can remove it and insert the
1307                  *      original one back to the /O/<seq>/d<x>. But there are
1308                  *      something to be considered:
1309                  *
1310                  *      1) The OST-object under /lost+found has crashed LMA.
1311                  *         So it should not conflict with the current one.
1312                  *
1313                  *      2) There are race conditions that: someone may just want
1314                  *         to modify the current one. Even if the OI scrub takes
1315                  *         the object lock when remove the current one, it still
1316                  *         cause the modification to be lost becasue the target
1317                  *         has been removed when the RPC service thread waiting
1318                  *         for the lock.
1319                  *
1320                  *      So keep it there before we have suitable solution.
1321                  */
1322                 brelse(bh);
1323                 inode_unlock(dir);
1324                 ldiskfs_journal_stop(jh);
1325
1326                 rc = -EEXIST;
1327                 /* If the src object has never been modified, then remove it. */
1328                 if (inode->i_size == 0 && inode->i_mode & S_ISUID &&
1329                     inode->i_mode & S_ISGID) {
1330                         rc = ll_vfs_unlink(src_parent, src_child);
1331                         if (unlikely(rc == -ENOENT))
1332                                 rc = 0;
1333                 }
1334                 if (rc)
1335                         RETURN(rc);
1336         }
1337
1338         bh = osd_ldiskfs_find_entry(src_parent, &src_child->d_name, &de,
1339                                     NULL, NULL);
1340         if (unlikely(IS_ERR(bh)))
1341                 GOTO(unlock, rc = PTR_ERR(bh));
1342
1343         rc = ldiskfs_delete_entry(jh, src_parent, de, bh);
1344         brelse(bh);
1345         if (rc != 0)
1346                 GOTO(unlock, rc);
1347
1348         rc = osd_ldiskfs_add_entry(info, osd, jh, tgt_child, inode, NULL);
1349
1350         GOTO(unlock, rc);
1351
1352 unlock:
1353         inode_unlock(dir);
1354         ldiskfs_journal_stop(jh);
1355         return rc;
1356 }
1357
1358 static struct dentry *
1359 osd_object_spec_find(struct osd_thread_info *info, struct osd_device *osd,
1360                      const struct lu_fid *fid, char **name)
1361 {
1362         struct dentry *root = ERR_PTR(-ENOENT);
1363
1364         if (fid_is_last_id(fid)) {
1365                 struct osd_obj_seq *osd_seq;
1366
1367                 /* on creation of LAST_ID we create O/<seq> hierarchy */
1368                 osd_seq = osd_seq_load(info, osd, fid_seq(fid));
1369                 if (IS_ERR(osd_seq))
1370                         RETURN((struct dentry *)osd_seq);
1371
1372                 *name = "LAST_ID";
1373                 root = osd_seq->oos_root;
1374         } else {
1375                 *name = osd_lf_fid2name(fid);
1376                 if (*name == NULL)
1377                         CWARN("UNKNOWN COMPAT FID "DFID"\n", PFID(fid));
1378                 else if ((*name)[0])
1379                         root = osd_sb(osd)->s_root;
1380         }
1381
1382         return root;
1383 }
1384
1385 int osd_obj_spec_update(struct osd_thread_info *info, struct osd_device *osd,
1386                         const struct lu_fid *fid, const struct osd_inode_id *id,
1387                         handle_t *th)
1388 {
1389         struct dentry *root;
1390         char *name = NULL;
1391         int rc;
1392
1393         ENTRY;
1394
1395         root = osd_object_spec_find(info, osd, fid, &name);
1396         if (!IS_ERR(root)) {
1397                 rc = osd_obj_update_entry(info, osd, root, name, fid, id, th);
1398         } else {
1399                 rc = PTR_ERR(root);
1400                 if (rc == -ENOENT)
1401                         rc = 1;
1402         }
1403
1404         RETURN(rc);
1405 }
1406
1407 int osd_obj_spec_insert(struct osd_thread_info *info, struct osd_device *osd,
1408                         const struct lu_fid *fid, const struct osd_inode_id *id,
1409                         handle_t *th)
1410 {
1411         struct dentry *root;
1412         char *name = NULL;
1413         int rc;
1414
1415         ENTRY;
1416
1417         root = osd_object_spec_find(info, osd, fid, &name);
1418         if (!IS_ERR(root)) {
1419                 rc = osd_obj_add_entry(info, osd, root, name, id, th);
1420         } else {
1421                 rc = PTR_ERR(root);
1422                 if (rc == -ENOENT)
1423                         rc = 0;
1424         }
1425
1426         RETURN(rc);
1427 }
1428
1429 int osd_obj_spec_lookup(struct osd_thread_info *info, struct osd_device *osd,
1430                         const struct lu_fid *fid, struct osd_inode_id *id,
1431                         enum oi_check_flags flags)
1432 {
1433         struct dentry *root;
1434         struct dentry *dentry;
1435         struct inode *inode;
1436         char *name = NULL;
1437         int rc = -ENOENT;
1438
1439         ENTRY;
1440
1441         if (fid_is_last_id(fid)) {
1442                 struct osd_obj_seq *osd_seq;
1443
1444                 osd_seq = osd_seq_load(info, osd, fid_seq(fid));
1445                 if (IS_ERR(osd_seq))
1446                         RETURN(PTR_ERR(osd_seq));
1447                 root = osd_seq->oos_root;
1448                 name = "LAST_ID";
1449         } else {
1450                 root = osd_sb(osd)->s_root;
1451                 name = osd_lf_fid2name(fid);
1452                 if (name == NULL || strlen(name) == 0)
1453                         RETURN(-ENOENT);
1454         }
1455
1456         dentry = osd_lookup_one_len_common(osd, name, root, strlen(name),
1457                                            flags);
1458         if (!IS_ERR(dentry)) {
1459                 inode = dentry->d_inode;
1460                 if (inode) {
1461                         if (is_bad_inode(inode)) {
1462                                 rc = -EIO;
1463                         } else {
1464                                 osd_id_gen(id, inode->i_ino,
1465                                            inode->i_generation);
1466                                 rc = 0;
1467                         }
1468                 }
1469                 /*
1470                  * if dentry is accessible after osd_compat_spec_insert it
1471                  * will still contain NULL inode, so don't keep it in cache
1472                  */
1473                 d_invalidate(dentry);
1474                 dput(dentry);
1475         }
1476
1477         RETURN(rc);
1478 }
1479
1480 #ifndef HAVE_BIO_INTEGRITY_ENABLED
1481 bool bio_integrity_enabled(struct bio *bio)
1482 {
1483         struct blk_integrity *bi = blk_get_integrity(bio_get_disk(bio));
1484
1485         if (bio_op(bio) != REQ_OP_READ && bio_op(bio) != REQ_OP_WRITE)
1486                 return false;
1487
1488         if (!bio_sectors(bio))
1489                 return false;
1490
1491          /* Already protected? */
1492         if (bio_integrity(bio))
1493                 return false;
1494
1495         if (bi == NULL)
1496                 return false;
1497
1498         if (bio_data_dir(bio) == READ && bi->profile->verify_fn != NULL &&
1499             (bi->flags & BLK_INTEGRITY_VERIFY))
1500                 return true;
1501
1502         if (bio_data_dir(bio) == WRITE && bi->profile->generate_fn != NULL &&
1503             (bi->flags & BLK_INTEGRITY_GENERATE))
1504                 return true;
1505
1506         return false;
1507 }
1508 #endif