Whamcloud - gitweb
LU-3409 llite: silence lockdep warning in ll_md_blocking_ast
[fs/lustre-release.git] / lustre / osd-ldiskfs / osd_compat.c
index 3d154af..4794d78 100644 (file)
@@ -27,7 +27,7 @@
  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  *
- * Copyright (c) 2012, Intel Corporation.
+ * Copyright (c) 2012, 2013, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
 #include <linux/types.h>
 /* prerequisite for linux/xattr.h */
 #include <linux/fs.h>
+/* XATTR_{REPLACE,CREATE} */
+#include <linux/xattr.h>
 
 /*
  * struct OBD_{ALLOC,FREE}*()
  * OBD_FAIL_CHECK
  */
 #include <obd_support.h>
-#include <lvfs.h>
 
 #include "osd_internal.h"
 #include "osd_oi.h"
@@ -76,6 +77,54 @@ static void osd_pop_ctxt(const struct osd_device *dev,
        pop_ctxt(save, new, NULL);
 }
 
+/**
+ * Look up \a name under \a dir and create it as a directory if it does
+ * not exist yet.
+ *
+ * An already existing entry must be a directory, otherwise -ENOTDIR is
+ * returned. When \a fix is non-zero and the permission bits of an existing
+ * directory differ from those in \a mode, they are rewritten in place and
+ * the inode is marked dirty.
+ *
+ * \param[in] dir   parent directory dentry
+ * \param[in] mnt   vfsmount handed to ll_vfs_mkdir()
+ * \param[in] name  NUL-terminated name of the directory
+ * \param[in] mode  mode for a new directory / wanted permission bits
+ * \param[in] fix   non-zero to correct permissions of an existing directory
+ *
+ * \retval dentry of the directory on success; the reference taken by the
+ *         lookup is handed over to the caller
+ * \retval ERR_PTR() of the lookup or mkdir error, or of -ENOTDIR
+ */
+static struct dentry *simple_mkdir(struct dentry *dir, struct vfsmount *mnt,
+                                  const char *name, int mode, int fix)
+{
+       struct dentry *dchild;
+       int err = 0;
+       ENTRY;
+
+       CDEBUG(D_INODE, "creating directory %.*s\n", (int)strlen(name), name);
+       dchild = ll_lookup_one_len(name, dir, strlen(name));
+       if (IS_ERR(dchild))
+               GOTO(out_up, dchild);
+
+       if (dchild->d_inode) {
+               int old_mode = dchild->d_inode->i_mode;
+               if (!S_ISDIR(old_mode)) {
+                       CERROR("found %s (%lu/%u) is mode %o\n", name,
+                              dchild->d_inode->i_ino,
+                              dchild->d_inode->i_generation, old_mode);
+                       GOTO(out_err, err = -ENOTDIR);
+               }
+
+               /* Fixup directory permissions if necessary */
+               if (fix && (old_mode & S_IALLUGO) != (mode & S_IALLUGO)) {
+                       CDEBUG(D_CONFIG,
+                              "fixing permissions on %s from %o to %o\n",
+                              name, old_mode, mode);
+                       /* replace only the permission bits, keep the rest */
+                       dchild->d_inode->i_mode = (mode & S_IALLUGO) |
+                                                 (old_mode & ~S_IALLUGO);
+                       mark_inode_dirty(dchild->d_inode);
+               }
+               GOTO(out_up, dchild);
+       }
+
+       err = ll_vfs_mkdir(dir->d_inode, dchild, mnt, mode);
+       if (err)
+               GOTO(out_err, err);
+
+       RETURN(dchild);
+
+out_err:
+       /* drop the lookup reference, the caller only gets the error */
+       dput(dchild);
+       dchild = ERR_PTR(err);
+out_up:
+       /* pair the ENTRY above on every exit path */
+       RETURN(dchild);
+}
+
+
 int osd_last_rcvd_subdir_count(struct osd_device *osd)
 {
         struct lr_server_data lsd;
@@ -114,6 +163,162 @@ out:
        return count;
 }
 
+/* Name of the directory, created under the filesystem root, that
+ * osd_mdt_init() sets up as omm_remote_parent */
+static const char remote_parent_dir[] = "REMOTE_PARENT_DIR";
+
+/*
+ * Allocate the MDT object map of \a dev and prepare its remote parent
+ * directory: the directory is looked up or created under the root of the
+ * backing filesystem, flagged LDISKFS_STATE_LUSTRE_NO_OI and given the
+ * local FID built from REMOTE_PARENT_DIR_OID.
+ *
+ * Returns 0 on success, -ENOMEM if the map cannot be allocated, or the
+ * error from simple_mkdir()/osd_ea_fid_set(). On failure the map is
+ * freed again and dev->od_mdt_map is reset to NULL.
+ */
+static int osd_mdt_init(const struct lu_env *env, struct osd_device *dev)
+{
+       struct osd_thread_info  *info = osd_oti_get(env);
+       struct lu_fid           *fid = &info->oti_fid;
+       struct osd_mdobj_map    *omm;
+       struct lvfs_run_ctxt    new;
+       struct lvfs_run_ctxt    save;
+       struct dentry           *root;
+       struct dentry           *rdir;
+       int                     rc = 0;
+       ENTRY;
+
+       OBD_ALLOC_PTR(dev->od_mdt_map);
+       omm = dev->od_mdt_map;
+       if (omm == NULL)
+               RETURN(-ENOMEM);
+
+       LASSERT(dev->od_fsops);
+
+       root = osd_sb(dev)->s_root;
+       osd_push_ctxt(dev, &new, &save);
+
+       rdir = simple_mkdir(root, dev->od_mnt, remote_parent_dir, 0755, 1);
+       if (IS_ERR(rdir))
+               GOTO(cleanup, rc = PTR_ERR(rdir));
+
+       ldiskfs_set_inode_state(rdir->d_inode, LDISKFS_STATE_LUSTRE_NO_OI);
+       /* the map now owns the dentry reference returned by simple_mkdir() */
+       omm->omm_remote_parent = rdir;
+
+       /* Store the local-object FID in the LMA of the remote parent inode */
+       lu_local_obj_fid(fid, REMOTE_PARENT_DIR_OID);
+       rc = osd_ea_fid_set(info, rdir->d_inode, fid, 0);
+       if (rc != 0)
+               GOTO(cleanup, rc);
+cleanup:
+       pop_ctxt(&save, &new, NULL);
+       if (rc != 0) {
+               /* undo everything done so far */
+               if (omm->omm_remote_parent != NULL)
+                       dput(omm->omm_remote_parent);
+               OBD_FREE_PTR(omm);
+               dev->od_mdt_map = NULL;
+       }
+       RETURN(rc);
+}
+
+
+/*
+ * Release the MDT object map of \a osd: drop the reference held on the
+ * remote parent directory, free the map and clear osd->od_mdt_map.
+ * Does nothing if the map was never set up.
+ */
+static void osd_mdt_fini(struct osd_device *osd)
+{
+       struct osd_mdobj_map *omm = osd->od_mdt_map;
+
+       if (omm == NULL)
+               return;
+
+       if (omm->omm_remote_parent)
+               dput(omm->omm_remote_parent);
+
+       OBD_FREE_PTR(omm);
+       /* clear the pointer that was just freed (the MDT map, not the OST
+        * map) so no dangling reference is left behind */
+       osd->od_mdt_map = NULL;
+}
+
+
+/**
+ * Link \a obj into the remote parent directory of \a osd.
+ *
+ * LMAI_REMOTE_PARENT is first set in the LMA xattr of the object, then an
+ * entry named after the object FID (DFID_NOBRACE format) is added under
+ * omm_remote_parent. The link count of the parent is only raised when the
+ * entry was really inserted.
+ *
+ * \param[in] env  execution environment, used to get the thread info
+ * \param[in] osd  device owning the remote parent directory
+ * \param[in] obj  object to be linked
+ * \param[in] oh   transaction handle the directory change is made under
+ *
+ * \retval 0 on success
+ * \retval error returned by osd_get_lma(), __osd_xattr_set() or
+ *         osd_ldiskfs_add_entry()
+ */
+int osd_add_to_remote_parent(const struct lu_env *env, struct osd_device *osd,
+                            struct osd_object *obj, struct osd_thandle *oh)
+{
+       struct osd_mdobj_map    *omm = osd->od_mdt_map;
+       struct osd_thread_info  *oti = osd_oti_get(env);
+       struct lustre_mdt_attrs *lma = &oti->oti_mdt_attrs;
+       char                    *name = oti->oti_name;
+       struct dentry           *dentry;
+       struct dentry           *parent;
+       int                     rc;
+       ENTRY;
+
+       /* Set REMOTE_PARENT in lma, so other process like unlink or lfsck
+        * can identify this object quickly */
+       rc = osd_get_lma(oti, obj->oo_inode, &oti->oti_obj_dentry, lma);
+       if (rc != 0)
+               RETURN(rc);
+
+       lma->lma_incompat |= LMAI_REMOTE_PARENT;
+       lustre_lma_swab(lma);
+       rc = __osd_xattr_set(oti, obj->oo_inode, XATTR_NAME_LMA, lma,
+                            sizeof(*lma), XATTR_REPLACE);
+       if (rc != 0)
+               RETURN(rc);
+
+       parent = omm->omm_remote_parent;
+       sprintf(name, DFID_NOBRACE, PFID(lu_object_fid(&obj->oo_dt.do_lu)));
+       dentry = osd_child_dentry_by_inode(env, parent->d_inode,
+                                          name, strlen(name));
+       mutex_lock(&parent->d_inode->i_mutex);
+       rc = osd_ldiskfs_add_entry(oh->ot_handle, dentry, obj->oo_inode,
+                                  NULL);
+       CDEBUG(D_INODE, "%s: add %s:%lu to remote parent %lu.\n", osd_name(osd),
+              name, obj->oo_inode->i_ino, parent->d_inode->i_ino);
+       LASSERTF(parent->d_inode->i_nlink > 1, "%s: %lu nlink %d",
+                osd_name(osd), parent->d_inode->i_ino,
+                parent->d_inode->i_nlink);
+       /* account for the new entry only if it was really inserted,
+        * otherwise the link count would drift from the directory content */
+       if (rc == 0) {
+               parent->d_inode->i_nlink++;
+               mark_inode_dirty(parent->d_inode);
+       }
+       mutex_unlock(&parent->d_inode->i_mutex);
+       RETURN(rc);
+}
+
+
+/**
+ * Remove the entry of \a obj from the remote parent directory of \a osd.
+ *
+ * Nothing is done unless LMAI_REMOTE_PARENT is set in the LMA of the
+ * object. Otherwise the entry named after the object FID (DFID_NOBRACE
+ * format) is deleted from omm_remote_parent, the link count of the parent
+ * is lowered and the flag is cleared from the LMA xattr again.
+ *
+ * \param[in] env  execution environment, used to get the thread info
+ * \param[in] osd  device owning the remote parent directory
+ * \param[in] obj  object to be unlinked
+ * \param[in] oh   transaction handle the directory change is made under
+ *
+ * \retval 0 on success or if the object is not flagged as remote
+ * \retval -ENOENT if the flag is set but no entry is found
+ * \retval error returned by osd_get_lma(), ldiskfs_delete_entry() or
+ *         __osd_xattr_set()
+ */
+int osd_delete_from_remote_parent(const struct lu_env *env,
+                                 struct osd_device *osd,
+                                 struct osd_object *obj,
+                                 struct osd_thandle *oh)
+{
+       struct osd_mdobj_map       *omm = osd->od_mdt_map;
+       struct osd_thread_info     *oti = osd_oti_get(env);
+       struct lustre_mdt_attrs    *lma = &oti->oti_mdt_attrs;
+       char                       *name = oti->oti_name;
+       struct dentry              *dentry;
+       struct dentry              *parent;
+       struct ldiskfs_dir_entry_2 *de;
+       struct buffer_head         *bh;
+       int                        rc;
+       ENTRY;
+
+       /* Check lma to see whether it is remote object */
+       rc = osd_get_lma(oti, obj->oo_inode, &oti->oti_obj_dentry, lma);
+       if (rc != 0)
+               RETURN(rc);
+
+       if (likely(!(lma->lma_incompat & LMAI_REMOTE_PARENT)))
+               RETURN(0);
+
+       parent = omm->omm_remote_parent;
+       sprintf(name, DFID_NOBRACE, PFID(lu_object_fid(&obj->oo_dt.do_lu)));
+       dentry = osd_child_dentry_by_inode(env, parent->d_inode,
+                                          name, strlen(name));
+       mutex_lock(&parent->d_inode->i_mutex);
+       bh = osd_ldiskfs_find_entry(parent->d_inode, dentry, &de, NULL);
+       if (bh == NULL) {
+               mutex_unlock(&parent->d_inode->i_mutex);
+               RETURN(-ENOENT);
+       }
+       CDEBUG(D_INODE, "%s: del %s:%lu from remote parent %lu.\n",
+              osd_name(osd), name, obj->oo_inode->i_ino,
+              parent->d_inode->i_ino);
+       rc = ldiskfs_delete_entry(oh->ot_handle, parent->d_inode, de, bh);
+       LASSERTF(parent->d_inode->i_nlink > 1, "%s: %lu nlink %d",
+                osd_name(osd), parent->d_inode->i_ino,
+                parent->d_inode->i_nlink);
+       /* the entry is only gone if the delete succeeded */
+       if (rc == 0) {
+               parent->d_inode->i_nlink--;
+               mark_inode_dirty(parent->d_inode);
+       }
+       mutex_unlock(&parent->d_inode->i_mutex);
+       brelse(bh);
+       /* report a failed delete instead of letting the xattr update below
+        * overwrite rc; the flag stays set as the entry still exists */
+       if (rc != 0)
+               RETURN(rc);
+
+       /* Get rid of REMOTE_PARENT flag from incompat */
+       lma->lma_incompat &= ~LMAI_REMOTE_PARENT;
+       lustre_lma_swab(lma);
+       rc = __osd_xattr_set(oti, obj->oo_inode, XATTR_NAME_LMA, lma,
+                            sizeof(*lma), XATTR_REPLACE);
+       RETURN(rc);
+}
+
+
 /*
  * directory structure on legacy OST:
  *
@@ -124,7 +329,7 @@ out:
  * CONFIGS
  *
  */
-int osd_ost_init(struct osd_device *dev)
+static int osd_ost_init(struct osd_device *dev)
 {
        struct lvfs_run_ctxt  new;
        struct lvfs_run_ctxt  save;
@@ -193,15 +398,42 @@ static void osd_seq_free(struct osd_obj_map *map,
        OBD_FREE_PTR(osd_seq);
 }
 
-int osd_obj_map_init(struct osd_device *dev)
+/*
+ * Release the OST object map of \a osd: free every sequence on
+ * om_seq_list under the list write lock, drop the reference on om_root if
+ * one is held, free the map and clear osd->od_ost_map.
+ * Does nothing if the map was never set up.
+ */
+static void osd_ost_fini(struct osd_device *osd)
+{
+       struct osd_obj_seq    *osd_seq;
+       struct osd_obj_seq    *tmp;
+       struct osd_obj_map    *map = osd->od_ost_map;
+       ENTRY;
+
+       if (map == NULL) {
+               /* pair the ENTRY above on the early exit as well */
+               EXIT;
+               return;
+       }
+
+       write_lock(&map->om_seq_list_lock);
+       /* _safe variant: osd_seq_free() releases the current element */
+       cfs_list_for_each_entry_safe(osd_seq, tmp,
+                                    &map->om_seq_list,
+                                    oos_seq_list) {
+               osd_seq_free(map, osd_seq);
+       }
+       write_unlock(&map->om_seq_list_lock);
+       if (map->om_root)
+               dput(map->om_root);
+       OBD_FREE_PTR(map);
+       osd->od_ost_map = NULL;
+       EXIT;
+}
+
+
+/*
+ * Set up the object maps of \a dev: the OST map first, then the MDT map.
+ * Returns 0 on success or the error of the first step that failed.
+ * NOTE(review): when osd_mdt_init() fails the OST map is not released
+ * here; presumably the caller runs osd_obj_map_fini() - TODO confirm.
+ */
+int osd_obj_map_init(const struct lu_env *env, struct osd_device *dev)
 {
        int rc;
        ENTRY;
 
        /* prepare structures for OST */
        rc = osd_ost_init(dev);
+       if (rc)
+               RETURN(rc);
 
        /* prepare structures for MDS */
+       rc = osd_mdt_init(env, dev);
 
         RETURN(rc);
 }
@@ -229,27 +461,8 @@ struct osd_obj_seq *osd_seq_find(struct osd_obj_map *map, obd_seq seq)
 
+/*
+ * Tear down the object maps of \a dev by calling osd_ost_fini() and then
+ * osd_mdt_fini(); both return early when their map is NULL.
+ */
 void osd_obj_map_fini(struct osd_device *dev)
 {
-       struct osd_obj_seq    *osd_seq;
-       struct osd_obj_seq    *tmp;
-       struct osd_obj_map    *map = dev->od_ost_map;
-       ENTRY;
-
-       map = dev->od_ost_map;
-       if (map == NULL)
-               return;
-
-       write_lock(&dev->od_ost_map->om_seq_list_lock);
-       cfs_list_for_each_entry_safe(osd_seq, tmp,
-                                    &dev->od_ost_map->om_seq_list,
-                                    oos_seq_list) {
-               osd_seq_free(map, osd_seq);
-       }
-       write_unlock(&dev->od_ost_map->om_seq_list_lock);
-       if (map->om_root)
-               dput(map->om_root);
-       OBD_FREE_PTR(dev->od_ost_map);
-       dev->od_ost_map = NULL;
-       EXIT;
+       osd_ost_fini(dev);
+       osd_mdt_fini(dev);
 }
 
 static int osd_obj_del_entry(struct osd_thread_info *info,
@@ -257,26 +470,25 @@ static int osd_obj_del_entry(struct osd_thread_info *info,
                             struct dentry *dird, char *name,
                             struct thandle *th)
 {
-        struct ldiskfs_dir_entry_2 *de;
-        struct buffer_head         *bh;
-        struct osd_thandle         *oh;
-        struct dentry              *child;
-        struct inode               *dir = dird->d_inode;
-        int                         rc;
+       struct ldiskfs_dir_entry_2 *de;
+       struct buffer_head         *bh;
+       struct osd_thandle         *oh;
+       struct dentry              *child;
+       struct inode               *dir = dird->d_inode;
+       int                         rc;
+       ENTRY;
 
-        ENTRY;
+       oh = container_of(th, struct osd_thandle, ot_super);
+       LASSERT(oh->ot_handle != NULL);
+       LASSERT(oh->ot_handle->h_transaction != NULL);
 
-        oh = container_of(th, struct osd_thandle, ot_super);
-        LASSERT(oh->ot_handle != NULL);
-        LASSERT(oh->ot_handle->h_transaction != NULL);
 
-
-        child = &info->oti_child_dentry;
-        child->d_name.hash = 0;
-        child->d_name.name = name;
-        child->d_name.len = strlen(name);
-        child->d_parent = dird;
-        child->d_inode = NULL;
+       child = &info->oti_child_dentry;
+       child->d_name.hash = 0;
+       child->d_name.name = name;
+       child->d_name.len = strlen(name);
+       child->d_parent = dird;
+       child->d_inode = NULL;
 
        ll_vfs_dq_init(dir);
        mutex_lock(&dir->i_mutex);
@@ -413,7 +625,7 @@ out_err:
        RETURN(rc);
 }
 
-static struct osd_obj_seq *osd_seq_load(struct osd_device *osd, obd_seq seq)
+struct osd_obj_seq *osd_seq_load(struct osd_device *osd, obd_seq seq)
 {
        struct osd_obj_map      *map;
        struct osd_obj_seq      *osd_seq;
@@ -490,16 +702,16 @@ int osd_obj_map_lookup(struct osd_thread_info *info, struct osd_device *dev,
         LASSERT(map);
        LASSERT(map->om_root);
 
-        fid_ostid_pack(fid, ostid);
-       osd_seq = osd_seq_load(dev, ostid->oi_seq);
+        fid_to_ostid(fid, ostid);
+       osd_seq = osd_seq_load(dev, ostid_seq(ostid));
        if (IS_ERR(osd_seq))
                RETURN(PTR_ERR(osd_seq));
 
-       dirn = ostid->oi_id & (osd_seq->oos_subdir_count - 1);
+       dirn = ostid_id(ostid) & (osd_seq->oos_subdir_count - 1);
        d_seq = osd_seq->oos_dirs[dirn];
        LASSERT(d_seq);
 
-       osd_oid_name(name, fid, ostid->oi_id);
+       osd_oid_name(name, fid, ostid_id(ostid));
 
        child = &info->oti_child_dentry;
        child->d_parent = d_seq;
@@ -545,17 +757,17 @@ int osd_obj_map_insert(struct osd_thread_info *info,
         LASSERT(map);
 
        /* map fid to seq:objid */
-        fid_ostid_pack(fid, ostid);
+        fid_to_ostid(fid, ostid);
 
-       osd_seq = osd_seq_load(osd, ostid->oi_seq);
+       osd_seq = osd_seq_load(osd, ostid_seq(ostid));
        if (IS_ERR(osd_seq))
                RETURN(PTR_ERR(osd_seq));
 
-       dirn = ostid->oi_id & (osd_seq->oos_subdir_count - 1);
+       dirn = ostid_id(ostid) & (osd_seq->oos_subdir_count - 1);
        d = osd_seq->oos_dirs[dirn];
         LASSERT(d);
 
-       osd_oid_name(name, fid, ostid->oi_id);
+       osd_oid_name(name, fid, ostid_id(ostid));
        rc = osd_obj_add_entry(info, osd, d, name, id, th);
 
         RETURN(rc);
@@ -576,59 +788,22 @@ int osd_obj_map_delete(struct osd_thread_info *info, struct osd_device *osd,
         LASSERT(map);
 
        /* map fid to seq:objid */
-        fid_ostid_pack(fid, ostid);
+        fid_to_ostid(fid, ostid);
 
-       osd_seq = osd_seq_load(osd, ostid->oi_seq);
+       osd_seq = osd_seq_load(osd, ostid_seq(ostid));
        if (IS_ERR(osd_seq))
                GOTO(cleanup, rc = PTR_ERR(osd_seq));
 
-       dirn = ostid->oi_id & (osd_seq->oos_subdir_count - 1);
+       dirn = ostid_id(ostid) & (osd_seq->oos_subdir_count - 1);
        d = osd_seq->oos_dirs[dirn];
        LASSERT(d);
 
-       osd_oid_name(name, fid, ostid->oi_id);
+       osd_oid_name(name, fid, ostid_id(ostid));
        rc = osd_obj_del_entry(info, osd, d, name, th);
 cleanup:
         RETURN(rc);
 }
 
-struct named_oid {
-        unsigned long  oid;
-        char          *name;
-};
-
-static const struct named_oid oids[] = {
-       { FLD_INDEX_OID,        "fld" },
-       { FID_SEQ_CTL_OID,      "seq_ctl" },
-       { FID_SEQ_SRV_OID,      "seq_srv" },
-       { MDD_ROOT_INDEX_OID,   "" /* "ROOT" */ },
-       { MDD_ORPHAN_OID,       "" /* "PENDING" */ },
-       { MDD_LOV_OBJ_OID,      LOV_OBJID },
-       { MDD_CAPA_KEYS_OID,    "" /* CAPA_KEYS */ },
-       { MDT_LAST_RECV_OID,    LAST_RCVD },
-       { LFSCK_BOOKMARK_OID,   "" /* "lfsck_bookmark" */ },
-       { OTABLE_IT_OID,        "" /* "otable iterator" */},
-       { OFD_LAST_RECV_OID,    LAST_RCVD },
-       { OFD_LAST_GROUP_OID,   "LAST_GROUP" },
-       { LLOG_CATALOGS_OID,    "CATALOGS" },
-       { MGS_CONFIGS_OID,      "" /* MOUNT_CONFIGS_DIR */ },
-       { OFD_HEALTH_CHECK_OID, HEALTH_CHECK },
-       { MDD_LOV_OBJ_OSEQ,     LOV_OBJSEQ },
-       { 0,                    NULL }
-};
-
-static char *oid2name(const unsigned long oid)
-{
-        int i = 0;
-
-        while (oids[i].oid) {
-                if (oids[i].oid == oid)
-                        return oids[i].name;
-                i++;
-        }
-        return NULL;
-}
-
 int osd_obj_spec_insert(struct osd_thread_info *info, struct osd_device *osd,
                        const struct lu_fid *fid,
                        const struct osd_inode_id *id,
@@ -651,7 +826,7 @@ int osd_obj_spec_insert(struct osd_thread_info *info, struct osd_device *osd,
                rc = osd_obj_add_entry(info, osd, osd_seq->oos_root,
                                       "LAST_ID", id, th);
        } else {
-               name = oid2name(fid_oid(fid));
+               name = osd_lf_fid2name(fid);
                if (name == NULL)
                        CWARN("UNKNOWN COMPAT FID "DFID"\n", PFID(fid));
                else if (name[0])
@@ -665,10 +840,10 @@ int osd_obj_spec_lookup(struct osd_thread_info *info, struct osd_device *osd,
                        const struct lu_fid *fid, struct osd_inode_id *id)
 {
        struct dentry   *root;
-       struct dentry *dentry;
-       struct inode  *inode;
-       char          *name;
-       int            rc = -ENOENT;
+       struct dentry   *dentry;
+       struct inode    *inode;
+       char            *name;
+       int             rc = -ENOENT;
        ENTRY;
 
        if (fid_is_last_id(fid)) {
@@ -681,7 +856,7 @@ int osd_obj_spec_lookup(struct osd_thread_info *info, struct osd_device *osd,
                name = "LAST_ID";
        } else {
                root = osd_sb(osd)->s_root;
-               name = oid2name(fid_oid(fid));
+               name = osd_lf_fid2name(fid);
                if (name == NULL || strlen(name) == 0)
                        RETURN(-ENOENT);
        }