Whamcloud - gitweb
LU-1406 ofd: add OBD methods to handle OST requests
[fs/lustre-release.git] / lustre / osd-ldiskfs / osd_handler.c
index f3ea487..de725c6 100644 (file)
@@ -1,6 +1,4 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
+/*
  * GPL HEADER START
  *
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -43,9 +41,6 @@
  *         Pravin Shelar <pravin.shelar@sun.com> : Added fid in dirent
  */
 
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
 #define DEBUG_SUBSYSTEM S_MDS
 
 #include <linux/module.h>
@@ -248,43 +243,101 @@ static struct lu_object *osd_object_alloc(const struct lu_env *env,
         }
 }
 
+static int osd_get_lma(struct inode *inode, struct dentry *dentry,
+                      struct lustre_mdt_attrs *lma)
+{
+       int rc; /* > 0: LMA read; 0: nothing read; < 0: getxattr error */
+
+       dentry->d_inode = inode; /* getxattr() below is handed the dentry */
+       rc = inode->i_op->getxattr(dentry, XATTR_NAME_LMA, (void *)lma,
+                                  sizeof(*lma));
+       if (rc > 0) {
+               /* Refuse LMA incompat flags outside LMA_INCOMPAT_SUPP */
+               if (lma->lma_incompat & ~cpu_to_le32(LMA_INCOMPAT_SUPP)) {
+                       CWARN("%.16s: unsupported incompat LMA feature(s) "
+                             "%lx/%#x\n",
+                             LDISKFS_SB(inode->i_sb)->s_es->s_volume_name,
+                             inode->i_ino, le32_to_cpu(lma->lma_incompat) &
+                                                       ~LMA_INCOMPAT_SUPP);
+                       rc = -ENOSYS;
+               } else {
+                       lustre_lma_swab(lma); /* NOTE(review): presumably to CPU byte order - confirm */
+                       rc = 0; /* success: *lma has been filled in */
+               }
+       } else if (rc == 0) {
+               rc = -ENODATA; /* nothing was read: report it as no data */
+       }
+
+       return rc; /* 0, -ENODATA, -ENOSYS, or the getxattr() error */
+}
+
 /*
  * retrieve object from backend ext fs.
  **/
-struct inode *osd_iget(struct osd_thread_info *info,
-                       struct osd_device *dev,
-                       const struct osd_inode_id *id)
-{
-        struct inode *inode = NULL;
-
-        inode = ldiskfs_iget(osd_sb(dev), id->oii_ino);
-        if (IS_ERR(inode)) {
-                CERROR("Cannot get inode, rc = %li\n", PTR_ERR(inode));
-        } else if (id->oii_gen != OSD_OII_NOGEN &&
-                   inode->i_generation != id->oii_gen) {
-                iput(inode);
-                inode = ERR_PTR(-ESTALE);
-        } else if (inode->i_nlink == 0) {
-                /* due to parallel readdir and unlink,
-                * we can have dead inode here. */
-                CWARN("stale inode\n");
-                make_bad_inode(inode);
-                iput(inode);
-                inode = ERR_PTR(-ESTALE);
-        } else if (is_bad_inode(inode)) {
-                CERROR("bad inode %lx\n",inode->i_ino);
-                iput(inode);
-                inode = ERR_PTR(-ENOENT);
-        } else {
-                /* Do not update file c/mtime in ldiskfs.
-                 * NB: we don't have any lock to protect this because we don't
-                 * have reference on osd_object now, but contention with
-                 * another lookup + attr_set can't happen in the tiny window
-                 * between if (...) and set S_NOCMTIME. */
-                if (!(inode->i_flags & S_NOCMTIME))
-                        inode->i_flags |= S_NOCMTIME;
-        }
-        return inode;
+struct inode *osd_iget(struct osd_thread_info *info, struct osd_device *dev,
+                      struct osd_inode_id *id)
+{
+       struct inode *inode = NULL;
+
+       inode = ldiskfs_iget(osd_sb(dev), id->oii_ino);
+       if (IS_ERR(inode)) { /* the ERR_PTR is returned to the caller as is */
+               CDEBUG(D_INODE, "no inode: ino = %u, rc = %ld\n",
+                      id->oii_ino, PTR_ERR(inode));
+       } else if (id->oii_gen != OSD_OII_NOGEN &&
+                  inode->i_generation != id->oii_gen) {
+               CDEBUG(D_INODE, "unmatched inode: ino = %u, gen0 = %u, "
+                      "gen1 = %u\n",
+                      id->oii_ino, id->oii_gen, inode->i_generation);
+               iput(inode);
+               inode = ERR_PTR(-ESTALE); /* generation mismatch */
+       } else if (inode->i_nlink == 0) {
+               /* A readdir racing with an unlink can hand us an inode
+                * that is already dead (no links left). */
+               CDEBUG(D_INODE, "stale inode: ino = %u\n", id->oii_ino);
+               make_bad_inode(inode);
+               iput(inode);
+               inode = ERR_PTR(-ESTALE);
+       } else if (is_bad_inode(inode)) {
+               CWARN("%s: bad inode: ino = %u\n",
+               dev->od_dt_dev.dd_lu_dev.ld_obd->obd_name, id->oii_ino);
+               iput(inode);
+               inode = ERR_PTR(-ENOENT);
+       } else { /* usable inode: complete id if its generation was unset */
+               if (id->oii_gen == OSD_OII_NOGEN)
+                       osd_id_gen(id, inode->i_ino, inode->i_generation);
+
+               /* Do not update file c/mtime in ldiskfs.
+                * NB: we don't have any lock to protect this because we don't
+                * have reference on osd_object now, but contention with
+                * another lookup + attr_set can't happen in the tiny window
+                * between if (...) and set S_NOCMTIME. */
+               if (!(inode->i_flags & S_NOCMTIME))
+                       inode->i_flags |= S_NOCMTIME;
+       }
+       return inode; /* the inode from ldiskfs_iget(), or an ERR_PTR() */
+}
+
+struct inode *osd_iget_fid(struct osd_thread_info *info, struct osd_device *dev,
+                          struct osd_inode_id *id, struct lu_fid *fid)
+{
+       struct lustre_mdt_attrs *lma   = &info->oti_mdt_attrs; /* scratch buffer kept in info */
+       struct inode            *inode;
+       int                      rc;
+
+       inode = osd_iget(info, dev, id);
+       if (IS_ERR(inode))
+               return inode; /* *fid is left untouched on this path */
+
+       rc = osd_get_lma(inode, &info->oti_obj_dentry, lma);
+       if (rc == 0) { /* LMA was read: hand out the FID stored in it */
+               *fid = lma->lma_self_fid;
+       } else if (rc == -ENODATA) { /* no LMA data: build an IGIF instead */
+               LU_IGIF_BUILD(fid, inode->i_ino, inode->i_generation);
+       } else { /* any other error: drop the inode and propagate rc */
+               iput(inode);
+               inode = ERR_PTR(rc);
+       }
+       return inode; /* on success the caller must iput() it */
 }
 
 static int osd_fid_lookup(const struct lu_env *env,
@@ -345,18 +398,20 @@ static int osd_fid_lookup(const struct lu_env *env,
         }
 
         if (!S_ISDIR(inode->i_mode) || !ldiskfs_pdo) /* done */
-                goto out;
+               GOTO(out, result = 0);
+
+       LASSERT(obj->oo_hl_head == NULL);
+       obj->oo_hl_head = ldiskfs_htree_lock_head_alloc(HTREE_HBITS_DEF);
+       if (obj->oo_hl_head == NULL) {
+               obj->oo_inode = NULL;
+               iput(inode);
+               GOTO(out, result = -ENOMEM);
+       }
+       GOTO(out, result = 0);
 
-        LASSERT(obj->oo_hl_head == NULL);
-        obj->oo_hl_head = ldiskfs_htree_lock_head_alloc(HTREE_HBITS_DEF);
-        if (obj->oo_hl_head == NULL) {
-                obj->oo_inode = NULL;
-                iput(inode);
-                result = -ENOMEM;
-        }
 out:
-        LINVRNT(osd_invariant(obj));
-        RETURN(result);
+       LINVRNT(osd_invariant(obj));
+       return result;
 }
 
 /*
@@ -556,9 +611,11 @@ static void osd_trans_commit_cb(struct journal_callback *jcb, int error)
 
         dt_txn_hook_commit(th);
 
-        /* call per-transaction callbacks if any */
-        cfs_list_for_each_entry_safe(dcb, tmp, &oh->ot_dcb_list, dcb_linkage)
-                dcb->dcb_func(NULL, th, dcb, error);
+       /* call per-transaction callbacks if any */
+       cfs_list_for_each_entry_safe(dcb, tmp, &oh->ot_dcb_list, dcb_linkage) {
+               cfs_list_del_init(&dcb->dcb_linkage);
+               dcb->dcb_func(NULL, th, dcb, error);
+       }
 
         lu_ref_del_at(&lud->ld_reference, oh->ot_dev_link, "osd-tx", th);
         lu_device_put(lud);
@@ -873,7 +930,7 @@ static void osd_conf_get(const struct lu_env *env,
          */
         param->ddp_max_name_len = LDISKFS_NAME_LEN;
         param->ddp_max_nlink    = LDISKFS_LINK_MAX;
-        param->ddp_block_shift  = osd_sb(osd_dt_dev(dev))->s_blocksize_bits;
+       param->ddp_block_shift  = sb->s_blocksize_bits;
         param->ddp_mntopts      = 0;
         if (test_opt(sb, XATTR_USER))
                 param->ddp_mntopts |= MNTOPT_USERXATTR;
@@ -1000,7 +1057,7 @@ const int osd_dto_credits_noquota[DTO_NR] = {
         [DTO_INDEX_INSERT]  = 16,
         [DTO_INDEX_DELETE]  = 16,
         /**
-         * Unused now
+        * Used for OI scrub
          */
         [DTO_INDEX_UPDATE]  = 16,
         /**
@@ -1484,6 +1541,7 @@ static int osd_mkfile(struct osd_thread_info *info, struct osd_object *obj,
                  * NB: don't need any lock because no contention at this
                  * early stage */
                 inode->i_flags |= S_NOCMTIME;
+               inode->i_state |= I_LUSTRE_NOSCRUB;
                 obj->oo_inode = inode;
                 result = 0;
         } else {
@@ -1697,11 +1755,8 @@ static int __osd_oi_insert(const struct lu_env *env, struct osd_object *obj,
         LASSERT(obj->oo_inode != NULL);
         LASSERT(uc != NULL);
 
-        id->oii_ino = obj->oo_inode->i_ino;
-        id->oii_gen = obj->oo_inode->i_generation;
-
-        return osd_oi_insert(info, osd, fid, id, th,
-                             uc->mu_cap & CFS_CAP_SYS_RESOURCE_MASK);
+       osd_id_gen(id, obj->oo_inode->i_ino, obj->oo_inode->i_generation);
+       return osd_oi_insert(info, osd, fid, id, th);
 }
 
 static int osd_declare_object_create(const struct lu_env *env,
@@ -1820,6 +1875,9 @@ static int osd_object_destroy(const struct lu_env *env,
         LASSERT(inode);
         LASSERT(!lu_object_is_dying(dt->do_lu.lo_header));
 
+       /* Concurrency control against OI scrub. In most cases there is no
+        * lock contention, so unlink performance should not be affected. */
+       cfs_mutex_lock(&inode->i_mutex);
         if (S_ISDIR(inode->i_mode)) {
                 LASSERT(osd_inode_unlinked(inode) ||
                         inode->i_nlink == 1);
@@ -1834,6 +1892,7 @@ static int osd_object_destroy(const struct lu_env *env,
         OSD_EXEC_OP(th, destroy);
 
         result = osd_oi_delete(osd_oti_get(env), osd, fid, th);
+       cfs_mutex_unlock(&inode->i_mutex);
 
         /* XXX: add to ext3 orphan list */
         /* rc = ext3_orphan_add(handle_t *handle, struct inode *inode) */
@@ -1859,7 +1918,6 @@ static int __osd_xattr_set(const struct lu_env *env, struct dt_object *dt,
 
         LASSERT(dt_object_exists(dt));
         LASSERT(inode->i_op != NULL && inode->i_op->setxattr != NULL);
-        LASSERT(osd_write_locked(env, obj));
 
         if (fl & LU_XATTR_REPLACE)
                 fs_flags |= XATTR_REPLACE;
@@ -1898,15 +1956,6 @@ static int osd_ea_fid_set(const struct lu_env *env, struct dt_object *dt,
 }
 
 /**
- * Helper function to form igif
- */
-static inline void osd_igif_get(const struct lu_env *env, struct inode  *inode,
-                                struct lu_fid *fid)
-{
-        LU_IGIF_BUILD(fid, inode->i_ino, inode->i_generation);
-}
-
-/**
  * ldiskfs supports fid in dirent, it is passed in dentry->d_fsdata.
  * lustre 1.8 also uses d_fsdata for passing other info to ldiskfs.
  * To have compatilibility with 1.8 ldiskfs driver we need to have
@@ -1933,54 +1982,20 @@ void osd_get_ldiskfs_dirent_param(struct ldiskfs_dentry_param *param,
  * \retval 0 on success
  */
 static int osd_ea_fid_get(const struct lu_env *env, struct osd_object *obj,
-                          __u32 ino, struct lu_fid *fid)
+                         __u32 ino, struct lu_fid *fid)
 {
-        struct osd_thread_info  *info      = osd_oti_get(env);
-        struct lustre_mdt_attrs *mdt_attrs = &info->oti_mdt_attrs;
-        struct lu_device        *ldev   = obj->oo_dt.do_lu.lo_dev;
-        struct dentry           *dentry = &info->oti_child_dentry;
-        struct osd_inode_id     *id     = &info->oti_id;
-        struct osd_device       *dev;
-        struct inode            *inode;
-        int                      rc;
-
-        ENTRY;
-        dev  = osd_dev(ldev);
-
-        id->oii_ino = ino;
-        id->oii_gen = OSD_OII_NOGEN;
+       struct osd_thread_info  *info = osd_oti_get(env);
+       struct osd_inode_id     *id = &info->oti_id; /* scratch id kept in info */
+       struct inode            *inode;
+       ENTRY;
 
-        inode = osd_iget(info, dev, id);
-        if (IS_ERR(inode)) {
-                rc = PTR_ERR(inode);
-                GOTO(out,rc);
-        }
-        dentry->d_inode = inode;
-
-        LASSERT(inode->i_op != NULL && inode->i_op->getxattr != NULL);
-        rc = inode->i_op->getxattr(dentry, XATTR_NAME_LMA, (void *)mdt_attrs,
-                                   sizeof *mdt_attrs);
-
-        /* Check LMA compatibility */
-        if (rc > 0 &&
-            (mdt_attrs->lma_incompat & ~cpu_to_le32(LMA_INCOMPAT_SUPP))) {
-                CWARN("Inode %lx: Unsupported incompat LMA feature(s) %#x\n",
-                      inode->i_ino, le32_to_cpu(mdt_attrs->lma_incompat) &
-                      ~LMA_INCOMPAT_SUPP);
-                return -ENOSYS;
-        }
+       osd_id_gen(id, ino, OSD_OII_NOGEN); /* generation is not known here */
+       inode = osd_iget_fid(info, osd_obj2dev(obj), id, fid); /* fills *fid */
+       if (IS_ERR(inode))
+               RETURN(PTR_ERR(inode));
 
-        if (rc > 0) {
-                lustre_lma_swab(mdt_attrs);
-                memcpy(fid, &mdt_attrs->lma_self_fid, sizeof(*fid));
-                rc = 0;
-        } else if (rc == -ENODATA) {
-                osd_igif_get(env, inode, fid);
-                rc = 0;
-        }
-        iput(inode);
-out:
-        RETURN(rc);
+       iput(inode); /* only the FID was wanted; release the inode */
+       RETURN(0);
 }
 
 /**
@@ -2075,7 +2090,7 @@ static int osd_object_ref_add(const struct lu_env *env,
                     inode->i_nlink == 2)
                         inode->i_nlink = 1;
         }
-        LASSERT(inode->i_nlink < LDISKFS_LINK_MAX);
+        LASSERT(inode->i_nlink <= LDISKFS_LINK_MAX);
         cfs_spin_unlock(&obj->oo_guard);
         inode->i_sb->s_op->dirty_inode(inode);
         LINVRNT(osd_invariant(obj));
@@ -2914,7 +2929,7 @@ static int osd_index_iam_insert(const struct lu_env *env, struct dt_object *dt,
         LASSERT(th != NULL);
 
         if (osd_object_auth(env, dt, capa, CAPA_OPC_INDEX_INSERT))
-                return -EACCES;
+               RETURN(-EACCES);
 
         OSD_EXEC_OP(th, insert);
 
@@ -3176,7 +3191,7 @@ struct osd_object *osd_object_find(const struct lu_env *env,
                         else
                                 LU_OBJECT_DEBUG(D_ERROR, env, luch,
                                                 "lu_object can't be located"
-                                                ""DFID"\n", PFID(fid));
+                                               DFID"\n", PFID(fid));
 
                         if (child == NULL) {
                                 lu_object_put(env, luch);
@@ -3187,6 +3202,7 @@ struct osd_object *osd_object_find(const struct lu_env *env,
                         LU_OBJECT_DEBUG(D_ERROR, env, luch,
                                         "lu_object does not exists "DFID"\n",
                                         PFID(fid));
+                       lu_object_put(env, luch);
                         child = ERR_PTR(-ENOENT);
                 }
         } else
@@ -3977,7 +3993,7 @@ static void osd_key_exit(const struct lu_context *ctx,
 LU_TYPE_INIT_FINI(osd, &osd_key);
 
 struct lu_context_key osd_key = {
-        .lct_tags = LCT_DT_THREAD | LCT_MD_THREAD,
+        .lct_tags = LCT_DT_THREAD | LCT_MD_THREAD | LCT_MG_THREAD | LCT_LOCAL,
         .lct_init = osd_key_init,
         .lct_fini = osd_key_fini,
         .lct_exit = osd_key_exit
@@ -3992,19 +4008,16 @@ static int osd_device_init(const struct lu_env *env, struct lu_device *d,
 
 static int osd_shutdown(const struct lu_env *env, struct osd_device *o)
 {
-        struct osd_thread_info *info = osd_oti_get(env);
+       ENTRY;
 
-        ENTRY;
+       osd_scrub_cleanup(env, o);
 
-        if (o->od_oi_table != NULL)
-                osd_oi_fini(info, o);
+       if (o->od_fsops) {
+               fsfilt_put_ops(o->od_fsops);
+       o->od_fsops = NULL;
+       }
 
-        if (o->od_fsops) {
-                fsfilt_put_ops(o->od_fsops);
-                o->od_fsops = NULL;
-        }
-
-        RETURN(0);
+       RETURN(0);
 }
 
 static int osd_mount(const struct lu_env *env,
@@ -4101,6 +4114,7 @@ static struct lu_device *osd_device_alloc(const struct lu_env *env,
                         l->ld_ops = &osd_lu_ops;
                         o->od_dt_dev.dd_ops = &osd_dt_ops;
                         cfs_spin_lock_init(&o->od_osfs_lock);
+                       cfs_mutex_init(&o->od_otable_mutex);
                         o->od_osfs_age = cfs_time_shift_64(-1000);
                         o->od_capa_hash = init_capa_hash();
                         if (o->od_capa_hash == NULL) {
@@ -4159,14 +4173,12 @@ static int osd_recovery_complete(const struct lu_env *env,
 static int osd_prepare(const struct lu_env *env, struct lu_device *pdev,
                        struct lu_device *dev)
 {
-        struct osd_device      *osd = osd_dev(dev);
-        struct osd_thread_info *oti = osd_oti_get(env);
-        int                     result;
-
-        ENTRY;
+       struct osd_device *osd = osd_dev(dev);
+       int                result;
+       ENTRY;
 
-        /* 1. initialize oi before any file create or file open */
-        result = osd_oi_init(oti, osd);
+       /* 1. setup scrub, including OI files initialization */
+       result = osd_scrub_setup(env, osd);
         if (result < 0)
                 RETURN(result);
 
@@ -4212,7 +4224,7 @@ static struct lu_device_type osd_device_type = {
         .ldt_tags     = LU_DEVICE_DT,
         .ldt_name     = LUSTRE_OSD_NAME,
         .ldt_ops      = &osd_device_type_ops,
-        .ldt_ctx_tags = LCT_MD_THREAD|LCT_DT_THREAD
+        .ldt_ctx_tags = LCT_LOCAL,
 };
 
 /*