Whamcloud - gitweb
LU-6175 ha: add health_check routine to the MDS, MGS and OSD
[fs/lustre-release.git] / lustre / osd-ldiskfs / osd_handler.c
index 2f93f70..0465afb 100644 (file)
@@ -86,6 +86,9 @@ CFS_MODULE_PARM(ldiskfs_track_declares_assert, "i", int, 0644,
 /* Slab to allocate dynlocks */
 struct kmem_cache *dynlock_cachep;
 
+/* Slab to allocate osd_it_ea */
+struct kmem_cache *osd_itea_cachep;
+
 static struct lu_kmem_descr ldiskfs_caches[] = {
        {
                .ckd_cache = &dynlock_cachep,
@@ -93,6 +96,11 @@ static struct lu_kmem_descr ldiskfs_caches[] = {
                .ckd_size  = sizeof(struct dynlock_handle)
        },
        {
+               .ckd_cache = &osd_itea_cachep,
+               .ckd_name  = "osd_itea_cache",
+               .ckd_size  = sizeof(struct osd_it_ea)
+       },
+       {
                .ckd_cache = NULL
        }
 };
@@ -349,6 +357,8 @@ static struct inode *osd_iget_check(struct osd_thread_info *info,
 
 check_oi:
        if (rc != 0) {
+               struct osd_inode_id saved_id = *id;
+
                LASSERTF(rc == -ESTALE || rc == -ENOENT, "rc = %d\n", rc);
 
                rc = osd_oi_lookup(info, dev, fid, id, OI_CHECK_FLD);
@@ -370,10 +380,24 @@ check_oi:
                 *      to distinguish the 1st case from the 2nd case. */
                if (rc == 0) {
                        if (!IS_ERR(inode) && inode->i_generation != 0 &&
-                           inode->i_generation == id->oii_gen)
+                           inode->i_generation == id->oii_gen) {
                                rc = -ENOENT;
-                       else
+                       } else {
+                               __u32 level = D_LFSCK;
+
                                rc = -EREMCHG;
+                               /* Also print on the console when the OI scrub
+                                * thread is not running. */
+                               if (!thread_is_running(&dev->od_scrub.os_thread))
+                                       level |= D_CONSOLE;
+
+                               CDEBUG(level, "%s: the OI mapping for the FID "
+                                      DFID" become inconsistent, the given ID "
+                                      "%u/%u, the ID in OI mapping %u/%u\n",
+                                      osd_name(dev), PFID(fid),
+                                      saved_id.oii_ino, saved_id.oii_gen,
+                                      id->oii_ino, id->oii_gen);
+                       }
                }
        } else {
                if (id->oii_gen == OSD_OII_NOGEN)
@@ -473,15 +495,14 @@ static int osd_check_lma(const struct lu_env *env, struct osd_object *obj)
        if (rc == -ENODATA && !fid_is_igif(rfid) && osd->od_check_ff) {
                fid = &lma->lma_self_fid;
                rc = osd_get_idif(info, inode, dentry, fid);
-               if ((rc > 0) || (rc == -ENODATA && osd->od_lma_self_repair)) {
+               if ((rc > 0) || (rc == -ENODATA && osd->od_index_in_idif)) {
                        /* For the given OST-object, if it has neither LMA nor
                         * FID in XATTR_NAME_FID, then the given FID (which is
                         * contained in the @obj, from client RPC for locating
                         * the OST-object) is trusted. We use it to generate
                         * the LMA. */
                        osd_lma_self_repair(info, osd, inode, rfid,
-                               fid_is_on_ost(info, osd, fid, OI_CHECK_FLD) ?
-                               LMAC_FID_ON_OST : 0);
+                                           LMAC_FID_ON_OST);
                        RETURN(0);
                }
        }
@@ -508,6 +529,8 @@ static int osd_check_lma(const struct lu_env *env, struct osd_object *obj)
        }
 
        if (fid != NULL && unlikely(!lu_fid_eq(rfid, fid))) {
+               __u32 level = D_LFSCK;
+
                if (fid_is_idif(rfid) && fid_is_idif(fid)) {
                        struct ost_id   *oi   = &info->oti_ostid;
                        struct lu_fid   *fid1 = &info->oti_fid3;
@@ -522,7 +545,7 @@ static int osd_check_lma(const struct lu_env *env, struct osd_object *obj)
                                fid_to_ostid(fid, oi);
                                ostid_to_fid(fid1, oi, idx);
                                if (lu_fid_eq(fid1, rfid)) {
-                                       if (osd->od_lma_self_repair)
+                                       if (osd->od_index_in_idif)
                                                osd_lma_self_repair(info, osd,
                                                        inode, rfid,
                                                        LMAC_FID_ON_OST);
@@ -531,9 +554,13 @@ static int osd_check_lma(const struct lu_env *env, struct osd_object *obj)
                        }
                }
 
-               CDEBUG(D_INODE, "%s: FID "DFID" != self_fid "DFID"\n",
-                      osd_name(osd), PFID(rfid), PFID(fid));
+
                rc = -EREMCHG;
+               if (!thread_is_running(&osd->od_scrub.os_thread))
+                       level |= D_CONSOLE;
+
+               CDEBUG(level, "%s: FID "DFID" != self_fid "DFID"\n",
+                      osd_name(osd), PFID(rfid), PFID(fid));
        }
 
        RETURN(rc);
@@ -638,9 +665,9 @@ trigger:
                                 * whole device. */
                                result = osd_scrub_start(dev, SS_AUTO_FULL |
                                        SS_CLEAR_DRYRUN | SS_CLEAR_FAILOUT);
-                               LCONSOLE_WARN("%.16s: trigger OI scrub by RPC "
-                                             "for "DFID", rc = %d [1]\n",
-                                             osd_name(dev), PFID(fid),result);
+                               CDEBUG(D_LFSCK | D_CONSOLE, "%.16s: trigger OI "
+                                      "scrub by RPC for "DFID", rc = %d [1]\n",
+                                      osd_name(dev), PFID(fid),result);
                                if (result == 0 || result == -EALREADY)
                                        result = -EINPROGRESS;
                                else
@@ -2065,14 +2092,13 @@ static int osd_mkfile(struct osd_thread_info *info, struct osd_object *obj,
                                               osd_sb(osd)->s_root->d_inode,
                                      mode);
         if (!IS_ERR(inode)) {
-                /* Do not update file c/mtime in ldiskfs.
-                 * NB: don't need any lock because no contention at this
-                 * early stage */
-                inode->i_flags |= S_NOCMTIME;
+               /* Do not update file c/mtime in ldiskfs. */
+               inode->i_flags |= S_NOCMTIME;
 
                /* For new created object, it must be consistent,
                 * and it is unnecessary to scrub against it. */
                ldiskfs_set_inode_state(inode, LDISKFS_STATE_LUSTRE_NOSCRUB);
+
                 obj->oo_inode = inode;
                 result = 0;
         } else {
@@ -2299,13 +2325,16 @@ static int __osd_object_create(struct osd_thread_info *info,
 
        result = osd_create_type_f(dof->dof_type)(info, obj, attr, hint, dof,
                                                  th);
-        if (result == 0) {
+       if (result == 0) {
                osd_attr_init(info, obj, attr, dof);
                osd_object_init0(obj);
-               /* bz 24037 */
-               if (obj->oo_inode && (obj->oo_inode->i_state & I_NEW))
-                       unlock_new_inode(obj->oo_inode);
-        }
+       }
+
+       if (obj->oo_inode != NULL) {
+               LASSERT(obj->oo_inode->i_state & I_NEW);
+
+               unlock_new_inode(obj->oo_inode);
+       }
 
        /* restore previous umask value */
        current->fs->umask = umask;
@@ -2677,6 +2706,9 @@ static struct inode *osd_create_local_agent_inode(const struct lu_env *env,
                RETURN(local);
        }
 
+       ldiskfs_set_inode_state(local, LDISKFS_STATE_LUSTRE_NOSCRUB);
+       unlock_new_inode(local);
+
        /* Set special LMA flag for local agent inode */
        rc = osd_ea_fid_set(info, local, fid, 0, LMAI_AGENT);
        if (rc != 0) {
@@ -2768,11 +2800,23 @@ static int osd_object_ea_create(const struct lu_env *env, struct dt_object *dt,
        osd_trans_declare_rb(env, th, OSD_OT_REF_ADD);
 
        result = __osd_object_create(info, obj, attr, hint, dof, th);
-       if (result == 0)
-               result = osd_ea_fid_set(info, obj->oo_inode, fid,
+       if (result == 0) {
+               if (fid_is_idif(fid) &&
+                   !osd_dev(dt->do_lu.lo_dev)->od_index_in_idif) {
+                       struct lu_fid *tfid = &info->oti_fid;
+                       struct ost_id *oi   = &info->oti_ostid;
+
+                       fid_to_ostid(fid, oi);
+                       ostid_to_fid(tfid, oi, 0);
+                       result = osd_ea_fid_set(info, obj->oo_inode, tfid,
+                                               LMAC_FID_ON_OST, 0);
+               } else {
+                       result = osd_ea_fid_set(info, obj->oo_inode, fid,
                                fid_is_on_ost(info, osd_obj2dev(obj),
                                              fid, OI_CHECK_FLD) ?
                                LMAC_FID_ON_OST : 0, 0);
+               }
+       }
 
        if (result == 0)
                result = __osd_oi_insert(env, obj, fid, th);
@@ -4185,10 +4229,10 @@ again:
        if (!dev->od_noscrub && ++once == 1) {
                rc = osd_scrub_start(dev, SS_AUTO_PARTIAL | SS_CLEAR_DRYRUN |
                                     SS_CLEAR_FAILOUT);
-               LCONSOLE_WARN("%.16s: trigger OI scrub by RPC for "DFID
-                             ", rc = %d [2]\n",
-                             LDISKFS_SB(osd_sb(dev))->s_es->s_volume_name,
-                             PFID(fid), rc);
+               CDEBUG(D_LFSCK | D_CONSOLE, "%.16s: trigger OI scrub by RPC "
+                      "for "DFID", rc = %d [2]\n",
+                      LDISKFS_SB(osd_sb(dev))->s_es->s_volume_name,
+                      PFID(fid), rc);
                if (rc == 0 || rc == -EALREADY)
                        goto again;
        }
@@ -4627,27 +4671,21 @@ static struct dt_it *osd_it_iam_init(const struct lu_env *env,
                                      __u32 unused,
                                      struct lustre_capa *capa)
 {
-        struct osd_it_iam      *it;
-        struct osd_thread_info *oti = osd_oti_get(env);
-        struct osd_object      *obj = osd_dt_obj(dt);
-        struct lu_object       *lo  = &dt->do_lu;
-        struct iam_path_descr  *ipd;
-        struct iam_container   *bag = &obj->oo_dir->od_container;
+       struct osd_it_iam      *it;
+       struct osd_object      *obj = osd_dt_obj(dt);
+       struct lu_object       *lo  = &dt->do_lu;
+       struct iam_path_descr  *ipd;
+       struct iam_container   *bag = &obj->oo_dir->od_container;
 
        if (!dt_object_exists(dt))
                return ERR_PTR(-ENOENT);
 
-        if (osd_object_auth(env, dt, capa, CAPA_OPC_BODY_READ))
-                return ERR_PTR(-EACCES);
+       if (osd_object_auth(env, dt, capa, CAPA_OPC_BODY_READ))
+               return ERR_PTR(-EACCES);
 
-       if (oti->oti_it_inline) {
-               OBD_ALLOC_PTR(it);
-               if (it == NULL)
-                       return ERR_PTR(-ENOMEM);
-       } else {
-               it = &oti->oti_it;
-               oti->oti_it_inline = 1;
-       }
+       OBD_ALLOC_PTR(it);
+       if (it == NULL)
+               return ERR_PTR(-ENOMEM);
 
        ipd = osd_it_ipd_get(env, bag);
        if (likely(ipd != NULL)) {
@@ -4657,11 +4695,7 @@ static struct dt_it *osd_it_iam_init(const struct lu_env *env,
                iam_it_init(&it->oi_it, bag, IAM_IT_MOVE, ipd);
                return (struct dt_it *)it;
        } else {
-               if (it != &oti->oti_it)
-                       OBD_FREE_PTR(it);
-               else
-                       oti->oti_it_inline = 0;
-
+               OBD_FREE_PTR(it);
                return ERR_PTR(-ENOMEM);
        }
 }
@@ -4672,17 +4706,13 @@ static struct dt_it *osd_it_iam_init(const struct lu_env *env,
 
 static void osd_it_iam_fini(const struct lu_env *env, struct dt_it *di)
 {
-       struct osd_thread_info  *oti = osd_oti_get(env);
        struct osd_it_iam       *it  = (struct osd_it_iam *)di;
        struct osd_object       *obj = it->oi_obj;
 
        iam_it_fini(&it->oi_it);
        osd_ipd_put(env, &obj->oo_dir->od_container, it->oi_ipd);
        lu_object_put(env, &obj->oo_dt.do_lu);
-       if (it != &oti->oti_it)
-               OBD_FREE_PTR(it);
-       else
-               oti->oti_it_inline = 0;
+       OBD_FREE_PTR(it);
 }
 
 /**
@@ -4926,38 +4956,44 @@ static struct dt_it *osd_it_ea_init(const struct lu_env *env,
 {
        struct osd_object       *obj  = osd_dt_obj(dt);
        struct osd_thread_info  *info = osd_oti_get(env);
-       struct osd_it_ea        *it;
+       struct osd_it_ea        *oie;
        struct file             *file;
        struct lu_object        *lo   = &dt->do_lu;
-       struct dentry           *obj_dentry = &info->oti_it_dentry;
+       struct dentry           *obj_dentry;
        ENTRY;
 
        if (!dt_object_exists(dt))
                RETURN(ERR_PTR(-ENOENT));
 
-       if (info->oti_it_inline) {
-               OBD_ALLOC_PTR(it);
-               if (it == NULL)
-                       RETURN(ERR_PTR(-ENOMEM));
-       } else {
-               it = &info->oti_it_ea;
-               info->oti_it_inline = 1;
-       }
+       OBD_SLAB_ALLOC_PTR_GFP(oie, osd_itea_cachep, GFP_NOFS);
+       if (oie == NULL)
+               RETURN(ERR_PTR(-ENOMEM));
+       obj_dentry = &oie->oie_dentry;
 
        obj_dentry->d_inode = obj->oo_inode;
        obj_dentry->d_sb = osd_sb(osd_obj2dev(obj));
        obj_dentry->d_name.hash = 0;
 
-       it->oie_rd_dirent       = 0;
-       it->oie_it_dirent       = 0;
-       it->oie_dirent          = NULL;
-       it->oie_buf             = info->oti_it_ea_buf;
-       it->oie_obj             = obj;
+       oie->oie_rd_dirent       = 0;
+       oie->oie_it_dirent       = 0;
+       oie->oie_dirent          = NULL;
+       /* Borrow info->oti_it_ea_buf when it is not in use; otherwise
+        * allocate a separate buffer for this iterator. */
+       if (unlikely(!info->oti_it_ea_buf_used)) {
+               oie->oie_buf = info->oti_it_ea_buf;
+               info->oti_it_ea_buf_used = 1;
+       } else {
+               OBD_ALLOC(oie->oie_buf, OSD_IT_EA_BUFSIZE);
+               if (oie->oie_buf == NULL) {
+                       /* Do not leak the iterator allocated above. */
+                       OBD_SLAB_FREE_PTR(oie, osd_itea_cachep);
+                       RETURN(ERR_PTR(-ENOMEM));
+               }
+       }
+       oie->oie_obj             = obj;
+
+       file = &oie->oie_file;
 
-       file = &it->oie_file;
-       /* Reset the "file" totally to avoid to reuse any old value from
-        * former readdir handling, the "file->f_pos" should be zero. */
-       memset(file, 0, sizeof(*file));
        /* Only FMODE_64BITHASH or FMODE_32BITHASH should be set, NOT both. */
        if (attr & LUDA_64BITHASH)
                file->f_mode    = FMODE_64BITHASH;
@@ -4969,7 +5000,7 @@ static struct dt_it *osd_it_ea_init(const struct lu_env *env,
        set_file_inode(file, obj->oo_inode);
 
        lu_object_get(lo);
-       RETURN((struct dt_it *) it);
+       RETURN((struct dt_it *) oie);
 }
 
 /**
@@ -4979,18 +5010,19 @@ static struct dt_it *osd_it_ea_init(const struct lu_env *env,
  */
 static void osd_it_ea_fini(const struct lu_env *env, struct dt_it *di)
 {
-        struct osd_thread_info  *info  = osd_oti_get(env);
-        struct osd_it_ea       *it     = (struct osd_it_ea *)di;
-        struct osd_object      *obj    = it->oie_obj;
-        struct inode           *inode  = obj->oo_inode;
+       struct osd_thread_info  *info = osd_oti_get(env);
+       struct osd_it_ea        *oie    = (struct osd_it_ea *)di;
+       struct osd_object       *obj    = oie->oie_obj;
+       struct inode            *inode  = obj->oo_inode;
 
-        ENTRY;
-        it->oie_file.f_op->release(inode, &it->oie_file);
-        lu_object_put(env, &obj->oo_dt.do_lu);
-       if (it != &info->oti_it_ea)
-               OBD_FREE_PTR(it);
+       ENTRY;
+       oie->oie_file.f_op->release(inode, &oie->oie_file);
+       lu_object_put(env, &obj->oo_dt.do_lu);
+       if (unlikely(oie->oie_buf != info->oti_it_ea_buf))
+               OBD_FREE(oie->oie_buf, OSD_IT_EA_BUFSIZE);
        else
-               info->oti_it_inline = 0;
+               info->oti_it_ea_buf_used = 0;
+       OBD_SLAB_FREE_PTR(oie, osd_itea_cachep);
        EXIT;
 }
 
@@ -6113,9 +6145,6 @@ static int osd_device_init0(const struct lu_env *env,
        if (rc != 0)
                GOTO(out_site, rc);
 
-       /* self-repair LMA by default */
-       o->od_lma_self_repair = 1;
-
        INIT_LIST_HEAD(&o->od_ios_list);
        /* setup scrub, including OI files initialization */
        rc = osd_scrub_setup(env, o);
@@ -6306,10 +6335,12 @@ static int osd_obd_disconnect(struct obd_export *exp)
 }
 
 static int osd_prepare(const struct lu_env *env, struct lu_device *pdev,
-                       struct lu_device *dev)
+                      struct lu_device *dev)
 {
-       struct osd_device *osd = osd_dev(dev);
-       int                result = 0;
+       struct osd_device       *osd    = osd_dev(dev);
+       struct lr_server_data   *lsd    =
+                       &osd->od_dt_dev.dd_lu_dev.ld_site->ls_tgt->lut_lsd;
+       int                      result = 0;
        ENTRY;
 
        if (osd->od_quota_slave != NULL) {
@@ -6319,6 +6350,21 @@ static int osd_prepare(const struct lu_env *env, struct lu_device *pdev,
                        RETURN(result);
        }
 
+       if (lsd->lsd_feature_incompat & OBD_COMPAT_OST) {
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 0, 52, 0)
+               if (lsd->lsd_feature_rocompat & OBD_ROCOMPAT_IDX_IN_IDIF) {
+                       osd->od_index_in_idif = 1;
+               } else {
+                       osd->od_index_in_idif = 0;
+                       result = osd_register_proc_index_in_idif(osd);
+                       if (result != 0)
+                               RETURN(result);
+               }
+#else
+               osd->od_index_in_idif = 1;
+#endif
+       }
+
        result = osd_fid_init(env, osd);
 
        RETURN(result);
@@ -6369,6 +6415,21 @@ static struct lu_device_type osd_device_type = {
         .ldt_ctx_tags = LCT_LOCAL,
 };
 
+/**
+ * Health check for the OSD device.
+ *
+ * \retval 0 if od_mnt is set and the superblock lacks MS_RDONLY
+ * \retval 1 if od_mnt is NULL or the superblock has MS_RDONLY set
+ */
+static int osd_health_check(const struct lu_env *env, struct obd_device *obd)
+{
+       struct osd_device *osd = osd_dev(obd->obd_lu_dev);
+
+       /* Test od_mnt before calling osd_sb(); NOTE(review): osd_sb()
+        * presumably reads the superblock through od_mnt - confirm. */
+       return osd->od_mnt == NULL || (osd_sb(osd)->s_flags & MS_RDONLY) != 0;
+}
+
 /*
  * lprocfs legacy support.
  */
@@ -6377,6 +6431,7 @@ static struct obd_ops osd_obd_device_ops = {
        .o_connect      = osd_obd_connect,
        .o_disconnect   = osd_obd_disconnect,
        .o_fid_alloc    = osd_fid_alloc,
+       .o_health_check = osd_health_check,
 };
 
 static int __init osd_mod_init(void)