Whamcloud - gitweb
LU-11765 ofd: return EAGAIN during 1st CLEANUP_ORPHAN
[fs/lustre-release.git] / lustre / osd-ldiskfs / osd_handler.c
index 2ca791a..adc7b4a 100644 (file)
@@ -706,29 +706,30 @@ put:
 }
 
 /**
- * \retval +v: new filter_fid, does not contain self-fid
- * \retval 0:  filter_fid_old, contains self-fid
+ * \retval +v: new filter_fid does not contain self-fid
+ * \retval 0:  filter_fid_18_23, contains self-fid
  * \retval -v: other failure cases
  */
 int osd_get_idif(struct osd_thread_info *info, struct inode *inode,
                 struct dentry *dentry, struct lu_fid *fid)
 {
-       struct filter_fid_old *ff = &info->oti_ff;
+       struct filter_fid *ff = &info->oti_ff;
        struct ost_id *ostid = &info->oti_ostid;
        int rc;
 
        rc = __osd_xattr_get(inode, dentry, XATTR_NAME_FID, ff, sizeof(*ff));
-       if (rc == sizeof(*ff)) {
-               rc = 0;
-               ostid_set_seq(ostid, le64_to_cpu(ff->ff_seq));
-               rc = ostid_set_id(ostid, le64_to_cpu(ff->ff_objid));
+       if (rc == sizeof(struct filter_fid_18_23)) {
+               struct filter_fid_18_23 *ff_old = (void *)ff;
+
+               ostid_set_seq(ostid, le64_to_cpu(ff_old->ff_seq));
+               rc = ostid_set_id(ostid, le64_to_cpu(ff_old->ff_objid));
                /*
                 * XXX: use 0 as the index for compatibility, the caller will
                 * handle index related issues when necessary.
                 */
                if (!rc)
                        ostid_to_fid(fid, ostid, 0);
-       } else if (rc == sizeof(struct filter_fid)) {
+       } else if (rc >= (int)sizeof(struct filter_fid_24_29)) {
                rc = 1;
        } else if (rc >= 0) {
                rc = -EINVAL;
@@ -782,7 +783,7 @@ static int osd_check_lma(const struct lu_env *env, struct osd_object *obj)
        if (rc == -ENODATA && !fid_is_igif(rfid) && osd->od_check_ff) {
                fid = &lma->lma_self_fid;
                rc = osd_get_idif(info, inode, dentry, fid);
-               if ((rc > 0) || (rc == -ENODATA && osd->od_index_in_idif)) {
+               if (rc > 0 || (rc == -ENODATA && osd->od_index_in_idif)) {
                        /*
                         * For the given OST-object, if it has neither LMA nor
                         * FID in XATTR_NAME_FID, then the given FID (which is
@@ -1706,8 +1707,7 @@ static void osd_trans_commit_cb(struct super_block *sb,
        if (error)
                CERROR("transaction @0x%p commit error: %d\n", th, error);
 
-       dt_txn_hook_commit(th);
-
+       OBD_FAIL_TIMEOUT(OBD_FAIL_OST_DELAY_TRANS, 40);
        /* call per-transaction callbacks if any */
        list_for_each_entry_safe(dcb, tmp, &oh->ot_commit_dcb_list,
                                 dcb_linkage) {
@@ -1894,6 +1894,9 @@ static int osd_trans_start(const struct lu_env *env, struct dt_device *d,
                        oh->ot_credits = osd_transaction_size(dev);
        }
 
+       if (OBD_FAIL_CHECK(OBD_FAIL_OSD_TXN_START))
+               GOTO(out, rc = -EIO);
+
        /*
         * XXX temporary stuff. Some abstraction layer should
         * be used.
@@ -1965,7 +1968,7 @@ static int osd_trans_stop(const struct lu_env *env, struct dt_device *dt,
        struct osd_thandle *oh;
        struct osd_iobuf *iobuf = &oti->oti_iobuf;
        struct osd_device *osd = osd_dt_dev(th->th_dev);
-       struct qsd_instance *qsd = osd->od_quota_slave;
+       struct qsd_instance *qsd = osd_def_qsd(osd);
        struct lquota_trans *qtrans;
        struct list_head truncates = LIST_HEAD_INIT(truncates);
        int rc = 0, remove_agents = 0;
@@ -2090,9 +2093,9 @@ static void osd_object_delete(const struct lu_env *env, struct lu_object *l)
 
        osd_index_fini(obj);
        if (inode != NULL) {
-               struct qsd_instance     *qsd = osd_obj2dev(obj)->od_quota_slave;
-               qid_t                    uid = i_uid_read(inode);
-               qid_t                    gid = i_gid_read(inode);
+               struct qsd_instance *qsd = osd_def_qsd(osd_obj2dev(obj));
+               qid_t                uid = i_uid_read(inode);
+               qid_t                gid = i_gid_read(inode);
 
                obj->oo_inode = NULL;
                iput(inode);
@@ -2311,22 +2314,32 @@ static void osd_conf_get(const struct lu_env *env,
                            sizeof("T10-DIF-TYPE") - 1) == 0) {
                        /* also skip "1/3-" at end */
                        const int type_off = sizeof("T10-DIF-TYPE.");
+                       char type_number = name[type_off - 2];
 
-                       if (interval != 512 && interval != 4096)
+                       if (interval != 512 && interval != 4096) {
                                CERROR("%s: unsupported T10PI sector size %u\n",
                                       d->od_svname, interval);
-                       else if (strcmp(name + type_off, "CRC") == 0)
+                       } else if (type_number != '1' && type_number != '3') {
+                               CERROR("%s: unsupported T10PI type %s\n",
+                                      d->od_svname, name);
+                       } else if (strcmp(name + type_off, "CRC") == 0) {
+                               d->od_t10_type = type_number == '1' ?
+                                       OSD_T10_TYPE1_CRC : OSD_T10_TYPE3_CRC;
                                param->ddp_t10_cksum_type = interval == 512 ?
                                        OBD_CKSUM_T10CRC512 :
                                        OBD_CKSUM_T10CRC4K;
-                       else if (strcmp(name + type_off, "IP") == 0)
+                       } else if (strcmp(name + type_off, "IP") == 0) {
+                               d->od_t10_type = type_number == '1' ?
+                                       OSD_T10_TYPE1_IP : OSD_T10_TYPE3_IP;
                                param->ddp_t10_cksum_type = interval == 512 ?
                                        OBD_CKSUM_T10IP512 :
                                        OBD_CKSUM_T10IP4K;
-                       else
+                       } else {
                                CERROR("%s: unsupported checksum type of "
                                       "T10PI type '%s'",
                                       d->od_svname, name);
+                       }
+
                } else {
                        CERROR("%s: unsupported T10PI type '%s'",
                               d->od_svname, name);
@@ -2369,11 +2382,16 @@ static int osd_commit_async(const struct lu_env *env,
                            struct dt_device *d)
 {
        struct super_block *s = osd_sb(osd_dt_dev(d));
+       int rc;
 
        ENTRY;
 
        CDEBUG(D_HA, "%s: async commit OSD\n", osd_dt_dev(d)->od_svname);
-       RETURN(s->s_op->sync_fs(s, 0));
+       down_read(&s->s_umount);
+       rc = s->s_op->sync_fs(s, 0);
+       up_read(&s->s_umount);
+
+       RETURN(rc);
 }
 
 /* Our own copy of the set readonly functions if present, or NU if not. */
@@ -2582,9 +2600,9 @@ static void osd_inode_getattr(const struct lu_env *env,
                           LA_PROJID | LA_FLAGS | LA_NLINK | LA_RDEV |
                           LA_BLKSIZE | LA_TYPE;
 
-       attr->la_atime   = LTIME_S(inode->i_atime);
-       attr->la_mtime   = LTIME_S(inode->i_mtime);
-       attr->la_ctime   = LTIME_S(inode->i_ctime);
+       attr->la_atime = inode->i_atime.tv_sec;
+       attr->la_mtime = inode->i_mtime.tv_sec;
+       attr->la_ctime = inode->i_ctime.tv_sec;
        attr->la_mode    = inode->i_mode;
        attr->la_size    = i_size_read(inode);
        attr->la_blocks  = inode->i_blocks;
@@ -2605,8 +2623,7 @@ static void osd_inode_getattr(const struct lu_env *env,
                attr->la_flags |= LUSTRE_PROJINHERIT_FL;
 }
 
-static int osd_attr_get(const struct lu_env *env,
-                       struct dt_object *dt,
+static int osd_attr_get(const struct lu_env *env, struct dt_object *dt,
                        struct lu_attr *attr)
 {
        struct osd_object *obj = osd_dt_obj(dt);
@@ -2621,8 +2638,10 @@ static int osd_attr_get(const struct lu_env *env,
 
        spin_lock(&obj->oo_guard);
        osd_inode_getattr(env, obj->oo_inode, attr);
-       if (obj->oo_lma_flags & LUSTRE_ORPHAN_FL)
+       if (obj->oo_lma_flags & LUSTRE_ORPHAN_FL) {
+               attr->la_valid |= LA_FLAGS;
                attr->la_flags |= LUSTRE_ORPHAN_FL;
+       }
        spin_unlock(&obj->oo_guard);
 
        return 0;
@@ -3262,11 +3281,11 @@ static void osd_attr_init(struct osd_thread_info *info, struct osd_object *obj,
 
        if (dof->dof_type != DFT_NODE)
                attr->la_valid &= ~LA_RDEV;
-       if ((valid & LA_ATIME) && (attr->la_atime == LTIME_S(inode->i_atime)))
+       if ((valid & LA_ATIME) && (attr->la_atime == inode->i_atime.tv_sec))
                attr->la_valid &= ~LA_ATIME;
-       if ((valid & LA_CTIME) && (attr->la_ctime == LTIME_S(inode->i_ctime)))
+       if ((valid & LA_CTIME) && (attr->la_ctime == inode->i_ctime.tv_sec))
                attr->la_valid &= ~LA_CTIME;
-       if ((valid & LA_MTIME) && (attr->la_mtime == LTIME_S(inode->i_mtime)))
+       if ((valid & LA_MTIME) && (attr->la_mtime == inode->i_mtime.tv_sec))
                attr->la_valid &= ~LA_MTIME;
 
        result = osd_quota_transfer(inode, attr);
@@ -3749,6 +3768,19 @@ static struct inode *osd_create_local_agent_inode(const struct lu_env *env,
         */
        local->i_gid = current_fsgid();
        ldiskfs_set_inode_state(local, LDISKFS_STATE_LUSTRE_NOSCRUB);
+
+       /* e2fsck doesn't like empty symlinks.  Store remote FID as symlink.
+        * That gives e2fsck something to look at and be happy, and allows
+        * debugging if we need to determine where this symlink came from.
+        */
+       if (S_ISLNK(type)) {
+               CLASSERT(LDISKFS_N_BLOCKS * 4 >= FID_LEN + 1);
+               rc = snprintf((char *)LDISKFS_I(local)->i_data,
+                             LDISKFS_N_BLOCKS * 4, DFID, PFID(fid));
+
+               i_size_write(local, rc);
+               LDISKFS_I(local)->i_disksize = rc;
+       }
        unlock_new_inode(local);
 
        /* Agent inode should not have project ID */
@@ -5256,8 +5288,7 @@ static int osd_index_declare_iam_insert(const struct lu_env *env,
  */
 static int osd_index_iam_insert(const struct lu_env *env, struct dt_object *dt,
                                const struct dt_rec *rec,
-                               const struct dt_key *key, struct thandle *th,
-                               int ignore_quota)
+                               const struct dt_key *key, struct thandle *th)
 {
        struct osd_object *obj = osd_dt_obj(dt);
        struct iam_path_descr *ipd;
@@ -5947,8 +5978,7 @@ static int osd_index_declare_ea_insert(const struct lu_env *env,
  */
 static int osd_index_ea_insert(const struct lu_env *env, struct dt_object *dt,
                               const struct dt_rec *rec,
-                              const struct dt_key *key, struct thandle *th,
-                              int ignore_quota)
+                              const struct dt_key *key, struct thandle *th)
 {
        struct osd_object *obj = osd_dt_obj(dt);
        struct osd_device *osd = osd_dev(dt->do_lu.lo_dev);
@@ -7305,6 +7335,16 @@ static void osd_key_fini(const struct lu_context *ctx,
        struct ldiskfs_inode_info *lli = LDISKFS_I(info->oti_inode);
        struct osd_idmap_cache *idc = info->oti_ins_cache;
 
+       if (info->oti_dio_pages) {
+               int i;
+               for (i = 0; i < PTLRPC_MAX_BRW_PAGES; i++) {
+                       if (info->oti_dio_pages[i])
+                               __free_page(info->oti_dio_pages[i]);
+               }
+               OBD_FREE(info->oti_dio_pages,
+                        sizeof(struct page *) * PTLRPC_MAX_BRW_PAGES);
+       }
+
        if (info->oti_inode != NULL)
                OBD_FREE_PTR(lli);
        if (info->oti_hlock != NULL)
@@ -7312,6 +7352,7 @@ static void osd_key_fini(const struct lu_context *ctx,
        OBD_FREE(info->oti_it_ea_buf, OSD_IT_EA_BUFSIZE);
        lu_buf_free(&info->oti_iobuf.dr_pg_buf);
        lu_buf_free(&info->oti_iobuf.dr_bl_buf);
+       lu_buf_free(&info->oti_iobuf.dr_lnb_buf);
        lu_buf_free(&info->oti_big_buf);
        if (idc != NULL) {
                LASSERT(info->oti_ins_cache_size > 0);
@@ -7407,10 +7448,17 @@ static int osd_shutdown(const struct lu_env *env, struct osd_device *o)
        ENTRY;
 
        /* shutdown quota slave instance associated with the device */
-       if (o->od_quota_slave != NULL) {
-               struct qsd_instance *qsd = o->od_quota_slave;
+       if (o->od_quota_slave_md != NULL) {
+               struct qsd_instance *qsd = o->od_quota_slave_md;
+
+               o->od_quota_slave_md = NULL;
+               qsd_fini(env, qsd);
+       }
 
-               o->od_quota_slave = NULL;
+       if (o->od_quota_slave_dt != NULL) {
+               struct qsd_instance *qsd = o->od_quota_slave_dt;
+
+               o->od_quota_slave_dt = NULL;
                qsd_fini(env, qsd);
        }
 
@@ -7615,6 +7663,12 @@ static int osd_mount(const struct lu_env *env,
        if (lmd_flags & LMD_FLG_NOSCRUB)
                o->od_auto_scrub_interval = AS_NEVER;
 
+       if (blk_queue_nonrot(bdev_get_queue(osd_sb(o)->s_bdev))) {
+               /* do not use pagecache with flash-backed storage */
+               o->od_writethrough_cache = 0;
+               o->od_read_cache = 0;
+       }
+
        GOTO(out, rc = 0);
 
 out_mnt:
@@ -7670,6 +7724,7 @@ static int osd_device_init0(const struct lu_env *env,
        INIT_LIST_HEAD(&o->od_index_restore_list);
        spin_lock_init(&o->od_lock);
        o->od_index_backup_policy = LIBP_NONE;
+       o->od_t10_type = 0;
 
        o->od_read_cache = 1;
        o->od_writethrough_cache = 1;
@@ -7727,11 +7782,29 @@ static int osd_device_init0(const struct lu_env *env,
        LASSERT(l->ld_site->ls_linkage.prev != NULL);
 
        /* initialize quota slave instance */
-       o->od_quota_slave = qsd_init(env, o->od_svname, &o->od_dt_dev,
-                                    o->od_proc_entry);
-       if (IS_ERR(o->od_quota_slave)) {
-               rc = PTR_ERR(o->od_quota_slave);
-               o->od_quota_slave = NULL;
+       /* currently there is no need to prepare qsd_instance_md for OST */
+       if (!o->od_is_ost) {
+               o->od_quota_slave_md = qsd_init(env, o->od_svname,
+                                               &o->od_dt_dev,
+                                               o->od_proc_entry, true);
+               if (IS_ERR(o->od_quota_slave_md)) {
+                       rc = PTR_ERR(o->od_quota_slave_md);
+                       o->od_quota_slave_md = NULL;
+                       GOTO(out_procfs, rc);
+               }
+       }
+
+       o->od_quota_slave_dt = qsd_init(env, o->od_svname, &o->od_dt_dev,
+                                       o->od_proc_entry, false);
+
+       if (IS_ERR(o->od_quota_slave_dt)) {
+               if (o->od_quota_slave_md != NULL) {
+                       qsd_fini(env, o->od_quota_slave_md);
+                       o->od_quota_slave_md = NULL;
+               }
+
+               rc = PTR_ERR(o->od_quota_slave_dt);
+               o->od_quota_slave_dt = NULL;
                GOTO(out_procfs, rc);
        }
 
@@ -7804,6 +7877,7 @@ static int osd_process_config(const struct lu_env *env,
                              struct lu_device *d, struct lustre_cfg *cfg)
 {
        struct osd_device *o = osd_dev(d);
+       ssize_t count;
        int rc;
 
        ENTRY;
@@ -7823,15 +7897,12 @@ static int osd_process_config(const struct lu_env *env,
                break;
        case LCFG_PARAM:
                LASSERT(&o->od_dt_dev);
-               rc = class_process_proc_param(PARAM_OSD, lprocfs_osd_obd_vars,
-                                             cfg, &o->od_dt_dev);
-               if (rc > 0 || rc == -ENOSYS) {
-                       rc = class_process_proc_param(PARAM_OST,
-                                                     lprocfs_osd_obd_vars,
-                                                     cfg, &o->od_dt_dev);
-                       if (rc > 0)
-                               rc = 0;
-               }
+               count  = class_modify_config(cfg, PARAM_OSD,
+                                            &o->od_dt_dev.dd_kobj);
+               if (count < 0)
+                       count = class_modify_config(cfg, PARAM_OST,
+                                                   &o->od_dt_dev.dd_kobj);
+               rc = count > 0 ? 0 : count;
                break;
        case LCFG_PRE_CLEANUP:
                osd_scrub_stop(o);
@@ -7854,14 +7925,17 @@ static int osd_recovery_complete(const struct lu_env *env,
 
        ENTRY;
 
-       if (osd->od_quota_slave == NULL)
+       if (osd->od_quota_slave_md == NULL && osd->od_quota_slave_dt == NULL)
                RETURN(0);
 
        /*
         * start qsd instance on recovery completion, this notifies the quota
         * slave code that we are about to process new requests now
         */
-       rc = qsd_start(env, osd->od_quota_slave);
+       rc = qsd_start(env, osd->od_quota_slave_dt);
+       if (rc == 0 && osd->od_quota_slave_md != NULL)
+               rc = qsd_start(env, osd->od_quota_slave_md);
+
        RETURN(rc);
 }
 
@@ -7929,13 +8003,21 @@ static int osd_prepare(const struct lu_env *env, struct lu_device *pdev,
 
        ENTRY;
 
-       if (osd->od_quota_slave != NULL) {
-               /* set up quota slave objects */
-               result = qsd_prepare(env, osd->od_quota_slave);
+       if (osd->od_quota_slave_md != NULL) {
+               /* set up quota slave objects for inode */
+               result = qsd_prepare(env, osd->od_quota_slave_md);
+               if (result != 0)
+                       RETURN(result);
+       }
+
+       if (osd->od_quota_slave_dt != NULL) {
+               /* set up quota slave objects for block */
+               result = qsd_prepare(env, osd->od_quota_slave_dt);
                if (result != 0)
                        RETURN(result);
        }
 
+
        if (lsd->lsd_feature_incompat & OBD_COMPAT_OST) {
 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 0, 52, 0)
                if (lsd->lsd_feature_rocompat & OBD_ROCOMPAT_IDX_IN_IDIF) {
@@ -8020,8 +8102,33 @@ static struct obd_ops osd_obd_device_ops = {
        .o_health_check = osd_health_check,
 };
 
+static ssize_t track_declares_assert_show(struct kobject *kobj,
+                                  struct attribute *attr,
+                                  char *buf)
+{
+       return sprintf(buf, "%d\n", ldiskfs_track_declares_assert);
+}
+
+static ssize_t track_declares_assert_store(struct kobject *kobj,
+                                          struct attribute *attr,
+                                          const char *buffer, size_t count)
+{
+       bool track_declares_assert;
+       int rc;
+
+       rc = kstrtobool(buffer, &track_declares_assert);
+       if (rc)
+               return rc;
+
+       ldiskfs_track_declares_assert = track_declares_assert;
+
+       return count;
+}
+LUSTRE_RW_ATTR(track_declares_assert);
+
 static int __init osd_init(void)
 {
+       struct kobject *kobj;
        int rc;
 
        CLASSERT(BH_DXLock < sizeof(((struct buffer_head *)0)->b_state) * 8);
@@ -8042,16 +8149,36 @@ static int __init osd_init(void)
                (void *)kallsyms_lookup_name("dev_check_rdonly");
 #endif
 
-       rc = class_register_type(&osd_obd_device_ops, NULL, true,
-                                lprocfs_osd_module_vars,
+       rc = class_register_type(&osd_obd_device_ops, NULL, true, NULL,
                                 LUSTRE_OSD_LDISKFS_NAME, &osd_device_type);
-       if (rc)
+       if (rc) {
                lu_kmem_fini(ldiskfs_caches);
+               return rc;
+       }
+
+       kobj = kset_find_obj(lustre_kset, LUSTRE_OSD_LDISKFS_NAME);
+       if (kobj) {
+               rc = sysfs_create_file(kobj,
+                                      &lustre_attr_track_declares_assert.attr);
+               kobject_put(kobj);
+               if (rc) {
+                       CWARN("osd-ldiskfs: track_declares_assert failed to register with sysfs\n");
+                       rc = 0;
+               }
+       }
        return rc;
 }
 
 static void __exit osd_exit(void)
 {
+       struct kobject *kobj;
+
+       kobj = kset_find_obj(lustre_kset, LUSTRE_OSD_LDISKFS_NAME);
+       if (kobj) {
+               sysfs_remove_file(kobj,
+                                 &lustre_attr_track_declares_assert.attr);
+               kobject_put(kobj);
+       }
        class_unregister_type(LUSTRE_OSD_LDISKFS_NAME);
        lu_kmem_fini(ldiskfs_caches);
 }