Whamcloud - gitweb
LU-12616 obclass: fix MDS start/stop race
[fs/lustre-release.git] / lustre / osp / osp_dev.c
index 4c7f791..dc3249b 100644 (file)
@@ -23,7 +23,7 @@
  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  *
- * Copyright (c) 2012, 2016, Intel Corporation.
+ * Copyright (c) 2012, 2017, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -258,25 +258,23 @@ static int osp_init_last_objid(const struct lu_env *env, struct osp_device *osp)
        if (IS_ERR(dto))
                RETURN(PTR_ERR(dto));
 
+       osp_objid_buf_prep(&osi->osi_lb, &osi->osi_off, &osp->opd_last_id,
+                          osp->opd_index);
+
        /* object will be released in device cleanup path */
-       if (osi->osi_attr.la_size >=
-           sizeof(osi->osi_id) * (osp->opd_index + 1)) {
-               osp_objid_buf_prep(&osi->osi_lb, &osi->osi_off, &osi->osi_id,
-                                  osp->opd_index);
+       if (osi->osi_attr.la_size >= (osi->osi_off + osi->osi_lb.lb_len)) {
                rc = dt_record_read(env, dto, &osi->osi_lb, &osi->osi_off);
                if (rc != 0 && rc != -EFAULT)
                        GOTO(out, rc);
                /* In case of idif bits 32-48 go to f_seq
                 * (see osp_init_last_seq). So don't care
                 * about u64->u32 convertion. */
-               fid->f_oid = osi->osi_id;
+               fid->f_oid = osp->opd_last_id;
        }
 
        if (rc == -EFAULT) { /* fresh LAST_ID */
-               osi->osi_id = 0;
+               osp->opd_last_id = 0;
                fid->f_oid = 0;
-               osp_objid_buf_prep(&osi->osi_lb, &osi->osi_off, &osi->osi_id,
-                                  osp->opd_index);
                rc = osp_write_local_file(env, osp, dto, &osi->osi_lb,
                                          osi->osi_off);
                if (rc != 0)
@@ -320,22 +318,21 @@ static int osp_init_last_seq(const struct lu_env *env, struct osp_device *osp)
        if (IS_ERR(dto))
                RETURN(PTR_ERR(dto));
 
+       osp_objseq_buf_prep(&osi->osi_lb, &osi->osi_off, &fid->f_seq,
+                          osp->opd_index);
+
        /* object will be released in device cleanup path */
-       if (osi->osi_attr.la_size >=
-           sizeof(osi->osi_id) * (osp->opd_index + 1)) {
-               osp_objseq_buf_prep(&osi->osi_lb, &osi->osi_off, &fid->f_seq,
-                                  osp->opd_index);
+       if (osi->osi_attr.la_size >= (osi->osi_off + osi->osi_lb.lb_len)) {
                rc = dt_record_read(env, dto, &osi->osi_lb, &osi->osi_off);
                if (rc != 0 && rc != -EFAULT)
                        GOTO(out, rc);
                if (fid_is_idif(fid))
-                       fid->f_seq = fid_idif_seq(osi->osi_id, osp->opd_index);
+                       fid->f_seq = fid_idif_seq(osp->opd_last_id,
+                                                 osp->opd_index);
        }
 
        if (rc == -EFAULT) { /* fresh OSP */
                fid->f_seq = 0;
-               osp_objseq_buf_prep(&osi->osi_lb, &osi->osi_off, &fid->f_seq,
-                                   osp->opd_index);
                rc = osp_write_local_file(env, osp, dto, &osi->osi_lb,
                                          osi->osi_off);
                if (rc != 0)
@@ -412,6 +409,7 @@ static int osp_last_used_init(const struct lu_env *env, struct osp_device *osp)
                GOTO(out, rc = -EINVAL);
        }
 
+       osp_fid_to_obdid(&osp->opd_last_used_fid, &osp->opd_last_id);
        CDEBUG(D_INFO, "%s: Init last used fid "DFID"\n",
               osp->opd_obd->obd_name, PFID(&osp->opd_last_used_fid));
 out:
@@ -603,6 +601,8 @@ static int osp_shutdown(const struct lu_env *env, struct osp_device *d)
 
        rc = osp_disconnect(d);
 
+       osp_statfs_fini(d);
+
        if (!d->opd_connect_mdt) {
                /* stop sync thread */
                osp_sync_fini(d);
@@ -636,9 +636,11 @@ static int osp_shutdown(const struct lu_env *env, struct osp_device *d)
 static int osp_process_config(const struct lu_env *env,
                              struct lu_device *dev, struct lustre_cfg *lcfg)
 {
-       struct osp_device               *d = lu2osp_dev(dev);
-       struct obd_device               *obd = d->opd_obd;
-       int                              rc;
+       struct osp_device *d = lu2osp_dev(dev);
+       struct dt_device *dt = lu2dt_dev(dev);
+       struct obd_device *obd = d->opd_obd;
+       ssize_t count;
+       int rc;
 
        ENTRY;
 
@@ -654,22 +656,20 @@ static int osp_process_config(const struct lu_env *env,
                rc = osp_shutdown(env, d);
                break;
        case LCFG_PARAM:
-               LASSERT(obd);
-               rc = class_process_proc_param(d->opd_connect_mdt ?
-                                             PARAM_OSP : PARAM_OSC,
-                                             obd->obd_vars, lcfg, obd);
-               if (rc > 0)
-                       rc = 0;
-               if (rc == -ENOSYS) {
-                       /* class_process_proc_param() haven't found matching
-                        * parameter and returned ENOSYS so that layer(s)
+               count = class_modify_config(lcfg, d->opd_connect_mdt ?
+                                                 PARAM_OSP : PARAM_OSC,
+                                           &dt->dd_kobj);
+               if (count < 0) {
+                       /* class_modify_config() hasn't found a matching
+                        * parameter and returned an error so that layer(s)
                         * below could use that. But OSP is the bottom, so
-                        * just ignore it */
+                        * just ignore it
+                        */
                        CERROR("%s: unknown param %s\n",
                               (char *)lustre_cfg_string(lcfg, 0),
                               (char *)lustre_cfg_string(lcfg, 1));
-                       rc = 0;
                }
+               rc = 0;
                break;
        default:
                CERROR("%s: unknown command %u\n",
@@ -730,7 +730,7 @@ const struct lu_device_operations osp_lu_ops = {
  * \retval negative    negative errno if get statfs failed.
  */
 static int osp_statfs(const struct lu_env *env, struct dt_device *dev,
-                     struct obd_statfs *sfs)
+                     struct obd_statfs *sfs, struct obd_statfs_info *info)
 {
        struct osp_device *d = dt2osp_dev(dev);
        struct obd_import *imp = d->opd_obd->u.cli.cl_import;
@@ -743,35 +743,40 @@ static int osp_statfs(const struct lu_env *env, struct dt_device *dev,
        if (unlikely(d->opd_imp_active == 0))
                RETURN(-ENOTCONN);
 
+       /* return recently updated data */
+       *sfs = d->opd_statfs;
+       if (info) {
+               info->os_reserved_mb_low = d->opd_reserved_mb_low;
+               info->os_reserved_mb_high = d->opd_reserved_mb_high;
+       }
+
        if (d->opd_pre == NULL)
                RETURN(0);
 
-       /* return recently updated data */
-       *sfs = d->opd_statfs;
+       CDEBUG(D_OTHER, "%s: %llu blocks, %llu free, %llu avail, "
+              "%u reserved mb low, %u reserved mb high,"
+              "%llu files, %llu free files\n", d->opd_obd->obd_name,
+              sfs->os_blocks, sfs->os_bfree, sfs->os_bavail,
+              d->opd_reserved_mb_low, d->opd_reserved_mb_high,
+              sfs->os_files, sfs->os_ffree);
+
+
+       if (info && !info->os_enable_pre)
+               RETURN(0);
 
        /*
         * layer above osp (usually lod) can use ffree to estimate
         * how many objects are available for immediate creation
         */
        spin_lock(&d->opd_pre_lock);
-       LASSERTF(fid_seq(&d->opd_pre_last_created_fid) ==
-                fid_seq(&d->opd_pre_used_fid),
-                "last_created "DFID", next_fid "DFID"\n",
-                PFID(&d->opd_pre_last_created_fid),
-                PFID(&d->opd_pre_used_fid));
-       sfs->os_fprecreated = fid_oid(&d->opd_pre_last_created_fid) -
-                             fid_oid(&d->opd_pre_used_fid);
+       sfs->os_fprecreated = osp_fid_diff(&d->opd_pre_last_created_fid,
+                                          &d->opd_pre_used_fid);
        sfs->os_fprecreated -= d->opd_pre_reserved;
        LASSERTF(sfs->os_fprecreated <= OST_MAX_PRECREATE * 2,
                 "last_created "DFID", next_fid "DFID", reserved %llu\n",
                 PFID(&d->opd_pre_last_created_fid), PFID(&d->opd_pre_used_fid),
                 d->opd_pre_reserved);
        spin_unlock(&d->opd_pre_lock);
-
-       CDEBUG(D_OTHER, "%s: %llu blocks, %llu free, %llu avail, "
-              "%llu files, %llu free files\n", d->opd_obd->obd_name,
-              sfs->os_blocks, sfs->os_bfree, sfs->os_bavail,
-              sfs->os_files, sfs->os_ffree);
        RETURN(0);
 }
 
@@ -795,11 +800,10 @@ static int osp_sync_timeout(void *data)
 static int osp_sync(const struct lu_env *env, struct dt_device *dev)
 {
        struct osp_device *d = dt2osp_dev(dev);
-       cfs_time_t         expire;
        struct l_wait_info lwi = { 0 };
+       time64_t start = ktime_get_seconds();
        int recs, rc = 0;
-       unsigned long start = cfs_time_current();
-       __u64 old;
+       u64 old;
 
        ENTRY;
 
@@ -821,8 +825,7 @@ static int osp_sync(const struct lu_env *env, struct dt_device *dev)
               atomic_read(&d->opd_async_updates_count));
 
        /* make sure the connection is fine */
-       expire = cfs_time_shift(obd_timeout);
-       lwi = LWI_TIMEOUT(expire - cfs_time_current(), osp_sync_timeout, d);
+       lwi = LWI_TIMEOUT(cfs_time_seconds(obd_timeout), osp_sync_timeout, d);
        rc = l_wait_event(d->opd_sync_barrier_waitq,
                          atomic_read(&d->opd_async_updates_count) == 0,
                          &lwi);
@@ -830,14 +833,13 @@ static int osp_sync(const struct lu_env *env, struct dt_device *dev)
        if (rc != 0)
                GOTO(out, rc);
 
-       CDEBUG(D_CACHE, "%s: processed %lu\n", d->opd_obd->obd_name,
-              atomic64_read(&d->opd_sync_processed_recs));
+       CDEBUG(D_CACHE, "%s: processed %llu\n", d->opd_obd->obd_name,
+              (unsigned long long)atomic64_read(&d->opd_sync_processed_recs));
 
        while (atomic64_read(&d->opd_sync_processed_recs) < old + recs) {
                __u64 last = atomic64_read(&d->opd_sync_processed_recs);
                /* make sure the connection is fine */
-               expire = cfs_time_shift(obd_timeout);
-               lwi = LWI_TIMEOUT(expire - cfs_time_current(),
+               lwi = LWI_TIMEOUT(cfs_time_seconds(obd_timeout),
                                  osp_sync_timeout, d);
                l_wait_event(d->opd_sync_barrier_waitq,
                             atomic64_read(&d->opd_sync_processed_recs)
@@ -869,8 +871,7 @@ static int osp_sync(const struct lu_env *env, struct dt_device *dev)
        while (atomic_read(&d->opd_sync_rpcs_in_flight) > 0) {
                old = atomic_read(&d->opd_sync_rpcs_in_flight);
 
-               expire = cfs_time_shift(obd_timeout);
-               lwi = LWI_TIMEOUT(expire - cfs_time_current(),
+               lwi = LWI_TIMEOUT(cfs_time_seconds(obd_timeout),
                                  osp_sync_timeout, d);
                l_wait_event(d->opd_sync_barrier_waitq,
                             atomic_read(&d->opd_sync_rpcs_in_flight) == 0,
@@ -893,8 +894,8 @@ out:
        atomic_dec(&d->opd_sync_barrier);
        osp_sync_check_for_work(d);
 
-       CDEBUG(D_CACHE, "%s: done in %lu: rc = %d\n", d->opd_obd->obd_name,
-              cfs_time_current() - start, rc);
+       CDEBUG(D_CACHE, "%s: done in %lld: rc = %d\n", d->opd_obd->obd_name,
+              ktime_get_seconds() - start, rc);
 
        RETURN(rc);
 }
@@ -1014,7 +1015,6 @@ static int osp_init0(const struct lu_env *env, struct osp_device *osp,
 {
        struct obd_device       *obd;
        struct obd_import       *imp;
-       class_uuid_t            uuid;
        char                    *src, *tgt, *mdt, *osdname = NULL;
        int                     rc;
        long                    idx;
@@ -1135,14 +1135,7 @@ static int osp_init0(const struct lu_env *env, struct osp_device *osp,
        strcat(osdname, "-osd");
        CDEBUG(D_HA, "%s: connect to %s (%s)\n", obd->obd_name, osdname, src);
 
-       if (osp->opd_connect_mdt) {
-               struct client_obd *cli = &osp->opd_obd->u.cli;
-
-               OBD_ALLOC(cli->cl_rpc_lock, sizeof(*cli->cl_rpc_lock));
-               if (!cli->cl_rpc_lock)
-                       GOTO(out_fini, rc = -ENOMEM);
-               osp_init_rpc_lock(cli->cl_rpc_lock);
-       }
+       osp_init_rpc_lock(osp);
 
        osp->opd_dt_dev.dd_lu_dev.ld_ops = &osp_lu_ops;
        osp->opd_dt_dev.dd_ops = &osp_dt_ops;
@@ -1164,7 +1157,7 @@ static int osp_init0(const struct lu_env *env, struct osp_device *osp,
                GOTO(out_ref, rc);
        }
 
-       osp_lprocfs_init(osp);
+       osp_tunables_init(osp);
 
        rc = obd_fid_init(osp->opd_obd, NULL, osp->opd_connect_mdt ?
                          LUSTRE_SEQ_METADATA : LUSTRE_SEQ_DATA);
@@ -1198,19 +1191,21 @@ static int osp_init0(const struct lu_env *env, struct osp_device *osp,
                if (rc < 0)
                        GOTO(out_precreat, rc);
        } else {
+               osp->opd_got_disconnected = 1;
                rc = osp_update_init(osp);
                if (rc != 0)
                        GOTO(out_fid, rc);
        }
 
+       rc = osp_init_statfs(osp);
+       if (rc)
+               GOTO(out_precreat, rc);
+
        ns_register_cancel(obd->obd_namespace, osp_cancel_weight);
 
        /*
         * Initiate connect to OST
         */
-       ll_generate_random_uuid(uuid);
-       class_uuid_unparse(uuid, &osp->opd_cluuid);
-
        imp = obd->u.cli.cl_import;
 
        rc = ptlrpc_init_import(imp);
@@ -1236,21 +1231,11 @@ out_last_used:
 out_fid:
        obd_fid_fini(osp->opd_obd);
 out_proc:
-       ptlrpc_lprocfs_unregister_obd(obd);
-       lprocfs_obd_cleanup(obd);
-       if (osp->opd_symlink)
-               lprocfs_remove(&osp->opd_symlink);
+       osp_tunables_fini(osp);
        client_obd_cleanup(obd);
 out_ref:
        ptlrpcd_decref();
 out_disconnect:
-       if (osp->opd_connect_mdt) {
-               struct client_obd *cli = &osp->opd_obd->u.cli;
-               if (cli->cl_rpc_lock != NULL) {
-                       OBD_FREE_PTR(cli->cl_rpc_lock);
-                       cli->cl_rpc_lock = NULL;
-               }
-       }
        obd_disconnect(osp->opd_storage_exp);
 out_fini:
        if (osdname)
@@ -1353,20 +1338,7 @@ static struct lu_device *osp_device_fini(const struct lu_env *env,
                obd_disconnect(osp->opd_storage_exp);
        }
 
-       if (osp->opd_symlink)
-               lprocfs_remove(&osp->opd_symlink);
-
        LASSERT(osp->opd_obd);
-       ptlrpc_lprocfs_unregister_obd(osp->opd_obd);
-       lprocfs_obd_cleanup(osp->opd_obd);
-
-       if (osp->opd_connect_mdt) {
-               struct client_obd *cli = &osp->opd_obd->u.cli;
-               if (cli->cl_rpc_lock != NULL) {
-                       OBD_FREE_PTR(cli->cl_rpc_lock);
-                       cli->cl_rpc_lock = NULL;
-               }
-       }
 
        rc = client_obd_cleanup(osp->opd_obd);
        if (rc != 0) {
@@ -1374,6 +1346,8 @@ static struct lu_device *osp_device_fini(const struct lu_env *env,
                RETURN(ERR_PTR(rc));
        }
 
+       osp_tunables_fini(osp);
+
        ptlrpcd_decref();
 
        RETURN(NULL);
@@ -1519,7 +1493,7 @@ static int osp_obd_disconnect(struct obd_export *exp)
  * \retval negative    negative errno if statfs failed.
  */
 static int osp_obd_statfs(const struct lu_env *env, struct obd_export *exp,
-                         struct obd_statfs *osfs, __u64 unused, __u32 flags)
+                         struct obd_statfs *osfs, time64_t unused, __u32 flags)
 {
        struct obd_statfs       *msfs;
        struct ptlrpc_request   *req;
@@ -1633,12 +1607,10 @@ static int osp_import_event(struct obd_device *obd, struct obd_import *imp,
                d->opd_imp_connected = 1;
                d->opd_imp_seen_connected = 1;
                d->opd_obd->obd_inactive = 0;
+               wake_up(&d->opd_pre_waitq);
                if (d->opd_connect_mdt)
                        break;
 
-               if (d->opd_pre != NULL)
-                       wake_up(&d->opd_pre_waitq);
-
                osp_sync_check_for_work(d);
                CDEBUG(D_HA, "got connected\n");
                break;
@@ -1709,9 +1681,6 @@ static int osp_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
                rc = ptlrpc_set_import_active(obd->u.cli.cl_import,
                                              data->ioc_offset);
                break;
-       case OBD_IOC_PING_TARGET:
-               rc = ptlrpc_obd_ping(obd);
-               break;
        default:
                CERROR("%s: unrecognized ioctl %#x by %s\n", obd->obd_name,
                       cmd, current_comm());
@@ -1918,7 +1887,7 @@ static struct obd_ops osp_obd_device_ops = {
        .o_fid_alloc    = osp_fid_alloc,
 };
 
-struct llog_operations osp_mds_ost_orig_logops;
+static struct obd_type *sym;
 
 /**
  * Initialize OSP module.
@@ -1934,22 +1903,20 @@ struct llog_operations osp_mds_ost_orig_logops;
  */
 static int __init osp_init(void)
 {
-       struct obd_type *type;
        int rc;
 
        rc = lu_kmem_init(osp_caches);
        if (rc)
                return rc;
 
-
-       rc = class_register_type(&osp_obd_device_ops, NULL, true, NULL,
+       rc = class_register_type(&osp_obd_device_ops, NULL, false, NULL,
                                 LUSTRE_OSP_NAME, &osp_device_type);
        if (rc != 0) {
                lu_kmem_fini(osp_caches);
                return rc;
        }
 
-       rc = class_register_type(&lwp_obd_device_ops, NULL, true, NULL,
+       rc = class_register_type(&lwp_obd_device_ops, NULL, false, NULL,
                                 LUSTRE_LWP_NAME, &lwp_device_type);
        if (rc != 0) {
                class_unregister_type(LUSTRE_OSP_NAME);
@@ -1957,24 +1924,15 @@ static int __init osp_init(void)
                return rc;
        }
 
-       /* Note: add_rec/delcare_add_rec will be only used by catalogs */
-       osp_mds_ost_orig_logops = llog_osd_ops;
-       osp_mds_ost_orig_logops.lop_add = llog_cat_add_rec;
-       osp_mds_ost_orig_logops.lop_declare_add = llog_cat_declare_add_rec;
-
-       /* create "osc" entry in procfs for compatibility purposes */
-       type = class_search_type(LUSTRE_OSC_NAME);
-       if (type != NULL && type->typ_procroot != NULL)
-               return rc;
-
-       type = class_search_type(LUSTRE_OSP_NAME);
-       type->typ_procsym = lprocfs_register("osc", proc_lustre_root,
-                                            NULL, NULL);
-       if (IS_ERR(type->typ_procsym)) {
-               CERROR("osp: can't create compat entry \"osc\": %d\n",
-                      (int) PTR_ERR(type->typ_procsym));
-               type->typ_procsym = NULL;
+       /* create "osc" entry for compatibility purposes */
+       sym = class_add_symlinks(LUSTRE_OSC_NAME, false);
+       if (IS_ERR(sym)) {
+               rc = PTR_ERR(sym);
+               /* does a real "osc" already exist? */
+               if (rc == -EEXIST)
+                       rc = 0;
        }
+
        return rc;
 }
 
@@ -1986,6 +1944,9 @@ static int __init osp_init(void)
  */
 static void __exit osp_exit(void)
 {
+       if (!IS_ERR_OR_NULL(sym))
+               kobject_put(&sym->typ_kobj);
+
        class_unregister_type(LUSTRE_LWP_NAME);
        class_unregister_type(LUSTRE_OSP_NAME);
        lu_kmem_fini(osp_caches);