LU-1406 ofd: grant support

[fs/lustre-release.git] / lustre / ofd / ofd_dev.c
diff --git a/lustre/ofd/ofd_dev.c b/lustre/ofd/ofd_dev.c

index 7d9b436..c0e3f8f 100644 (file)
--- a/lustre/ofd/ofd_dev.c
+++ b/lustre/ofd/ofd_dev.c
@@ -306,6 +306,13 @@ static int ofd_recovery_complete(const struct lu_env *env,
  
         ENTRY;
  
+       /* Grant space for object precreation on the self export.
+        * This initial reserved space (i.e. 20MB for zfs and 560KB for ldiskfs)
+        * is enough to create 20k objects. It is then adapted based on the
+        * precreate request size (see ofd_grant_create()).
+        */
+       ofd_grant_connect(env, dev->ld_obd->obd_self_export,
+                         OST_MAX_PRECREATE * ofd->ofd_dt_conf.ddp_inodespace);
         rc = next->ld_ops->ldo_recovery_complete(env, next);
         RETURN(rc);
  }
@@ -316,6 +323,82 @@ static struct lu_device_operations ofd_lu_ops = {
         .ldo_recovery_complete  = ofd_recovery_complete,
  };
  
+static int ofd_procfs_init(struct ofd_device *ofd)
+{
+       struct lprocfs_static_vars       lvars;
+       struct obd_device               *obd = ofd_obd(ofd);
+       cfs_proc_dir_entry_t            *entry;
+       int                              rc = 0;
+
+       ENTRY;
+
+       /* lprocfs must be set up before the ofd so state can be safely added
+        * to /proc incrementally as the ofd is set up */
+       lprocfs_ofd_init_vars(&lvars);
+       rc = lprocfs_obd_setup(obd, lvars.obd_vars);
+       if (rc) {
+               CERROR("%s: lprocfs_obd_setup failed: %d.\n",
+                      obd->obd_name, rc);
+               RETURN(rc);
+       }
+
+       rc = lprocfs_alloc_obd_stats(obd, LPROC_OFD_LAST);
+       if (rc) {
+               CERROR("%s: lprocfs_alloc_obd_stats failed: %d.\n",
+                      obd->obd_name, rc);
+               GOTO(obd_cleanup, rc);
+       }
+
+       /* Init OFD private stats here */
+       lprocfs_counter_init(obd->obd_stats, LPROC_OFD_READ_BYTES,
+                            LPROCFS_CNTR_AVGMINMAX, "read_bytes", "bytes");
+       lprocfs_counter_init(obd->obd_stats, LPROC_OFD_WRITE_BYTES,
+                            LPROCFS_CNTR_AVGMINMAX, "write_bytes", "bytes");
+
+       rc = lproc_ofd_attach_seqstat(obd);
+       if (rc) {
+               CERROR("%s: create seqstat failed: %d.\n", obd->obd_name, rc);
+               GOTO(free_obd_stats, rc);
+       }
+
+       entry = lprocfs_register("exports", obd->obd_proc_entry, NULL, NULL);
+       if (IS_ERR(entry)) {
+               rc = PTR_ERR(entry);
+               CERROR("%s: error %d setting up lprocfs for %s\n",
+                      obd->obd_name, rc, "exports");
+               GOTO(free_obd_stats, rc);
+       }
+       obd->obd_proc_exports_entry = entry;
+
+       entry = lprocfs_add_simple(obd->obd_proc_exports_entry, "clear",
+                                  lprocfs_nid_stats_clear_read,
+                                  lprocfs_nid_stats_clear_write, obd, NULL);
+       if (IS_ERR(entry)) {
+               rc = PTR_ERR(entry);
+               CERROR("%s: add proc entry 'clear' failed: %d.\n",
+                      obd->obd_name, rc);
+               GOTO(free_obd_stats, rc);
+       }
+       RETURN(0);
+
+free_obd_stats:
+       lprocfs_free_obd_stats(obd);
+obd_cleanup:
+       lprocfs_obd_cleanup(obd);
+       return rc;
+}
+
+static int ofd_procfs_fini(struct ofd_device *ofd)
+{
+       struct obd_device *obd = ofd_obd(ofd);
+
+       lprocfs_remove_proc_entry("clear", obd->obd_proc_exports_entry);
+       lprocfs_free_per_client_stats(obd);
+       lprocfs_free_obd_stats(obd);
+       lprocfs_obd_cleanup(obd);
+       return 0;
+}
+
  extern int ost_handle(struct ptlrpc_request *req);
  
  static int ofd_init0(const struct lu_env *env, struct ofd_device *m,
@@ -324,6 +407,7 @@ static int ofd_init0(const struct lu_env *env, struct ofd_device *m,
         const char              *dev = lustre_cfg_string(cfg, 0);
         struct ofd_thread_info  *info = NULL;
         struct obd_device       *obd;
+       struct obd_statfs       *osfs;
         int                      rc;
  
         ENTRY;
@@ -340,6 +424,29 @@ static int ofd_init0(const struct lu_env *env, struct ofd_device *m,
  
         obd->u.obt.obt_magic = OBT_MAGIC;
  
+       m->ofd_fmd_max_num = OFD_FMD_MAX_NUM_DEFAULT;
+       m->ofd_fmd_max_age = OFD_FMD_MAX_AGE_DEFAULT;
+
+       cfs_spin_lock_init(&m->ofd_flags_lock);
+       m->ofd_raid_degraded = 0;
+       m->ofd_syncjournal = 0;
+       ofd_slc_set(m);
+       m->ofd_grant_compat_disable = 0;
+
+       /* statfs data */
+       cfs_spin_lock_init(&m->ofd_osfs_lock);
+       m->ofd_osfs_age = cfs_time_shift_64(-1000);
+       m->ofd_osfs_unstable = 0;
+       m->ofd_statfs_inflight = 0;
+       m->ofd_osfs_inflight = 0;
+
+       /* grant data */
+       cfs_spin_lock_init(&m->ofd_grant_lock);
+       m->ofd_tot_dirty = 0;
+       m->ofd_tot_granted = 0;
+       m->ofd_tot_pending = 0;
+       m->ofd_max_group = 0;
+
         cfs_rwlock_init(&obd->u.filter.fo_sptlrpc_lock);
         sptlrpc_rule_set_init(&obd->u.filter.fo_sptlrpc_rset);
  
@@ -354,6 +461,12 @@ static int ofd_init0(const struct lu_env *env, struct ofd_device *m,
         /* set this lu_device to obd, because error handling need it */
         obd->obd_lu_dev = &m->ofd_dt_dev.dd_lu_dev;
  
+       rc = ofd_procfs_init(m);
+       if (rc) {
+               CERROR("Can't init ofd lprocfs, rc %d\n", rc);
+               RETURN(rc);
+       }
+
         /* No connection accepted until configurations will finish */
         obd->obd_no_conn = 1;
         obd->obd_replayable = 1;
@@ -372,7 +485,7 @@ static int ofd_init0(const struct lu_env *env, struct ofd_device *m,
  
         rc = lu_site_init(&m->ofd_site, &m->ofd_dt_dev.dd_lu_dev);
         if (rc)
-               GOTO(err_out, rc);
+               GOTO(err_fini_proc, rc);
         m->ofd_site.ls_top_dev = &m->ofd_dt_dev.dd_lu_dev;
  
         rc = ofd_stack_init(env, m, cfg);
@@ -381,6 +494,21 @@ static int ofd_init0(const struct lu_env *env, struct ofd_device *m,
                 GOTO(err_lu_site, rc);
         }
  
+       /* populate cached statfs data */
+       osfs = &ofd_info(env)->fti_u.osfs;
+       rc = ofd_statfs_internal(env, m, osfs, 0, NULL);
+       if (rc != 0) {
+               CERROR("%s: can't get statfs data, rc %d\n", obd->obd_name, rc);
+               GOTO(err_fini_stack, rc);
+       }
+       if (!IS_PO2(osfs->os_bsize)) {
+               CERROR("%s: blocksize (%d) is not a power of 2\n",
+                               obd->obd_name, osfs->os_bsize);
+               GOTO(err_fini_stack, rc = -EPROTO);
+       }
+       m->ofd_blockbits = cfs_fls(osfs->os_bsize) - 1;
+
+       snprintf(info->fti_u.name, sizeof(info->fti_u.name), "filter-%p", m);
         m->ofd_namespace = ldlm_namespace_new(obd, info->fti_u.name,
                                               LDLM_NAMESPACE_SERVER,
                                               LDLM_NAMESPACE_GREEDY,
@@ -392,6 +520,14 @@ static int ofd_init0(const struct lu_env *env, struct ofd_device *m,
  
         dt_conf_get(env, m->ofd_osd, &m->ofd_dt_conf);
  
+       /* Allow at most ddp_grant_reserved% of the available filesystem space
+        * to be granted to clients, so that any errors in the grant overhead
+        * calculations do not allow granting more space to clients than can be
+        * written. Assumes that in aggregate the grant overhead calculations do
+        * not have more than ddp_grant_reserved% estimation error in them. */
+       m->ofd_grant_ratio =
+               ofd_grant_ratio_conv(m->ofd_dt_conf.ddp_grant_reserved);
+
         rc = ofd_start(env, &m->ofd_dt_dev.dd_lu_dev);
         if (rc)
                 GOTO(err_fini_stack, rc);
@@ -423,7 +559,8 @@ err_fini_stack:
         ofd_stack_fini(env, m, &m->ofd_osd->dd_lu_dev);
  err_lu_site:
         lu_site_fini(&m->ofd_site);
-err_out:
+err_fini_proc:
+       ofd_procfs_fini(m);
         return rc;
  }
  
@@ -450,6 +587,7 @@ static void ofd_fini(const struct lu_env *env, struct ofd_device *m)
  
         ofd_stack_fini(env, m, m->ofd_site.ls_top_dev);
         lu_site_fini(&m->ofd_site);
+       ofd_procfs_fini(m);
         LASSERT(cfs_atomic_read(&d->ld_ref) == 0);
         EXIT;
  }
@@ -545,6 +683,12 @@ int __init ofd_init(void)
         if (rc)
                 return rc;
  
+       rc = ofd_fmd_init();
+       if (rc) {
+               lu_kmem_fini(ofd_caches);
+               return(rc);
+       }
+
         lprocfs_ofd_init_vars(&lvars);
  
         rc = class_register_type(&ofd_obd_ops, NULL, lvars.module_vars,
@@ -554,8 +698,9 @@ int __init ofd_init(void)
  
  void __exit ofd_exit(void)
  {
-       class_unregister_type(LUSTRE_OST_NAME);
+       ofd_fmd_exit();
         lu_kmem_fini(ofd_caches);
+       class_unregister_type(LUSTRE_OST_NAME);
  }
  
  MODULE_AUTHOR("Whamcloud, Inc. <http://www.whamcloud.com/>");