Whamcloud - gitweb
LU-3285 mdt: use generic grant code at MDT 21/28021/24
author Mikhal Pershin <mike.pershin@intel.com>
Sun, 11 Dec 2016 17:16:25 +0000 (20:16 +0300)
committer Mike Pershin <mike.pershin@intel.com>
Tue, 17 Oct 2017 20:30:21 +0000 (20:30 +0000)
Use grants at MDT for Data-on-MDT needs.
Add a parameter to reserve part of the available space
for metadata so that it is never granted to clients.

Test-Parameters: mdssizegb=20 testlist=dom-performance
Signed-off-by: Mikhal Pershin <mike.pershin@intel.com>
Change-Id: I2612352062871e4edd3817f32e7d96cb95a0a52b
Reviewed-on: https://review.whamcloud.com/28021
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Jinshan Xiong <jinshan.xiong@intel.com>
14 files changed:
lustre/include/lu_target.h
lustre/include/lustre_osc.h
lustre/include/uapi/linux/lustre/lustre_idl.h
lustre/llite/llite_lib.c
lustre/mdc/mdc_request.c
lustre/mdt/mdt_handler.c
lustre/mdt/mdt_internal.h
lustre/mdt/mdt_io.c
lustre/mdt/mdt_lproc.c
lustre/ofd/lproc_ofd.c
lustre/ofd/ofd_dev.c
lustre/osc/osc_request.c
lustre/target/tgt_grant.c
lustre/target/tgt_main.c

index fa9ad61..0e6ed95 100644 (file)
@@ -126,6 +126,9 @@ struct tg_grants_data {
        u64                      tgd_tot_granted;
        /* grant used by I/Os in progress (between prepare and commit) */
        u64                      tgd_tot_pending;
+       /* amount of available space in percentage that is never used for
+        * grants, used on MDT to always keep space for metadata. */
+       u64                      tgd_reserved_pcnt;
        /* number of clients using grants */
        int                      tgd_tot_granted_clients;
        /* shall we grant space to clients not
@@ -520,6 +523,13 @@ long tgt_grant_create(const struct lu_env *env, struct obd_export *exp,
 int tgt_statfs_internal(const struct lu_env *env, struct lu_target *lut,
                        struct obd_statfs *osfs, __u64 max_age,
                        int *from_cache);
+int tgt_tot_dirty_seq_show(struct seq_file *m, void *data);
+int tgt_tot_granted_seq_show(struct seq_file *m, void *data);
+int tgt_tot_pending_seq_show(struct seq_file *m, void *data);
+int tgt_grant_compat_disable_seq_show(struct seq_file *m, void *data);
+ssize_t tgt_grant_compat_disable_seq_write(struct file *file,
+                                          const char __user *buffer,
+                                          size_t count, loff_t *off);
 
 /* target/update_trans.c */
 int distribute_txn_init(const struct lu_env *env,
index d2a3d8d..0f23af7 100644 (file)
@@ -655,7 +655,10 @@ int osc_set_info_async(const struct lu_env *env, struct obd_export *exp,
                       struct ptlrpc_request_set *set);
 int osc_ldlm_resource_invalidate(struct cfs_hash *hs, struct cfs_hash_bd *bd,
                                 struct hlist_node *hnode, void *arg);
-
+int osc_reconnect(const struct lu_env *env, struct obd_export *exp,
+                 struct obd_device *obd, struct obd_uuid *cluuid,
+                 struct obd_connect_data *data, void *localdata);
+int osc_disconnect(struct obd_export *exp);
 int osc_punch_send(struct obd_export *exp, struct obdo *oa,
                   obd_enqueue_update_f upcall, void *cookie);
 
index 71c77e0..7118cdc 100644 (file)
@@ -850,6 +850,7 @@ struct ptlrpc_body_v2 {
                                OBD_CONNECT_BULK_MBITS | OBD_CONNECT_CKSUM | \
                                OBD_CONNECT_MULTIMODRPCS | \
                                OBD_CONNECT_SUBTREE | OBD_CONNECT_LARGE_ACL | \
+                               OBD_CONNECT_GRANT_PARAM | \
                                OBD_CONNECT_FLAGS2)
 
 #define MDT_CONNECT_SUPPORTED2 OBD_CONNECT2_FILE_SECCTX
index 003b44b..960f90c 100644 (file)
@@ -196,6 +196,10 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
                 RETURN(-ENOMEM);
         }
 
+       /* pass client page size via ocd_grant_blkbits, the server should report
+        * back its backend blocksize for grant calculation purpose */
+       data->ocd_grant_blkbits = PAGE_SHIFT;
+
        /* indicate MDT features supported by this client */
        data->ocd_connect_flags = OBD_CONNECT_IBITS    | OBD_CONNECT_NODEVOH  |
                                  OBD_CONNECT_ATTRFID  | OBD_CONNECT_GRANT |
@@ -216,7 +220,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
                                  OBD_CONNECT_DIR_STRIPE |
                                  OBD_CONNECT_BULK_MBITS | OBD_CONNECT_CKSUM |
                                  OBD_CONNECT_SUBTREE |
-                                 OBD_CONNECT_FLAGS2 | OBD_CONNECT_MULTIMODRPCS;
+                                 OBD_CONNECT_MULTIMODRPCS |
+                                 OBD_CONNECT_GRANT_PARAM | OBD_CONNECT_FLAGS2;
 
        data->ocd_connect_flags2 = 0;
 
index 411552c..18eb9ba 100644 (file)
@@ -2612,7 +2612,8 @@ static struct obd_ops mdc_obd_ops = {
         .o_add_conn         = client_import_add_conn,
         .o_del_conn         = client_import_del_conn,
         .o_connect          = client_connect_import,
-        .o_disconnect       = client_disconnect_export,
+       .o_reconnect        = osc_reconnect,
+       .o_disconnect       = osc_disconnect,
         .o_iocontrol        = mdc_iocontrol,
         .o_set_info_async   = mdc_set_info_async,
         .o_statfs           = mdc_statfs,
index 2791ad3..966a5d2 100644 (file)
@@ -415,7 +415,8 @@ static int mdt_statfs(struct tgt_session_info *tsi)
 {
        struct ptlrpc_request           *req = tgt_ses_req(tsi);
        struct mdt_thread_info          *info = tsi2mdt_info(tsi);
-       struct md_device                *next = info->mti_mdt->mdt_child;
+       struct mdt_device               *mdt = info->mti_mdt;
+       struct tg_grants_data           *tgd = &mdt->mdt_lut.lut_tgd;
        struct ptlrpc_service_part      *svcpt;
        struct obd_statfs               *osfs;
        int                             rc;
@@ -440,24 +441,44 @@ static int mdt_statfs(struct tgt_session_info *tsi)
        if (!osfs)
                GOTO(out, rc = -EPROTO);
 
-       /** statfs information are cached in the mdt_device */
-       if (cfs_time_before_64(info->mti_mdt->mdt_osfs_age,
-                              cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS))) {
-               /** statfs data is too old, get up-to-date one */
-               rc = next->md_ops->mdo_statfs(info->mti_env, next, osfs);
-               if (rc)
-                       GOTO(out, rc);
-               spin_lock(&info->mti_mdt->mdt_lock);
-               info->mti_mdt->mdt_osfs = *osfs;
-               info->mti_mdt->mdt_osfs_age = cfs_time_current_64();
-               spin_unlock(&info->mti_mdt->mdt_lock);
-       } else {
-               /** use cached statfs data */
-               spin_lock(&info->mti_mdt->mdt_lock);
-               *osfs = info->mti_mdt->mdt_osfs;
-               spin_unlock(&info->mti_mdt->mdt_lock);
-       }
+       rc = tgt_statfs_internal(tsi->tsi_env, &mdt->mdt_lut, osfs,
+                                cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS),
+                                NULL);
+       if (unlikely(rc))
+               GOTO(out, rc);
 
+       /* at least try to account for cached pages.  its still racy and
+        * might be under-reporting if clients haven't announced their
+        * caches with brw recently */
+       CDEBUG(D_SUPER | D_CACHE, "blocks cached %llu granted %llu"
+              " pending %llu free %llu avail %llu\n",
+              tgd->tgd_tot_dirty, tgd->tgd_tot_granted,
+              tgd->tgd_tot_pending,
+              osfs->os_bfree << tgd->tgd_blockbits,
+              osfs->os_bavail << tgd->tgd_blockbits);
+
+       osfs->os_bavail -= min_t(u64, osfs->os_bavail,
+                                ((tgd->tgd_tot_dirty + tgd->tgd_tot_pending +
+                                  osfs->os_bsize - 1) >> tgd->tgd_blockbits));
+
+       tgt_grant_sanity_check(mdt->mdt_lu_dev.ld_obd, __func__);
+       CDEBUG(D_CACHE, "%llu blocks: %llu free, %llu avail; "
+              "%llu objects: %llu free; state %x\n",
+              osfs->os_blocks, osfs->os_bfree, osfs->os_bavail,
+              osfs->os_files, osfs->os_ffree, osfs->os_state);
+
+       if (!exp_grant_param_supp(tsi->tsi_exp) &&
+           tgd->tgd_blockbits > COMPAT_BSIZE_SHIFT) {
+               /* clients which don't support OBD_CONNECT_GRANT_PARAM
+                * should not see a block size > page size, otherwise
+                * cl_lost_grant goes mad. Therefore, we emulate a 4KB (=2^12)
+                * block size which is the biggest block size known to work
+                * with all client's page size. */
+               osfs->os_blocks <<= tgd->tgd_blockbits - COMPAT_BSIZE_SHIFT;
+               osfs->os_bfree  <<= tgd->tgd_blockbits - COMPAT_BSIZE_SHIFT;
+               osfs->os_bavail <<= tgd->tgd_blockbits - COMPAT_BSIZE_SHIFT;
+               osfs->os_bsize = 1 << COMPAT_BSIZE_SHIFT;
+       }
        if (rc == 0)
                mdt_counter_incr(req, LPROC_MDT_STATFS);
 out:
@@ -4931,8 +4952,9 @@ static int mdt_postrecov(const struct lu_env *, struct mdt_device *);
 static int mdt_init0(const struct lu_env *env, struct mdt_device *m,
                      struct lu_device_type *ldt, struct lustre_cfg *cfg)
 {
-        struct mdt_thread_info    *info;
-        struct obd_device         *obd;
+       struct mdt_thread_info    *info;
+       struct obd_device         *obd;
+       struct tg_grants_data *tgd = &m->mdt_lut.lut_tgd;
         const char                *dev = lustre_cfg_string(cfg, 0);
         const char                *num = lustre_cfg_string(cfg, 2);
         struct lustre_mount_info  *lmi = NULL;
@@ -4989,7 +5011,6 @@ static int mdt_init0(const struct lu_env *env, struct mdt_device *m,
        INIT_LIST_HEAD(&m->mdt_squash.rsi_nosquash_nids);
        init_rwsem(&m->mdt_squash.rsi_sem);
        spin_lock_init(&m->mdt_lock);
-       m->mdt_osfs_age = cfs_time_shift_64(-1000);
        m->mdt_enable_remote_dir = 0;
        m->mdt_enable_remote_dir_gid = 0;
 
@@ -5070,6 +5091,15 @@ static int mdt_init0(const struct lu_env *env, struct mdt_device *m,
        if (rc)
                GOTO(err_free_hsm, rc);
 
+       /* Amount of available space excluded from granting and reserved
+        * for metadata. It is in percentage and 50% is default value. */
+       tgd->tgd_reserved_pcnt = 50;
+
+       if (ONE_MB_BRW_SIZE < (1U << tgd->tgd_blockbits))
+               m->mdt_brw_size = 1U << tgd->tgd_blockbits;
+       else
+               m->mdt_brw_size = ONE_MB_BRW_SIZE;
+
        rc = mdt_fs_setup(env, m, obd, lsi);
        if (rc)
                GOTO(err_tgt, rc);
@@ -5474,7 +5504,8 @@ static int mdt_connect_internal(const struct lu_env *env,
                data->ocd_connect_flags &= ~OBD_CONNECT_XATTR;
 
        if (OCD_HAS_FLAG(data, BRW_SIZE)) {
-               data->ocd_brw_size = min(data->ocd_brw_size, MD_MAX_BRW_SIZE);
+               data->ocd_brw_size = min(data->ocd_brw_size,
+                                        mdt->mdt_brw_size);
                if (data->ocd_brw_size == 0) {
                        CERROR("%s: cli %s/%p ocd_connect_flags: %#llx "
                               "ocd_version: %x ocd_grant: %d ocd_index: %u "
@@ -5488,9 +5519,29 @@ static int mdt_connect_internal(const struct lu_env *env,
                }
        }
 
-       if (OCD_HAS_FLAG(data, GRANT))
-               data->ocd_grant = mdt_grant_connect(env, exp, data->ocd_grant,
-                                                   !reconnect);
+       if (OCD_HAS_FLAG(data, GRANT_PARAM)) {
+               struct dt_device_param *ddp = &mdt->mdt_lut.lut_dt_conf;
+
+               /* client is reporting its page size, for future use */
+               exp->exp_target_data.ted_pagebits = data->ocd_grant_blkbits;
+               data->ocd_grant_blkbits  = mdt->mdt_lut.lut_tgd.tgd_blockbits;
+               /* ddp_inodespace may not be power-of-two value, eg. for ldiskfs
+                * it's LDISKFS_DIR_REC_LEN(20) = 28. */
+               data->ocd_grant_inobits = fls(ddp->ddp_inodespace - 1);
+               /* ocd_grant_tax_kb is in 1K byte blocks */
+               data->ocd_grant_tax_kb = ddp->ddp_extent_tax >> 10;
+               data->ocd_grant_max_blks = ddp->ddp_max_extent_blks;
+       }
+
+       if (OCD_HAS_FLAG(data, GRANT)) {
+               /* Save connect_data we have so far because tgt_grant_connect()
+                * uses it to calculate grant. */
+               exp->exp_connect_data = *data;
+               tgt_grant_connect(env, exp, data, !reconnect);
+       }
+
+       if (OCD_HAS_FLAG(data, MAXBYTES))
+               data->ocd_maxbytes = mdt->mdt_lut.lut_dt_conf.ddp_maxbytes;
 
        /* NB: Disregard the rule against updating
         * exp_connect_data.ocd_connect_flags in this case, since
@@ -5685,11 +5736,15 @@ static inline void mdt_disable_slc(struct mdt_device *mdt)
 
 static int mdt_obd_disconnect(struct obd_export *exp)
 {
-        int rc;
-        ENTRY;
+       int rc;
+
+       ENTRY;
 
-        LASSERT(exp);
-        class_export_get(exp);
+       LASSERT(exp);
+       class_export_get(exp);
+
+       if (!(exp->exp_flags & OBD_OPT_FORCE))
+               tgt_grant_sanity_check(exp->exp_obd, __func__);
 
        if ((exp_connect_flags(exp) & OBD_CONNECT_MDS_MDS) &&
            !(exp_connect_flags(exp) & OBD_CONNECT_LIGHTWEIGHT)) {
@@ -5703,6 +5758,8 @@ static int mdt_obd_disconnect(struct obd_export *exp)
        if (rc != 0)
                CDEBUG(D_IOCTL, "server disconnect error: rc = %d\n", rc);
 
+       tgt_grant_discard(exp);
+
        rc = mdt_export_cleanup(exp);
        nodemap_del_member(exp);
        class_export_put(exp);
@@ -5873,6 +5930,17 @@ static int mdt_destroy_export(struct obd_export *exp)
        LASSERT(list_empty(&exp->exp_outstanding_replies));
        LASSERT(list_empty(&exp->exp_mdt_data.med_open_head));
 
+       /*
+        * discard grants once we're sure no more
+        * interaction with the client is possible
+        */
+       tgt_grant_discard(exp);
+       if (exp_connect_flags(exp) & OBD_CONNECT_GRANT)
+               exp->exp_obd->u.obt.obt_lut->lut_tgd.tgd_tot_granted_clients--;
+
+       if (!(exp->exp_flags & OBD_OPT_FORCE))
+               tgt_grant_sanity_check(exp->exp_obd, __func__);
+
        RETURN(0);
 }
 
index f81cfa1..522e03d 100644 (file)
@@ -221,6 +221,9 @@ struct mdt_device {
 
        int                        mdt_max_ea_size;
 
+       /* preferred BRW size, decided by storage type and capability */
+       __u32                      mdt_brw_size;
+
         struct upcall_cache        *mdt_identity_cache;
 
        unsigned int               mdt_capa_conf:1,
@@ -233,10 +236,6 @@ struct mdt_device {
        /* lock for osfs and md_root */
        spinlock_t                 mdt_lock;
 
-       /* statfs optimization: we cache a bit  */
-       struct obd_statfs          mdt_osfs;
-       __u64                      mdt_osfs_age;
-
         /* root squash */
        struct root_squash_info    mdt_squash;
 
index 2548612..caaadf7 100644 (file)
 #include <dt_object.h>
 #include "mdt_internal.h"
 
-/* --------------- MDT grant code ---------------- */
-
-long mdt_grant_connect(const struct lu_env *env,
-                      struct obd_export *exp,
-                      u64 want, bool conservative)
-{
-       struct mdt_device *mdt = mdt_exp2dev(exp);
-       u64 left;
-       long grant;
-
-       ENTRY;
-
-       dt_statfs(env, mdt->mdt_bottom, &mdt->mdt_osfs);
-
-       left = (mdt->mdt_osfs.os_bavail * mdt->mdt_osfs.os_bsize) / 2;
-
-       grant = left;
-
-       CDEBUG(D_CACHE, "%s: cli %s/%p ocd_grant: %ld want: %llu left: %llu\n",
-              exp->exp_obd->obd_name, exp->exp_client_uuid.uuid,
-              exp, grant, want, left);
-
-       return grant;
-}
-
-void mdt_grant_prepare_write(const struct lu_env *env,
-                            struct obd_export *exp, struct obdo *oa,
-                            struct niobuf_remote *rnb, int niocount)
-{
-       struct mdt_device *mdt = mdt_exp2dev(exp);
-       u64 left;
-
-       ENTRY;
-
-       left = (mdt->mdt_osfs.os_bavail * mdt->mdt_osfs.os_bsize) / 2;
-
-       /* grant more space back to the client if possible */
-       oa->o_grant = left;
-}
-/* ---------------- end of MDT grant code ---------------- */
-
 /* functions below are stubs for now, they will be implemented with
  * grant support on MDT */
 static inline void mdt_io_counter_incr(struct obd_export *exp, int opcode,
@@ -82,19 +41,6 @@ static inline void mdt_io_counter_incr(struct obd_export *exp, int opcode,
        return;
 }
 
-void mdt_grant_prepare_read(const struct lu_env *env,
-                           struct obd_export *exp, struct obdo *oa)
-{
-       return;
-}
-
-void mdt_grant_commit(struct obd_export *exp, unsigned long pending,
-                     int rc)
-{
-       return;
-
-}
-
 static inline void mdt_dom_read_lock(struct mdt_object *mo)
 {
        down_read(&mo->mot_dom_sem);
@@ -174,7 +120,7 @@ static int mdt_preprw_write(const struct lu_env *env, struct obd_export *exp,
 
        /* Process incoming grant info, set OBD_BRW_GRANTED flag and grant some
         * space back if possible */
-       mdt_grant_prepare_write(env, exp, oa, rnb, obj->ioo_bufcnt);
+       tgt_grant_prepare_write(env, exp, oa, rnb, obj->ioo_bufcnt);
 
        mdt_dom_read_lock(mo);
        if (!mdt_object_exists(mo)) {
@@ -191,8 +137,11 @@ static int mdt_preprw_write(const struct lu_env *env, struct obd_export *exp,
                if (unlikely(rc < 0))
                        GOTO(err, rc);
                /* correct index for local buffers to continue with */
-               for (k = 0; k < rc; k++)
-                       lnb[j+k].lnb_flags = rnb[i].rnb_flags;
+               for (k = 0; k < rc; k++) {
+                       lnb[j + k].lnb_flags = rnb[i].rnb_flags;
+                       if (!(rnb[i].rnb_flags & OBD_BRW_GRANTED))
+                               lnb[j + k].lnb_rc = -ENOSPC;
+               }
                j += rc;
                *nr_local += rc;
                tot_bytes += rnb[i].rnb_len;
@@ -209,11 +158,11 @@ err:
 unlock:
        mdt_dom_read_unlock(mo);
        /* tgt_grant_prepare_write() was called, so we must commit */
-       mdt_grant_commit(exp, oa->o_grant_used, rc);
+       tgt_grant_commit(exp, oa->o_grant_used, rc);
        /* let's still process incoming grant information packed in the oa,
         * but without enforcing grant since we won't proceed with the write.
         * Just like a read request actually. */
-       mdt_grant_prepare_read(env, exp, oa);
+       tgt_grant_prepare_read(env, exp, oa);
        return rc;
 }
 
@@ -256,7 +205,7 @@ int mdt_obd_preprw(const struct lu_env *env, int cmd, struct obd_export *exp,
                                      objcount, obj, rnb, nr_local, lnb,
                                      jobid);
        } else if (cmd == OBD_BRW_READ) {
-               mdt_grant_prepare_read(env, exp, oa);
+               tgt_grant_prepare_read(env, exp, oa);
                rc = mdt_preprw_read(env, exp, mdt, mo, la,
                                     obj->ioo_bufcnt, rnb, nr_local, lnb,
                                     jobid);
@@ -368,6 +317,12 @@ out_stop:
        if (rc == -ENOSPC)
                th->th_sync = 1;
 
+
+       if (rc == 0 && granted > 0) {
+               if (tgt_grant_commit_cb_add(th, exp, granted) == 0)
+                       granted = 0;
+       }
+
        th->th_result = rc;
        dt_trans_stop(env, dt, th);
        if (rc == -ENOSPC && retries++ < 3) {
@@ -379,7 +334,8 @@ out_stop:
 out:
        dt_bufs_put(env, dob, lnb, niocount);
        mdt_dom_read_unlock(mo);
-       mdt_grant_commit(exp, granted, old_rc);
+       if (granted > 0)
+               tgt_grant_commit(exp, granted, old_rc);
        RETURN(rc);
 }
 
index c60742e..15deaae 100644 (file)
@@ -775,7 +775,20 @@ LPROC_SEQ_FOPS(mdt_hsm_cdt_control);
 LPROC_SEQ_FOPS_RW_TYPE(mdt, recovery_time_hard);
 LPROC_SEQ_FOPS_RW_TYPE(mdt, recovery_time_soft);
 
+LPROC_SEQ_FOPS_RO(tgt_tot_dirty);
+LPROC_SEQ_FOPS_RO(tgt_tot_granted);
+LPROC_SEQ_FOPS_RO(tgt_tot_pending);
+LPROC_SEQ_FOPS(tgt_grant_compat_disable);
+
 static struct lprocfs_vars lprocfs_mdt_obd_vars[] = {
+       { .name =       "tot_dirty",
+         .fops =       &tgt_tot_dirty_fops             },
+       { .name =       "tot_pending",
+         .fops =       &tgt_tot_pending_fops           },
+       { .name =       "tot_granted",
+         .fops =       &tgt_tot_granted_fops           },
+       { .name =       "grant_compat_disable",
+         .fops =       &tgt_grant_compat_disable_fops  },
        { .name =       "recovery_status",
          .fops =       &mdt_recovery_status_fops               },
        { .name =       "num_exports",
index b23eda2..27fa94d 100644 (file)
@@ -70,69 +70,6 @@ static int ofd_seqs_seq_show(struct seq_file *m, void *data)
 LPROC_SEQ_FOPS_RO(ofd_seqs);
 
 /**
- * Show estimate of total amount of dirty data on clients.
- *
- * \param[in] m                seq_file handle
- * \param[in] data     unused for single entry
- *
- * \retval             0 on success
- * \retval             negative value on error
- */
-static int ofd_tot_dirty_seq_show(struct seq_file *m, void *data)
-{
-       struct obd_device *obd = m->private;
-       struct tg_grants_data *tgd;
-
-       LASSERT(obd != NULL);
-       tgd = &obd->u.obt.obt_lut->lut_tgd;
-       seq_printf(m, "%llu\n", tgd->tgd_tot_dirty);
-       return 0;
-}
-LPROC_SEQ_FOPS_RO(ofd_tot_dirty);
-
-/**
- * Show total amount of space granted to clients.
- *
- * \param[in] m                seq_file handle
- * \param[in] data     unused for single entry
- *
- * \retval             0 on success
- * \retval             negative value on error
- */
-static int ofd_tot_granted_seq_show(struct seq_file *m, void *data)
-{
-       struct obd_device *obd = m->private;
-       struct tg_grants_data *tgd;
-
-       LASSERT(obd != NULL);
-       tgd = &obd->u.obt.obt_lut->lut_tgd;
-       seq_printf(m, "%llu\n", tgd->tgd_tot_granted);
-       return 0;
-}
-LPROC_SEQ_FOPS_RO(ofd_tot_granted);
-
-/**
- * Show total amount of space used by IO in progress.
- *
- * \param[in] m                seq_file handle
- * \param[in] data     unused for single entry
- *
- * \retval             0 on success
- * \retval             negative value on error
- */
-static int ofd_tot_pending_seq_show(struct seq_file *m, void *data)
-{
-       struct obd_device *obd = m->private;
-       struct tg_grants_data *tgd;
-
-       LASSERT(obd != NULL);
-       tgd = &obd->u.obt.obt_lut->lut_tgd;
-       seq_printf(m, "%llu\n", tgd->tgd_tot_pending);
-       return 0;
-}
-LPROC_SEQ_FOPS_RO(ofd_tot_pending);
-
-/**
  * Show total number of grants for precreate.
  *
  * \param[in] m                seq_file handle
@@ -634,70 +571,6 @@ ofd_sync_lock_cancel_seq_write(struct file *file, const char __user *buffer,
 LPROC_SEQ_FOPS(ofd_sync_lock_cancel);
 
 /**
- * Show if grants compatibility mode is disabled.
- *
- * When tgd_grant_compat_disable is set, we don't grant any space to clients
- * not supporting OBD_CONNECT_GRANT_PARAM. Otherwise, space granted to such
- * a client is inflated since it consumes PAGE_SIZE of grant space per
- * block, (i.e. typically 4kB units), but underlaying file system might have
- * block size bigger than page size, e.g. ZFS. See LU-2049 for details.
- *
- * \param[in] m                seq_file handle
- * \param[in] data     unused for single entry
- *
- * \retval             0 on success
- * \retval             negative value on error
- */
-static int ofd_grant_compat_disable_seq_show(struct seq_file *m, void *data)
-{
-       struct obd_device *obd = m->private;
-       struct tg_grants_data *tgd = &obd->u.obt.obt_lut->lut_tgd;
-
-       seq_printf(m, "%u\n", tgd->tgd_grant_compat_disable);
-       return 0;
-}
-
-/**
- * Change grant compatibility mode.
- *
- * Setting tgd_grant_compat_disable prohibit any space granting to clients
- * not supporting OBD_CONNECT_GRANT_PARAM. See details above.
- *
- * \param[in] file     proc file
- * \param[in] buffer   string which represents mode
- *                     1: disable compatibility mode
- *                     0: enable compatibility mode
- * \param[in] count    \a buffer length
- * \param[in] off      unused for single entry
- *
- * \retval             \a count on success
- * \retval             negative number on error
- */
-static ssize_t
-ofd_grant_compat_disable_seq_write(struct file *file,
-                                  const char __user *buffer,
-                                  size_t count, loff_t *off)
-{
-       struct seq_file *m = file->private_data;
-       struct obd_device *obd = m->private;
-       struct tg_grants_data *tgd = &obd->u.obt.obt_lut->lut_tgd;
-       __s64 val;
-       int rc;
-
-       rc = lprocfs_str_to_s64(buffer, count, &val);
-       if (rc)
-               return rc;
-
-       if (val < 0)
-               return -EINVAL;
-
-       tgd->tgd_grant_compat_disable = !!val;
-
-       return count;
-}
-LPROC_SEQ_FOPS(ofd_grant_compat_disable);
-
-/**
  * Show the limit of soft sync RPCs.
  *
  * This value defines how many IO RPCs with OBD_BRW_SOFT_SYNC flag
@@ -898,6 +771,11 @@ LPROC_SEQ_FOPS_RW_TYPE(ofd, ir_factor);
 LPROC_SEQ_FOPS_RW_TYPE(ofd, checksum_dump);
 LPROC_SEQ_FOPS_RW_TYPE(ofd, job_interval);
 
+LPROC_SEQ_FOPS_RO(tgt_tot_dirty);
+LPROC_SEQ_FOPS_RO(tgt_tot_granted);
+LPROC_SEQ_FOPS_RO(tgt_tot_pending);
+LPROC_SEQ_FOPS(tgt_grant_compat_disable);
+
 struct lprocfs_vars lprocfs_ofd_obd_vars[] = {
        { .name =       "seqs_allocated",
          .fops =       &ofd_seqs_fops                  },
@@ -906,11 +784,11 @@ struct lprocfs_vars lprocfs_ofd_obd_vars[] = {
        { .name =       "last_id",
          .fops =       &ofd_last_id_fops               },
        { .name =       "tot_dirty",
-         .fops =       &ofd_tot_dirty_fops             },
+         .fops =       &tgt_tot_dirty_fops             },
        { .name =       "tot_pending",
-         .fops =       &ofd_tot_pending_fops           },
+         .fops =       &tgt_tot_pending_fops           },
        { .name =       "tot_granted",
-         .fops =       &ofd_tot_granted_fops           },
+         .fops =       &tgt_tot_granted_fops           },
        { .name =       "grant_precreate",
          .fops =       &ofd_grant_precreate_fops       },
        { .name =       "precreate_batch",
@@ -940,7 +818,7 @@ struct lprocfs_vars lprocfs_ofd_obd_vars[] = {
        { .name =       "checksum_dump",
          .fops =       &ofd_checksum_dump_fops         },
        { .name =       "grant_compat_disable",
-         .fops =       &ofd_grant_compat_disable_fops  },
+         .fops =       &tgt_grant_compat_disable_fops  },
        { .name =       "client_cache_count",
          .fops =       &ofd_fmd_max_num_fops           },
        { .name =       "client_cache_seconds",
index 43e9acc..fcb477a 100644 (file)
@@ -2902,7 +2902,6 @@ static int ofd_init0(const struct lu_env *env, struct ofd_device *m,
        struct ofd_thread_info *info = NULL;
        struct obd_device *obd;
        struct tg_grants_data *tgd = &m->ofd_lut.lut_tgd;
-       struct obd_statfs *osfs;
        struct lu_fid fid;
        struct nm_config_file *nodemap_config;
        struct obd_device_target *obt;
@@ -2930,22 +2929,8 @@ static int ofd_init0(const struct lu_env *env, struct ofd_device *m,
        m->ofd_raid_degraded = 0;
        m->ofd_syncjournal = 0;
        ofd_slc_set(m);
-       tgd->tgd_grant_compat_disable = 0;
        m->ofd_soft_sync_limit = OFD_SOFT_SYNC_LIMIT_DEFAULT;
 
-       /* statfs data */
-       spin_lock_init(&tgd->tgd_osfs_lock);
-       tgd->tgd_osfs_age = cfs_time_shift_64(-1000);
-       tgd->tgd_osfs_unstable = 0;
-       tgd->tgd_statfs_inflight = 0;
-       tgd->tgd_osfs_inflight = 0;
-
-       /* grant data */
-       spin_lock_init(&tgd->tgd_grant_lock);
-       tgd->tgd_tot_dirty = 0;
-       tgd->tgd_tot_granted = 0;
-       tgd->tgd_tot_pending = 0;
-
        m->ofd_seq_count = 0;
        init_waitqueue_head(&m->ofd_inconsistency_thread.t_ctl_waitq);
        INIT_LIST_HEAD(&m->ofd_inconsistency_list);
@@ -3008,27 +2993,13 @@ static int ofd_init0(const struct lu_env *env, struct ofd_device *m,
        ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL,
                           "filter_ldlm_cb_client", &obd->obd_ldlm_client);
 
-       dt_conf_get(env, m->ofd_osd, &m->ofd_lut.lut_dt_conf);
-
        rc = tgt_init(env, &m->ofd_lut, obd, m->ofd_osd, ofd_common_slice,
                      OBD_FAIL_OST_ALL_REQUEST_NET,
                      OBD_FAIL_OST_ALL_REPLY_NET);
        if (rc)
                GOTO(err_free_ns, rc);
 
-       /* populate cached statfs data */
-       osfs = &ofd_info(env)->fti_u.osfs;
-       rc = tgt_statfs_internal(env, &m->ofd_lut, osfs, 0, NULL);
-       if (rc != 0) {
-               CERROR("%s: can't get statfs data, rc %d\n", obd->obd_name, rc);
-               GOTO(err_fini_lut, rc);
-       }
-       if (!is_power_of_2(osfs->os_bsize)) {
-               CERROR("%s: blocksize (%d) is not a power of 2\n",
-                       obd->obd_name, osfs->os_bsize);
-               GOTO(err_fini_lut, rc = -EPROTO);
-       }
-       tgd->tgd_blockbits = fls(osfs->os_bsize) - 1;
+       tgd->tgd_reserved_pcnt = 0;
 
        if (DT_DEF_BRW_SIZE < (1U << tgd->tgd_blockbits))
                m->ofd_brw_size = 1U << tgd->tgd_blockbits;
@@ -3037,7 +3008,8 @@ static int ofd_init0(const struct lu_env *env, struct ofd_device *m,
 
        m->ofd_cksum_types_supported = cksum_types_supported_server();
        m->ofd_precreate_batch = OFD_PRECREATE_BATCH_DEFAULT;
-       if (osfs->os_bsize * osfs->os_blocks < OFD_PRECREATE_SMALL_FS)
+       if (tgd->tgd_osfs.os_bsize * tgd->tgd_osfs.os_blocks <
+           OFD_PRECREATE_SMALL_FS)
                m->ofd_precreate_batch = OFD_PRECREATE_BATCH_SMALL;
 
        rc = ofd_fs_setup(env, m, obd);
index 87b6c22..e4c6a04 100644 (file)
@@ -2621,16 +2621,14 @@ int osc_set_info_async(const struct lu_env *env, struct obd_export *exp,
 }
 EXPORT_SYMBOL(osc_set_info_async);
 
-static int osc_reconnect(const struct lu_env *env,
-                         struct obd_export *exp, struct obd_device *obd,
-                         struct obd_uuid *cluuid,
-                         struct obd_connect_data *data,
-                         void *localdata)
+int osc_reconnect(const struct lu_env *env, struct obd_export *exp,
+                 struct obd_device *obd, struct obd_uuid *cluuid,
+                 struct obd_connect_data *data, void *localdata)
 {
-        struct client_obd *cli = &obd->u.cli;
+       struct client_obd *cli = &obd->u.cli;
 
-        if (data != NULL && (data->ocd_connect_flags & OBD_CONNECT_GRANT)) {
-                long lost_grant;
+       if (data != NULL && (data->ocd_connect_flags & OBD_CONNECT_GRANT)) {
+               long lost_grant;
                long grant;
 
                spin_lock(&cli->cl_loi_list_lock);
@@ -2651,8 +2649,9 @@ static int osc_reconnect(const struct lu_env *env,
 
        RETURN(0);
 }
+EXPORT_SYMBOL(osc_reconnect);
 
-static int osc_disconnect(struct obd_export *exp)
+int osc_disconnect(struct obd_export *exp)
 {
        struct obd_device *obd = class_exp2obd(exp);
        int rc;
@@ -2679,6 +2678,7 @@ static int osc_disconnect(struct obd_export *exp)
                 osc_del_shrink_grant(&obd->u.cli);
         return rc;
 }
+EXPORT_SYMBOL(osc_disconnect);
 
 int osc_ldlm_resource_invalidate(struct cfs_hash *hs, struct cfs_hash_bd *bd,
                                 struct hlist_node *hnode, void *arg)
index 1caad7c..e3264a5 100644 (file)
@@ -308,6 +308,8 @@ int tgt_statfs_internal(const struct lu_env *env, struct lu_target *lut,
                if (unlikely(rc))
                        GOTO(out, rc);
 
+               osfs->os_namelen = min_t(__u32, osfs->os_namelen, NAME_MAX);
+
                spin_lock(&tgd->tgd_grant_lock);
                spin_lock(&tgd->tgd_osfs_lock);
                /* calculate how much space was written while we released the
@@ -428,6 +430,7 @@ static u64 tgt_grant_space_left(struct obd_export *exp)
        u64                      left;
        u64                      avail;
        u64                      unstable;
+       u64                      reserved;
 
        ENTRY;
        assert_spin_locked(&tgd->tgd_grant_lock);
@@ -438,7 +441,8 @@ static u64 tgt_grant_space_left(struct obd_export *exp)
        unstable = tgd->tgd_osfs_unstable; /* those might be accounted twice */
        spin_unlock(&tgd->tgd_osfs_lock);
 
-       tot_granted = tgd->tgd_tot_granted;
+       reserved = left * tgd->tgd_reserved_pcnt / 100;
+       tot_granted = tgd->tgd_tot_granted + reserved;
 
        if (left < tot_granted) {
                int mask = (left + unstable <
@@ -1500,3 +1504,132 @@ int tgt_grant_commit_cb_add(struct thandle *th, struct obd_export *exp,
        RETURN(rc);
 }
 EXPORT_SYMBOL(tgt_grant_commit_cb_add);
+
+
+/**
+ * Show estimate of total amount of dirty data on clients, i.e. print the
+ * tgd_tot_dirty counter of the target as a single decimal line.
+ *
+ * \param[in] m        seq_file handle, m->private is the obd device
+ * \param[in] data     unused for single entry
+ *
+ * \retval             0 always, there is no failure path in this function
+ */
+int tgt_tot_dirty_seq_show(struct seq_file *m, void *data)
+{
+       struct obd_device *obd = m->private;
+       struct tg_grants_data *tgd;
+
+       LASSERT(obd != NULL);
+       tgd = &obd->u.obt.obt_lut->lut_tgd;
+       seq_printf(m, "%llu\n", tgd->tgd_tot_dirty);
+       return 0;
+}
+EXPORT_SYMBOL(tgt_tot_dirty_seq_show);
+
+/**
+ * Show total amount of space granted to clients, i.e. print the
+ * tgd_tot_granted counter of the target as a single decimal line.
+ *
+ * \param[in] m        seq_file handle, m->private is the obd device
+ * \param[in] data     unused for single entry
+ *
+ * \retval             0 always, there is no failure path in this function
+ */
+int tgt_tot_granted_seq_show(struct seq_file *m, void *data)
+{
+       struct obd_device *obd = m->private;
+       struct tg_grants_data *tgd;
+
+       LASSERT(obd != NULL);
+       tgd = &obd->u.obt.obt_lut->lut_tgd;
+       seq_printf(m, "%llu\n", tgd->tgd_tot_granted);
+       return 0;
+}
+EXPORT_SYMBOL(tgt_tot_granted_seq_show);
+
+/**
+ * Show total amount of space used by IO in progress, i.e. print the
+ * tgd_tot_pending counter of the target as a single decimal line.
+ *
+ * \param[in] m        seq_file handle, m->private is the obd device
+ * \param[in] data     unused for single entry
+ *
+ * \retval             0 always, there is no failure path in this function
+ */
+int tgt_tot_pending_seq_show(struct seq_file *m, void *data)
+{
+       struct obd_device *obd = m->private;
+       struct tg_grants_data *tgd;
+
+       LASSERT(obd != NULL);
+       tgd = &obd->u.obt.obt_lut->lut_tgd;
+       seq_printf(m, "%llu\n", tgd->tgd_tot_pending);
+       return 0;
+}
+EXPORT_SYMBOL(tgt_tot_pending_seq_show);
+
+/**
+ * Show if grants compatibility mode is disabled.
+ *
+ * When tgd_grant_compat_disable is set, we don't grant any space to clients
+ * not supporting OBD_CONNECT_GRANT_PARAM. Otherwise, space granted to such
+ * a client is inflated since it consumes PAGE_SIZE of grant space per
+ * block (i.e. typically 4kB units), but underlying file system might have
+ * block size bigger than page size, e.g. ZFS. See LU-2049 for details.
+ *
+ * \param[in] m        seq_file handle, m->private is the obd device
+ * \param[in] data     unused for single entry
+ *
+ * \retval             0 always, there is no failure path in this
+ *                     function
+ */
+int tgt_grant_compat_disable_seq_show(struct seq_file *m, void *data)
+{
+       struct obd_device *obd = m->private;
+       struct tg_grants_data *tgd = &obd->u.obt.obt_lut->lut_tgd;
+
+       seq_printf(m, "%u\n", tgd->tgd_grant_compat_disable);
+       return 0;
+}
+EXPORT_SYMBOL(tgt_grant_compat_disable_seq_show);
+
+/**
+ * Change grant compatibility mode.
+ *
+ * Setting tgd_grant_compat_disable prohibits any space granting to clients
+ * not supporting OBD_CONNECT_GRANT_PARAM. See details above.
+ *
+ * \param[in] file     proc file, its private_data is the seq_file
+ * \param[in] buffer   string which represents mode
+ *                     positive: disable compatibility mode (stored as 1)
+ *                     0: enable compatibility mode
+ * \param[in] count    \a buffer length
+ * \param[in] off      unused for single entry
+ *
+ * \retval             \a count on success
+ * \retval             -EINVAL if value < 0, or rc of lprocfs_str_to_s64()
+ */
+ssize_t tgt_grant_compat_disable_seq_write(struct file *file,
+                                          const char __user *buffer,
+                                          size_t count, loff_t *off)
+{
+       struct seq_file *m = file->private_data;
+       struct obd_device *obd = m->private;
+       struct tg_grants_data *tgd = &obd->u.obt.obt_lut->lut_tgd;
+       __s64 val;
+       int rc;
+
+       rc = lprocfs_str_to_s64(buffer, count, &val);
+       if (rc)
+               return rc;
+
+       if (val < 0)
+               return -EINVAL;
+
+       tgd->tgd_grant_compat_disable = !!val; /* normalize to 0 or 1 */
+
+       return count;
+}
+EXPORT_SYMBOL(tgt_grant_compat_disable_seq_write);
+
index 4d39237..3783674 100644 (file)
@@ -152,6 +152,8 @@ int tgt_init(const struct lu_env *env, struct lu_target *lut,
        struct lu_attr           attr;
        struct lu_fid            fid;
        struct dt_object        *o;
+       struct tg_grants_data   *tgd = &lut->lut_tgd;
+       struct obd_statfs       *osfs;
        int i, rc = 0;
 
        ENTRY;
@@ -188,6 +190,38 @@ int tgt_init(const struct lu_env *env, struct lu_target *lut,
        if (!obd->obd_replayable)
                RETURN(0);
 
+       /* initialize grant and statfs data in target */
+       dt_conf_get(env, lut->lut_bottom, &lut->lut_dt_conf);
+
+       /* statfs data */
+       spin_lock_init(&tgd->tgd_osfs_lock);
+       tgd->tgd_osfs_age = cfs_time_shift_64(-1000);
+       tgd->tgd_osfs_unstable = 0;
+       tgd->tgd_statfs_inflight = 0;
+       tgd->tgd_osfs_inflight = 0;
+
+       /* grant data */
+       spin_lock_init(&tgd->tgd_grant_lock);
+       tgd->tgd_tot_dirty = 0;
+       tgd->tgd_tot_granted = 0;
+       tgd->tgd_tot_pending = 0;
+       tgd->tgd_grant_compat_disable = 0;
+
+       /* populate cached statfs data */
+       osfs = &tgt_th_info(env)->tti_u.osfs;
+       rc = tgt_statfs_internal(env, lut, osfs, 0, NULL);
+       if (rc != 0) {
+               CERROR("%s: can't get statfs data, rc %d\n", tgt_name(lut),
+                       rc);
+               GOTO(out, rc);
+       }
+       if (!is_power_of_2(osfs->os_bsize)) {
+               CERROR("%s: blocksize (%d) is not a power of 2\n",
+                       tgt_name(lut), osfs->os_bsize);
+               GOTO(out, rc = -EPROTO);
+       }
+       tgd->tgd_blockbits = fls(osfs->os_bsize) - 1;
+
        spin_lock_init(&lut->lut_translock);
        spin_lock_init(&lut->lut_client_bitmap_lock);