Whamcloud - gitweb
LU-14543 target: prevent overflowing of tgd->tgd_tot_granted
[fs/lustre-release.git] / lustre / target / tgt_grant.c
index 99373cf..2e1df78 100644 (file)
@@ -71,7 +71,7 @@
  * Author: Johann Lombardi <johann.lombardi@intel.com>
  */
 
-#define DEBUG_SUBSYSTEM S_FILTER
+#define DEBUG_SUBSYSTEM S_CLASS
 
 #include <obd.h>
 #include <obd_class.h>
@@ -454,9 +454,9 @@ static u64 tgt_grant_space_left(struct obd_export *exp)
                            tot_granted - tgd->tgd_tot_pending) ?
                            D_ERROR : D_CACHE;
 
-               CDEBUG_LIMIT(mask, "%s: cli %s/%p left %llu < tot_grant "
-                            "%llu unstable %llu pending %llu "
-                            "dirty %llu\n",
+               /* the below message is checked in sanityn.sh test_15 */
+               CDEBUG_LIMIT(mask,
+                            "%s: cli %s/%p left=%llu < tot_grant=%llu unstable=%llu pending=%llu dirty=%llu\n",
                             obd->obd_name, exp->exp_client_uuid.uuid, exp,
                             left, tot_granted, unstable,
                             tgd->tgd_tot_pending,
@@ -471,10 +471,10 @@ static u64 tgt_grant_space_left(struct obd_export *exp)
        /* Align left on block size */
        left &= ~((1ULL << tgd->tgd_blockbits) - 1);
 
-       CDEBUG(D_CACHE, "%s: cli %s/%p avail %llu left %llu unstable "
-              "%llu tot_grant %llu pending %llu\n", obd->obd_name,
-              exp->exp_client_uuid.uuid, exp, avail, left, unstable,
-              tot_granted, tgd->tgd_tot_pending);
+       CDEBUG(D_CACHE,
+              "%s: cli %s/%p avail=%llu left=%llu unstable=%llu tot_grant=%llu pending=%llu\n",
+              obd->obd_name, exp->exp_client_uuid.uuid, exp, avail, left,
+              unstable, tot_granted, tgd->tgd_tot_pending);
 
        RETURN(left);
 }
@@ -499,8 +499,7 @@ static void tgt_grant_incoming(const struct lu_env *env, struct obd_export *exp,
        struct tg_export_data   *ted = &exp->exp_target_data;
        struct obd_device       *obd = exp->exp_obd;
        struct tg_grants_data   *tgd = &obd->u.obt.obt_lut->lut_tgd;
-       long                     dirty;
-       long                     dropped;
+       long long                dirty, dropped;
        ENTRY;
 
        assert_spin_locked(&tgd->tgd_grant_lock);
@@ -524,10 +523,19 @@ static void tgt_grant_incoming(const struct lu_env *env, struct obd_export *exp,
 
        /* inflate grant counters if required */
        if (!exp_grant_param_supp(exp)) {
+               u64 tmp;
                oa->o_grant     = tgt_grant_inflate(tgd, oa->o_grant);
                oa->o_dirty     = tgt_grant_inflate(tgd, oa->o_dirty);
-               oa->o_dropped   = tgt_grant_inflate(tgd, (u64)oa->o_dropped);
-               oa->o_undirty   = tgt_grant_inflate(tgd, oa->o_undirty);
+               /* inflation can bump client's wish above 4GB, which doesn't
+                * fit the 32bit o_undirty, so cap the inflated value */
+               tmp = tgt_grant_inflate(tgd, oa->o_undirty);
+               if (tmp >= OBD_MAX_GRANT)
+                       tmp = OBD_MAX_GRANT & ~(1ULL << tgd->tgd_blockbits);
+               oa->o_undirty = tmp;
+               tmp = tgt_grant_inflate(tgd, oa->o_dropped);
+               if (tmp >= OBD_MAX_GRANT)
+                       tmp = OBD_MAX_GRANT & ~(1ULL << tgd->tgd_blockbits);
+               oa->o_dropped = tmp;
        }
 
        dirty = oa->o_dirty;
@@ -542,13 +550,13 @@ static void tgt_grant_incoming(const struct lu_env *env, struct obd_export *exp,
        tgd->tgd_tot_dirty += dirty - ted->ted_dirty;
        if (ted->ted_grant < dropped) {
                CDEBUG(D_CACHE,
-                      "%s: cli %s/%p reports %lu dropped > grant %lu\n",
+                      "%s: cli %s/%p reports %llu dropped > grant %lu\n",
                       obd->obd_name, exp->exp_client_uuid.uuid, exp, dropped,
                       ted->ted_grant);
                dropped = 0;
        }
        if (tgd->tgd_tot_granted < dropped) {
-               CERROR("%s: cli %s/%p reports %lu dropped > tot_grant %llu\n",
+               CERROR("%s: cli %s/%p reports %llu dropped > tot_grant %llu\n",
                       obd->obd_name, exp->exp_client_uuid.uuid, exp,
                       dropped, tgd->tgd_tot_granted);
                dropped = 0;
@@ -597,6 +605,14 @@ static void tgt_grant_shrink(struct obd_export *exp, struct obdo *oa,
 
        grant_shrink = oa->o_grant;
 
+       if (ted->ted_grant < grant_shrink) {
+               CDEBUG(D_CACHE,
+                      "%s: cli %s/%p wants %lu shrinked > grant %lu\n",
+                      obd->obd_name, exp->exp_client_uuid.uuid, exp,
+                      grant_shrink, ted->ted_grant);
+               grant_shrink = ted->ted_grant;
+       }
+
        ted->ted_grant -= grant_shrink;
        tgd->tgd_tot_granted -= grant_shrink;
 
@@ -868,6 +884,7 @@ static void tgt_grant_check(const struct lu_env *env, struct obd_export *exp,
  *                             have
  * \param[in] left             remaining free space with granted space taken
  *                             out
+ * \param[in] chunk            grant allocation unit
  * \param[in] conservative     if set to true, the server should be cautious
  *                             and limit how much space is granted back to the
  *                             client. Otherwise, the server should try hard to
@@ -886,6 +903,9 @@ static long tgt_grant_alloc(struct obd_export *exp, u64 curgrant,
 
        ENTRY;
 
+       if (OBD_FAIL_CHECK(OBD_FAIL_TGT_NO_GRANT))
+               RETURN(0);
+
        /* When tgd_grant_compat_disable is set, we don't grant any space to
         * clients not supporting OBD_CONNECT_GRANT_PARAM.
         * Otherwise, space granted to such a client is inflated since it
@@ -945,12 +965,13 @@ static long tgt_grant_alloc(struct obd_export *exp, u64 curgrant,
        tgd->tgd_tot_granted += grant;
        ted->ted_grant += grant;
 
-       if (ted->ted_grant < 0) {
+       if (unlikely(ted->ted_grant < 0 || ted->ted_grant > want + chunk)) {
                CERROR("%s: cli %s/%p grant %ld want %llu current %llu\n",
                       obd->obd_name, exp->exp_client_uuid.uuid, exp,
                       ted->ted_grant, want, curgrant);
                spin_unlock(&tgd->tgd_grant_lock);
-               LBUG();
+               if (tgd->tgd_lbug_on_grant_miscount)
+                       LBUG();
        }
 
        CDEBUG(D_CACHE,
@@ -1073,24 +1094,42 @@ void tgt_grant_discard(struct obd_export *exp)
 
        tgd = &lut->lut_tgd;
        spin_lock(&tgd->tgd_grant_lock);
-       LASSERTF(tgd->tgd_tot_granted >= ted->ted_grant,
-                "%s: tot_granted %llu cli %s/%p ted_grant %ld\n",
-                obd->obd_name, tgd->tgd_tot_granted,
-                exp->exp_client_uuid.uuid, exp, ted->ted_grant);
-       tgd->tgd_tot_granted -= ted->ted_grant;
+       if (unlikely(tgd->tgd_tot_granted < ted->ted_grant ||
+                    tgd->tgd_tot_dirty < ted->ted_dirty)) {
+               struct obd_export *e;
+               u64 ttg = 0;
+               u64 ttd = 0;
+
+               list_for_each_entry(e, &obd->obd_exports, exp_obd_chain) {
+                       LASSERT(exp != e);
+                       ttg += e->exp_target_data.ted_grant;
+                       ttg += e->exp_target_data.ted_pending;
+                       ttd += e->exp_target_data.ted_dirty;
+               }
+               if (tgd->tgd_tot_granted < ted->ted_grant)
+                       CERROR("%s: cli %s/%p: tot_granted %llu < ted_grant %ld, corrected to %llu",
+                              obd->obd_name,  exp->exp_client_uuid.uuid, exp,
+                              tgd->tgd_tot_granted, ted->ted_grant, ttg);
+               if (tgd->tgd_tot_dirty < ted->ted_dirty)
+                       CERROR("%s: cli %s/%p: tot_dirty %llu < ted_dirty %ld, corrected to %llu",
+                              obd->obd_name, exp->exp_client_uuid.uuid, exp,
+                              tgd->tgd_tot_dirty, ted->ted_dirty, ttd);
+               tgd->tgd_tot_granted = ttg;
+               tgd->tgd_tot_dirty = ttd;
+       } else {
+               tgd->tgd_tot_granted -= ted->ted_grant;
+               tgd->tgd_tot_dirty -= ted->ted_dirty;
+       }
        ted->ted_grant = 0;
-       LASSERTF(tgd->tgd_tot_pending >= ted->ted_pending,
-                "%s: tot_pending %llu cli %s/%p ted_pending %ld\n",
-                obd->obd_name, tgd->tgd_tot_pending,
-                exp->exp_client_uuid.uuid, exp, ted->ted_pending);
+       ted->ted_dirty = 0;
+
+       if (tgd->tgd_tot_pending < ted->ted_pending) {
+               CERROR("%s: tot_pending %llu < cli %s/%p ted_pending %ld\n",
+                      obd->obd_name, tgd->tgd_tot_pending,
+                      exp->exp_client_uuid.uuid, exp, ted->ted_pending);
+       }
        /* tgd_tot_pending is handled in tgt_grant_commit as bulk
         * commmits */
-       LASSERTF(tgd->tgd_tot_dirty >= ted->ted_dirty,
-                "%s: tot_dirty %llu cli %s/%p ted_dirty %ld\n",
-                obd->obd_name, tgd->tgd_tot_dirty,
-                exp->exp_client_uuid.uuid, exp, ted->ted_dirty);
-       tgd->tgd_tot_dirty -= ted->ted_dirty;
-       ted->ted_dirty = 0;
        spin_unlock(&tgd->tgd_grant_lock);
 }
 EXPORT_SYMBOL(tgt_grant_discard);
@@ -1529,65 +1568,68 @@ EXPORT_SYMBOL(tgt_grant_commit_cb_add);
 /**
  * Show estimate of total amount of dirty data on clients.
  *
- * \param[in] m                seq_file handle
- * \param[in] data     unused for single entry
+ * @kobj               kobject embedded in obd_device
+ * @attr               unused
+ * @buf                        buf used by sysfs to print out data
  *
- * \retval             0 on success
- * \retval             negative value on error
+ * Return:             string length of @buf output on success
+ *                     (number of characters written by scnprintf())
  */
-int tgt_tot_dirty_seq_show(struct seq_file *m, void *data)
+ssize_t tot_dirty_show(struct kobject *kobj, struct attribute *attr,
+                      char *buf)
 {
-       struct obd_device *obd = m->private;
+       struct obd_device *obd = container_of(kobj, struct obd_device,
+                                             obd_kset.kobj);
        struct tg_grants_data *tgd;
 
-       LASSERT(obd != NULL);
        tgd = &obd->u.obt.obt_lut->lut_tgd;
-       seq_printf(m, "%llu\n", tgd->tgd_tot_dirty);
-       return 0;
+       return scnprintf(buf, PAGE_SIZE, "%llu\n", tgd->tgd_tot_dirty);
 }
-EXPORT_SYMBOL(tgt_tot_dirty_seq_show);
+EXPORT_SYMBOL(tot_dirty_show);
 
 /**
  * Show total amount of space granted to clients.
  *
- * \param[in] m                seq_file handle
- * \param[in] data     unused for single entry
+ * @kobj               kobject embedded in obd_device
+ * @attr               unused
+ * @buf                        buf used by sysfs to print out data
  *
- * \retval             0 on success
- * \retval             negative value on error
+ * Return:             string length of @buf output on success
+ *                     (number of characters written by scnprintf())
  */
-int tgt_tot_granted_seq_show(struct seq_file *m, void *data)
+ssize_t tot_granted_show(struct kobject *kobj, struct attribute *attr,
+                        char *buf)
 {
-       struct obd_device *obd = m->private;
+       struct obd_device *obd = container_of(kobj, struct obd_device,
+                                             obd_kset.kobj);
        struct tg_grants_data *tgd;
 
-       LASSERT(obd != NULL);
        tgd = &obd->u.obt.obt_lut->lut_tgd;
-       seq_printf(m, "%llu\n", tgd->tgd_tot_granted);
-       return 0;
+       return scnprintf(buf, PAGE_SIZE, "%llu\n", tgd->tgd_tot_granted);
 }
-EXPORT_SYMBOL(tgt_tot_granted_seq_show);
+EXPORT_SYMBOL(tot_granted_show);
 
 /**
  * Show total amount of space used by IO in progress.
  *
- * \param[in] m                seq_file handle
- * \param[in] data     unused for single entry
+ * @kobj               kobject embedded in obd_device
+ * @attr               unused
+ * @buf                        buf used by sysfs to print out data
  *
- * \retval             0 on success
- * \retval             negative value on error
+ * Return:             string length of @buf output on success
+ *                     (number of characters written by scnprintf())
  */
-int tgt_tot_pending_seq_show(struct seq_file *m, void *data)
+ssize_t tot_pending_show(struct kobject *kobj, struct attribute *attr,
+                        char *buf)
 {
-       struct obd_device *obd = m->private;
+       struct obd_device *obd = container_of(kobj, struct obd_device,
+                                             obd_kset.kobj);
        struct tg_grants_data *tgd;
 
-       LASSERT(obd != NULL);
        tgd = &obd->u.obt.obt_lut->lut_tgd;
-       seq_printf(m, "%llu\n", tgd->tgd_tot_pending);
-       return 0;
+       return scnprintf(buf, PAGE_SIZE, "%llu\n", tgd->tgd_tot_pending);
 }
-EXPORT_SYMBOL(tgt_tot_pending_seq_show);
+EXPORT_SYMBOL(tot_pending_show);
 
 /**
  * Show if grants compatibility mode is disabled.
@@ -1598,21 +1640,22 @@ EXPORT_SYMBOL(tgt_tot_pending_seq_show);
  * block, (i.e. typically 4kB units), but underlaying file system might have
  * block size bigger than page size, e.g. ZFS. See LU-2049 for details.
  *
- * \param[in] m                seq_file handle
- * \param[in] data     unused for single entry
+ * @kobj               kobject embedded in obd_device
+ * @attr               unused
+ * @buf                        buf used by sysfs to print out data
  *
- * \retval             0 on success
- * \retval             negative value on error
+ * Return:             string length of @buf output on success
  */
-int tgt_grant_compat_disable_seq_show(struct seq_file *m, void *data)
+ssize_t grant_compat_disable_show(struct kobject *kobj, struct attribute *attr,
+                                 char *buf)
 {
-       struct obd_device *obd = m->private;
+       struct obd_device *obd = container_of(kobj, struct obd_device,
+                                             obd_kset.kobj);
        struct tg_grants_data *tgd = &obd->u.obt.obt_lut->lut_tgd;
 
-       seq_printf(m, "%u\n", tgd->tgd_grant_compat_disable);
-       return 0;
+       return scnprintf(buf, PAGE_SIZE, "%u\n", tgd->tgd_grant_compat_disable);
 }
-EXPORT_SYMBOL(tgt_grant_compat_disable_seq_show);
+EXPORT_SYMBOL(grant_compat_disable_show);
 
 /**
  * Change grant compatibility mode.
@@ -1620,27 +1663,27 @@ EXPORT_SYMBOL(tgt_grant_compat_disable_seq_show);
  * Setting tgd_grant_compat_disable prohibit any space granting to clients
  * not supporting OBD_CONNECT_GRANT_PARAM. See details above.
  *
- * \param[in] file     proc file
- * \param[in] buffer   string which represents mode
- *                     1: disable compatibility mode
- *                     0: enable compatibility mode
- * \param[in] count    \a buffer length
- * \param[in] off      unused for single entry
+ * @kobj       kobject embedded in obd_device
+ * @attr       unused
+ * @buffer     string which represents mode
+ *             1: disable compatibility mode
+ *             0: enable compatibility mode
+ * @count      @buffer length
  *
- * \retval             \a count on success
- * \retval             negative number on error
+ * Return:     @count on success
+ *             negative number on error
  */
-ssize_t tgt_grant_compat_disable_seq_write(struct file *file,
-                                          const char __user *buffer,
-                                          size_t count, loff_t *off)
+ssize_t grant_compat_disable_store(struct kobject *kobj,
+                                  struct attribute *attr,
+                                  const char *buffer, size_t count)
 {
-       struct seq_file *m = file->private_data;
-       struct obd_device *obd = m->private;
+       struct obd_device *obd = container_of(kobj, struct obd_device,
+                                             obd_kset.kobj);
        struct tg_grants_data *tgd = &obd->u.obt.obt_lut->lut_tgd;
        bool val;
        int rc;
 
-       rc = kstrtobool_from_user(buffer, count, &val);
+       rc = kstrtobool(buffer, &val);
        if (rc)
                return rc;
 
@@ -1648,4 +1691,61 @@ ssize_t tgt_grant_compat_disable_seq_write(struct file *file,
 
        return count;
 }
-EXPORT_SYMBOL(tgt_grant_compat_disable_seq_write);
+EXPORT_SYMBOL(grant_compat_disable_store);
+
+/**
+ * Show lbug_on_grant_miscount mode.
+ *
+ * @kobj               kobject embedded in obd_device
+ * @attr               unused
+ * @buf                        buf used by sysfs to print out data
+ *
+ * Return:             string length of @buf output on success
+ */
+ssize_t lbug_on_grant_miscount_show(struct kobject *kobj,
+                                   struct attribute *attr, char *buf)
+{
+       struct obd_device *obd = container_of(kobj, struct obd_device,
+                                             obd_kset.kobj);
+       struct tg_grants_data *tgd = &obd->u.obt.obt_lut->lut_tgd;
+
+       return scnprintf(buf, PAGE_SIZE, "%u\n",
+                        tgd->tgd_lbug_on_grant_miscount);
+}
+EXPORT_SYMBOL(lbug_on_grant_miscount_show);
+
+/**
+ * Change lbug on grant miscount mode.
+ *
+ * Setting tgd_lbug_on_grant_miscount to 1 makes tgt_grant_alloc() LBUG
+ * on an apparently wrong ted->ted_grant
+ *
+ * @kobj       kobject embedded in obd_device
+ * @attr       unused
+ * @buffer     string which represents mode
+ *             1: use LBUG on grant miscount
+ *             0: use CERROR on grant miscount
+ * @count      @buffer length
+ *
+ * Return:     @count on success
+ *             negative number on error
+ */
+ssize_t lbug_on_grant_miscount_store(struct kobject *kobj,
+                                    struct attribute *attr,
+                                    const char *buffer, size_t count)
+{
+       struct obd_device *obd = container_of(kobj, struct obd_device,
+                                             obd_kset.kobj);
+       struct tg_grants_data *tgd = &obd->u.obt.obt_lut->lut_tgd;
+       bool val;
+       int rc;
+
+       rc = kstrtobool(buffer, &val);
+       if (rc)
+               return rc;
+
+       tgd->tgd_lbug_on_grant_miscount = val;
+
+       return count;
+}
+EXPORT_SYMBOL(lbug_on_grant_miscount_store);