Whamcloud - gitweb
LU-14719 osp: add inode watermark 28/47128/15
author Lai Siyao <lai.siyao@whamcloud.com>
Fri, 1 Apr 2022 19:58:08 +0000 (15:58 -0400)
committer Oleg Drokin <green@whamcloud.com>
Sat, 17 Sep 2022 06:23:26 +0000 (06:23 +0000)
* move block watermark from debugfs to sysfs.
* add inode watermark for OSP.

Signed-off-by: Lai Siyao <lai.siyao@whamcloud.com>
Change-Id: I7c768fa2ebfb4b8c2f75255f9e9c061d4c15cf66
Reviewed-on: https://review.whamcloud.com/47128
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Qian Yingjin <qian@ddn.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/osp/lproc_osp.c
lustre/osp/osp_internal.h
lustre/osp/osp_precreate.c

index f1e0a64..6fa08c4 100644 (file)
@@ -874,59 +874,57 @@ LDEBUGFS_SEQ_FOPS_RO_TYPE(osp, timeouts);
 LDEBUGFS_SEQ_FOPS_RW_TYPE(osp, import);
 LDEBUGFS_SEQ_FOPS_RO_TYPE(osp, state);
 
+static int osp_rpc_stats_seq_show(struct seq_file *seq, void *v)
+{
+       struct obd_device *dev = seq->private;
+
+       return obd_mod_rpc_stats_seq_show(&dev->u.cli, seq);
+}
+
+static ssize_t osp_rpc_stats_seq_write(struct file *file,
+                                      const char __user *buf,
+                                      size_t len, loff_t *off)
+{
+       struct seq_file *seq = file->private_data;
+       struct obd_device *dev = seq->private;
+       struct client_obd *cli = &dev->u.cli;
+
+       lprocfs_oh_clear(&cli->cl_mod_rpcs_hist);
+
+       return len;
+}
+LDEBUGFS_SEQ_FOPS(osp_rpc_stats);
+
 /**
- * Show high watermark (in megabytes). If available free space at OST is grater
+ * Show high watermark (in megabytes). If available free space at OST is greater
  * than high watermark and object allocation for OST is disabled, enable it.
- *
- * \param[in] m                seq_file handle
- * \param[in] data     unused for single entry
- * \retval             0 on success
- * \retval             negative number on error
  */
-static int osp_reserved_mb_high_seq_show(struct seq_file *m, void *data)
+static ssize_t reserved_mb_high_show(struct kobject *kobj,
+                                    struct attribute *attr,
+                                    char *buf)
 {
-       struct obd_device       *dev = m->private;
-       struct osp_device       *osp = lu2osp_dev(dev->obd_lu_dev);
-
-       if (osp == NULL)
-               return -EINVAL;
+       struct dt_device *dt = container_of(kobj, struct dt_device,
+                                           dd_kobj);
+       struct osp_device *osp = dt2osp_dev(dt);
 
-       seq_printf(m, "%u\n", osp->opd_reserved_mb_high);
-       return 0;
+       return snprintf(buf, PAGE_SIZE, "%u\n", osp->opd_reserved_mb_high);
 }
 
 /**
  * Change high watermark
- *
- * \param[in] file     proc file
- * \param[in] buffer   string which represents new value (in megabytes)
- * \param[in] count    \a buffer length
- * \param[in] off      unused for single entry
- * \retval             \a count on success
- * \retval             negative number on error
  */
-static ssize_t
-osp_reserved_mb_high_seq_write(struct file *file, const char __user *buffer,
-                       size_t count, loff_t *off)
+static ssize_t reserved_mb_high_store(struct kobject *kobj,
+                                     struct attribute *attr,
+                                     const char *buffer,
+                                     size_t count)
 {
-       struct seq_file         *m = file->private_data;
-       struct obd_device       *dev = m->private;
-       struct osp_device       *osp = lu2osp_dev(dev->obd_lu_dev);
-       char kernbuf[22] = "";
+       struct dt_device *dt = container_of(kobj, struct dt_device,
+                                           dd_kobj);
+       struct osp_device *osp = dt2osp_dev(dt);
        u64 val;
-       int                     rc;
-
-       if (osp == NULL || osp->opd_pre == NULL)
-               return -EINVAL;
-
-       if (count >= sizeof(kernbuf))
-               return -EINVAL;
-
-       if (copy_from_user(kernbuf, buffer, count))
-               return -EFAULT;
-       kernbuf[count] = 0;
+       int rc;
 
-       rc = sysfs_memparse(kernbuf, count, &val, "MiB");
+       rc = sysfs_memparse(buffer, count, &val, "MiB");
        if (rc < 0)
                return rc;
        val >>= 20;
@@ -941,95 +939,140 @@ osp_reserved_mb_high_seq_write(struct file *file, const char __user *buffer,
 
        return count;
 }
-LDEBUGFS_SEQ_FOPS(osp_reserved_mb_high);
+LUSTRE_RW_ATTR(reserved_mb_high);
 
-static int osp_rpc_stats_seq_show(struct seq_file *seq, void *v)
+/**
+ * Show low watermark (in megabytes). If available free space at OST is less
+ * than low watermark, object allocation for OST is disabled.
+ */
+static ssize_t reserved_mb_low_show(struct kobject *kobj,
+                                   struct attribute *attr,
+                                   char *buf)
 {
-       struct obd_device *dev = seq->private;
+       struct dt_device *dt = container_of(kobj, struct dt_device,
+                                           dd_kobj);
+       struct osp_device *osp = dt2osp_dev(dt);
 
-       return obd_mod_rpc_stats_seq_show(&dev->u.cli, seq);
+       return snprintf(buf, PAGE_SIZE, "%u\n", osp->opd_reserved_mb_low);
 }
 
-static ssize_t osp_rpc_stats_seq_write(struct file *file,
-                                      const char __user *buf,
-                                      size_t len, loff_t *off)
+/**
+ * Change low watermark
+ */
+static ssize_t reserved_mb_low_store(struct kobject *kobj,
+                                    struct attribute *attr,
+                                    const char *buffer,
+                                    size_t count)
 {
-       struct seq_file *seq = file->private_data;
-       struct obd_device *dev = seq->private;
-       struct client_obd *cli = &dev->u.cli;
+       struct dt_device *dt = container_of(kobj, struct dt_device,
+                                           dd_kobj);
+       struct osp_device *osp = dt2osp_dev(dt);
+       u64 val;
+       int rc;
 
-       lprocfs_oh_clear(&cli->cl_mod_rpcs_hist);
+       rc = sysfs_memparse(buffer, count, &val, "MiB");
+       if (rc < 0)
+               return rc;
+       val >>= 20;
 
-       return len;
+       spin_lock(&osp->opd_pre_lock);
+       osp->opd_reserved_mb_low = val;
+       if (val >= osp->opd_reserved_mb_high)
+               osp->opd_reserved_mb_high = val + 1;
+       spin_unlock(&osp->opd_pre_lock);
+
+       return count;
 }
-LDEBUGFS_SEQ_FOPS(osp_rpc_stats);
+LUSTRE_RW_ATTR(reserved_mb_low);
 
 /**
- * Show low watermark (in megabytes). If available free space at OST is less
- * than low watermark, object allocation for OST is disabled.
- *
- * \param[in] m                seq_file handle
- * \param[in] data     unused for single entry
- * \retval             0 on success
- * \retval             negative number on error
+ * Show high watermark of inode.
  */
-static int osp_reserved_mb_low_seq_show(struct seq_file *m, void *data)
+static ssize_t reserved_ino_high_show(struct kobject *kobj,
+                                     struct attribute *attr,
+                                     char *buf)
 {
-       struct obd_device       *dev = m->private;
-       struct osp_device       *osp = lu2osp_dev(dev->obd_lu_dev);
-
-       if (osp == NULL)
-               return -EINVAL;
+       struct dt_device *dt = container_of(kobj, struct dt_device,
+                                           dd_kobj);
+       struct osp_device *osp = dt2osp_dev(dt);
 
-       seq_printf(m, "%u\n", osp->opd_reserved_mb_low);
-       return 0;
+       return snprintf(buf, PAGE_SIZE, "%u\n", osp->opd_reserved_ino_high);
 }
 
 /**
- * Change low watermark
- *
- * \param[in] file     proc file
- * \param[in] buffer   string which represents new value (in megabytes)
- * \param[in] count    \a buffer length
- * \param[in] off      unused for single entry
- * \retval             \a count on success
- * \retval             negative number on error
+ * Change high watermark of inode.
  */
-static ssize_t
-osp_reserved_mb_low_seq_write(struct file *file, const char __user *buffer,
-                       size_t count, loff_t *off)
+static ssize_t reserved_ino_high_store(struct kobject *kobj,
+                                      struct attribute *attr,
+                                      const char *buffer,
+                                      size_t count)
 {
-       struct seq_file         *m = file->private_data;
-       struct obd_device       *dev = m->private;
-       struct osp_device       *osp = lu2osp_dev(dev->obd_lu_dev);
-       char kernbuf[22] = "";
-       u64 val;
-       int                     rc;
+       struct dt_device *dt = container_of(kobj, struct dt_device,
+                                           dd_kobj);
+       struct osp_device *osp = dt2osp_dev(dt);
+       unsigned int val;
+       int rc;
 
-       if (osp == NULL || osp->opd_pre == NULL)
-               return -EINVAL;
+       rc = kstrtouint(buffer, 0, &val);
+       if (rc < 0)
+               return rc;
+       if (val < 1)
+               return -ERANGE;
 
-       if (count >= sizeof(kernbuf))
-               return -EINVAL;
+       spin_lock(&osp->opd_pre_lock);
+       osp->opd_reserved_ino_high = val;
+       if (val <= osp->opd_reserved_ino_low)
+               osp->opd_reserved_ino_low = val >> 1;
+       spin_unlock(&osp->opd_pre_lock);
+
+       return count;
+}
+LUSTRE_RW_ATTR(reserved_ino_high);
 
-       if (copy_from_user(kernbuf, buffer, count))
-               return -EFAULT;
-       kernbuf[count] = 0;
+/**
+ * Show low watermark of inode.
+ */
+static ssize_t reserved_ino_low_show(struct kobject *kobj,
+                                    struct attribute *attr,
+                                    char *buf)
+{
+       struct dt_device *dt = container_of(kobj, struct dt_device,
+                                           dd_kobj);
+       struct osp_device *osp = dt2osp_dev(dt);
 
-       rc = sysfs_memparse(kernbuf, count, &val, "MiB");
+       return snprintf(buf, PAGE_SIZE, "%u\n", osp->opd_reserved_ino_low);
+}
+
+/**
+ * Change low watermark of inode.
+ */
+static ssize_t reserved_ino_low_store(struct kobject *kobj,
+                                     struct attribute *attr,
+                                     const char *buffer,
+                                     size_t count)
+{
+       struct dt_device *dt = container_of(kobj, struct dt_device,
+                                           dd_kobj);
+       struct osp_device *osp = dt2osp_dev(dt);
+       unsigned int val;
+       int rc;
+
+       rc = kstrtouint(buffer, 0, &val);
        if (rc < 0)
                return rc;
-       val >>= 20;
+
+       if (val & (1UL << 31))
+               return -EOVERFLOW;
 
        spin_lock(&osp->opd_pre_lock);
-       osp->opd_reserved_mb_low = val;
-       if (val >= osp->opd_reserved_mb_high)
-               osp->opd_reserved_mb_high = val + 1;
+       osp->opd_reserved_ino_low = val;
+       if (val >= osp->opd_reserved_ino_high)
+               osp->opd_reserved_ino_high = val << 1;
        spin_unlock(&osp->opd_pre_lock);
 
        return count;
 }
-LDEBUGFS_SEQ_FOPS(osp_reserved_mb_low);
+LUSTRE_RW_ATTR(reserved_ino_low);
 
 static ssize_t force_sync_store(struct kobject *kobj, struct attribute *attr,
                                const char *buffer, size_t count)
@@ -1061,10 +1104,6 @@ static struct ldebugfs_vars ldebugfs_osp_obd_vars[] = {
          .fops =       &osp_import_fops                },
        { .name =       "state",
          .fops =       &osp_state_fops                 },
-       { .name =       "reserved_mb_high",
-         .fops =       &osp_reserved_mb_high_fops      },
-       { .name =       "reserved_mb_low",
-         .fops =       &osp_reserved_mb_low_fops       },
        { NULL }
 };
 
@@ -1108,6 +1147,10 @@ static struct attribute *osp_obd_attrs[] = {
        &lustre_attr_old_sync_processed.attr,
        &lustre_attr_create_count.attr,
        &lustre_attr_max_create_count.attr,
+       &lustre_attr_reserved_mb_high.attr,
+       &lustre_attr_reserved_mb_low.attr,
+       &lustre_attr_reserved_ino_high.attr,
+       &lustre_attr_reserved_ino_low.attr,
        NULL,
 };
 
@@ -1123,6 +1166,10 @@ static struct attribute *osp_md_attrs[] = {
        &lustre_attr_mdt_conn_uuid.attr,
        &lustre_attr_ping.attr,
        &lustre_attr_prealloc_status.attr,
+       &lustre_attr_reserved_mb_high.attr,
+       &lustre_attr_reserved_mb_low.attr,
+       &lustre_attr_reserved_ino_high.attr,
+       &lustre_attr_reserved_ino_low.attr,
        NULL,
 };
 
index 57dd8f4..0a9af3a 100644 (file)
@@ -266,8 +266,10 @@ struct osp_device {
        /*
         * Limit the object allocation using ENOSPC for opd_pre_status
         */
-       int                             opd_reserved_mb_high;
-       int                             opd_reserved_mb_low;
+       unsigned int                    opd_reserved_mb_high;
+       unsigned int                    opd_reserved_mb_low;
+       unsigned int                    opd_reserved_ino_high;
+       unsigned int                    opd_reserved_ino_low;
        bool                            opd_cleanup_orphans_done;
        bool                            opd_force_creation;
 };
index 1b77f30..672cd82 100644 (file)
@@ -1035,8 +1035,6 @@ out:
 static void osp_pre_update_msfs(struct osp_device *d, struct obd_statfs *msfs)
 {
        u32 old_state = d->opd_statfs.os_state;
-       u32 reserved_ino_low = 32;      /* could be tunable in the future */
-       u32 reserved_ino_high = reserved_ino_low * 2;
        u64 available_mb;
 
        /* statfs structure not initialized yet */
@@ -1054,7 +1052,7 @@ static void osp_pre_update_msfs(struct osp_device *d, struct obd_statfs *msfs)
                    d->opd_reserved_mb_low == 0) {
                        d->opd_reserved_mb_low = ((msfs->os_bsize >> 10) *
                                                  msfs->os_blocks) >> 20;
-                       if (d->opd_reserved_mb_low == 0)
+                       if (d->opd_reserved_mb_low < 1)
                                d->opd_reserved_mb_low = 1;
                        d->opd_reserved_mb_high =
                                (d->opd_reserved_mb_low << 1) + 1;
@@ -1062,24 +1060,42 @@ static void osp_pre_update_msfs(struct osp_device *d, struct obd_statfs *msfs)
                spin_unlock(&d->opd_pre_lock);
        }
 
+       if (unlikely(d->opd_reserved_ino_high == 0 &&
+                    d->opd_reserved_ino_low == 0)) {
+               /* Default low watermark (ENOINO below it) is os_ffree >> 20,
+                * at least 32; default high watermark is (low << 1) + 1.
+                */
+               spin_lock(&d->opd_pre_lock);
+               if (d->opd_reserved_ino_high == 0 &&
+                   d->opd_reserved_ino_low == 0) {
+                       d->opd_reserved_ino_low = msfs->os_ffree >> 20;
+                       if (d->opd_reserved_ino_low < 32)
+                               d->opd_reserved_ino_low = 32;
+                       d->opd_reserved_ino_high =
+                               (d->opd_reserved_ino_low << 1) + 1;
+               }
+               spin_unlock(&d->opd_pre_lock);
+       }
+
        available_mb = (msfs->os_bavail * (msfs->os_bsize >> 10)) >> 10;
-       if (msfs->os_ffree < reserved_ino_low)
+       if (msfs->os_ffree < d->opd_reserved_ino_low)
                msfs->os_state |= OS_STATFS_ENOINO;
-       else if (msfs->os_ffree <= reserved_ino_high)
+       else if (msfs->os_ffree <= d->opd_reserved_ino_high)
                msfs->os_state |= old_state & OS_STATFS_ENOINO;
        /* else don't clear flags in new msfs->os_state sent from OST */
 
+       if (available_mb < d->opd_reserved_mb_low)
+               msfs->os_state |= OS_STATFS_ENOSPC;
+       else if (available_mb <= d->opd_reserved_mb_high)
+               msfs->os_state |= old_state & OS_STATFS_ENOSPC;
+       /* else don't clear flags in new msfs->os_state sent from OST */
+
        CDEBUG(D_INFO,
               "%s: blocks=%llu free=%llu avail=%llu avail_mb=%llu hwm_mb=%u files=%llu ffree=%llu state=%x: rc = %d\n",
               d->opd_obd->obd_name, msfs->os_blocks, msfs->os_bfree,
               msfs->os_bavail, available_mb, d->opd_reserved_mb_high,
               msfs->os_files, msfs->os_ffree, msfs->os_state,
               d->opd_pre_status);
-       if (available_mb < d->opd_reserved_mb_low)
-               msfs->os_state |= OS_STATFS_ENOSPC;
-       else if (available_mb <= d->opd_reserved_mb_high)
-               msfs->os_state |= old_state & OS_STATFS_ENOSPC;
-       /* else don't clear flags in new msfs->os_state sent from OST */
 
        if (msfs->os_state & (OS_STATFS_ENOINO | OS_STATFS_ENOSPC)) {
                d->opd_pre_status = -ENOSPC;