Whamcloud - gitweb
Revert "LU-6910 osp: add procfs values for OST reserved size" 17/17617/3
author Andreas Dilger <andreas.dilger@intel.com>
Tue, 15 Dec 2015 18:11:24 +0000 (18:11 +0000)
committer Andreas Dilger <andreas.dilger@intel.com>
Tue, 15 Dec 2015 19:35:23 +0000 (19:35 +0000)
This is causing LU-7550 and LU-7552 test failures in sanity.

This reverts commit 0585b0fb5895a24f07ca32e830d1fa72b75f4f2b.

Change-Id: Ic332a54ace4998acc4ba2ceab6f76ef733f85be5
Reviewed-on: http://review.whamcloud.com/17617
Tested-by: Jenkins
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
lustre/lod/lod_qos.c
lustre/osp/lproc_osp.c
lustre/osp/osp_dev.c
lustre/osp/osp_internal.h
lustre/osp/osp_precreate.c
lustre/tests/sanity.sh

index 24aff5a..1aa9dc1 100644 (file)
@@ -197,10 +197,6 @@ static int lod_statfs_and_check(const struct lu_env *env, struct lod_device *d,
        LASSERT(ost);
 
        rc = dt_statfs(env, ost->ltd_ost, sfs);
-
-       if (rc == -ENOSPC)
-               RETURN(rc);
-
        if (rc && rc != -ENOTCONN)
                CERROR("%s: statfs: rc = %d\n", lod2obd(d)->obd_name, rc);
 
@@ -740,6 +736,30 @@ static int min_stripe_count(__u32 stripe_cnt, int flags)
 #define LOV_CREATE_RESEED_MIN  2000
 
 /**
+ * Check if an OST is full.
+ *
+ * Check whether an OST should be considered full based
+ * on the given statfs data.
+ *
+ * \param[in] msfs     statfs data
+ *
+ * \retval false       not full
+ * \retval true                full
+ */
+static int inline lod_qos_dev_is_full(struct obd_statfs *msfs)
+{
+       __u64 used;
+       int   bs = msfs->os_bsize;
+
+       LASSERT(((bs - 1) & bs) == 0);
+
+       /* the minimum of 0.1% used blocks and 1GB bytes. */
+       used = min_t(__u64, (msfs->os_blocks - msfs->os_bfree) >> 10,
+                       1 << (31 - ffs(bs)));
+       return (msfs->os_bavail < used);
+}
+
+/**
  * Initialize temporary OST-in-use array.
  *
  * Allocate or extend the array used to mark targets already assigned to a new
@@ -830,6 +850,14 @@ static int lod_check_and_reserve_ost(const struct lu_env *env,
        }
 
        /*
+        * skip full devices
+        */
+       if (lod_qos_dev_is_full(sfs)) {
+               QOS_DEBUG("#%d is full\n", ost_idx);
+               goto out_return;
+       }
+
+       /*
         * We expect number of precreated objects in f_ffree at
         * the first iteration, skip OSPs with no objects ready
         */
@@ -1395,6 +1423,12 @@ static int lod_alloc_qos(const struct lu_env *env, struct lod_object *lo,
                        continue;
                }
 
+               /*
+                * skip full devices
+                */
+               if (lod_qos_dev_is_full(sfs))
+                       continue;
+
                /* Fail Check before osc_precreate() is called
                   so we can only 'fail' single OSC. */
                if (OBD_FAIL_CHECK(OBD_FAIL_MDS_OSC_PRECREATE) &&
index 478d7e1..4ab3a47 100644 (file)
@@ -717,124 +717,6 @@ LPROC_SEQ_FOPS_RO_TYPE(osp, timeouts);
 LPROC_SEQ_FOPS_RW_TYPE(osp, import);
 LPROC_SEQ_FOPS_RO_TYPE(osp, state);
 
-/**
- * Show high watermark (in megabytes). If available free space at OST is grater
- * than high watermark and object allocation for OST is disabled, enable it.
- *
- * \param[in] m                seq_file handle
- * \param[in] data     unused for single entry
- * \retval             0 on success
- * \retval             negative number on error
- */
-static int osp_reserved_mb_high_seq_show(struct seq_file *m, void *data)
-{
-       struct obd_device       *dev = m->private;
-       struct osp_device       *osp = lu2osp_dev(dev->obd_lu_dev);
-
-       if (osp == NULL)
-               return -EINVAL;
-
-       return seq_printf(m, "%u\n", osp->opd_reserved_mb_high);
-}
-
-/**
- * Change high watermark
- *
- * \param[in] file     proc file
- * \param[in] buffer   string which represents new value (in megabytes)
- * \param[in] count    \a buffer length
- * \param[in] off      unused for single entry
- * \retval             \a count on success
- * \retval             negative number on error
- */
-static ssize_t
-osp_reserved_mb_high_seq_write(struct file *file, const char *buffer,
-                       size_t count, loff_t *off)
-{
-       struct seq_file         *m = file->private_data;
-       struct obd_device       *dev = m->private;
-       struct osp_device       *osp = lu2osp_dev(dev->obd_lu_dev);
-       __u64                   val;
-       int                     rc;
-
-       if (osp == NULL)
-               return -EINVAL;
-
-       rc = lprocfs_write_frac_u64_helper(buffer, count, &val, 1 << 20);
-       if (rc)
-               return rc;
-       val >>= 20;
-       if (val < 1)
-               return -ERANGE;
-
-       spin_lock(&osp->opd_pre_lock);
-       osp->opd_reserved_mb_high = val;
-       if (val <= osp->opd_reserved_mb_low)
-               osp->opd_reserved_mb_low = val - 1;
-       spin_unlock(&osp->opd_pre_lock);
-
-       return count;
-}
-LPROC_SEQ_FOPS(osp_reserved_mb_high);
-
-/**
- * Show low watermark (in megabytes). If available free space at OST is less
- * than low watermark, object allocation for OST is disabled.
- *
- * \param[in] m                seq_file handle
- * \param[in] data     unused for single entry
- * \retval             0 on success
- * \retval             negative number on error
- */
-static int osp_reserved_mb_low_seq_show(struct seq_file *m, void *data)
-{
-       struct obd_device       *dev = m->private;
-       struct osp_device       *osp = lu2osp_dev(dev->obd_lu_dev);
-
-       if (osp == NULL)
-               return -EINVAL;
-
-       return seq_printf(m, "%u\n", osp->opd_reserved_mb_low);
-}
-
-/**
- * Change low watermark
- *
- * \param[in] file     proc file
- * \param[in] buffer   string which represents new value (in megabytes)
- * \param[in] count    \a buffer length
- * \param[in] off      unused for single entry
- * \retval             \a count on success
- * \retval             negative number on error
- */
-static ssize_t
-osp_reserved_mb_low_seq_write(struct file *file, const char *buffer,
-                       size_t count, loff_t *off)
-{
-       struct seq_file         *m = file->private_data;
-       struct obd_device       *dev = m->private;
-       struct osp_device       *osp = lu2osp_dev(dev->obd_lu_dev);
-       __u64                   val;
-       int                     rc;
-
-       if (osp == NULL)
-               return -EINVAL;
-
-       rc = lprocfs_write_frac_u64_helper(buffer, count, &val, 1 << 20);
-       if (rc)
-               return rc;
-       val >>= 20;
-
-       spin_lock(&osp->opd_pre_lock);
-       osp->opd_reserved_mb_low = val;
-       if (val >= osp->opd_reserved_mb_high)
-               osp->opd_reserved_mb_high = val + 1;
-       spin_unlock(&osp->opd_pre_lock);
-
-       return count;
-}
-LPROC_SEQ_FOPS(osp_reserved_mb_low);
-
 static struct lprocfs_vars lprocfs_osp_obd_vars[] = {
        { .name =       "uuid",
          .fops =       &osp_uuid_fops                  },
@@ -885,10 +767,6 @@ static struct lprocfs_vars lprocfs_osp_obd_vars[] = {
          .fops =       &osp_syn_in_prog_fops           },
        { .name =       "old_sync_processed",
          .fops =       &osp_old_sync_processed_fops    },
-       { .name =       "reserved_mb_high",
-         .fops =       &osp_reserved_mb_high_fops      },
-       { .name =       "reserved_mb_low",
-         .fops =       &osp_reserved_mb_low_fops       },
 
        /* for compatibility reasons */
        { .name =       "destroys_in_flight",
index 2a9d30f..e4af6b6 100644 (file)
@@ -752,17 +752,7 @@ static int osp_statfs(const struct lu_env *env, struct dt_device *dev,
               LPU64" files, "LPU64" free files\n", d->opd_obd->obd_name,
               sfs->os_blocks, sfs->os_bfree, sfs->os_bavail,
               sfs->os_files, sfs->os_ffree);
-
-       /* ENOSPC could be for two reasons,
-        * 1) not enough inodes 2) not enough blocks
-        * for 1) lod should use preallocated objects
-        * and for 2) shouldn`t. So, here for ENOSPC
-        * different values is returned to spend preallocated.
-        */
-       if (d->opd_pre_status == -ENOSPC && sfs->os_ffree < 32)
-               RETURN(0);
-
-       RETURN(d->opd_pre_status);
+       RETURN(0);
 }
 
 static int osp_sync_timeout(void *data)
index 012cff9..1352d64 100644 (file)
@@ -248,12 +248,6 @@ struct osp_device {
        struct list_head                 opd_async_updates;
        struct rw_semaphore              opd_async_updates_rwsem;
        atomic_t                         opd_async_updates_count;
-
-       /**
-        * Limit the object allocation using ENOSPC for opd_pre_status
-        */
-       int                             opd_reserved_mb_high;
-       int                             opd_reserved_mb_low;
 };
 
 #define opd_pre_lock                   opd_pre->osp_pre_lock
index 31fcb63..cac5bf2 100644 (file)
@@ -923,8 +923,17 @@ out:
  * Add a bit of hysteresis so this flag isn't continually flapping,
  * and ensure that new files don't get extremely fragmented due to
  * only a small amount of available space in the filesystem.
- * We want to set the ENOSPC when there is less than reserved size
- * free and clear it when there is at least 2*reserved size free space.
+ * We want to set the NOSPC flag when there is less than ~0.1% free
+ * and clear it when there is at least ~0.2% free space, so:
+ *                   avail < ~0.1% max          max = avail + used
+ *            1025 * avail < avail + used       used = blocks - free
+ *            1024 * avail < used
+ *            1024 * avail < blocks - free
+ *                   avail < ((blocks - free) >> 10)
+ *
+ * On very large disk, say 16TB 0.1% will be 16 GB. We don't want to
+ * lose that amount of space so in those cases we report no space left
+ * if their is less than 1 GB left.
  * the function updates current precreation status used: functional or not
  *
  * \param[in] d                OSP device
@@ -937,49 +946,28 @@ void osp_pre_update_status(struct osp_device *d, int rc)
 {
        struct obd_statfs       *msfs = &d->opd_statfs;
        int                      old = d->opd_pre_status;
-       __u64                    available;
+       __u64                    used;
 
        d->opd_pre_status = rc;
        if (rc)
                goto out;
 
        if (likely(msfs->os_type)) {
-               if (d->opd_reserved_mb_high == 0 &&
-                   d->opd_reserved_mb_low == 0) {
-                       /* Use ~0.1% by default to disable object allocation,
-                        * and ~0.2% to enable, size in MB, set both watermark
-                        */
-                       spin_lock(&d->opd_pre_lock);
-                       if (d->opd_reserved_mb_high == 0 &&
-                           d->opd_reserved_mb_low == 0) {
-                               d->opd_reserved_mb_low = (msfs->os_bsize *
-                                                       msfs->os_blocks) >> 30;
-                               if (d->opd_reserved_mb_low == 0)
-                                       d->opd_reserved_mb_low = 1;
-                               d->opd_reserved_mb_high =
-                                        (d->opd_reserved_mb_low << 1) + 1;
-                       }
-                       spin_unlock(&d->opd_pre_lock);
-               }
-               /* in MB */
-               available = (msfs->os_bavail * (msfs->os_bsize >> 10)) >> 10;
-               if ((msfs->os_ffree < 32) ||
-                   (available < d->opd_reserved_mb_low)) {
+               used = min_t(__u64, (msfs->os_blocks - msfs->os_bfree) >> 10,
+                                   1 << 30);
+               if ((msfs->os_ffree < 32) || (msfs->os_bavail < used)) {
                        d->opd_pre_status = -ENOSPC;
                        if (old != -ENOSPC)
                                CDEBUG(D_INFO, "%s: status: "LPU64" blocks, "
-                                      LPU64" free, "LPU64" avail, "LPU64" "
-                                      "MB avail, %u hwm -> %d: rc = %d\n",
+                                      LPU64" free, "LPU64" used, "LPU64" "
+                                      "avail -> %d: rc = %d\n",
                                       d->opd_obd->obd_name, msfs->os_blocks,
-                                      msfs->os_bfree, msfs->os_bavail,
-                                      available, d->opd_reserved_mb_low,
+                                      msfs->os_bfree, used, msfs->os_bavail,
                                       d->opd_pre_status, rc);
                        CDEBUG(D_INFO,
                               "non-commited changes: %lu, in progress: %u\n",
                               d->opd_syn_changes, d->opd_syn_rpc_in_progress);
-               } else if (unlikely(old == -ENOSPC &&
-                                   (msfs->os_ffree > 64) &&
-                                   (available > d->opd_reserved_mb_high))) {
+               } else if (old == -ENOSPC) {
                        d->opd_pre_status = 0;
                        spin_lock(&d->opd_pre_lock);
                        d->opd_pre_grow_slow = 0;
@@ -987,13 +975,13 @@ void osp_pre_update_status(struct osp_device *d, int rc)
                        spin_unlock(&d->opd_pre_lock);
                        wake_up(&d->opd_pre_waitq);
                        CDEBUG(D_INFO, "%s: no space: "LPU64" blocks, "LPU64
-                              " free, "LPU64" avail, "LPU64"MB avail, %u nwm"
-                              " -> %d: rc = %d\n", d->opd_obd->obd_name,
-                              msfs->os_blocks, msfs->os_bfree, msfs->os_bavail,
-                              available, d->opd_reserved_mb_high,
-                              d->opd_pre_status, rc);
+                              " free, "LPU64" used, "LPU64" avail -> %d: "
+                              "rc = %d\n", d->opd_obd->obd_name,
+                              msfs->os_blocks, msfs->os_bfree, used,
+                              msfs->os_bavail, d->opd_pre_status, rc);
                }
        }
+
 out:
        wake_up(&d->opd_pre_user_waitq);
 }
@@ -1560,8 +1548,6 @@ int osp_init_precreate(struct osp_device *d)
        d->opd_pre_grow_count = OST_MIN_PRECREATE;
        d->opd_pre_min_grow_count = OST_MIN_PRECREATE;
        d->opd_pre_max_grow_count = OST_MAX_PRECREATE;
-       d->opd_reserved_mb_high = 0;
-       d->opd_reserved_mb_low = 0;
 
        spin_lock_init(&d->opd_pre_lock);
        init_waitqueue_head(&d->opd_pre_waitq);
index 1bb2891..7a25e4b 100755 (executable)
@@ -13507,120 +13507,6 @@ test_252() {
 }
 run_test 252 "check lr_reader tool"
 
-test_253_fill_ost() {
-       local size_1
-       local hwm=$3
-       local free_10
-
-       blocks=$($LFS df $MOUNT | grep $1 | awk '{ print $4 }')
-       size_1=$((blocks/1024-hwm))
-       free_10=$((blocks/10240))
-       if (( free_10 > size_1 )); then
-               size_1=$free_10
-       else
-               size_1=$((size_1+size_1/10))
-       fi
-       if [[ $hwm < $((blocks/1024)) ]]; then
-               dd if=/dev/zero of=$DIR/$tdir/1 bs=1M count=$size_1 \
-                        oflag=append conv=notrunc
-
-               sleep_maxage
-
-               blocks=$($LFS df $MOUNT | grep $1 | awk '{ print $4 }')
-               echo "OST still has $((blocks/1024)) mbytes free"
-       fi
-}
-
-test_253() {
-       local ostidx=0
-       local rc=0
-
-       [ $PARALLEL == "yes" ] && skip "skip parallel run" && return
-       remote_mds_nodsh && skip "remote MDS with nodsh" && return
-       remote_mgs_nodsh && skip "remote MGS with nodsh" && return
-
-       rm -rf $DIR/$tdir
-       wait_mds_ost_sync
-       wait_delete_completed
-       mkdir $DIR/$tdir
-       local ost_name=$($LFS osts | grep ${ostidx}": " | \
-               awk '{print $2}' | sed -e 's/_UUID$//')
-
-       # on the mdt's osc
-       local mdtosc_proc1=$(get_mdtosc_proc_path $SINGLEMDS $ost_name)
-       local last_wm_h=$(do_facet $SINGLEMDS lctl get_param -n \
-                       osp.$mdtosc_proc1.reserved_mb_high)
-       local last_wm_l=$(do_facet $SINGLEMDS lctl get_param -n \
-                       osp.$mdtosc_proc1.reserved_mb_low)
-       echo "prev high watermark $last_wm_h, prev low watermark $last_wm_l"
-
-       do_facet mgs $LCTL pool_new $FSNAME.$TESTNAME ||
-               error "Pool creation failed"
-       do_facet mgs $LCTL pool_add $FSNAME.$TESTNAME $ost_name ||
-               errot "Adding $ost_name to pool fialed"
-
-       # Wait for client to see a OST at pool
-       wait_update $HOSTNAME "lctl get_param -n
-                       lov.$FSNAME-*.pools.$TESTNAME | sort -u |
-                       grep $ost_name" "$ost_name""_UUID" $((TIMEOUT/2)) ||
-                       return 2
-       $SETSTRIPE $DIR/$tdir -i $ostidx -c 1 -p $FSNAME.$TESTNAME ||
-               error "Setstripe failed"
-
-       dd if=/dev/zero of=$DIR/$tdir/0 bs=1M count=10
-       local blocks=$($LFS df $MOUNT | grep $ost_name | awk '{ print $4 }')
-       echo "OST still has $((blocks/1024)) mbytes free"
-
-       local new_hwm=$((blocks/1024-10))
-       do_facet $SINGLEMDS lctl set_param \
-                       osp.$mdtosc_proc1.reserved_mb_high=$((new_hwm+5))
-       do_facet $SINGLEMDS lctl set_param \
-                       osp.$mdtosc_proc1.reserved_mb_low=$new_hwm
-
-       test_253_fill_ost $ost_name $mdtosc_proc1 $new_hwm
-
-       #First enospc could execute orphan deletion so repeat.
-       test_253_fill_ost $ost_name $mdtosc_proc1 $new_hwm
-
-       local oa_status=$(do_facet $SINGLEMDS lctl get_param -n \
-                       osp.$mdtosc_proc1.prealloc_status)
-       echo "prealloc_status $oa_status"
-
-       dd if=/dev/zero of=$DIR/$tdir/2 bs=1M count=1 &&
-               error "File creation should fail"
-       #object allocation was stopped, but we still able to append files
-       dd if=/dev/zero of=$DIR/$tdir/1 bs=1M seek=6 count=5 oflag=append ||
-               error "Append failed"
-       rm -f $DIR/$tdir/1 $DIR/$tdir/0 $DIR/$tdir/r*
-
-       wait_delete_completed
-
-       sleep_maxage
-
-       for i in $(seq 10 12); do
-               dd if=/dev/zero of=$DIR/$tdir/$i bs=1M count=1 2>/dev/null ||
-                       error "File creation failed after rm";
-       done
-
-       oa_status=$(do_facet $SINGLEMDS lctl get_param -n \
-                       osp.$mdtosc_proc1.prealloc_status)
-       echo "prealloc_status $oa_status"
-
-       if (( oa_status != 0 )); then
-               error "Object allocation still disable after rm"
-       fi
-       do_facet $SINGLEMDS lctl set_param \
-                       osp.$mdtosc_proc1.reserved_mb_high=$last_wm_h
-       do_facet $SINGLEMDS lctl set_param \
-                       osp.$mdtosc_proc1.reserved_mb_low=$last_wm_l
-
-
-       do_facet mgs $LCTL pool_remove $FSNAME.$TESTNAME $ost_name ||
-               error "Remove $ost_name from pool failed"
-       do_facet mgs $LCTL pool_destroy $FSNAME.$TESTNAME ||
-               error "Pool destroy fialed"
-}
-run_test 253 "Check object allocation limit"
 
 cleanup_test_300() {
        trap 0