LU-7908 osp: fake precreate support

[fs/lustre-release.git] / lustre / osp / osp_precreate.c
diff --git a/lustre/osp/osp_precreate.c b/lustre/osp/osp_precreate.c

index 076cc6a..30f3164 100644 (file)
--- a/lustre/osp/osp_precreate.c
+++ b/lustre/osp/osp_precreate.c
@@ -44,6 +44,10 @@
  
  #define DEBUG_SUBSYSTEM S_MDS
  
+#include <linux/kthread.h>
+
+#include <lustre_obdo.h>
+
  #include "osp_internal.h"
  
  /*
@@ -206,7 +210,7 @@ static int osp_statfs_update(struct osp_device *d)
         d->opd_statfs_fresh_till = cfs_time_shift(obd_timeout * 1000);
         d->opd_statfs_update_in_progress = 1;
  
-       ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
+       ptlrpcd_add_req(req);
  
         RETURN(0);
  }
@@ -216,7 +220,7 @@ static int osp_statfs_update(struct osp_device *d)
   *
   * If cached statfs data claim no free space, but OSP has got a request to
   * destroy an object (so release some space probably), then we may need to
- * refresh cached statfs data sooner then planned. The function checks there
+ * refresh cached statfs data sooner than planned. The function checks there
   * is no statfs update going and schedules immediate update if so.
   * XXX: there might be a case where removed object(s) do not add free space (empty
   * object). If the number of such deletions is high, then we can start to update
@@ -276,7 +280,7 @@ static inline int osp_precreate_near_empty_nolock(const struct lu_env *env,
  
         /* don't consider new precreation till OST is healty and
          * has free space */
-       return ((window - d->opd_pre_reserved < d->opd_pre_grow_count / 2) &&
+       return ((window - d->opd_pre_reserved < d->opd_pre_create_count / 2) &&
                 (d->opd_pre_status == 0));
  }
  
@@ -574,9 +578,9 @@ static int osp_precreate_send(const struct lu_env *env, struct osp_device *d)
         }
  
         spin_lock(&d->opd_pre_lock);
-       if (d->opd_pre_grow_count > d->opd_pre_max_grow_count / 2)
-               d->opd_pre_grow_count = d->opd_pre_max_grow_count / 2;
-       grow = d->opd_pre_grow_count;
+       if (d->opd_pre_create_count > d->opd_pre_max_create_count / 2)
+               d->opd_pre_create_count = d->opd_pre_max_create_count / 2;
+       grow = d->opd_pre_create_count;
         spin_unlock(&d->opd_pre_lock);
  
         body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
@@ -606,6 +610,9 @@ static int osp_precreate_send(const struct lu_env *env, struct osp_device *d)
  
         ptlrpc_request_set_replen(req);
  
+       if (OBD_FAIL_CHECK(OBD_FAIL_OSP_FAKE_PRECREATE))
+               GOTO(ready, rc = 0);
+
         rc = ptlrpc_queue_wait(req);
         if (rc) {
                 CERROR("%s: can't precreate: rc = %d\n", d->opd_obd->obd_name,
@@ -619,6 +626,8 @@ static int osp_precreate_send(const struct lu_env *env, struct osp_device *d)
                 GOTO(out_req, rc = -EPROTO);
  
         ostid_to_fid(fid, &body->oa.o_oi, d->opd_index);
+
+ready:
         if (osp_fid_diff(fid, &d->opd_pre_used_fid) <= 0) {
                 CERROR("%s: precreate fid "DFID" < local used fid "DFID
                        ": rc = %d\n", d->opd_obd->obd_name,
@@ -632,13 +641,13 @@ static int osp_precreate_send(const struct lu_env *env, struct osp_device *d)
         if (diff < grow) {
                 /* the OST has not managed to create all the
                  * objects we asked for */
-               d->opd_pre_grow_count = max(diff, OST_MIN_PRECREATE);
-               d->opd_pre_grow_slow = 1;
+               d->opd_pre_create_count = max(diff, OST_MIN_PRECREATE);
+               d->opd_pre_create_slow = 1;
         } else {
                 /* the OST is able to keep up with the work,
-                * we could consider increasing grow_count
+                * we could consider increasing create_count
                  * next time if needed */
-               d->opd_pre_grow_slow = 0;
+               d->opd_pre_create_slow = 0;
         }
  
         body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
@@ -860,10 +869,10 @@ static int osp_precreate_cleanup_orphans(struct lu_env *env,
         spin_lock(&d->opd_pre_lock);
         diff = osp_fid_diff(&d->opd_last_used_fid, last_fid);
         if (diff > 0) {
-               d->opd_pre_grow_count = OST_MIN_PRECREATE + diff;
+               d->opd_pre_create_count = OST_MIN_PRECREATE + diff;
                 d->opd_pre_last_created_fid = d->opd_last_used_fid;
         } else {
-               d->opd_pre_grow_count = OST_MIN_PRECREATE;
+               d->opd_pre_create_count = OST_MIN_PRECREATE;
                 d->opd_pre_last_created_fid = *last_fid;
         }
         /*
@@ -873,7 +882,7 @@ static int osp_precreate_cleanup_orphans(struct lu_env *env,
         LASSERT(fid_oid(&d->opd_pre_last_created_fid) <=
                 LUSTRE_DATA_SEQ_MAX_WIDTH);
         d->opd_pre_used_fid = d->opd_pre_last_created_fid;
-       d->opd_pre_grow_slow = 0;
+       d->opd_pre_create_slow = 0;
         spin_unlock(&d->opd_pre_lock);
  
         CDEBUG(D_HA, "%s: Got last_id "DFID" from OST, last_created "DFID
@@ -964,13 +973,13 @@ void osp_pre_update_status(struct osp_device *d, int rc)
                                        msfs->os_bfree, used, msfs->os_bavail,
                                        d->opd_pre_status, rc);
                         CDEBUG(D_INFO,
-                              "non-commited changes: %lu, in progress: %u\n",
+                              "non-committed changes: %lu, in progress: %u\n",
                                d->opd_syn_changes, d->opd_syn_rpc_in_progress);
                 } else if (old == -ENOSPC) {
                         d->opd_pre_status = 0;
                         spin_lock(&d->opd_pre_lock);
-                       d->opd_pre_grow_slow = 0;
-                       d->opd_pre_grow_count = OST_MIN_PRECREATE;
+                       d->opd_pre_create_slow = 0;
+                       d->opd_pre_create_count = OST_MIN_PRECREATE;
                         spin_unlock(&d->opd_pre_lock);
                         wake_up(&d->opd_pre_waitq);
                         CDEBUG(D_INFO, "%s: no space: "LPU64" blocks, "LPU64
@@ -1080,6 +1089,8 @@ static int osp_precreate_thread(void *_arg)
         struct osp_device       *d = _arg;
         struct ptlrpc_thread    *thread = &d->opd_pre_thread;
         struct l_wait_info       lwi = { 0 };
+       struct l_wait_info       lwi2 = LWI_TIMEOUT(cfs_time_seconds(5),
+                                                   back_to_sleep, NULL);
         struct lu_env            env;
         int                      rc;
  
@@ -1133,7 +1144,11 @@ static int osp_precreate_thread(void *_arg)
                         continue;
                 }
  
-               osp_statfs_update(d);
+               if (osp_statfs_update(d)) {
+                       l_wait_event(d->opd_pre_waitq,
+                                    !osp_precreate_running(d), &lwi2);
+                       continue;
+               }
  
                 /*
                  * Clean up orphans or recreate missing objects.
@@ -1160,7 +1175,8 @@ static int osp_precreate_thread(void *_arg)
                                 break;
  
                         if (osp_statfs_need_update(d))
-                               osp_statfs_update(d);
+                               if (osp_statfs_update(d))
+                                       break;
  
                         /* To avoid handling different seq in precreate/orphan
                          * cleanup, it will hold precreate until current seq is
@@ -1235,6 +1251,7 @@ static int osp_precreate_ready_condition(const struct lu_env *env,
         if (d->opd_pre_status != 0 &&
             d->opd_pre_status != -EAGAIN &&
             d->opd_pre_status != -ENODEV &&
+           d->opd_pre_status != -ENOTCONN &&
             d->opd_pre_status != -ENOSPC) {
                 /* DEBUG LU-3230 */
                 if (d->opd_pre_status != -EIO)
@@ -1311,12 +1328,12 @@ int osp_precreate_reserve(const struct lu_env *env, struct osp_device *d)
                  * increase number of precreations
                  */
                 precreated = osp_objs_precreated(env, d);
-               if (d->opd_pre_grow_count < d->opd_pre_max_grow_count &&
-                   d->opd_pre_grow_slow == 0 &&
-                   precreated <= (d->opd_pre_grow_count / 4 + 1)) {
+               if (d->opd_pre_create_count < d->opd_pre_max_create_count &&
+                   d->opd_pre_create_slow == 0 &&
+                   precreated <= (d->opd_pre_create_count / 4 + 1)) {
                         spin_lock(&d->opd_pre_lock);
-                       d->opd_pre_grow_slow = 1;
-                       d->opd_pre_grow_count *= 2;
+                       d->opd_pre_create_slow = 1;
+                       d->opd_pre_create_count *= 2;
                         spin_unlock(&d->opd_pre_lock);
                 }
  
@@ -1478,6 +1495,13 @@ int osp_object_truncate(const struct lu_env *env, struct dt_object *dt,
          * XXX: decide how do we do here with resend
          * if we don't resend, then client may see wrong file size
          * if we do resend, then MDS thread can get stuck for quite long
+        * and if we don't resend, then the client will also get -EWOULDBLOCK!
+        * (see LU-7975 and the sanity/test_27F use cases)
+        * So we choose not to resend/delay this truncate request to the OST
+        * and instead let the client decide to resend, in a less aggressive
+        * way from after_reply(), by returning -EINPROGRESS instead of
+        * -EAGAIN/-EWOULDBLOCK upon return from ptlrpc_queue_wait() at the
+        * end of this routine
          */
         req->rq_no_resend = req->rq_no_delay = 1;
  
@@ -1505,8 +1529,23 @@ int osp_object_truncate(const struct lu_env *env, struct dt_object *dt,
         ptlrpc_request_set_replen(req);
  
         rc = ptlrpc_queue_wait(req);
-       if (rc)
-               CERROR("can't punch object: %d\n", rc);
+       if (rc) {
+               /* -EWOULDBLOCK/-EAGAIN means OST is unreachable at the moment
+                * since we have decided not to resend/delay, but this could
+                * lead to a wrong size being seen on the Client side, and even
+                * make a process trying to open exit/fail if it does not itself
+                * handle -EAGAIN. It is better to return -EINPROGRESS instead
+                * and leave the resend decision to the Client in after_reply()
+                */
+               if (rc == -EWOULDBLOCK) {
+                       rc = -EINPROGRESS;
+                       CDEBUG(D_HA, "returning -EINPROGRESS instead of "
+                              "-EWOULDBLOCK/-EAGAIN to allow Client to "
+                              "resend\n");
+               } else {
+                       CERROR("can't punch object: %d\n", rc);
+               }
+       }
  out:
         ptlrpc_req_finished(req);
         if (oa)
@@ -1543,10 +1582,10 @@ int osp_init_precreate(struct osp_device *d)
         d->opd_pre_last_created_fid.f_oid = 1;
         d->opd_pre_reserved = 0;
         d->opd_got_disconnected = 1;
-       d->opd_pre_grow_slow = 0;
-       d->opd_pre_grow_count = OST_MIN_PRECREATE;
-       d->opd_pre_min_grow_count = OST_MIN_PRECREATE;
-       d->opd_pre_max_grow_count = OST_MAX_PRECREATE;
+       d->opd_pre_create_slow = 0;
+       d->opd_pre_create_count = OST_MIN_PRECREATE;
+       d->opd_pre_min_create_count = OST_MIN_PRECREATE;
+       d->opd_pre_max_create_count = OST_MAX_PRECREATE;
  
         spin_lock_init(&d->opd_pre_lock);
         init_waitqueue_head(&d->opd_pre_waitq);