LU-3750 mdt: fix typo in mdt_txn_stop_cb()

[fs/lustre-release.git] / lustre / include / lustre_mdc.h
diff --git a/lustre/include/lustre_mdc.h b/lustre/include/lustre_mdc.h

index a90dfb8..e7152d8 100644 (file)
--- a/lustre/include/lustre_mdc.h
+++ b/lustre/include/lustre_mdc.h
@@ -28,7 +28,7 @@
   * Use is subject to license terms.
   */
  /*
- * Copyright (c) 2012 Whamcloud, Inc.
+ * Copyright (c) 2011, 2012, Intel Corporation.
   */
  /*
   * This file is part of Lustre, http://www.lustre.org/
@@ -58,10 +58,10 @@
  #endif /* __KERNEL__ */
  #include <lustre_handles.h>
  #include <libcfs/libcfs.h>
+#include <obd_class.h>
  #include <lustre/lustre_idl.h>
  #include <lustre_lib.h>
  #include <lustre_dlm.h>
-#include <lustre_log.h>
  #include <lustre_export.h>
  
  struct ptlrpc_client;
@@ -69,37 +69,102 @@ struct obd_export;
  struct ptlrpc_request;
  struct obd_device;
  
+/**
+ * Serializes in-flight MDT-modifying RPC requests to preserve idempotency.
+ *
+ * This mutex is used to implement execute-once semantics on the MDT.
+ * The MDT stores the last transaction ID and result for every client in
+ * its last_rcvd file.  If the client doesn't get a reply, it can safely
+ * resend the request: the MDT knows that the request has already been
+ * executed and reconstructs the reply instead of executing it again.
+ * Without this lock, the execution status of concurrent in-flight
+ * requests would be overwritten.
+ *
+ * This design limits the extent to which we can keep a full pipeline of
+ * in-flight requests from a single client.  This limitation could be
+ * overcome by allowing multiple slots per client in the last_rcvd file.
+ */
  struct mdc_rpc_lock {
-        cfs_mutex_t           rpcl_mutex;
-        struct lookup_intent *rpcl_it;
+       /** Held by a regular request between mdc_get/put_rpc_lock(). */
+       struct mutex            rpcl_mutex;
+       /** Intent of the current holder, or MDC_FAKE_RPCL_IT (fake mode). */
+       struct lookup_intent    *rpcl_it;
+       /** Count of in-flight fake holders; MDS/RPC load testing only. */
+       int                     rpcl_fakes;
  };
  
+#define MDC_FAKE_RPCL_IT ((void *)0x2c0012bfUL) /* rpcl_it marker value: fake (OBD_FAIL_MDC_RPCS_SEM) holders are in flight */
+
  static inline void mdc_init_rpc_lock(struct mdc_rpc_lock *lck)
  {
-        cfs_mutex_init(&lck->rpcl_mutex);
+       mutex_init(&lck->rpcl_mutex); /* NOTE(review): rpcl_fakes is not set here - presumably *lck is zero-allocated; confirm */
          lck->rpcl_it = NULL;
  }
  
  static inline void mdc_get_rpc_lock(struct mdc_rpc_lock *lck,
-                                    struct lookup_intent *it)
+                                   struct lookup_intent *it)
  {
-        ENTRY;
-        if (!it || (it->it_op != IT_GETATTR && it->it_op != IT_LOOKUP)) {
-                cfs_mutex_lock(&lck->rpcl_mutex);
-                LASSERT(lck->rpcl_it == NULL);
-                lck->rpcl_it = it;
-        }
+       ENTRY;
+
+       if (it != NULL && (it->it_op == IT_GETATTR || it->it_op == IT_LOOKUP ||
+                          it->it_op == IT_LAYOUT))
+               return; /* these intents are sent without taking the lock */
+
+       /* Normally this blocks until the request holding the lock finishes.
+        * If the OBD_FAIL_MDC_RPCS_SEM fail_loc is set, it blocks only until
+        * a regular request is done, then sets rpcl_it to MDC_FAKE_RPCL_IT,
+        * counts itself in rpcl_fakes and drops the mutex.  The marker is
+        * cleared only when the last fake request finishes; normal requests
+        * can be sent only after that, to ensure they are recoverable again. */
+ again:
+       mutex_lock(&lck->rpcl_mutex);
+
+       if (CFS_FAIL_CHECK_QUIET(OBD_FAIL_MDC_RPCS_SEM)) {
+               lck->rpcl_it = MDC_FAKE_RPCL_IT;
+               lck->rpcl_fakes++;
+               mutex_unlock(&lck->rpcl_mutex); /* fake holders run without the mutex held */
+               return;
+       }
+
+       /* This only happens when the fail_loc check was just turned
+        * off but fake requests are still in progress.  Drop the
+        * mutex, wait and retry until they finish.  It doesn't need
+        * to be efficient in this extremely rare case, just have low
+        * overhead in the common case when it isn't true. */
+       while (unlikely(lck->rpcl_it == MDC_FAKE_RPCL_IT)) { /* acts as an "if": the body always jumps back to again */
+               mutex_unlock(&lck->rpcl_mutex);
+               schedule_timeout(cfs_time_seconds(1) / 4); /* NOTE(review): task state is not set before this call, so it may return without sleeping - confirm */
+               goto again;
+       }
+
+       LASSERT(lck->rpcl_it == NULL);
+       lck->rpcl_it = it; /* returns with rpcl_mutex held; it is dropped in mdc_put_rpc_lock() */
  }
  
  static inline void mdc_put_rpc_lock(struct mdc_rpc_lock *lck,
-                                    struct lookup_intent *it)
+                                   struct lookup_intent *it)
  {
-        if (!it || (it->it_op != IT_GETATTR && it->it_op != IT_LOOKUP)) {
-                LASSERT(it == lck->rpcl_it);
-                lck->rpcl_it = NULL;
-                cfs_mutex_unlock(&lck->rpcl_mutex);
-        }
-        EXIT;
+       if (it != NULL && (it->it_op == IT_GETATTR || it->it_op == IT_LOOKUP ||
+                          it->it_op == IT_LAYOUT))
+               goto out; /* same intents that mdc_get_rpc_lock() lets through without locking */
+
+       if (lck->rpcl_it == MDC_FAKE_RPCL_IT) { /* OBD_FAIL_MDC_RPCS_SEM: fake holder, rpcl_it is read without the mutex */
+               mutex_lock(&lck->rpcl_mutex); /* fake holders dropped the mutex in mdc_get_rpc_lock(); re-take it to update the count */
+
+               LASSERTF(lck->rpcl_fakes > 0, "%d\n", lck->rpcl_fakes);
+               lck->rpcl_fakes--;
+
+               if (lck->rpcl_fakes == 0) /* last fake holder clears the marker so normal requests can proceed */
+                       lck->rpcl_it = NULL;
+
+       } else { /* regular holder: the mutex has been held since mdc_get_rpc_lock() */
+               LASSERTF(it == lck->rpcl_it, "%p != %p\n", it, lck->rpcl_it);
+               lck->rpcl_it = NULL;
+       }
+
+       mutex_unlock(&lck->rpcl_mutex); /* releases the mutex for both the fake and the regular path */
+ out:
+       EXIT;
  }
  
  static inline void mdc_update_max_ea_from_body(struct obd_export *exp,
@@ -118,8 +183,8 @@ static inline void mdc_update_max_ea_from_body(struct obd_export *exp,
  
  
  struct mdc_cache_waiter {
-        cfs_list_t              mcw_entry;
-        cfs_waitq_t             mcw_waitq;
+       cfs_list_t              mcw_entry; /* list linkage; the list it is queued on is not visible in this hunk */
+       wait_queue_head_t             mcw_waitq; /* wait queue head; presumably what the waiter sleeps on - confirm against users */
  };
  
  /* mdc/mdc_locks.c */