Whamcloud - gitweb
LU-6245 libcfs: make libcfs_ioctl.h and lnetctl.h uapi compliant
[fs/lustre-release.git] / lustre / ofd / ofd_grant.c
index ff205f5..5ef22b5 100644 (file)
@@ -23,7 +23,7 @@
  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  *
- * Copyright (c) 2012, 2014, Intel Corporation.
+ * Copyright (c) 2012, 2015, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -60,8 +60,8 @@
 /* Clients typically hold 2x their max_rpcs_in_flight of grant space */
 #define OFD_GRANT_SHRINK_LIMIT(exp)    (2ULL * 8 * exp_max_brw_size(exp))
 
-static inline obd_size ofd_grant_from_cli(struct obd_export *exp,
-                                         struct ofd_device *ofd, obd_size val)
+static inline u64 ofd_grant_from_cli(struct obd_export *exp,
+                                    struct ofd_device *ofd, u64 val)
 {
        if (ofd_grant_compat(exp, ofd))
                /* clients not supporting OBD_CONNECT_GRANT_PARAM actually
@@ -71,16 +71,16 @@ static inline obd_size ofd_grant_from_cli(struct obd_export *exp,
        return val;
 }
 
-static inline obd_size ofd_grant_to_cli(struct obd_export *exp,
-                                       struct ofd_device *ofd, obd_size val)
+static inline u64 ofd_grant_to_cli(struct obd_export *exp,
+                                  struct ofd_device *ofd, u64 val)
 {
        if (ofd_grant_compat(exp, ofd))
                return val >> (ofd->ofd_blockbits - COMPAT_BSIZE_SHIFT);
        return val;
 }
 
-static inline obd_size ofd_grant_chunk(struct obd_export *exp,
-                                      struct ofd_device *ofd)
+static inline u64 ofd_grant_chunk(struct obd_export *exp,
+                                 struct ofd_device *ofd)
 {
        if (ofd_obd(ofd)->obd_self_export == exp)
                /* Grant enough space to handle a big precreate request */
@@ -112,17 +112,17 @@ static inline obd_size ofd_grant_chunk(struct obd_export *exp,
  */
 void ofd_grant_sanity_check(struct obd_device *obd, const char *func)
 {
-       struct ofd_device       *ofd = ofd_dev(obd->obd_lu_dev);
-       struct obd_export       *exp;
-       obd_size                 maxsize;
-       obd_size                 tot_dirty = 0;
-       obd_size                 tot_pending = 0;
-       obd_size                 tot_granted = 0;
-       obd_size                 fo_tot_granted;
-       obd_size                 fo_tot_pending;
-       obd_size                 fo_tot_dirty;
-
-       if (cfs_list_empty(&obd->obd_exports))
+       struct ofd_device *ofd = ofd_dev(obd->obd_lu_dev);
+       struct obd_export *exp;
+       u64                maxsize;
+       u64                tot_dirty = 0;
+       u64                tot_pending = 0;
+       u64                tot_granted = 0;
+       u64                fo_tot_granted;
+       u64                fo_tot_pending;
+       u64                fo_tot_dirty;
+
+       if (list_empty(&obd->obd_exports))
                return;
 
        /* We don't want to do this for large machines that do lots of
@@ -134,7 +134,7 @@ void ofd_grant_sanity_check(struct obd_device *obd, const char *func)
 
        spin_lock(&obd->obd_dev_lock);
        spin_lock(&ofd->ofd_grant_lock);
-       cfs_list_for_each_entry(exp, &obd->obd_exports, exp_obd_chain) {
+       list_for_each_entry(exp, &obd->obd_exports, exp_obd_chain) {
                struct filter_export_data       *fed;
                int                              error = 0;
 
@@ -173,6 +173,45 @@ void ofd_grant_sanity_check(struct obd_device *obd, const char *func)
                tot_pending += fed->fed_pending;
                tot_dirty += fed->fed_dirty;
        }
+
+       /* exports about to be unlinked should also be taken into account since
+        * they might still hold pending grant space to be released at
+        * commit time */
+       list_for_each_entry(exp, &obd->obd_unlinked_exports, exp_obd_chain) {
+               struct filter_export_data       *fed;
+               int                              error = 0;
+
+               fed = &exp->exp_filter_data;
+
+               if (fed->fed_grant < 0 || fed->fed_pending < 0 ||
+                   fed->fed_dirty < 0)
+                       error = 1;
+               if (fed->fed_grant + fed->fed_pending > maxsize) {
+                       CERROR("%s: cli %s/%p fed_grant(%ld) + fed_pending(%ld)"
+                              " > maxsize("LPU64")\n", obd->obd_name,
+                              exp->exp_client_uuid.uuid, exp, fed->fed_grant,
+                              fed->fed_pending, maxsize);
+                       spin_unlock(&obd->obd_dev_lock);
+                       spin_unlock(&ofd->ofd_grant_lock);
+                       LBUG();
+               }
+               if (fed->fed_dirty > maxsize) {
+                       CERROR("%s: cli %s/%p fed_dirty(%ld) > maxsize("LPU64
+                              ")\n", obd->obd_name, exp->exp_client_uuid.uuid,
+                              exp, fed->fed_dirty, maxsize);
+                       spin_unlock(&obd->obd_dev_lock);
+                       spin_unlock(&ofd->ofd_grant_lock);
+                       LBUG();
+               }
+               CDEBUG_LIMIT(error ? D_ERROR : D_CACHE, "%s: cli %s/%p dirty "
+                            "%ld pend %ld grant %ld\n", obd->obd_name,
+                            exp->exp_client_uuid.uuid, exp, fed->fed_dirty,
+                            fed->fed_pending, fed->fed_grant);
+               tot_granted += fed->fed_grant + fed->fed_pending;
+               tot_pending += fed->fed_pending;
+               tot_dirty += fed->fed_dirty;
+       }
+
        spin_unlock(&obd->obd_dev_lock);
        fo_tot_granted = ofd->ofd_tot_granted;
        fo_tot_pending = ofd->ofd_tot_pending;
@@ -254,14 +293,14 @@ static void ofd_grant_statfs(const struct lu_env *env, struct obd_export *exp,
  *                     of available space is requested
  * \retval             amount of non-allocated space, in bytes
  */
-static obd_size ofd_grant_space_left(struct obd_export *exp)
+static u64 ofd_grant_space_left(struct obd_export *exp)
 {
-       struct obd_device       *obd = exp->exp_obd;
-       struct ofd_device       *ofd = ofd_exp(exp);
-       obd_size                 tot_granted;
-       obd_size                 left;
-       obd_size                 avail;
-       obd_size                 unstable;
+       struct obd_device *obd = exp->exp_obd;
+       struct ofd_device *ofd = ofd_exp(exp);
+       u64                tot_granted;
+       u64                left;
+       u64                avail;
+       u64                unstable;
 
        ENTRY;
        assert_spin_locked(&ofd->ofd_grant_lock);
@@ -298,7 +337,7 @@ static obd_size ofd_grant_space_left(struct obd_export *exp)
         * overhead estimate made by the OSD layer. If we grant all the free
         * space, we have no way (grant space cannot be revoked yet) to
         * adjust if the write overhead has been underestimated. */
-       left -= min_t(obd_size, left, ofd_grant_reserved(ofd, avail));
+       left -= min_t(u64, left, ofd_grant_reserved(ofd, avail));
 
        /* Align left on block size */
        left &= ~((1ULL << ofd->ofd_blockbits) - 1);
@@ -356,7 +395,7 @@ static void ofd_grant_incoming(const struct lu_env *env, struct obd_export *exp,
                oa->o_dirty = 0;
 
        dirty       = ofd_grant_from_cli(exp, ofd, oa->o_dirty);
-       dropped     = ofd_grant_from_cli(exp, ofd, (obd_size)oa->o_dropped);
+       dropped     = ofd_grant_from_cli(exp, ofd, (u64)oa->o_dropped);
        grant_chunk = ofd_grant_chunk(exp, ofd);
 
        /* Update our accounting now so that statfs takes it into account.
@@ -408,7 +447,7 @@ static void ofd_grant_incoming(const struct lu_env *env, struct obd_export *exp,
  *                             taken out
  */
 static void ofd_grant_shrink(struct obd_export *exp, struct obdo *oa,
-                            obd_size left_space)
+                            u64 left_space)
 {
        struct filter_export_data       *fed;
        struct ofd_device               *ofd = ofd_exp(exp);
@@ -451,13 +490,13 @@ static void ofd_grant_shrink(struct obd_export *exp, struct obdo *oa,
  * \retval             space (in bytes) that will be consumed to write the
  *                     network buffer
  */
-static inline int ofd_grant_rnb_size(struct obd_export *exp,
+static inline u64 ofd_grant_rnb_size(struct obd_export *exp,
                                     struct ofd_device *ofd,
                                     struct niobuf_remote *rnb)
 {
-       obd_size blocksize;
-       obd_size bytes;
-       obd_size end;
+       u64 blocksize;
+       u64 bytes;
+       u64 end;
 
        if (exp && ofd_grant_compat(exp, ofd))
                blocksize = 1ULL << COMPAT_BSIZE_SHIFT;
@@ -473,7 +512,7 @@ static inline int ofd_grant_rnb_size(struct obd_export *exp,
                bytes += blocksize - end;
        if (exp)
                /* Apply per-export pecularities if one is given */
-               bytes = ofd_grant_from_cli(exp, ofd, (obd_size)bytes);
+               bytes = ofd_grant_from_cli(exp, ofd, bytes);
        return bytes;
 }
 
@@ -496,13 +535,13 @@ static inline int ofd_grant_rnb_size(struct obd_export *exp,
  * \param[in] oa       incoming obdo in which we should return the pack the
  *                     additional grant
  * \param[in,out] rnb  the list of network buffers
- * \param[in] niocont  the number of network buffers in the list
+ * \param[in] niocount the number of network buffers in the list
  * \param[in] left     the remaining free space with space already granted
  *                     taken out
  */
 static void ofd_grant_check(const struct lu_env *env, struct obd_export *exp,
                            struct obdo *oa, struct niobuf_remote *rnb,
-                           int niocount, obd_size *left)
+                           int niocount, u64 *left)
 {
        struct filter_export_data       *fed = &exp->exp_filter_data;
        struct obd_device               *obd = exp->exp_obd;
@@ -595,7 +634,10 @@ static void ofd_grant_check(const struct lu_env *env, struct obd_export *exp,
                                exp->exp_client_uuid.uuid, exp, i, bytes);
        }
 
-       /* record space used for the I/O, will be used in ofd_grant_commmit() */
+       /* record in o_grant_used the actual space reserved for the I/O, will be
+        * used later in ofd_grant_commit() */
+       oa->o_grant_used = granted + ungranted;
+
        /* Now substract what the clients has used already.  We don't subtract
         * this from the tot_granted yet, so that other client's can't grab
         * that space before we have actually allocated our blocks. That
@@ -603,9 +645,9 @@ static void ofd_grant_check(const struct lu_env *env, struct obd_export *exp,
        info->fti_used = granted + ungranted;
        *left -= ungranted;
        fed->fed_grant -= granted;
-       fed->fed_pending += info->fti_used;
+       fed->fed_pending += oa->o_grant_used;
        ofd->ofd_tot_granted += ungranted;
-       ofd->ofd_tot_pending += info->fti_used;
+       ofd->ofd_tot_pending += oa->o_grant_used;
 
        CDEBUG(D_CACHE,
               "%s: cli %s/%p granted: %lu ungranted: %lu grant: %lu dirty: %lu"
@@ -655,14 +697,14 @@ static void ofd_grant_check(const struct lu_env *env, struct obd_export *exp,
  *
  * \retval                     amount of grant space allocated
  */
-static long ofd_grant_alloc(struct obd_export *exp, obd_size curgrant,
-                           obd_size want, obd_size left, bool conservative)
+static long ofd_grant_alloc(struct obd_export *exp, u64 curgrant,
+                           u64 want, u64 left, bool conservative)
 {
        struct obd_device               *obd = exp->exp_obd;
        struct ofd_device               *ofd = ofd_exp(exp);
        struct filter_export_data       *fed = &exp->exp_filter_data;
        long                             grant_chunk;
-       obd_size                         grant;
+       u64                              grant;
 
        ENTRY;
 
@@ -700,7 +742,7 @@ static long ofd_grant_alloc(struct obd_export *exp, obd_size curgrant,
                /* don't grant more than 1/8th of the remaining free space in
                 * one chunk */
                left >>= 3;
-       grant = min(want, left);
+       grant = min(want - curgrant, left);
        /* round grant upt to the next block size */
        grant = (grant + (1 << ofd->ofd_blockbits) - 1) &
                ~((1ULL << ofd->ofd_blockbits) - 1);
@@ -753,11 +795,11 @@ static long ofd_grant_alloc(struct obd_export *exp, obd_size curgrant,
  * \retval             amount of grant space currently owned by the client
  */
 long ofd_grant_connect(const struct lu_env *env, struct obd_export *exp,
-                      obd_size want, bool new_conn)
+                      u64 want, bool new_conn)
 {
        struct ofd_device               *ofd = ofd_exp(exp);
        struct filter_export_data       *fed = &exp->exp_filter_data;
-       obd_size                         left = 0;
+       u64                              left = 0;
        long                             grant;
        int                              from_cache;
        int                              force = 0; /* can use cached data */
@@ -785,11 +827,11 @@ refresh:
        }
 
        ofd_grant_alloc(exp,
-                       ofd_grant_to_cli(exp, ofd, (obd_size)fed->fed_grant),
+                       ofd_grant_to_cli(exp, ofd, (u64)fed->fed_grant),
                        want, left, new_conn);
 
        /* return to client its current grant */
-       grant = ofd_grant_to_cli(exp, ofd, (obd_size)fed->fed_grant);
+       grant = ofd_grant_to_cli(exp, ofd, (u64)fed->fed_grant);
        ofd->ofd_tot_granted_clients++;
 
        spin_unlock(&ofd->ofd_grant_lock);
@@ -829,7 +871,7 @@ void ofd_grant_discard(struct obd_export *exp)
                 obd->obd_name, ofd->ofd_tot_pending,
                 exp->exp_client_uuid.uuid, exp, fed->fed_pending);
        /* ofd_tot_pending is handled in ofd_grant_commit as bulk
-        * finishes */
+        * commits */
        LASSERTF(ofd->ofd_tot_dirty >= fed->fed_dirty,
                 "%s: tot_dirty "LPU64" cli %s/%p fed_dirty %ld\n",
                 obd->obd_name, ofd->ofd_tot_dirty,
@@ -850,14 +892,14 @@ void ofd_grant_discard(struct obd_export *exp)
  *
  * \param[in] env      is the lu environment provided by the caller
  * \param[in] exp      is the export of the client which sent the request
- * \paral[in,out] oa   is the incoming obdo sent by the client
+ * \param[in,out] oa   is the incoming obdo sent by the client
  */
 void ofd_grant_prepare_read(const struct lu_env *env,
                            struct obd_export *exp, struct obdo *oa)
 {
        struct ofd_device       *ofd = ofd_exp(exp);
        int                      do_shrink;
-       obd_size                 left = 0;
+       u64                      left = 0;
 
        if (!oa)
                return;
@@ -925,7 +967,7 @@ void ofd_grant_prepare_read(const struct lu_env *env,
  * \param[in] exp      export of the client which sent the request
  * \param[in] oa       incoming obdo sent by the client
  * \param[in] rnb      list of network buffers
- * \param[in] niocont  number of network buffers in the list
+ * \param[in] niocount number of network buffers in the list
  */
 void ofd_grant_prepare_write(const struct lu_env *env,
                             struct obd_export *exp, struct obdo *oa,
@@ -933,7 +975,7 @@ void ofd_grant_prepare_write(const struct lu_env *env,
 {
        struct obd_device       *obd = exp->exp_obd;
        struct ofd_device       *ofd = ofd_exp(exp);
-       obd_size                 left;
+       u64                      left;
        int                      from_cache;
        int                      force = 0; /* can use cached data intially */
        int                      rc;
@@ -1022,20 +1064,18 @@ refresh:
  *                     export currently)
  * \param[in] nr       number of objects to be created
  *
- * \retval 0           for success
+ * \retval >= 0                amount of grant space allocated to the precreate request
  * \retval -ENOSPC     on failure
  */
-int ofd_grant_create(const struct lu_env *env, struct obd_export *exp, int *nr)
+long ofd_grant_create(const struct lu_env *env, struct obd_export *exp, int *nr)
 {
-       struct ofd_thread_info          *info = ofd_info(env);
        struct ofd_device               *ofd = ofd_exp(exp);
        struct filter_export_data       *fed = &exp->exp_filter_data;
-       obd_size                         left = 0;
+       u64                              left = 0;
        unsigned long                    wanted;
+       unsigned long                    granted;
        ENTRY;
 
-       info->fti_used = 0;
-
        if (exp->exp_obd->obd_recovering ||
            ofd->ofd_dt_conf.ddp_inodespace == 0)
                /* don't enforce grant during recovery */
@@ -1090,9 +1130,9 @@ int ofd_grant_create(const struct lu_env *env, struct obd_export *exp, int *nr)
                left -= wanted - fed->fed_grant;
                fed->fed_grant = 0;
        }
-       info->fti_used = wanted;
-       fed->fed_pending += info->fti_used;
-       ofd->ofd_tot_pending += info->fti_used;
+       granted = wanted;
+       fed->fed_pending += granted;
+       ofd->ofd_tot_pending += granted;
 
        /* grant more space for precreate purpose if possible. */
        wanted = OST_MAX_PRECREATE * ofd->ofd_dt_conf.ddp_inodespace / 2;
@@ -1103,7 +1143,7 @@ int ofd_grant_create(const struct lu_env *env, struct obd_export *exp, int *nr)
                ofd_grant_alloc(exp, fed->fed_grant, wanted, left, false);
        }
        spin_unlock(&ofd->ofd_grant_lock);
-       RETURN(0);
+       RETURN(granted);
 }
 
 /**
@@ -1111,21 +1151,18 @@ int ofd_grant_create(const struct lu_env *env, struct obd_export *exp, int *nr)
  *
  * Update pending grant counter once buffers have been written to the disk.
  *
- * \param[in] env      LU environment provided by the caller
  * \param[in] exp      export of the client which sent the request
+ * \param[in] pending  amount of reserved space to be released
+ * \param[in] rc       return code of pre-commit operations
  */
-void ofd_grant_commit(const struct lu_env *env, struct obd_export *exp,
+void ofd_grant_commit(struct obd_export *exp, unsigned long pending,
                      int rc)
 {
        struct ofd_device       *ofd  = ofd_exp(exp);
-       struct ofd_thread_info  *info = ofd_info(env);
-       unsigned long            pending;
-
        ENTRY;
 
        /* get space accounted in tot_pending for the I/O, set in
         * ofd_grant_check() */
-       pending = info->fti_used;
        if (pending == 0)
                RETURN_EXIT;
 
@@ -1139,7 +1176,7 @@ void ofd_grant_commit(const struct lu_env *env, struct obd_export *exp,
        if (rc == 0) {
                spin_lock(&ofd->ofd_osfs_lock);
                /* Take pending out of cached statfs data */
-               ofd->ofd_osfs.os_bavail -= min_t(obd_size,
+               ofd->ofd_osfs.os_bavail -= min_t(u64,
                                                 ofd->ofd_osfs.os_bavail,
                                                 pending >> ofd->ofd_blockbits);
                if (ofd->ofd_statfs_inflight)
@@ -1179,3 +1216,75 @@ void ofd_grant_commit(const struct lu_env *env, struct obd_export *exp,
        spin_unlock(&ofd->ofd_grant_lock);
        EXIT;
 }
+
+struct ofd_grant_cb {
+       /* embedded commit callback; container_of() recovers this struct */
+       struct dt_txn_commit_cb  ogc_cb;
+       /* export whose grant is released; a callback reference is held */
+       struct obd_export       *ogc_exp;
+       /* pending grant handed to ofd_grant_commit() by the callback */
+       unsigned long            ogc_granted;
+};
+
+/**
+ * Transaction commit callback releasing pending grant space
+ *
+ * Calls ofd_grant_commit(), then drops the export ref and frees the cb data.
+ *
+ * \param[in] env      execution environment (not used by this function)
+ * \param[in] th       transaction handle (not used by this function)
+ * \param[in] cb       callback embedded in a struct ofd_grant_cb
+ * \param[in] err      error code, forwarded to ofd_grant_commit() as rc
+ */
+static void ofd_grant_commit_cb(struct lu_env *env, struct thandle *th,
+                               struct dt_txn_commit_cb *cb, int err)
+{
+       struct ofd_grant_cb     *ogc;
+
+       ogc = container_of(cb, struct ofd_grant_cb, ogc_cb);
+
+       ofd_grant_commit(ogc->ogc_exp, ogc->ogc_granted, err);
+       class_export_cb_put(ogc->ogc_exp);
+       OBD_FREE_PTR(ogc);
+}
+
+/**
+ * Add callback for grant releasing
+ *
+ * Allocate an ofd_grant_cb holding an export ref and add it to \a th.
+ *
+ * \param[in] th       transaction handle the callback is added to
+ * \param[in] exp      OBD export of client
+ * \param[in] granted  amount of grant space to be released upon commit
+ *
+ * \retval             0 on successful callback adding
+ * \retval             -ENOMEM, or the non-zero dt_trans_cb_add() result
+ */
+int ofd_grant_commit_cb_add(struct thandle *th, struct obd_export *exp,
+                           unsigned long granted)
+{
+       struct ofd_grant_cb     *ogc;
+       struct dt_txn_commit_cb *dcb;
+       int                      rc;
+       ENTRY;
+
+       OBD_ALLOC_PTR(ogc);
+       if (ogc == NULL)
+               RETURN(-ENOMEM);
+
+       ogc->ogc_exp = class_export_cb_get(exp);
+       ogc->ogc_granted = granted;
+
+       dcb = &ogc->ogc_cb;
+       dcb->dcb_func = ofd_grant_commit_cb;
+       INIT_LIST_HEAD(&dcb->dcb_linkage);
+       strlcpy(dcb->dcb_name, "ofd_grant_commit_cb", sizeof(dcb->dcb_name));
+
+       rc = dt_trans_cb_add(th, dcb);
+       if (rc) {
+               class_export_cb_put(ogc->ogc_exp);
+               OBD_FREE_PTR(ogc);
+       }
+
+       RETURN(rc);
+}