Whamcloud - gitweb
LU-5061 obd: add rnb_ prefix to struct niobuf_remote members
[fs/lustre-release.git] / lustre / osc / osc_request.c
index f6ce5cb..93567bc 100644 (file)
@@ -503,11 +503,10 @@ int osc_real_create(struct obd_export *exp, struct obdo *oa,
        *ea = lsm;
 
         if (oti != NULL) {
-                oti->oti_transno = lustre_msg_get_transno(req->rq_repmsg);
-
                 if (oa->o_valid & OBD_MD_FLCOOKIE) {
-                        if (!oti->oti_logcookies)
-                                oti_alloc_cookies(oti, 1);
+                       if (oti->oti_logcookies == NULL)
+                               oti->oti_logcookies = &oti->oti_onecookie;
+
                         *oti->oti_logcookies = oa->o_lcookie;
                 }
         }
@@ -639,7 +638,7 @@ int osc_sync_base(struct obd_export *exp, struct obd_info *oinfo,
  * @objid. Found locks are added into @cancel list. Returns the amount of
  * locks added to @cancels list. */
 static int osc_resource_get_unused(struct obd_export *exp, struct obdo *oa,
-                                  cfs_list_t *cancels,
+                                  struct list_head *cancels,
                                   ldlm_mode_t mode, __u64 lock_flags)
 {
         struct ldlm_namespace *ns = exp->exp_obd->obd_namespace;
@@ -659,7 +658,7 @@ static int osc_resource_get_unused(struct obd_export *exp, struct obdo *oa,
 
        ostid_build_res_name(&oa->o_oi, &res_id);
        res = ldlm_resource_get(ns, NULL, &res_id, 0, 0);
-       if (res == NULL)
+       if (IS_ERR(res))
                RETURN(0);
 
         LDLM_RESOURCE_ADDREF(res);
@@ -742,7 +741,7 @@ static int osc_destroy(const struct lu_env *env, struct obd_export *exp,
         struct client_obd     *cli = &exp->exp_obd->u.cli;
         struct ptlrpc_request *req;
         struct ost_body       *body;
-        CFS_LIST_HEAD(cancels);
+       struct list_head       cancels = LIST_HEAD_INIT(cancels);
         int rc, count;
         ENTRY;
 
@@ -811,37 +810,38 @@ static void osc_announce_cached(struct client_obd *cli, struct obdo *oa,
 
         LASSERT(!(oa->o_valid & bits));
 
-        oa->o_valid |= bits;
-        client_obd_list_lock(&cli->cl_loi_list_lock);
-        oa->o_dirty = cli->cl_dirty;
-       if (unlikely(cli->cl_dirty - cli->cl_dirty_transit >
-                    cli->cl_dirty_max)) {
+       oa->o_valid |= bits;
+       client_obd_list_lock(&cli->cl_loi_list_lock);
+       oa->o_dirty = cli->cl_dirty_pages << PAGE_CACHE_SHIFT;
+       if (unlikely(cli->cl_dirty_pages - cli->cl_dirty_transit >
+                    cli->cl_dirty_max_pages)) {
                CERROR("dirty %lu - %lu > dirty_max %lu\n",
-                      cli->cl_dirty, cli->cl_dirty_transit, cli->cl_dirty_max);
+                      cli->cl_dirty_pages, cli->cl_dirty_transit,
+                      cli->cl_dirty_max_pages);
                oa->o_undirty = 0;
-       } else if (unlikely(atomic_read(&obd_unstable_pages) +
-                           atomic_read(&obd_dirty_pages) -
+       } else if (unlikely(atomic_read(&obd_dirty_pages) -
                            atomic_read(&obd_dirty_transit_pages) >
                            (long)(obd_max_dirty_pages + 1))) {
                /* The atomic_read() allowing the atomic_inc() are
                 * not covered by a lock thus they may safely race and trip
                 * this CERROR() unless we add in a small fudge factor (+1). */
-               CERROR("%s: dirty %d + %d - %d > system dirty_max %d\n",
+               CERROR("%s: dirty %d - %d > system dirty_max %d\n",
                       cli->cl_import->imp_obd->obd_name,
-                      atomic_read(&obd_unstable_pages),
                       atomic_read(&obd_dirty_pages),
                       atomic_read(&obd_dirty_transit_pages),
                       obd_max_dirty_pages);
                oa->o_undirty = 0;
-       } else if (unlikely(cli->cl_dirty_max - cli->cl_dirty > 0x7fffffff)) {
+       } else if (unlikely(cli->cl_dirty_max_pages - cli->cl_dirty_pages >
+                           0x7fffffff)) {
                CERROR("dirty %lu - dirty_max %lu too big???\n",
-                      cli->cl_dirty, cli->cl_dirty_max);
+                      cli->cl_dirty_pages, cli->cl_dirty_max_pages);
                oa->o_undirty = 0;
        } else {
                long max_in_flight = (cli->cl_max_pages_per_rpc <<
                                      PAGE_CACHE_SHIFT) *
                                     (cli->cl_max_rpcs_in_flight + 1);
-                oa->o_undirty = max(cli->cl_dirty_max, max_in_flight);
+               oa->o_undirty = max(cli->cl_dirty_max_pages << PAGE_CACHE_SHIFT,
+                                   max_in_flight);
         }
        oa->o_grant = cli->cl_avail_grant + cli->cl_reserved_grant;
         oa->o_dropped = cli->cl_lost_grant;
@@ -1002,14 +1002,13 @@ static int osc_should_shrink_grant(struct client_obd *client)
 
 static int osc_grant_shrink_grant_cb(struct timeout_item *item, void *data)
 {
-        struct client_obd *client;
+       struct client_obd *client;
 
-        cfs_list_for_each_entry(client, &item->ti_obd_list,
-                                cl_grant_shrink_list) {
-                if (osc_should_shrink_grant(client))
-                        osc_shrink_grant(client);
-        }
-        return 0;
+       list_for_each_entry(client, &item->ti_obd_list, cl_grant_shrink_list) {
+               if (osc_should_shrink_grant(client))
+                       osc_shrink_grant(client);
+       }
+       return 0;
 }
 
 static int osc_add_shrink_grant(struct client_obd *client)
@@ -1039,24 +1038,26 @@ static int osc_del_shrink_grant(struct client_obd *client)
 
 static void osc_init_grant(struct client_obd *cli, struct obd_connect_data *ocd)
 {
-        /*
-         * ocd_grant is the total grant amount we're expect to hold: if we've
-         * been evicted, it's the new avail_grant amount, cl_dirty will drop
-         * to 0 as inflight RPCs fail out; otherwise, it's avail_grant + dirty.
-         *
-         * race is tolerable here: if we're evicted, but imp_state already
-         * left EVICTED state, then cl_dirty must be 0 already.
-         */
-        client_obd_list_lock(&cli->cl_loi_list_lock);
-        if (cli->cl_import->imp_state == LUSTRE_IMP_EVICTED)
-                cli->cl_avail_grant = ocd->ocd_grant;
-        else
-                cli->cl_avail_grant = ocd->ocd_grant - cli->cl_dirty;
+       /*
+        * ocd_grant is the total grant amount we expect to hold: if we've
+        * been evicted, it's the new avail_grant amount, cl_dirty_pages will
+        * drop to 0 as inflight RPCs fail out; otherwise, it's avail_grant +
+        * dirty.
+        *
+        * race is tolerable here: if we're evicted, but imp_state already
+        * left EVICTED state, then cl_dirty_pages must be 0 already.
+        */
+       client_obd_list_lock(&cli->cl_loi_list_lock);
+       if (cli->cl_import->imp_state == LUSTRE_IMP_EVICTED)
+               cli->cl_avail_grant = ocd->ocd_grant;
+       else
+               cli->cl_avail_grant = ocd->ocd_grant -
+                                     (cli->cl_dirty_pages << PAGE_CACHE_SHIFT);
 
         if (cli->cl_avail_grant < 0) {
                CWARN("%s: available grant < 0: avail/ocd/dirty %ld/%u/%ld\n",
                      cli->cl_import->imp_obd->obd_name, cli->cl_avail_grant,
-                     ocd->ocd_grant, cli->cl_dirty);
+                     ocd->ocd_grant, cli->cl_dirty_pages << PAGE_CACHE_SHIFT);
                /* workaround for servers which do not have the patch from
                 * LU-2679 */
                cli->cl_avail_grant = ocd->ocd_grant;
@@ -1071,7 +1072,7 @@ static void osc_init_grant(struct client_obd *cli, struct obd_connect_data *ocd)
                cli->cl_avail_grant, cli->cl_lost_grant, cli->cl_chunkbits);
 
        if (ocd->ocd_connect_flags & OBD_CONNECT_GRANT_SHRINK &&
-           cfs_list_empty(&cli->cl_grant_shrink_list))
+           list_empty(&cli->cl_grant_shrink_list))
                osc_add_shrink_grant(cli);
 }
 
@@ -1161,7 +1162,7 @@ static inline int can_merge_pages(struct brw_page *p1, struct brw_page *p2)
                  * safe to combine */
                 if (unlikely((p1->flag & mask) != (p2->flag & mask))) {
                         CWARN("Saw flags 0x%x and 0x%x in the same brw, please "
-                              "report this at http://bugs.whamcloud.com/\n",
+                              "report this at https://jira.hpdd.intel.com/\n",
                               p1->flag, p2->flag);
                 }
                 return 0;
@@ -1199,12 +1200,13 @@ static obd_count osc_checksum_bulk(int nob, obd_count pg_count,
                    OBD_FAIL_CHECK(OBD_FAIL_OSC_CHECKSUM_RECEIVE)) {
                        unsigned char *ptr = kmap(pga[i]->pg);
                        int off = pga[i]->off & ~CFS_PAGE_MASK;
+
                        memcpy(ptr + off, "bad1", min(4, nob));
                        kunmap(pga[i]->pg);
                }
                cfs_crypto_hash_update_page(hdesc, pga[i]->pg,
-                                 pga[i]->off & ~CFS_PAGE_MASK,
-                                 count);
+                                           pga[i]->off & ~CFS_PAGE_MASK,
+                                           count);
                LL_CDEBUG_PAGE(D_PAGE, pga[i]->pg, "off %d\n",
                               (int)(pga[i]->off & ~CFS_PAGE_MASK));
 
@@ -1213,12 +1215,9 @@ static obd_count osc_checksum_bulk(int nob, obd_count pg_count,
                i++;
        }
 
-       bufsize = 4;
+       bufsize = sizeof(cksum);
        err = cfs_crypto_hash_final(hdesc, (unsigned char *)&cksum, &bufsize);
 
-       if (err)
-               cfs_crypto_hash_final(hdesc, NULL, NULL);
-
        /* For sending we only compute the wrong checksum instead
         * of corrupting the data so it is still correct on a redo */
        if (opc == OST_WRITE && OBD_FAIL_CHECK(OBD_FAIL_OSC_CHECKSUM_SEND))
@@ -1345,11 +1344,11 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli,struct obdo *oa,
 
                 if (i > 0 && can_merge_pages(pg_prev, pg)) {
                         niobuf--;
-                        niobuf->len += pg->count;
-                } else {
-                        niobuf->offset = pg->off;
-                        niobuf->len    = pg->count;
-                        niobuf->flags  = pg->flag;
+                       niobuf->rnb_len += pg->count;
+               } else {
+                       niobuf->rnb_offset = pg->off;
+                       niobuf->rnb_len    = pg->count;
+                       niobuf->rnb_flags  = pg->flag;
                 }
                 pg_prev = pg;
         }
@@ -1423,7 +1422,7 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli,struct obdo *oa,
         aa->aa_resends = 0;
         aa->aa_ppga = pga;
         aa->aa_cli = cli;
-        CFS_INIT_LIST_HEAD(&aa->aa_oaps);
+       INIT_LIST_HEAD(&aa->aa_oaps);
         if (ocapa && reserve)
                 aa->aa_ocapa = capa_get(ocapa);
 
@@ -1586,12 +1585,7 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, int rc)
                         router = libcfs_nid2str(req->rq_bulk->bd_sender);
                 }
 
-               if (server_cksum == ~0 && rc > 0) {
-                       CERROR("Protocol error: server %s set the 'checksum' "
-                              "bit, but didn't send a checksum.  Not fatal, "
-                              "but please notify on http://bugs.whamcloud.com/\n",
-                              libcfs_nid2str(peer->nid));
-               } else if (server_cksum != client_cksum) {
+               if (server_cksum != client_cksum) {
                        LCONSOLE_ERROR_MSG(0x133, "%s: BAD READ CHECKSUM: from "
                                           "%s%s%s inode "DFID" object "DOSTID
                                           " extent ["LPU64"-"LPU64"]\n",
@@ -1657,7 +1651,7 @@ static int osc_brw_redo_request(struct ptlrpc_request *request,
         if (rc)
                 RETURN(rc);
 
-        cfs_list_for_each_entry(oap, &aa->aa_oaps, oap_rpc_item) {
+       list_for_each_entry(oap, &aa->aa_oaps, oap_rpc_item) {
                 if (oap->oap_request != NULL) {
                         LASSERTF(request == oap->oap_request,
                                  "request %p != oap_request %p\n",
@@ -1685,13 +1679,13 @@ static int osc_brw_redo_request(struct ptlrpc_request *request,
 
         new_aa = ptlrpc_req_async_args(new_req);
 
-        CFS_INIT_LIST_HEAD(&new_aa->aa_oaps);
-       cfs_list_splice_init(&aa->aa_oaps, &new_aa->aa_oaps);
-       CFS_INIT_LIST_HEAD(&new_aa->aa_exts);
-       cfs_list_splice_init(&aa->aa_exts, &new_aa->aa_exts);
+       INIT_LIST_HEAD(&new_aa->aa_oaps);
+       list_splice_init(&aa->aa_oaps, &new_aa->aa_oaps);
+       INIT_LIST_HEAD(&new_aa->aa_exts);
+       list_splice_init(&aa->aa_exts, &new_aa->aa_exts);
        new_aa->aa_resends = aa->aa_resends;
 
-        cfs_list_for_each_entry(oap, &new_aa->aa_oaps, oap_rpc_item) {
+       list_for_each_entry(oap, &new_aa->aa_oaps, oap_rpc_item) {
                 if (oap->oap_request) {
                         ptlrpc_req_finished(oap->oap_request);
                         oap->oap_request = ptlrpc_request_addref(new_req);
@@ -1841,12 +1835,15 @@ static int brw_interpret(const struct lu_env *env,
        }
        OBDO_FREE(aa->aa_oa);
 
-       cfs_list_for_each_entry_safe(ext, tmp, &aa->aa_exts, oe_link) {
-               cfs_list_del_init(&ext->oe_link);
+       if (lustre_msg_get_opc(req->rq_reqmsg) == OST_WRITE && rc == 0)
+               osc_inc_unstable_pages(req);
+
+       list_for_each_entry_safe(ext, tmp, &aa->aa_exts, oe_link) {
+               list_del_init(&ext->oe_link);
                osc_extent_finish(env, ext, 1, rc);
        }
-       LASSERT(cfs_list_empty(&aa->aa_exts));
-       LASSERT(cfs_list_empty(&aa->aa_oaps));
+       LASSERT(list_empty(&aa->aa_exts));
+       LASSERT(list_empty(&aa->aa_oaps));
 
        cl_req_completion(env, aa->aa_clerq, rc < 0 ? rc :
                          req->rq_bulk->bd_nob_transferred);
@@ -1892,7 +1889,7 @@ static void brw_commit(struct ptlrpc_request *req)
  * Extents in the list must be in OES_RPC state.
  */
 int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
-                 cfs_list_t *ext_list, int cmd, pdl_policy_t pol)
+                 struct list_head *ext_list, int cmd, pdl_policy_t pol)
 {
        struct ptlrpc_request           *req = NULL;
        struct osc_extent               *ext;
@@ -1904,27 +1901,27 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
        struct cl_req                   *clerq = NULL;
        enum cl_req_type                crt = (cmd & OBD_BRW_WRITE) ? CRT_WRITE :
                                                                      CRT_READ;
-       struct ldlm_lock                *lock = NULL;
        struct cl_req_attr              *crattr = NULL;
        obd_off                         starting_offset = OBD_OBJECT_EOF;
        obd_off                         ending_offset = 0;
        int                             mpflag = 0;
        int                             mem_tight = 0;
        int                             page_count = 0;
+       bool                            soft_sync = false;
        int                             i;
        int                             rc;
-       CFS_LIST_HEAD(rpc_list);
+       struct list_head                rpc_list = LIST_HEAD_INIT(rpc_list);
 
        ENTRY;
-       LASSERT(!cfs_list_empty(ext_list));
+       LASSERT(!list_empty(ext_list));
 
        /* add pages into rpc_list to build BRW rpc */
-       cfs_list_for_each_entry(ext, ext_list, oe_link) {
+       list_for_each_entry(ext, ext_list, oe_link) {
                LASSERT(ext->oe_state == OES_RPC);
                mem_tight |= ext->oe_memalloc;
-               cfs_list_for_each_entry(oap, &ext->oe_pages, oap_pending_item) {
+               list_for_each_entry(oap, &ext->oe_pages, oap_pending_item) {
                        ++page_count;
-                       cfs_list_add_tail(&oap->oap_rpc_item, &rpc_list);
+                       list_add_tail(&oap->oap_rpc_item, &rpc_list);
                        if (starting_offset > oap->oap_obj_off)
                                starting_offset = oap->oap_obj_off;
                        else
@@ -1938,6 +1935,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
                }
        }
 
+       soft_sync = osc_over_unstable_soft_limit(cli);
        if (mem_tight)
                mpflag = cfs_memory_pressure_get_and_set();
 
@@ -1954,17 +1952,18 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
                GOTO(out, rc = -ENOMEM);
 
        i = 0;
-       cfs_list_for_each_entry(oap, &rpc_list, oap_rpc_item) {
+       list_for_each_entry(oap, &rpc_list, oap_rpc_item) {
                struct cl_page *page = oap2cl_page(oap);
                if (clerq == NULL) {
                        clerq = cl_req_alloc(env, page, crt,
                                             1 /* only 1-object rpcs for now */);
                        if (IS_ERR(clerq))
                                GOTO(out, rc = PTR_ERR(clerq));
-                       lock = oap->oap_ldlm_lock;
                }
                if (mem_tight)
                        oap->oap_brw_flags |= OBD_BRW_MEMALLOC;
+               if (soft_sync)
+                       oap->oap_brw_flags |= OBD_BRW_SOFT_SYNC;
                pga[i] = &oap->oap_brw_page;
                pga[i]->off = oap->oap_obj_off + oap->oap_page_off;
                CDEBUG(0, "put page %p index %lu oap %p flg %x to pga\n",
@@ -1978,10 +1977,6 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
        LASSERT(clerq != NULL);
        crattr->cra_oa = oa;
        cl_req_attr_set(env, clerq, crattr, ~0ULL);
-       if (lock) {
-               oa->o_handle = lock->l_remote_handle;
-               oa->o_valid |= OBD_MD_FLHANDLE;
-       }
 
        rc = cl_req_prep(env, clerq);
        if (rc != 0) {
@@ -2015,16 +2010,16 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
 
        CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
        aa = ptlrpc_req_async_args(req);
-       CFS_INIT_LIST_HEAD(&aa->aa_oaps);
-       cfs_list_splice_init(&rpc_list, &aa->aa_oaps);
-       CFS_INIT_LIST_HEAD(&aa->aa_exts);
-       cfs_list_splice_init(ext_list, &aa->aa_exts);
+       INIT_LIST_HEAD(&aa->aa_oaps);
+       list_splice_init(&rpc_list, &aa->aa_oaps);
+       INIT_LIST_HEAD(&aa->aa_exts);
+       list_splice_init(ext_list, &aa->aa_exts);
        aa->aa_clerq = clerq;
 
        /* queued sync pages can be torn down while the pages
         * were between the pending list and the rpc */
        tmp = NULL;
-       cfs_list_for_each_entry(oap, &aa->aa_oaps, oap_rpc_item) {
+       list_for_each_entry(oap, &aa->aa_oaps, oap_rpc_item) {
                /* only one oap gets a request reference */
                if (tmp == NULL)
                        tmp = oap;
@@ -2092,10 +2087,10 @@ out:
                        OBD_FREE(pga, sizeof(*pga) * page_count);
                /* this should happen rarely and is pretty bad, it makes the
                 * pending list not follow the dirty order */
-               while (!cfs_list_empty(ext_list)) {
-                       ext = cfs_list_entry(ext_list->next, struct osc_extent,
-                                            oe_link);
-                       cfs_list_del_init(&ext->oe_link);
+               while (!list_empty(ext_list)) {
+                       ext = list_entry(ext_list->next, struct osc_extent,
+                                        oe_link);
+                       list_del_init(&ext->oe_link);
                        osc_extent_finish(env, ext, 0, rc);
                }
                if (clerq && !IS_ERR(clerq))
@@ -2634,7 +2629,8 @@ static int osc_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
         ENTRY;
 
        if (!try_module_get(THIS_MODULE)) {
-               CERROR("Can't get module. Is it alive?");
+               CERROR("%s: cannot get module '%s'\n", obd->obd_name,
+                      module_name(THIS_MODULE));
                return -EINVAL;
        }
         switch (cmd) {
@@ -2839,9 +2835,9 @@ static int osc_set_info_async(const struct lu_env *env, struct obd_export *exp,
                cli->cl_lru_left = &cli->cl_cache->ccc_lru_left;
 
                /* add this osc into entity list */
-               LASSERT(cfs_list_empty(&cli->cl_lru_osc));
+               LASSERT(list_empty(&cli->cl_lru_osc));
                spin_lock(&cli->cl_cache->ccc_lru_lock);
-               cfs_list_add(&cli->cl_lru_osc, &cli->cl_cache->ccc_lru);
+               list_add(&cli->cl_lru_osc, &cli->cl_cache->ccc_lru);
                spin_unlock(&cli->cl_cache->ccc_lru_lock);
 
                RETURN(0);
@@ -2929,9 +2925,10 @@ static int osc_reconnect(const struct lu_env *env,
         if (data != NULL && (data->ocd_connect_flags & OBD_CONNECT_GRANT)) {
                 long lost_grant;
 
-                client_obd_list_lock(&cli->cl_loi_list_lock);
-                data->ocd_grant = (cli->cl_avail_grant + cli->cl_dirty) ?:
-                               2 * cli_brw_size(obd);
+               client_obd_list_lock(&cli->cl_loi_list_lock);
+               data->ocd_grant = (cli->cl_avail_grant +
+                                 (cli->cl_dirty_pages << PAGE_CACHE_SHIFT)) ?:
+                                 2 * cli_brw_size(obd);
                 lost_grant = cli->cl_lost_grant;
                 cli->cl_lost_grant = 0;
                 client_obd_list_unlock(&cli->cl_loi_list_lock);
@@ -3162,7 +3159,7 @@ int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
                                    OST_MAXREQSIZE,
                                    ptlrpc_add_rqs_to_pool);
 
-       CFS_INIT_LIST_HEAD(&cli->cl_grant_shrink_list);
+       INIT_LIST_HEAD(&cli->cl_grant_shrink_list);
        ns_register_cancel(obd->obd_namespace, osc_cancel_weight);
        RETURN(0);
 
@@ -3242,7 +3239,7 @@ int osc_cleanup(struct obd_device *obd)
        if (cli->cl_cache != NULL) {
                LASSERT(atomic_read(&cli->cl_cache->ccc_users) > 0);
                spin_lock(&cli->cl_cache->ccc_lru_lock);
-               cfs_list_del_init(&cli->cl_lru_osc);
+               list_del_init(&cli->cl_lru_osc);
                spin_unlock(&cli->cl_cache->ccc_lru_lock);
                cli->cl_lru_left = NULL;
                atomic_dec(&cli->cl_cache->ccc_users);