Whamcloud - gitweb
LU-9728 osd: use GFP_HIGHUSER for non-local IO
[fs/lustre-release.git] / lustre / ofd / ofd_dev.c
index c654a30..6879a92 100644 (file)
@@ -70,7 +70,7 @@
 
 #include <obd_class.h>
 #include <obd_cksum.h>
-#include <lustre_param.h>
+#include <uapi/linux/lustre_param.h>
 #include <lustre_fid.h>
 #include <lustre_lfsck.h>
 #include <lustre/lustre_idl.h>
@@ -237,13 +237,14 @@ static void ofd_stack_fini(const struct lu_env *env, struct ofd_device *m,
        if (obd->obd_fail)
                strcat(flags, "A");
        lustre_cfg_bufs_set_string(&bufs, 1, flags);
-       lcfg = lustre_cfg_new(LCFG_CLEANUP, &bufs);
-       if (lcfg == NULL)
+       OBD_ALLOC(lcfg, lustre_cfg_len(bufs.lcfg_bufcount, bufs.lcfg_buflen));
+       if (!lcfg)
                RETURN_EXIT;
+       lustre_cfg_init(lcfg, LCFG_CLEANUP, &bufs);
 
        LASSERT(top);
        top->ld_ops->ldo_process_config(env, top, lcfg);
-       lustre_cfg_free(lcfg);
+       OBD_FREE(lcfg, lustre_cfg_len(lcfg->lcfg_bufcount, lcfg->lcfg_buflens));
 
        lu_site_purge(env, top->ld_site, ~0);
        if (!cfs_hash_is_empty(top->ld_site->ls_obj_hash)) {
@@ -691,7 +692,7 @@ static int ofd_procfs_init(struct ofd_device *ofd)
        /* lprocfs must be setup before the ofd so state can be safely added
         * to /proc incrementally as the ofd is setup */
        obd->obd_vars = lprocfs_ofd_obd_vars;
-       rc = lprocfs_obd_setup(obd);
+       rc = lprocfs_obd_setup(obd, false);
        if (rc) {
                CERROR("%s: lprocfs_obd_setup failed: %d.\n",
                       obd->obd_name, rc);
@@ -1526,8 +1527,7 @@ done:
                rc = ofd_seq_last_oid_write(env, ofd, oseq);
        } else {
                /* don't reuse orphan object, return last used objid */
-               ostid_set_id(oi, last);
-               rc = 0;
+               rc = ostid_set_id(oi, last);
        }
 
        GOTO(out_put, rc);
@@ -1559,7 +1559,8 @@ static int ofd_create_hdl(struct tgt_session_info *tsi)
        u64                      seq = ostid_seq(&oa->o_oi);
        u64                      oid = ostid_id(&oa->o_oi);
        struct ofd_seq          *oseq;
-       int                      rc = 0, diff;
+       s64 diff;
+       int rc = 0;
        int                      sync_trans = 0;
        long                     granted = 0;
 
@@ -1624,16 +1625,24 @@ static int ofd_create_hdl(struct tgt_session_info *tsi)
                if (!oseq->os_destroys_in_progress) {
                        CERROR("%s:[%llu] destroys_in_progress already"
                               " cleared\n", ofd_name(ofd), seq);
-                       ostid_set_id(&rep_oa->o_oi, ofd_seq_last_oid(oseq));
-                       GOTO(out, rc = 0);
+                       rc = ostid_set_id(&rep_oa->o_oi,
+                                         ofd_seq_last_oid(oseq));
+                       GOTO(out, rc);
                }
                diff = oid - ofd_seq_last_oid(oseq);
-               CDEBUG(D_HA, "ofd_last_id() = %llu -> diff = %d\n",
-                       ofd_seq_last_oid(oseq), diff);
+               CDEBUG(D_HA, "ofd_last_id() = %llu -> diff = %lld\n",
+                      ofd_seq_last_oid(oseq), diff);
                if (-diff > OST_MAX_PRECREATE) {
+                       LCONSOLE(D_INFO, "%s: too large difference between MDS "
+                                "LAST_ID "DFID" (%llu) and OST LAST_ID "DFID" "
+                                "(%llu), trust the OST\n",
+                                ofd_name(ofd), PFID(&oa->o_oi.oi_fid), oid,
+                                PFID(&oseq->os_oi.oi_fid),
+                                ofd_seq_last_oid(oseq));
+
                        /* Let MDS know that we are so far ahead. */
-                       ostid_set_id(&rep_oa->o_oi, ofd_seq_last_oid(oseq) + 1);
-                       rc = 0;
+                       rc = ostid_set_id(&rep_oa->o_oi,
+                                         ofd_seq_last_oid(oseq) + 1);
                } else if (diff < 0) {
                        rc = ofd_orphans_destroy(tsi->tsi_env, exp,
                                                 ofd, rep_oa);
@@ -1705,7 +1714,7 @@ static int ofd_create_hdl(struct tgt_session_info *tsi)
                                rc = granted;
                                granted = 0;
                                CDEBUG(D_HA, "%s: failed to acquire grant "
-                                      "space for precreate (%d): rc = %d\n",
+                                      "space for precreate (%lld): rc = %d\n",
                                       ofd_name(ofd), diff, rc);
                                diff = 0;
                        }
@@ -1725,7 +1734,7 @@ static int ofd_create_hdl(struct tgt_session_info *tsi)
 
                        CDEBUG(D_HA, "%s: precreate FID "DOSTID" is over "
                               "%u larger than the LAST_ID "DOSTID", only "
-                              "precreating the last %u objects.\n",
+                              "precreating the last %lld objects.\n",
                               ofd_name(ofd), POSTID(&oa->o_oi),
                               5 * OST_MAX_PRECREATE,
                               POSTID(&oseq->os_oi), diff);
@@ -1734,7 +1743,7 @@ static int ofd_create_hdl(struct tgt_session_info *tsi)
 
                while (diff > 0) {
                        next_id = ofd_seq_last_oid(oseq) + 1;
-                       count = ofd_precreate_batch(ofd, diff);
+                       count = ofd_precreate_batch(ofd, (int)diff);
 
                        CDEBUG(D_HA, "%s: reserve %d objects in group %#llx"
                               " at %llu\n", ofd_name(ofd),
@@ -1742,7 +1751,7 @@ static int ofd_create_hdl(struct tgt_session_info *tsi)
 
                        if (!(lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY)
                            && cfs_time_after(jiffies, enough_time)) {
-                               CDEBUG(D_HA, "%s: Slow creates, %d/%d objects"
+                               CDEBUG(D_HA, "%s: Slow creates, %d/%lld objects"
                                      " created at a rate of %d/s\n",
                                      ofd_name(ofd), created, diff + created,
                                      created / DISK_TIMEOUT);
@@ -1763,7 +1772,7 @@ static int ofd_create_hdl(struct tgt_session_info *tsi)
                    lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY)
                        LCONSOLE_WARN("%s: can't create the same count of"
                                      " objects when replaying the request"
-                                     " (diff is %d). see LU-4621\n",
+                                     " (diff is %lld). see LU-4621\n",
                                      ofd_name(ofd), diff);
 
                if (created > 0)
@@ -1781,7 +1790,7 @@ static int ofd_create_hdl(struct tgt_session_info *tsi)
                        granted = 0;
                }
 
-               ostid_set_id(&rep_oa->o_oi, ofd_seq_last_oid(oseq));
+               rc = ostid_set_id(&rep_oa->o_oi, ofd_seq_last_oid(oseq));
        }
        EXIT;
        ofd_counter_incr(exp, LPROC_OFD_STATS_CREATE,
@@ -2107,13 +2116,13 @@ out:
 static int ofd_ladvise_prefetch(const struct lu_env *env,
                                struct ofd_object *fo,
                                struct niobuf_local *lnb,
-                               __u64 start, __u64 end)
+                               __u64 start, __u64 end, enum dt_bufs_type dbt)
 {
-       struct ofd_thread_info  *info = ofd_info(env);
-       pgoff_t                  start_index, end_index, pages;
-       struct niobuf_remote     rnb;
-       unsigned long            nr_local;
-       int                      rc = 0;
+       struct ofd_thread_info *info = ofd_info(env);
+       pgoff_t start_index, end_index, pages;
+       struct niobuf_remote rnb;
+       unsigned long nr_local;
+       int rc = 0;
 
        if (end <= start)
                RETURN(-EINVAL);
@@ -2129,7 +2138,7 @@ static int ofd_ladvise_prefetch(const struct lu_env *env,
        if (end > info->fti_attr.la_size)
                end = info->fti_attr.la_size;
 
-       if (end == 0)
+       if (end <= start)
                GOTO(out_unlock, rc);
 
        /* We need page aligned offset and length */
@@ -2141,7 +2150,7 @@ static int ofd_ladvise_prefetch(const struct lu_env *env,
                        PTLRPC_MAX_BRW_PAGES;
                rnb.rnb_offset = start_index << PAGE_SHIFT;
                rnb.rnb_len = nr_local << PAGE_SHIFT;
-               rc = dt_bufs_get(env, ofd_object_child(fo), &rnb, lnb, 0);
+               rc = dt_bufs_get(env, ofd_object_child(fo), &rnb, lnb, dbt);
                if (unlikely(rc < 0))
                        break;
                nr_local = rc;
@@ -2179,7 +2188,7 @@ static int ofd_ladvise_hdl(struct tgt_session_info *tsi)
        struct ptlrpc_thread *svc_thread = req->rq_svc_thread;
        const struct lu_env *env = svc_thread->t_env;
        struct tgt_thread_big_cache *tbc = svc_thread->t_data;
-       int rc = 0;
+       enum dt_bufs_type dbt = DT_BUFS_TYPE_READAHEAD;
        struct lu_ladvise *ladvise;
        int num_advise;
        struct ladvise_hdr *ladvise_hdr;
@@ -2190,6 +2199,7 @@ static int ofd_ladvise_hdl(struct tgt_session_info *tsi)
        struct dt_object *dob;
        __u64 start;
        __u64 end;
+       int rc = 0;
        ENTRY;
 
        CFS_FAIL_TIMEOUT(OBD_FAIL_OST_LADVISE_PAUSE, cfs_fail_val);
@@ -2238,6 +2248,9 @@ static int ofd_ladvise_hdl(struct tgt_session_info *tsi)
        LASSERT(fo != NULL);
        dob = ofd_object_child(fo);
 
+       if (ptlrpc_connection_is_local(exp->exp_connection))
+               dbt |= DT_BUFS_TYPE_LOCAL;
+
        for (i = 0; i < num_advise; i++, ladvise++) {
                start = ladvise->lla_start;
                end = ladvise->lla_end;
@@ -2265,7 +2278,7 @@ static int ofd_ladvise_hdl(struct tgt_session_info *tsi)
 
                        req->rq_status = ofd_ladvise_prefetch(env, fo,
                                                              tbc->local,
-                                                             start, end);
+                                                             start, end, dbt);
                        tgt_extent_unlock(&lockh, LCK_PR);
                        break;
                case LU_LADVISE_DONTNEED:
@@ -3018,10 +3031,10 @@ static int ofd_init0(const struct lu_env *env, struct ofd_device *m,
        }
        tgd->tgd_blockbits = fls(osfs->os_bsize) - 1;
 
-       if (ONE_MB_BRW_SIZE < (1U << tgd->tgd_blockbits))
+       if (DT_DEF_BRW_SIZE < (1U << tgd->tgd_blockbits))
                m->ofd_brw_size = 1U << tgd->tgd_blockbits;
        else
-               m->ofd_brw_size = ONE_MB_BRW_SIZE;
+               m->ofd_brw_size = DT_DEF_BRW_SIZE;
 
        m->ofd_cksum_types_supported = cksum_types_supported_server();
        m->ofd_precreate_batch = OFD_PRECREATE_BATCH_DEFAULT;