Whamcloud - gitweb
LU-2756 ost: buffer utilization rate of OST rqbd
[fs/lustre-release.git] / lustre / ost / ost_handler.c
index 3d789d9..873c23e 100644 (file)
@@ -27,7 +27,7 @@
  * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  *
- * Copyright (c) 2011, 2012, Whamcloud, Inc.
+ * Copyright (c) 2011, 2012, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
 #include <lustre_dlm.h>
 #include <lustre_export.h>
 #include <lustre_debug.h>
+#include <lustre_fid.h>
+#include <lustre_fld.h>
 #include <linux/init.h>
 #include <lprocfs_status.h>
 #include <libcfs/list.h>
+#include <lustre_quota.h>
+#include <lustre_fid.h>
 #include "ost_internal.h"
+#include <lustre_fid.h>
 
 static int oss_num_threads;
 CFS_MODULE_PARM(oss_num_threads, "i", int, 0444,
@@ -102,24 +107,35 @@ static void ost_drop_id(struct obd_export *exp, struct obdo *oa)
  *    b. for CMD, seq = FID_SEQ_OST_MDT0, FID_SEQ_OST_MDT1 - FID_SEQ_OST_MAX
  */
 static int ost_validate_obdo(struct obd_export *exp, struct obdo *oa,
-                             struct obd_ioobj *ioobj)
+                            struct obd_ioobj *ioobj) /* returns 0 or -EPROTO; ioobj may be NULL */
 {
-        if (oa != NULL && !(oa->o_valid & OBD_MD_FLGROUP)) {
-                oa->o_seq = FID_SEQ_OST_MDT0;
-                if (ioobj)
-                        ioobj->ioo_seq = FID_SEQ_OST_MDT0;
-        /* remove fid_seq_is_rsvd() after FID-on-OST allows SEQ > 9 */
-        } else if (oa == NULL || !(fid_seq_is_rsvd(oa->o_seq) ||
-                                   fid_seq_is_mdt0(oa->o_seq))) {
-                CERROR("%s: client %s sent invalid object "POSTID"\n",
-                       exp->exp_obd->obd_name, obd_export_nid2str(exp),
-                       oa ? oa->o_id : -1, oa ? oa->o_seq : -1);
-                return -EPROTO;
-        }
-        obdo_from_ostid(oa, &oa->o_oi);
-        if (ioobj)
-                ioobj_from_obdo(ioobj, oa);
-        return 0;
+       if (unlikely(oa != NULL && !(oa->o_valid & OBD_MD_FLGROUP))) {
+               oa->o_seq = FID_SEQ_OST_MDT0; /* OBD_MD_FLGROUP unset: default the seq */
+               if (ioobj)
+                       ioobj->ioo_seq = FID_SEQ_OST_MDT0; /* keep ioobj seq in step with oa */
+       } else if (unlikely(oa == NULL || !(fid_seq_is_idif(oa->o_seq) ||
+                                           fid_seq_is_mdt(oa->o_seq) ||
+                                           fid_seq_is_echo(oa->o_seq)))) { /* NULL oa, or seq fails all three checks */
+               CERROR("%s: client %s sent bad object "POSTID": rc = -EPROTO\n",
+                      exp->exp_obd->obd_name, obd_export_nid2str(exp),
+                      oa ? oa->o_id : -1, oa ? oa->o_seq : -1);
+               return -EPROTO;
+       }
+
+       obdo_from_ostid(oa, &oa->o_oi); /* oa is non-NULL here: the NULL case returned above */
+       if (ioobj != NULL) {
+               unsigned max_brw = ioobj_max_brw_get(ioobj);
+
+               if (unlikely((max_brw & (max_brw - 1)) != 0)) { /* true unless max_brw is 0 or a power of two */
+                       CERROR("%s: client %s sent bad ioobj max %u for "POSTID
+                              ": rc = -EPROTO\n", exp->exp_obd->obd_name,
+                              obd_export_nid2str(exp), max_brw,
+                              oa->o_id, oa->o_seq);
+                       return -EPROTO;
+               }
+               ioobj_from_obdo(ioobj, oa); /* reached only when the max_brw check passed */
+       }
+       return 0;
 }
 
 void oti_to_request(struct obd_trans_info *oti, struct ptlrpc_request *req)
@@ -230,7 +246,7 @@ static int ost_lock_get(struct obd_export *exp, struct obdo *oa,
             !(oa->o_flags & OBD_FL_SRVLOCK))
                 RETURN(0);
 
-        osc_build_res_name(oa->o_id, oa->o_seq, &res_id);
+       ostid_build_res_name(&oa->o_oi, &res_id);
         CDEBUG(D_INODE, "OST-side extent lock.\n");
 
         policy.l_extent.start = start & CFS_PAGE_MASK;
@@ -624,7 +640,7 @@ static int ost_brw_lock_get(int mode, struct obd_export *exp,
         int i;
         ENTRY;
 
-        osc_build_res_name(obj->ioo_id, obj->ioo_seq, &res_id);
+       ostid_build_res_name(&obj->ioo_oid, &res_id);
         LASSERT(mode == LCK_PR || mode == LCK_PW);
         LASSERT(!lustre_handle_is_used(lh));
 
@@ -801,10 +817,10 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti)
         if (rc != 0)
                 GOTO(out_lock, rc);
 
-        desc = ptlrpc_prep_bulk_exp(req, npages,
-                                     BULK_PUT_SOURCE, OST_BULK_PORTAL);
-        if (desc == NULL)
-                GOTO(out_commitrw, rc = -ENOMEM);
+       desc = ptlrpc_prep_bulk_exp(req, npages, ioobj_max_brw_get(ioo),
+                                   BULK_PUT_SOURCE, OST_BULK_PORTAL);
+       if (desc == NULL)
+               GOTO(out_commitrw, rc = -ENOMEM);
 
         nob = 0;
         for (i = 0; i < npages; i++) {
@@ -1091,14 +1107,13 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
         if (rc != 0)
                 GOTO(out_lock, rc);
 
-        desc = ptlrpc_prep_bulk_exp(req, npages,
-                                     BULK_GET_SINK, OST_BULK_PORTAL);
-        if (desc == NULL)
-                GOTO(skip_transfer, rc = -ENOMEM);
-
-        /* NB Having prepped, we must commit... */
+       desc = ptlrpc_prep_bulk_exp(req, npages, ioobj_max_brw_get(ioo),
+                                   BULK_GET_SINK, OST_BULK_PORTAL);
+       if (desc == NULL)
+               GOTO(skip_transfer, rc = -ENOMEM);
 
-        for (i = 0; i < npages; i++)
+       /* NB Having prepped, we must commit... */
+       for (i = 0; i < npages; i++)
                ptlrpc_prep_bulk_page_nopin(desc, local_nb[i].page,
                                            local_nb[i].lnb_page_offset,
                                            local_nb[i].len);
@@ -1324,10 +1339,27 @@ static int ost_get_info(struct obd_export *exp, struct ptlrpc_request *req)
         if (reply == NULL)
                 RETURN(-ENOMEM);
 
+       if (KEY_IS(KEY_LAST_FID)) {
+               void *val;
+               int vallen;
+
+               req_capsule_extend(pill, &RQF_OST_GET_INFO_LAST_FID);
+               val = req_capsule_client_get(pill, &RMF_SETINFO_VAL);
+               vallen = req_capsule_get_size(pill, &RMF_SETINFO_VAL,
+                                             RCL_CLIENT);
+               if (val != NULL && vallen > 0 && replylen >= vallen) {
+                       memcpy(reply, val, vallen);
+               } else {
+                       CERROR("%s: invalid req val %p vallen %d replylen %d\n",
+                              exp->exp_obd->obd_name, val, vallen, replylen);
+                       GOTO(out, rc = -EINVAL);
+               }
+       }
+
         /* call again to fill in the reply buffer */
         rc = obd_get_info(req->rq_svc_thread->t_env, exp, keylen, key,
                           &replylen, reply, NULL);
-
+out:
         lustre_msg_set_status(req->rq_repmsg, 0);
         RETURN(rc);
 }
@@ -1391,7 +1423,7 @@ do {                                                                      \
                                      OBD_CONNECT_RMT_CLIENT_FORCE |    \
                                      OBD_CONNECT_OSS_CAPA);            \
        spin_lock(&exp->exp_lock);                                      \
-       exp->exp_connect_flags = reply->ocd_connect_flags;              \
+       *exp_connect_flags_ptr(exp) = reply->ocd_connect_flags;         \
        spin_unlock(&exp->exp_lock);                                    \
 } while (0)
 
@@ -1490,7 +1522,7 @@ static int ost_init_sec_level(struct ptlrpc_request *req)
                                 reply->ocd_connect_flags &= ~OBD_CONNECT_OSS_CAPA;
 
                        spin_lock(&exp->exp_lock);
-                       exp->exp_connect_flags = reply->ocd_connect_flags;
+                       *exp_connect_flags_ptr(exp) = reply->ocd_connect_flags;
                        spin_unlock(&exp->exp_lock);
                 }
                 break;
@@ -1595,8 +1627,8 @@ int ost_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
                        OBD_FREE_PTR(oinfo);
                        GOTO(out_env, rc = -ENOMEM);
                }
-               oa->o_id = lock->l_resource->lr_name.name[0];
-               oa->o_seq = lock->l_resource->lr_name.name[1];
+
+               ostid_res_name_to_id(&oa->o_oi, &lock->l_resource->lr_name);
                oa->o_valid = OBD_MD_FLID|OBD_MD_FLGROUP;
                oinfo->oi_oa = oa;
                oinfo->oi_capa = BYPASS_CAPA;
@@ -1663,6 +1695,15 @@ int ost_msg_check_version(struct lustre_msg *msg)
                                lustre_msg_get_version(msg),
                                LUSTRE_OBD_VERSION);
                 break;
+       case SEQ_QUERY:
+               /* Note: clients always use MDS_VERSION for FID requests */
+               rc = lustre_msg_check_version(msg, LUSTRE_MDS_VERSION);
+               if (rc)
+                       CERROR("bad opc %u version %08x, expecting %08x\n",
+                              lustre_msg_get_opc(msg),
+                              lustre_msg_get_version(msg),
+                              LUSTRE_MDS_VERSION);
+               break;
         case OST_CREATE:
         case OST_DESTROY:
         case OST_GETATTR:
@@ -1839,10 +1880,9 @@ static int ost_rw_hpreq_lock_match(struct ptlrpc_request *req,
         nb += ioo->ioo_bufcnt - 1;
         ext.end = nb->offset + nb->len - 1;
 
-        LASSERT(lock->l_resource != NULL);
-        if (!osc_res_name_eq(ioo->ioo_id, ioo->ioo_seq,
-                             &lock->l_resource->lr_name))
-                RETURN(0);
+       LASSERT(lock->l_resource != NULL);
+       if (!ostid_res_name_eq(&ioo->ioo_oid, &lock->l_resource->lr_name))
+               RETURN(0);
 
         mode = LCK_PW;
         if (opc == OST_READ)
@@ -1890,7 +1930,7 @@ static int ost_rw_hpreq_check(struct ptlrpc_request *req)
         LASSERT(nb != NULL);
         LASSERT(!(nb->flags & OBD_BRW_SRVLOCK));
 
-        osc_build_res_name(ioo->ioo_id, ioo->ioo_seq, &opd.opd_resid);
+       ostid_build_res_name(&ioo->ioo_oid, &opd.opd_resid);
 
         opd.opd_req = req;
         mode = LCK_PW;
@@ -1915,7 +1955,7 @@ static int ost_rw_hpreq_check(struct ptlrpc_request *req)
         CDEBUG(D_DLMTRACE, "%s: refreshed %u locks timeout for req %p.\n",
                obd->obd_name, opd.opd_locks, req);
 
-        RETURN(opd.opd_locks);
+        RETURN(opd.opd_locks > 0);
 }
 
 static void ost_rw_hpreq_fini(struct ptlrpc_request *req)
@@ -1974,7 +2014,7 @@ static int ost_punch_hpreq_check(struct ptlrpc_request *req)
                 opd.opd_extent.end = OBD_OBJECT_EOF;
         opd.opd_timeout = prolong_timeout(req);
 
-        osc_build_res_name(oa->o_id, oa->o_seq, &opd.opd_resid);
+       ostid_build_res_name(&oa->o_oi, &opd.opd_resid);
 
         CDEBUG(D_DLMTRACE,
                "%s: refresh locks: "LPU64"/"LPU64" ("LPU64"->"LPU64")\n",
@@ -2170,7 +2210,7 @@ int ost_handle(struct ptlrpc_request *req)
                 RETURN(rc);
 
        if (req && req->rq_reqmsg && req->rq_export &&
-           (req->rq_export->exp_connect_flags & OBD_CONNECT_JOBSTATS))
+           (exp_connect_flags(req->rq_export) & OBD_CONNECT_JOBSTATS))
                oti->oti_jobid = lustre_msg_get_jobid(req->rq_reqmsg);
 
         switch (lustre_msg_get_opc(req->rq_reqmsg)) {
@@ -2299,6 +2339,10 @@ int ost_handle(struct ptlrpc_request *req)
                 req_capsule_set(&req->rq_pill, &RQF_OST_GET_INFO_GENERIC);
                 rc = ost_get_info(req->rq_export, req);
                 break;
+       case SEQ_QUERY:
+               CDEBUG(D_INODE, "seq\n");
+               rc = seq_handle(req);
+               break;
         case OST_QUOTACHECK:
                 CDEBUG(D_INODE, "quotacheck\n");
                 req_capsule_set(&req->rq_pill, &RQF_OST_QUOTACHECK);
@@ -2567,9 +2611,9 @@ static int ost_setup(struct obd_device *obd, struct lustre_cfg* lcfg)
                .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
                .psc_buf                = {
                        .bc_nbufs               = OST_NBUFS,
-                       .bc_buf_size            = OST_BUFSIZE,
-                       .bc_req_max_size        = OST_MAXREQSIZE,
-                       .bc_rep_max_size        = OST_MAXREPSIZE,
+                       .bc_buf_size            = OST_IO_BUFSIZE,
+                       .bc_req_max_size        = OST_IO_MAXREQSIZE,
+                       .bc_rep_max_size        = OST_IO_MAXREPSIZE,
                        .bc_req_portal          = OST_IO_PORTAL,
                        .bc_rep_portal          = OSC_REPLY_PORTAL,
                },
@@ -2605,10 +2649,53 @@ static int ost_setup(struct obd_device *obd, struct lustre_cfg* lcfg)
                GOTO(out_create, rc);
         }
 
+       memset(&svc_conf, 0, sizeof(svc_conf));
+       svc_conf = (typeof(svc_conf)) {
+               .psc_name               = "ost_seq",
+               .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
+               .psc_buf                = {
+                       .bc_nbufs               = OST_NBUFS,
+                       .bc_buf_size            = OST_BUFSIZE,
+                       .bc_req_max_size        = OST_MAXREQSIZE,
+                       .bc_rep_max_size        = OST_MAXREPSIZE,
+                       .bc_req_portal          = SEQ_DATA_PORTAL,
+                       .bc_rep_portal          = OSC_REPLY_PORTAL,
+               },
+               .psc_thr                = {
+                       .tc_thr_name            = "ll_ost_seq",
+                       .tc_thr_factor          = OSS_CR_THR_FACTOR,
+                       .tc_nthrs_init          = OSS_CR_NTHRS_INIT,
+                       .tc_nthrs_base          = OSS_CR_NTHRS_BASE,
+                       .tc_nthrs_max           = OSS_CR_NTHRS_MAX,
+                       .tc_nthrs_user          = oss_num_create_threads,
+                       .tc_cpu_affinity        = 1,
+                       .tc_ctx_tags            = LCT_DT_THREAD,
+               },
+
+               .psc_cpt                = {
+                       .cc_pattern          = oss_cpts,
+               },
+               .psc_ops                = {
+                       .so_req_handler         = ost_handle,
+                       .so_req_printer         = target_print_req,
+                       .so_hpreq_handler       = NULL,
+               },
+       };
+       ost->ost_seq_service = ptlrpc_register_service(&svc_conf,
+                                                     obd->obd_proc_entry);
+       if (IS_ERR(ost->ost_seq_service)) {
+               rc = PTR_ERR(ost->ost_seq_service);
+               CERROR("failed to start OST seq service: %d\n", rc);
+               ost->ost_seq_service = NULL;
+               GOTO(out_io, rc);
+       }
+
         ping_evictor_start();
 
         RETURN(0);
-
+out_io:
+       ptlrpc_unregister_service(ost->ost_io_service);
+       ost->ost_io_service = NULL;
 out_create:
         ptlrpc_unregister_service(ost->ost_create_service);
         ost->ost_create_service = NULL;
@@ -2622,22 +2709,24 @@ out_lprocfs:
 
 static int ost_cleanup(struct obd_device *obd)
 {
-        struct ost_obd *ost = &obd->u.ost;
-        int err = 0;
-        ENTRY;
+       struct ost_obd *ost = &obd->u.ost;
+       int err = 0;
+       ENTRY;
 
-        ping_evictor_stop();
+       ping_evictor_stop();
 
-        /* there is no recovery for OST OBD, all recovery is controlled by
-         * obdfilter OBD */
-        LASSERT(obd->obd_recovering == 0);
+       /* There is no recovery for the OST OBD; all recovery is controlled by
+        * the obdfilter OBD. */
+       LASSERT(obd->obd_recovering == 0);
        mutex_lock(&ost->ost_health_mutex);
-        ptlrpc_unregister_service(ost->ost_service);
-        ptlrpc_unregister_service(ost->ost_create_service);
-        ptlrpc_unregister_service(ost->ost_io_service);
-        ost->ost_service = NULL;
-        ost->ost_create_service = NULL;
+       ptlrpc_unregister_service(ost->ost_service);
+       ptlrpc_unregister_service(ost->ost_create_service);
+       ptlrpc_unregister_service(ost->ost_io_service);
+       ptlrpc_unregister_service(ost->ost_seq_service);
+       ost->ost_service = NULL;
+       ost->ost_create_service = NULL;
        ost->ost_io_service = NULL;
+       ost->ost_seq_service = NULL;
 
        mutex_unlock(&ost->ost_health_mutex);