Whamcloud - gitweb
Branch b_release_1_8_1
author bobijam <bobijam>
Wed, 6 May 2009 02:31:02 +0000 (02:31 +0000)
committer bobijam <bobijam>
Wed, 6 May 2009 02:31:02 +0000 (02:31 +0000)
b=12069
i=adilger
i=tom.wang (wangdi)

Enable adjusting grant_shrink_interval and grant target value via /proc.

lustre/include/obd.h
lustre/include/obd_support.h
lustre/osc/lproc_osc.c
lustre/osc/osc_internal.h
lustre/osc/osc_request.c

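diff --git a/lustre/include/obd.h b/lustre/include/obd.h
index cfeb383..a906c12 100644
--- a/lustre/include/obd.h
+++ b/lustre/include/obd.h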
@@ -428,6 +428,7 @@ struct client_obd {
         cfs_time_t               cl_next_shrink_grant;   /* jiffies */
         struct list_head         cl_grant_shrink_list;  /* Timeout event list */
         struct semaphore         cl_grant_sem;   /*grant shrink list semaphore*/
+        int                      cl_grant_shrink_interval; /* seconds */
 
         /* keep track of objects that have lois that contain pages which
          * have been queued for async brw.  this lock also protects the
@@ -542,7 +543,7 @@ struct mds_obd {
         char                            *mds_profile;
         struct obd_export               *mds_osc_exp; /* XXX lov_exp */
         struct lov_desc                  mds_lov_desc;
-       
+
         /* mark pages dirty for write. */
         bitmap_t                         *mds_lov_page_dirty;
         /* array for store pages with obd_id */
@@ -1063,7 +1064,7 @@ enum obd_cleanup_stage {
 #define KEY_ASYNC               "async"
 #define KEY_CAPA_KEY            "capa_key"
 #define KEY_GRANT_SHRINK        "grant_shrink"
-#define KEY_OFF_RPCSIZE                "off_rpcsize"
+#define KEY_OFF_RPCSIZE         "off_rpcsize"
 
 struct obd_ops {
         struct module *o_owner;
@@ -1316,14 +1317,14 @@ static inline struct lsm_operations *lsm_op_find(int magic)
 int lvfs_check_io_health(struct obd_device *obd, struct file *file);
 
 /* Requests for obd_extent_calc() */
-#define OBD_CALC_STRIPE_START                 0x0001 
-#define OBD_CALC_STRIPE_END                   0x0010 
-#define OBD_CALC_STRIPE_RPC_ALIGN      0x0100 
+#define OBD_CALC_STRIPE_START          0x0001
+#define OBD_CALC_STRIPE_END            0x0010
+#define OBD_CALC_STRIPE_RPC_ALIGN      0x0100
 
 #define OBD_CALC_STRIPE_RPC_START_ALIGN (OBD_CALC_STRIPE_START | \
-                                        OBD_CALC_STRIPE_RPC_ALIGN)
+                                         OBD_CALC_STRIPE_RPC_ALIGN)
 #define OBD_CALC_STRIPE_RPC_END_ALIGN (OBD_CALC_STRIPE_START | \
-                                      OBD_CALC_STRIPE_RPC_ALIGN)
+                                       OBD_CALC_STRIPE_RPC_ALIGN)
 
 static inline void obd_transno_commit_cb(struct obd_device *obd, __u64 transno,
                                          struct obd_export *exp, int error)
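diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h
index 81c8c0a..ad8ec2a 100644
--- a/lustre/include/obd_support.h
+++ b/lustre/include/obd_support.h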
@@ -121,8 +121,7 @@ extern unsigned int obd_alloc_fail_rate;
  * Time interval of shrink, if the client is "idle" more than this interval,
  * then the ll_grant thread will return the requested grant space to filter
  */
-#define GRANT_SHRINK_INTERVAL             360/*6 minutes*/
-
+#define GRANT_SHRINK_INTERVAL            1200/*20 minutes*/
 
 #define OBD_FAIL_MDS                     0x100
 #define OBD_FAIL_MDS_HANDLE_UNPACK       0x101
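diff --git a/lustre/osc/lproc_osc.c b/lustre/osc/lproc_osc.c
index 8c42b04..37d2ec8 100644
--- a/lustre/osc/lproc_osc.c
+++ b/lustre/osc/lproc_osc.c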
@@ -223,6 +223,70 @@ static int osc_rd_cur_grant_bytes(char *page, char **start, off_t off,
         return rc;
 }
 
+static int osc_wr_cur_grant_bytes(struct file *file, const char *buffer,
+                                  unsigned long count, void *data)
+{
+        struct obd_device *obd = data;
+        struct client_obd *cli = &obd->u.cli;
+        int                rc;
+        __u64              val;
+
+        if (obd == NULL)
+                return 0;
+
+        rc = lprocfs_write_u64_helper(buffer, count, &val);
+        if (rc)
+                return rc;
+
+        /* this is only for shrinking grant */
+        client_obd_list_lock(&cli->cl_loi_list_lock);
+        if (val >= cli->cl_avail_grant) {
+                client_obd_list_unlock(&cli->cl_loi_list_lock);
+                return 0;
+        }
+        client_obd_list_unlock(&cli->cl_loi_list_lock);
+
+        LPROCFS_CLIMP_CHECK(obd);
+        if (cli->cl_import->imp_state == LUSTRE_IMP_FULL)
+                rc = osc_shrink_grant_to_target(cli, val);
+        LPROCFS_CLIMP_EXIT(obd);
+        if (rc)
+                return rc;
+        return count;
+}
+
+static int osc_rd_grant_shrink_interval(char *page, char **start, off_t off,
+                                        int count, int *eof, void *data)
+{
+        struct obd_device *obd = data;
+
+        if (obd == NULL)
+                return 0;
+        return snprintf(page, count, "%d\n",
+                        obd->u.cli.cl_grant_shrink_interval);
+}
+
+static int osc_wr_grant_shrink_interval(struct file *file, const char *buffer,
+                                        unsigned long count, void *data)
+{
+        struct obd_device *obd = data;
+        int val, rc;
+
+        if (obd == NULL)
+                return 0;
+
+        rc = lprocfs_write_helper(buffer, count, &val);
+        if (rc)
+                return rc;
+
+        if (val <= 0)
+                return -ERANGE;
+
+        obd->u.cli.cl_grant_shrink_interval = val;
+
+        return count;
+}
+
 static int osc_rd_create_count(char *page, char **start, off_t off, int count,
                                int *eof, void *data)
 {
@@ -465,7 +529,10 @@ static struct lprocfs_vars lprocfs_osc_obd_vars[] = {
                                 osc_wr_max_rpcs_in_flight, 0 },
         { "max_dirty_mb",    osc_rd_max_dirty_mb, osc_wr_max_dirty_mb, 0 },
         { "cur_dirty_bytes", osc_rd_cur_dirty_bytes, 0, 0 },
-        { "cur_grant_bytes", osc_rd_cur_grant_bytes, 0, 0 },
+        { "cur_grant_bytes", osc_rd_cur_grant_bytes,
+                             osc_wr_cur_grant_bytes, 0 },
+        { "grant_shrink_interval", osc_rd_grant_shrink_interval,
+                                   osc_wr_grant_shrink_interval, 0 },
         { "create_count",    osc_rd_create_count, osc_wr_create_count, 0 },
         { "max_create_count", osc_rd_max_create_count,
                               osc_wr_max_create_count, 0},
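diff --git a/lustre/osc/osc_internal.h b/lustre/osc/osc_internal.h
index 393e6d9..3a31967 100644
--- a/lustre/osc/osc_internal.h
+++ b/lustre/osc/osc_internal.h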
@@ -91,11 +91,12 @@ struct osc_cache_waiter {
 
 int osc_precreate(struct obd_export *exp);
 int osc_create(struct obd_export *exp, struct obdo *oa,
-              struct lov_stripe_md **ea, struct obd_trans_info *oti);
+               struct lov_stripe_md **ea, struct obd_trans_info *oti);
 int osc_real_create(struct obd_export *exp, struct obdo *oa,
-              struct lov_stripe_md **ea, struct obd_trans_info *oti);
+                    struct lov_stripe_md **ea, struct obd_trans_info *oti);
 void oscc_init(struct obd_device *obd);
 void osc_wake_cache_waiters(struct client_obd *cli);
+int osc_shrink_grant_to_target(struct client_obd *cli, long target);
 
 #ifdef LPROCFS
 int lproc_osc_attach_seqstat(struct obd_device *dev);
@@ -121,8 +122,8 @@ static inline int osc_recoverable_error(int rc)
 /* return 1 if osc should be resend request */
 static inline int osc_should_resend(int resend, struct client_obd *cli)
 {
-        return atomic_read(&cli->cl_resends) ? 
-                atomic_read(&cli->cl_resends) > resend : 1; 
+        return atomic_read(&cli->cl_resends) ?
+                atomic_read(&cli->cl_resends) > resend : 1;
 }
 
 static inline int osc_exp_is_2_0_server(struct obd_export *exp) {
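diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c
index df93f5e..0a71674 100644
--- a/lustre/osc/osc_request.c
+++ b/lustre/osc/osc_request.c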
@@ -233,7 +233,7 @@ static int osc_getattr(struct obd_export *exp, struct obd_info *oinfo)
                 RETURN(-ENOMEM);
 
         body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body));
-        lustre_set_wire_obdo(&body->oa, oinfo->oi_oa);        
+        lustre_set_wire_obdo(&body->oa, oinfo->oi_oa);
 
         ptlrpc_req_set_repsize(req, 2, size);
 
@@ -278,7 +278,7 @@ static int osc_setattr(struct obd_export *exp, struct obd_info *oinfo,
                 RETURN(-ENOMEM);
 
         body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body));
-        lustre_set_wire_obdo(&body->oa, oinfo->oi_oa);        
+        lustre_set_wire_obdo(&body->oa, oinfo->oi_oa);
 
         ptlrpc_req_set_repsize(req, 2, size);
 
@@ -291,7 +291,7 @@ static int osc_setattr(struct obd_export *exp, struct obd_info *oinfo,
         if (body == NULL)
                 GOTO(out, rc = -EPROTO);
 
-        lustre_get_wire_obdo(oinfo->oi_oa, &body->oa);        
+        lustre_get_wire_obdo(oinfo->oi_oa, &body->oa);
 
         EXIT;
 out:
@@ -315,7 +315,7 @@ static int osc_setattr_interpret(struct ptlrpc_request *req,
                 GOTO(out, rc = -EPROTO);
         }
 
-        lustre_get_wire_obdo(aa->aa_oi->oi_oa, &body->oa);        
+        lustre_get_wire_obdo(aa->aa_oi->oi_oa, &body->oa);
 out:
         rc = aa->aa_oi->oi_cb_up(aa->aa_oi, rc);
         RETURN(rc);
@@ -348,7 +348,7 @@ static int osc_setattr_async(struct obd_export *exp, struct obd_info *oinfo,
                 oinfo->oi_oa->o_lcookie = *oti->oti_logcookies;
         }
 
-        lustre_set_wire_obdo(&body->oa, oinfo->oi_oa);        
+        lustre_set_wire_obdo(&body->oa, oinfo->oi_oa);
         ptlrpc_req_set_repsize(req, 2, size);
         /* do mds to ost setattr asynchronouly */
         if (!rqset) {
@@ -393,7 +393,7 @@ int osc_real_create(struct obd_export *exp, struct obdo *oa,
                 GOTO(out, rc = -ENOMEM);
 
         body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body));
-        lustre_set_wire_obdo(&body->oa, oa);        
+        lustre_set_wire_obdo(&body->oa, oa);
 
         ptlrpc_req_set_repsize(req, 2, size);
         if ((oa->o_valid & OBD_MD_FLFLAGS) &&
@@ -415,7 +415,7 @@ int osc_real_create(struct obd_export *exp, struct obdo *oa,
                 GOTO (out_req, rc = -EPROTO);
         }
 
-        lustre_get_wire_obdo(oa, &body->oa);        
+        lustre_get_wire_obdo(oa, &body->oa);
 
         /* This should really be sent by the OST */
         oa->o_blksize = PTLRPC_MAX_BRW_SIZE;
@@ -464,7 +464,7 @@ static int osc_punch_interpret(struct ptlrpc_request *req,
                 GOTO(out, rc = -EPROTO);
         }
 
-        lustre_get_wire_obdo(aa->aa_oi->oi_oa, &body->oa);        
+        lustre_get_wire_obdo(aa->aa_oi->oi_oa, &body->oa);
 out:
         rc = aa->aa_oi->oi_cb_up(aa->aa_oi, rc);
         RETURN(rc);
@@ -734,8 +734,8 @@ static void osc_announce_cached(struct client_obd *cli, struct obdo *oa,
 
 static void osc_update_next_shrink(struct client_obd *cli)
 {
-        int time = GRANT_SHRINK_INTERVAL;
-        cli->cl_next_shrink_grant = cfs_time_shift(time);
+        cli->cl_next_shrink_grant =
+                cfs_time_shift(cli->cl_grant_shrink_interval);
         CDEBUG(D_CACHE, "next time %ld to shrink grant \n",
                cli->cl_next_shrink_grant);
 }
@@ -860,7 +860,7 @@ static int osc_shrink_grant_interpret(struct ptlrpc_request *req,
         struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
         struct obdo *oa = aa->aa_oa;
         struct ost_body *body;
-        
+
         if (rc != 0) {
                 client_obd_list_lock(&cli->cl_loi_list_lock);
                 cli->cl_avail_grant += oa->o_grant;
@@ -872,31 +872,68 @@ static int osc_shrink_grant_interpret(struct ptlrpc_request *req,
         osc_update_grant(cli, body);
 out:
         OBD_FREE_PTR(oa);
-        return rc;        
+        return rc;
 }
 
 static void osc_shrink_grant_local(struct client_obd *cli, struct obdo *oa)
 {
         client_obd_list_lock(&cli->cl_loi_list_lock);
         oa->o_grant = cli->cl_avail_grant / 4;
-        cli->cl_avail_grant -= oa->o_grant; 
+        cli->cl_avail_grant -= oa->o_grant;
         client_obd_list_unlock(&cli->cl_loi_list_lock);
         oa->o_flags |= OBD_FL_SHRINK_GRANT;
         osc_update_next_shrink(cli);
 }
 
+/* Shrink the current grant, either from some large amount to enough for a
+ * full set of in-flight RPCs, or if we have already shrunk to that limit
+ * then to enough for a single RPC.  This avoids keeping more grant than
+ * needed, and avoids shrinking the grant piecemeal. */
 static int osc_shrink_grant(struct client_obd *cli)
 {
+        long target = (cli->cl_max_rpcs_in_flight + 1) *
+                      cli->cl_max_pages_per_rpc;
+
+        client_obd_list_lock(&cli->cl_loi_list_lock);
+        if (cli->cl_avail_grant <= target)
+                target = cli->cl_max_pages_per_rpc;
+        client_obd_list_unlock(&cli->cl_loi_list_lock);
+
+        return osc_shrink_grant_to_target(cli, target);
+}
+
+int osc_shrink_grant_to_target(struct client_obd *cli, long target)
+{
         int    rc = 0;
         struct ost_body     *body;
         ENTRY;
 
+        client_obd_list_lock(&cli->cl_loi_list_lock);
+        /* Don't shrink if we are already above or below the desired limit
+         * We don't want to shrink below a single RPC, as that will negatively
+         * impact block allocation and long-term performance. */
+        if (target < cli->cl_max_pages_per_rpc)
+                target = cli->cl_max_pages_per_rpc;
+
+        if (target >= cli->cl_avail_grant) {
+                client_obd_list_unlock(&cli->cl_loi_list_lock);
+                RETURN(0);
+        }
+        client_obd_list_unlock(&cli->cl_loi_list_lock);
+
         OBD_ALLOC_PTR(body);
         if (!body)
                 RETURN(-ENOMEM);
 
         osc_announce_cached(cli, &body->oa, 0);
-        osc_shrink_grant_local(cli, &body->oa);
+
+        client_obd_list_lock(&cli->cl_loi_list_lock);
+        body->oa.o_grant = cli->cl_avail_grant - target;
+        cli->cl_avail_grant = target;
+        client_obd_list_unlock(&cli->cl_loi_list_lock);
+        body->oa.o_flags |= OBD_FL_SHRINK_GRANT;
+        osc_update_next_shrink(cli);
+
         rc = osc_set_info_async(cli->cl_import->imp_obd->obd_self_export,
                                 sizeof(KEY_GRANT_SHRINK), KEY_GRANT_SHRINK,
                                 sizeof(*body), body, NULL);
@@ -905,8 +942,7 @@ static int osc_shrink_grant(struct client_obd *cli)
                 cli->cl_avail_grant += body->oa.o_grant;
                 client_obd_list_unlock(&cli->cl_loi_list_lock);
         }
-        if (body)
-               OBD_FREE_PTR(body);
+        OBD_FREE_PTR(body);
         RETURN(rc);
 }
 
@@ -940,24 +976,24 @@ static int osc_add_shrink_grant(struct client_obd *client)
 {
         int rc;
 
-        rc = ptlrpc_add_timeout_client(GRANT_SHRINK_INTERVAL, 
-                                         TIMEOUT_GRANT,
-                                         osc_grant_shrink_grant_cb, NULL,
-                                         &client->cl_grant_shrink_list);
+        rc = ptlrpc_add_timeout_client(client->cl_grant_shrink_interval,
+                                       TIMEOUT_GRANT,
+                                       osc_grant_shrink_grant_cb, NULL,
+                                       &client->cl_grant_shrink_list);
         if (rc) {
-                CERROR("add grant client %s error %d\n", 
+                CERROR("add grant client %s error %d\n",
                         client->cl_import->imp_obd->obd_name, rc);
                 return rc;
         }
-        CDEBUG(D_CACHE, "add grant client %s \n", 
+        CDEBUG(D_CACHE, "add grant client %s \n",
                client->cl_import->imp_obd->obd_name);
         osc_update_next_shrink(client);
-        return 0; 
+        return 0;
 }
 
 static int osc_del_shrink_grant(struct client_obd *client)
 {
-        return ptlrpc_del_timeout_client(&client->cl_grant_shrink_list, 
+        return ptlrpc_del_timeout_client(&client->cl_grant_shrink_list,
                                          TIMEOUT_GRANT);
 }
 
@@ -1210,7 +1246,7 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli,struct obdo *oa,
 
         osc_announce_cached(cli, &body->oa, opc == OST_WRITE ? requested_nob:0);
         if (osc_should_shrink_grant(cli))
-                osc_shrink_grant_local(cli, &body->oa); 
+                osc_shrink_grant_local(cli, &body->oa);
 
         /* size[REQ_REC_OFF] still sizeof (*body) */
         if (opc == OST_WRITE) {
@@ -1838,7 +1874,7 @@ static int osc_brw_async(int cmd, struct obd_export *exp,
                         if (copy == NULL)
                                 GOTO(out, rc = -ENOMEM);
                         memcpy(copy, ppga, pages_per_brw * sizeof(*copy));
-                        
+
                         OBDO_ALLOC(oa);
                         if (oa == NULL) {
                                 OBD_FREE(copy, pages_per_brw * sizeof(*copy));
@@ -2182,7 +2218,7 @@ static int brw_interpret(struct ptlrpc_request *request, void *data, int rc)
                 obd_count i;
                 for (i = 0; i < aa->aa_page_count; i++)
                         osc_release_write_grant(aa->aa_cli, aa->aa_ppga[i], 1);
-               
+
                 if (aa->aa_oa->o_flags & OBD_FL_TEMPORARY)
                         OBDO_FREE(aa->aa_oa);
         }
@@ -2396,7 +2432,7 @@ static int osc_send_oap_rpc(struct client_obd *cli, struct lov_oinfo *loi,
 #if defined(__KERNEL__) && defined(__linux__)
                  if(!(PageLocked(oap->oap_page) &&
                      (CheckWriteback(oap->oap_page, cmd) || oap->oap_oig !=NULL))) {
-                       CDEBUG(D_PAGE, "page %p lost wb %lx/%x\n",
+                        CDEBUG(D_PAGE, "page %p lost wb %lx/%x\n",
                                oap->oap_page, (long)oap->oap_page->flags, oap->oap_async_flags);
                         LBUG();
                 }
@@ -3787,12 +3823,12 @@ static int osc_get_info(struct obd_export *exp, obd_count keylen,
                 *stripe = 0;
                 RETURN(0);
         } else if (KEY_IS(KEY_OFF_RPCSIZE)) {
-               struct client_obd *cli = &exp->exp_obd->u.cli;
-               __u64 *rpcsize = val;
-               LASSERT(*vallen == sizeof(__u64));
-               *rpcsize = (__u64)cli->cl_max_pages_per_rpc;    
-               RETURN(0);
-       } else if (KEY_IS(KEY_LAST_ID)) {
+                struct client_obd *cli = &exp->exp_obd->u.cli;
+                __u64 *rpcsize = val;
+                LASSERT(*vallen == sizeof(__u64));
+                *rpcsize = (__u64)cli->cl_max_pages_per_rpc;
+                RETURN(0);
+        } else if (KEY_IS(KEY_LAST_ID)) {
                 struct ptlrpc_request *req;
                 obd_id *reply;
                 char *bufs[2] = { NULL, key };
@@ -3969,7 +4005,7 @@ static int osc_set_info_async(struct obd_export *exp, obd_count keylen,
                 }
                 *oa = ((struct ost_body *)val)->oa;
                 aa->aa_oa = oa;
-                 
+
                 size[1] = vallen;
                 ptlrpc_req_set_repsize(req, 2, size);
                 ptlrpcd_add_req(req);
@@ -4103,17 +4139,17 @@ static int osc_disconnect(struct obd_export *exp)
          * causes the following problem if setup (connect) and cleanup
          * (disconnect) are tangled together.
          *      connect p1                     disconnect p2
-         *   ptlrpc_connect_import 
+         *   ptlrpc_connect_import
          *     ...............               class_manual_cleanup
          *                                     osc_disconnect
          *                                     del_shrink_grant
          *   ptlrpc_connect_interrupt
          *     init_grant_shrink
-         *   add this client to shrink list                 
+         *   add this client to shrink list
          *                                      cleanup_osc
          * Bang! pinger trigger the shrink.
          * So the osc should be disconnected from the shrink list, after we
-         * are sure the import has been destroyed. BUG18662 
+         * are sure the import has been destroyed. BUG18662
          */
         if (obd->u.cli.cl_import == NULL)
                 osc_del_shrink_grant(&obd->u.cli);
@@ -4218,6 +4254,7 @@ int osc_setup(struct obd_device *obd, obd_count len, void *buf)
                 struct lprocfs_static_vars lvars = { 0 };
                 struct client_obd *cli = &obd->u.cli;
 
+                cli->cl_grant_shrink_interval = GRANT_SHRINK_INTERVAL;
                 lprocfs_osc_init_vars(&lvars);
                 if (lprocfs_obd_setup(obd, lvars.obd_vars) == 0) {
                         lproc_osc_attach_seqstat(obd);