Whamcloud - gitweb
LU-1303 lod: introduce QoS structures
authorAlex Zhuravlev <alexey.zhuravlev@intel.com>
Thu, 20 Sep 2012 07:23:17 +0000 (11:23 +0400)
committerOleg Drokin <green@whamcloud.com>
Fri, 28 Sep 2012 20:46:50 +0000 (16:46 -0400)
Like LOV before it, LOD will maintain the OST->OSS mapping so that
it is able to distribute load among OSSs.

Signed-off-by: Alex Zhuravlev <alexey.zhuravlev@intel.com>
Change-Id: I132382cb09b50aec1edcd667cde1851c9ee043dc
Reviewed-on: http://review.whamcloud.com/4055
Tested-by: Hudson
Reviewed-by: wangdi <di.wang@whamcloud.com>
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lustre/lod/Makefile.in
lustre/lod/lod_internal.h
lustre/lod/lod_lov.c
lustre/lod/lod_qos.c [new file with mode: 0644]

index 0eb50e0..31459b5 100644 (file)
@@ -1,5 +1,5 @@
 MODULES := lod
 MODULES := lod
-lod-objs := lod_dev.o lod_lov.o lproc_lod.o lod_pool.o lod_object.o
+lod-objs := lod_dev.o lod_lov.o lproc_lod.o lod_pool.o lod_object.o lod_qos.o
 
 EXTRA_DIST = $(lod-objs:.o=.c) lod_internal.h
 
 
 EXTRA_DIST = $(lod-objs:.o=.c) lod_internal.h
 
index 6bb5020..e0fedfb 100644 (file)
@@ -275,6 +275,10 @@ int lod_pool_new(struct obd_device *obd, char *poolname);
 int lod_pool_add(struct obd_device *obd, char *poolname, char *ostname);
 int lod_pool_remove(struct obd_device *obd, char *poolname, char *ostname);
 
 int lod_pool_add(struct obd_device *obd, char *poolname, char *ostname);
 int lod_pool_remove(struct obd_device *obd, char *poolname, char *ostname);
 
+/*
+ * lod_qos.c
+ *
+ * qos_add_tgt() links a target descriptor to the descriptor of the OSS it is
+ * connected to; qos_del_tgt() undoes that link.  Both return 0 on success or
+ * a negative errno.
+ */
+int qos_add_tgt(struct lod_device *lod, struct lod_ost_desc *ost_desc);
+int qos_del_tgt(struct lod_device *lod, struct lod_ost_desc *ost_desc);
+
 /* lproc_lod.c */
 extern struct file_operations lod_proc_target_fops;
 void lprocfs_lod_init_vars(struct lprocfs_static_vars *lvars);
 /* lproc_lod.c */
 extern struct file_operations lod_proc_target_fops;
 void lprocfs_lod_init_vars(struct lprocfs_static_vars *lvars);
index d3be857..a793659 100644 (file)
@@ -96,11 +96,15 @@ void lod_putref(struct lod_device *lod)
                cfs_list_for_each_entry_safe(ost_desc, tmp, &kill, ltd_kill) {
                        int rc;
                        cfs_list_del(&ost_desc->ltd_kill);
                cfs_list_for_each_entry_safe(ost_desc, tmp, &kill, ltd_kill) {
                        int rc;
                        cfs_list_del(&ost_desc->ltd_kill);
-                       /* XXX: remove from QoS structures */
-                       /* disconnect from OSP */
+                       /* remove from QoS structures */
+                       rc = qos_del_tgt(lod, ost_desc);
+                       if (rc)
+                               CERROR("%s: qos_del_tgt(%s) failed: rc = %d\n",
+                                      lod2obd(lod)->obd_name,
+                                      obd_uuid2str(&ost_desc->ltd_uuid), rc);
                        rc = obd_disconnect(ost_desc->ltd_exp);
                        if (rc)
                        rc = obd_disconnect(ost_desc->ltd_exp);
                        if (rc)
-                               CERROR("%s: failed to disconnect %s (%d)\n",
+                               CERROR("%s: failed to disconnect %s: rc = %d\n",
                                       lod2obd(lod)->obd_name,
                                       obd_uuid2str(&ost_desc->ltd_uuid), rc);
                        OBD_FREE_PTR(ost_desc);
                                       lod2obd(lod)->obd_name,
                                       obd_uuid2str(&ost_desc->ltd_uuid), rc);
                        OBD_FREE_PTR(ost_desc);
@@ -268,6 +272,13 @@ int lod_add_device(const struct lu_env *env, struct lod_device *lod,
                GOTO(out_mutex, rc);
        }
 
                GOTO(out_mutex, rc);
        }
 
+       rc = qos_add_tgt(lod, ost_desc);
+       if (rc) {
+               CERROR("%s: qos_add_tgt(%s) failed: rc = %d\n", obd->obd_name,
+                      obd_uuid2str(&ost_desc->ltd_uuid), rc);
+               GOTO(out_pool, rc);
+       }
+
        /* The new OST is now a full citizen */
        if (index >= lod->lod_desc.ld_tgt_count)
                lod->lod_desc.ld_tgt_count = index + 1;
        /* The new OST is now a full citizen */
        if (index >= lod->lod_desc.ld_tgt_count)
                lod->lod_desc.ld_tgt_count = index + 1;
@@ -284,6 +295,7 @@ int lod_add_device(const struct lu_env *env, struct lod_device *lod,
 
        RETURN(0);
 
 
        RETURN(0);
 
+out_pool:
        lod_ost_pool_remove(&lod->lod_pool_info, index);
 out_mutex:
        cfs_mutex_unlock(&lod->lod_mutex);
        lod_ost_pool_remove(&lod->lod_pool_info, index);
 out_mutex:
        cfs_mutex_unlock(&lod->lod_mutex);
diff --git a/lustre/lod/lod_qos.c b/lustre/lod/lod_qos.c
new file mode 100644 (file)
index 0000000..fbc7299
--- /dev/null
@@ -0,0 +1,147 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License version 2 for more details.  A copy is
+ * included in the COPYING file that accompanied this code.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright  2009 Sun Microsystems, Inc. All rights reserved
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Whamcloud, Inc.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/lod/lod_qos.c
+ *
+ */
+
+#define DEBUG_SUBSYSTEM S_LOV
+
+#include <libcfs/libcfs.h>
+#include <obd_class.h>
+#include <obd_lov.h>
+#include <lustre/lustre_idl.h>
+#include "lod_internal.h"
+
+/*
+ * force QoS policy (not RR) to be used for testing purposes
+ *
+ * NOTE(review): the macro name carries a trailing underscore and nothing in
+ * this file tests it -- presumably that keeps the override switched off;
+ * confirm against the code that checks for it.
+ */
+#define FORCE_QOS_
+
+#define D_QOS   D_OTHER /* debug mask passed to the CDEBUG() calls below */
+
+#if 0 /* flip to 1 to route the QOS_* helpers to CDEBUG()/LCONSOLE() */
+#define QOS_DEBUG(fmt, ...)     CDEBUG(D_OTHER, fmt, ## __VA_ARGS__)
+#define QOS_CONSOLE(fmt, ...)   LCONSOLE(D_OTHER, fmt, ## __VA_ARGS__)
+#else /* default: both helpers expand to nothing */
+#define QOS_DEBUG(fmt, ...)
+#define QOS_CONSOLE(fmt, ...)
+#endif
+
+#define TGT_BAVAIL(i) (OST_TGT(lod,i)->ltd_statfs.os_bavail * \
+                      OST_TGT(lod,i)->ltd_statfs.os_bsize) /* os_bavail times
+ * os_bsize of target i; the expansion refers to a variable named "lod",
+ * which therefore has to be in scope wherever the macro is used */
+
+/*
+ * Attach target \a ost_desc to the OSS descriptor whose UUID equals the
+ * remote UUID of the target's export connection.  The OSS descriptor is
+ * allocated the first time a target of that OSS is seen.
+ *
+ * The OSS list is kept ordered by the number of OSTs per OSS, so the
+ * descriptor is (re)inserted at the position matching its new count.
+ * Both QoS dirty flags are raised on success.
+ *
+ * Runs entirely under the lq_rw_sem write lock.
+ *
+ * \retval 0        target accounted for
+ * \retval -ENOMEM  a new OSS descriptor could not be allocated
+ */
+int qos_add_tgt(struct lod_device *lod, struct lod_ost_desc *ost_desc)
+{
+       struct obd_export  *exp = ost_desc->ltd_exp;
+       struct lov_qos_oss *oss = NULL;
+       struct lov_qos_oss *pos;
+       struct obd_uuid    *remote;
+       int                 rc = 0;
+       ENTRY;
+
+       cfs_down_write(&lod->lod_qos.lq_rw_sem);
+       /*
+        * a bit hacky approach to learn NID of corresponding connection
+        * but there is no official API to access information like this
+        * with OSD API.
+        */
+       remote = &exp->exp_connection->c_remote_uuid;
+
+       /* look for an OSS we already know about */
+       cfs_list_for_each_entry(pos, &lod->lod_qos.lq_oss_list, lqo_oss_list) {
+               if (obd_uuid_equals(&pos->lqo_uuid, remote)) {
+                       oss = pos;
+                       break;
+               }
+       }
+
+       if (oss != NULL) {
+               /* known OSS: unlink it, its position is recomputed below */
+               cfs_list_del(&oss->lqo_oss_list);
+       } else {
+               /* first target on this OSS.
+                * NOTE(review): the increment below presumably relies on
+                * OBD_ALLOC_PTR() handing back zeroed memory -- confirm. */
+               OBD_ALLOC_PTR(oss);
+               if (oss == NULL)
+                       GOTO(out, rc = -ENOMEM);
+               memcpy(&oss->lqo_uuid, remote, sizeof(oss->lqo_uuid));
+       }
+
+       oss->lqo_ost_count++;
+       ost_desc->ltd_qos.ltq_oss = oss;
+
+       CDEBUG(D_QOS, "add tgt %s to OSS %s (%d OSTs)\n",
+              obd_uuid2str(&ost_desc->ltd_uuid), obd_uuid2str(&oss->lqo_uuid),
+              oss->lqo_ost_count);
+
+       /* Stop at the first OSS holding fewer OSTs than ours ... */
+       cfs_list_for_each_entry(pos, &lod->lod_qos.lq_oss_list, lqo_oss_list) {
+               if (pos->lqo_ost_count < oss->lqo_ost_count)
+                       break;
+       }
+       /* ... and link in front of it.  When no such entry exists the
+        * cursor is left on the list head, which makes this an append. */
+       cfs_list_add_tail(&oss->lqo_oss_list, &pos->lqo_oss_list);
+
+       lod->lod_qos.lq_dirty = 1;
+       lod->lod_qos.lq_rr.lqr_dirty = 1;
+
+out:
+       cfs_up_write(&lod->lod_qos.lq_rw_sem);
+       RETURN(rc);
+}
+
+/*
+ * Detach target \a ost_desc from the OSS descriptor it was linked to by
+ * qos_add_tgt().  The OSS descriptor is unlinked from the OSS list and
+ * freed once its last target is gone.  Both QoS dirty flags are raised on
+ * success.
+ *
+ * Runs entirely under the lq_rw_sem write lock.
+ *
+ * \retval 0        target detached
+ * \retval -ENOENT  the target is not linked to any OSS descriptor
+ */
+int qos_del_tgt(struct lod_device *lod, struct lod_ost_desc *ost_desc)
+{
+       struct lov_qos_oss *oss;
+       int                 rc = 0;
+       ENTRY;
+
+       cfs_down_write(&lod->lod_qos.lq_rw_sem);
+       oss = ost_desc->ltd_qos.ltq_oss;
+       if (!oss)
+               GOTO(out, rc = -ENOENT);
+
+       /*
+        * Always drop the back-pointer, not only when the OSS goes away:
+        * the target no longer counts towards this OSS, and a stale pointer
+        * would let a repeated call decrement the count a second time.
+        */
+       ost_desc->ltd_qos.ltq_oss = NULL;
+
+       oss->lqo_ost_count--;
+       if (oss->lqo_ost_count == 0) {
+               CDEBUG(D_QOS, "removing OSS %s\n",
+                      obd_uuid2str(&oss->lqo_uuid));
+               cfs_list_del(&oss->lqo_oss_list);
+               OBD_FREE_PTR(oss);
+       }
+
+       lod->lod_qos.lq_dirty = 1;
+       lod->lod_qos.lq_rr.lqr_dirty = 1;
+out:
+       cfs_up_write(&lod->lod_qos.lq_rw_sem);
+       RETURN(rc);
+}
+