From: Alex Zhuravlev Date: Thu, 20 Sep 2012 07:23:17 +0000 (+0400) Subject: LU-1303 lod: introduce QoS structures X-Git-Tag: 2.3.51~12 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=976cbeb5ff1d32e7cac54478e6ff249cffe45116 LU-1303 lod: introduce QoS structures Like LOV before it, LOD will maintain an OST->OSS mapping so that it can distribute load among OSSs. Signed-off-by: Alex Zhuravlev Change-Id: I132382cb09b50aec1edcd667cde1851c9ee043dc Reviewed-on: http://review.whamcloud.com/4055 Tested-by: Hudson Reviewed-by: wangdi Tested-by: Maloo Reviewed-by: Andreas Dilger --- diff --git a/lustre/lod/Makefile.in b/lustre/lod/Makefile.in index 0eb50e0..31459b5 100644 --- a/lustre/lod/Makefile.in +++ b/lustre/lod/Makefile.in @@ -1,5 +1,5 @@ MODULES := lod -lod-objs := lod_dev.o lod_lov.o lproc_lod.o lod_pool.o lod_object.o +lod-objs := lod_dev.o lod_lov.o lproc_lod.o lod_pool.o lod_object.o lod_qos.o EXTRA_DIST = $(lod-objs:.o=.c) lod_internal.h diff --git a/lustre/lod/lod_internal.h b/lustre/lod/lod_internal.h index 6bb5020..e0fedfb 100644 --- a/lustre/lod/lod_internal.h +++ b/lustre/lod/lod_internal.h @@ -275,6 +275,10 @@ int lod_pool_new(struct obd_device *obd, char *poolname); int lod_pool_add(struct obd_device *obd, char *poolname, char *ostname); int lod_pool_remove(struct obd_device *obd, char *poolname, char *ostname); +/* lod_qos.c */ +int qos_add_tgt(struct lod_device*, struct lod_ost_desc *); +int qos_del_tgt(struct lod_device *, struct lod_ost_desc *); + /* lproc_lod.c */ extern struct file_operations lod_proc_target_fops; void lprocfs_lod_init_vars(struct lprocfs_static_vars *lvars); diff --git a/lustre/lod/lod_lov.c b/lustre/lod/lod_lov.c index d3be857..a793659 100644 --- a/lustre/lod/lod_lov.c +++ b/lustre/lod/lod_lov.c @@ -96,11 +96,15 @@ void lod_putref(struct lod_device *lod) cfs_list_for_each_entry_safe(ost_desc, tmp, &kill, ltd_kill) { int rc; cfs_list_del(&ost_desc->ltd_kill); - /* XXX: remove from QoS structures */ 
- /* disconnect from OSP */ + /* remove from QoS structures */ + rc = qos_del_tgt(lod, ost_desc); + if (rc) + CERROR("%s: qos_del_tgt(%s) failed: rc = %d\n", + lod2obd(lod)->obd_name, + obd_uuid2str(&ost_desc->ltd_uuid), rc); rc = obd_disconnect(ost_desc->ltd_exp); if (rc) - CERROR("%s: failed to disconnect %s (%d)\n", + CERROR("%s: failed to disconnect %s: rc = %d\n", lod2obd(lod)->obd_name, obd_uuid2str(&ost_desc->ltd_uuid), rc); OBD_FREE_PTR(ost_desc); @@ -268,6 +272,13 @@ int lod_add_device(const struct lu_env *env, struct lod_device *lod, GOTO(out_mutex, rc); } + rc = qos_add_tgt(lod, ost_desc); + if (rc) { + CERROR("%s: qos_add_tgt(%s) failed: rc = %d\n", obd->obd_name, + obd_uuid2str(&ost_desc->ltd_uuid), rc); + GOTO(out_pool, rc); + } + /* The new OST is now a full citizen */ if (index >= lod->lod_desc.ld_tgt_count) lod->lod_desc.ld_tgt_count = index + 1; @@ -284,6 +295,7 @@ int lod_add_device(const struct lu_env *env, struct lod_device *lod, RETURN(0); +out_pool: lod_ost_pool_remove(&lod->lod_pool_info, index); out_mutex: cfs_mutex_unlock(&lod->lod_mutex); diff --git a/lustre/lod/lod_qos.c b/lustre/lod/lod_qos.c new file mode 100644 index 0000000..fbc7299 --- /dev/null +++ b/lustre/lod/lod_qos.c @@ -0,0 +1,147 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License version 2 for more details. A copy is + * included in the COPYING file that accompanied this code. 
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Whamcloud, Inc.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/lod/lod_qos.c
+ *
+ */
+
+#define DEBUG_SUBSYSTEM S_LOV
+
+/*
+ * NOTE(review): the four #include lines below carry no header name in this
+ * copy of the patch (presumably the angle-bracket text was stripped when the
+ * page was extracted) -- restore them from the upstream tree before applying.
+ */
+#include
+#include
+#include
+#include
+#include "lod_internal.h"
+
+/*
+ * force QoS policy (not RR) to be used for testing purposes
+ */
+#define FORCE_QOS_
+
+/* debug mask used by the CDEBUG() calls in this file */
+#define D_QOS D_OTHER
+
+/* QOS_DEBUG()/QOS_CONSOLE() expand to nothing unless the "#if 0" is flipped */
+#if 0
+#define QOS_DEBUG(fmt, ...) CDEBUG(D_OTHER, fmt, ## __VA_ARGS__)
+#define QOS_CONSOLE(fmt, ...) LCONSOLE(D_OTHER, fmt, ## __VA_ARGS__)
+#else
+#define QOS_DEBUG(fmt, ...)
+#define QOS_CONSOLE(fmt, ...)
+#endif
+
+/*
+ * Product of os_bavail and os_bsize from the cached statfs of target \a i.
+ * Relies on a variable named "lod" being in scope at the expansion site.
+ */
+#define TGT_BAVAIL(i) (OST_TGT(lod,i)->ltd_statfs.os_bavail * \
+		       OST_TGT(lod,i)->ltd_statfs.os_bsize)
+
+/**
+ * Account a target under the OSS entry that matches its connection.
+ *
+ * The OSS is looked up in lod_qos.lq_oss_list by comparing lqo_uuid with the
+ * remote UUID of the target's export connection; a new entry is allocated
+ * when none matches.  The entry's OST count is incremented, the target is
+ * linked to it through ltd_qos.ltq_oss, and the entry is (re)inserted in
+ * front of the first list member that has a smaller OST count.  Both QoS
+ * dirty flags are set.  The whole update runs with lq_rw_sem held for write.
+ *
+ * \param[in] lod	LOD device owning the QoS state
+ * \param[in] ost_desc	target descriptor; ltd_exp and its exp_connection
+ *			are dereferenced without a NULL check
+ *
+ * \retval 0		on success
+ * \retval -ENOMEM	if a new OSS entry cannot be allocated
+ */
+int qos_add_tgt(struct lod_device *lod, struct lod_ost_desc *ost_desc)
+{
+	struct lov_qos_oss *oss = NULL, *temposs;
+	struct obd_export *exp = ost_desc->ltd_exp;
+	int rc = 0, found = 0;
+	cfs_list_t *list;
+	ENTRY;
+
+	cfs_down_write(&lod->lod_qos.lq_rw_sem);
+	/*
+	 * a bit hacky approach to learn NID of corresponding connection
+	 * but there is no official API to access information like this
+	 * with OSD API.
+	 */
+	cfs_list_for_each_entry(oss, &lod->lod_qos.lq_oss_list, lqo_oss_list) {
+		if (obd_uuid_equals(&oss->lqo_uuid,
+				    &exp->exp_connection->c_remote_uuid)) {
+			found++;
+			break;
+		}
+	}
+
+	/*
+	 * When nothing matched, "oss" is left as the loop cursor and is not
+	 * a usable entry; it is only trusted when "found" is set.
+	 */
+	if (!found) {
+		/*
+		 * lqo_ost_count is incremented below without being set
+		 * first -- presumably OBD_ALLOC_PTR() returns zeroed
+		 * memory; TODO confirm.
+		 */
+		OBD_ALLOC_PTR(oss);
+		if (!oss)
+			GOTO(out, rc = -ENOMEM);
+		memcpy(&oss->lqo_uuid, &exp->exp_connection->c_remote_uuid,
+		       sizeof(oss->lqo_uuid));
+	} else {
+		/* Assume we have to move this one */
+		cfs_list_del(&oss->lqo_oss_list);
+	}
+
+	oss->lqo_ost_count++;
+	ost_desc->ltd_qos.ltq_oss = oss;
+
+	CDEBUG(D_QOS, "add tgt %s to OSS %s (%d OSTs)\n",
+	       obd_uuid2str(&ost_desc->ltd_uuid), obd_uuid2str(&oss->lqo_uuid),
+	       oss->lqo_ost_count);
+
+	/*
+	 * Add sorted by # of OSTs.  Find the first entry that we're
+	 * bigger than...
+	 */
+	list = &lod->lod_qos.lq_oss_list;
+	cfs_list_for_each_entry(temposs, list, lqo_oss_list) {
+		if (oss->lqo_ost_count > temposs->lqo_ost_count)
+			break;
+	}
+	/*
+	 * ...and add before it.  If we're the first or smallest, temposs
+	 * points to the list head, and we add to the end.
+	 */
+	cfs_list_add_tail(&oss->lqo_oss_list, &temposs->lqo_oss_list);
+
+	lod->lod_qos.lq_dirty = 1;
+	lod->lod_qos.lq_rr.lqr_dirty = 1;
+
+out:
+	cfs_up_write(&lod->lod_qos.lq_rw_sem);
+	RETURN(rc);
+}
+
+/**
+ * Remove a target from the accounting of its OSS entry.
+ *
+ * The target is detached from the OSS it was linked to, the entry's OST
+ * count is decremented, and the entry is unlinked from lq_oss_list and
+ * freed once the count reaches zero.  A surviving entry keeps its current
+ * list position.  Both QoS dirty flags are set.  The whole update runs with
+ * lq_rw_sem held for write.
+ *
+ * \param[in] lod	LOD device owning the QoS state
+ * \param[in] ost_desc	target descriptor to detach
+ *
+ * \retval 0		on success
+ * \retval -ENOENT	if the target is not linked to any OSS entry
+ */
+int qos_del_tgt(struct lod_device *lod, struct lod_ost_desc *ost_desc)
+{
+	struct lov_qos_oss *oss;
+	int rc = 0;
+	ENTRY;
+
+	cfs_down_write(&lod->lod_qos.lq_rw_sem);
+	oss = ost_desc->ltd_qos.ltq_oss;
+	if (!oss)
+		GOTO(out, rc = -ENOENT);
+
+	/*
+	 * Detach the target whether or not the OSS entry survives: a stale
+	 * back-pointer would let a repeated delete decrement the count again
+	 * and free an entry that other targets still reference.
+	 */
+	ost_desc->ltd_qos.ltq_oss = NULL;
+
+	oss->lqo_ost_count--;
+	if (oss->lqo_ost_count == 0) {
+		CDEBUG(D_QOS, "removing OSS %s\n",
+		       obd_uuid2str(&oss->lqo_uuid));
+		cfs_list_del(&oss->lqo_oss_list);
+		OBD_FREE_PTR(oss);
+	}
+
+	lod->lod_qos.lq_dirty = 1;
+	lod->lod_qos.lq_rr.lqr_dirty = 1;
+out:
+	cfs_up_write(&lod->lod_qos.lq_rw_sem);
+	RETURN(rc);
+}
+