From 87d06d8213b9bdb3dfa5d145e992ae24a3d1c5e4 Mon Sep 17 00:00:00 2001 From: Mikhail Pershin Date: Mon, 21 May 2012 13:36:42 +0400 Subject: [PATCH] LU-1406 ofd: add FMD support Add FMD code in OFD Signed-off-by: Mikhail Pershin Change-Id: Iec1cd619cd99b49f5a40c21fc7e8815b5ae392e0 Reviewed-on: http://review.whamcloud.com/2863 Tested-by: Hudson Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Johann Lombardi Reviewed-by: Alex Zhuravlev --- lustre/ofd/Makefile.in | 2 +- lustre/ofd/lproc_ofd.c | 64 ++++++++++++ lustre/ofd/ofd_dev.c | 12 ++- lustre/ofd/ofd_fmd.c | 247 ++++++++++++++++++++++++++++++++++++++++++++++ lustre/ofd/ofd_internal.h | 32 ++++++ lustre/ofd/ofd_obd.c | 2 + 6 files changed, 357 insertions(+), 2 deletions(-) create mode 100644 lustre/ofd/ofd_fmd.c diff --git a/lustre/ofd/Makefile.in b/lustre/ofd/Makefile.in index ccdd3e4..f43fda4 100644 --- a/lustre/ofd/Makefile.in +++ b/lustre/ofd/Makefile.in @@ -1,7 +1,7 @@ MODULES := ofd ofd-objs := ofd_dev.o ofd_obd.o ofd_fs.o -ofd-objs += lproc_ofd.o ofd_capa.o +ofd-objs += lproc_ofd.o ofd_capa.o ofd_fmd.o EXTRA_DIST = $(ofd-objs:%.o=%.c) ofd_internal.h diff --git a/lustre/ofd/lproc_ofd.c b/lustre/ofd/lproc_ofd.c index e806a61..74dba8a 100644 --- a/lustre/ofd/lproc_ofd.c +++ b/lustre/ofd/lproc_ofd.c @@ -79,6 +79,66 @@ static int lprocfs_ofd_rd_last_id(char *page, char **start, off_t off, return retval; } +int lprocfs_ofd_rd_fmd_max_num(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct obd_device *obd = data; + struct ofd_device *ofd = ofd_dev(obd->obd_lu_dev); + int rc; + + rc = snprintf(page, count, "%u\n", ofd->ofd_fmd_max_num); + return rc; +} + +int lprocfs_ofd_wr_fmd_max_num(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = data; + struct ofd_device *ofd = ofd_dev(obd->obd_lu_dev); + int val; + int rc; + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + if (val > 65536 || val < 1) + 
return -EINVAL; + + ofd->ofd_fmd_max_num = val; + return count; +} + +int lprocfs_ofd_rd_fmd_max_age(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct obd_device *obd = data; + struct ofd_device *ofd = ofd_dev(obd->obd_lu_dev); + int rc; + + rc = snprintf(page, count, "%ld\n", ofd->ofd_fmd_max_age / CFS_HZ); + return rc; +} + +int lprocfs_ofd_wr_fmd_max_age(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = data; + struct ofd_device *ofd = ofd_dev(obd->obd_lu_dev); + int val; + int rc; + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + if (val > 65536 || val < 1) + return -EINVAL; + + ofd->ofd_fmd_max_age = val * CFS_HZ; + return count; +} + static int lprocfs_ofd_rd_capa(char *page, char **start, off_t off, int count, int *eof, void *data) { @@ -271,6 +331,10 @@ static struct lprocfs_vars lprocfs_ofd_obd_vars[] = { { "instance", lprocfs_target_rd_instance, 0 }, { "ir_factor", lprocfs_obd_rd_ir_factor, lprocfs_obd_wr_ir_factor, 0}, + { "client_cache_count", lprocfs_ofd_rd_fmd_max_num, + lprocfs_ofd_wr_fmd_max_num, 0 }, + { "client_cache_seconds", lprocfs_ofd_rd_fmd_max_age, + lprocfs_ofd_wr_fmd_max_age, 0 }, { "capa", lprocfs_ofd_rd_capa, lprocfs_ofd_wr_capa, 0 }, { "capa_count", lprocfs_ofd_rd_capa_count, 0, 0 }, diff --git a/lustre/ofd/ofd_dev.c b/lustre/ofd/ofd_dev.c index b9104a4..ee9c226 100644 --- a/lustre/ofd/ofd_dev.c +++ b/lustre/ofd/ofd_dev.c @@ -416,6 +416,9 @@ static int ofd_init0(const struct lu_env *env, struct ofd_device *m, obd->u.obt.obt_magic = OBT_MAGIC; + m->ofd_fmd_max_num = OFD_FMD_MAX_NUM_DEFAULT; + m->ofd_fmd_max_age = OFD_FMD_MAX_AGE_DEFAULT; + cfs_spin_lock_init(&m->ofd_flags_lock); m->ofd_raid_degraded = 0; m->ofd_syncjournal = 0; @@ -636,6 +639,12 @@ int __init ofd_init(void) if (rc) return rc; + rc = ofd_fmd_init(); + if (rc) { + lu_kmem_fini(ofd_caches); + return(rc); + } + lprocfs_ofd_init_vars(&lvars); rc = 
class_register_type(&ofd_obd_ops, NULL, lvars.module_vars, @@ -645,8 +654,9 @@ int __init ofd_init(void) void __exit ofd_exit(void) { - class_unregister_type(LUSTRE_OST_NAME); + ofd_fmd_exit(); lu_kmem_fini(ofd_caches); + class_unregister_type(LUSTRE_OST_NAME); } MODULE_AUTHOR("Whamcloud, Inc. "); diff --git a/lustre/ofd/ofd_fmd.c b/lustre/ofd/ofd_fmd.c new file mode 100644 index 0000000..8663f9e --- /dev/null +++ b/lustre/ofd/ofd_fmd.c @@ -0,0 +1,247 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved + * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Whamcloud, Inc. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * lustre/ofd/ofd_fmd.c + */ + +#define DEBUG_SUBSYSTEM S_FILTER + +#include "ofd_internal.h" + +static cfs_mem_cache_t *ll_fmd_cachep; + +/* drop fmd reference, free it if last ref. 
must be called with fed_lock held.*/ +static inline void ofd_fmd_put_nolock(struct obd_export *exp, + struct ofd_mod_data *fmd) +{ + struct filter_export_data *fed = &exp->exp_filter_data; + + LASSERT_SPIN_LOCKED(&fed->fed_lock); + if (--fmd->fmd_refcount == 0) { + /* XXX when we have persistent reservations and the handle + * is stored herein we need to drop it here. */ + fed->fed_mod_count--; + cfs_list_del(&fmd->fmd_list); + OBD_SLAB_FREE(fmd, ll_fmd_cachep, sizeof(*fmd)); + } +} + +/* drop fmd reference, free it if last ref */ +void ofd_fmd_put(struct obd_export *exp, struct ofd_mod_data *fmd) +{ + struct filter_export_data *fed = &exp->exp_filter_data; + + if (fmd == NULL) + return; + + cfs_spin_lock(&fed->fed_lock); + ofd_fmd_put_nolock(exp, fmd); /* caller reference */ + cfs_spin_unlock(&fed->fed_lock); +} + +/* expire entries from the head (least recently used end) of the list if + * there are too many or they are too old */ +static void ofd_fmd_expire_nolock(struct obd_export *exp, + struct ofd_mod_data *keep) +{ + struct filter_export_data *fed = &exp->exp_filter_data; + struct ofd_device *ofd = ofd_exp(exp); + struct ofd_mod_data *fmd, *tmp; + + cfs_time_t now = cfs_time_current(); + + cfs_list_for_each_entry_safe(fmd, tmp, &fed->fed_mod_list, fmd_list) { + if (fmd == keep) + break; + + if (cfs_time_before(now, fmd->fmd_expire) && + fed->fed_mod_count < ofd->ofd_fmd_max_num) + break; + + cfs_list_del_init(&fmd->fmd_list); + ofd_fmd_put_nolock(exp, fmd); /* list reference */ + } +} + +void ofd_fmd_expire(struct obd_export *exp) +{ + struct filter_export_data *fed = &exp->exp_filter_data; + + cfs_spin_lock(&fed->fed_lock); + ofd_fmd_expire_nolock(exp, NULL); + cfs_spin_unlock(&fed->fed_lock); +} + +/* find specified fid in fed_mod_list. 
+ * caller must hold fed_lock and take fmd reference itself */ +static struct ofd_mod_data *ofd_fmd_find_nolock(struct obd_export *exp, + const struct lu_fid *fid) +{ + struct filter_export_data *fed = &exp->exp_filter_data; + struct ofd_mod_data *found = NULL, *fmd; + struct ofd_device *ofd = ofd_exp(exp); + + cfs_time_t now = cfs_time_current(); + + LASSERT_SPIN_LOCKED(&fed->fed_lock); + + cfs_list_for_each_entry_reverse(fmd, &fed->fed_mod_list, fmd_list) { + if (lu_fid_eq(&fmd->fmd_fid, fid)) { + found = fmd; + cfs_list_del(&fmd->fmd_list); + cfs_list_add_tail(&fmd->fmd_list, &fed->fed_mod_list); + fmd->fmd_expire = cfs_time_add(now, ofd->ofd_fmd_max_age); + break; + } + } + + ofd_fmd_expire_nolock(exp, found); + + return found; +} + +/* Find fmd based on fid or return NULL if not found. */ +struct ofd_mod_data *ofd_fmd_find(struct obd_export *exp, + struct lu_fid *fid) +{ + struct filter_export_data *fed = &exp->exp_filter_data; + struct ofd_mod_data *fmd; + + cfs_spin_lock(&fed->fed_lock); + fmd = ofd_fmd_find_nolock(exp, fid); + if (fmd) + fmd->fmd_refcount++; /* caller reference */ + cfs_spin_unlock(&fed->fed_lock); + + return fmd; +} + +/* Find fmd based on FID, or create a new one if none is found. + * It is possible for this function to return NULL under memory pressure, + * or if fid = 0 is passed (which will only cause old entries to expire). + * Currently this is not fatal because any fmd state is transient and + * may also be freed when it gets sufficiently old. 
*/ +struct ofd_mod_data *ofd_fmd_get(struct obd_export *exp, struct lu_fid *fid) +{ + struct filter_export_data *fed = &exp->exp_filter_data; + struct ofd_device *ofd = ofd_exp(exp); + struct ofd_mod_data *found = NULL, *fmd_new = NULL; + + cfs_time_t now = cfs_time_current(); + + OBD_SLAB_ALLOC_PTR(fmd_new, ll_fmd_cachep); + + cfs_spin_lock(&fed->fed_lock); + found = ofd_fmd_find_nolock(exp, fid); + if (fmd_new) { + if (found == NULL) { + cfs_list_add_tail(&fmd_new->fmd_list, + &fed->fed_mod_list); + fmd_new->fmd_fid = *fid; + fmd_new->fmd_refcount++; /* list reference */ + found = fmd_new; + fed->fed_mod_count++; + } else { + OBD_SLAB_FREE_PTR(fmd_new, ll_fmd_cachep); + } + } + if (found) { + found->fmd_refcount++; /* caller reference */ + found->fmd_expire = cfs_time_add(now, ofd->ofd_fmd_max_age); + } + + cfs_spin_unlock(&fed->fed_lock); + + return found; +} + +#ifdef DO_FMD_DROP +/* drop fmd list reference so it will disappear when last reference is put. + * This isn't so critical because it would in fact only affect the one client + * that is doing the unlink and at worst we have a stale entry referencing + * an object that should never be used again. 
*/ +void ofd_fmd_drop(struct obd_export *exp, struct lu_fid *fid) +{ + struct filter_export_data *fed = &exp->exp_filter_data; + struct ofd_mod_data *found = NULL; + + cfs_spin_lock(&fed->fed_lock); + found = ofd_fmd_find_nolock(exp, fid); + if (found) { + cfs_list_del_init(&found->fmd_list); + ofd_fmd_put_nolock(exp, found); + } + cfs_spin_unlock(&fed->fed_lock); +} +#endif + +/* remove all entries from fmd list */ +void ofd_fmd_cleanup(struct obd_export *exp) +{ + struct filter_export_data *fed = &exp->exp_filter_data; + struct ofd_mod_data *fmd = NULL, *tmp; + + cfs_spin_lock(&fed->fed_lock); + cfs_list_for_each_entry_safe(fmd, tmp, &fed->fed_mod_list, fmd_list) { + cfs_list_del_init(&fmd->fmd_list); + if (fmd->fmd_refcount > 1) { + CDEBUG(D_INFO, "fmd %p still referenced (refcount = %d)\n", + fmd, fmd->fmd_refcount); + } + ofd_fmd_put_nolock(exp, fmd); + } + cfs_spin_unlock(&fed->fed_lock); +} + +int ofd_fmd_init(void) +{ + ll_fmd_cachep = cfs_mem_cache_create("ll_fmd_cache", + sizeof(struct ofd_mod_data), + 0, 0); + if (!ll_fmd_cachep) + return -ENOMEM; + else + return 0; +} + +void ofd_fmd_exit(void) +{ + if (ll_fmd_cachep) { + int rc = cfs_mem_cache_destroy(ll_fmd_cachep); + + LASSERTF(rc == 0, "Cannot destroy ll_fmd_cachep: rc %d\n", rc); + ll_fmd_cachep = NULL; + } +} diff --git a/lustre/ofd/ofd_internal.h b/lustre/ofd/ofd_internal.h index d701a3c..db2a5b4f 100644 --- a/lustre/ofd/ofd_internal.h +++ b/lustre/ofd/ofd_internal.h @@ -49,6 +49,18 @@ OBD_INCOMPAT_COMMON_LR) #define OFD_MAX_GROUPS 256 +/* per-client-per-object persistent state (LRU) */ +struct ofd_mod_data { + cfs_list_t fmd_list; /* linked to fed_mod_list */ + struct lu_fid fmd_fid; /* FID being written to */ + __u64 fmd_mactime_xid; /* xid highest {m,a,c}time setattr */ + cfs_time_t fmd_expire; /* time when the fmd should expire */ + int fmd_refcount; /* reference counter - list holds 1 */ +}; + +#define OFD_FMD_MAX_NUM_DEFAULT 128 +#define OFD_FMD_MAX_AGE_DEFAULT ((obd_timeout + 10) * CFS_HZ) 
+ enum { LPROC_OFD_READ_BYTES = 0, LPROC_OFD_WRITE_BYTES = 1, @@ -75,6 +87,10 @@ struct ofd_device { struct dt_object *ofd_lastid_obj[OFD_MAX_GROUPS]; cfs_spinlock_t ofd_objid_lock; + /* ofd mod data: ofd_device wide values */ + int ofd_fmd_max_num; /* per ofd ofd_mod_data */ + cfs_duration_t ofd_fmd_max_age; /* time to fmd expiry */ + cfs_spinlock_t ofd_flags_lock; unsigned long ofd_raid_degraded:1, /* sync journal on writes */ @@ -162,6 +178,22 @@ void lprocfs_ofd_init_vars(struct lprocfs_static_vars *lvars); int lproc_ofd_attach_seqstat(struct obd_device *dev); extern struct file_operations ofd_per_nid_stats_fops; +/* ofd_fmd.c */ +int ofd_fmd_init(void); +void ofd_fmd_exit(void); +struct ofd_mod_data *ofd_fmd_find(struct obd_export *exp, + struct lu_fid *fid); +struct ofd_mod_data *ofd_fmd_get(struct obd_export *exp, + struct lu_fid *fid); +void ofd_fmd_put(struct obd_export *exp, struct ofd_mod_data *fmd); +void ofd_fmd_expire(struct obd_export *exp); +void ofd_fmd_cleanup(struct obd_export *exp); +#ifdef DO_FMD_DROP +void ofd_fmd_drop(struct obd_export *exp, struct lu_fid *fid); +#else +#define ofd_fmd_drop(exp, fid) do {} while (0) +#endif + static inline struct ofd_thread_info * ofd_info(const struct lu_env *env) { struct ofd_thread_info *info; diff --git a/lustre/ofd/ofd_obd.c b/lustre/ofd/ofd_obd.c index ec13b71..7a4f3f5 100644 --- a/lustre/ofd/ofd_obd.c +++ b/lustre/ofd/ofd_obd.c @@ -383,6 +383,8 @@ static int ofd_destroy_export(struct obd_export *exp) ldlm_destroy_export(exp); lut_client_free(exp); + ofd_fmd_cleanup(exp); + LASSERT(cfs_list_empty(&exp->exp_filter_data.fed_mod_list)); return 0; } -- 1.8.3.1