From a6d6cda53b5576ade182e0110c568389ba613cdf Mon Sep 17 00:00:00 2001 From: Mikhail Pershin Date: Sun, 20 May 2012 20:55:56 +0400 Subject: [PATCH] LU-1406 ofd: OFD device methods Add capability code and basic device functions: statfs, set_info, get_info, iocontrol, ping, sync, health_check Signed-off-by: Mikhail Pershin Change-Id: I7a0a671ef45e8f4860344bd8c9a9a306d60bd780 Reviewed-on: http://review.whamcloud.com/2856 Tested-by: Hudson Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Alex Zhuravlev Reviewed-by: Oleg Drokin --- lustre/ofd/Makefile.in | 2 +- lustre/ofd/ofd_capa.c | 231 ++++++++++++++++++++++++++++++++++++++ lustre/ofd/ofd_dev.c | 9 ++ lustre/ofd/ofd_internal.h | 24 ++-- lustre/ofd/ofd_obd.c | 275 +++++++++++++++++++++++++++++++++++++++++++++- 5 files changed, 531 insertions(+), 10 deletions(-) create mode 100644 lustre/ofd/ofd_capa.c diff --git a/lustre/ofd/Makefile.in b/lustre/ofd/Makefile.in index cfd5802..ccdd3e4 100644 --- a/lustre/ofd/Makefile.in +++ b/lustre/ofd/Makefile.in @@ -1,7 +1,7 @@ MODULES := ofd ofd-objs := ofd_dev.o ofd_obd.o ofd_fs.o -ofd-objs += lproc_ofd.o +ofd-objs += lproc_ofd.o ofd_capa.o EXTRA_DIST = $(ofd-objs:%.o=%.c) ofd_internal.h diff --git a/lustre/ofd/ofd_capa.c b/lustre/ofd/ofd_capa.c new file mode 100644 index 0000000..6505442 --- /dev/null +++ b/lustre/ofd/ofd_capa.c @@ -0,0 +1,231 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Whamcloud, Inc. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * lustre/ofd/ofd_capa.c + * + * Author: Lai Siyao + */ + +#define DEBUG_SUBSYSTEM S_FILTER + +#include "ofd_internal.h" +/** + * Return the key id (lk_keyid) stored in capability key entry \a key. + */ +static inline __u32 ofd_ck_keyid(struct filter_capa_key *key) +{ + return key->k_key.lk_keyid; +} +/** + * Install capability key \a new in the device key list fo_capa_keys. + * + * Under capa_lock the list is scanned for entries whose lk_seq equals + * new->lk_seq; up to two are remembered, with keys[0] holding the largest + * key id seen. If a remembered entry has the same key id as \a new it is + * overwritten in place. Otherwise keys[1] is overwritten when it is set, + * or a new entry is allocated and linked into the list. + * + * \retval 0 key stored + * \retval -ENOMEM a new list entry could not be allocated + * + * NOTE(review): RETURN() is used below but the function has no ENTRY. + * NOTE(review): the list is searched and then modified in separate + * capa_lock sections; presumably callers serialize key updates - confirm. + */ +int ofd_update_capa_key(struct ofd_device *ofd, struct lustre_capa_key *new) +{ + struct obd_device *obd = ofd_obd(ofd); + struct filter_capa_key *k, *keys[2] = { NULL, NULL }; + int i; + + cfs_spin_lock(&capa_lock); + cfs_list_for_each_entry(k, &obd->u.filter.fo_capa_keys, k_list) { + if (k->k_key.lk_seq != new->lk_seq) + continue; + + if (keys[0]) { + keys[1] = k; + if (ofd_ck_keyid(keys[1]) > ofd_ck_keyid(keys[0])) + keys[1] = keys[0], keys[0] = k; + } else { + keys[0] = k; + } + } + cfs_spin_unlock(&capa_lock); + + for (i = 0; i < 2; i++) { + if (!keys[i]) + continue; + if (ofd_ck_keyid(keys[i]) != new->lk_keyid) + continue; + /* maybe because of recovery or other reasons, MDS sent the + * old capability key again. 
+ */ + cfs_spin_lock(&capa_lock); + keys[i]->k_key = *new; + cfs_spin_unlock(&capa_lock); + + RETURN(0); + } + + if (keys[1]) { + /* if OSS already has two keys, update the old one */ + k = keys[1]; + } else { + OBD_ALLOC_PTR(k); + if (!k) + RETURN(-ENOMEM); + CFS_INIT_LIST_HEAD(&k->k_list); + } + + cfs_spin_lock(&capa_lock); + k->k_key = *new; + if (cfs_list_empty(&k->k_list)) + cfs_list_add(&k->k_list, &obd->u.filter.fo_capa_keys); + cfs_spin_unlock(&capa_lock); + + DEBUG_CAPA_KEY(D_SEC, new, "new"); + RETURN(0); +} +/** + * Check capability \a capa presented through export \a exp for operation + * \a opc. + * + * The check is skipped (0 is returned) when \a seq is not an MDT sequence, + * when fo_fl_oss_capa is clear, or when the export did not negotiate + * OBD_CONNECT_OSS_CAPA. A capability already present in fo_capa_hash is + * only checked for expiry. Otherwise the key matching \a seq and + * capa_keyid(capa) is copied out of fo_capa_keys under capa_lock, the HMAC + * is recomputed with capa_hmac() and compared with capa->lc_hmac, and on a + * match the capability is added to fo_capa_hash. + * + * \retval 0 access allowed, or no key for \a seq is known yet + * \retval -EACCES no capability passed, \a opc not permitted by the + * capability, or HMAC mismatch + * \retval -ESTALE capability expired, or no key with a matching key id + * \retval -ENOMEM the HMAC buffer could not be allocated + * \retval other error returned by capa_hmac() + */ +int ofd_auth_capa(struct obd_export *exp, struct lu_fid *fid, obd_seq seq, + struct lustre_capa *capa, __u64 opc) +{ + struct filter_obd *filter = &exp->exp_obd->u.filter; + struct filter_capa_key *k; + struct lustre_capa_key key; + struct obd_capa *oc; + __u8 *hmac; + int keys_ready = 0, key_found = 0, rc = 0; + + ENTRY; + + /* skip capa check for llog and obdecho */ + if (!fid_seq_is_mdt(seq)) + RETURN(0); + + /* capability is disabled */ + if (!filter->fo_fl_oss_capa) + RETURN(0); + + if (!(exp->exp_connect_flags & OBD_CONNECT_OSS_CAPA)) + RETURN(0); + + if (capa == NULL) { + if (fid) + CERROR("seq/fid/opc "LPU64"/"DFID"/"LPX64 + ": no capability has been passed\n", + seq, PFID(fid), opc); + else + CERROR("seq/opc "LPU64"/"LPX64 + ": no capability has been passed\n", + seq, opc); + RETURN(-EACCES); + } + + if (opc == CAPA_OPC_OSS_READ) { + if (!(capa->lc_opc & CAPA_OPC_OSS_RW)) + rc = -EACCES; + } else if (!capa_opc_supported(capa, opc)) { + rc = -EACCES; + } + + if (rc) { + DEBUG_CAPA(D_ERROR, capa, "opc "LPX64" not supported by", opc); + RETURN(rc); + } + + oc = capa_lookup(filter->fo_capa_hash, capa, 0); + if (oc) { + cfs_spin_lock(&oc->c_lock); + if (capa_is_expired(oc)) { + DEBUG_CAPA(D_ERROR, capa, "expired"); + rc = -ESTALE; + } + cfs_spin_unlock(&oc->c_lock); + + capa_put(oc); + RETURN(rc); + } + + if (capa_is_expired_sec(capa)) { + DEBUG_CAPA(D_ERROR, capa, "expired"); + RETURN(-ESTALE); + } + + cfs_spin_lock(&capa_lock); + 
+ cfs_list_for_each_entry(k, &filter->fo_capa_keys, k_list) { + if (k->k_key.lk_seq == seq) { + keys_ready = 1; + if (k->k_key.lk_keyid == capa_keyid(capa)) { + key = k->k_key; + key_found = 1; + break; + } + } + } + cfs_spin_unlock(&capa_lock); + + if (!keys_ready) { + CDEBUG(D_SEC, "MDS hasn't propagated capability keys yet, " + "ignore check!\n"); + RETURN(0); + } + + if (!key_found) { + DEBUG_CAPA(D_ERROR, capa, "no matched capability key for"); + RETURN(-ESTALE); + } + + OBD_ALLOC(hmac, CAPA_HMAC_MAX_LEN); + if (hmac == NULL) + RETURN(-ENOMEM); + + rc = capa_hmac(hmac, capa, key.lk_key); + if (rc) { + DEBUG_CAPA(D_ERROR, capa, "HMAC failed: rc %d", rc); + OBD_FREE(hmac, CAPA_HMAC_MAX_LEN); + RETURN(rc); + } + + rc = memcmp(hmac, capa->lc_hmac, CAPA_HMAC_MAX_LEN); + OBD_FREE(hmac, CAPA_HMAC_MAX_LEN); + if (rc) { + DEBUG_CAPA_KEY(D_ERROR, &key, "calculate HMAC with "); + DEBUG_CAPA(D_ERROR, capa, "HMAC mismatch"); + RETURN(-EACCES); + } + + /* store in capa hash */ + oc = capa_add(filter->fo_capa_hash, capa); + capa_put(oc); + RETURN(0); +} +/** + * Unlink and free every entry on the device key list fo_capa_keys while + * holding capa_lock. + */ +void ofd_free_capa_keys(struct ofd_device *ofd) +{ + struct obd_device *obd = ofd_obd(ofd); + struct filter_capa_key *key, *n; + + cfs_spin_lock(&capa_lock); + cfs_list_for_each_entry_safe(key, n, &obd->u.filter.fo_capa_keys, k_list) { + cfs_list_del_init(&key->k_list); + OBD_FREE_PTR(key); + } + cfs_spin_unlock(&capa_lock); +} diff --git a/lustre/ofd/ofd_dev.c b/lustre/ofd/ofd_dev.c index 345189d..7d9b436 100644 --- a/lustre/ofd/ofd_dev.c +++ b/lustre/ofd/ofd_dev.c @@ -343,6 +343,12 @@ static int ofd_init0(const struct lu_env *env, struct ofd_device *m, cfs_rwlock_init(&obd->u.filter.fo_sptlrpc_lock); sptlrpc_rule_set_init(&obd->u.filter.fo_sptlrpc_rset); + obd->u.filter.fo_fl_oss_capa = 0; + CFS_INIT_LIST_HEAD(&obd->u.filter.fo_capa_keys); + obd->u.filter.fo_capa_hash = init_capa_hash(); + if (obd->u.filter.fo_capa_hash == NULL) + RETURN(-ENOMEM); + m->ofd_dt_dev.dd_lu_dev.ld_ops = &ofd_lu_ops; 
m->ofd_dt_dev.dd_lu_dev.ld_obd = obd; /* set this lu_device to obd, because error handling need it */ @@ -433,6 +439,9 @@ static void ofd_fini(const struct lu_env *env, struct ofd_device *m) lut_fini(env, &m->ofd_lut); ofd_fs_cleanup(env, m); + ofd_free_capa_keys(m); + cleanup_capa_hash(obd->u.filter.fo_capa_hash); + if (m->ofd_namespace != NULL) { ldlm_namespace_free(m->ofd_namespace, NULL, d->ld_obd->obd_force); diff --git a/lustre/ofd/ofd_internal.h b/lustre/ofd/ofd_internal.h index 515a944..2df6149 100644 --- a/lustre/ofd/ofd_internal.h +++ b/lustre/ofd/ofd_internal.h @@ -41,6 +41,7 @@ #include #include #include +#include #define OFD_INIT_OBJID 0 #define OFD_ROCOMPAT_SUPP (0) @@ -106,18 +107,19 @@ static inline struct ofd_object *ofd_obj(struct lu_object *o) * to reduce stack consumption. */ struct ofd_thread_info { - const struct lu_env *fti_env; + const struct lu_env *fti_env; - struct obd_export *fti_exp; - struct lu_fid fti_fid; - struct lu_attr fti_attr; + struct obd_export *fti_exp; + struct lu_fid fti_fid; + struct lu_attr fti_attr; union { - char name[64]; /* for ofd_init0() */ + char name[64]; /* for ofd_init0() */ + struct obd_statfs osfs; /* for obdofd_statfs() */ } fti_u; - struct dt_object_format fti_dof; - struct lu_buf fti_buf; - loff_t fti_off; + struct dt_object_format fti_dof; + struct lu_buf fti_buf; + loff_t fti_off; }; static inline int ofd_export_stats_init(struct ofd_device *ofd, @@ -129,6 +131,12 @@ static inline int ofd_export_stats_init(struct ofd_device *ofd, extern void target_recovery_fini(struct obd_device *obd); extern void target_recovery_init(struct lu_target *lut, svc_handler_t handler); +/* ofd_capa.c */ +int ofd_update_capa_key(struct ofd_device *ofd, struct lustre_capa_key *key); +int ofd_auth_capa(struct obd_export *exp, struct lu_fid *fid, obd_seq seq, + struct lustre_capa *capa, __u64 opc); +void ofd_free_capa_keys(struct ofd_device *ofd); + /* ofd_dev.c */ extern struct lu_context_key ofd_thread_key; diff --git 
a/lustre/ofd/ofd_obd.c b/lustre/ofd/ofd_obd.c index 2ff9c17..6a1a3c7 100644 --- a/lustre/ofd/ofd_obd.c +++ b/lustre/ofd/ofd_obd.c @@ -49,7 +49,8 @@ static int ofd_parse_connect_data(const struct lu_env *env, struct obd_export *exp, struct obd_connect_data *data) { - struct ofd_device *ofd = ofd_exp(exp); + struct ofd_device *ofd = ofd_exp(exp); + struct filter_export_data *fed = &exp->exp_filter_data; if (!data) RETURN(0); @@ -60,6 +61,17 @@ static int ofd_parse_connect_data(const struct lu_env *env, data->ocd_connect_flags, data->ocd_version, data->ocd_grant, data->ocd_index); + if (fed->fed_group != 0 && fed->fed_group != data->ocd_group) { + CWARN("!!! This export (nid %s) used object group %d " + "earlier; now it's trying to use group %d! This could " + "be a bug in the MDS. Please report to " + "http://bugs.whamcloud.com/\n", + obd_export_nid2str(exp), fed->fed_group, + data->ocd_group); + RETURN(-EPROTO); + } + fed->fed_group = data->ocd_group; + data->ocd_connect_flags &= OST_CONNECT_SUPPORTED; exp->exp_connect_flags = data->ocd_connect_flags; data->ocd_version = LUSTRE_VERSION_CODE; @@ -319,6 +331,259 @@ int ofd_obd_postrecov(struct obd_device *obd) RETURN(rc); } +static int ofd_set_mds_conn(struct obd_export *exp, void *val) +{ + int rc = 0; + /* KEY_MDS_CONN handler: only logs the connecting NID on the console; val is not used and 0 is always returned */ + ENTRY; + + LCONSOLE_WARN("%s: received MDS connection from %s\n", + exp->exp_obd->obd_name, obd_export_nid2str(exp)); + RETURN(rc); +} +/** + * Handle a set_info request on export \a exp. + * + * KEY_CAPA_KEY passes \a val to ofd_update_capa_key(); KEY_MDS_CONN calls + * ofd_set_mds_conn(). \a vallen and \a set are not referenced directly. + * KEY_IS() is given only the key constant here; presumably it reads + * \a key and \a keylen from the enclosing scope - confirm. + * + * \retval 0 key handled + * \retval -EINVAL exp->exp_obd is NULL + * \retval -EOPNOTSUPP any other key + * \retval other error returned by ofd_update_capa_key() + * + * NOTE(review): ofd_exp(exp) is evaluated in the declarations, before the + * exp->exp_obd NULL check; confirm ofd_exp() is safe in that case. + */ +static int ofd_set_info_async(const struct lu_env *env, struct obd_export *exp, + __u32 keylen, void *key, __u32 vallen, void *val, + struct ptlrpc_request_set *set) +{ + struct ofd_device *ofd = ofd_exp(exp); + int rc = 0; + + ENTRY; + + if (exp->exp_obd == NULL) { + CDEBUG(D_IOCTL, "invalid export %p\n", exp); + RETURN(-EINVAL); + } + + if (KEY_IS(KEY_CAPA_KEY)) { + rc = ofd_update_capa_key(ofd, val); + if (rc) + CERROR("ofd update capability key failed: %d\n", rc); + } else if (KEY_IS(KEY_MDS_CONN)) { + rc = ofd_set_mds_conn(exp, val); + } else { + CERROR("%s: Unsupported key 
%s\n", + exp->exp_obd->obd_name, (char*)key); + rc = -EOPNOTSUPP; + } + + RETURN(rc); +} +/** + * Handle a get_info request for KEY_BLOCKSIZE, KEY_BLOCKSIZE_BITS or + * KEY_LAST_ID. + * + * When \a val is non-NULL, *vallen must be at least the size of the value + * and the value is stored through \a val. For a recognized key *vallen is + * then set to the size of the value, also when \a val is NULL. + * KEY_LAST_ID reports ofd_last_id() for the export's fed_group. + * + * \retval 0 key handled + * \retval -EINVAL exp->exp_obd is NULL + * \retval -EOVERFLOW *vallen is smaller than the value + * \retval -EOPNOTSUPP any other key + */ +static int ofd_get_info(const struct lu_env *env, struct obd_export *exp, + __u32 keylen, void *key, __u32 *vallen, void *val, + struct lov_stripe_md *lsm) +{ + struct ofd_device *ofd = ofd_exp(exp); + int rc = 0; + + ENTRY; + + if (exp->exp_obd == NULL) { + CDEBUG(D_IOCTL, "invalid client export %p\n", exp); + RETURN(-EINVAL); + } + + if (KEY_IS(KEY_BLOCKSIZE)) { + __u32 *blocksize = val; + if (blocksize) { + if (*vallen < sizeof(*blocksize)) + RETURN(-EOVERFLOW); + *blocksize = 1 << ofd->ofd_dt_conf.ddp_block_shift; + } + *vallen = sizeof(*blocksize); + } else if (KEY_IS(KEY_BLOCKSIZE_BITS)) { + __u32 *blocksize_bits = val; + if (blocksize_bits) { + if (*vallen < sizeof(*blocksize_bits)) + RETURN(-EOVERFLOW); + *blocksize_bits = ofd->ofd_dt_conf.ddp_block_shift; + } + *vallen = sizeof(*blocksize_bits); + } else if (KEY_IS(KEY_LAST_ID)) { + obd_id *last_id = val; + if (last_id) { + if (*vallen < sizeof(*last_id)) + RETURN(-EOVERFLOW); + *last_id = ofd_last_id(ofd, + exp->exp_filter_data.fed_group); + } + *vallen = sizeof(*last_id); + } else { + CERROR("Not supported key %s\n", (char*)key); + rc = -EOPNOTSUPP; + } + + RETURN(rc); +} + +/** helper function for statfs, also used by grant code; fills \a osfs with dt_statfs() and returns its result. \a max_age and \a from_cache are not used by this implementation. */ +int ofd_statfs_internal(const struct lu_env *env, struct ofd_device *ofd, + struct obd_statfs *osfs, __u64 max_age, int *from_cache) +{ + int rc; + + rc = dt_statfs(env, ofd->ofd_osd, osfs); + if (unlikely(rc)) + return rc; + + return 0; +} +/** + * Fill \a osfs for export \a exp through ofd_statfs_internal(). + * + * After a successful call two fault-injection checks keyed on this + * target's lsd_ost_index may alter the result: OBD_FAIL_OST_ENOSPC sets + * os_bfree and os_bavail to 2, OBD_FAIL_OST_ENOINO sets os_ffree to 0. + * \a flags is not used. + * + * \retval 0 success + * \retval other error returned by ofd_statfs_internal() + */ +static int ofd_statfs(const struct lu_env *env, struct obd_export *exp, + struct obd_statfs *osfs, __u64 max_age, __u32 flags) +{ + struct ofd_device *ofd = ofd_dev(exp->exp_obd->obd_lu_dev); + int rc; + + ENTRY; + + rc = ofd_statfs_internal(env, ofd, osfs, max_age, NULL); + if (unlikely(rc)) + GOTO(out, rc); + + if (OBD_FAIL_CHECK_VALUE(OBD_FAIL_OST_ENOSPC, + ofd->ofd_lut.lut_lsd.lsd_ost_index)) + osfs->os_bfree = osfs->os_bavail = 2; + + if 
(OBD_FAIL_CHECK_VALUE(OBD_FAIL_OST_ENOINO, + ofd->ofd_lut.lut_lsd.lsd_ost_index)) + osfs->os_ffree = 0; + EXIT; +out: + return rc; +} +/** + * Sync the backing device with dt_sync() when \a oinfo carries no object + * id (oi_oa is NULL or OBD_MD_FLID is not set in o_valid). + * + * \retval 0 success, or an object id was supplied + * \retval other error returned by dt_sync() + * + * NOTE(review): when an object id is supplied nothing is synced and 0 is + * returned; \a start, \a end and \a set are not used. + */ +static int ofd_sync(const struct lu_env *env, struct obd_export *exp, + struct obd_info *oinfo, obd_size start, obd_size end, + struct ptlrpc_request_set *set) +{ + struct ofd_device *ofd = ofd_exp(exp); + int rc = 0; + + ENTRY; + + /* if no objid is specified, it means "sync whole filesystem" */ + if (oinfo->oi_oa == NULL || !(oinfo->oi_oa->o_valid & OBD_MD_FLID)) { + rc = dt_sync(env, ofd->ofd_osd); + GOTO(out, rc); + } + + EXIT; +out: + return rc; +} +/** + * Handle ioctl \a cmd for the device behind \a exp, using a temporary + * lu_env initialised with LCT_LOCAL. + * + * OBD_IOC_ABORT_RECOVERY calls target_stop_recovery_thread(), + * OBD_IOC_SYNC calls dt_sync(), OBD_IOC_SET_READONLY calls dt_sync() and, + * if that succeeds, dt_ro(). \a len, \a karg and \a uarg are not used. + * + * \retval 0 success + * \retval -ENOTTY \a cmd is not one of the commands above + * \retval other error from lu_env_init(), dt_sync() or dt_ro() + * + * NOTE(review): the default branch prints the unsigned \a cmd with %d. + */ +int ofd_iocontrol(unsigned int cmd, struct obd_export *exp, int len, + void *karg, void *uarg) +{ + struct lu_env env; + struct ofd_device *ofd = ofd_exp(exp); + struct obd_device *obd = ofd_obd(ofd); + int rc; + + ENTRY; + + CDEBUG(D_IOCTL, "handling ioctl cmd %#x\n", cmd); + rc = lu_env_init(&env, LCT_LOCAL); + if (rc) + RETURN(rc); + + switch (cmd) { + case OBD_IOC_ABORT_RECOVERY: + CERROR("aborting recovery for device %s\n", obd->obd_name); + target_stop_recovery_thread(obd); + break; + case OBD_IOC_SYNC: + CDEBUG(D_RPCTRACE, "syncing ost %s\n", obd->obd_name); + rc = dt_sync(&env, ofd->ofd_osd); + break; + case OBD_IOC_SET_READONLY: + rc = dt_sync(&env, ofd->ofd_osd); + if (rc == 0) + rc = dt_ro(&env, ofd->ofd_osd); + break; + default: + CERROR("Not supported cmd = %d for device %s\n", + cmd, obd->obd_name); + rc = -ENOTTY; + } + + lu_env_fini(&env); + RETURN(rc); +} +/** + * Pre-cleanup hook: calls target_cleanup_recovery() for stage + * OBD_CLEANUP_EXPORTS and does nothing for any other stage. + * Always returns 0. + */ +static int ofd_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage) +{ + int rc = 0; + + ENTRY; + + switch(stage) { + case OBD_CLEANUP_EARLY: + break; + case OBD_CLEANUP_EXPORTS: + target_cleanup_recovery(obd); + break; + } + RETURN(rc); +} +/** + * Ping handler: performs no work and always returns 0. + */ +static int ofd_ping(const struct lu_env *env, struct obd_export *exp) +{ + return 0; +} +/** + * Report device health as a boolean: the internal rc is collapsed with + * !!rc, so 0 is returned when every check passed and 1 otherwise. + * + * The device is reported unhealthy when dt_statfs() fails or when the + * returned os_state equals OS_STATE_READONLY. When built with + * USE_HEALTH_CHECK_WRITE, one page is additionally written to + * ofd_health_check_file in a transaction with th_sync set. + */ +static int ofd_health_check(const struct lu_env *env, struct obd_device *obd) +{ + struct ofd_device *ofd = ofd_dev(obd->obd_lu_dev); + struct ofd_thread_info *info; 
+#ifdef USE_HEALTH_CHECK_WRITE + struct thandle *th; +#endif + int rc = 0; + + info = ofd_info_init(env, NULL); + rc = dt_statfs(env, ofd->ofd_osd, &info->fti_u.osfs); + if (unlikely(rc)) + GOTO(out, rc); + + if (info->fti_u.osfs.os_state == OS_STATE_READONLY) + GOTO(out, rc = -EROFS); + +#ifdef USE_HEALTH_CHECK_WRITE + OBD_ALLOC(info->fti_buf.lb_buf, CFS_PAGE_SIZE); + if (info->fti_buf.lb_buf == NULL) + GOTO(out, rc = -ENOMEM); + + info->fti_buf.lb_len = CFS_PAGE_SIZE; + info->fti_off = 0; + /* NOTE(review): if dt_trans_create() fails below, GOTO(out) skips the OBD_FREE() of info->fti_buf.lb_buf allocated above, so the page is leaked on that path */ + th = dt_trans_create(env, ofd->ofd_osd); + if (IS_ERR(th)) + GOTO(out, rc = PTR_ERR(th)); + + rc = dt_declare_record_write(env, ofd->ofd_health_check_file, + info->fti_buf.lb_len, info->fti_off, th); + if (rc == 0) { + th->th_sync = 1; /* sync IO is needed */ + rc = dt_trans_start_local(env, ofd->ofd_osd, th); + if (rc == 0) + rc = dt_record_write(env, ofd->ofd_health_check_file, + &info->fti_buf, &info->fti_off, + th); + } + dt_trans_stop(env, ofd->ofd_osd, th); + + OBD_FREE(info->fti_buf.lb_buf, CFS_PAGE_SIZE); + + CDEBUG(D_INFO, "write 1 page synchronously for checking io rc %d\n",rc); +#endif +out: + return !!rc; +} + static int ofd_obd_notify(struct obd_device *obd, struct obd_device *unused, enum obd_notify_event ev, void *data) { @@ -341,8 +606,16 @@ struct obd_ops ofd_obd_ops = { .o_connect = ofd_obd_connect, .o_reconnect = ofd_obd_reconnect, .o_disconnect = ofd_obd_disconnect, + .o_set_info_async = ofd_set_info_async, + .o_get_info = ofd_get_info, + .o_statfs = ofd_statfs, .o_init_export = ofd_init_export, .o_destroy_export = ofd_destroy_export, .o_postrecov = ofd_obd_postrecov, + .o_sync = ofd_sync, + .o_iocontrol = ofd_iocontrol, + .o_precleanup = ofd_precleanup, + .o_ping = ofd_ping, + .o_health_check = ofd_health_check, .o_notify = ofd_obd_notify, }; -- 1.8.3.1