X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fofd%2Fofd_obd.c;h=dba54a484034817095c9f11f8f28034a48a095ef;hp=09922d81f3f913fdb0532a45972e272771fb0350;hb=61743b88672687f789f9c609c85a351e1595118b;hpb=520631764c0e9d1bf5c70f8d060b81a2f322d5dc;ds=sidebyside diff --git a/lustre/ofd/ofd_obd.c b/lustre/ofd/ofd_obd.c index 09922d8..dba54a4 100644 --- a/lustre/ofd/ofd_obd.c +++ b/lustre/ofd/ofd_obd.c @@ -23,7 +23,7 @@ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2012, 2015, Intel Corporation. + * Copyright (c) 2012, 2017, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -43,10 +43,9 @@ #define DEBUG_SUBSYSTEM S_FILTER -#include #include "ofd_internal.h" #include -#include +#include #include #include #include @@ -122,6 +121,10 @@ out: * connect flags from the obd_connect_data::ocd_connect_flags field of the * reply. \see tgt_connect(). * + * Before 2.7.50 clients will send a struct obd_connect_data_v1 rather than a + * full struct obd_connect_data. So care must be taken when accessing fields + * that are not present in struct obd_connect_data_v1. See LU-16. + * * \param[in] env execution environment * \param[in] exp the obd_export associated with this * client/target pair @@ -164,7 +167,10 @@ static int ofd_parse_connect_data(const struct lu_env *env, fed->fed_group = data->ocd_group; data->ocd_connect_flags &= OST_CONNECT_SUPPORTED; - data->ocd_connect_flags2 &= OST_CONNECT_SUPPORTED2; + + if (data->ocd_connect_flags & OBD_CONNECT_FLAGS2) + data->ocd_connect_flags2 &= OST_CONNECT_SUPPORTED2; + data->ocd_version = LUSTRE_VERSION_CODE; /* Kindly make sure the SKIP_ORPHAN flag is from MDS. */ @@ -195,23 +201,24 @@ static int ofd_parse_connect_data(const struct lu_env *env, } if (OCD_HAS_FLAG(data, GRANT_PARAM)) { + struct dt_device_param *ddp = &ofd->ofd_lut.lut_dt_conf; + /* client is reporting its page size, for future use */ - exp->exp_filter_data.fed_pagebits = data->ocd_grant_blkbits; - data->ocd_grant_blkbits = ofd->ofd_blockbits; + exp->exp_target_data.ted_pagebits = data->ocd_grant_blkbits; + data->ocd_grant_blkbits = ofd->ofd_lut.lut_tgd.tgd_blockbits; /* ddp_inodespace may not be power-of-two value, eg. for ldiskfs * it's LDISKFS_DIR_REC_LEN(20) = 28. */ - data->ocd_grant_inobits = - fls(ofd->ofd_dt_conf.ddp_inodespace - 1); + data->ocd_grant_inobits = fls(ddp->ddp_inodespace - 1); /* ocd_grant_tax_kb is in 1K byte blocks */ - data->ocd_grant_tax_kb = ofd->ofd_dt_conf.ddp_extent_tax >> 10; - data->ocd_grant_max_blks = ofd->ofd_dt_conf.ddp_max_extent_blks; + data->ocd_grant_tax_kb = ddp->ddp_extent_tax >> 10; + data->ocd_grant_max_blks = ddp->ddp_max_extent_blks; } if (OCD_HAS_FLAG(data, GRANT)) { - /* Save connect_data we have so far because ofd_grant_connect() + /* Save connect_data we have so far because tgt_grant_connect() * uses it to calculate grant. */ exp->exp_connect_data = *data; - ofd_grant_connect(env, exp, data, new_connection); + tgt_grant_connect(env, exp, data, new_connection); } if (data->ocd_connect_flags & OBD_CONNECT_INDEX) { @@ -230,7 +237,7 @@ static int ofd_parse_connect_data(const struct lu_env *env, if (!(lsd->lsd_feature_compat & OBD_COMPAT_OST)) { /* this will only happen on the first connect */ lsd->lsd_feature_compat |= OBD_COMPAT_OST; - /* sync is not needed here as lut_client_add will + /* sync is not needed here as tgt_client_new will * set exp_need_sync flag */ tgt_server_data_update(env, &ofd->ofd_lut, 0); } @@ -242,7 +249,7 @@ static int ofd_parse_connect_data(const struct lu_env *env, /* The client set in ocd_cksum_types the checksum types it * supports. We have to mask off the algorithms that we don't * support */ - data->ocd_cksum_types &= cksum_types_supported_server(); + data->ocd_cksum_types &= ofd->ofd_cksum_types_supported; if (unlikely(data->ocd_cksum_types == 0)) { CERROR("%s: Connect with checksum support but no " @@ -255,15 +262,15 @@ static int ofd_parse_connect_data(const struct lu_env *env, "%x\n", exp->exp_obd->obd_name, obd_export_nid2str(exp), cksum_types, data->ocd_cksum_types); } else { - /* This client does not support OBD_CONNECT_CKSUM - * fall back to CRC32 */ + /* This client does not support OBD_CONNECT_CKSUM. + * Report failure to negotiate checksum at connect */ CDEBUG(D_RPCTRACE, "%s: cli %s does not support " - "OBD_CONNECT_CKSUM, CRC32 will be used\n", + "OBD_CONNECT_CKSUM\n", exp->exp_obd->obd_name, obd_export_nid2str(exp)); } if (data->ocd_connect_flags & OBD_CONNECT_MAXBYTES) - data->ocd_maxbytes = ofd->ofd_dt_conf.ddp_maxbytes; + data->ocd_maxbytes = ofd->ofd_lut.lut_dt_conf.ddp_maxbytes; if (OCD_HAS_FLAG(data, PINGLESS)) { if (ptlrpc_pinger_suppress_pings()) { @@ -422,11 +429,11 @@ int ofd_obd_disconnect(struct obd_export *exp) class_export_get(exp); if (!(exp->exp_flags & OBD_OPT_FORCE)) - ofd_grant_sanity_check(ofd_obd(ofd), __FUNCTION__); + tgt_grant_sanity_check(ofd_obd(ofd), __func__); rc = server_disconnect_export(exp); - ofd_grant_discard(exp); + tgt_grant_discard(exp); /* Do not erase record for recoverable client. */ if (exp->exp_obd->obd_replayable && @@ -495,10 +502,10 @@ static int ofd_destroy_export(struct obd_export *exp) { struct ofd_device *ofd = ofd_exp(exp); - if (exp->exp_filter_data.fed_pending) + if (exp->exp_target_data.ted_pending) CERROR("%s: cli %s/%p has %lu pending on destroyed export" "\n", exp->exp_obd->obd_name, exp->exp_client_uuid.uuid, - exp, exp->exp_filter_data.fed_pending); + exp, exp->exp_target_data.ted_pending); target_destroy_export(exp); @@ -515,14 +522,14 @@ static int ofd_destroy_export(struct obd_export *exp) * discard grants once we're sure no more * interaction with the client is possible */ - ofd_grant_discard(exp); + tgt_grant_discard(exp); ofd_fmd_cleanup(exp); if (exp_connect_flags(exp) & OBD_CONNECT_GRANT) - ofd->ofd_tot_granted_clients--; + ofd->ofd_lut.lut_tgd.tgd_tot_granted_clients--; if (!(exp->exp_flags & OBD_OPT_FORCE)) - ofd_grant_sanity_check(exp->exp_obd, __FUNCTION__); + tgt_grant_sanity_check(exp->exp_obd, __func__); LASSERT(list_empty(&exp->exp_filter_data.fed_mod_list)); return 0; @@ -547,7 +554,7 @@ int ofd_postrecov(const struct lu_env *env, struct ofd_device *ofd) CDEBUG(D_HA, "%s: recovery is over\n", ofd_name(ofd)); - if (!ofd->ofd_skip_lfsck) { + if (!ofd->ofd_skip_lfsck && !ofd->ofd_osd->dd_rdonly) { struct lfsck_start_param lsp; lsp.lsp_start = NULL; @@ -623,7 +630,7 @@ static int ofd_set_info_async(const struct lu_env *env, struct obd_export *exp, } if (KEY_IS(KEY_SPTLRPC_CONF)) { - rc = tgt_adapt_sptlrpc_conf(class_exp2tgt(exp), 0); + rc = tgt_adapt_sptlrpc_conf(class_exp2tgt(exp)); } else { CERROR("%s: Unsupported key %s\n", exp->exp_obd->obd_name, (char*)key); @@ -686,105 +693,6 @@ static int ofd_get_info(const struct lu_env *env, struct obd_export *exp, } /** - * Get file system statistics of OST server. - * - * Helper function for ofd_statfs(), also used by grant code. - * Implements caching for statistics to avoid calling OSD device each time. - * - * \param[in] env execution environment - * \param[in] ofd OFD device - * \param[out] osfs statistic data to return - * \param[in] max_age maximum age for cached data - * \param[in] from_cache show that data was get from cache or not - * - * \retval 0 if successful - * \retval negative value on error - */ -int ofd_statfs_internal(const struct lu_env *env, struct ofd_device *ofd, - struct obd_statfs *osfs, __u64 max_age, int *from_cache) -{ - int rc = 0; - ENTRY; - - spin_lock(&ofd->ofd_osfs_lock); - if (cfs_time_before_64(ofd->ofd_osfs_age, max_age) || max_age == 0) { - u64 unstable; - - /* statfs data are too old, get up-to-date one. - * we must be cautious here since multiple threads might be - * willing to update statfs data concurrently and we must - * grant that cached statfs data are always consistent */ - - if (ofd->ofd_statfs_inflight == 0) - /* clear inflight counter if no users, although it would - * take a while to overflow this 64-bit counter ... */ - ofd->ofd_osfs_inflight = 0; - /* notify ofd_grant_commit() that we want to track writes - * completed as of now */ - ofd->ofd_statfs_inflight++; - /* record value of inflight counter before running statfs to - * compute the diff once statfs is completed */ - unstable = ofd->ofd_osfs_inflight; - spin_unlock(&ofd->ofd_osfs_lock); - - /* statfs can sleep ... hopefully not for too long since we can - * call it fairly often as space fills up */ - rc = dt_statfs(env, ofd->ofd_osd, osfs); - if (unlikely(rc)) - GOTO(out, rc); - - spin_lock(&ofd->ofd_grant_lock); - spin_lock(&ofd->ofd_osfs_lock); - /* calculate how much space was written while we released the - * ofd_osfs_lock */ - unstable = ofd->ofd_osfs_inflight - unstable; - ofd->ofd_osfs_unstable = 0; - if (unstable) { - /* some writes committed while we were running statfs - * w/o the ofd_osfs_lock. Those ones got added to - * the cached statfs data that we are about to crunch. - * Take them into account in the new statfs data */ - osfs->os_bavail -= min_t(u64, osfs->os_bavail, - unstable >> ofd->ofd_blockbits); - /* However, we don't really know if those writes got - * accounted in the statfs call, so tell - * ofd_grant_space_left() there is some uncertainty - * on the accounting of those writes. - * The purpose is to prevent spurious error messages in - * ofd_grant_space_left() since those writes might be - * accounted twice. */ - ofd->ofd_osfs_unstable += unstable; - } - /* similarly, there is some uncertainty on write requests - * between prepare & commit */ - ofd->ofd_osfs_unstable += ofd->ofd_tot_pending; - spin_unlock(&ofd->ofd_grant_lock); - - /* finally udpate cached statfs data */ - ofd->ofd_osfs = *osfs; - ofd->ofd_osfs_age = cfs_time_current_64(); - - ofd->ofd_statfs_inflight--; /* stop tracking */ - if (ofd->ofd_statfs_inflight == 0) - ofd->ofd_osfs_inflight = 0; - spin_unlock(&ofd->ofd_osfs_lock); - - if (from_cache) - *from_cache = 0; - } else { - /* use cached statfs data */ - *osfs = ofd->ofd_osfs; - spin_unlock(&ofd->ofd_osfs_lock); - if (from_cache) - *from_cache = 1; - } - GOTO(out, rc); - -out: - return rc; -} - -/** * Implementation of obd_ops::o_statfs. * * This function returns information about a storage file system. @@ -806,15 +714,16 @@ out: * \retval negative value on error */ int ofd_statfs(const struct lu_env *env, struct obd_export *exp, - struct obd_statfs *osfs, __u64 max_age, __u32 flags) + struct obd_statfs *osfs, time64_t max_age, __u32 flags) { struct obd_device *obd = class_exp2obd(exp); struct ofd_device *ofd = ofd_exp(exp); + struct tg_grants_data *tgd = &ofd->ofd_lut.lut_tgd; int rc; ENTRY; - rc = ofd_statfs_internal(env, ofd, osfs, max_age, NULL); + rc = tgt_statfs_internal(env, &ofd->ofd_lut, osfs, max_age, NULL); if (unlikely(rc)) GOTO(out, rc); @@ -824,54 +733,62 @@ int ofd_statfs(const struct lu_env *env, struct obd_export *exp, CDEBUG(D_SUPER | D_CACHE, "blocks cached %llu granted %llu" " pending %llu free %llu avail %llu\n", - ofd->ofd_tot_dirty, ofd->ofd_tot_granted, ofd->ofd_tot_pending, - osfs->os_bfree << ofd->ofd_blockbits, - osfs->os_bavail << ofd->ofd_blockbits); + tgd->tgd_tot_dirty, tgd->tgd_tot_granted, + tgd->tgd_tot_pending, + osfs->os_bfree << tgd->tgd_blockbits, + osfs->os_bavail << tgd->tgd_blockbits); osfs->os_bavail -= min_t(u64, osfs->os_bavail, - ((ofd->ofd_tot_dirty + ofd->ofd_tot_pending + - osfs->os_bsize - 1) >> ofd->ofd_blockbits)); + ((tgd->tgd_tot_dirty + tgd->tgd_tot_pending + + osfs->os_bsize - 1) >> tgd->tgd_blockbits)); /* The QoS code on the MDS does not care about space reserved for * precreate, so take it out. */ if (exp_connect_flags(exp) & OBD_CONNECT_MDS) { - struct filter_export_data *fed; + struct tg_export_data *ted; - fed = &obd->obd_self_export->exp_filter_data; + ted = &obd->obd_self_export->exp_target_data; osfs->os_bavail -= min_t(u64, osfs->os_bavail, - fed->fed_grant >> ofd->ofd_blockbits); + ted->ted_grant >> tgd->tgd_blockbits); } - ofd_grant_sanity_check(obd, __FUNCTION__); + tgt_grant_sanity_check(obd, __func__); CDEBUG(D_CACHE, "%llu blocks: %llu free, %llu avail; " "%llu objects: %llu free; state %x\n", osfs->os_blocks, osfs->os_bfree, osfs->os_bavail, osfs->os_files, osfs->os_ffree, osfs->os_state); if (OBD_FAIL_CHECK_VALUE(OBD_FAIL_OST_ENOINO, - ofd->ofd_lut.lut_lsd.lsd_osd_index)) - osfs->os_ffree = 0; + ofd->ofd_lut.lut_lsd.lsd_osd_index)) { + /* Reduce free inode count to zero, but keep "used" intact */ + osfs->os_files -= osfs->os_ffree; + osfs->os_ffree -= osfs->os_ffree; + } /* OS_STATE_READONLY can be set by OSD already */ if (ofd->ofd_raid_degraded) osfs->os_state |= OS_STATE_DEGRADED; - if (obd->obd_self_export != exp && !ofd_grant_param_supp(exp) && - ofd->ofd_blockbits > COMPAT_BSIZE_SHIFT) { + if (obd->obd_self_export != exp && !exp_grant_param_supp(exp) && + tgd->tgd_blockbits > COMPAT_BSIZE_SHIFT) { /* clients which don't support OBD_CONNECT_GRANT_PARAM * should not see a block size > page size, otherwise * cl_lost_grant goes mad. Therefore, we emulate a 4KB (=2^12) * block size which is the biggest block size known to work * with all client's page size. */ - osfs->os_blocks <<= ofd->ofd_blockbits - COMPAT_BSIZE_SHIFT; - osfs->os_bfree <<= ofd->ofd_blockbits - COMPAT_BSIZE_SHIFT; - osfs->os_bavail <<= ofd->ofd_blockbits - COMPAT_BSIZE_SHIFT; + osfs->os_blocks <<= tgd->tgd_blockbits - COMPAT_BSIZE_SHIFT; + osfs->os_bfree <<= tgd->tgd_blockbits - COMPAT_BSIZE_SHIFT; + osfs->os_bavail <<= tgd->tgd_blockbits - COMPAT_BSIZE_SHIFT; osfs->os_bsize = 1 << COMPAT_BSIZE_SHIFT; } if (OBD_FAIL_CHECK_VALUE(OBD_FAIL_OST_ENOSPC, - ofd->ofd_lut.lut_lsd.lsd_osd_index)) - osfs->os_bfree = osfs->os_bavail = 2; + ofd->ofd_lut.lut_lsd.lsd_osd_index)) { + /* Reduce free blocks count near zero, but keep "used" intact */ + osfs->os_bavail -= osfs->os_bavail - 2; + osfs->os_blocks -= osfs->os_bfree - 2; + osfs->os_bfree -= osfs->os_bfree - 2; + } EXIT; out: @@ -900,7 +817,6 @@ static int ofd_echo_setattr(const struct lu_env *env, struct obd_export *exp, struct ldlm_resource *res; struct ofd_object *fo; struct lu_fid *fid = &oa->o_oi.oi_fid; - struct filter_fid *ff = NULL; int rc = 0; ENTRY; @@ -937,13 +853,8 @@ static int ofd_echo_setattr(const struct lu_env *env, struct obd_export *exp, la_from_obdo(&info->fti_attr, oa, oa->o_valid); info->fti_attr.la_valid &= ~LA_TYPE; - if (oa->o_valid & OBD_MD_FLFID) { - ff = &info->fti_mds_fid; - ofd_prepare_fidea(ff, oa); - } - /* setting objects attributes (including owner/group) */ - rc = ofd_attr_set(env, fo, &info->fti_attr, ff); + rc = ofd_attr_set(env, fo, &info->fti_attr, oa); if (rc) GOTO(out_unlock, rc); @@ -973,7 +884,7 @@ out: * * Supplemental function to destroy object by FID, it is used by request * handler and by ofd_echo_destroy() below to find object by FID, lock it - * and call ofd_object_destroy() finally. + * and call ofd_destroy() finally. * * \param[in] env execution environment * \param[in] ofd OFD device @@ -1013,7 +924,7 @@ int ofd_destroy_by_fid(const struct lu_env *env, struct ofd_device *ofd, LASSERT(fo != NULL); - rc = ofd_object_destroy(env, fo, orphan); + rc = ofd_destroy(env, fo, orphan); EXIT; ofd_object_put(env, fo); @@ -1048,6 +959,10 @@ static int ofd_echo_destroy(const struct lu_env *env, struct obd_export *exp, ofd_info_init(env, exp); + rc = ofd_validate_seq(exp, ostid_seq(&oa->o_oi)); + if (rc != 0) + RETURN(rc); + CDEBUG(D_HA, "%s: Destroy object "DFID"\n", ofd_name(ofd), PFID(fid)); rc = ofd_destroy_by_fid(env, ofd, fid, 0); @@ -1087,17 +1002,17 @@ static int ofd_echo_create(const struct lu_env *env, struct obd_export *exp, struct obdo *oa) { struct ofd_device *ofd = ofd_exp(exp); - struct ofd_thread_info *info; u64 seq = ostid_seq(&oa->o_oi); struct ofd_seq *oseq; - int rc = 0, diff = 1; long granted; u64 next_id; + s64 diff = 1; + int rc = 0; int count; ENTRY; - info = ofd_info_init(env, exp); + ofd_info_init(env, exp); LASSERT(seq == FID_SEQ_ECHO); LASSERT(oa->o_valid & OBD_MD_FLGROUP); @@ -1111,6 +1026,10 @@ static int ofd_echo_create(const struct lu_env *env, struct obd_export *exp, if (unlikely(ofd->ofd_lastid_rebuilding)) GOTO(out_sem, rc = -ENOSPC); + rc = ofd_validate_seq(exp, seq); + if (rc != 0) + RETURN(rc); + oseq = ofd_seq_load(env, ofd, seq); if (IS_ERR(oseq)) { CERROR("%s: Can't find FID Sequence %#llx: rc = %ld\n", @@ -1119,30 +1038,35 @@ static int ofd_echo_create(const struct lu_env *env, struct obd_export *exp, } mutex_lock(&oseq->os_create_lock); - granted = ofd_grant_create(env, ofd_obd(ofd)->obd_self_export, &diff); + granted = tgt_grant_create(env, ofd_obd(ofd)->obd_self_export, &diff); if (granted < 0) { rc = granted; granted = 0; CDEBUG(D_HA, "%s: failed to acquire grant space for " - "precreate (%d): rc = %d\n", ofd_name(ofd), diff, rc); + "precreate (%lld): rc = %d\n", ofd_name(ofd), diff, rc); diff = 0; GOTO(out, rc); } next_id = ofd_seq_last_oid(oseq) + 1; - count = ofd_precreate_batch(ofd, diff); + count = ofd_precreate_batch(ofd, (int)diff); rc = ofd_precreate_objects(env, ofd, next_id, oseq, count, 0); if (rc < 0) { CERROR("%s: unable to precreate: rc = %d\n", ofd_name(ofd), rc); } else { - ostid_set_id(&oa->o_oi, ofd_seq_last_oid(oseq)); + rc = ostid_set_id(&oa->o_oi, ofd_seq_last_oid(oseq)); + if (rc) { + CERROR("%s: Bad %llu to set " DOSTID " : rc %d\n", + ofd_name(ofd), + (unsigned long long)ofd_seq_last_oid(oseq), + POSTID(&oa->o_oi), rc); + } oa->o_valid |= OBD_MD_FLID | OBD_MD_FLGROUP; - rc = 0; } - ofd_grant_commit(ofd_obd(ofd)->obd_self_export, granted, rc); + tgt_grant_commit(ofd_obd(ofd)->obd_self_export, granted, rc); out: mutex_unlock(&oseq->os_create_lock); ofd_seq_put(env, oseq); @@ -1191,7 +1115,7 @@ static int ofd_echo_getattr(const struct lu_env *env, struct obd_export *exp, __u64 curr_version; obdo_from_la(oa, &info->fti_attr, - OFD_VALID_FLAGS | LA_UID | LA_GID); + OFD_VALID_FLAGS | LA_UID | LA_GID | LA_PROJID); /* Store object version in reply */ curr_version = dt_version_get(env, ofd_object_child(fo)); @@ -1239,10 +1163,12 @@ static int ofd_ioc_get_obj_version(const struct lu_env *env, data->ioc_inllen3 == sizeof(__u64) && data->ioc_inlbuf4 != NULL && data->ioc_inllen4 == sizeof(__u64)) { - struct ost_id ostid; + struct ost_id ostid = { }; ostid_set_seq(&ostid, *(__u64 *)data->ioc_inlbuf4); - ostid_set_id(&ostid, *(__u64 *)data->ioc_inlbuf3); + rc = ostid_set_id(&ostid, *(__u64 *)data->ioc_inlbuf3); + if (rc) + GOTO(out, rc); rc = ostid_to_fid(&fid, &ostid, ofd->ofd_lut.lut_lsd.lsd_osd_index); if (rc != 0) @@ -1439,7 +1365,7 @@ static int ofd_health_check(const struct lu_env *nul, struct obd_device *obd) if (unlikely(rc)) GOTO(out, rc); - if (info->fti_u.osfs.os_state == OS_STATE_READONLY) + if (info->fti_u.osfs.os_state & OS_STATE_READONLY) GOTO(out, rc = -EROFS); #ifdef USE_HEALTH_CHECK_WRITE