return p->mo_dir_ops->mdo_unlink(env, p, c, lname, ma, no_name);
}
+/**
+ * Forward a statfs request to the mdo_statfs method of an MD device.
+ *
+ * The method is asserted to be set; its return code is handed back as is.
+ */
+static inline int mdo_statfs(const struct lu_env *env,
+			     struct md_device *m,
+			     struct obd_statfs *sfs)
+{
+	int rc;
+
+	LASSERT(m->md_ops->mdo_statfs);
+	rc = m->md_ops->mdo_statfs(env, m, sfs);
+
+	return rc;
+}
+
/**
* Used in MDD/OUT layer for object lock rule
**/
__u32 tgts_size; /* size of tgts array */
struct lmv_tgt_desc **tgts;
+ int lmv_statfs_start;
+
struct obd_connect_data conn_data;
struct kobject *lmv_tgts_kobj;
* obd_osfs_age */
#define OBD_STATFS_FOR_MDT0 0x0004 /* The statfs is only for retrieving
* information from MDT0. */
+#define OBD_STATFS_SUM 0x0008 /* get aggregated statfs from MDT */
extern rwlock_t obd_dev_lock;
CDEBUG(D_SUPER, "osfs %lld, max_age %lld\n",
obd->obd_osfs_age, max_age);
- if (obd->obd_osfs_age < max_age) {
+ /* ignore cache if aggregated isn't expected */
+ if (obd->obd_osfs_age < max_age ||
+ ((obd->obd_osfs.os_state & OS_STATE_SUM) &&
+ !(flags & OBD_STATFS_SUM))) {
rc = OBP(obd, statfs)(env, exp, osfs, max_age, flags);
if (rc == 0) {
spin_lock(&obd->obd_osfs_lock);
#define OBD_FAIL_MDS_GET_ROOT_NET 0x11b
#define OBD_FAIL_MDS_GET_ROOT_PACK 0x11c
#define OBD_FAIL_MDS_STATFS_PACK 0x11d
+#define OBD_FAIL_MDS_STATFS_SUM_PACK 0x11d
#define OBD_FAIL_MDS_STATFS_NET 0x11e
+#define OBD_FAIL_MDS_STATFS_SUM_NET 0x11e
#define OBD_FAIL_MDS_GETATTR_NAME_NET 0x11f
#define OBD_FAIL_MDS_PIN_NET 0x120
#define OBD_FAIL_MDS_UNPIN_NET 0x121
#define OBD_CONNECT2_FILE_SECCTX 0x1ULL /* set file security context at create */
#define OBD_CONNECT2_LOCKAHEAD 0x2ULL /* ladvise lockahead v2 */
#define OBD_CONNECT2_DIR_MIGRATE 0x4ULL /* migrate striped dir */
+#define OBD_CONNECT2_SUM_STATFS 0x8ULL /* MDT return aggregated stats */
#define OBD_CONNECT2_FLR 0x20ULL /* FLR support */
#define OBD_CONNECT2_WBC_INTENTS 0x40ULL /* create/unlink/... intents for wbc, also operations under client-held parent locks */
#define OBD_CONNECT2_LOCK_CONVERT 0x80ULL /* IBITS lock convert support */
OBD_CONNECT_SHORTIO | OBD_CONNECT_FLAGS2)
#define MDT_CONNECT_SUPPORTED2 (OBD_CONNECT2_FILE_SECCTX | OBD_CONNECT2_FLR | \
-+ OBD_CONNECT2_LOCK_CONVERT)
+ OBD_CONNECT2_SUM_STATFS | \
+ OBD_CONNECT2_LOCK_CONVERT)
#define OST_CONNECT_SUPPORTED (OBD_CONNECT_SRVLOCK | OBD_CONNECT_GRANT | \
OBD_CONNECT_REQPORTAL | OBD_CONNECT_VERSION | \
#define OBD_MD_FLXATTRLS (0x0000002000000000ULL) /* xattr list */
#define OBD_MD_FLXATTRRM (0x0000004000000000ULL) /* xattr remove */
#define OBD_MD_FLACL (0x0000008000000000ULL) /* ACL */
-/* OBD_MD_FLRMTPERM (0x0000010000000000ULL) remote perm, obsolete */
+#define OBD_MD_FLAGSTATFS (0x0000010000000000ULL) /* aggregated statfs */
#define OBD_MD_FLMDSCAPA (0x0000020000000000ULL) /* MDS capability */
#define OBD_MD_FLOSSCAPA (0x0000040000000000ULL) /* OSS capability */
/* OBD_MD_FLCKSPLIT (0x0000080000000000ULL) obsolete 2.3.58*/
OS_STATE_NOPRECREATE = 0x00000004, /**< no object precreation */
OS_STATE_ENOSPC = 0x00000020, /**< not enough free space */
OS_STATE_ENOINO = 0x00000040, /**< not enough inodes */
+ OS_STATE_SUM = 0x00000100, /**< aggregated for all targets */
};
/** filesystem statistics/attributes for target device */
__u32 os_fprecreated; /* objs available now to the caller */
/* used in QoS code to find preferred
* OSTs */
- __u32 os_spare2; /* Unused padding fields. Remember */
- __u32 os_spare3; /* to fix lustre_swab_obd_statfs() */
- __u32 os_spare4;
- __u32 os_spare5;
- __u32 os_spare6;
- __u32 os_spare7;
- __u32 os_spare8;
- __u32 os_spare9;
+ __u32 os_granted; /* space granted for MDS */
+ __u32 os_spare3; /* Unused padding fields. Remember */
+ __u32 os_spare4; /* to fix lustre_swab_obd_statfs() */
+ __u32 os_spare5;
+ __u32 os_spare6;
+ __u32 os_spare7;
+ __u32 os_spare8;
+ __u32 os_spare9;
};
/**
data->ocd_connect_flags2 = OBD_CONNECT2_FLR |
OBD_CONNECT2_LOCK_CONVERT |
- OBD_CONNECT2_DIR_MIGRATE;
+ OBD_CONNECT2_DIR_MIGRATE |
+ OBD_CONNECT2_SUM_STATFS;
#ifdef HAVE_LRU_RESIZE_SUPPORT
if (sbi->ll_flags & LL_SBI_LRU_RESIZE)
CDEBUG(D_SUPER, "MDC blocks %llu/%llu objects %llu/%llu\n",
osfs->os_bavail, osfs->os_blocks, osfs->os_ffree,osfs->os_files);
+ if (osfs->os_state & OS_STATE_SUM)
+ GOTO(out, rc);
+
if (sbi->ll_flags & LL_SBI_LAZYSTATFS)
flags |= OBD_STATFS_NODELAY;
osfs->os_ffree = obd_osfs.os_ffree;
}
+out:
RETURN(rc);
}
int ll_statfs(struct dentry *de, struct kstatfs *sfs)
ll_stats_ops_tally(ll_s2sbi(sb), LPROC_LL_STAFS, 1);
/* Some amount of caching on the client is allowed */
- rc = ll_statfs_internal(ll_s2sbi(sb), &osfs, 0);
+ rc = ll_statfs_internal(ll_s2sbi(sb), &osfs, OBD_STATFS_SUM);
if (rc)
return rc;
RETURN(rc);
}
+/**
+ * Pick the MDT index at which a statfs walk over the targets starts.
+ *
+ * With OBD_STATFS_FOR_MDT0 set the answer is always 0. Otherwise the
+ * previously chosen lmv_statfs_start is reused when it is non-zero or when
+ * there is at most one target. Failing that, the LNet ids are walked with
+ * LNetGetId() until it reports -ENOENT, and the first id whose network type
+ * is not LOLND determines the start: nid modulo the target count.
+ *
+ * Zero doubles as "not chosen yet", so a client whose NID maps to index 0
+ * repeats the scan on every call.
+ *
+ * \param[in] lmv	LMV device; lmv_statfs_start may be updated
+ * \param[in] flags	OBD_STATFS_* flags of the request
+ *
+ * \retval		index of the first MDT to query
+ */
+static int lmv_select_statfs_mdt(struct lmv_obd *lmv, __u32 flags)
+{
+	int i;
+
+	if (flags & OBD_STATFS_FOR_MDT0)
+		return 0;
+
+	/* nothing to choose from with zero or one target; bailing out here
+	 * also keeps the modulo below from dividing by zero */
+	if (lmv->lmv_statfs_start || lmv->desc.ld_tgt_count <= 1)
+		return lmv->lmv_statfs_start;
+
+	/* choose initial MDT for this client */
+	for (i = 0;; i++) {
+		struct lnet_process_id lnet_id;
+
+		if (LNetGetId(i, &lnet_id) == -ENOENT)
+			break;
+
+		if (LNET_NETTYP(LNET_NIDNET(lnet_id.nid)) != LOLND) {
+			/* NOTE(review): if nid is a 64-bit type this is a
+			 * 64-bit modulus, which 32-bit kernels cannot link
+			 * without a helper - confirm */
+			lmv->lmv_statfs_start =
+				lnet_id.nid % lmv->desc.ld_tgt_count;
+			break;
+		}
+	}
+
+	return lmv->lmv_statfs_start;
+}
+
static int lmv_statfs(const struct lu_env *env, struct obd_export *exp,
struct obd_statfs *osfs, time64_t max_age, __u32 flags)
{
struct lmv_obd *lmv = &obd->u.lmv;
struct obd_statfs *temp;
int rc = 0;
- __u32 i;
+ __u32 i, idx;
ENTRY;
OBD_ALLOC(temp, sizeof(*temp));
if (temp == NULL)
RETURN(-ENOMEM);
- for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
- if (lmv->tgts[i] == NULL || lmv->tgts[i]->ltd_exp == NULL)
+ /* distribute statfs among MDTs */
+ idx = lmv_select_statfs_mdt(lmv, flags);
+
+ for (i = 0; i < lmv->desc.ld_tgt_count; i++, idx++) {
+ idx = idx % lmv->desc.ld_tgt_count;
+ if (lmv->tgts[idx] == NULL || lmv->tgts[idx]->ltd_exp == NULL)
continue;
- rc = obd_statfs(env, lmv->tgts[i]->ltd_exp, temp,
+ rc = obd_statfs(env, lmv->tgts[idx]->ltd_exp, temp,
max_age, flags);
if (rc) {
CERROR("can't stat MDS #%d (%s), error %d\n", i,
- lmv->tgts[i]->ltd_exp->exp_obd->obd_name,
+ lmv->tgts[idx]->ltd_exp->exp_obd->obd_name,
rc);
GOTO(out_free_temp, rc);
}
+ if (temp->os_state & OS_STATE_SUM ||
+ flags == OBD_STATFS_FOR_MDT0) {
+ /* reset to the last aggregated values
+ * and don't sum with non-aggregated data */
+ /* If the statfs is from mount, it needs to retrieve
+ * necessary information from MDT0. i.e. mount does
+ * not need the merged osfs from all of MDT. Also
+ * clients can be mounted as long as MDT0 is in
+ * service */
+ *osfs = *temp;
+ break;
+ }
+
if (i == 0) {
*osfs = *temp;
- /* If the statfs is from mount, it will needs
- * retrieve necessary information from MDT0.
- * i.e. mount does not need the merged osfs
- * from all of MDT.
- * And also clients can be mounted as long as
- * MDT0 is in service*/
- if (flags & OBD_STATFS_FOR_MDT0)
- GOTO(out_free_temp, rc);
- } else {
- osfs->os_bavail += temp->os_bavail;
- osfs->os_blocks += temp->os_blocks;
- osfs->os_ffree += temp->os_ffree;
- osfs->os_files += temp->os_files;
- }
+ } else {
+ osfs->os_bavail += temp->os_bavail;
+ osfs->os_blocks += temp->os_blocks;
+ osfs->os_ffree += temp->os_ffree;
+ osfs->os_files += temp->os_files;
+ osfs->os_granted += temp->os_granted;
+ }
}
EXIT;
return dt_root_get(env, dt2lod_dev(dev)->lod_child, f);
}
+/**
+ * Fold one target's space counters into a running total.
+ *
+ * Block counts can only be added at a common block size, so whichever side
+ * uses the larger block size is rescaled first: its block size is halved
+ * and its block counters doubled until the sizes no longer differ in that
+ * direction. The target structure is rescaled in place.
+ *
+ * \param[in,out] sfs		accumulated statfs
+ * \param[in,out] ost_sfs	statfs of the target being added
+ * \param[in,out] bs		block size the accumulated counters use
+ */
+static void lod_statfs_sum(struct obd_statfs *sfs,
+			   struct obd_statfs *ost_sfs, int *bs)
+{
+	/* the total uses bigger blocks than the target: scale the total */
+	for (; ost_sfs->os_bsize < *bs; *bs >>= 1) {
+		sfs->os_bsize >>= 1;
+		sfs->os_blocks <<= 1;
+		sfs->os_bfree <<= 1;
+		sfs->os_bavail <<= 1;
+		sfs->os_granted <<= 1;
+	}
+
+	/* the target uses bigger blocks than the total: scale the target */
+	for (; ost_sfs->os_bsize > *bs; ost_sfs->os_bsize >>= 1) {
+		ost_sfs->os_blocks <<= 1;
+		ost_sfs->os_bfree <<= 1;
+		ost_sfs->os_bavail <<= 1;
+		ost_sfs->os_granted <<= 1;
+	}
+
+	sfs->os_blocks += ost_sfs->os_blocks;
+	sfs->os_bfree += ost_sfs->os_bfree;
+	sfs->os_bavail += ost_sfs->os_bavail;
+	sfs->os_granted += ost_sfs->os_granted;
+}
+
/**
* Implementation of dt_device_operations::dt_statfs() for LOD
*
static int lod_statfs(const struct lu_env *env,
struct dt_device *dev, struct obd_statfs *sfs)
{
- return dt_statfs(env, dt2lod_dev(dev)->lod_child, sfs);
+ struct lod_device *lod = dt2lod_dev(dev);
+ struct lod_ost_desc *ost;
+ struct lod_mdt_desc *mdt;
+ struct obd_statfs ost_sfs;
+ int i, rc, bs;
+ bool mdtonly;
+
+ rc = dt_statfs(env, dt2lod_dev(dev)->lod_child, sfs); /* child first */
+ if (rc)
+ GOTO(out, rc);
+
+ bs = sfs->os_bsize; /* block size the totals are kept in */
+
+ sfs->os_bavail = 0; /* space counters are rebuilt from the targets */
+ sfs->os_blocks = 0;
+ sfs->os_bfree = 0;
+ sfs->os_granted = 0;
+
+ lod_getref(&lod->lod_mdt_descs);
+ lod_foreach_mdt(lod, i) {
+ mdt = MDT_TGT(lod, i);
+ LASSERT(mdt && mdt->ltd_mdt);
+ rc = dt_statfs(env, mdt->ltd_mdt, &ost_sfs);
+ /* ignore errors */
+ if (rc)
+ continue;
+ sfs->os_files += ost_sfs.os_files; /* added on top of the child's */
+ sfs->os_ffree += ost_sfs.os_ffree;
+ lod_statfs_sum(sfs, &ost_sfs, &bs);
+ }
+ lod_putref(lod, &lod->lod_mdt_descs);
+
+ /* at some point we can check whether DoM is enabled and
+ * decide how to account MDT space. for simplicity let's
+ * just fallback to pre-DoM policy if any OST is alive */
+ mdtonly = true;
+
+ lod_getref(&lod->lod_ost_descs);
+ lod_foreach_ost(lod, i) {
+ ost = OST_TGT(lod, i);
+ LASSERT(ost && ost->ltd_ost);
+ rc = dt_statfs(env, ost->ltd_ost, &ost_sfs);
+ /* ignore errors */
+ if (rc || ost_sfs.os_bsize == 0)
+ continue;
+ if (mdtonly) { /* first usable OST: drop the MDT space sums */
+ /* if only MDTs and DoM report MDT space,
+ * otherwise only OST space */
+ sfs->os_bavail = 0;
+ sfs->os_blocks = 0;
+ sfs->os_bfree = 0;
+ sfs->os_granted = 0;
+ mdtonly = false;
+ }
+ ost_sfs.os_bavail += ost_sfs.os_granted; /* granted counts as available */
+ lod_statfs_sum(sfs, &ost_sfs, &bs);
+ LASSERTF(bs == ost_sfs.os_bsize, "%d != %d\n",
+ (int)sfs->os_bsize, (int)ost_sfs.os_bsize);
+ }
+ lod_putref(lod, &lod->lod_ost_descs);
+ sfs->os_state |= OS_STATE_SUM; /* flag the result as aggregated */
+
+ /* a single successful statfs should be enough */
+ rc = 0;
+
+out:
+ RETURN(rc);
}
/**
struct dt_object_format *dof,
struct thandle *th)
{
+ struct lod_thread_info *info = lod_env_info(env);
struct lod_device *lod = lu2lod_dev(dt->do_lu.lo_dev);
struct lod_tgt_descs *ltd = &lod->lod_mdt_descs;
struct lod_object *lo = lod_dt_obj(dt);
continue;
tgt_dt = tgt->ltd_tgt;
- rc = dt_statfs(env, tgt_dt, NULL);
+ rc = dt_statfs(env, tgt_dt, &info->lti_osfs);
if (rc) {
/* this OSP doesn't feel well */
rc = 0;
struct ptlrpc_request *req;
struct obd_statfs *msfs;
struct obd_import *imp = NULL;
- int rc;
+ int rc;
ENTRY;
/*
if (!imp)
RETURN(-ENODEV);
- req = ptlrpc_request_alloc_pack(imp, &RQF_MDS_STATFS,
- LUSTRE_MDS_VERSION, MDS_STATFS);
- if (req == NULL)
- GOTO(output, rc = -ENOMEM);
+ req = ptlrpc_request_alloc_pack(imp, &RQF_MDS_STATFS,
+ LUSTRE_MDS_VERSION, MDS_STATFS);
+ if (req == NULL)
+ GOTO(output, rc = -ENOMEM);
+
+ if ((flags & OBD_STATFS_SUM) &&
+ (exp_connect_flags2(exp) & OBD_CONNECT2_SUM_STATFS)) {
+ /* request aggregated states */
+ struct mdt_body *body;
+
+ body = req_capsule_client_get(&req->rq_pill, &RMF_MDT_BODY);
+ if (body == NULL)
+ GOTO(out, rc = -EPROTO);
+ body->mbo_valid = OBD_MD_FLAGSTATFS;
+ }
ptlrpc_request_set_replen(req);
static int mdt_statfs(struct tgt_session_info *tsi)
{
- struct ptlrpc_request *req = tgt_ses_req(tsi);
- struct mdt_thread_info *info = tsi2mdt_info(tsi);
- struct mdt_device *mdt = info->mti_mdt;
- struct tg_grants_data *tgd = &mdt->mdt_lut.lut_tgd;
- struct ptlrpc_service_part *svcpt;
- struct obd_statfs *osfs;
- int rc;
+ struct ptlrpc_request *req = tgt_ses_req(tsi);
+ struct mdt_thread_info *info = tsi2mdt_info(tsi);
+ struct mdt_device *mdt = info->mti_mdt;
+ struct tg_grants_data *tgd = &mdt->mdt_lut.lut_tgd;
+ struct md_device *next = mdt->mdt_child;
+ struct ptlrpc_service_part *svcpt;
+ struct obd_statfs *osfs;
+ struct mdt_body *reqbody = NULL;
+ struct mdt_statfs_cache *msf;
+ int rc;
ENTRY;
if (!osfs)
GOTO(out, rc = -EPROTO);
- rc = tgt_statfs_internal(tsi->tsi_env, &mdt->mdt_lut, osfs,
- ktime_get_seconds() - OBD_STATFS_CACHE_SECONDS,
- NULL);
- if (unlikely(rc))
- GOTO(out, rc);
+ if (mdt_is_sum_statfs_client(req->rq_export))
+ reqbody = req_capsule_client_get(info->mti_pill, &RMF_MDT_BODY);
+
+ if (reqbody && reqbody->mbo_valid & OBD_MD_FLAGSTATFS)
+ msf = &mdt->mdt_sum_osfs;
+ else
+ msf = &mdt->mdt_osfs;
+
+ if (msf->msf_age + OBD_STATFS_CACHE_SECONDS <= ktime_get_seconds()) {
+ /** statfs data is too old, get up-to-date one */
+ if (reqbody && reqbody->mbo_valid & OBD_MD_FLAGSTATFS)
+ rc = next->md_ops->mdo_statfs(info->mti_env,
+ next, osfs);
+ else
+ rc = dt_statfs(info->mti_env, mdt->mdt_bottom,
+ osfs);
+ if (rc)
+ GOTO(out, rc);
+ spin_lock(&mdt->mdt_lock);
+ msf->msf_osfs = *osfs;
+ msf->msf_age = ktime_get_seconds();
+ spin_unlock(&mdt->mdt_lock);
+ } else {
+ /** use cached statfs data */
+ spin_lock(&mdt->mdt_lock);
+ *osfs = msf->msf_osfs;
+ spin_unlock(&mdt->mdt_lock);
+ }
/* at least try to account for cached pages. its still racy and
* might be under-reporting if clients haven't announced their
NUM_DOM_LOCK_ON_OPEN_MODES
};
+struct mdt_statfs_cache { /* one cached statfs result and its timestamp */
+ struct obd_statfs msf_osfs; /* copy of the last statfs data fetched */
+ __u64 msf_age; /* ktime_get_seconds() when msf_osfs was stored */
+};
+
struct mdt_device {
/* super-class */
struct lu_device mdt_lu_dev;
/* lock for osfs and md_root */
spinlock_t mdt_lock;
+ /* statfs optimization: we cache a bit */
+ struct mdt_statfs_cache mdt_sum_osfs;
+ struct mdt_statfs_cache mdt_osfs;
+
/* root squash */
struct root_squash_info mdt_squash;
le16_to_cpu(lcm->lcm_mirror_count) > 0;
}
+/**
+ * Tell whether the client behind \a exp negotiated aggregated statfs.
+ *
+ * The second connect-flags word is looked at only when the export
+ * advertises OBD_CONNECT_FLAGS2.
+ */
+static inline bool mdt_is_sum_statfs_client(struct obd_export *exp)
+{
+	if (!(exp_connect_flags(exp) & OBD_CONNECT_FLAGS2))
+		return false;
+
+	return !!(exp_connect_flags2(exp) & OBD_CONNECT2_SUM_STATFS);
+}
+
__u64 mdt_get_disposition(struct ldlm_reply *rep, __u64 op_flag);
void mdt_set_disposition(struct mdt_thread_info *info,
struct ldlm_reply *rep, __u64 op_flag);
struct tg_export_data *ted;
ted = &obd->obd_self_export->exp_target_data;
- osfs->os_bavail -= min_t(u64, osfs->os_bavail,
- ted->ted_grant >> tgd->tgd_blockbits);
+ osfs->os_granted = min_t(u64, osfs->os_bavail,
+ ted->ted_grant >> tgd->tgd_blockbits);
+ osfs->os_bavail -= osfs->os_granted;
}
tgt_grant_sanity_check(obd, __func__);
osfs->os_blocks <<= tgd->tgd_blockbits - COMPAT_BSIZE_SHIFT;
osfs->os_bfree <<= tgd->tgd_blockbits - COMPAT_BSIZE_SHIFT;
osfs->os_bavail <<= tgd->tgd_blockbits - COMPAT_BSIZE_SHIFT;
+ osfs->os_granted <<= tgd->tgd_blockbits - COMPAT_BSIZE_SHIFT;
osfs->os_bsize = 1 << COMPAT_BSIZE_SHIFT;
}
rc = osp_disconnect(d);
+ osp_statfs_fini(d);
+
if (!d->opd_connect_mdt) {
/* stop sync thread */
osp_sync_fini(d);
if (unlikely(d->opd_imp_active == 0))
RETURN(-ENOTCONN);
- if (d->opd_pre == NULL)
- RETURN(0);
-
/* return recently updated data */
*sfs = d->opd_statfs;
+ if (d->opd_pre == NULL)
+ RETURN(0);
+
/*
* layer above osp (usually lod) can use ffree to estimate
* how many objects are available for immediate creation
if (rc < 0)
GOTO(out_precreat, rc);
} else {
+ osp->opd_got_disconnected = 1;
rc = osp_update_init(osp);
if (rc != 0)
GOTO(out_fid, rc);
}
+ rc = osp_init_statfs(osp);
+ if (rc)
+ GOTO(out_precreat, rc);
+
ns_register_cancel(obd->obd_namespace, osp_cancel_weight);
/*
d->opd_imp_connected = 1;
d->opd_imp_seen_connected = 1;
d->opd_obd->obd_inactive = 0;
+ wake_up(&d->opd_pre_waitq);
if (d->opd_connect_mdt)
break;
- if (d->opd_pre != NULL)
- wake_up(&d->opd_pre_waitq);
-
osp_sync_check_for_work(d);
CDEBUG(D_HA, "got connected\n");
break;
/*
* Precreation pool
*/
- spinlock_t osp_pre_lock;
/* last fid to assign in creation */
struct lu_fid osp_pre_used_fid;
struct osp_precreate *opd_pre;
/* dedicate precreate thread */
struct ptlrpc_thread opd_pre_thread;
+ spinlock_t opd_pre_lock;
/* thread waits for signals about pool going empty */
wait_queue_head_t opd_pre_waitq;
int opd_reserved_mb_low;
};
-#define opd_pre_lock opd_pre->osp_pre_lock
#define opd_pre_used_fid opd_pre->osp_pre_used_fid
#define opd_pre_last_created_fid opd_pre->osp_pre_last_created_fid
#define opd_pre_reserved opd_pre->osp_pre_reserved
int osp_write_last_oid_seq_files(struct lu_env *env, struct osp_device *osp,
struct lu_fid *fid, int sync);
int osp_init_pre_fid(struct osp_device *osp);
+int osp_init_statfs(struct osp_device *osp);
+void osp_fini_statfs(struct osp_device *osp);
+void osp_statfs_fini(struct osp_device *d);
/* lproc_osp.c */
void osp_tunables_init(struct osp_device *osp);
struct osp_device *d = cfs_from_timer(d, data, opd_statfs_timer);
LASSERT(d);
- if (d->opd_pre != NULL && osp_precreate_running(d))
+ if (osp_precreate_running(d))
wake_up(&d->opd_pre_waitq);
}
d->opd_statfs = *msfs;
- osp_pre_update_status(d, rc);
+ if (d->opd_pre)
+ osp_pre_update_status(d, rc);
/* schedule next update */
maxage_ns = d->opd_statfs_maxage * NSEC_PER_SEC;
imp = d->opd_obd->u.cli.cl_import;
LASSERT(imp);
- req = ptlrpc_request_alloc(imp, &RQF_OST_STATFS);
+ req = ptlrpc_request_alloc(imp,
+ d->opd_pre ? &RQF_OST_STATFS : &RQF_MDS_STATFS);
if (req == NULL)
RETURN(-ENOMEM);
- rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_STATFS);
+ rc = ptlrpc_request_pack(req,
+ d->opd_pre ? LUSTRE_OST_VERSION : LUSTRE_MDS_VERSION,
+ d->opd_pre ? OST_STATFS : MDS_STATFS);
if (rc) {
ptlrpc_request_free(req);
RETURN(rc);
}
ptlrpc_request_set_replen(req);
- req->rq_request_portal = OST_CREATE_PORTAL;
+ if (d->opd_pre)
+ req->rq_request_portal = OST_CREATE_PORTAL;
ptlrpc_at_set_req_timeout(req);
req->rq_interpret_reply = (ptlrpc_interpterer_t)osp_statfs_interpret;
{
int rc;
+ if (d->opd_pre == NULL)
+ return 0;
+
/* XXX: do we really need locking here? */
spin_lock(&d->opd_pre_lock);
rc = osp_precreate_near_empty_nolock(env, d);
* need to be connected to OST
*/
while (osp_precreate_running(d)) {
- if (d->opd_pre_recovering &&
+ if ((d->opd_pre == NULL || d->opd_pre_recovering) &&
d->opd_imp_connected &&
!d->opd_got_disconnected)
break;
if (!osp_precreate_running(d))
break;
- LASSERT(d->opd_obd->u.cli.cl_seq != NULL);
- /* Sigh, fid client is not ready yet */
- if (d->opd_obd->u.cli.cl_seq->lcs_exp == NULL)
- continue;
+ if (d->opd_pre) {
+ LASSERT(d->opd_obd->u.cli.cl_seq != NULL);
+ /* Sigh, fid client is not ready yet */
+ if (d->opd_obd->u.cli.cl_seq->lcs_exp == NULL)
+ continue;
- /* Init fid for osp_precreate if necessary */
- rc = osp_init_pre_fid(d);
- if (rc != 0) {
- class_export_put(d->opd_exp);
- d->opd_obd->u.cli.cl_seq->lcs_exp = NULL;
- CERROR("%s: init pre fid error: rc = %d\n",
- d->opd_obd->obd_name, rc);
- continue;
+ /* Init fid for osp_precreate if necessary */
+ rc = osp_init_pre_fid(d);
+ if (rc != 0) {
+ class_export_put(d->opd_exp);
+ d->opd_obd->u.cli.cl_seq->lcs_exp = NULL;
+ CERROR("%s: init pre fid error: rc = %d\n",
+ d->opd_obd->obd_name, rc);
+ continue;
+ }
}
if (osp_statfs_update(&env, d)) {
continue;
}
- /*
- * Clean up orphans or recreate missing objects.
- */
- rc = osp_precreate_cleanup_orphans(&env, d);
- if (rc != 0) {
- schedule_timeout_interruptible(cfs_time_seconds(1));
- continue;
+ if (d->opd_pre) {
+ /*
+ * Clean up orphans or recreate missing objects.
+ */
+ rc = osp_precreate_cleanup_orphans(&env, d);
+ if (rc != 0) {
+ schedule_timeout_interruptible(
+ msecs_to_jiffies(MSEC_PER_SEC));
+ continue;
+ }
}
+
/*
* connected, can handle precreates now
*/
if (osp_statfs_update(&env, d))
break;
+ if (d->opd_pre == NULL)
+ continue;
+
/* To avoid handling different seq in precreate/orphan
* cleanup, it will hold precreate until current seq is
* used up. */
*/
int osp_init_precreate(struct osp_device *d)
{
- struct l_wait_info lwi = { 0 };
- struct task_struct *task;
-
ENTRY;
OBD_ALLOC_PTR(d->opd_pre);
RETURN(-ENOMEM);
/* initially precreation isn't ready */
+ init_waitqueue_head(&d->opd_pre_user_waitq);
d->opd_pre_status = -EAGAIN;
fid_zero(&d->opd_pre_used_fid);
d->opd_pre_used_fid.f_oid = 1;
d->opd_reserved_mb_high = 0;
d->opd_reserved_mb_low = 0;
+ RETURN(0);
+}
+
+/**
+ * Release the precreate state of an OSP device
+ *
+ * Frees the opd_pre structure and resets the pointer to NULL; does
+ * nothing when the device has no precreate state. Stopping the thread
+ * and the statfs update timer is not done here, that is the job of
+ * osp_statfs_fini().
+ *
+ * \param[in] d OSP device
+ */
+void osp_precreate_fini(struct osp_device *d)
+{
+ ENTRY;
+
+ if (d->opd_pre == NULL)
+ RETURN_EXIT;
+
+ OBD_FREE_PTR(d->opd_pre);
+ d->opd_pre = NULL;
+
+ EXIT;
+}
+
+int osp_init_statfs(struct osp_device *d)
+{
+ struct l_wait_info lwi = { 0 };
+ struct task_struct *task;
+
+ ENTRY;
+
spin_lock_init(&d->opd_pre_lock);
init_waitqueue_head(&d->opd_pre_waitq);
- init_waitqueue_head(&d->opd_pre_user_waitq);
thread_set_flags(&d->opd_pre_thread, SVC_INIT);
init_waitqueue_head(&d->opd_pre_thread.t_ctl_waitq);
RETURN(0);
}
-/**
- * Finish precreate functionality of OSP
- *
- *
- * Asks all the activity (the thread, update timer) to stop, then
- * wait till that is done.
- *
- * \param[in] d OSP device
- */
-void osp_precreate_fini(struct osp_device *d)
+void osp_statfs_fini(struct osp_device *d)
{
struct ptlrpc_thread *thread = &d->opd_pre_thread;
ENTRY;
del_timer(&d->opd_statfs_timer);
- if (d->opd_pre == NULL)
- RETURN_EXIT;
-
if (!thread_is_init(thread) && !thread_is_stopped(thread)) {
thread->t_flags = SVC_STOPPING;
wake_up(&d->opd_pre_waitq);
wait_event(thread->t_ctl_waitq, thread_is_stopped(thread));
}
- OBD_FREE_PTR(d->opd_pre);
- d->opd_pre = NULL;
-
EXIT;
}
-
/* cancel any generation record */
rc = llog_cat_cancel_records(env, cathandle, 1, &cookie);
+ /* flush all pending records ASAP */
+ osp_sync_force(env, d);
+
RETURN_EXIT;
}
EXPORT_SYMBOL(RQF_MDS_GET_ROOT);
struct req_format RQF_MDS_STATFS =
- DEFINE_REQ_FMT0("MDS_STATFS", empty, obd_statfs_server);
+ DEFINE_REQ_FMT0("MDS_STATFS", mdt_body_only, obd_statfs_server);
EXPORT_SYMBOL(RQF_MDS_STATFS);
struct req_format RQF_MDS_SYNC =
__swab64s(&os->os_maxbytes);
__swab32s(&os->os_state);
__swab32s(&os->os_fprecreated);
- CLASSERT(offsetof(typeof(*os), os_spare2) != 0);
+ __swab32s(&os->os_granted);
CLASSERT(offsetof(typeof(*os), os_spare3) != 0);
CLASSERT(offsetof(typeof(*os), os_spare4) != 0);
CLASSERT(offsetof(typeof(*os), os_spare5) != 0);
(long long)(int)offsetof(struct obd_statfs, os_fprecreated));
LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_fprecreated) == 4, "found %lld\n",
(long long)(int)sizeof(((struct obd_statfs *)0)->os_fprecreated));
- LASSERTF((int)offsetof(struct obd_statfs, os_spare2) == 112, "found %lld\n",
- (long long)(int)offsetof(struct obd_statfs, os_spare2));
- LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare2) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare2));
+ LASSERTF((int)offsetof(struct obd_statfs, os_granted) == 112, "found %lld\n",
+ (long long)(int)offsetof(struct obd_statfs, os_granted));
+ LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_granted) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_statfs *)0)->os_granted));
LASSERTF((int)offsetof(struct obd_statfs, os_spare3) == 116, "found %lld\n",
(long long)(int)offsetof(struct obd_statfs, os_spare3));
LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare3) == 4, "found %lld\n",
reply = req_capsule_server_get(tsi->tsi_pill, &RMF_CONNECT_DATA);
spin_lock(&tsi->tsi_exp->exp_lock);
*exp_connect_flags_ptr(tsi->tsi_exp) = reply->ocd_connect_flags;
+ if (reply->ocd_connect_flags & OBD_CONNECT_FLAGS2)
+ *exp_connect_flags2_ptr(tsi->tsi_exp) =
+ reply->ocd_connect_flags2;
tsi->tsi_exp->exp_connect_data.ocd_brw_size = reply->ocd_brw_size;
spin_unlock(&tsi->tsi_exp->exp_lock);
do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000701"
# lctl below may fail, it is valid case
$LCTL --device $mdcdev recover
- df $MOUNT
+ $LFS df $MOUNT
done
do_facet $SINGLEMDS "lctl set_param fail_loc=0"
[ $at_max_saved -ne 0 ] && at_max_set $at_max_saved mds
}
run_test 88 "MDS should not assign same objid to different files "
+# Print the used kbytes across the OSCs: the kbytestotal figure minus the
+# kbytesfree figure, both obtained from calc_osc_kbytes.
+function calc_osc_kbytes_used() {
+	local total=$(calc_osc_kbytes kbytestotal)
+	local avail=$(calc_osc_kbytes kbytesfree)
+
+	echo $((total - avail))
+}
+
test_89() {
cancel_lru_locks osc
mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
rm -f $DIR/$tdir/$tfile
wait_mds_ost_sync || error "initial MDS-OST sync timed out"
wait_delete_completed || error "initial wait delete timed out"
- local blocks1=$(df -P $MOUNT | tail -n 1 | awk '{ print $3 }')
+ local blocks1=$(calc_osc_kbytes_used)
local write_size=$(fs_log_size)
$SETSTRIPE -i 0 -c 1 $DIR/$tdir/$tfile
wait_mds_ost_sync || error "MDS-OST sync timed out"
wait_delete_completed || error "wait delete timed out"
- local blocks2=$(df -P $MOUNT | tail -n 1 | awk '{ print $3 }')
+ local blocks2=$(calc_osc_kbytes_used)
[ $((blocks2 - blocks1)) -le $(fs_log_size) ] ||
error $((blocks2 - blocks1)) blocks leaked
ls -l ${testdir}/${tfile} > /dev/null|| error "ls failed"
check_stats $SINGLEMDS "getattr" 1
fi
+ rm -rf $DIR/${tdir}
+
+ # when DNE is enabled, MDT uses STATFS RPC to ping other targets
+ # so the check below is not reliable
+ [ $MDSCOUNT -eq 1 ] || return 0
+
# Sleep to avoid a cached response.
#define OBD_STATFS_CACHE_SECONDS 1
sleep 2
$LFS df || error "lfs failed"
check_stats $SINGLEMDS "statfs" 1
- rm -rf $DIR/${tdir}
+ return 0
}
run_test 133b "Verifying extra MDT stats =================================="
(long long)(int)offsetof(struct obd_statfs, os_fprecreated));
LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_fprecreated) == 4, "found %lld\n",
(long long)(int)sizeof(((struct obd_statfs *)0)->os_fprecreated));
- LASSERTF((int)offsetof(struct obd_statfs, os_spare2) == 112, "found %lld\n",
- (long long)(int)offsetof(struct obd_statfs, os_spare2));
- LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare2) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare2));
+ LASSERTF((int)offsetof(struct obd_statfs, os_granted) == 112, "found %lld\n",
+ (long long)(int)offsetof(struct obd_statfs, os_granted));
+ LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_granted) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_statfs *)0)->os_granted));
LASSERTF((int)offsetof(struct obd_statfs, os_spare3) == 116, "found %lld\n",
(long long)(int)offsetof(struct obd_statfs, os_spare3));
LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare3) == 4, "found %lld\n",