/* couldn't update statfs, try again as soon as possible */
cfs_waitq_signal(&d->opd_pre_waitq);
if (req->rq_import_generation == imp->imp_generation)
- CERROR("%s: couldn't update statfs: rc = %d\n",
+ CDEBUG(D_CACHE, "%s: couldn't update statfs: rc = %d\n",
d->opd_obd->obd_name, rc);
RETURN(rc);
}
aa = ptlrpc_req_async_args(req);
aa->pointer_arg[0] = d;
- ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
-
- cfs_timer_disarm(&d->opd_statfs_timer);
-
/*
* no updates till reply
*/
+ cfs_timer_disarm(&d->opd_statfs_timer);
d->opd_statfs_fresh_till = cfs_time_shift(obd_timeout * 1000);
d->opd_statfs_update_in_progress = 1;
+ ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
+
RETURN(0);
}
static inline int osp_precreate_near_empty_nolock(struct osp_device *d)
{
- int window = d->opd_pre_last_created - d->opd_pre_next;
+ int window = d->opd_pre_last_created - d->opd_pre_used_id;
/* don't consider new precreation till OST is healthy and
* has free space */
int rc;
/* XXX: do we really need locking here? */
- cfs_spin_lock(&d->opd_pre_lock);
+ spin_lock(&d->opd_pre_lock);
rc = osp_precreate_near_empty_nolock(d);
- cfs_spin_unlock(&d->opd_pre_lock);
+ spin_unlock(&d->opd_pre_lock);
return rc;
}
RETURN(rc);
}
- cfs_spin_lock(&d->opd_pre_lock);
+ spin_lock(&d->opd_pre_lock);
if (d->opd_pre_grow_count > d->opd_pre_max_grow_count / 2)
d->opd_pre_grow_count = d->opd_pre_max_grow_count / 2;
grow = d->opd_pre_grow_count;
- cfs_spin_unlock(&d->opd_pre_lock);
+ spin_unlock(&d->opd_pre_lock);
body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
LASSERT(body);
if (body == NULL)
GOTO(out_req, rc = -EPROTO);
- CDEBUG(D_HA, "new last_created %lu\n", (unsigned long) body->oa.o_id);
- LASSERT(body->oa.o_id > d->opd_pre_next);
+ CDEBUG(D_HA, "%s: new last_created "LPU64"\n", d->opd_obd->obd_name,
+ body->oa.o_id);
+ LASSERT(body->oa.o_id > d->opd_pre_used_id);
diff = body->oa.o_id - d->opd_pre_last_created;
- cfs_spin_lock(&d->opd_pre_lock);
+ spin_lock(&d->opd_pre_lock);
if (diff < grow) {
/* the OST has not managed to create all the
* objects we asked for */
d->opd_pre_grow_slow = 0;
}
d->opd_pre_last_created = body->oa.o_id;
- cfs_spin_unlock(&d->opd_pre_lock);
+ spin_unlock(&d->opd_pre_lock);
CDEBUG(D_OTHER, "current precreated pool: %llu-%llu\n",
- d->opd_pre_next, d->opd_pre_last_created);
+ d->opd_pre_used_id, d->opd_pre_last_created);
out_req:
/* now we can wakeup all users awaiting for objects */
body->oa.o_valid = OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
body->oa.o_seq = FID_SEQ_OST_MDT0;
- /* remove from NEXT after used one */
- body->oa.o_id = d->opd_last_used_id + 1;
+ body->oa.o_id = d->opd_last_used_id;
ptlrpc_request_set_replen(req);
/*
* OST provides us with id new pool starts from in body->oa.o_id
*/
- cfs_spin_lock(&d->opd_pre_lock);
+ spin_lock(&d->opd_pre_lock);
if (le64_to_cpu(d->opd_last_used_id) > body->oa.o_id) {
d->opd_pre_grow_count = OST_MIN_PRECREATE +
le64_to_cpu(d->opd_last_used_id) -
body->oa.o_id;
- d->opd_pre_last_created = le64_to_cpu(d->opd_last_used_id) + 1;
+ d->opd_pre_last_created = le64_to_cpu(d->opd_last_used_id);
} else {
d->opd_pre_grow_count = OST_MIN_PRECREATE;
- d->opd_pre_last_created = body->oa.o_id + 1;
+ d->opd_pre_last_created = body->oa.o_id;
}
- d->opd_pre_next = d->opd_pre_last_created;
+ d->opd_pre_used_id = d->opd_pre_last_created;
d->opd_pre_grow_slow = 0;
- cfs_spin_unlock(&d->opd_pre_lock);
+ spin_unlock(&d->opd_pre_lock);
- CDEBUG(D_HA, "Got last_id "LPU64" from OST, last_used is "LPU64
- ", next "LPU64"\n", body->oa.o_id,
- le64_to_cpu(d->opd_last_used_id), d->opd_pre_next);
+ CDEBUG(D_HA, "%s: Got last_id "LPU64" from OST, last_used is "LPU64
+ ", pre_used "LPU64"\n", d->opd_obd->obd_name, body->oa.o_id,
+ le64_to_cpu(d->opd_last_used_id), d->opd_pre_used_id);
out:
if (req)
if (rc)
goto out;
+ /* Add a bit of hysteresis so this flag isn't continually flapping,
+ * and ensure that new files don't get extremely fragmented due to
+ * only a small amount of available space in the filesystem.
+ * We want to set the NOSPC flag when there is less than ~0.1% free
+ * and clear it when there is at least ~0.2% free space, so:
+ * avail < ~0.1% max max = avail + used
+ * 1025 * avail < avail + used used = blocks - free
+ * 1024 * avail < used
+ * 1024 * avail < blocks - free
+ * avail < ((blocks - free) >> 10)
+ *
+ * On very large disk, say 16TB 0.1% will be 16 GB. We don't want to
+ * lose that amount of space so in those cases we report no space left
+ * if there is less than 1 GB left. */
if (likely(msfs->os_type)) {
used = min_t(__u64, (msfs->os_blocks - msfs->os_bfree) >> 10,
1 << 30);
sprintf(pname, "osp-pre-%u\n", d->opd_index);
cfs_daemonize(pname);
- cfs_spin_lock(&d->opd_pre_lock);
+ spin_lock(&d->opd_pre_lock);
thread->t_flags = SVC_RUNNING;
- cfs_spin_unlock(&d->opd_pre_lock);
+ spin_unlock(&d->opd_pre_lock);
cfs_waitq_signal(&thread->t_ctl_waitq);
while (osp_precreate_running(d)) {
static int osp_precreate_ready_condition(struct osp_device *d)
{
+ __u64 next;
+
/* ready if got enough precreated objects */
- if (d->opd_pre_next + d->opd_pre_reserved < d->opd_pre_last_created)
+ /* we need to wait for others (opd_pre_reserved) and our object (+1) */
+ next = d->opd_pre_used_id + d->opd_pre_reserved + 1;
+ if (next <= d->opd_pre_last_created)
return 1;
/* ready if OST reported no space and no destroys in progress */
"reserved="LPU64", syn_changes=%lu, "
"syn_rpc_in_progress=%d, status=%d\n",
d->opd_obd->obd_name, d->opd_pre_last_created,
- d->opd_pre_next, d->opd_pre_reserved,
+ d->opd_pre_used_id, d->opd_pre_reserved,
d->opd_syn_changes, d->opd_syn_rpc_in_progress,
d->opd_pre_status);
ENTRY;
- LASSERT(d->opd_pre_last_created >= d->opd_pre_next);
+ LASSERT(d->opd_pre_last_created >= d->opd_pre_used_id);
lwi = LWI_TIMEOUT(cfs_time_seconds(obd_timeout),
osp_precreate_timeout_condition, d);
break;
}
-#if LUSTRE_VERSION_CODE >= OBD_OCD_VERSION(2, 3, 90, 0)
-#error "remove this before the release"
-#endif
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 3, 90, 0)
/*
* to address Andreas's concern on possible busy-loop
* between this thread and osp_precreate_send()
*/
- LASSERT(count++ < 1000);
+ if (unlikely(count++ == 1000)) {
+ osp_precreate_timeout_condition(d);
+ LBUG();
+ }
+#endif
/*
* increase number of precreations
*/
if (d->opd_pre_grow_count < d->opd_pre_max_grow_count &&
d->opd_pre_grow_slow == 0 &&
- (d->opd_pre_last_created - d->opd_pre_next <=
+ (d->opd_pre_last_created - d->opd_pre_used_id <=
d->opd_pre_grow_count / 4 + 1)) {
- cfs_spin_lock(&d->opd_pre_lock);
+ spin_lock(&d->opd_pre_lock);
d->opd_pre_grow_slow = 1;
d->opd_pre_grow_count *= 2;
- cfs_spin_unlock(&d->opd_pre_lock);
+ spin_unlock(&d->opd_pre_lock);
}
- /*
- * we never use the last object in the window
- */
- cfs_spin_lock(&d->opd_pre_lock);
- precreated = d->opd_pre_last_created - d->opd_pre_next;
+ spin_lock(&d->opd_pre_lock);
+ precreated = d->opd_pre_last_created - d->opd_pre_used_id;
if (precreated > d->opd_pre_reserved) {
d->opd_pre_reserved++;
- cfs_spin_unlock(&d->opd_pre_lock);
+ spin_unlock(&d->opd_pre_lock);
rc = 0;
/* XXX: don't wake up if precreation is in progress */
break;
}
- cfs_spin_unlock(&d->opd_pre_lock);
+ spin_unlock(&d->opd_pre_lock);
/*
* all precreated objects have been used and no-space
obd_id objid;
/* grab next id from the pool */
- cfs_spin_lock(&d->opd_pre_lock);
- LASSERT(d->opd_pre_next <= d->opd_pre_last_created);
- objid = d->opd_pre_next++;
+ spin_lock(&d->opd_pre_lock);
+ LASSERT(d->opd_pre_used_id < d->opd_pre_last_created);
+ objid = ++d->opd_pre_used_id;
d->opd_pre_reserved--;
/*
* last_used_id must be changed along with getting new id otherwise
* we might miscalculate gap causing object loss or leak
*/
osp_update_last_id(d, objid);
- cfs_spin_unlock(&d->opd_pre_lock);
+ spin_unlock(&d->opd_pre_lock);
/*
* probably main thread suspended orphan cleanup till
/* initially precreation isn't ready */
d->opd_pre_status = -EAGAIN;
- d->opd_pre_next = 1;
- d->opd_pre_last_created = 1;
+ d->opd_pre_used_id = 0;
+ d->opd_pre_last_created = 0;
d->opd_pre_reserved = 0;
d->opd_got_disconnected = 1;
d->opd_pre_grow_slow = 0;
d->opd_pre_min_grow_count = OST_MIN_PRECREATE;
d->opd_pre_max_grow_count = OST_MAX_PRECREATE;
- cfs_spin_lock_init(&d->opd_pre_lock);
+ spin_lock_init(&d->opd_pre_lock);
cfs_waitq_init(&d->opd_pre_waitq);
cfs_waitq_init(&d->opd_pre_user_waitq);
cfs_waitq_init(&d->opd_pre_thread.t_ctl_waitq);