+ LASSERT(osc_object_is_locked(ext->oe_obj));
+ LASSERT(state >= OES_INV && state < OES_STATE_MAX);
+
+ /* Never try to sanity check a state changing extent :-) */
+ /* LASSERT(sanity_check_nolock(ext) == 0); */
+
+ /* TODO: validate the state machine */
+ ext->oe_state = state;
+ cfs_waitq_broadcast(&ext->oe_waitq);
+}
+
+/**
+ * Allocate an extent for @obj from the osc_extent_kmem slab.
+ *
+ * On success the extent holds one reference, has no users, is in state
+ * OES_INV, has empty oe_link and oe_pages lists, an initialized wait queue,
+ * a cleared rb node and no lock attached.
+ *
+ * Returns the new extent, or NULL if the allocation failed.
+ */
+static struct osc_extent *osc_extent_alloc(struct osc_object *obj)
+{
+	struct osc_extent *ext;
+
+	OBD_SLAB_ALLOC_PTR_GFP(ext, osc_extent_kmem, CFS_ALLOC_STD);
+	if (ext != NULL) {
+		/* identity and state */
+		ext->oe_obj = obj;
+		ext->oe_state = OES_INV;
+		ext->oe_osclock = NULL;
+
+		/* counters: one reference for the caller, nobody using it */
+		cfs_atomic_set(&ext->oe_refc, 1);
+		cfs_atomic_set(&ext->oe_users, 0);
+
+		/* linkage */
+		RB_CLEAR_NODE(&ext->oe_node);
+		CFS_INIT_LIST_HEAD(&ext->oe_link);
+		CFS_INIT_LIST_HEAD(&ext->oe_pages);
+		cfs_waitq_init(&ext->oe_waitq);
+	}
+
+	return ext;
+}
+
+/* Give @ext back to the osc_extent_kmem slab. */
+static void osc_extent_free(struct osc_extent *ext)
+{
+ OBD_SLAB_FREE_PTR(ext, osc_extent_kmem);
+}
+
+/*
+ * Take one more reference on @ext and return @ext so the call can be chained.
+ * Asserts that the reference count is not negative beforehand.
+ */
+static struct osc_extent *osc_extent_get(struct osc_extent *ext)
+{
+ LASSERT(cfs_atomic_read(&ext->oe_refc) >= 0);
+ cfs_atomic_inc(&ext->oe_refc);
+ return ext;
+}
+
+/*
+ * Drop one reference on @ext.
+ *
+ * When the last reference goes away the extent must be off every list, have
+ * no users, be in state OES_INV and be out of the rb-tree; the attached lock
+ * (if any) is released with @env and the extent is freed.
+ */
+static void osc_extent_put(const struct lu_env *env, struct osc_extent *ext)
+{
+	LASSERT(cfs_atomic_read(&ext->oe_refc) > 0);
+	if (!cfs_atomic_dec_and_test(&ext->oe_refc))
+		return;
+
+	/* that was the last reference: verify the extent is fully detached */
+	LASSERT(cfs_list_empty(&ext->oe_link));
+	LASSERT(cfs_atomic_read(&ext->oe_users) == 0);
+	LASSERT(ext->oe_state == OES_INV);
+	LASSERT(!ext->oe_intree);
+
+	if (ext->oe_osclock) {
+		cl_lock_put(env, ext->oe_osclock);
+		ext->oe_osclock = NULL;
+	}
+
+	osc_extent_free(ext);
+}
+
+/**
+ * osc_extent_put_trust() is a special version of osc_extent_put() for use
+ * when it is known that the caller does not hold the last reference, so the
+ * extent can never be freed here. It exists for callers that have no lu_env
+ * to pass to osc_extent_put().
+ *
+ * The object lock must be held, and the reference count must be greater
+ * than one on entry; both are asserted.
+ */
+static void osc_extent_put_trust(struct osc_extent *ext)
+{
+ LASSERT(cfs_atomic_read(&ext->oe_refc) > 1);
+ LASSERT(osc_object_is_locked(ext->oe_obj));
+ cfs_atomic_dec(&ext->oe_refc);
+}
+
+/**
+ * Walk @obj's extent tree looking for page offset @index.
+ *
+ * Returns the extent whose [oe_start, oe_end] range contains @index. If no
+ * extent contains it, returns the last extent passed on the way down that
+ * ends before @index (the greatest previous extent), or NULL if there is
+ * none. No reference is taken. The object lock must be held.
+ */
+static struct osc_extent *osc_extent_search(struct osc_object *obj,
+					     pgoff_t index)
+{
+	struct rb_node *node = obj->oo_root.rb_node;
+	struct osc_extent *prev = NULL;
+
+	LASSERT(osc_object_is_locked(obj));
+	while (node != NULL) {
+		struct osc_extent *here = rb_extent(node);
+
+		if (index < here->oe_start) {
+			/* @index lies to the left of this extent */
+			node = node->rb_left;
+			continue;
+		}
+
+		if (index <= here->oe_end)
+			return here;
+
+		/* this extent ends before @index; remember it, go right */
+		prev = here;
+		node = node->rb_right;
+	}
+
+	return prev;
+}
+
+/*
+ * Find the extent that covers @index.
+ *
+ * Returns that extent with an extra reference taken, or NULL when no extent
+ * covers @index. The caller must hold the object lock.
+ */
+static struct osc_extent *osc_extent_lookup(struct osc_object *obj,
+					     pgoff_t index)
+{
+	struct osc_extent *hit = osc_extent_search(obj, index);
+
+	if (hit == NULL)
+		return NULL;
+
+	/* the search may hand back a preceding extent; reject it */
+	if (index < hit->oe_start || index > hit->oe_end)
+		return NULL;
+
+	return osc_extent_get(hit);
+}
+
+/*
+ * Link @ext into @obj's extent rb-tree, ordered by page range.
+ *
+ * @ext must belong to @obj and must not already be in the tree. Its range
+ * must not overlap any extent already present; an overlap trips the
+ * assertion in the descent loop. The tree takes its own reference on @ext
+ * and oe_intree is set.
+ *
+ * caller must have held object lock.
+ */
+static void osc_extent_insert(struct osc_object *obj, struct osc_extent *ext)
+{
+ struct rb_node **n = &obj->oo_root.rb_node;
+ struct rb_node *parent = NULL;
+ struct osc_extent *tmp;
+
+ LASSERT(ext->oe_intree == 0);
+ LASSERT(ext->oe_obj == obj);
+ LASSERT(osc_object_is_locked(obj));
+ /* descend to the empty slot where @ext belongs */
+ while (*n != NULL) {
+ tmp = rb_extent(*n);
+ parent = *n;
+
+ if (ext->oe_end < tmp->oe_start)
+ n = &(*n)->rb_left;
+ else if (ext->oe_start > tmp->oe_end)
+ n = &(*n)->rb_right;
+ else
+ /* @ext overlaps @tmp: this must never happen */
+ EASSERTF(0, tmp, EXTSTR, EXTPARA(ext));
+ }
+ rb_link_node(&ext->oe_node, parent, n);
+ rb_insert_color(&ext->oe_node, &obj->oo_root);
+ /* reference held by the tree, dropped in osc_extent_erase() */
+ osc_extent_get(ext);
+ ext->oe_intree = 1;
+}
+
+/*
+ * Unlink @ext from its object's rb-tree, if it is in the tree, and drop the
+ * reference the tree was holding. Does nothing for an extent that is not in
+ * the tree. The caller must hold the object lock.
+ */
+static void osc_extent_erase(struct osc_extent *ext)
+{
+	struct osc_object *owner = ext->oe_obj;
+
+	LASSERT(osc_object_is_locked(owner));
+	if (!ext->oe_intree)
+		return;
+
+	rb_erase(&ext->oe_node, &owner->oo_root);
+	ext->oe_intree = 0;
+	/* give back the reference taken on behalf of the rbtree */
+	osc_extent_put_trust(ext);
+}
+
+/*
+ * Register one more user on @ext and return it with an extra reference.
+ *
+ * @ext must be in state OES_ACTIVE or OES_CACHE. A cached extent is switched
+ * to OES_ACTIVE and its pages are subtracted from the object's pending write
+ * accounting. The extent is also taken off whatever list oe_link is on.
+ * The object lock must be held.
+ */
+static struct osc_extent *osc_extent_hold(struct osc_extent *ext)
+{
+ struct osc_object *obj = ext->oe_obj;
+
+ LASSERT(osc_object_is_locked(obj));
+ LASSERT(ext->oe_state == OES_ACTIVE || ext->oe_state == OES_CACHE);
+ if (ext->oe_state == OES_CACHE) {
+ osc_extent_state_set(ext, OES_ACTIVE);
+ /* pages of an active extent are no longer counted as pending */
+ osc_update_pending(obj, OBD_BRW_WRITE, -ext->oe_nr_pages);
+ }
+ cfs_atomic_inc(&ext->oe_users);
+ cfs_list_del_init(&ext->oe_link);
+ return osc_extent_get(ext);
+}
+
+/*
+ * Tear down an extent that has no pages left: take it out of the rb-tree
+ * (dropping the tree's reference), unlink it from its list and move it to
+ * state OES_INV. The object lock must be held and oe_pages must be empty.
+ */
+static void __osc_extent_remove(struct osc_extent *ext)
+{
+ LASSERT(osc_object_is_locked(ext->oe_obj));
+ LASSERT(cfs_list_empty(&ext->oe_pages));
+ osc_extent_erase(ext);
+ cfs_list_del_init(&ext->oe_link);
+ osc_extent_state_set(ext, OES_INV);
+ OSC_EXTENT_DUMP(D_CACHE, ext, "destroyed.\n");
+}
+
+/* Locked wrapper: run __osc_extent_remove() on @ext under the object lock. */
+static void osc_extent_remove(struct osc_extent *ext)
+{
+ struct osc_object *obj = ext->oe_obj;
+
+ osc_object_lock(obj);
+ __osc_extent_remove(ext);
+ osc_object_unlock(obj);
+}
+
+/**
+ * This function is used to merge extents to get better performance. It checks
+ * if @cur and @victim are contiguous at chunk level and, if so, folds
+ * @victim's range, grants, pages and urgent/memalloc bits into @cur and
+ * removes @victim.
+ *
+ * @cur must be in state OES_CACHE and the object lock must be held.
+ *
+ * Returns 0 when @victim was merged into @cur, -EINVAL when @victim is NULL,
+ * -EBUSY when @victim is not in OES_CACHE or has oe_fsync_wait set, and
+ * -ERANGE when the two extents have different oe_max_end or are not adjacent
+ * at chunk level.
+ */
+static int osc_extent_merge(const struct lu_env *env, struct osc_extent *cur,
+ struct osc_extent *victim)
+{
+ struct osc_object *obj = cur->oe_obj;
+ pgoff_t chunk_start;
+ pgoff_t chunk_end;
+ int ppc_bits;
+
+ LASSERT(cur->oe_state == OES_CACHE);
+ LASSERT(osc_object_is_locked(obj));
+ if (victim == NULL)
+ return -EINVAL;
+
+ if (victim->oe_state != OES_CACHE || victim->oe_fsync_wait)
+ return -EBUSY;
+
+ if (cur->oe_max_end != victim->oe_max_end)
+ return -ERANGE;
+
+ LASSERT(cur->oe_osclock == victim->oe_osclock);
+ ppc_bits = osc_cli(obj)->cl_chunkbits - CFS_PAGE_SHIFT;
+ chunk_start = cur->oe_start >> ppc_bits;
+ chunk_end = cur->oe_end >> ppc_bits;
+ /* @victim must end in the chunk right before @cur, or start in the
+ * chunk right after it */
+ if (chunk_start != (victim->oe_end >> ppc_bits) + 1 &&
+ chunk_end + 1 != victim->oe_start >> ppc_bits)
+ return -ERANGE;
+
+ OSC_EXTENT_DUMP(D_CACHE, victim, "will be merged by %p.\n", cur);
+
+ cur->oe_start = min(cur->oe_start, victim->oe_start);
+ cur->oe_end = max(cur->oe_end, victim->oe_end);
+ cur->oe_grants += victim->oe_grants;
+ cur->oe_nr_pages += victim->oe_nr_pages;
+ /* only the following bits are needed to merge */
+ cur->oe_urgent |= victim->oe_urgent;
+ cur->oe_memalloc |= victim->oe_memalloc;
+ cfs_list_splice_init(&victim->oe_pages, &cur->oe_pages);
+ cfs_list_del_init(&victim->oe_link);
+ victim->oe_nr_pages = 0;
+
+ /* hold a temporary reference across the removal, which drops the
+ * tree's reference; the put below may then free @victim */
+ osc_extent_get(victim);
+ __osc_extent_remove(victim);
+ osc_extent_put(env, victim);
+
+ /* @victim is only printed as a pointer value here */
+ OSC_EXTENT_DUMP(D_CACHE, cur, "after merging %p.\n", victim);
+ return 0;
+}
+
+/**
+ * Drop user count of osc_extent, and unplug IO asynchronously.
+ *
+ * When the last user goes away the object lock is taken and the extent
+ * leaves OES_ACTIVE: it becomes OES_TRUNC if a truncate is pending on it,
+ * otherwise it becomes OES_CACHE, is accounted as pending write pages, is
+ * merged with its neighbours where possible and, if urgent, is queued on the
+ * object's urgent list. In every case the caller's reference on @ext is
+ * dropped before returning.
+ *
+ * Always returns 0.
+ */
+int osc_extent_release(const struct lu_env *env, struct osc_extent *ext)
+{
+ struct osc_object *obj = ext->oe_obj;
+ int rc = 0;
+ ENTRY;
+
+ LASSERT(cfs_atomic_read(&ext->oe_users) > 0);
+ LASSERT(sanity_check(ext) == 0);
+ LASSERT(ext->oe_grants > 0);
+
+ /* the object lock is acquired only if this was the last user */
+ if (cfs_atomic_dec_and_lock(&ext->oe_users, &obj->oo_lock)) {
+ LASSERT(ext->oe_state == OES_ACTIVE);
+ if (ext->oe_trunc_pending) {
+ /* a truncate process is waiting for this extent.
+ * This may happen due to a race, check
+ * osc_cache_truncate_start(). */
+ osc_extent_state_set(ext, OES_TRUNC);
+ ext->oe_trunc_pending = 0;
+ } else {
+ osc_extent_state_set(ext, OES_CACHE);
+ osc_update_pending(obj, OBD_BRW_WRITE,
+ ext->oe_nr_pages);
+
+ /* try to merge the previous and next extent. */
+ osc_extent_merge(env, ext, prev_extent(ext));
+ osc_extent_merge(env, ext, next_extent(ext));
+
+ if (ext->oe_urgent)
+ cfs_list_move_tail(&ext->oe_link,
+ &obj->oo_urgent_exts);
+ }
+ osc_object_unlock(obj);
+
+ osc_io_unplug_async(env, osc_cli(obj), obj);
+ }
+ /* drop the reference that came with the hold */
+ osc_extent_put(env, ext);
+ RETURN(rc);
+}
+
+/*
+ * Return non-zero when the page ranges [oe_start, oe_end] of @ex1 and @ex2
+ * share at least one page index, zero otherwise.
+ */
+static inline int overlapped(struct osc_extent *ex1, struct osc_extent *ex2)
+{
+	/* two closed ranges intersect iff each one ends at or after the
+	 * other one starts */
+	return ex1->oe_end >= ex2->oe_start && ex2->oe_end >= ex1->oe_start;
+}
+
+/**
+ * Find or create an extent which includes @index, core function to manage
+ * extent tree.
+ *
+ * A one-chunk candidate extent is built around @index and the tree is then
+ * scanned for an existing extent that either already contains it or can be
+ * grown by one chunk (front or rear) to contain it. If none is usable, the
+ * candidate itself is inserted. If a containing extent is busy, the function
+ * waits for it to become OES_INV and starts over.
+ *
+ * @grants is in/out: on entry it must hold at least one chunk plus the
+ * extent tax; on return it has been reduced by whatever was charged to the
+ * returned extent (one chunk for an extension, chunk plus tax for a new
+ * extent, nothing for a plain hit).
+ *
+ * Returns the extent, held via osc_extent_hold(), or ERR_PTR(-ENOMEM) if the
+ * candidate cannot be allocated, or ERR_PTR(rc) if waiting for a conflicting
+ * extent failed.
+ */
+struct osc_extent *osc_extent_find(const struct lu_env *env,
+ struct osc_object *obj, pgoff_t index,
+ int *grants)
+
+{
+ struct client_obd *cli = osc_cli(obj);
+ struct cl_lock *lock;
+ struct osc_extent *cur;
+ struct osc_extent *ext;
+ struct osc_extent *conflict = NULL;
+ struct osc_extent *found = NULL;
+ pgoff_t chunk;
+ pgoff_t max_end;
+ int max_pages; /* max_pages_per_rpc */
+ int chunksize;
+ int ppc_bits; /* pages per chunk bits */
+ int chunk_mask;
+ int rc;
+ ENTRY;
+
+ /* candidate extent; its allocation reference is dropped at "out" */
+ cur = osc_extent_alloc(obj);
+ if (cur == NULL)
+ RETURN(ERR_PTR(-ENOMEM));
+
+ lock = cl_lock_at_pgoff(env, osc2cl(obj), index, NULL, 1, 0);
+ LASSERT(lock != NULL);
+ LASSERT(lock->cll_descr.cld_mode >= CLM_WRITE);
+
+ LASSERT(cli->cl_chunkbits >= CFS_PAGE_SHIFT);
+ ppc_bits = cli->cl_chunkbits - CFS_PAGE_SHIFT;
+ chunk_mask = ~((1 << ppc_bits) - 1);
+ chunksize = 1 << cli->cl_chunkbits;
+ chunk = index >> ppc_bits;
+
+ /* align end to rpc edge, rpc size may not be a power 2 integer. */
+ max_pages = cli->cl_max_pages_per_rpc;
+ LASSERT((max_pages & ~chunk_mask) == 0);
+ max_end = index - (index % max_pages) + max_pages - 1;
+ max_end = min_t(pgoff_t, max_end, lock->cll_descr.cld_end);
+
+ /* initialize new extent by parameters so far */
+ cur->oe_max_end = max_end;
+ /* the chunk containing @index, clipped to the lock start and max_end */
+ cur->oe_start = index & chunk_mask;
+ cur->oe_end = ((index + ~chunk_mask + 1) & chunk_mask) - 1;
+ if (cur->oe_start < lock->cll_descr.cld_start)
+ cur->oe_start = lock->cll_descr.cld_start;
+ if (cur->oe_end > max_end)
+ cur->oe_end = max_end;
+ cur->oe_osclock = lock;
+ cur->oe_grants = 0;
+ cur->oe_mppr = max_pages;
+
+ /* grants has been allocated by caller */
+ LASSERTF(*grants >= chunksize + cli->cl_extent_tax,
+ "%u/%u/%u.\n", *grants, chunksize, cli->cl_extent_tax);
+ LASSERTF((max_end - cur->oe_start) < max_pages, EXTSTR, EXTPARA(cur));
+
+restart:
+ osc_object_lock(obj);
+ /* start from the extent covering cur's start or the greatest one
+ * before it; fall back to the first extent of the object */
+ ext = osc_extent_search(obj, cur->oe_start);
+ if (ext == NULL)
+ ext = first_extent(obj);
+ while (ext != NULL) {
+ loff_t ext_chk_start = ext->oe_start >> ppc_bits;
+ loff_t ext_chk_end = ext->oe_end >> ppc_bits;
+
+ LASSERT(sanity_check_nolock(ext) == 0);
+ /* ext ends more than one chunk before our chunk: stop scanning */
+ if (chunk > ext_chk_end + 1)
+ break;
+
+ /* if covering by different locks, no chance to match */
+ if (lock != ext->oe_osclock) {
+ EASSERTF(!overlapped(ext, cur), ext,
+ EXTSTR, EXTPARA(cur));
+
+ ext = next_extent(ext);
+ continue;
+ }
+
+ /* discontiguous chunks? */
+ if (chunk + 1 < ext_chk_start) {
+ ext = next_extent(ext);
+ continue;
+ }
+
+ /* ok, from now on, ext and cur have these attrs:
+ * 1. covered by the same lock
+ * 2. contiguous at chunk level or overlapping. */
+
+ if (overlapped(ext, cur)) {
+ /* cur is the minimum unit, so overlapping means
+ * full contain. */
+ EASSERTF((ext->oe_start <= cur->oe_start &&
+ ext->oe_end >= cur->oe_end),
+ ext, EXTSTR, EXTPARA(cur));
+
+ if (ext->oe_state > OES_CACHE || ext->oe_fsync_wait) {
+ /* for simplicity, we wait for this extent to
+ * finish before going forward. */
+ conflict = osc_extent_get(ext);
+ break;
+ }
+
+ found = osc_extent_hold(ext);
+ break;
+ }
+
+ /* non-overlapped extent */
+ if (ext->oe_state != OES_CACHE || ext->oe_fsync_wait) {
+ /* we can't do anything for a non OES_CACHE extent, or
+ * if there is someone waiting for this extent to be
+ * flushed, try next one. */
+ ext = next_extent(ext);
+ continue;
+ }
+
+ /* check if they belong to the same rpc slot before trying to
+ * merge. the extents are not overlapped and contiguous at
+ * chunk level to get here. */
+ if (ext->oe_max_end != max_end) {
+ /* if they don't belong to the same RPC slot or
+ * max_pages_per_rpc has ever changed, do not merge. */
+ ext = next_extent(ext);
+ continue;
+ }
+
+ /* it's required that an extent must be contiguous at chunk
+ * level so that we know the whole extent is covered by grant
+ * (the pages in the extent are NOT required to be contiguous).
+ * Otherwise, it will be too much difficult to know which
+ * chunks have grants allocated. */
+
+ /* try to do front merge - extend ext's start */
+ if (chunk + 1 == ext_chk_start) {
+ /* ext must be chunk size aligned */
+ EASSERT((ext->oe_start & ~chunk_mask) == 0, ext);
+
+ /* pull ext's start back to cover cur */
+ ext->oe_start = cur->oe_start;
+ ext->oe_grants += chunksize;
+ *grants -= chunksize;
+
+ found = osc_extent_hold(ext);
+ } else if (chunk == ext_chk_end + 1) {
+ /* rear merge */
+ ext->oe_end = cur->oe_end;
+ ext->oe_grants += chunksize;
+ *grants -= chunksize;
+
+ /* try to merge with the next one because we just fill
+ * in a gap */
+ if (osc_extent_merge(env, ext, next_extent(ext)) == 0)
+ /* we can save extent tax from next extent */
+ *grants += cli->cl_extent_tax;
+
+ found = osc_extent_hold(ext);
+ }
+ if (found != NULL)
+ break;
+
+ ext = next_extent(ext);
+ }
+
+ osc_extent_tree_dump(D_CACHE, obj);
+ if (found != NULL) {
+ LASSERT(conflict == NULL);
+ if (!IS_ERR(found)) {
+ LASSERT(found->oe_osclock == cur->oe_osclock);
+ OSC_EXTENT_DUMP(D_CACHE, found,
+ "found caching ext for %lu.\n", index);
+ }
+ } else if (conflict == NULL) {
+ /* create a new extent */
+ EASSERT(osc_extent_is_overlapped(obj, cur) == 0, cur);
+ cur->oe_grants = chunksize + cli->cl_extent_tax;
+ *grants -= cur->oe_grants;
+ LASSERT(*grants >= 0);
+
+ /* hold requires OES_CACHE or OES_ACTIVE; the hold gives the
+ * caller its reference and the insert gives the tree its own */
+ cur->oe_state = OES_CACHE;
+ found = osc_extent_hold(cur);
+ osc_extent_insert(obj, cur);
+ OSC_EXTENT_DUMP(D_CACHE, cur, "add into tree %lu/%lu.\n",
+ index, lock->cll_descr.cld_end);
+ }
+ osc_object_unlock(obj);
+
+ if (conflict != NULL) {
+ LASSERT(found == NULL);
+
+ /* waiting for IO to finish. Please notice that it's impossible
+ * to be an OES_TRUNC extent. */
+ rc = osc_extent_wait(env, conflict, OES_INV);
+ osc_extent_put(env, conflict);
+ conflict = NULL;
+ if (rc < 0)
+ GOTO(out, found = ERR_PTR(rc));
+
+ /* the conflicting extent is gone; rescan the tree */
+ goto restart;
+ }
+ EXIT;
+
+out:
+ /* drop the reference taken by osc_extent_alloc() */
+ osc_extent_put(env, cur);
+ LASSERT(*grants >= 0);
+ return found;
+}
+
+/**
+ * Called when IO is finished to an extent.
+ *
+ * Completes every page still attached to @ext, returns the extent's grant
+ * (accounting for any grant lost), removes the extent from the object and
+ * drops the reference that was held on behalf of the RPC.
+ *
+ * @sent is non-zero if the pages were actually sent; when it is zero the
+ * extent's whole grant is treated as lost. @rc is the IO result; it is
+ * recorded in ext->oe_rc (the page count is recorded instead when @rc is 0)
+ * and forwarded to each page's completion.
+ *
+ * Always returns 0.
+ */
+int osc_extent_finish(const struct lu_env *env, struct osc_extent *ext,
+		      int sent, int rc)
+{
+	struct client_obd *cli = osc_cli(ext->oe_obj);
+	struct osc_async_page *oap;
+	struct osc_async_page *tmp;
+	int nr_pages = ext->oe_nr_pages;
+	int lost_grant = 0;
+	int blocksize = cli->cl_import->imp_obd->obd_osfs.os_bsize ? : 4096;
+	__u64 last_off = 0;
+	int last_count = -1;
+	int last_page_off = 0;
+	ENTRY;
+
+	OSC_EXTENT_DUMP(D_CACHE, ext, "extent finished.\n");
+
+	ext->oe_rc = rc ?: ext->oe_nr_pages;
+	EASSERT(ergo(rc == 0, ext->oe_state == OES_RPC), ext);
+	cfs_list_for_each_entry_safe(oap, tmp, &ext->oe_pages,
+				     oap_pending_item) {
+		cfs_list_del_init(&oap->oap_rpc_item);
+		cfs_list_del_init(&oap->oap_pending_item);
+		/* Remember everything needed about the page with the highest
+		 * object offset now: once the loop is over the cursor no
+		 * longer refers to a page, and the page itself has been
+		 * handed to osc_ap_completion(). */
+		if (last_off <= oap->oap_obj_off) {
+			last_off = oap->oap_obj_off;
+			last_count = oap->oap_count;
+			last_page_off = oap->oap_page_off & ~CFS_PAGE_MASK;
+		}
+
+		--ext->oe_nr_pages;
+		osc_ap_completion(env, cli, oap, sent, rc);
+	}
+	EASSERT(ext->oe_nr_pages == 0, ext);
+
+	if (!sent) {
+		lost_grant = ext->oe_grants;
+	} else if (blocksize < CFS_PAGE_SIZE &&
+		   last_count != CFS_PAGE_SIZE) {
+		/* For short writes we shouldn't count parts of pages that
+		 * span a whole chunk on the OST side, or our accounting goes
+		 * wrong.  Should match the code in filter_grant_check. */
+		int offset = last_page_off;
+		int count = last_count + (offset & (blocksize - 1));
+		int end = (offset + last_count) & (blocksize - 1);
+
+		/* round the written range out to whole blocks */
+		if (end)
+			count += blocksize - end;
+
+		lost_grant = CFS_PAGE_SIZE - count;
+	}
+	if (ext->oe_grants > 0)
+		osc_free_grant(cli, nr_pages, lost_grant);
+
+	osc_extent_remove(ext);
+	/* put the refcount for RPC */
+	osc_extent_put(env, ext);
+	RETURN(0);
+}
+
+/*
+ * Wait condition used by osc_extent_wait(): sample @ext's state under the
+ * object lock and return non-zero if it equals @state.
+ */
+static int extent_wait_cb(struct osc_extent *ext, int state)
+{
+	int reached;
+
+	osc_object_lock(ext->oe_obj);
+	reached = (ext->oe_state == state);
+	osc_object_unlock(ext->oe_obj);
+
+	return reached;
+}
+
+/**
+ * Wait for the extent's state to become @state.
+ *
+ * When the caller waits for OES_INV and the extent is not already urgent or
+ * high priority, the extent is marked urgent first; a cached extent is
+ * additionally held and released again so that the release path unplugs IO
+ * for it.
+ *
+ * The first wait uses a 600 second timeout. On timeout a message is logged
+ * and the wait is repeated without a timeout.
+ *
+ * Returns the result of the wait; if the wait succeeded and the extent
+ * recorded a negative oe_rc, that value is returned instead.
+ */
+static int osc_extent_wait(const struct lu_env *env, struct osc_extent *ext,
+ int state)
+{
+ struct osc_object *obj = ext->oe_obj;
+ struct l_wait_info lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(600), NULL,
+ LWI_ON_SIGNAL_NOOP, NULL);
+ int rc = 0;
+ ENTRY;
+
+ osc_object_lock(obj);
+ LASSERT(sanity_check_nolock(ext) == 0);
+ /* `Kick' this extent only if the caller is waiting for it to be
+ * written out. */
+ if (state == OES_INV && !ext->oe_urgent && !ext->oe_hp) {
+ if (ext->oe_state == OES_ACTIVE) {
+ ext->oe_urgent = 1;
+ } else if (ext->oe_state == OES_CACHE) {
+ ext->oe_urgent = 1;
+ osc_extent_hold(ext);
+ /* rc is used as a flag here: release after unlocking */
+ rc = 1;
+ }
+ }
+ osc_object_unlock(obj);
+ if (rc == 1)
+ osc_extent_release(env, ext);
+
+ /* wait for the extent until its state becomes @state */
+ rc = l_wait_event(ext->oe_waitq, extent_wait_cb(ext, state), &lwi);
+ if (rc == -ETIMEDOUT) {
+ OSC_EXTENT_DUMP(D_ERROR, ext,
+ "%s: wait ext to %d timedout, recovery in progress?\n",
+ osc_export(obj)->exp_obd->obd_name, state);
+
+ /* keep waiting, this time without a timeout */
+ lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
+ rc = l_wait_event(ext->oe_waitq, extent_wait_cb(ext, state),
+ &lwi);
+ }
+ if (rc == 0 && ext->oe_rc < 0)
+ rc = ext->oe_rc;
+ RETURN(rc);
+}
+
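+/**
+ * Discard the pages of @ext whose index is greater than @trunc_index. When
+ * @partial is false the page at @trunc_index itself is discarded as well;
+ * when it is true that page is kept, i.e. a partial truncate happens.
+ */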
+static int osc_extent_truncate(struct osc_extent *ext, pgoff_t trunc_index,
+ bool partial)
+{
+ struct cl_env_nest nest;
+ struct lu_env *env;
+ struct cl_io *io;
+ struct osc_object *obj = ext->oe_obj;
+ struct client_obd *cli = osc_cli(obj);
+ struct osc_async_page *oap;
+ struct osc_async_page *tmp;
+ int pages_in_chunk = 0;
+ int ppc_bits = cli->cl_chunkbits - CFS_PAGE_SHIFT;
+ __u64 trunc_chunk = trunc_index >> ppc_bits;
+ int grants = 0;
+ int nr_pages = 0;
+ int rc = 0;
+ ENTRY;
+
+ LASSERT(sanity_check(ext) == 0);
+ LASSERT(ext->oe_state == OES_TRUNC);
+ LASSERT(!ext->oe_urgent);
+
+ /* Request new lu_env.
+ * We can't use that env from osc_cache_truncate_start() because
+ * it's from lov_io_sub and not fully initialized. */
+ env = cl_env_nested_get(&nest);
+ io = &osc_env_info(env)->oti_io;
+ io->ci_obj = cl_object_top(osc2cl(obj));
+ rc = cl_io_init(env, io, CIT_MISC, io->ci_obj);
+ if (rc < 0)
+ GOTO(out, rc);
+
+ /* discard all pages with index greater then trunc_index */
+ cfs_list_for_each_entry_safe(oap, tmp, &ext->oe_pages,
+ oap_pending_item) {
+ struct cl_page *sub = oap2cl_page(oap);
+ struct cl_page *page = cl_page_top(sub);
+
+ LASSERT(cfs_list_empty(&oap->oap_rpc_item));
+
+ /* only discard the pages with their index greater than
+ * trunc_index, and ... */
+ if (sub->cp_index < trunc_index ||
+ (sub->cp_index == trunc_index && partial)) {
+ /* accounting how many pages remaining in the chunk
+ * so that we can calculate grants correctly. */
+ if (sub->cp_index >> ppc_bits == trunc_chunk)
+ ++pages_in_chunk;
+ continue;
+ }
+
+ cfs_list_del_init(&oap->oap_pending_item);
+
+ cl_page_get(page);
+ lu_ref_add(&page->cp_reference, "truncate", cfs_current());
+
+ if (cl_page_own(env, io, page) == 0) {
+ cl_page_unmap(env, io, page);
+ cl_page_discard(env, io, page);
+ cl_page_disown(env, io, page);
+ } else {
+ LASSERT(page->cp_state == CPS_FREEING);
+ LASSERT(0);
+ }
+
+ lu_ref_del(&page->cp_reference, "truncate", cfs_current());
+ cl_page_put(env, page);
+
+ --ext->oe_nr_pages;
+ ++nr_pages;
+ }
+ EASSERTF(ergo(ext->oe_start >= trunc_index + !!partial,
+ ext->oe_nr_pages == 0),
+ ext, "trunc_index %lu, partial %d\n", trunc_index, partial);
+
+ osc_object_lock(obj);
+ if (ext->oe_nr_pages == 0) {
+ LASSERT(pages_in_chunk == 0);
+ grants = ext->oe_grants;
+ ext->oe_grants = 0;
+ } else { /* calculate how many grants we can free */
+ int chunks = (ext->oe_end >> ppc_bits) - trunc_chunk;
+ pgoff_t last_index;
+
+
+ /* if there is no pages in this chunk, we can also free grants
+ * for the last chunk */
+ if (pages_in_chunk == 0) {
+ /* if this is the 1st chunk and no pages in this chunk,
+ * ext->oe_nr_pages must be zero, so we should be in
+ * the other if-clause. */
+ LASSERT(trunc_chunk > 0);
+ --trunc_chunk;
+ ++chunks;
+ }
+
+ /* this is what we can free from this extent */
+ grants = chunks << cli->cl_chunkbits;
+ ext->oe_grants -= grants;
+ last_index = ((trunc_chunk + 1) << ppc_bits) - 1;
+ ext->oe_end = min(last_index, ext->oe_max_end);
+ LASSERT(ext->oe_end >= ext->oe_start);
+ LASSERT(ext->oe_grants > 0);
+ }
+ osc_object_unlock(obj);
+
+ if (grants > 0 || nr_pages > 0)
+ osc_free_grant(cli, nr_pages, grants);
+
+out:
+ cl_io_fini(env, io);
+ cl_env_nested_put(&nest, env);
+ RETURN(rc);