4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2013, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
/* Debug subsystem tag for this file, defined before the headers below are
 * included. NOTE(review): presumably picked up by the CDEBUG/CERROR macros
 * used throughout this file -- confirm against libcfs. */
37 #define DEBUG_SUBSYSTEM S_LOV
40 #include <libcfs/libcfs.h>
42 #include <liblustre.h>
45 #include <obd_class.h>
47 #include <lustre/lustre_idl.h>
49 #include "lov_internal.h"
/* Reset the bookkeeping of a request set: zero the completion, success and
 * finish-checked counters, initialise the request list head, set the
 * reference count to 1, and initialise the wait queue and the spinlock.
 * NOTE(review): the embedded line numbers skip (51 -> 54, 56 -> 58), so some
 * lines of this function are absent from the listing; further fields may be
 * initialised there. */
51 static void lov_init_set(struct lov_request_set *set)
54 cfs_atomic_set(&set->set_completes, 0);
55 cfs_atomic_set(&set->set_success, 0);
56 cfs_atomic_set(&set->set_finish_checked, 0);
58 CFS_INIT_LIST_HEAD(&set->set_list);
/* the set starts out holding a single reference */
59 cfs_atomic_set(&set->set_refcount, 1);
60 cfs_waitq_init(&set->set_waitq);
61 spin_lock_init(&set->set_lock);
/* Destroy a request set. Every request on set_list is unlinked and its
 * buffers are released (obdo, the per-request stripe metadata of rq_buflen
 * bytes, the statfs buffer when present, then the request itself).
 * Afterwards the set's page array is freed, the lock-handle reference is
 * dropped and the set itself is freed.
 * NOTE(review): the embedded line numbers skip here (74 -> 77 -> 79,
 * 83 -> 87 -> 91 -> 93), so the guards around several of these calls are not
 * part of this listing -- presumably each release is conditional; confirm. */
64 void lov_finish_set(struct lov_request_set *set)
/* _safe iteration: the current entry is unlinked and freed inside the loop */
70 cfs_list_for_each_safe(pos, n, &set->set_list) {
71 struct lov_request *req = cfs_list_entry(pos,
74 cfs_list_del_init(&req->rq_link);
77 OBDO_FREE(req->rq_oi.oi_oa);
/* rq_buflen is the size recorded when oi_md was allocated */
79 OBD_FREE_LARGE(req->rq_oi.oi_md, req->rq_buflen);
80 if (req->rq_oi.oi_osfs)
81 OBD_FREE(req->rq_oi.oi_osfs,
82 sizeof(*req->rq_oi.oi_osfs));
83 OBD_FREE(req, sizeof(*req));
/* set_pga was sized as set_oabufs entries */
87 int len = set->set_oabufs * sizeof(*set->set_pga);
88 OBD_FREE_LARGE(set->set_pga, len);
91 lov_llh_put(set->set_lockh);
93 OBD_FREE(set, sizeof(*set));
/* Test whether every request of the set has completed by comparing
 * set_completes with set_count. When they match, set_finish_checked is
 * atomically incremented and the caller that obtains 1 (the first one) is
 * singled out.
 * NOTE(review): the return statements and the use of 'idempotent' are not
 * visible in this listing (line numbers skip 103 -> 106 and stop at 106) --
 * confirm the exact return values. */
97 int lov_set_finished(struct lov_request_set *set, int idempotent)
99 int completes = cfs_atomic_read(&set->set_completes);
101 CDEBUG(D_INFO, "check set %d/%d\n", completes, set->set_count);
103 if (completes == set->set_count) {
/* inc_return == 1 only for the first caller to get here */
106 if (cfs_atomic_inc_return(&set->set_finish_checked) == 1)
/* Account for the completion of one request: mark req as complete, bump the
 * set's completion counter and success counter, and wake waiters on
 * set_waitq.
 * NOTE(review): line numbers skip 118 -> 120, so the condition under which
 * set_success is incremented (presumably rc == 0) and any recording of rc in
 * the request are not visible here -- confirm. */
112 void lov_update_set(struct lov_request_set *set,
113 struct lov_request *req, int rc)
115 req->rq_complete = 1;
118 cfs_atomic_inc(&set->set_completes);
120 cfs_atomic_inc(&set->set_success);
122 cfs_waitq_signal(&set->set_waitq);
/* Generic per-request completion handler, used by the getattr and sync
 * callbacks in this file. Records the result in the set through
 * lov_update_set(), then inspects rc: a non-zero rc coming from a target that
 * is missing or not active is treated specially ("grace error").
 * NOTE(review): the statement executed for that case and the return are not
 * part of this listing (numbers skip 135 -> 138) -- presumably rc is cleared
 * and returned; confirm. */
125 int lov_update_common_set(struct lov_request_set *set,
126 struct lov_request *req, int rc)
127 struct lov_obd *lov = &set->set_exp->exp_obd->u.lov;
131 lov_update_set(set, req, rc);
133 /* grace error on inactive ost */
/* true when rc is set and the target is either absent or inactive */
134 if (rc && !(lov->lov_tgts[req->rq_idx] &&
135 lov->lov_tgts[req->rq_idx]->ltd_active))
138 /* FIXME in raid1 regime, should return 0 */
/* Append req to the tail of the set's request list.
 * NOTE(review): original lines 145-147 are absent from this listing; callers
 * in this file later test set->set_count, so further bookkeeping presumably
 * happens there -- confirm. */
142 void lov_set_add_req(struct lov_request *req, struct lov_request_set *set)
144 cfs_list_add_tail(&req->rq_link, &set->set_list);
/* Wait condition used by lov_check_and_wait_active(). Under lov_lock it tests
 * whether target idx is absent, is active, or has an export whose client
 * import has already tried to connect.
 * NOTE(review): the result variable and the return are not visible in this
 * listing -- presumably non-zero is returned when any of these holds. */
149 static int lov_check_set(struct lov_obd *lov, int idx)
152 mutex_lock(&lov->lov_lock);
154 if (lov->lov_tgts[idx] == NULL ||
155 lov->lov_tgts[idx]->ltd_active ||
156 (lov->lov_tgts[idx]->ltd_exp != NULL &&
157 class_exp2cliimp(lov->lov_tgts[idx]->ltd_exp)->imp_connect_tried))
160 mutex_unlock(&lov->lov_lock);
164 /* Check if the OSC connection exists and is active.
165 * If the OSC has not yet had a chance to connect to the OST the first time,
166 * wait once for it to connect instead of returning an error. */
/* Decide whether target ost_idx can be used. Under lov_lock the target is
 * looked up and three quick cases are tested: no such target, target already
 * active, and target whose import has already tried to connect. Otherwise the
 * lock is dropped and the caller sleeps on a private wait queue until
 * lov_check_set() holds or the wait built from obd_timeout expires; the
 * target's active flag is then examined again.
 * Callers in this file treat a zero result as "target inactive".
 * NOTE(review): the per-branch return values and the re-acquisition of
 * lov_lock before the final unlock are not visible in this listing. */
168 int lov_check_and_wait_active(struct lov_obd *lov, int ost_idx)
171 struct l_wait_info lwi;
172 struct lov_tgt_desc *tgt;
175 mutex_lock(&lov->lov_lock);
177 tgt = lov->lov_tgts[ost_idx];
179 if (unlikely(tgt == NULL))
182 if (likely(tgt->ltd_active))
185 if (tgt->ltd_exp && class_exp2cliimp(tgt->ltd_exp)->imp_connect_tried)
/* the lock is released before sleeping; lov_check_set() takes it itself */
188 mutex_unlock(&lov->lov_lock);
190 cfs_waitq_init(&waitq);
191 lwi = LWI_TIMEOUT_INTERVAL(cfs_time_seconds(obd_timeout),
192 cfs_time_seconds(1), NULL, NULL);
194 rc = l_wait_event(waitq, lov_check_set(lov, ost_idx), &lwi);
/* tgt was sampled before the wait; only its active flag is re-read here */
195 if (tgt != NULL && tgt->ltd_active)
201 mutex_unlock(&lov->lov_lock);
/* Prototype of a function that is not defined in this listing; it is called
 * from lov_update_enqueue_set() with the stripe's lock handle, its lov_oinfo,
 * the enqueue flags, the request's lvb copy, the lock mode and the result. */
205 extern void osc_update_enqueue(struct lustre_handle *lov_lockhp,
206 struct lov_oinfo *loi, int flags,
207 struct ost_lvb *lvb, __u32 mode, int rc);
/* LOV-level handling of one stripe's enqueue result. For any rc other than
 * ELDLM_OK -- except ELDLM_LOCK_ABORTED on an intent enqueue
 * (LDLM_FL_HAS_INTENT) -- the stripe's lock handle is zeroed; if the target
 * is present and active the failure is logged, unless rc is -EINTR or
 * -EUSERS.
 * NOTE(review): the branch taken for a missing/inactive target and the
 * return are not part of this listing (numbers stop at 226). */
209 static int lov_update_enqueue_lov(struct obd_export *exp,
210 struct lustre_handle *lov_lockhp,
211 struct lov_oinfo *loi, int flags, int idx,
212 struct ost_id *oi, int rc)
214 struct lov_obd *lov = &exp->exp_obd->u.lov;
216 if (rc != ELDLM_OK &&
217 !(rc == ELDLM_LOCK_ABORTED && (flags & LDLM_FL_HAS_INTENT))) {
/* invalidate the sub-lock handle for this stripe */
218 memset(lov_lockhp, 0, sizeof(*lov_lockhp));
219 if (lov->lov_tgts[idx] && lov->lov_tgts[idx]->ltd_active) {
220 /* -EUSERS used by OST to report file contention */
221 if (rc != -EINTR && rc != -EUSERS)
222 CERROR("%s: enqueue objid "DOSTID" subobj"
223 DOSTID" on OST idx %d: rc %d\n",
224 exp->exp_obd->obd_name,
225 POSTID(oi), POSTID(&loi->loi_oi),
226 loi->loi_ost_idx, rc);
/* Process the result of one stripe's enqueue. Locates the stripe's lock
 * handle and lov_oinfo from the parent set, then, holding the stripe lock,
 * calls osc_update_enqueue() with the lvb held in the request's private
 * metadata copy, zeroes the handle for an aborted intent lock, and runs
 * lov_update_enqueue_lov(). Finally the (possibly adjusted) rc is recorded in
 * the set via lov_update_set().
 * NOTE(review): the return statement is not visible in this listing. */
233 int lov_update_enqueue_set(struct lov_request *req, __u32 mode, int rc)
235 struct lov_request_set *set = req->rq_rqset;
236 struct lustre_handle *lov_lockhp;
237 struct obd_info *oi = set->set_oi;
238 struct lov_oinfo *loi;
/* per-stripe slot in the set's handle array, indexed by rq_stripe */
243 lov_lockhp = set->set_lockh->llh_handles + req->rq_stripe;
244 loi = oi->oi_md->lsm_oinfo[req->rq_stripe];
246 /* XXX LOV STACKING: OSC gets a copy, created in lov_prep_enqueue_set
247 * and that copy can be arbitrarily out of date.
249 * The LOV API is due for a serious rewriting anyways, and this
250 * can be addressed then. */
252 lov_stripe_lock(oi->oi_md);
253 osc_update_enqueue(lov_lockhp, loi, oi->oi_flags,
254 &req->rq_oi.oi_md->lsm_oinfo[0]->loi_lvb, mode, rc);
255 if (rc == ELDLM_LOCK_ABORTED && (oi->oi_flags & LDLM_FL_HAS_INTENT))
256 memset(lov_lockhp, 0, sizeof *lov_lockhp);
257 rc = lov_update_enqueue_lov(set->set_exp, lov_lockhp, loi, oi->oi_flags,
258 req->rq_idx, &oi->oi_md->lsm_oi, rc);
259 lov_stripe_unlock(oi->oi_md);
260 lov_update_set(set, req, rc);
264 /* The callback for osc_enqueue that updates lov info for every OSC request. */
/* cookie is the obd_info embedded in a lov_request as rq_oi; the request is
 * recovered with container_of and the lock mode is taken from the set's
 * enqueue info. Returns the result of lov_update_enqueue_set(). */
265 static int cb_update_enqueue(void *cookie, int rc)
267 struct obd_info *oinfo = cookie;
268 struct ldlm_enqueue_info *einfo;
269 struct lov_request *lovreq;
271 lovreq = container_of(oinfo, struct lov_request, rq_oi);
272 einfo = lovreq->rq_rqset->set_ei;
273 return lov_update_enqueue_set(lovreq, einfo->ei_mode, rc);
/* Finish an enqueue/match operation. If at least one request completed and
 * every completed request succeeded, nothing has to be undone. Otherwise the
 * set is walked and, for requests that completed without error and whose
 * stripe lock handle is in use, the sub-lock is cancelled through
 * obd_cancel(); cancel failures on present, active targets are logged.
 * A reference on the set's lock handles is dropped at the end.
 * NOTE(review): the statements following the skip tests (presumably
 * 'continue'), the early return and the final return are not visible here.
 * NOTE(review): the obd_cancel() call dereferences
 * lov->lov_tgts[req->rq_idx] before the NULL test that follows it -- confirm
 * the target cannot be NULL at this point. */
276 static int enqueue_done(struct lov_request_set *set, __u32 mode)
278 struct lov_request *req;
279 struct lov_obd *lov = &set->set_exp->exp_obd->u.lov;
280 int completes = cfs_atomic_read(&set->set_completes);
284 /* enqueue/match success, just return */
285 if (completes && completes == cfs_atomic_read(&set->set_success))
288 /* cancel enqueued/matched locks */
289 cfs_list_for_each_entry(req, &set->set_list, rq_link) {
290 struct lustre_handle *lov_lockhp;
/* only requests that completed without error are considered */
292 if (!req->rq_complete || req->rq_rc)
295 lov_lockhp = set->set_lockh->llh_handles + req->rq_stripe;
297 if (!lustre_handle_is_used(lov_lockhp))
300 rc = obd_cancel(lov->lov_tgts[req->rq_idx]->ltd_exp,
301 req->rq_oi.oi_md, mode, lov_lockhp);
302 if (rc && lov->lov_tgts[req->rq_idx] &&
303 lov->lov_tgts[req->rq_idx]->ltd_active)
304 CERROR("%s: cancelling obdjid "DOSTID" on OST"
305 "idx %d error: rc = %d\n",
306 set->set_exp->exp_obd->obd_name,
307 POSTID(&req->rq_oi.oi_md->lsm_oi),
311 lov_llh_put(set->set_lockh);
/* Finalise an enqueue request set. Asserts that set_exp is set. In one branch
 * set_completes is reset to 0 and enqueue_done() is run; in the other the
 * lock-handle reference is dropped when set_lockh is set. The function
 * returns rc when it is non-zero and otherwise the enqueue_done() result.
 * NOTE(review): the condition selecting the first branch, the use of 'rqset'
 * and the release of the set itself are not visible in this listing (numbers
 * skip 316 -> 323, 324 -> 328 and 331 -> 335). */
315 int lov_fini_enqueue_set(struct lov_request_set *set, __u32 mode, int rc,
316 struct ptlrpc_request_set *rqset)
323 LASSERT(set->set_exp);
324 /* Do enqueue_done only for sync requests and if any request
328 cfs_atomic_set(&set->set_completes, 0);
329 ret = enqueue_done(set, mode);
330 } else if (set->set_lockh)
331 lov_llh_put(set->set_lockh);
335 RETURN(rc ? rc : ret);
/* Reference-taking callback installed as hop_addref in lov_handle_ops:
 * increments the refcount of the lov_lock_handles passed as an untyped
 * pointer and logs the new count. */
338 static void lov_llh_addref(void *llhp)
340 struct lov_lock_handles *llh = llhp;
342 cfs_atomic_inc(&llh->llh_refcount);
343 CDEBUG(D_INFO, "GETting llh %p : new refcount %d\n", llh,
344 cfs_atomic_read(&llh->llh_refcount));
/* Handle operations table passed to class_handle_hash() by lov_llh_new();
 * hop_addref points at lov_llh_addref().
 * NOTE(review): any further members and the closing brace are absent from
 * this listing. */
347 static struct portals_handle_ops lov_handle_ops = {
348 .hop_addref = lov_llh_addref,
/* Allocate a lov_lock_handles for lsm with one trailing lustre_handle slot
 * per stripe, set its refcount to 2, record the stripe count, and register
 * the embedded handle with class_handle_hash() using lov_handle_ops.
 * NOTE(review): the allocation-failure check and the return are not visible
 * here; callers in this file compare the result against NULL. The reason the
 * initial refcount is 2 is not evident from this listing. */
352 static struct lov_lock_handles *lov_llh_new(struct lov_stripe_md *lsm)
354 struct lov_lock_handles *llh;
/* header plus lsm_stripe_count handle slots in one allocation */
356 OBD_ALLOC(llh, sizeof *llh +
357 sizeof(*llh->llh_handles) * lsm->lsm_stripe_count);
361 cfs_atomic_set(&llh->llh_refcount, 2);
362 llh->llh_stripe_count = lsm->lsm_stripe_count;
363 CFS_INIT_LIST_HEAD(&llh->llh_handle.h_link);
364 class_handle_hash(&llh->llh_handle, &lov_handle_ops);
/* Build the request set for a lock enqueue over oinfo->oi_md. The set and its
 * lock handles are allocated and the handle cookie is published through
 * oinfo->oi_lockh. One lov_request is then created for every stripe that
 * intersects the requested extent and whose target passes
 * lov_check_and_wait_active(); each request carries a private single-entry
 * stripe metadata copy seeded with the stripe's id, kms and lvb, and has
 * cb_update_enqueue() installed as its completion callback.
 * Visible failures are -ENOMEM for allocation failures and -EIO otherwise;
 * the cleanup path calls lov_fini_enqueue_set().
 * NOTE(review): many lines are absent from this listing (for example
 * 374 -> 378 -> 386, 399 -> 403, 404 -> 408, 447 -> 450 -> 454), including
 * the out_set label, the initialisation of the set, the handling of skipped
 * stripes and the store to *reqset -- confirm against the full source. */
369 int lov_prep_enqueue_set(struct obd_export *exp, struct obd_info *oinfo,
370 struct ldlm_enqueue_info *einfo,
371 struct lov_request_set **reqset)
373 struct lov_obd *lov = &exp->exp_obd->u.lov;
374 struct lov_request_set *set;
378 OBD_ALLOC(set, sizeof(*set));
386 set->set_lockh = lov_llh_new(oinfo->oi_md);
387 if (set->set_lockh == NULL)
388 GOTO(out_set, rc = -ENOMEM);
/* hand the new handle's cookie back to the caller */
389 oinfo->oi_lockh->cookie = set->set_lockh->llh_handle.h_cookie;
391 for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) {
392 struct lov_oinfo *loi;
393 struct lov_request *req;
396 loi = oinfo->oi_md->lsm_oinfo[i];
397 if (!lov_stripe_intersects(oinfo->oi_md, i,
398 oinfo->oi_policy.l_extent.start,
399 oinfo->oi_policy.l_extent.end,
403 if (!lov_check_and_wait_active(lov, loi->loi_ost_idx)) {
404 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
408 OBD_ALLOC(req, sizeof(*req));
410 GOTO(out_set, rc = -ENOMEM);
/* one buffer: stripe metadata header, one lsm_oinfo[] pointer slot, and the
 * lov_oinfo that slot will point at */
412 req->rq_buflen = sizeof(*req->rq_oi.oi_md) +
413 sizeof(struct lov_oinfo *) +
414 sizeof(struct lov_oinfo);
415 OBD_ALLOC_LARGE(req->rq_oi.oi_md, req->rq_buflen);
416 if (req->rq_oi.oi_md == NULL) {
417 OBD_FREE(req, sizeof(*req));
418 GOTO(out_set, rc = -ENOMEM);
/* aim lsm_oinfo[0] at the lov_oinfo stored right after the pointer slot */
420 req->rq_oi.oi_md->lsm_oinfo[0] =
421 ((void *)req->rq_oi.oi_md) + sizeof(*req->rq_oi.oi_md) +
422 sizeof(struct lov_oinfo *);
424 /* Set lov request specific parameters. */
425 req->rq_oi.oi_lockh = set->set_lockh->llh_handles + i;
426 req->rq_oi.oi_cb_up = cb_update_enqueue;
427 req->rq_oi.oi_flags = oinfo->oi_flags;
429 LASSERT(req->rq_oi.oi_lockh);
431 req->rq_oi.oi_policy.l_extent.gid =
432 oinfo->oi_policy.l_extent.gid;
433 req->rq_oi.oi_policy.l_extent.start = start;
434 req->rq_oi.oi_policy.l_extent.end = end;
436 req->rq_idx = loi->loi_ost_idx;
439 /* XXX LOV STACKING: submd should be from the subobj */
440 req->rq_oi.oi_md->lsm_oi = loi->loi_oi;
441 req->rq_oi.oi_md->lsm_stripe_count = 0;
442 req->rq_oi.oi_md->lsm_oinfo[0]->loi_kms_valid =
444 req->rq_oi.oi_md->lsm_oinfo[0]->loi_kms = loi->loi_kms;
445 req->rq_oi.oi_md->lsm_oinfo[0]->loi_lvb = loi->loi_lvb;
447 lov_set_add_req(req, set);
450 GOTO(out_set, rc = -EIO);
454 lov_fini_enqueue_set(set, einfo->ei_mode, rc, NULL);
/* Finalise a lock-match request set. Asserts set_exp, runs enqueue_done()
 * and, when every request of the set succeeded and LDLM_FL_TEST_LOCK is set
 * in flags, drops a reference on the lock handles.
 * NOTE(review): the release of the set and the return are not visible in
 * this listing. */
458 int lov_fini_match_set(struct lov_request_set *set, __u32 mode, int flags)
465 LASSERT(set->set_exp);
466 rc = enqueue_done(set, mode);
467 if ((set->set_count == cfs_atomic_read(&set->set_success)) &&
468 (flags & LDLM_FL_TEST_LOCK))
469 lov_llh_put(set->set_lockh);
/* Build the request set for a lock match over lsm. Allocates the set and its
 * lock handles (the handle cookie is returned through lockh), then creates
 * one lov_request per stripe that intersects the policy extent. Unlike the
 * enqueue variant, an inactive target fails the whole operation with -EIO.
 * Each request gets a stripe metadata buffer the size of the header only,
 * the stripe-local extent and the stripe's object id.
 * Visible failures are -ENOMEM and -EIO; the cleanup path calls
 * lov_fini_match_set() with flags 0.
 * NOTE(review): the out_set label, the handling of non-intersecting stripes,
 * the test before the final -EIO and the store to *reqset are not part of
 * this listing. */
476 int lov_prep_match_set(struct obd_export *exp, struct obd_info *oinfo,
477 struct lov_stripe_md *lsm, ldlm_policy_data_t *policy,
478 __u32 mode, struct lustre_handle *lockh,
479 struct lov_request_set **reqset)
481 struct lov_obd *lov = &exp->exp_obd->u.lov;
482 struct lov_request_set *set;
486 OBD_ALLOC(set, sizeof(*set));
493 set->set_oi->oi_md = lsm;
494 set->set_lockh = lov_llh_new(lsm);
495 if (set->set_lockh == NULL)
496 GOTO(out_set, rc = -ENOMEM);
497 lockh->cookie = set->set_lockh->llh_handle.h_cookie;
499 for (i = 0; i < lsm->lsm_stripe_count; i++){
500 struct lov_oinfo *loi;
501 struct lov_request *req;
504 loi = lsm->lsm_oinfo[i];
505 if (!lov_stripe_intersects(lsm, i, policy->l_extent.start,
506 policy->l_extent.end, &start, &end))
509 /* FIXME raid1 should grace this error */
510 if (!lov_check_and_wait_active(lov, loi->loi_ost_idx)) {
511 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
512 GOTO(out_set, rc = -EIO);
515 OBD_ALLOC(req, sizeof(*req));
517 GOTO(out_set, rc = -ENOMEM);
519 req->rq_buflen = sizeof(*req->rq_oi.oi_md);
520 OBD_ALLOC_LARGE(req->rq_oi.oi_md, req->rq_buflen);
521 if (req->rq_oi.oi_md == NULL) {
522 OBD_FREE(req, sizeof(*req));
523 GOTO(out_set, rc = -ENOMEM);
/* start/end were filled in by lov_stripe_intersects() above */
526 req->rq_oi.oi_policy.l_extent.start = start;
527 req->rq_oi.oi_policy.l_extent.end = end;
528 req->rq_oi.oi_policy.l_extent.gid = policy->l_extent.gid;
530 req->rq_idx = loi->loi_ost_idx;
533 /* XXX LOV STACKING: submd should be from the subobj */
534 req->rq_oi.oi_md->lsm_oi = loi->loi_oi;
535 req->rq_oi.oi_md->lsm_stripe_count = 0;
537 lov_set_add_req(req, set);
540 GOTO(out_set, rc = -EIO);
544 lov_fini_match_set(set, mode, 0);
/* Finalise a lock-cancel request set: asserts set_exp and drops a reference
 * on the set's lock handles.
 * NOTE(review): original lines 549-555 and 559 onwards are absent here, so
 * any guard around the put, the release of the set and the return value are
 * not visible. */
548 int lov_fini_cancel_set(struct lov_request_set *set)
556 LASSERT(set->set_exp);
558 lov_llh_put(set->set_lockh);
/* Build the request set for cancelling a striped lock. The existing lock
 * handles are looked up from lockh via lov_handle2llh() (-EINVAL when that
 * fails), and one lov_request is created for every stripe whose sub-lock
 * handle is in use; stripes without a lock are logged and not given a
 * request. Each request gets a header-sized stripe metadata buffer carrying
 * the stripe's object id.
 * Visible failures are -EINVAL, -ENOMEM and -EIO; the cleanup path calls
 * lov_fini_cancel_set().
 * NOTE(review): the out_set label, the statement after the "no lock" debug
 * message (presumably 'continue'), the test before the final -EIO and the
 * store to *reqset are not part of this listing. */
565 int lov_prep_cancel_set(struct obd_export *exp, struct obd_info *oinfo,
566 struct lov_stripe_md *lsm, __u32 mode,
567 struct lustre_handle *lockh,
568 struct lov_request_set **reqset)
570 struct lov_request_set *set;
574 OBD_ALLOC(set, sizeof(*set));
581 set->set_oi->oi_md = lsm;
582 set->set_lockh = lov_handle2llh(lockh);
583 if (set->set_lockh == NULL) {
584 CERROR("LOV: invalid lov lock handle %p\n", lockh);
585 GOTO(out_set, rc = -EINVAL);
587 lockh->cookie = set->set_lockh->llh_handle.h_cookie;
589 for (i = 0; i < lsm->lsm_stripe_count; i++){
590 struct lov_request *req;
591 struct lustre_handle *lov_lockhp;
592 struct lov_oinfo *loi = lsm->lsm_oinfo[i];
594 lov_lockhp = set->set_lockh->llh_handles + i;
595 if (!lustre_handle_is_used(lov_lockhp)) {
596 CDEBUG(D_INFO, "lov idx %d subobj "DOSTID" no lock\n",
597 loi->loi_ost_idx, POSTID(&loi->loi_oi));
601 OBD_ALLOC(req, sizeof(*req));
603 GOTO(out_set, rc = -ENOMEM);
605 req->rq_buflen = sizeof(*req->rq_oi.oi_md);
606 OBD_ALLOC_LARGE(req->rq_oi.oi_md, req->rq_buflen);
607 if (req->rq_oi.oi_md == NULL) {
608 OBD_FREE(req, sizeof(*req));
609 GOTO(out_set, rc = -ENOMEM);
612 req->rq_idx = loi->loi_ost_idx;
615 /* XXX LOV STACKING: submd should be from the subobj */
616 req->rq_oi.oi_md->lsm_oi = loi->loi_oi;
617 req->rq_oi.oi_md->lsm_stripe_count = 0;
619 lov_set_add_req(req, set);
622 GOTO(out_set, rc = -EIO);
626 lov_fini_cancel_set(set);
/* Merge the attributes returned by the individual stripe requests into the
 * caller's obdo (set->set_oi->oi_oa). Requests that completed without error
 * and report a non-zero o_valid are folded into a temporary obdo by
 * lov_merge_attrs(), which also receives attrset by address. When the caller
 * asked for OBD_MD_FLEPOCH, attrset must equal the stripe count or -EIO is
 * returned. On success the temporary obdo, with the caller's o_oi preserved,
 * is copied over the caller's obdo.
 * NOTE(review): the early returns, the allocation of tmp_oa (only its
 * -ENOMEM failure is visible), the test guarding the "No stripes had valid
 * attrs" message, the out label and the cleanup are absent from this
 * listing. */
629 static int common_attr_done(struct lov_request_set *set)
632 struct lov_request *req;
634 int rc = 0, attrset = 0;
637 LASSERT(set->set_oi != NULL);
639 if (set->set_oi->oi_oa == NULL)
642 if (!cfs_atomic_read(&set->set_success))
647 GOTO(out, rc = -ENOMEM);
649 cfs_list_for_each (pos, &set->set_list) {
650 req = cfs_list_entry(pos, struct lov_request, rq_link);
/* only requests that completed without error contribute */
652 if (!req->rq_complete || req->rq_rc)
654 if (req->rq_oi.oi_oa->o_valid == 0) /* inactive stripe */
656 lov_merge_attrs(tmp_oa, req->rq_oi.oi_oa,
657 req->rq_oi.oi_oa->o_valid,
658 set->set_oi->oi_md, req->rq_stripe, &attrset);
661 CERROR("No stripes had valid attrs\n");
664 if ((set->set_oi->oi_oa->o_valid & OBD_MD_FLEPOCH) &&
665 (set->set_oi->oi_md->lsm_stripe_count != attrset)) {
666 /* When we take attributes of some epoch, we require all the
667 * ost to be active. */
668 CERROR("Not all the stripes had valid attrs\n");
669 GOTO(out, rc = -EIO);
/* keep the caller's object id in the merged result */
672 tmp_oa->o_oi = set->set_oi->oi_oa->o_oi;
673 memcpy(set->set_oi->oi_oa, tmp_oa, sizeof(*set->set_oi->oi_oa));
/* After a bulk read/write, walk the set and, for each request that completed
 * without error, copy the block count reported in its obdo (when
 * OBD_MD_FLBLOCKS is valid) into the lvb of the corresponding stripe.
 * NOTE(review): the statement after the skip test (presumably 'continue'),
 * any locking around the update and the return are not visible here. */
681 static int brw_done(struct lov_request_set *set)
683 struct lov_stripe_md *lsm = set->set_oi->oi_md;
684 struct lov_oinfo *loi = NULL;
686 struct lov_request *req;
689 cfs_list_for_each (pos, &set->set_list) {
690 req = cfs_list_entry(pos, struct lov_request, rq_link);
692 if (!req->rq_complete || req->rq_rc)
695 loi = lsm->lsm_oinfo[req->rq_stripe];
697 if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLBLOCKS)
698 loi->loi_lvb.lvb_blocks = req->rq_oi.oi_oa->o_blocks;
/* Finalise a bulk read/write request set. Asserts set_exp and enters a
 * completion branch when at least one request has completed.
 * NOTE(review): the body of that branch (original line 713), the release of
 * the set and the return are absent from this listing. */
704 int lov_fini_brw_set(struct lov_request_set *set)
711 LASSERT(set->set_exp);
712 if (cfs_atomic_read(&set->set_completes)) {
714 /* FIXME update qos data here */
/* Build the request set for a bulk read/write of oa_bufs pages.
 * Steps visible here:
 *  - allocate the set, a page array of oa_bufs entries (set_pga) and a
 *    per-stripe scratch array (info);
 *  - count the pages that fall on each stripe;
 *  - for every stripe with pages, fail with -EIO if its target is not
 *    active, otherwise create a lov_request with a copy of the caller's
 *    obdo (object id and stripe index overridden) and give it the window
 *    starting at rq_pgaidx and spanning rq_oabufs entries of set_pga;
 *  - copy the caller's pages into those windows, with each page offset
 *    rewritten by lov_stripe_offset().
 * Visible failures are -ENOMEM and -EIO; a cleanup path calls
 * lov_fini_brw_set().
 * NOTE(review): the declaration of info, the initial value of shift, the
 * update of info[].off in the last loop, the out labels and the store to
 * *reqset are not part of this listing. */
721 int lov_prep_brw_set(struct obd_export *exp, struct obd_info *oinfo,
722 obd_count oa_bufs, struct brw_page *pga,
723 struct obd_trans_info *oti,
724 struct lov_request_set **reqset)
731 struct lov_request_set *set;
732 struct lov_obd *lov = &exp->exp_obd->u.lov;
733 int rc = 0, i, shift;
736 OBD_ALLOC(set, sizeof(*set));
744 set->set_oabufs = oa_bufs;
745 OBD_ALLOC_LARGE(set->set_pga, oa_bufs * sizeof(*set->set_pga));
747 GOTO(out, rc = -ENOMEM);
749 OBD_ALLOC_LARGE(info, sizeof(*info) * oinfo->oi_md->lsm_stripe_count);
751 GOTO(out, rc = -ENOMEM);
753 /* calculate the page count for each stripe */
754 for (i = 0; i < oa_bufs; i++) {
755 int stripe = lov_stripe_number(oinfo->oi_md, pga[i].off);
756 info[stripe].count++;
759 /* alloc and initialize lov request */
761 for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++){
762 struct lov_oinfo *loi = NULL;
763 struct lov_request *req;
765 if (info[i].count == 0)
768 loi = oinfo->oi_md->lsm_oinfo[i];
769 if (!lov_check_and_wait_active(lov, loi->loi_ost_idx)) {
770 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
771 GOTO(out, rc = -EIO);
774 OBD_ALLOC(req, sizeof(*req));
776 GOTO(out, rc = -ENOMEM);
778 OBDO_ALLOC(req->rq_oi.oi_oa);
779 if (req->rq_oi.oi_oa == NULL) {
780 OBD_FREE(req, sizeof(*req));
781 GOTO(out, rc = -ENOMEM);
785 memcpy(req->rq_oi.oi_oa, oinfo->oi_oa,
786 sizeof(*req->rq_oi.oi_oa));
788 req->rq_oi.oi_oa->o_oi = loi->loi_oi;
789 req->rq_oi.oi_oa->o_stripe_idx = i;
791 req->rq_buflen = sizeof(*req->rq_oi.oi_md);
792 OBD_ALLOC_LARGE(req->rq_oi.oi_md, req->rq_buflen);
793 if (req->rq_oi.oi_md == NULL) {
794 OBDO_FREE(req->rq_oi.oi_oa);
795 OBD_FREE(req, sizeof(*req));
796 GOTO(out, rc = -ENOMEM);
799 req->rq_idx = loi->loi_ost_idx;
802 /* XXX LOV STACKING */
803 req->rq_oi.oi_md->lsm_oi = loi->loi_oi;
/* this request owns rq_oabufs consecutive entries of set_pga from shift */
804 req->rq_oabufs = info[i].count;
805 req->rq_pgaidx = shift;
806 shift += req->rq_oabufs;
808 /* remember the index for sort brw_page array */
809 info[i].index = req->rq_pgaidx;
811 req->rq_oi.oi_capa = oinfo->oi_capa;
813 lov_set_add_req(req, set);
816 GOTO(out, rc = -EIO);
818 /* rotate & sort the brw_page array */
819 for (i = 0; i < oa_bufs; i++) {
820 int stripe = lov_stripe_number(oinfo->oi_md, pga[i].off);
/* destination slot: the stripe's window base plus its running offset */
822 shift = info[stripe].index + info[stripe].off;
823 LASSERT(shift < oa_bufs);
824 set->set_pga[shift] = pga[i];
825 lov_stripe_offset(oinfo->oi_md, pga[i].off, stripe,
826 &set->set_pga[shift].off);
832 sizeof(*info) * oinfo->oi_md->lsm_stripe_count);
837 lov_fini_brw_set(set);
/* Finalise a getattr request set. Asserts set_exp and, when at least one
 * request has completed, merges the per-stripe attributes with
 * common_attr_done(), whose result becomes rc.
 * NOTE(review): the release of the set and the return are not visible in
 * this listing. */
842 int lov_fini_getattr_set(struct lov_request_set *set)
849 LASSERT(set->set_exp);
850 if (cfs_atomic_read(&set->set_completes))
851 rc = common_attr_done(set);
858 /* The callback for osc_getattr_async that finalizes a request info when a
859 * response is received. */
/* cookie is the obd_info embedded in a lov_request as rq_oi; the request is
 * recovered with container_of and the result is handed to
 * lov_update_common_set() for the request's parent set. */
860 static int cb_getattr_update(void *cookie, int rc)
862 struct obd_info *oinfo = cookie;
863 struct lov_request *lovreq;
864 lovreq = container_of(oinfo, struct lov_request, rq_oi);
865 return lov_update_common_set(lovreq->rq_rqset, lovreq, rc);
/* Build the request set for a getattr over oinfo->oi_md: one lov_request per
 * stripe whose target is active. An inactive target is logged; when the
 * caller asked for OBD_MD_FLEPOCH it fails the whole operation with -EIO.
 * Each request gets a copy of the caller's obdo with the stripe's object id,
 * the caller's capability, and cb_getattr_update() as completion callback.
 * Visible failures are -ENOMEM and -EIO; the cleanup path calls
 * lov_fini_getattr_set().
 * NOTE(review): the initialisation of the set, the statement taken for an
 * inactive target without FLEPOCH (presumably 'continue'), the test before
 * the final -EIO, the out_set label and the store to *reqset are absent from
 * this listing. */
868 int lov_prep_getattr_set(struct obd_export *exp, struct obd_info *oinfo,
869 struct lov_request_set **reqset)
871 struct lov_request_set *set;
872 struct lov_obd *lov = &exp->exp_obd->u.lov;
876 OBD_ALLOC(set, sizeof(*set));
884 for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) {
885 struct lov_oinfo *loi;
886 struct lov_request *req;
888 loi = oinfo->oi_md->lsm_oinfo[i];
889 if (!lov_check_and_wait_active(lov, loi->loi_ost_idx)) {
890 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
891 if (oinfo->oi_oa->o_valid & OBD_MD_FLEPOCH)
892 /* SOM requires all the OSTs to be active. */
893 GOTO(out_set, rc = -EIO);
897 OBD_ALLOC(req, sizeof(*req));
899 GOTO(out_set, rc = -ENOMEM);
902 req->rq_idx = loi->loi_ost_idx;
904 OBDO_ALLOC(req->rq_oi.oi_oa);
905 if (req->rq_oi.oi_oa == NULL) {
906 OBD_FREE(req, sizeof(*req));
907 GOTO(out_set, rc = -ENOMEM);
/* start from the caller's obdo, then substitute the stripe's object id */
909 memcpy(req->rq_oi.oi_oa, oinfo->oi_oa,
910 sizeof(*req->rq_oi.oi_oa));
911 req->rq_oi.oi_oa->o_oi = loi->loi_oi;
912 req->rq_oi.oi_cb_up = cb_getattr_update;
913 req->rq_oi.oi_capa = oinfo->oi_capa;
915 lov_set_add_req(req, set);
918 GOTO(out_set, rc = -EIO);
922 lov_fini_getattr_set(set);
/* Finalise a destroy request set. Asserts set_exp; the branch taken when at
 * least one request completed contains only a FIXME in this listing.
 * NOTE(review): the release of the set and the return are not visible. */
926 int lov_fini_destroy_set(struct lov_request_set *set)
932 LASSERT(set->set_exp);
933 if (cfs_atomic_read(&set->set_completes)) {
934 /* FIXME update qos data here */
/* Build the request set for destroying the objects of lsm. The set records
 * lsm and src_oa, and takes the log cookies from oti when oti is given and
 * src_oa has OBD_MD_FLCOOKIE set. One lov_request is created per stripe
 * whose target is active; each carries a copy of src_oa with the stripe's
 * object id.
 * Visible failures are -ENOMEM and -EIO; the cleanup path calls
 * lov_fini_destroy_set().
 * NOTE(review): the statement taken for an inactive target (presumably
 * 'continue'), the test before the final -EIO, the out_set label and the
 * store to *reqset are absent from this listing. */
942 int lov_prep_destroy_set(struct obd_export *exp, struct obd_info *oinfo,
943 struct obdo *src_oa, struct lov_stripe_md *lsm,
944 struct obd_trans_info *oti,
945 struct lov_request_set **reqset)
947 struct lov_request_set *set;
948 struct lov_obd *lov = &exp->exp_obd->u.lov;
952 OBD_ALLOC(set, sizeof(*set));
959 set->set_oi->oi_md = lsm;
960 set->set_oi->oi_oa = src_oa;
/* '&' binds tighter than '&&': this is oti != NULL && (o_valid & FLCOOKIE) */
962 if (oti != NULL && src_oa->o_valid & OBD_MD_FLCOOKIE)
963 set->set_cookies = oti->oti_logcookies;
965 for (i = 0; i < lsm->lsm_stripe_count; i++) {
966 struct lov_oinfo *loi;
967 struct lov_request *req;
969 loi = lsm->lsm_oinfo[i];
970 if (!lov_check_and_wait_active(lov, loi->loi_ost_idx)) {
971 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
975 OBD_ALLOC(req, sizeof(*req));
977 GOTO(out_set, rc = -ENOMEM);
980 req->rq_idx = loi->loi_ost_idx;
982 OBDO_ALLOC(req->rq_oi.oi_oa);
983 if (req->rq_oi.oi_oa == NULL) {
984 OBD_FREE(req, sizeof(*req));
985 GOTO(out_set, rc = -ENOMEM);
987 memcpy(req->rq_oi.oi_oa, src_oa, sizeof(*req->rq_oi.oi_oa));
988 req->rq_oi.oi_oa->o_oi = loi->loi_oi;
989 lov_set_add_req(req, set);
992 GOTO(out_set, rc = -EIO);
996 lov_fini_destroy_set(set);
/* Finalise a setattr request set. Asserts set_exp; when at least one request
 * has completed the per-stripe attributes are merged with
 * common_attr_done(), whose result becomes rc. The set's reference is then
 * dropped with lov_put_reqset().
 * NOTE(review): the return statement is not visible in this listing. */
1000 int lov_fini_setattr_set(struct lov_request_set *set)
1007 LASSERT(set->set_exp);
1008 if (cfs_atomic_read(&set->set_completes)) {
1009 rc = common_attr_done(set);
1010 /* FIXME update qos data here */
1013 lov_put_reqset(set);
/* Per-request completion handler for setattr. Records the result in the set,
 * treats a non-zero rc from a missing or inactive target specially ("grace
 * error"), and copies the ctime, mtime and atime returned in the request's
 * obdo (each only when its valid bit is set) into the lvb of the stripe the
 * request belongs to.
 * NOTE(review): the statement executed in the grace case, the condition
 * guarding the timestamp updates (numbers skip 1028 -> 1032) and the return
 * are not part of this listing. */
1017 int lov_update_setattr_set(struct lov_request_set *set,
1018 struct lov_request *req, int rc)
1020 struct lov_obd *lov = &req->rq_rqset->set_exp->exp_obd->u.lov;
1021 struct lov_stripe_md *lsm = req->rq_rqset->set_oi->oi_md;
1024 lov_update_set(set, req, rc);
1026 /* grace error on inactive ost */
1027 if (rc && !(lov->lov_tgts[req->rq_idx] &&
1028 lov->lov_tgts[req->rq_idx]->ltd_active))
1032 if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLCTIME)
1033 lsm->lsm_oinfo[req->rq_stripe]->loi_lvb.lvb_ctime =
1034 req->rq_oi.oi_oa->o_ctime;
1035 if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLMTIME)
1036 lsm->lsm_oinfo[req->rq_stripe]->loi_lvb.lvb_mtime =
1037 req->rq_oi.oi_oa->o_mtime;
1038 if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLATIME)
1039 lsm->lsm_oinfo[req->rq_stripe]->loi_lvb.lvb_atime =
1040 req->rq_oi.oi_oa->o_atime;
1046 /* The callback for osc_setattr_async that finalizes a request info when a
1047 * response is received. */
/* cookie is the obd_info embedded in a lov_request as rq_oi; the request is
 * recovered with container_of and the result is handed to
 * lov_update_setattr_set() for the request's parent set. */
1048 static int cb_setattr_update(void *cookie, int rc)
1050 struct obd_info *oinfo = cookie;
1051 struct lov_request *lovreq;
1052 lovreq = container_of(oinfo, struct lov_request, rq_oi);
1053 return lov_update_setattr_set(lovreq->rq_rqset, lovreq, rc);
/* Build the request set for a setattr over oinfo->oi_md. The set records
 * oinfo and takes the log cookies from oti when oti is given and the obdo
 * has OBD_MD_FLCOOKIE set. One lov_request is created per stripe whose
 * target is active; each carries a copy of the caller's obdo with the
 * stripe's object id and index, the caller's capability, and
 * cb_setattr_update() as completion callback. When OBD_MD_FLSIZE is set, the
 * file size is converted into this stripe's o_size by lov_stripe_offset().
 * Fails with -ENOMEM on allocation failure and with -EIO when no request was
 * added (set_count is zero); the cleanup path calls lov_fini_setattr_set().
 * NOTE(review): the statement taken for an inactive target (presumably
 * 'continue'), the out_set label and the store to *reqset are absent from
 * this listing. */
1056 int lov_prep_setattr_set(struct obd_export *exp, struct obd_info *oinfo,
1057 struct obd_trans_info *oti,
1058 struct lov_request_set **reqset)
1060 struct lov_request_set *set;
1061 struct lov_obd *lov = &exp->exp_obd->u.lov;
1065 OBD_ALLOC(set, sizeof(*set));
1072 set->set_oi = oinfo;
1073 if (oti != NULL && oinfo->oi_oa->o_valid & OBD_MD_FLCOOKIE)
1074 set->set_cookies = oti->oti_logcookies;
1076 for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) {
1077 struct lov_oinfo *loi = oinfo->oi_md->lsm_oinfo[i];
1078 struct lov_request *req;
1080 if (!lov_check_and_wait_active(lov, loi->loi_ost_idx)) {
1081 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
1085 OBD_ALLOC(req, sizeof(*req));
1087 GOTO(out_set, rc = -ENOMEM);
1089 req->rq_idx = loi->loi_ost_idx;
1091 OBDO_ALLOC(req->rq_oi.oi_oa);
1092 if (req->rq_oi.oi_oa == NULL) {
1093 OBD_FREE(req, sizeof(*req));
1094 GOTO(out_set, rc = -ENOMEM);
1096 memcpy(req->rq_oi.oi_oa, oinfo->oi_oa,
1097 sizeof(*req->rq_oi.oi_oa));
1098 req->rq_oi.oi_oa->o_oi = loi->loi_oi;
1099 req->rq_oi.oi_oa->o_stripe_idx = i;
1100 req->rq_oi.oi_cb_up = cb_setattr_update;
1101 req->rq_oi.oi_capa = oinfo->oi_capa;
1103 if (oinfo->oi_oa->o_valid & OBD_MD_FLSIZE) {
1104 int off = lov_stripe_offset(oinfo->oi_md,
1105 oinfo->oi_oa->o_size, i,
1106 &req->rq_oi.oi_oa->o_size);
/* a negative result with a non-zero stripe size shrinks that size by one;
 * NOTE(review): the meaning of a negative return is defined by
 * lov_stripe_offset(), which is not shown here */
1108 if (off < 0 && req->rq_oi.oi_oa->o_size)
1109 req->rq_oi.oi_oa->o_size--;
1111 CDEBUG(D_INODE, "stripe %d has size "LPU64"/"LPU64"\n",
1112 i, req->rq_oi.oi_oa->o_size,
1113 oinfo->oi_oa->o_size);
1115 lov_set_add_req(req, set);
1117 if (!set->set_count)
1118 GOTO(out_set, rc = -EIO);
1122 lov_fini_setattr_set(set);
/* Finalise a punch (truncate) request set. Asserts set_exp; when at least
 * one request has completed and at least one succeeded, the per-stripe
 * attributes are merged with common_attr_done(), whose result becomes rc.
 * The set's reference is then dropped with lov_put_reqset().
 * NOTE(review): original line 1135 inside the completion branch and the
 * return statement are not visible in this listing. */
1126 int lov_fini_punch_set(struct lov_request_set *set)
1133 LASSERT(set->set_exp);
1134 if (cfs_atomic_read(&set->set_completes)) {
1136 /* FIXME update qos data here */
1137 if (cfs_atomic_read(&set->set_success))
1138 rc = common_attr_done(set);
1141 lov_put_reqset(set);
/* Per-request completion handler for punch. Records the result in the set,
 * treats a non-zero rc from an inactive target specially ("grace error"),
 * and, holding the stripe lock, copies the block count returned in the
 * request's obdo (when OBD_MD_FLBLOCKS is valid) into the lvb of the stripe
 * the request belongs to.
 * NOTE(review): unlike lov_update_common_set() and lov_update_setattr_set(),
 * the grace test below dereferences lov->lov_tgts[req->rq_idx] without first
 * checking it for NULL -- confirm the target cannot be NULL here, or align
 * the test with the other two handlers.
 * NOTE(review): the statement executed in the grace case, the condition
 * guarding the locked update and the return are not part of this listing. */
1146 int lov_update_punch_set(struct lov_request_set *set,
1147 struct lov_request *req, int rc)
1149 struct lov_obd *lov = &req->rq_rqset->set_exp->exp_obd->u.lov;
1150 struct lov_stripe_md *lsm = req->rq_rqset->set_oi->oi_md;
1153 lov_update_set(set, req, rc);
1155 /* grace error on inactive ost */
1156 if (rc && !lov->lov_tgts[req->rq_idx]->ltd_active)
1160 lov_stripe_lock(lsm);
1161 if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLBLOCKS) {
1162 lsm->lsm_oinfo[req->rq_stripe]->loi_lvb.lvb_blocks =
1163 req->rq_oi.oi_oa->o_blocks;
1166 lov_stripe_unlock(lsm);
1172 /* The callback for osc_punch that finalizes a request info when a response is received. */
/* cookie is the obd_info embedded in a lov_request as rq_oi; the request is
 * recovered with container_of and the result is handed to
 * lov_update_punch_set() for the request's parent set. */
1174 static int cb_update_punch(void *cookie, int rc)
1176 struct obd_info *oinfo = cookie;
1177 struct lov_request *lovreq;
1178 lovreq = container_of(oinfo, struct lov_request, rq_oi);
1179 return lov_update_punch_set(lovreq->rq_rqset, lovreq, rc);
/* Build the request set for a punch (truncate) over oinfo->oi_md. One
 * lov_request is created for every stripe that intersects the extent in
 * oinfo->oi_policy; an inactive target on such a stripe fails the whole
 * operation with -EIO. Each request carries a copy of the caller's obdo with
 * the stripe's object id and index and OBD_MD_FLGROUP added to o_valid, the
 * stripe-local extent (rs, re) with gid -1, the caller's capability, and
 * cb_update_punch() as completion callback.
 * Fails with -ENOMEM on allocation failure and with -EIO when no request was
 * added; the cleanup path calls lov_fini_punch_set().
 * NOTE(review): the declarations of rs/re, the handling of non-intersecting
 * stripes (presumably 'continue'), the out_set label and the store to
 * *reqset are absent from this listing. */
1182 int lov_prep_punch_set(struct obd_export *exp, struct obd_info *oinfo,
1183 struct obd_trans_info *oti,
1184 struct lov_request_set **reqset)
1186 struct lov_request_set *set;
1187 struct lov_obd *lov = &exp->exp_obd->u.lov;
1191 OBD_ALLOC(set, sizeof(*set));
1196 set->set_oi = oinfo;
1199 for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) {
1200 struct lov_oinfo *loi = oinfo->oi_md->lsm_oinfo[i];
1201 struct lov_request *req;
1204 if (!lov_stripe_intersects(oinfo->oi_md, i,
1205 oinfo->oi_policy.l_extent.start,
1206 oinfo->oi_policy.l_extent.end,
1210 if (!lov_check_and_wait_active(lov, loi->loi_ost_idx)) {
1211 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
1212 GOTO(out_set, rc = -EIO);
1215 OBD_ALLOC(req, sizeof(*req));
1217 GOTO(out_set, rc = -ENOMEM);
1219 req->rq_idx = loi->loi_ost_idx;
1221 OBDO_ALLOC(req->rq_oi.oi_oa);
1222 if (req->rq_oi.oi_oa == NULL) {
1223 OBD_FREE(req, sizeof(*req));
1224 GOTO(out_set, rc = -ENOMEM);
1226 memcpy(req->rq_oi.oi_oa, oinfo->oi_oa,
1227 sizeof(*req->rq_oi.oi_oa));
1228 req->rq_oi.oi_oa->o_oi = loi->loi_oi;
1229 req->rq_oi.oi_oa->o_valid |= OBD_MD_FLGROUP;
1231 req->rq_oi.oi_oa->o_stripe_idx = i;
1232 req->rq_oi.oi_cb_up = cb_update_punch;
1234 req->rq_oi.oi_policy.l_extent.start = rs;
1235 req->rq_oi.oi_policy.l_extent.end = re;
1236 req->rq_oi.oi_policy.l_extent.gid = -1;
1238 req->rq_oi.oi_capa = oinfo->oi_capa;
1240 lov_set_add_req(req, set);
1242 if (!set->set_count)
1243 GOTO(out_set, rc = -EIO);
1247 lov_fini_punch_set(set);
/* Finalise a sync request set. Asserts set_exp; when at least one request
 * has completed, the case where none succeeded is tested. The set's
 * reference is then dropped with lov_put_reqset().
 * NOTE(review): the statement executed when no request succeeded (original
 * line 1261, presumably setting an error code) and the return are not
 * visible in this listing. */
1251 int lov_fini_sync_set(struct lov_request_set *set)
1258 LASSERT(set->set_exp);
1259 if (cfs_atomic_read(&set->set_completes)) {
1260 if (!cfs_atomic_read(&set->set_success))
1262 /* FIXME update qos data here */
1265 lov_put_reqset(set);
1270 /* The callback for osc_sync that finalizes a request info when a
1271 * response is received. */
/* cookie is the obd_info embedded in a lov_request as rq_oi; the request is
 * recovered with container_of and the result is handed to
 * lov_update_common_set() for the request's parent set. */
1272 static int cb_sync_update(void *cookie, int rc)
1274 struct obd_info *oinfo = cookie;
1275 struct lov_request *lovreq;
1277 lovreq = container_of(oinfo, struct lov_request, rq_oi);
1278 return lov_update_common_set(lovreq->rq_rqset, lovreq, rc);
/* Build the request set for syncing the byte range [start, end] of
 * oinfo->oi_md. For each stripe the target is checked first (an inactive
 * target is logged) and then the stripe is tested for intersection with the
 * range. Each request created carries a copy of the caller's obdo with the
 * stripe's object id and index, the stripe-local extent (rs, re) with gid
 * -1, and cb_sync_update() as completion callback.
 * Fails with -ENOMEM on allocation failure and with -EIO when no request was
 * added; the cleanup path calls lov_fini_sync_set().
 * NOTE(review): the allocation of the set and of each request, the
 * statements taken for inactive or non-intersecting stripes (presumably
 * 'continue'), the out_set label and the store to *reqset are absent from
 * this listing (numbers skip 1286 -> 1296, 1304 -> 1308 -> 1314). */
1281 int lov_prep_sync_set(struct obd_export *exp, struct obd_info *oinfo,
1282 obd_off start, obd_off end,
1283 struct lov_request_set **reqset)
1285 struct lov_request_set *set;
1286 struct lov_obd *lov = &exp->exp_obd->u.lov;
1296 set->set_oi = oinfo;
1298 for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) {
1299 struct lov_oinfo *loi = oinfo->oi_md->lsm_oinfo[i];
1300 struct lov_request *req;
1303 if (!lov_check_and_wait_active(lov, loi->loi_ost_idx)) {
1304 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
1308 if (!lov_stripe_intersects(oinfo->oi_md, i, start, end, &rs,
1314 GOTO(out_set, rc = -ENOMEM);
1316 req->rq_idx = loi->loi_ost_idx;
1318 OBDO_ALLOC(req->rq_oi.oi_oa);
1319 if (req->rq_oi.oi_oa == NULL) {
1320 OBD_FREE(req, sizeof(*req));
1321 GOTO(out_set, rc = -ENOMEM);
/* structure assignment; the other prep functions use memcpy for this copy */
1323 *req->rq_oi.oi_oa = *oinfo->oi_oa;
1324 req->rq_oi.oi_oa->o_oi = loi->loi_oi;
1325 req->rq_oi.oi_oa->o_stripe_idx = i;
1327 req->rq_oi.oi_policy.l_extent.start = rs;
1328 req->rq_oi.oi_policy.l_extent.end = re;
1329 req->rq_oi.oi_policy.l_extent.gid = -1;
1330 req->rq_oi.oi_cb_up = cb_sync_update;
1332 lov_set_add_req(req, set);
1334 if (!set->set_count)
1335 GOTO(out_set, rc = -EIO);
1339 lov_fini_sync_set(set);
/* LOV_U64_MAX is the all-ones 64-bit value. LOV_SUM_MAX(tot, add) detects
 * unsigned wrap-around of tot + add (the sum being smaller than tot) and in
 * that case pins tot at LOV_U64_MAX.
 * NOTE(review): the remaining lines of the macro (original lines 1345 and
 * 1348 onwards, presumably the wrapper and the non-overflow addition) are
 * absent from this listing. Both arguments are evaluated more than once. */
1343 #define LOV_U64_MAX ((__u64)~0ULL)
1344 #define LOV_SUM_MAX(tot, add) \
1346 if ((tot) + (add) < (tot)) \
1347 (tot) = LOV_U64_MAX; \
/* Post-process aggregated statfs data. The object counters os_files and
 * os_ffree, unless pinned at LOV_U64_MAX, are divided by the expected stripe
 * count; the result is then copied into obd->obd_osfs under obd_osfs_lock
 * and obd_osfs_age is refreshed with the current time.
 * NOTE(review): how 'success' gates this work, the second argument of
 * lov_get_stripecnt() and the return value are not visible in this
 * listing. */
1352 int lov_fini_statfs(struct obd_device *obd, struct obd_statfs *osfs,int success)
1357 __u32 expected_stripes = lov_get_stripecnt(&obd->u.lov,
1359 if (osfs->os_files != LOV_U64_MAX)
1360 lov_do_div64(osfs->os_files, expected_stripes);
1361 if (osfs->os_ffree != LOV_U64_MAX)
1362 lov_do_div64(osfs->os_ffree, expected_stripes);
1364 spin_lock(&obd->obd_osfs_lock);
1365 memcpy(&obd->obd_osfs, osfs, sizeof(*osfs));
1366 obd->obd_osfs_age = cfs_time_current_64();
1367 spin_unlock(&obd->obd_osfs_lock);
/* Finalise a statfs request set. When at least one request has completed,
 * lov_fini_statfs() is run on the set's device and aggregate buffer with the
 * current success count, and its result becomes rc. The set's reference is
 * then dropped with lov_put_reqset().
 * NOTE(review): original lines 1375-1381 (including any NULL check on set)
 * and the return are not visible in this listing. */
1374 int lov_fini_statfs_set(struct lov_request_set *set)
1382 if (cfs_atomic_read(&set->set_completes)) {
1383 rc = lov_fini_statfs(set->set_obd, set->set_oi->oi_osfs,
1384 cfs_atomic_read(&set->set_success));
1386 lov_put_reqset(set);
/* Fold one target's statfs reply (lov_sfs) into the running aggregate
 * (osfs). Visible in this listing:
 *  - a plain copy of lov_sfs into osfs (NOTE(review): presumably taken only
 *    for the first successful reply; the guarding condition and the third
 *    parameter are not shown);
 *  - when the block sizes differ, a shift count is derived and the block
 *    counters of the side with the smaller block size are shifted right, the
 *    aggregate adopting the larger block size;
 *  - two ways of combining free/available blocks appear: taking the reply
 *    with the smaller os_bavail, and summing; what selects between them is
 *    not part of this listing;
 *  - os_blocks is summed, and os_files / os_ffree are summed with
 *    LOV_SUM_MAX so that they saturate instead of wrapping.
 * NOTE(review): the body of the shift-search loop is absent here; its bound
 * allows shift to reach 64, so confirm it can never be used as a shift count
 * on a 64-bit value. */
1390 void lov_update_statfs(struct obd_statfs *osfs, struct obd_statfs *lov_sfs,
1393 int shift = 0, quit = 0;
1397 memcpy(osfs, lov_sfs, sizeof(*lov_sfs));
1399 if (osfs->os_bsize != lov_sfs->os_bsize) {
1400 /* assume all block sizes are always powers of 2 */
1401 /* get the bits difference */
1402 tmp = osfs->os_bsize | lov_sfs->os_bsize;
1403 for (shift = 0; shift <= 64; ++shift) {
/* the aggregate has the smaller block size: adopt the larger one and rescale
 * the aggregate's block counters */
1415 if (osfs->os_bsize < lov_sfs->os_bsize) {
1416 osfs->os_bsize = lov_sfs->os_bsize;
1418 osfs->os_bfree >>= shift;
1419 osfs->os_bavail >>= shift;
1420 osfs->os_blocks >>= shift;
/* otherwise the reply has the smaller block size: rescale the reply */
1421 } else if (shift != 0) {
1422 lov_sfs->os_bfree >>= shift;
1423 lov_sfs->os_bavail >>= shift;
1424 lov_sfs->os_blocks >>= shift;
1427 /* Sandia requested that df (and so, statfs) only
1428 returned minimal available space on
1429 a single OST, so people would be able to
1430 write this much data guaranteed. */
1431 if (osfs->os_bavail > lov_sfs->os_bavail) {
1432 /* Presumably if new bavail is smaller,
1433 new bfree is bigger as well */
1434 osfs->os_bfree = lov_sfs->os_bfree;
1435 osfs->os_bavail = lov_sfs->os_bavail;
1438 osfs->os_bfree += lov_sfs->os_bfree;
1439 osfs->os_bavail += lov_sfs->os_bavail;
1441 osfs->os_blocks += lov_sfs->os_blocks;
1442 /* XXX not sure about this one - depends on policy.
1443 * - could be minimum if we always stripe on all OBDs
1444 * (but that would be wrong for any other policy,
1445 * if one of the OBDs has no more objects left)
1446 * - could be sum if we stripe whole objects
1447 * - could be average, just to give a nice number
1449 * To give a "reasonable" (if not wholly accurate)
1450 * number, we divide the total number of free objects
1451 * by expected stripe count (watch out for overflow).
1453 LOV_SUM_MAX(osfs->os_files, lov_sfs->os_files);
1454 LOV_SUM_MAX(osfs->os_ffree, lov_sfs->os_ffree);
1458 /* The callback for osc_statfs_async that finalizes a request info when a
1459 * response is received. */
/* Completion callback for one target's statfs. Recovers the lov_request from
 * the embedded obd_info, records the result in the set, and, if the target
 * is present and active, caches the reply in the target device's obd_osfs
 * (refreshing its age unless the reply came from cache) and folds it into
 * the set's aggregate with lov_update_statfs(). When the operation runs with
 * OBD_STATFS_PTLRPCD and the set has finished, lov_statfs_interpret() is
 * called with a non-zero status if not every request succeeded.
 * NOTE(review): the test on rc after lov_update_set() (numbers skip
 * 1481 -> 1486), the out_update / out labels and the return are not part of
 * this listing. */
1460 static int cb_statfs_update(void *cookie, int rc)
1462 struct obd_info *oinfo = cookie;
1463 struct lov_request *lovreq;
1464 struct lov_request_set *set;
1465 struct obd_statfs *osfs, *lov_sfs;
1466 struct lov_obd *lov;
1467 struct lov_tgt_desc *tgt;
1468 struct obd_device *lovobd, *tgtobd;
1472 lovreq = container_of(oinfo, struct lov_request, rq_oi);
1473 set = lovreq->rq_rqset;
1474 lovobd = set->set_obd;
1475 lov = &lovobd->u.lov;
/* osfs is the set-wide aggregate, lov_sfs this request's own reply buffer */
1476 osfs = set->set_oi->oi_osfs;
1477 lov_sfs = oinfo->oi_osfs;
/* sampled before lov_update_set() below touches the counters, so it reflects
 * the replies accounted for before this one */
1478 success = cfs_atomic_read(&set->set_success);
1479 /* XXX: the same is done in lov_update_common_set, however
1480 lovset->set_exp is not initialized. */
1481 lov_update_set(set, lovreq, rc);
1486 tgt = lov->lov_tgts[lovreq->rq_idx];
1487 if (!tgt || !tgt->ltd_active)
1488 GOTO(out_update, rc);
1490 tgtobd = class_exp2obd(tgt->ltd_exp);
1491 spin_lock(&tgtobd->obd_osfs_lock);
1492 memcpy(&tgtobd->obd_osfs, lov_sfs, sizeof(*lov_sfs));
1493 if ((oinfo->oi_flags & OBD_STATFS_FROM_CACHE) == 0)
1494 tgtobd->obd_osfs_age = cfs_time_current_64();
1495 spin_unlock(&tgtobd->obd_osfs_lock);
1498 lov_update_statfs(osfs, lov_sfs, success);
1502 if (set->set_oi->oi_flags & OBD_STATFS_PTLRPCD &&
1503 lov_set_finished(set, 0)) {
1504 lov_statfs_interpret(NULL, set, set->set_count !=
1505 cfs_atomic_read(&set->set_success));
/* Build the request set for a statfs across the targets of obd. Targets are
 * walked by index up to desc.ld_tgt_count. A target is logged as inactive
 * when its slot is NULL, or when it fails lov_check_and_wait_active() and
 * the caller passed OBD_STATFS_NODELAY; a target without an export is logged
 * as administratively disabled. For the remaining targets a lov_request with
 * its own obd_statfs buffer is created, with cb_statfs_update() as
 * completion callback and the caller's flags.
 * Fails with -ENOMEM on allocation failure and with -EIO when no request was
 * added; the cleanup path calls lov_fini_statfs_set().
 * NOTE(review): the statements following the two debug messages (presumably
 * 'continue'), the assignment of the request's target index, the out_set
 * label and the store to *reqset are absent from this listing. */
1511 int lov_prep_statfs_set(struct obd_device *obd, struct obd_info *oinfo,
1512 struct lov_request_set **reqset)
1514 struct lov_request_set *set;
1515 struct lov_obd *lov = &obd->u.lov;
1519 OBD_ALLOC(set, sizeof(*set));
1525 set->set_oi = oinfo;
1527 /* We only get block data from the OBD */
1528 for (i = 0; i < lov->desc.ld_tgt_count; i++) {
1529 struct lov_request *req;
1531 if (lov->lov_tgts[i] == NULL ||
1532 (!lov_check_and_wait_active(lov, i) &&
1533 (oinfo->oi_flags & OBD_STATFS_NODELAY))) {
1534 CDEBUG(D_HA, "lov idx %d inactive\n", i);
1538 /* skip targets that have been explicitely disabled by the
1540 if (!lov->lov_tgts[i]->ltd_exp) {
1541 CDEBUG(D_HA, "lov idx %d administratively disabled\n", i);
1545 OBD_ALLOC(req, sizeof(*req));
1547 GOTO(out_set, rc = -ENOMEM);
1549 OBD_ALLOC(req->rq_oi.oi_osfs, sizeof(*req->rq_oi.oi_osfs));
1550 if (req->rq_oi.oi_osfs == NULL) {
1551 OBD_FREE(req, sizeof(*req));
1552 GOTO(out_set, rc = -ENOMEM);
1556 req->rq_oi.oi_cb_up = cb_statfs_update;
1557 req->rq_oi.oi_flags = oinfo->oi_flags;
1559 lov_set_add_req(req, set);
1561 if (!set->set_count)
1562 GOTO(out_set, rc = -EIO);
1566 lov_fini_statfs_set(set);