1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
6 * This file is part of the Lustre file system, http://www.lustre.org
7 * Lustre is a trademark of Cluster File Systems, Inc.
9 * You may have signed or agreed to another license before downloading
10 * this software. If so, you are bound by the terms and conditions
11 * of that agreement, and the following does not apply to you. See the
12 * LICENSE file included with this distribution for more information.
14 * If you did not agree to a different license, then this copy of Lustre
15 * is open source software; you can redistribute it and/or modify it
16 * under the terms of version 2 of the GNU General Public License as
17 * published by the Free Software Foundation.
19 * In either case, Lustre is distributed in the hope that it will be
20 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
21 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * license text for more details.
26 # define EXPORT_SYMTAB
28 #define DEBUG_SUBSYSTEM S_LOV
31 #include <libcfs/libcfs.h>
33 #include <liblustre.h>
36 #include <obd_class.h>
38 #include <lustre/lustre_idl.h>
40 #include "lov_internal.h"
/* Put a request set into its starting state. Among the visible fields this
 * means zero completed sub-requests, an empty request list and a reference
 * count of one. */
42 static void lov_init_set(struct lov_request_set *set)
45 set->set_completes = 0;
48 CFS_INIT_LIST_HEAD(&set->set_list);
49 atomic_set(&set->set_refcount, 1);
/* Release a request set and everything hanging off it: every request on
 * set_list is unlinked and freed along with its obdo, its stripe md copy
 * (rq_buflen bytes) and its statfs buffer, then the page array, the lock
 * handle reference and the set itself are released.
 * NOTE(review): most guard lines are missing from this excerpt, so which of
 * these frees are conditional cannot be confirmed from here. */
52 static void lov_finish_set(struct lov_request_set *set)
54 struct list_head *pos, *n;
/* the _safe iterator is used because each entry is unlinked in the loop body */
58 list_for_each_safe(pos, n, &set->set_list) {
59 struct lov_request *req = list_entry(pos, struct lov_request,
61 list_del_init(&req->rq_link);
64 OBDO_FREE(req->rq_oi.oi_oa);
66 OBD_FREE(req->rq_oi.oi_md, req->rq_buflen);
67 if (req->rq_oi.oi_osfs)
68 OBD_FREE(req->rq_oi.oi_osfs,
69 sizeof(*req->rq_oi.oi_osfs));
70 OBD_FREE(req, sizeof(*req));
/* the free size is recomputed as set_oabufs entries of set_pga */
74 int len = set->set_oabufs * sizeof(*set->set_pga);
75 OBD_FREE(set->set_pga, len);
78 lov_llh_put(set->set_lockh);
80 OBD_FREE(set, sizeof(*set));
/* Record the outcome rc of one sub-request req in its set. Every per-request
 * completion helper in this file funnels through this function.
 * NOTE(review): the body is not visible in this excerpt, so exactly which
 * counters it touches cannot be stated from here. */
84 void lov_update_set(struct lov_request_set *set,
85 struct lov_request *req, int rc)
/* Generic completion handler for one sub-request: the result is recorded
 * through lov_update_set(), then a nonzero rc is examined against the state
 * of the target OST (slot empty or target not active).
 * NOTE(review): the statement guarded by that test and the return statement
 * are missing from this excerpt; presumably the error is forgiven for an
 * inactive OST - confirm against the full file. */
95 int lov_update_common_set(struct lov_request_set *set,
96 struct lov_request *req, int rc)
98 struct lov_obd *lov = &set->set_exp->exp_obd->u.lov;
101 lov_update_set(set, req, rc);
103 /* grace error on inactive ost */
104 if (rc && !(lov->lov_tgts[req->rq_idx] &&
105 lov->lov_tgts[req->rq_idx]->ltd_active))
108 /* FIXME in raid1 regime, should return 0 */
/* Queue req at the tail of the request list of set.
 * NOTE(review): any further bookkeeping after the list insertion is not
 * visible in this excerpt. */
112 void lov_set_add_req(struct lov_request *req, struct lov_request_set *set)
114 list_add_tail(&req->rq_link, &set->set_list);
/* Handle the result of one per-stripe enqueue.
 *  - rc == ELDLM_OK: the stripe lvb is refreshed from the sub-request copy
 *    and a candidate KMS is derived from the lvb size, capped by the lock
 *    extent end; the lock is then allowed to match.
 *  - rc == ELDLM_LOCK_ABORTED with an intent: the lock handle slot is zeroed
 *    and only the lvb is refreshed (glimpse).
 *  - anything else: the lock handle slot is zeroed and the failure is logged
 *    when the target OST is still active.
 * In all cases the result is recorded with lov_update_set().
 * The mode argument is not used by any visible line.
 * NOTE(review): several lines (kms assignment, lock release, returns) are
 * missing from this excerpt. */
118 int lov_update_enqueue_set(struct lov_request *req, __u32 mode, int rc)
120 struct lov_request_set *set = req->rq_rqset;
121 struct lustre_handle *lov_lockhp;
122 struct lov_oinfo *loi;
125 LASSERT(set != NULL);
126 LASSERT(set->set_oi != NULL);
/* both the lock handle slot and the stripe info are selected by rq_stripe */
128 lov_lockhp = set->set_lockh->llh_handles + req->rq_stripe;
129 loi = set->set_oi->oi_md->lsm_oinfo[req->rq_stripe];
131 /* XXX LOV STACKING: OSC gets a copy, created in lov_prep_enqueue_set
132 * and that copy can be arbitrarily out of date.
134 * The LOV API is due for a serious rewriting anyways, and this
135 * can be addressed then. */
137 if (rc == ELDLM_OK) {
138 struct ldlm_lock *lock = ldlm_handle2lock(lov_lockhp);
141 LASSERT(lock != NULL);
/* the lvb and kms fields of the stripe are updated under the stripe lock */
142 lov_stripe_lock(set->set_oi->oi_md);
143 loi->loi_lvb = req->rq_oi.oi_md->lsm_oinfo[0]->loi_lvb;
144 tmp = loi->loi_lvb.lvb_size;
145 /* Extend KMS up to the end of this lock and no further
146 * A lock on [x,y] means a KMS of up to y + 1 bytes! */
147 if (tmp > lock->l_policy_data.l_extent.end)
148 tmp = lock->l_policy_data.l_extent.end + 1;
149 if (tmp >= loi->loi_kms) {
150 LDLM_DEBUG(lock, "lock acquired, setting rss="LPU64
151 ", kms="LPU64, loi->loi_lvb.lvb_size, tmp);
153 loi->loi_kms_valid = 1;
155 LDLM_DEBUG(lock, "lock acquired, setting rss="
156 LPU64"; leaving kms="LPU64", end="LPU64,
157 loi->loi_lvb.lvb_size, loi->loi_kms,
158 lock->l_policy_data.l_extent.end);
160 lov_stripe_unlock(set->set_oi->oi_md);
161 ldlm_lock_allow_match(lock);
163 } else if ((rc == ELDLM_LOCK_ABORTED) &&
164 (set->set_ei->ei_flags & LDLM_FL_HAS_INTENT)) {
/* no lock is kept on this path, so the handle slot is cleared */
165 memset(lov_lockhp, 0, sizeof(*lov_lockhp));
166 lov_stripe_lock(set->set_oi->oi_md);
167 loi->loi_lvb = req->rq_oi.oi_md->lsm_oinfo[0]->loi_lvb;
168 lov_stripe_unlock(set->set_oi->oi_md);
169 CDEBUG(D_INODE, "glimpsed, setting rss="LPU64"; leaving"
170 " kms="LPU64"\n", loi->loi_lvb.lvb_size, loi->loi_kms);
173 struct obd_export *exp = set->set_exp;
174 struct lov_obd *lov = &exp->exp_obd->u.lov;
176 memset(lov_lockhp, 0, sizeof(*lov_lockhp));
/* the error is only reported when the target exists and is active */
177 if (lov->lov_tgts[req->rq_idx] &&
178 lov->lov_tgts[req->rq_idx]->ltd_active) {
180 CERROR("enqueue objid "LPX64" subobj "
181 LPX64" on OST idx %d: rc %d\n",
182 set->set_oi->oi_md->lsm_object_id,
183 loi->loi_id, loi->loi_ost_idx, rc);
188 lov_update_set(set, req, rc);
/* Returns the value produced by lov_update_enqueue_set() for this request;
 * the lock mode passed on is taken from the enqueue info of the owning set. */
192 /* The callback for osc_enqueue that updates lov info for every OSC request. */
193 static int cb_update_enqueue(struct obd_info *oinfo, int rc)
195 struct obd_enqueue_info *einfo;
196 struct lov_request *lovreq;
/* oinfo is the rq_oi member embedded in a lov_request; recover the request */
198 lovreq = container_of(oinfo, struct lov_request, rq_oi);
199 einfo = lovreq->rq_rqset->set_ei;
200 return lov_update_enqueue_set(lovreq, einfo->ei_mode, rc);
/* Wrap up an enqueue or match set. When at least one request completed and
 * all completed requests succeeded there is nothing to undo. Otherwise the
 * requests on the set are walked, requests that did not complete or that
 * failed are tested for, and obd_cancel() is issued on the target OST for
 * a sub-lock handle; a cancel failure on an active OST is logged. Finally
 * the lock handle reference of the set is put.
 * NOTE(review): the return statements, the loop continue lines and some
 * declarations are missing from this excerpt, so the exact return value
 * cannot be stated from here. */
203 static int enqueue_done(struct lov_request_set *set, __u32 mode)
205 struct lov_request *req;
206 struct lov_obd *lov = &set->set_exp->exp_obd->u.lov;
210 /* enqueue/match success, just return */
211 if (set->set_completes && set->set_completes == set->set_success)
214 /* cancel enqueued/matched locks */
215 list_for_each_entry(req, &set->set_list, rq_link) {
216 struct lustre_handle *lov_lockhp;
218 if (!req->rq_complete || req->rq_rc)
221 lov_lockhp = set->set_lockh->llh_handles + req->rq_stripe;
223 if (!lustre_handle_is_used(lov_lockhp))
226 rc = obd_cancel(lov->lov_tgts[req->rq_idx]->ltd_exp,
227 req->rq_oi.oi_md, mode, lov_lockhp);
228 if (rc && lov->lov_tgts[req->rq_idx] &&
229 lov->lov_tgts[req->rq_idx]->ltd_active)
230 CERROR("cancelling obdjid "LPX64" on OST "
231 "idx %d error: rc = %d\n",
232 req->rq_oi.oi_md->lsm_object_id,
236 lov_llh_put(set->set_lockh);
/* Finalise an enqueue set and drop one reference on it. When the enqueue
 * info carries no ei_rqset, enqueue_done() is run; otherwise the lock handle
 * reference is put if a lock handle exists. The visible return expression
 * yields rc when it is nonzero and the enqueue_done() result otherwise.
 * NOTE(review): the condition preceding the set_completes reset and the
 * action taken when the reference count reaches zero are missing from this
 * excerpt. */
240 int lov_fini_enqueue_set(struct lov_request_set *set, __u32 mode, int rc)
247 LASSERT(set->set_exp);
248 /* Do enqueue_done only for sync requests and if any request
250 if (!set->set_ei->ei_rqset) {
252 set->set_completes = 0;
253 ret = enqueue_done(set, mode);
254 } else if (set->set_lockh)
255 lov_llh_put(set->set_lockh);
257 if (atomic_dec_and_test(&set->set_refcount))
260 RETURN(rc ? rc : ret);
/* Build the request set for an enqueue. A set and a lov lock handle are
 * allocated, the handle cookie is published through oinfo->oi_lockh, and the
 * stripes of oinfo->oi_md are walked: each stripe is tested for intersection
 * with the requested extent and for an active target, and a sub-request is
 * allocated with its own small lov_stripe_md copy describing just that
 * sub-object. Errors jump to out_set, and the visible tail hands the set to
 * lov_fini_enqueue_set().
 * NOTE(review): the skip statements after the intersection and inactive
 * checks, the guard in front of the final -EIO and the success return are
 * missing from this excerpt. */
263 int lov_prep_enqueue_set(struct obd_export *exp, struct obd_info *oinfo,
264 struct obd_enqueue_info *einfo,
265 struct lov_request_set **reqset)
267 struct lov_obd *lov = &exp->exp_obd->u.lov;
268 struct lov_request_set *set;
272 OBD_ALLOC(set, sizeof(*set));
280 set->set_lockh = lov_llh_new(oinfo->oi_md);
281 if (set->set_lockh == NULL)
282 GOTO(out_set, rc = -ENOMEM);
/* the caller receives the cookie of the lov level lock handle */
283 oinfo->oi_lockh->cookie = set->set_lockh->llh_handle.h_cookie;
285 for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) {
286 struct lov_oinfo *loi;
287 struct lov_request *req;
290 loi = oinfo->oi_md->lsm_oinfo[i];
291 if (!lov_stripe_intersects(oinfo->oi_md, i,
292 oinfo->oi_policy.l_extent.start,
293 oinfo->oi_policy.l_extent.end,
297 if (!lov->lov_tgts[loi->loi_ost_idx] ||
298 !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) {
299 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
303 OBD_ALLOC(req, sizeof(*req));
305 GOTO(out_set, rc = -ENOMEM);
/* one buffer holds the md header, one lov_oinfo pointer slot and the
 * lov_oinfo that slot is pointed at further down */
307 req->rq_buflen = sizeof(*req->rq_oi.oi_md) +
308 sizeof(struct lov_oinfo *) +
309 sizeof(struct lov_oinfo);
310 OBD_ALLOC(req->rq_oi.oi_md, req->rq_buflen);
311 if (req->rq_oi.oi_md == NULL) {
312 OBD_FREE(req, sizeof(*req));
313 GOTO(out_set, rc = -ENOMEM);
315 req->rq_oi.oi_md->lsm_oinfo[0] =
316 ((void *)req->rq_oi.oi_md) + sizeof(*req->rq_oi.oi_md) +
317 sizeof(struct lov_oinfo *);
321 /* Set lov request specific parameters. */
322 req->rq_oi.oi_lockh = set->set_lockh->llh_handles + i;
323 req->rq_oi.oi_cb_up = cb_update_enqueue;
325 LASSERT(req->rq_oi.oi_lockh);
327 req->rq_oi.oi_policy.l_extent.gid =
328 oinfo->oi_policy.l_extent.gid;
329 req->rq_oi.oi_policy.l_extent.start = start;
330 req->rq_oi.oi_policy.l_extent.end = end;
332 req->rq_idx = loi->loi_ost_idx;
335 /* XXX LOV STACKING: submd should be from the subobj */
336 req->rq_oi.oi_md->lsm_object_id = loi->loi_id;
337 req->rq_oi.oi_md->lsm_object_gr = oinfo->oi_md->lsm_object_gr;
338 req->rq_oi.oi_md->lsm_stripe_count = 0;
339 req->rq_oi.oi_md->lsm_oinfo[0]->loi_kms_valid =
341 req->rq_oi.oi_md->lsm_oinfo[0]->loi_kms = loi->loi_kms;
342 req->rq_oi.oi_md->lsm_oinfo[0]->loi_lvb = loi->loi_lvb;
344 lov_set_add_req(req, set);
347 GOTO(out_set, rc = -EIO);
351 lov_fini_enqueue_set(set, einfo->ei_mode, rc);
/* Record the outcome of one per-stripe match request through
 * lov_update_set().
 * NOTE(review): the rest of the parameter list and the way ret is derived
 * are missing from this excerpt. */
355 int lov_update_match_set(struct lov_request_set *set, struct lov_request *req,
365 lov_update_set(set, req, ret);
/* Finalise a match set: enqueue_done() is run, the lock handle reference is
 * put when every request in the set succeeded and the caller passed
 * LDLM_FL_TEST_LOCK, and one reference on the set is dropped.
 * NOTE(review): the action taken when the reference count reaches zero and
 * the return statement are missing from this excerpt. */
369 int lov_fini_match_set(struct lov_request_set *set, __u32 mode, int flags)
376 LASSERT(set->set_exp);
377 rc = enqueue_done(set, mode);
378 if ((set->set_count == set->set_success) &&
379 (flags & LDLM_FL_TEST_LOCK))
380 lov_llh_put(set->set_lockh);
382 if (atomic_dec_and_test(&set->set_refcount))
/* Build the request set for a lock match. A set and a lov lock handle are
 * allocated, the handle cookie is written to lockh, and the stripes of lsm
 * are walked with an intersection test against the policy extent. An
 * inactive or missing target makes the whole preparation fail with -EIO.
 * Each sub-request carries a lov_stripe_md header naming its sub-object and
 * the stripe-local extent. Errors jump to out_set, and the visible tail calls
 * lov_fini_match_set() with flags 0.
 * NOTE(review): the skip after the intersection test, the guard in front of
 * the final -EIO and the success return are missing from this excerpt. */
388 int lov_prep_match_set(struct obd_export *exp, struct obd_info *oinfo,
389 struct lov_stripe_md *lsm, ldlm_policy_data_t *policy,
390 __u32 mode, struct lustre_handle *lockh,
391 struct lov_request_set **reqset)
393 struct lov_obd *lov = &exp->exp_obd->u.lov;
394 struct lov_request_set *set;
398 OBD_ALLOC(set, sizeof(*set));
405 set->set_oi->oi_md = lsm;
406 set->set_lockh = lov_llh_new(lsm);
407 if (set->set_lockh == NULL)
408 GOTO(out_set, rc = -ENOMEM);
409 lockh->cookie = set->set_lockh->llh_handle.h_cookie;
411 for (i = 0; i < lsm->lsm_stripe_count; i++){
412 struct lov_oinfo *loi;
413 struct lov_request *req;
416 loi = lsm->lsm_oinfo[i];
417 if (!lov_stripe_intersects(lsm, i, policy->l_extent.start,
418 policy->l_extent.end, &start, &end))
421 /* FIXME raid1 should grace this error */
422 if (!lov->lov_tgts[loi->loi_ost_idx] ||
423 !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) {
424 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
425 GOTO(out_set, rc = -EIO);
428 OBD_ALLOC(req, sizeof(*req));
430 GOTO(out_set, rc = -ENOMEM);
/* only the md header is allocated here; no lov_oinfo slot is reserved */
432 req->rq_buflen = sizeof(*req->rq_oi.oi_md);
433 OBD_ALLOC(req->rq_oi.oi_md, req->rq_buflen);
434 if (req->rq_oi.oi_md == NULL) {
435 OBD_FREE(req, sizeof(*req));
436 GOTO(out_set, rc = -ENOMEM);
439 req->rq_oi.oi_policy.l_extent.start = start;
440 req->rq_oi.oi_policy.l_extent.end = end;
441 req->rq_oi.oi_policy.l_extent.gid = policy->l_extent.gid;
443 req->rq_idx = loi->loi_ost_idx;
446 /* XXX LOV STACKING: submd should be from the subobj */
447 req->rq_oi.oi_md->lsm_object_id = loi->loi_id;
448 req->rq_oi.oi_md->lsm_object_gr = lsm->lsm_object_gr;
449 req->rq_oi.oi_md->lsm_stripe_count = 0;
451 lov_set_add_req(req, set);
454 GOTO(out_set, rc = -EIO);
458 lov_fini_match_set(set, mode, 0);
/* Finalise a cancel set: the lock handle reference is put and one reference
 * on the set is dropped.
 * NOTE(review): any guard on the lock handle, the action taken when the
 * reference count reaches zero and the return statement are missing from
 * this excerpt. */
462 int lov_fini_cancel_set(struct lov_request_set *set)
470 LASSERT(set->set_exp);
472 lov_llh_put(set->set_lockh);
474 if (atomic_dec_and_test(&set->set_refcount))
/* Build the request set for a lock cancel. The lov lock handle is looked up
 * from lockh (failure yields -EINVAL) and the stripes of lsm are walked; a
 * stripe whose sub-lock handle is unused is reported with a debug message.
 * Each sub-request carries a lov_stripe_md header naming its sub-object.
 * Errors jump to out_set, and the visible tail calls lov_fini_cancel_set().
 * NOTE(review): the skip after the unused-handle message, the guard in front
 * of the final -EIO and the success return are missing from this excerpt. */
480 int lov_prep_cancel_set(struct obd_export *exp, struct obd_info *oinfo,
481 struct lov_stripe_md *lsm, __u32 mode,
482 struct lustre_handle *lockh,
483 struct lov_request_set **reqset)
485 struct lov_request_set *set;
489 OBD_ALLOC(set, sizeof(*set));
496 set->set_oi->oi_md = lsm;
/* unlike the enqueue and match paths, the handle is looked up, not created */
497 set->set_lockh = lov_handle2llh(lockh);
498 if (set->set_lockh == NULL) {
499 CERROR("LOV: invalid lov lock handle %p\n", lockh);
500 GOTO(out_set, rc = -EINVAL);
502 lockh->cookie = set->set_lockh->llh_handle.h_cookie;
504 for (i = 0; i < lsm->lsm_stripe_count; i++){
505 struct lov_request *req;
506 struct lustre_handle *lov_lockhp;
507 struct lov_oinfo *loi = lsm->lsm_oinfo[i];
509 lov_lockhp = set->set_lockh->llh_handles + i;
510 if (!lustre_handle_is_used(lov_lockhp)) {
511 CDEBUG(D_HA, "lov idx %d subobj "LPX64" no lock?\n",
512 loi->loi_ost_idx, loi->loi_id);
516 OBD_ALLOC(req, sizeof(*req));
518 GOTO(out_set, rc = -ENOMEM);
520 req->rq_buflen = sizeof(*req->rq_oi.oi_md);
521 OBD_ALLOC(req->rq_oi.oi_md, req->rq_buflen);
522 if (req->rq_oi.oi_md == NULL) {
523 OBD_FREE(req, sizeof(*req));
524 GOTO(out_set, rc = -ENOMEM);
527 req->rq_idx = loi->loi_ost_idx;
530 /* XXX LOV STACKING: submd should be from the subobj */
531 req->rq_oi.oi_md->lsm_object_id = loi->loi_id;
532 req->rq_oi.oi_md->lsm_object_gr = lsm->lsm_object_gr;
533 req->rq_oi.oi_md->lsm_stripe_count = 0;
535 lov_set_add_req(req, set);
538 GOTO(out_set, rc = -EIO);
542 lov_fini_cancel_set(set);
/* Complete an object-create set.
 * When fewer creates succeeded than were requested, requests on the list are
 * re-driven through qos_remedy_create() and their outcome is recorded with
 * lov_update_create_set(). The attributes of the successful sub-objects are
 * then merged into ret_oa, which is stamped with the id and group of the
 * caller obdo and copied back over it; the stripe md is returned via lsmp.
 * The later loop destroys the sub-objects of successful requests and frees
 * the stripe md. At the end the log cookies of the set are attached to oti;
 * they are freed when no cookie was sent.
 * NOTE(review): labels, several conditions, the ret_oa allocation and the
 * return statements are missing from this excerpt, so the exact control
 * flow between these phases cannot be confirmed from here. */
546 static int create_done(struct obd_export *exp, struct lov_request_set *set,
547 struct lov_stripe_md **lsmp)
549 struct lov_obd *lov = &exp->exp_obd->u.lov;
550 struct obd_trans_info *oti = set->set_oti;
551 struct obdo *src_oa = set->set_oi->oi_oa;
552 struct lov_request *req;
553 struct obdo *ret_oa = NULL;
554 int attrset = 0, rc = 0;
557 LASSERT(set->set_completes);
559 /* try alloc objects on other osts if osc_create fails for
560 * exceptions: RPC failure, ENOSPC, etc */
561 if (set->set_count != set->set_success) {
562 list_for_each_entry (req, &set->set_list, rq_link) {
/* the request is marked as not completed again before it is retried */
566 set->set_completes--;
567 req->rq_complete = 0;
569 rc = qos_remedy_create(set, req);
570 lov_update_create_set(set, req, rc);
577 /* no successful creates */
578 if (set->set_success == 0)
581 /* If there was an explicit stripe set, fail. Otherwise, we
582 * got some objects and that's not bad. */
583 if (set->set_count != set->set_success) {
586 set->set_count = set->set_success;
592 GOTO(cleanup, rc = -ENOMEM);
594 list_for_each_entry(req, &set->set_list, rq_link) {
595 if (!req->rq_complete || req->rq_rc)
597 lov_merge_attrs(ret_oa, req->rq_oi.oi_oa,
598 req->rq_oi.oi_oa->o_valid, set->set_oi->oi_md,
599 req->rq_stripe, &attrset);
601 if (src_oa->o_valid & OBD_MD_FLSIZE &&
602 ret_oa->o_size != src_oa->o_size) {
603 CERROR("original size "LPU64" isn't new object size "LPU64"\n",
604 src_oa->o_size, ret_oa->o_size);
/* the merged result keeps the identity of the object the caller asked for */
607 ret_oa->o_id = src_oa->o_id;
608 ret_oa->o_gr = src_oa->o_gr;
609 ret_oa->o_valid |= OBD_MD_FLID | OBD_MD_FLGROUP;
610 memcpy(src_oa, ret_oa, sizeof(*src_oa));
613 *lsmp = set->set_oi->oi_md;
617 list_for_each_entry(req, &set->set_list, rq_link) {
618 struct obd_export *sub_exp;
621 if (!req->rq_complete || req->rq_rc)
624 sub_exp = lov->lov_tgts[req->rq_idx]->ltd_exp;
625 err = obd_destroy(sub_exp, req->rq_oi.oi_oa, NULL, oti, NULL);
627 CERROR("Failed to uncreate objid "LPX64" subobj "
628 LPX64" on OST idx %d: rc = %d\n",
629 src_oa->o_id, req->rq_oi.oi_oa->o_id,
633 obd_free_memmd(exp, &set->set_oi->oi_md);
635 if (oti && set->set_cookies) {
636 oti->oti_logcookies = set->set_cookies;
637 if (!set->set_cookie_sent) {
638 oti_free_cookies(oti);
639 src_oa->o_valid &= ~OBD_MD_FLCOOKIE;
641 src_oa->o_valid |= OBD_MD_FLCOOKIE;
/* Finalise a create set: when at least one sub-request completed,
 * create_done() is run with the export of the set, and one reference on the
 * set is dropped.
 * NOTE(review): the action taken when the reference count reaches zero and
 * the return statement are missing from this excerpt. */
647 int lov_fini_create_set(struct lov_request_set *set,struct lov_stripe_md **lsmp)
654 LASSERT(set->set_exp);
655 if (set->set_completes)
656 rc = create_done(set->set_exp, set, lsmp);
658 if (atomic_dec_and_test(&set->set_refcount))
/* Handle the result of one sub-object create. The request is assigned the
 * next free stripe slot (the current success count). A failure on an active
 * target is logged, and the result is recorded with lov_update_set(). The
 * later lines store the new object id, group and OST index in the stripe
 * info, optionally note the id in oti->oti_objid, advance the log cookie
 * cursor and count sent cookies.
 * NOTE(review): the early-return lines separating the failure and success
 * paths are missing from this excerpt. */
664 int lov_update_create_set(struct lov_request_set *set,
665 struct lov_request *req, int rc)
667 struct obd_trans_info *oti = set->set_oti;
668 struct lov_stripe_md *lsm = set->set_oi->oi_md;
669 struct lov_oinfo *loi;
670 struct lov_obd *lov = &set->set_exp->exp_obd->u.lov;
/* stripe slots are handed out in order of success */
673 req->rq_stripe = set->set_success;
674 loi = lsm->lsm_oinfo[req->rq_stripe];
676 if (rc && lov->lov_tgts[req->rq_idx] &&
677 lov->lov_tgts[req->rq_idx]->ltd_active) {
678 CERROR("error creating fid "LPX64" sub-object"
679 " on OST idx %d/%d: rc = %d\n",
680 set->set_oi->oi_oa->o_id, req->rq_idx,
681 lsm->lsm_stripe_count, rc);
683 CERROR("obd_create returned invalid err %d\n", rc);
687 lov_update_set(set, req, rc);
691 if (oti && oti->oti_objid)
692 oti->oti_objid[req->rq_idx] = req->rq_oi.oi_oa->o_id;
694 loi->loi_id = req->rq_oi.oi_oa->o_id;
695 loi->loi_gr = req->rq_oi.oi_oa->o_gr;
696 loi->loi_ost_idx = req->rq_idx;
697 CDEBUG(D_INODE, "objid "LPX64" has subobj "LPX64"/"LPU64" at idx %d\n",
698 lsm->lsm_object_id, loi->loi_id, loi->loi_id, req->rq_idx);
701 if (oti && set->set_cookies)
702 ++oti->oti_logcookies;
703 if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLCOOKIE)
704 set->set_cookie_sent++;
/* Build the request set for an object create. The set is allocated, the
 * caller stripe md and source obdo are attached to it, and the per-OST
 * requests are prepared by qos_prep_create(). The visible tail calls
 * lov_fini_create_set().
 * NOTE(review): the condition guarding that call and the return statement
 * are missing from this excerpt. */
709 int lov_prep_create_set(struct obd_export *exp, struct obd_info *oinfo,
710 struct lov_stripe_md **lsmp, struct obdo *src_oa,
711 struct obd_trans_info *oti,
712 struct lov_request_set **reqset)
714 struct lov_request_set *set;
718 OBD_ALLOC(set, sizeof(*set));
725 set->set_oi->oi_md = *lsmp;
726 set->set_oi->oi_oa = src_oa;
729 rc = qos_prep_create(exp, set);
731 lov_fini_create_set(set, lsmp);
/* Merge the attributes returned by the successful sub-requests of a set into
 * the obdo of the caller. Requests that did not complete, that failed, or
 * whose obdo has no valid bits are tested for before merging. The merged
 * obdo takes the object id of the caller obdo and is then copied over it.
 * NOTE(review): the tmp_oa allocation, the early returns, the loop continue
 * lines and the cleanup path are missing from this excerpt. */
737 static int common_attr_done(struct lov_request_set *set)
739 struct list_head *pos;
740 struct lov_request *req;
742 int rc = 0, attrset = 0;
745 LASSERT(set->set_oi != NULL);
747 if (set->set_oi->oi_oa == NULL)
750 if (!set->set_success)
755 GOTO(out, rc = -ENOMEM);
757 list_for_each (pos, &set->set_list) {
758 req = list_entry(pos, struct lov_request, rq_link);
760 if (!req->rq_complete || req->rq_rc)
762 if (req->rq_oi.oi_oa->o_valid == 0) /* inactive stripe */
764 lov_merge_attrs(tmp_oa, req->rq_oi.oi_oa,
765 req->rq_oi.oi_oa->o_valid,
766 set->set_oi->oi_md, req->rq_stripe, &attrset);
769 CERROR("No stripes had valid attrs\n");
/* the object id of the caller is preserved across the copy back */
772 tmp_oa->o_id = set->set_oi->oi_oa->o_id;
773 memcpy(set->set_oi->oi_oa, tmp_oa, sizeof(*set->set_oi->oi_oa));
/* After a bulk read or write, walk the requests of the set and copy the
 * block count reported by a sub-request (when OBD_MD_FLBLOCKS is valid) into
 * the lvb of the matching stripe.
 * NOTE(review): the loop continue line, any stripe locking and the return
 * statement are missing from this excerpt. */
781 static int brw_done(struct lov_request_set *set)
783 struct lov_stripe_md *lsm = set->set_oi->oi_md;
784 struct lov_oinfo *loi = NULL;
785 struct list_head *pos;
786 struct lov_request *req;
789 list_for_each (pos, &set->set_list) {
790 req = list_entry(pos, struct lov_request, rq_link);
792 if (!req->rq_complete || req->rq_rc)
795 loi = lsm->lsm_oinfo[req->rq_stripe];
797 if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLBLOCKS)
798 loi->loi_lvb.lvb_blocks = req->rq_oi.oi_oa->o_blocks;
/* Finalise a bulk read/write set and drop one reference on it.
 * NOTE(review): the work done when set_completes is nonzero, the action
 * taken when the reference count reaches zero and the return statement are
 * missing from this excerpt. */
804 int lov_fini_brw_set(struct lov_request_set *set)
811 LASSERT(set->set_exp);
812 if (set->set_completes) {
814 /* FIXME update qos data here */
816 if (atomic_dec_and_test(&set->set_refcount))
/* Build the request set for a bulk read/write of oa_bufs pages.
 * Phase 1 counts the pages that fall on each stripe. Phase 2 creates one
 * sub-request per stripe that has pages, giving it a private obdo copy, a
 * stripe md header and a contiguous window (rq_pgaidx, rq_oabufs) inside
 * set->set_pga; an inactive target fails the whole call with -EIO. Phase 3
 * copies every page into the window of its stripe and rewrites its offset
 * to be stripe relative. The temporary info array is freed afterwards and
 * the visible tail calls lov_fini_brw_set().
 * NOTE(review): the declaration of info, the initial value of shift, the
 * per-stripe offset increment, the allocation failure tests and the return
 * statement are missing from this excerpt. */
822 int lov_prep_brw_set(struct obd_export *exp, struct obd_info *oinfo,
823 obd_count oa_bufs, struct brw_page *pga,
824 struct obd_trans_info *oti,
825 struct lov_request_set **reqset)
832 struct lov_request_set *set;
833 struct lov_obd *lov = &exp->exp_obd->u.lov;
834 int rc = 0, i, shift;
837 OBD_ALLOC(set, sizeof(*set));
845 set->set_oabufs = oa_bufs;
846 OBD_ALLOC(set->set_pga, oa_bufs * sizeof(*set->set_pga));
848 GOTO(out, rc = -ENOMEM);
/* one bookkeeping entry per stripe of the file */
850 OBD_ALLOC(info, sizeof(*info) * oinfo->oi_md->lsm_stripe_count);
852 GOTO(out, rc = -ENOMEM);
854 /* calculate the page count for each stripe */
855 for (i = 0; i < oa_bufs; i++) {
856 int stripe = lov_stripe_number(oinfo->oi_md, pga[i].off);
857 info[stripe].count++;
860 /* alloc and initialize lov request */
862 for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++){
863 struct lov_oinfo *loi = NULL;
864 struct lov_request *req;
866 if (info[i].count == 0)
869 loi = oinfo->oi_md->lsm_oinfo[i];
870 if (!lov->lov_tgts[loi->loi_ost_idx] ||
871 !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) {
872 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
873 GOTO(out, rc = -EIO);
876 OBD_ALLOC(req, sizeof(*req));
878 GOTO(out, rc = -ENOMEM);
880 OBDO_ALLOC(req->rq_oi.oi_oa);
881 if (req->rq_oi.oi_oa == NULL) {
882 OBD_FREE(req, sizeof(*req));
883 GOTO(out, rc = -ENOMEM);
887 memcpy(req->rq_oi.oi_oa, oinfo->oi_oa,
888 sizeof(*req->rq_oi.oi_oa));
890 req->rq_oi.oi_oa->o_id = loi->loi_id;
891 req->rq_oi.oi_oa->o_stripe_idx = i;
893 req->rq_buflen = sizeof(*req->rq_oi.oi_md);
894 OBD_ALLOC(req->rq_oi.oi_md, req->rq_buflen);
895 if (req->rq_oi.oi_md == NULL) {
896 OBDO_FREE(req->rq_oi.oi_oa);
897 OBD_FREE(req, sizeof(*req));
898 GOTO(out, rc = -ENOMEM);
901 req->rq_idx = loi->loi_ost_idx;
904 /* XXX LOV STACKING */
905 req->rq_oi.oi_md->lsm_object_id = loi->loi_id;
906 req->rq_oi.oi_md->lsm_object_gr = oinfo->oi_md->lsm_object_gr;
/* shift is the running start index of the next window in set_pga */
907 req->rq_oabufs = info[i].count;
908 req->rq_pgaidx = shift;
909 shift += req->rq_oabufs;
911 /* remember the index for sort brw_page array */
912 info[i].index = req->rq_pgaidx;
914 req->rq_oi.oi_capa = oinfo->oi_capa;
916 lov_set_add_req(req, set);
919 GOTO(out, rc = -EIO);
921 /* rotate & sort the brw_page array */
922 for (i = 0; i < oa_bufs; i++) {
923 int stripe = lov_stripe_number(oinfo->oi_md, pga[i].off);
925 shift = info[stripe].index + info[stripe].off;
926 LASSERT(shift < oa_bufs);
927 set->set_pga[shift] = pga[i];
928 lov_stripe_offset(oinfo->oi_md, pga[i].off, stripe,
929 &set->set_pga[shift].off);
934 OBD_FREE(info, sizeof(*info) * oinfo->oi_md->lsm_stripe_count);
939 lov_fini_brw_set(set);
/* Finalise a getattr set: when at least one sub-request completed the
 * attributes are merged by common_attr_done(), and one reference on the set
 * is dropped.
 * NOTE(review): the action taken when the reference count reaches zero and
 * the return statement are missing from this excerpt. */
944 int lov_fini_getattr_set(struct lov_request_set *set)
951 LASSERT(set->set_exp);
952 if (set->set_completes)
953 rc = common_attr_done(set);
955 if (atomic_dec_and_test(&set->set_refcount))
961 /* The callback for osc_getattr_async that finalizes a request info when a
962 * response is received. */
963 static int cb_getattr_update(struct obd_info *oinfo, int rc)
965 struct lov_request *lovreq;
/* oinfo is the rq_oi member embedded in a lov_request; recover the request */
966 lovreq = container_of(oinfo, struct lov_request, rq_oi);
967 return lov_update_common_set(lovreq->rq_rqset, lovreq, rc);
/* Build the request set for a getattr. The stripes of oinfo->oi_md are
 * walked with a test for a missing or inactive target, and each sub-request
 * gets a private copy of the caller obdo retargeted at the sub-object id,
 * the caller capability and cb_getattr_update as its completion callback.
 * An empty set fails with -EIO. Errors jump to out_set, and the visible tail
 * calls lov_fini_getattr_set().
 * NOTE(review): the skip after the inactive message, the allocation failure
 * test and the success return are missing from this excerpt. */
970 int lov_prep_getattr_set(struct obd_export *exp, struct obd_info *oinfo,
971 struct lov_request_set **reqset)
973 struct lov_request_set *set;
974 struct lov_obd *lov = &exp->exp_obd->u.lov;
978 OBD_ALLOC(set, sizeof(*set));
986 for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) {
987 struct lov_oinfo *loi;
988 struct lov_request *req;
990 loi = oinfo->oi_md->lsm_oinfo[i];
991 if (!lov->lov_tgts[loi->loi_ost_idx] ||
992 !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) {
993 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
997 OBD_ALLOC(req, sizeof(*req));
999 GOTO(out_set, rc = -ENOMEM);
1002 req->rq_idx = loi->loi_ost_idx;
1004 OBDO_ALLOC(req->rq_oi.oi_oa);
1005 if (req->rq_oi.oi_oa == NULL) {
1006 OBD_FREE(req, sizeof(*req));
1007 GOTO(out_set, rc = -ENOMEM);
/* start from the caller obdo, then retarget it at this sub-object */
1009 memcpy(req->rq_oi.oi_oa, oinfo->oi_oa,
1010 sizeof(*req->rq_oi.oi_oa));
1011 req->rq_oi.oi_oa->o_id = loi->loi_id;
1012 req->rq_oi.oi_cb_up = cb_getattr_update;
1013 req->rq_oi.oi_capa = oinfo->oi_capa;
1014 req->rq_rqset = set;
1016 lov_set_add_req(req, set);
1018 if (!set->set_count)
1019 GOTO(out_set, rc = -EIO);
1023 lov_fini_getattr_set(set);
/* Finalise a destroy set: one reference on the set is dropped and the set is
 * released through lov_finish_set() when that was the last reference. The
 * set_completes branch only carries a FIXME in the visible lines.
 * NOTE(review): the return statement is missing from this excerpt. */
1027 int lov_fini_destroy_set(struct lov_request_set *set)
1033 LASSERT(set->set_exp);
1034 if (set->set_completes) {
1035 /* FIXME update qos data here */
1038 if (atomic_dec_and_test(&set->set_refcount))
1039 lov_finish_set(set);
/* Build the request set for an object destroy. oinfo is attached to the set
 * and pointed at lsm and src_oa; when src_oa carries OBD_MD_FLCOOKIE the log
 * cookies of oti are remembered on the set. The stripes of lsm are walked
 * with a test for a missing or inactive target, and each sub-request gets a
 * private copy of src_oa retargeted at the sub-object id. An empty set fails
 * with -EIO. Errors jump to out_set, and the visible tail calls
 * lov_fini_destroy_set().
 * NOTE(review): the skip after the inactive message, the allocation failure
 * test and the success return are missing from this excerpt. */
1044 int lov_prep_destroy_set(struct obd_export *exp, struct obd_info *oinfo,
1045 struct obdo *src_oa, struct lov_stripe_md *lsm,
1046 struct obd_trans_info *oti,
1047 struct lov_request_set **reqset)
1049 struct lov_request_set *set;
1050 struct lov_obd *lov = &exp->exp_obd->u.lov;
1054 OBD_ALLOC(set, sizeof(*set));
/* note that the caller obd_info itself is modified here */
1060 set->set_oi = oinfo;
1061 set->set_oi->oi_md = lsm;
1062 set->set_oi->oi_oa = src_oa;
1064 if (oti != NULL && src_oa->o_valid & OBD_MD_FLCOOKIE)
1065 set->set_cookies = oti->oti_logcookies;
1067 for (i = 0; i < lsm->lsm_stripe_count; i++) {
1068 struct lov_oinfo *loi;
1069 struct lov_request *req;
1071 loi = lsm->lsm_oinfo[i];
1072 if (!lov->lov_tgts[loi->loi_ost_idx] ||
1073 !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) {
1074 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
1078 OBD_ALLOC(req, sizeof(*req));
1080 GOTO(out_set, rc = -ENOMEM);
1083 req->rq_idx = loi->loi_ost_idx;
1085 OBDO_ALLOC(req->rq_oi.oi_oa);
1086 if (req->rq_oi.oi_oa == NULL) {
1087 OBD_FREE(req, sizeof(*req));
1088 GOTO(out_set, rc = -ENOMEM);
1090 memcpy(req->rq_oi.oi_oa, src_oa, sizeof(*req->rq_oi.oi_oa));
1091 req->rq_oi.oi_oa->o_id = loi->loi_id;
1092 lov_set_add_req(req, set);
1094 if (!set->set_count)
1095 GOTO(out_set, rc = -EIO);
1099 lov_fini_destroy_set(set);
/* Finalise a setattr set: when at least one sub-request completed the
 * attributes are merged by common_attr_done(); then one reference on the set
 * is dropped and the set is released through lov_finish_set() when that was
 * the last reference.
 * NOTE(review): the return statement is missing from this excerpt. */
1103 int lov_fini_setattr_set(struct lov_request_set *set)
1110 LASSERT(set->set_exp);
1111 if (set->set_completes) {
1112 rc = common_attr_done(set);
1113 /* FIXME update qos data here */
1116 if (atomic_dec_and_test(&set->set_refcount))
1117 lov_finish_set(set);
/* Handle the result of one per-stripe setattr. The result is recorded with
 * lov_update_set(), a nonzero rc is examined against the state of the target
 * OST, and the ctime, mtime and atime returned in the sub-request obdo are
 * copied into the lvb of the matching stripe when the corresponding valid
 * bits are set.
 * Note that lov and lsm are reached through req->rq_rqset rather than
 * through the set argument.
 * NOTE(review): the statement guarded by the inactive test, the condition
 * guarding the time updates and the return statement are missing from this
 * excerpt. */
1121 int lov_update_setattr_set(struct lov_request_set *set,
1122 struct lov_request *req, int rc)
1124 struct lov_obd *lov = &req->rq_rqset->set_exp->exp_obd->u.lov;
1125 struct lov_stripe_md *lsm = req->rq_rqset->set_oi->oi_md;
1128 lov_update_set(set, req, rc);
1130 /* grace error on inactive ost */
1131 if (rc && !(lov->lov_tgts[req->rq_idx] &&
1132 lov->lov_tgts[req->rq_idx]->ltd_active))
1136 if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLCTIME)
1137 lsm->lsm_oinfo[req->rq_stripe]->loi_lvb.lvb_ctime =
1138 req->rq_oi.oi_oa->o_ctime;
1139 if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLMTIME)
1140 lsm->lsm_oinfo[req->rq_stripe]->loi_lvb.lvb_mtime =
1141 req->rq_oi.oi_oa->o_mtime;
1142 if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLATIME)
1143 lsm->lsm_oinfo[req->rq_stripe]->loi_lvb.lvb_atime =
1144 req->rq_oi.oi_oa->o_atime;
1150 /* The callback for osc_setattr_async that finalizes a request info when a
1151 * response is received. */
1152 static int cb_setattr_update(struct obd_info *oinfo, int rc)
1154 struct lov_request *lovreq;
/* oinfo is the rq_oi member embedded in a lov_request; recover the request */
1155 lovreq = container_of(oinfo, struct lov_request, rq_oi);
1156 return lov_update_setattr_set(lovreq->rq_rqset, lovreq, rc);
/* Build the request set for a setattr. When the caller obdo carries
 * OBD_MD_FLCOOKIE the log cookies of oti are remembered on the set. The
 * stripes of oinfo->oi_md are walked with a test for a missing or inactive
 * target, and each sub-request gets a private copy of the caller obdo
 * retargeted at the sub-object id and stripe index, with cb_setattr_update
 * as completion callback. When a size is being set it is translated into the
 * size of the individual stripe object. An empty set fails with -EIO. Errors
 * jump to out_set, and the visible tail calls lov_fini_setattr_set().
 * NOTE(review): the skip after the inactive message, the allocation failure
 * test and the success return are missing from this excerpt. */
1159 int lov_prep_setattr_set(struct obd_export *exp, struct obd_info *oinfo,
1160 struct obd_trans_info *oti,
1161 struct lov_request_set **reqset)
1163 struct lov_request_set *set;
1164 struct lov_obd *lov = &exp->exp_obd->u.lov;
1168 OBD_ALLOC(set, sizeof(*set));
1175 set->set_oi = oinfo;
1176 if (oti != NULL && oinfo->oi_oa->o_valid & OBD_MD_FLCOOKIE)
1177 set->set_cookies = oti->oti_logcookies;
1179 for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) {
1180 struct lov_oinfo *loi = oinfo->oi_md->lsm_oinfo[i];
1181 struct lov_request *req;
1183 if (!lov->lov_tgts[loi->loi_ost_idx] ||
1184 !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) {
1185 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
1189 OBD_ALLOC(req, sizeof(*req));
1191 GOTO(out_set, rc = -ENOMEM);
1193 req->rq_idx = loi->loi_ost_idx;
1195 OBDO_ALLOC(req->rq_oi.oi_oa);
1196 if (req->rq_oi.oi_oa == NULL) {
1197 OBD_FREE(req, sizeof(*req));
1198 GOTO(out_set, rc = -ENOMEM);
1200 memcpy(req->rq_oi.oi_oa, oinfo->oi_oa,
1201 sizeof(*req->rq_oi.oi_oa));
1202 req->rq_oi.oi_oa->o_id = loi->loi_id;
/* a group marked valid must be a positive group number */
1203 LASSERT(!(req->rq_oi.oi_oa->o_valid & OBD_MD_FLGROUP)
1204 || req->rq_oi.oi_oa->o_gr>0);
1205 req->rq_oi.oi_oa->o_stripe_idx = i;
1206 req->rq_oi.oi_cb_up = cb_setattr_update;
1207 req->rq_oi.oi_capa = oinfo->oi_capa;
1208 req->rq_rqset = set;
1210 if (oinfo->oi_oa->o_valid & OBD_MD_FLSIZE) {
/* lov_stripe_offset() writes the stripe object size in place; a negative
 * return together with a nonzero size leads to a decrement by one.
 * NOTE(review): the meaning of a negative return is defined outside this
 * excerpt - confirm against lov_stripe_offset(). */
1211 int off = lov_stripe_offset(oinfo->oi_md,
1212 oinfo->oi_oa->o_size, i,
1213 &req->rq_oi.oi_oa->o_size);
1215 if (off < 0 && req->rq_oi.oi_oa->o_size)
1216 req->rq_oi.oi_oa->o_size--;
1218 CDEBUG(D_INODE, "stripe %d has size "LPU64"/"LPU64"\n",
1219 i, req->rq_oi.oi_oa->o_size,
1220 oinfo->oi_oa->o_size);
1222 lov_set_add_req(req, set);
1224 if (!set->set_count)
1225 GOTO(out_set, rc = -EIO);
1229 lov_fini_setattr_set(set);
/* Finalise a punch (truncate) set: when sub-requests completed and at least
 * one succeeded the attributes are merged by common_attr_done(); then one
 * reference on the set is dropped and the set is released through
 * lov_finish_set() when that was the last reference.
 * NOTE(review): one line inside the set_completes branch and the return
 * statement are missing from this excerpt. */
1233 int lov_fini_punch_set(struct lov_request_set *set)
1240 LASSERT(set->set_exp);
1241 if (set->set_completes) {
1243 /* FIXME update qos data here */
1244 if (set->set_success)
1245 rc = common_attr_done(set);
1248 if (atomic_dec_and_test(&set->set_refcount))
1249 lov_finish_set(set);
/* Handle the result of one per-stripe punch. The result is recorded with
 * lov_update_set(), a nonzero rc is examined against the active flag of the
 * target OST, and the block count returned in the sub-request obdo is copied
 * into the lvb of the matching stripe under the stripe lock.
 * NOTE(review): unlike the setattr and common handlers in this file, the
 * inactive test here dereferences lov_tgts[req->rq_idx] without first
 * checking that the slot is non-NULL - confirm this is intended. The
 * statement guarded by that test and the return statement are missing from
 * this excerpt. */
1254 int lov_update_punch_set(struct lov_request_set *set,
1255 struct lov_request *req, int rc)
1257 struct lov_obd *lov = &req->rq_rqset->set_exp->exp_obd->u.lov;
1258 struct lov_stripe_md *lsm = req->rq_rqset->set_oi->oi_md;
1261 lov_update_set(set, req, rc);
1263 /* grace error on inactive ost */
1264 if (rc && !lov->lov_tgts[req->rq_idx]->ltd_active)
1268 lov_stripe_lock(lsm);
1269 if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLBLOCKS) {
1270 lsm->lsm_oinfo[req->rq_stripe]->loi_lvb.lvb_blocks =
1271 req->rq_oi.oi_oa->o_blocks;
1274 /* Do we need to update lvb_size here? It needn't because
1275 * it have been done in ll_truncate(). -jay */
1276 lov_stripe_unlock(lsm);
1282 /* The callback for osc_punch that finalizes a request info when a response
1284 static int cb_update_punch(struct obd_info *oinfo, int rc)
1286 struct lov_request *lovreq;
/* oinfo is the rq_oi member embedded in a lov_request; recover the request
 * and pass the result on to lov_update_punch_set() */
1287 lovreq = container_of(oinfo, struct lov_request, rq_oi);
1288 return lov_update_punch_set(lovreq->rq_rqset, lovreq, rc);
/* Build the request set for a punch (truncate). The stripes of oinfo->oi_md
 * are walked with a test for a missing or inactive target and an
 * intersection test against the requested extent. Each sub-request gets a
 * private copy of the caller obdo retargeted at the sub-object id and group,
 * the stripe-local extent (rs, re), the caller capability and
 * cb_update_punch as completion callback. An empty set fails with -EIO.
 * Errors jump to out_set, and the visible tail calls lov_fini_punch_set().
 * NOTE(review): the skips after the inactive and intersection tests, the
 * declarations of rs and re, the allocation failure test and the success
 * return are missing from this excerpt. */
1291 int lov_prep_punch_set(struct obd_export *exp, struct obd_info *oinfo,
1292 struct obd_trans_info *oti,
1293 struct lov_request_set **reqset)
1295 struct lov_request_set *set;
1296 struct lov_obd *lov = &exp->exp_obd->u.lov;
1300 OBD_ALLOC(set, sizeof(*set));
1305 set->set_oi = oinfo;
1308 for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) {
1309 struct lov_oinfo *loi = oinfo->oi_md->lsm_oinfo[i];
1310 struct lov_request *req;
1313 if (!lov->lov_tgts[loi->loi_ost_idx] ||
1314 !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) {
1315 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
1319 if (!lov_stripe_intersects(oinfo->oi_md, i,
1320 oinfo->oi_policy.l_extent.start,
1321 oinfo->oi_policy.l_extent.end,
1325 OBD_ALLOC(req, sizeof(*req));
1327 GOTO(out_set, rc = -ENOMEM);
1329 req->rq_idx = loi->loi_ost_idx;
1331 OBDO_ALLOC(req->rq_oi.oi_oa);
1332 if (req->rq_oi.oi_oa == NULL) {
1333 OBD_FREE(req, sizeof(*req));
1334 GOTO(out_set, rc = -ENOMEM);
1336 memcpy(req->rq_oi.oi_oa, oinfo->oi_oa,
1337 sizeof(*req->rq_oi.oi_oa));
/* here the group is always filled in from the stripe and marked valid */
1338 req->rq_oi.oi_oa->o_id = loi->loi_id;
1339 req->rq_oi.oi_oa->o_gr = loi->loi_gr;
1340 req->rq_oi.oi_oa->o_valid |= OBD_MD_FLGROUP;
1342 req->rq_oi.oi_oa->o_stripe_idx = i;
1343 req->rq_oi.oi_cb_up = cb_update_punch;
1344 req->rq_rqset = set;
1346 req->rq_oi.oi_policy.l_extent.start = rs;
1347 req->rq_oi.oi_policy.l_extent.end = re;
1348 req->rq_oi.oi_policy.l_extent.gid = -1;
1350 req->rq_oi.oi_capa = oinfo->oi_capa;
1352 lov_set_add_req(req, set);
1354 if (!set->set_count)
1355 GOTO(out_set, rc = -EIO);
1359 lov_fini_punch_set(set);
/* Finalise a sync set: one reference on the set is dropped and the set is
 * released through lov_finish_set() when that was the last reference. When
 * sub-requests completed, the case of no success at all is tested for.
 * NOTE(review): the statement guarded by the no-success test and the return
 * statement are missing from this excerpt. */
1363 int lov_fini_sync_set(struct lov_request_set *set)
1370 LASSERT(set->set_exp);
1371 if (set->set_completes) {
1372 if (!set->set_success)
1374 /* FIXME update qos data here */
1377 if (atomic_dec_and_test(&set->set_refcount))
1378 lov_finish_set(set);
/* Build the request set for a sync of the range start..end. oinfo is
 * attached to the set and pointed at lsm and src_oa. The stripes of lsm are
 * walked with a test for a missing or inactive target and an intersection
 * test against the range. Each sub-request gets a private copy of src_oa
 * retargeted at the sub-object id and stripe index, plus the stripe-local
 * extent (rs, re). An empty set fails with -EIO. Errors jump to out_set, and
 * the visible tail calls lov_fini_sync_set().
 * NOTE(review): the skips after the inactive and intersection tests, the
 * declarations of rs and re, the allocation failure test and the success
 * return are missing from this excerpt. */
1383 int lov_prep_sync_set(struct obd_export *exp, struct obd_info *oinfo,
1384 struct obdo *src_oa, struct lov_stripe_md *lsm,
1385 obd_off start, obd_off end,
1386 struct lov_request_set **reqset)
1388 struct lov_request_set *set;
1389 struct lov_obd *lov = &exp->exp_obd->u.lov;
1393 OBD_ALLOC(set, sizeof(*set));
/* note that the caller obd_info itself is modified here */
1399 set->set_oi = oinfo;
1400 set->set_oi->oi_md = lsm;
1401 set->set_oi->oi_oa = src_oa;
1403 for (i = 0; i < lsm->lsm_stripe_count; i++) {
1404 struct lov_oinfo *loi = lsm->lsm_oinfo[i];
1405 struct lov_request *req;
1408 if (!lov->lov_tgts[loi->loi_ost_idx] ||
1409 !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) {
1410 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
1414 if (!lov_stripe_intersects(lsm, i, start, end, &rs, &re))
1417 OBD_ALLOC(req, sizeof(*req));
1419 GOTO(out_set, rc = -ENOMEM);
1421 req->rq_idx = loi->loi_ost_idx;
1423 OBDO_ALLOC(req->rq_oi.oi_oa);
1424 if (req->rq_oi.oi_oa == NULL) {
1425 OBD_FREE(req, sizeof(*req));
1426 GOTO(out_set, rc = -ENOMEM);
1428 memcpy(req->rq_oi.oi_oa, src_oa, sizeof(*req->rq_oi.oi_oa));
1429 req->rq_oi.oi_oa->o_id = loi->loi_id;
1430 req->rq_oi.oi_oa->o_stripe_idx = i;
1432 req->rq_oi.oi_policy.l_extent.start = rs;
1433 req->rq_oi.oi_policy.l_extent.end = re;
1434 req->rq_oi.oi_policy.l_extent.gid = -1;
1436 lov_set_add_req(req, set);
1438 if (!set->set_count)
1439 GOTO(out_set, rc = -EIO);
1443 lov_fini_sync_set(set);
/* LOV_U64_MAX is the all-ones __u64 value. LOV_SUM_MAX(tot, add) sets tot to
 * LOV_U64_MAX when tot + add would wrap around (the sum comparing smaller
 * than tot is the wrap test). Both arguments are evaluated more than once.
 * NOTE(review): the opening and the remaining lines of the macro body are
 * missing from this excerpt; presumably the non-wrapping case adds add to
 * tot - confirm against the full file. */
1447 #define LOV_U64_MAX ((__u64)~0ULL)
1448 #define LOV_SUM_MAX(tot, add) \
1450 if ((tot) + (add) < (tot)) \
1451 (tot) = LOV_U64_MAX; \
/* Post-process an aggregated statfs result. os_files and os_ffree are
 * divided by the expected stripe count unless they hold the LOV_U64_MAX
 * marker, and the result is then cached in obd->obd_osfs with a fresh
 * timestamp, under obd_osfs_lock.
 * NOTE(review): the condition on success that guards this work and the
 * return statement are missing from this excerpt. */
1456 int lov_fini_statfs(struct obd_device *obd, struct obd_statfs *osfs,int success)
1461 __u32 expected_stripes = lov_get_stripecnt(&obd->u.lov, 0);
/* a counter equal to LOV_U64_MAX is left untouched rather than divided */
1463 if (osfs->os_files != LOV_U64_MAX)
1464 do_div(osfs->os_files, expected_stripes);
1465 if (osfs->os_ffree != LOV_U64_MAX)
1466 do_div(osfs->os_ffree, expected_stripes);
1468 spin_lock(&obd->obd_osfs_lock);
1469 memcpy(&obd->obd_osfs, osfs, sizeof(*osfs));
1470 obd->obd_osfs_age = get_jiffies_64();
1471 spin_unlock(&obd->obd_osfs_lock);
/* Finalise a statfs set: when sub-requests completed, lov_fini_statfs() is
 * run on the aggregate buffer of the set; then one reference on the set is
 * dropped and the set is released through lov_finish_set() when that was the
 * last reference.
 * NOTE(review): the last argument of the lov_fini_statfs() call and the
 * return statement are missing from this excerpt. */
1478 int lov_fini_statfs_set(struct lov_request_set *set)
1486 if (set->set_completes) {
1487 rc = lov_fini_statfs(set->set_obd, set->set_oi->oi_osfs,
1491 if (atomic_dec_and_test(&set->set_refcount))
1492 lov_finish_set(set);
/* Fold the statfs result of one target (lov_sfs) into the running aggregate
 * (osfs). The target result is first cached in obd->obd_osfs with a fresh
 * timestamp, under obd_osfs_lock. The visible lines then show a plain copy
 * of lov_sfs into osfs as well as an accumulating path: os_blocks is summed,
 * os_files and os_ffree are summed with saturation through LOV_SUM_MAX, and
 * os_bfree and os_bavail appear both in a take-the-smaller form and in a
 * summing form.
 * NOTE(review): the conditions that choose between these paths (most likely
 * based on success and a build option) are missing from this excerpt -
 * confirm against the full file before relying on this description. */
1497 void lov_update_statfs(struct obd_device *obd, struct obd_statfs *osfs,
1498 struct obd_statfs *lov_sfs, int success)
1500 spin_lock(&obd->obd_osfs_lock);
1501 memcpy(&obd->obd_osfs, lov_sfs, sizeof(*lov_sfs));
1502 obd->obd_osfs_age = get_jiffies_64();
1503 spin_unlock(&obd->obd_osfs_lock);
1506 memcpy(osfs, lov_sfs, sizeof(*lov_sfs));
1509 /* Sandia requested that df (and so, statfs) only
1510 returned minimal available space on
1511 a single OST, so people would be able to
1512 write this much data guaranteed. */
1513 if (osfs->os_bavail > lov_sfs->os_bavail) {
1514 /* Presumably if new bavail is smaller,
1515 new bfree is bigger as well */
1516 osfs->os_bfree = lov_sfs->os_bfree;
1517 osfs->os_bavail = lov_sfs->os_bavail;
1520 osfs->os_bfree += lov_sfs->os_bfree;
1521 osfs->os_bavail += lov_sfs->os_bavail;
1523 osfs->os_blocks += lov_sfs->os_blocks;
1524 /* XXX not sure about this one - depends on policy.
1525 * - could be minimum if we always stripe on all OBDs
1526 * (but that would be wrong for any other policy,
1527 * if one of the OBDs has no more objects left)
1528 * - could be sum if we stripe whole objects
1529 * - could be average, just to give a nice number
1531 * To give a "reasonable" (if not wholly accurate)
1532 * number, we divide the total number of free objects
1533 * by expected stripe count (watch out for overflow).
1535 LOV_SUM_MAX(osfs->os_files, lov_sfs->os_files);
1536 LOV_SUM_MAX(osfs->os_ffree, lov_sfs->os_ffree);
1540 /* The callback for osc_statfs_async that finalizes a request info when a
1541 * response is received. */
1542 static int cb_statfs_update(struct obd_info *oinfo, int rc)
1544 struct lov_request *lovreq;
1545 struct obd_statfs *osfs, *lov_sfs;
1546 struct obd_device *obd;
1547 struct lov_obd *lov;
/* oinfo is the rq_oi member embedded in a lov_request; recover the request */
1551 lovreq = container_of(oinfo, struct lov_request, rq_oi);
1552 lov = &lovreq->rq_rqset->set_obd->u.lov;
/* obd is the device behind the export of the target that answered */
1553 obd = class_exp2obd(lov->lov_tgts[lovreq->rq_idx]->ltd_exp);
/* osfs is the aggregate of the whole set, lov_sfs the reply of this target */
1555 osfs = lovreq->rq_rqset->set_oi->oi_osfs;
1556 lov_sfs = oinfo->oi_osfs;
/* the success count is sampled before lov_update_set() is called */
1558 success = lovreq->rq_rqset->set_success;
1560 /* XXX: the same is done in lov_update_common_set, however
1561 lovset->set_exp is not initialized. */
1562 lov_update_set(lovreq->rq_rqset, lovreq, rc);
1564 if (rc && !(lov->lov_tgts[lovreq->rq_idx] &&
1565 lov->lov_tgts[lovreq->rq_idx]->ltd_active))
1570 lov_update_statfs(obd, osfs, lov_sfs, success);
/* Build the request set for a statfs. Unlike the per-object prep functions
 * in this file, the loop runs over every target slot of the lov
 * (desc.ld_tgt_count), not over the stripes of an object, with a test for a
 * missing or inactive target. Each sub-request gets its own obd_statfs
 * buffer and cb_statfs_update as completion callback. An empty set fails
 * with -EIO. Errors jump to out_set, and the visible tail calls
 * lov_fini_statfs_set().
 * NOTE(review): the skip after the inactive message, the allocation failure
 * test, the assignment of the target index to the request and the end of
 * the function are missing from this excerpt. */
1576 int lov_prep_statfs_set(struct obd_device *obd, struct obd_info *oinfo,
1577 struct lov_request_set **reqset)
1579 struct lov_request_set *set;
1580 struct lov_obd *lov = &obd->u.lov;
1584 OBD_ALLOC(set, sizeof(*set));
1590 set->set_oi = oinfo;
1592 /* We only get block data from the OBD */
1593 for (i = 0; i < lov->desc.ld_tgt_count; i++) {
1594 struct lov_request *req;
1596 if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_active) {
1597 CDEBUG(D_HA, "lov idx %d inactive\n", i);
1601 OBD_ALLOC(req, sizeof(*req));
1603 GOTO(out_set, rc = -ENOMEM);
1605 OBD_ALLOC(req->rq_oi.oi_osfs, sizeof(*req->rq_oi.oi_osfs));
1606 if (req->rq_oi.oi_osfs == NULL) {
1607 OBD_FREE(req, sizeof(*req));
1608 GOTO(out_set, rc = -ENOMEM);
1612 req->rq_oi.oi_cb_up = cb_statfs_update;
1613 req->rq_rqset = set;
1615 lov_set_add_req(req, set);
1617 if (!set->set_count)
1618 GOTO(out_set, rc = -EIO);
1622 lov_fini_statfs_set(set);