1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
6 * This file is part of the Lustre file system, http://www.lustre.org
7 * Lustre is a trademark of Cluster File Systems, Inc.
9 * You may have signed or agreed to another license before downloading
10 * this software. If so, you are bound by the terms and conditions
11 * of that agreement, and the following does not apply to you. See the
12 * LICENSE file included with this distribution for more information.
14 * If you did not agree to a different license, then this copy of Lustre
15 * is open source software; you can redistribute it and/or modify it
16 * under the terms of version 2 of the GNU General Public License as
17 * published by the Free Software Foundation.
19 * In either case, Lustre is distributed in the hope that it will be
20 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
21 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * license text for more details.
26 # define EXPORT_SYMTAB
28 #define DEBUG_SUBSYSTEM S_LOV
31 #include <libcfs/libcfs.h>
33 #include <liblustre.h>
36 #include <obd_class.h>
38 #include <lustre/lustre_idl.h>
40 #include "lov_internal.h"
/* Initialise the bookkeeping of a freshly allocated request set.
 * Visible in this listing: the completion counter is zeroed, the request
 * list is made empty and the reference count starts at 1.  The
 * lov_fini_*_set() functions in this file drop a reference with
 * atomic_dec_and_test(). */
42 static void lov_init_set(struct lov_request_set *set)
45 set->set_completes = 0;
48 CFS_INIT_LIST_HEAD(&set->set_list);
49 atomic_set(&set->set_refcount, 1);
/* Release a request set and everything hanging off it.
 * Each request on set_list is unlinked and freed together with its obdo,
 * its rq_buflen-byte stripe-md buffer and (when present) its statfs
 * buffer.  Afterwards the page array (set_oabufs entries), the lock-handle
 * reference and the set itself are released.
 * NOTE(review): only the oi_osfs free has a visible guard here; confirm the
 * guards around the other frees against the complete source. */
52 static void lov_finish_set(struct lov_request_set *set)
54 struct list_head *pos, *n;
/* the _safe iterator is needed because entries are deleted while walking */
58 list_for_each_safe(pos, n, &set->set_list) {
59 struct lov_request *req = list_entry(pos, struct lov_request,
61 list_del_init(&req->rq_link);
64 OBDO_FREE(req->rq_oi.oi_oa);
66 OBD_FREE(req->rq_oi.oi_md, req->rq_buflen);
67 if (req->rq_oi.oi_osfs)
68 OBD_FREE(req->rq_oi.oi_osfs,
69 sizeof(*req->rq_oi.oi_osfs));
70 OBD_FREE(req, sizeof(*req));
74 int len = set->set_oabufs * sizeof(*set->set_pga);
75 OBD_FREE(set->set_pga, len);
78 lov_llh_put(set->set_lockh);
80 OBD_FREE(set, sizeof(*set));
/* Record the result code rc of one sub-request req in its set.
 * Every lov_update_*_set() function and cb_statfs_update() in this file
 * calls it.  The body is not visible in this listing.
 * NOTE(review): presumably it maintains set_completes / set_success and
 * req->rq_complete / req->rq_rc, which other functions read -- confirm. */
84 void lov_update_set(struct lov_request_set *set,
85 struct lov_request *req, int rc)
/* Generic completion handler: record rc for req via lov_update_set(), then
 * check whether a failure came from a target that is missing or inactive.
 * NOTE(review): the statement controlled by that check and the return are
 * not visible here; judging by the "grace error" comment the error is
 * presumably forgiven (rc reset to 0) -- confirm. */
95 int lov_update_common_set(struct lov_request_set *set,
96 struct lov_request *req, int rc)
98 struct lov_obd *lov = &set->set_exp->exp_obd->u.lov;
101 lov_update_set(set, req, rc);
103 /* grace error on inactive ost */
104 if (rc && !(lov->lov_tgts[req->rq_idx] &&
105 lov->lov_tgts[req->rq_idx]->ltd_active))
108 /* FIXME in raid1 regime, should return 0 */
/* Append req to the tail of the set's request list.
 * NOTE(review): the lov_prep_*_set() callers test set->set_count after
 * calling this, so a counter update is presumably among the lines missing
 * from this listing -- confirm. */
112 void lov_set_add_req(struct lov_request *req, struct lov_request_set *set)
114 list_add_tail(&req->rq_link, &set->set_list);
/* Fold the result of one per-stripe enqueue into the parent set.
 *
 * req   sub-request that completed; its set is taken from req->rq_rqset
 * mode  lock mode (not referenced by any line visible in this listing)
 * rc    result of the sub-request
 *
 * Three cases are visible:
 *  - rc == ELDLM_OK: under the stripe lock, copy the LVB from the request's
 *    private md into the stripe's lov_oinfo, clamp the reported size to the
 *    end of the granted extent + 1 and compare it with the current KMS,
 *    then mark the KMS valid and allow the lock to be matched;
 *  - rc == ELDLM_LOCK_ABORTED with LDLM_FL_HAS_INTENT set in the parent
 *    flags (a glimpse): clear the stripe's lock handle and copy the LVB;
 *  - anything else: clear the stripe's lock handle and log an error when
 *    the target is configured and active.
 * In every case the result is finally recorded with lov_update_set(). */
118 int lov_update_enqueue_set(struct lov_request *req, __u32 mode, int rc)
120 struct lov_request_set *set = req->rq_rqset;
121 struct lustre_handle *lov_lockhp;
122 struct lov_oinfo *loi;
125 LASSERT(set != NULL);
126 LASSERT(set->set_oi != NULL);
/* per-stripe slot in the set's lock-handle array, and the stripe's info */
128 lov_lockhp = set->set_lockh->llh_handles + req->rq_stripe;
129 loi = set->set_oi->oi_md->lsm_oinfo[req->rq_stripe];
131 /* XXX LOV STACKING: OSC gets a copy, created in lov_prep_enqueue_set
132 * and that copy can be arbitrarily out of date.
134 * The LOV API is due for a serious rewriting anyways, and this
135 * can be addressed then. */
137 if (rc == ELDLM_OK) {
138 struct ldlm_lock *lock = ldlm_handle2lock(lov_lockhp);
141 LASSERT(lock != NULL);
142 lov_stripe_lock(set->set_oi->oi_md);
143 loi->loi_lvb = req->rq_oi.oi_md->lsm_oinfo[0]->loi_lvb;
144 tmp = loi->loi_lvb.lvb_size;
145 /* Extend KMS up to the end of this lock and no further
146 * A lock on [x,y] means a KMS of up to y + 1 bytes! */
147 if (tmp > lock->l_policy_data.l_extent.end)
148 tmp = lock->l_policy_data.l_extent.end + 1;
/* NOTE(review): the KMS assignment itself is not visible in this listing */
149 if (tmp >= loi->loi_kms) {
150 LDLM_DEBUG(lock, "lock acquired, setting rss="LPU64
151 ", kms="LPU64, loi->loi_lvb.lvb_size, tmp);
153 loi->loi_kms_valid = 1;
155 LDLM_DEBUG(lock, "lock acquired, setting rss="
156 LPU64"; leaving kms="LPU64", end="LPU64,
157 loi->loi_lvb.lvb_size, loi->loi_kms,
158 lock->l_policy_data.l_extent.end);
160 lov_stripe_unlock(set->set_oi->oi_md);
161 ldlm_lock_allow_match(lock);
163 } else if ((rc == ELDLM_LOCK_ABORTED) &&
164 (set->set_oi->oi_flags & LDLM_FL_HAS_INTENT)) {
165 memset(lov_lockhp, 0, sizeof(*lov_lockhp));
166 lov_stripe_lock(set->set_oi->oi_md);
167 loi->loi_lvb = req->rq_oi.oi_md->lsm_oinfo[0]->loi_lvb;
168 lov_stripe_unlock(set->set_oi->oi_md);
169 CDEBUG(D_INODE, "glimpsed, setting rss="LPU64"; leaving"
170 " kms="LPU64"\n", loi->loi_lvb.lvb_size, loi->loi_kms);
173 struct obd_export *exp = set->set_exp;
174 struct lov_obd *lov = &exp->exp_obd->u.lov;
176 memset(lov_lockhp, 0, sizeof(*lov_lockhp));
177 if (lov->lov_tgts[req->rq_idx] &&
178 lov->lov_tgts[req->rq_idx]->ltd_active) {
180 CERROR("enqueue objid "LPX64" subobj "
181 LPX64" on OST idx %d: rc %d\n",
182 set->set_oi->oi_md->lsm_object_id,
183 loi->loi_id, loi->loi_ost_idx, rc);
188 lov_update_set(set, req, rc);
192 /* The callback for osc_enqueue that updates lov info for every OSC request. */
/* The enclosing lov_request is recovered from its embedded rq_oi member;
 * the lock mode is read from the enqueue info stored on the request's set. */
193 static int cb_update_enqueue(struct obd_info *oinfo, int rc)
195 struct ldlm_enqueue_info *einfo;
196 struct lov_request *lovreq;
198 lovreq = container_of(oinfo, struct lov_request, rq_oi);
199 einfo = lovreq->rq_rqset->set_ei;
200 return lov_update_enqueue_set(lovreq, einfo->ei_mode, rc);
/* Finish an enqueue/match: when every completed sub-request succeeded
 * nothing has to be undone; otherwise walk the requests that completed
 * without error and still hold a lock handle, cancel those locks on their
 * targets, and drop a reference on the set's lock-handle container.
 * NOTE(review): the "obdjid" spelling in the error message looks like a
 * typo for "objid"; it is a runtime string and is left untouched here. */
203 static int enqueue_done(struct lov_request_set *set, __u32 mode)
205 struct lov_request *req;
206 struct lov_obd *lov = &set->set_exp->exp_obd->u.lov;
210 /* enqueue/match success, just return */
211 if (set->set_completes && set->set_completes == set->set_success)
214 /* cancel enqueued/matched locks */
215 list_for_each_entry(req, &set->set_list, rq_link) {
216 struct lustre_handle *lov_lockhp;
218 if (!req->rq_complete || req->rq_rc)
221 lov_lockhp = set->set_lockh->llh_handles + req->rq_stripe;
223 if (!lustre_handle_is_used(lov_lockhp))
/* NOTE(review): lov_tgts[req->rq_idx] is dereferenced for ltd_exp here,
 * before the NULL test on the same entry two lines below -- confirm the
 * entry cannot be NULL at this point. */
226 rc = obd_cancel(lov->lov_tgts[req->rq_idx]->ltd_exp,
227 req->rq_oi.oi_md, mode, lov_lockhp);
228 if (rc && lov->lov_tgts[req->rq_idx] &&
229 lov->lov_tgts[req->rq_idx]->ltd_active)
230 CERROR("cancelling obdjid "LPX64" on OST "
231 "idx %d error: rc = %d\n",
232 req->rq_oi.oi_md->lsm_object_id,
236 lov_llh_put(set->set_lockh);
/* Finish an enqueue request set and drop one reference on it.
 * On one visible path set_completes is reset and enqueue_done() is run;
 * on the other the lock-handle reference is dropped when one exists.
 * Returns rc when it is non-zero, otherwise the enqueue_done() result.
 * NOTE(review): the condition selecting the first path and the statement
 * run when the last reference goes away are not visible in this listing
 * (lov_fini_destroy_set() calls lov_finish_set() at that point). */
240 int lov_fini_enqueue_set(struct lov_request_set *set, __u32 mode, int rc,
241 struct ptlrpc_request_set *rqset)
248 LASSERT(set->set_exp);
249 /* Do enqueue_done only for sync requests and if any request
253 set->set_completes = 0;
254 ret = enqueue_done(set, mode);
255 } else if (set->set_lockh)
256 lov_llh_put(set->set_lockh);
258 if (atomic_dec_and_test(&set->set_refcount))
261 RETURN(rc ? rc : ret);
/* Build the request set for a striped lock enqueue.
 *
 * A lock-handle container is created for the object and its cookie is
 * published through oinfo->oi_lockh.  For each stripe a sub-request is
 * prepared after an extent-intersection test and a target-active test.
 * Each sub-request owns a private md buffer laid out as
 * [lov_stripe_md][one lov_oinfo pointer][one lov_oinfo], with lsm_oinfo[0]
 * pointing at the trailing lov_oinfo; the stripe's kms and lvb are copied
 * into it.  Failures jump to out_set with rc set; the visible call at the
 * end hands the set to lov_fini_enqueue_set().
 * NOTE(review): what happens for a non-intersecting or inactive stripe is
 * not visible in this listing. */
264 int lov_prep_enqueue_set(struct obd_export *exp, struct obd_info *oinfo,
265 struct ldlm_enqueue_info *einfo,
266 struct lov_request_set **reqset)
268 struct lov_obd *lov = &exp->exp_obd->u.lov;
269 struct lov_request_set *set;
271 struct lov_oinfo *loi;
274 OBD_ALLOC(set, sizeof(*set));
282 set->set_lockh = lov_llh_new(oinfo->oi_md);
283 if (set->set_lockh == NULL)
284 GOTO(out_set, rc = -ENOMEM);
285 oinfo->oi_lockh->cookie = set->set_lockh->llh_handle.h_cookie;
287 for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) {
288 struct lov_request *req;
291 loi = oinfo->oi_md->lsm_oinfo[i];
292 if (!lov_stripe_intersects(oinfo->oi_md, i,
293 oinfo->oi_policy.l_extent.start,
294 oinfo->oi_policy.l_extent.end,
298 if (!lov->lov_tgts[loi->loi_ost_idx] ||
299 !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) {
300 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
304 OBD_ALLOC(req, sizeof(*req));
306 GOTO(out_set, rc = -ENOMEM);
/* room for the md header, a one-entry pointer array and one lov_oinfo */
308 req->rq_buflen = sizeof(*req->rq_oi.oi_md) +
309 sizeof(struct lov_oinfo *) +
310 sizeof(struct lov_oinfo);
311 OBD_ALLOC(req->rq_oi.oi_md, req->rq_buflen);
312 if (req->rq_oi.oi_md == NULL) {
313 OBD_FREE(req, sizeof(*req));
314 GOTO(out_set, rc = -ENOMEM);
316 req->rq_oi.oi_md->lsm_oinfo[0] =
317 ((void *)req->rq_oi.oi_md) + sizeof(*req->rq_oi.oi_md) +
318 sizeof(struct lov_oinfo *);
322 /* Set lov request specific parameters. */
323 req->rq_oi.oi_lockh = set->set_lockh->llh_handles + i;
324 req->rq_oi.oi_cb_up = cb_update_enqueue;
325 req->rq_oi.oi_flags = oinfo->oi_flags;
327 LASSERT(req->rq_oi.oi_lockh);
329 req->rq_oi.oi_policy.l_extent.gid =
330 oinfo->oi_policy.l_extent.gid;
331 req->rq_oi.oi_policy.l_extent.start = start;
332 req->rq_oi.oi_policy.l_extent.end = end;
334 req->rq_idx = loi->loi_ost_idx;
337 /* XXX LOV STACKING: submd should be from the subobj */
338 req->rq_oi.oi_md->lsm_object_id = loi->loi_id;
339 req->rq_oi.oi_md->lsm_stripe_count = 0;
340 req->rq_oi.oi_md->lsm_oinfo[0]->loi_kms_valid =
342 req->rq_oi.oi_md->lsm_oinfo[0]->loi_kms = loi->loi_kms;
343 req->rq_oi.oi_md->lsm_oinfo[0]->loi_lvb = loi->loi_lvb;
345 lov_set_add_req(req, set);
348 GOTO(out_set, rc = -EIO);
352 lov_fini_enqueue_set(set, einfo->ei_mode, rc, NULL);
/* Record the outcome of one per-stripe lock match in the set.
 * Only the final lov_update_set() call is visible in this listing; the
 * rest of the parameter list and how ret is derived are not shown. */
356 int lov_update_match_set(struct lov_request_set *set, struct lov_request *req,
366 lov_update_set(set, req, ret);
/* Finish a match request set: run enqueue_done() to undo partial matches,
 * drop the lock-handle reference when every request succeeded and the
 * caller was only testing (LDLM_FL_TEST_LOCK), then drop one reference on
 * the set.
 * NOTE(review): the statement run when the last set reference goes away
 * is not visible in this listing. */
370 int lov_fini_match_set(struct lov_request_set *set, __u32 mode, int flags)
377 LASSERT(set->set_exp);
378 rc = enqueue_done(set, mode);
379 if ((set->set_count == set->set_success) &&
380 (flags & LDLM_FL_TEST_LOCK))
381 lov_llh_put(set->set_lockh);
383 if (atomic_dec_and_test(&set->set_refcount))
/* Build the request set for a striped lock match.
 * A new lock-handle container is created for lsm and its cookie is stored
 * in *lockh.  For each stripe a sub-request with a private md buffer and
 * the stripe-local extent is queued after an extent-intersection test.
 * Here an inactive or missing target aborts the whole operation with -EIO.
 * Failures jump to out_set; the visible call at the end hands the set to
 * lov_fini_match_set() with flags 0. */
389 int lov_prep_match_set(struct obd_export *exp, struct obd_info *oinfo,
390 struct lov_stripe_md *lsm, ldlm_policy_data_t *policy,
391 __u32 mode, struct lustre_handle *lockh,
392 struct lov_request_set **reqset)
394 struct lov_obd *lov = &exp->exp_obd->u.lov;
395 struct lov_request_set *set;
397 struct lov_oinfo *loi;
400 OBD_ALLOC(set, sizeof(*set));
407 set->set_oi->oi_md = lsm;
408 set->set_lockh = lov_llh_new(lsm);
409 if (set->set_lockh == NULL)
410 GOTO(out_set, rc = -ENOMEM);
411 lockh->cookie = set->set_lockh->llh_handle.h_cookie;
413 for (i = 0; i < lsm->lsm_stripe_count; i++){
414 struct lov_request *req;
417 loi = lsm->lsm_oinfo[i];
418 if (!lov_stripe_intersects(lsm, i, policy->l_extent.start,
419 policy->l_extent.end, &start, &end))
422 /* FIXME raid1 should grace this error */
423 if (!lov->lov_tgts[loi->loi_ost_idx] ||
424 !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) {
425 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
426 GOTO(out_set, rc = -EIO);
429 OBD_ALLOC(req, sizeof(*req));
431 GOTO(out_set, rc = -ENOMEM);
433 req->rq_buflen = sizeof(*req->rq_oi.oi_md);
434 OBD_ALLOC(req->rq_oi.oi_md, req->rq_buflen);
435 if (req->rq_oi.oi_md == NULL) {
436 OBD_FREE(req, sizeof(*req));
437 GOTO(out_set, rc = -ENOMEM);
440 req->rq_oi.oi_policy.l_extent.start = start;
441 req->rq_oi.oi_policy.l_extent.end = end;
442 req->rq_oi.oi_policy.l_extent.gid = policy->l_extent.gid;
444 req->rq_idx = loi->loi_ost_idx;
447 /* XXX LOV STACKING: submd should be from the subobj */
448 req->rq_oi.oi_md->lsm_object_id = loi->loi_id;
449 req->rq_oi.oi_md->lsm_stripe_count = 0;
451 lov_set_add_req(req, set);
454 GOTO(out_set, rc = -EIO);
458 lov_fini_match_set(set, mode, 0);
/* Finish a cancel request set: drop the lock-handle reference and one
 * reference on the set.
 * NOTE(review): any guard around lov_llh_put() and the statement run when
 * the last set reference goes away are not visible in this listing. */
462 int lov_fini_cancel_set(struct lov_request_set *set)
470 LASSERT(set->set_exp);
472 lov_llh_put(set->set_lockh);
474 if (atomic_dec_and_test(&set->set_refcount))
/* Build the request set for cancelling a striped lock.
 * The lock-handle container is looked up from lockh (-EINVAL when it is
 * unknown).  Stripes whose per-stripe handle is unused are logged, and a
 * sub-request with a private md buffer is queued for stripes that hold a
 * lock.  Failures jump to out_set; the visible call at the end hands the
 * set to lov_fini_cancel_set(). */
480 int lov_prep_cancel_set(struct obd_export *exp, struct obd_info *oinfo,
481 struct lov_stripe_md *lsm, __u32 mode,
482 struct lustre_handle *lockh,
483 struct lov_request_set **reqset)
485 struct lov_request_set *set;
487 struct lov_oinfo *loi;
490 OBD_ALLOC(set, sizeof(*set));
497 set->set_oi->oi_md = lsm;
498 set->set_lockh = lov_handle2llh(lockh);
499 if (set->set_lockh == NULL) {
500 CERROR("LOV: invalid lov lock handle %p\n", lockh);
501 GOTO(out_set, rc = -EINVAL);
503 lockh->cookie = set->set_lockh->llh_handle.h_cookie;
505 for (i = 0; i < lsm->lsm_stripe_count; i++){
506 struct lov_request *req;
507 struct lustre_handle *lov_lockhp;
509 loi = lsm->lsm_oinfo[i];
510 lov_lockhp = set->set_lockh->llh_handles + i;
511 if (!lustre_handle_is_used(lov_lockhp)) {
512 CDEBUG(D_RPCTRACE,"lov idx %d subobj "LPX64" no lock\n",
513 loi->loi_ost_idx, loi->loi_id);
517 OBD_ALLOC(req, sizeof(*req));
519 GOTO(out_set, rc = -ENOMEM);
521 req->rq_buflen = sizeof(*req->rq_oi.oi_md);
522 OBD_ALLOC(req->rq_oi.oi_md, req->rq_buflen);
523 if (req->rq_oi.oi_md == NULL) {
524 OBD_FREE(req, sizeof(*req));
525 GOTO(out_set, rc = -ENOMEM);
528 req->rq_idx = loi->loi_ost_idx;
531 /* XXX LOV STACKING: submd should be from the subobj */
532 req->rq_oi.oi_md->lsm_object_id = loi->loi_id;
533 req->rq_oi.oi_md->lsm_stripe_count = 0;
535 lov_set_add_req(req, set);
538 GOTO(out_set, rc = -EIO);
542 lov_fini_cancel_set(set);
/* Complete a striped object create.
 *
 * Visible steps:
 *  1. when some creates failed, retry requests through qos_remedy_create()
 *     and re-record the result with lov_update_create_set();
 *  2. handle the "no successful creates" and "fewer than requested" cases
 *     (set_count is lowered to set_success on the tolerated path);
 *  3. merge the attributes of every successful sub-object into ret_oa,
 *     complain if the merged size differs from a requested size, copy the
 *     result over the caller's obdo while keeping its o_id, and publish
 *     the stripe md through *lsmp;
 *  4. on the cleanup path, destroy every sub-object that was created and
 *     free the in-memory stripe md;
 *  5. restore oti->oti_logcookies to the start of the cookie array and
 *     set or clear OBD_MD_FLCOOKIE depending on whether any cookie was sent.
 * NOTE(review): labels, early exits and which requests step 1 selects are
 * not visible in this listing. */
546 static int create_done(struct obd_export *exp, struct lov_request_set *set,
547 struct lov_stripe_md **lsmp)
549 struct lov_obd *lov = &exp->exp_obd->u.lov;
550 struct obd_trans_info *oti = set->set_oti;
551 struct obdo *src_oa = set->set_oi->oi_oa;
552 struct lov_request *req;
553 struct obdo *ret_oa = NULL;
554 int attrset = 0, rc = 0;
557 LASSERT(set->set_completes);
559 /* try alloc objects on other osts if osc_create fails for
560 * exceptions: RPC failure, ENOSPC, etc */
561 if (set->set_count != set->set_success) {
562 list_for_each_entry (req, &set->set_list, rq_link) {
566 set->set_completes--;
567 req->rq_complete = 0;
569 rc = qos_remedy_create(set, req);
570 lov_update_create_set(set, req, rc);
577 /* no successful creates */
578 if (set->set_success == 0)
581 /* If there was an explicit stripe set, fail. Otherwise, we
582 * got some objects and that's not bad. */
583 if (set->set_count != set->set_success) {
586 set->set_count = set->set_success;
592 GOTO(cleanup, rc = -ENOMEM);
594 list_for_each_entry(req, &set->set_list, rq_link) {
595 if (!req->rq_complete || req->rq_rc)
597 lov_merge_attrs(ret_oa, req->rq_oi.oi_oa,
598 req->rq_oi.oi_oa->o_valid, set->set_oi->oi_md,
599 req->rq_stripe, &attrset);
601 if (src_oa->o_valid & OBD_MD_FLSIZE &&
602 ret_oa->o_size != src_oa->o_size) {
603 CERROR("original size "LPU64" isn't new object size "LPU64"\n",
604 src_oa->o_size, ret_oa->o_size);
607 ret_oa->o_id = src_oa->o_id;
608 memcpy(src_oa, ret_oa, sizeof(*src_oa));
611 *lsmp = set->set_oi->oi_md;
615 list_for_each_entry(req, &set->set_list, rq_link) {
616 struct obd_export *sub_exp;
619 if (!req->rq_complete || req->rq_rc)
622 sub_exp = lov->lov_tgts[req->rq_idx]->ltd_exp;
623 err = obd_destroy(sub_exp, req->rq_oi.oi_oa, NULL, oti, NULL);
625 CERROR("Failed to uncreate objid "LPX64" subobj "
626 LPX64" on OST idx %d: rc = %d\n",
627 src_oa->o_id, req->rq_oi.oi_oa->o_id,
631 obd_free_memmd(exp, &set->set_oi->oi_md);
633 if (oti && set->set_cookies) {
634 oti->oti_logcookies = set->set_cookies;
635 if (!set->set_cookie_sent) {
636 oti_free_cookies(oti);
637 src_oa->o_valid &= ~OBD_MD_FLCOOKIE;
639 src_oa->o_valid |= OBD_MD_FLCOOKIE;
/* Finish a create request set: when at least one sub-request completed,
 * run create_done() (which may publish the stripe md through lsmp), then
 * drop one reference on the set.
 * NOTE(review): the statement run when the last reference goes away is
 * not visible in this listing. */
645 int lov_fini_create_set(struct lov_request_set *set,struct lov_stripe_md **lsmp)
652 LASSERT(set->set_exp);
653 if (set->set_completes)
654 rc = create_done(set->set_exp, set, lsmp);
656 if (atomic_dec_and_test(&set->set_refcount))
/* Record the result of one sub-object create.
 * The request is assigned the next free stripe slot (the current count of
 * successes), errors from configured and active targets are logged, and
 * the result is recorded with lov_update_set().  The visible success-path
 * lines store the new object id in oti->oti_objid[] when available, fill
 * in the stripe's lov_oinfo (object id and OST index), advance the
 * log-cookie cursor and count cookies that were actually sent.
 * NOTE(review): the branch structure separating the error and success
 * paths is not visible in this listing. */
662 int lov_update_create_set(struct lov_request_set *set,
663 struct lov_request *req, int rc)
665 struct obd_trans_info *oti = set->set_oti;
666 struct lov_stripe_md *lsm = set->set_oi->oi_md;
667 struct lov_oinfo *loi;
668 struct lov_obd *lov = &set->set_exp->exp_obd->u.lov;
671 req->rq_stripe = set->set_success;
672 loi = lsm->lsm_oinfo[req->rq_stripe];
674 if (rc && lov->lov_tgts[req->rq_idx] &&
675 lov->lov_tgts[req->rq_idx]->ltd_active) {
676 CERROR("error creating fid "LPX64" sub-object"
677 " on OST idx %d/%d: rc = %d\n",
678 set->set_oi->oi_oa->o_id, req->rq_idx,
679 lsm->lsm_stripe_count, rc);
681 CERROR("obd_create returned invalid err %d\n", rc);
685 lov_update_set(set, req, rc);
689 if (oti && oti->oti_objid)
690 oti->oti_objid[req->rq_idx] = req->rq_oi.oi_oa->o_id;
692 loi->loi_id = req->rq_oi.oi_oa->o_id;
693 loi->loi_ost_idx = req->rq_idx;
/* the sub-object id is printed twice on purpose: once in hex, once in decimal */
694 CDEBUG(D_INODE, "objid "LPX64" has subobj "LPX64"/"LPU64" at idx %d\n",
695 lsm->lsm_object_id, loi->loi_id, loi->loi_id, req->rq_idx);
698 if (oti && set->set_cookies)
699 ++oti->oti_logcookies;
700 if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLCOOKIE)
701 set->set_cookie_sent++;
/* Build the request set for a striped object create.
 * The caller's stripe md (*lsmp) and source obdo are attached to the set's
 * obd_info, and the per-target requests are prepared by qos_prep_create().
 * The visible lov_fini_create_set() call releases the set.
 * NOTE(review): the condition guarding that call and the initialisation
 * of set_oi are not visible in this listing. */
706 int lov_prep_create_set(struct obd_export *exp, struct obd_info *oinfo,
707 struct lov_stripe_md **lsmp, struct obdo *src_oa,
708 struct obd_trans_info *oti,
709 struct lov_request_set **reqset)
711 struct lov_request_set *set;
715 OBD_ALLOC(set, sizeof(*set));
722 set->set_oi->oi_md = *lsmp;
723 set->set_oi->oi_oa = src_oa;
726 rc = qos_prep_create(exp, set);
728 lov_fini_create_set(set, lsmp);
/* Merge the attributes returned by the successful sub-requests into the
 * caller's obdo (set->set_oi->oi_oa).
 * Requests that did not complete, failed, or returned no valid attributes
 * are tested for before the merge.  The merged result is built in a
 * temporary obdo, given the caller's object id and copied over the
 * caller's obdo.  An error is logged when no stripe had valid attributes.
 * NOTE(review): allocation and release of tmp_oa, the early returns and
 * the rc chosen for each case are not visible in this listing. */
734 static int common_attr_done(struct lov_request_set *set)
736 struct list_head *pos;
737 struct lov_request *req;
739 int rc = 0, attrset = 0;
742 LASSERT(set->set_oi != NULL);
744 if (set->set_oi->oi_oa == NULL)
747 if (!set->set_success)
752 GOTO(out, rc = -ENOMEM);
754 list_for_each (pos, &set->set_list) {
755 req = list_entry(pos, struct lov_request, rq_link);
757 if (!req->rq_complete || req->rq_rc)
759 if (req->rq_oi.oi_oa->o_valid == 0) /* inactive stripe */
761 lov_merge_attrs(tmp_oa, req->rq_oi.oi_oa,
762 req->rq_oi.oi_oa->o_valid,
763 set->set_oi->oi_md, req->rq_stripe, &attrset);
766 CERROR("No stripes had valid attrs\n");
769 tmp_oa->o_id = set->set_oi->oi_oa->o_id;
770 memcpy(set->set_oi->oi_oa, tmp_oa, sizeof(*set->set_oi->oi_oa));
/* After a bulk read/write, copy the block count reported by each
 * successful sub-request (when OBD_MD_FLBLOCKS is valid) into the cached
 * lvb of the corresponding stripe.
 * NOTE(review): no stripe locking is visible around the update in this
 * listing -- confirm against the complete source. */
778 static int brw_done(struct lov_request_set *set)
780 struct lov_stripe_md *lsm = set->set_oi->oi_md;
781 struct lov_oinfo *loi = NULL;
782 struct list_head *pos;
783 struct lov_request *req;
786 list_for_each (pos, &set->set_list) {
787 req = list_entry(pos, struct lov_request, rq_link);
789 if (!req->rq_complete || req->rq_rc)
792 loi = lsm->lsm_oinfo[req->rq_stripe];
794 if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLBLOCKS)
795 loi->loi_lvb.lvb_blocks = req->rq_oi.oi_oa->o_blocks;
/* Finish a bulk read/write request set and drop one reference on it.
 * NOTE(review): the work done when set_completes is non-zero (presumably
 * brw_done()) and the statement run when the last reference goes away are
 * not visible in this listing. */
801 int lov_fini_brw_set(struct lov_request_set *set)
808 LASSERT(set->set_exp);
809 if (set->set_completes) {
811 /* FIXME update qos data here */
813 if (atomic_dec_and_test(&set->set_refcount))
/* Build the request set for a striped bulk read/write.
 *
 * The oa_bufs pages in pga are regrouped so that the pages of each stripe
 * are contiguous in set->set_pga:
 *  pass 1  counts the pages that fall on each stripe (info[].count);
 *  pass 2  creates one sub-request per stripe that has pages, giving it
 *          the range [rq_pgaidx, rq_pgaidx + rq_oabufs) of set_pga and
 *          remembering the range start in info[].index; an inactive or
 *          missing target aborts with -EIO;
 *  pass 3  copies every page into its stripe's range and rewrites its
 *          offset to a stripe-local one with lov_stripe_offset().
 * The temporary info array is freed before returning; the visible
 * lov_fini_brw_set() call releases the set.
 * NOTE(review): the initialisation of shift before pass 2 and the
 * per-stripe info[].off increment in pass 3 are not visible in this
 * listing -- confirm against the complete source. */
819 int lov_prep_brw_set(struct obd_export *exp, struct obd_info *oinfo,
820 obd_count oa_bufs, struct brw_page *pga,
821 struct obd_trans_info *oti,
822 struct lov_request_set **reqset)
829 struct lov_request_set *set;
830 struct lov_oinfo *loi = NULL;
831 struct lov_obd *lov = &exp->exp_obd->u.lov;
832 int rc = 0, i, shift;
835 OBD_ALLOC(set, sizeof(*set));
843 set->set_oabufs = oa_bufs;
844 OBD_ALLOC(set->set_pga, oa_bufs * sizeof(*set->set_pga));
846 GOTO(out, rc = -ENOMEM);
848 OBD_ALLOC(info, sizeof(*info) * oinfo->oi_md->lsm_stripe_count);
850 GOTO(out, rc = -ENOMEM);
852 /* calculate the page count for each stripe */
853 for (i = 0; i < oa_bufs; i++) {
854 int stripe = lov_stripe_number(oinfo->oi_md, pga[i].off);
855 info[stripe].count++;
858 /* alloc and initialize lov request */
860 for (i = 0 ; i < oinfo->oi_md->lsm_stripe_count; i++){
861 struct lov_request *req;
863 if (info[i].count == 0)
866 loi = oinfo->oi_md->lsm_oinfo[i];
867 if (!lov->lov_tgts[loi->loi_ost_idx] ||
868 !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) {
869 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
870 GOTO(out, rc = -EIO);
873 OBD_ALLOC(req, sizeof(*req));
875 GOTO(out, rc = -ENOMEM);
877 OBDO_ALLOC(req->rq_oi.oi_oa);
878 if (req->rq_oi.oi_oa == NULL) {
879 OBD_FREE(req, sizeof(*req));
880 GOTO(out, rc = -ENOMEM);
884 memcpy(req->rq_oi.oi_oa, oinfo->oi_oa,
885 sizeof(*req->rq_oi.oi_oa));
887 req->rq_oi.oi_oa->o_id = loi->loi_id;
888 req->rq_oi.oi_oa->o_stripe_idx = i;
890 req->rq_buflen = sizeof(*req->rq_oi.oi_md);
891 OBD_ALLOC(req->rq_oi.oi_md, req->rq_buflen);
892 if (req->rq_oi.oi_md == NULL) {
893 OBDO_FREE(req->rq_oi.oi_oa);
894 OBD_FREE(req, sizeof(*req));
895 GOTO(out, rc = -ENOMEM);
898 req->rq_idx = loi->loi_ost_idx;
901 /* XXX LOV STACKING */
902 req->rq_oi.oi_md->lsm_object_id = loi->loi_id;
903 req->rq_oi.oi_md->lsm_object_gr = oinfo->oi_md->lsm_object_gr;
/* this request owns rq_oabufs consecutive slots starting at shift */
904 req->rq_oabufs = info[i].count;
905 req->rq_pgaidx = shift;
906 shift += req->rq_oabufs;
908 /* remember the index for sort brw_page array */
909 info[i].index = req->rq_pgaidx;
911 lov_set_add_req(req, set);
914 GOTO(out, rc = -EIO);
916 /* rotate & sort the brw_page array */
917 for (i = 0; i < oa_bufs; i++) {
918 int stripe = lov_stripe_number(oinfo->oi_md, pga[i].off);
920 shift = info[stripe].index + info[stripe].off;
921 LASSERT(shift < oa_bufs);
922 set->set_pga[shift] = pga[i];
923 lov_stripe_offset(oinfo->oi_md, pga[i].off, stripe,
924 &set->set_pga[shift].off);
929 OBD_FREE(info, sizeof(*info) * oinfo->oi_md->lsm_stripe_count);
934 lov_fini_brw_set(set);
/* Finish a getattr request set: when any sub-request completed, merge the
 * per-stripe attributes with common_attr_done(), then drop one reference
 * on the set.
 * NOTE(review): the statement run when the last reference goes away is
 * not visible in this listing. */
939 int lov_fini_getattr_set(struct lov_request_set *set)
946 LASSERT(set->set_exp);
947 if (set->set_completes)
948 rc = common_attr_done(set);
950 if (atomic_dec_and_test(&set->set_refcount))
956 /* The callback for osc_getattr_async that finalizes a request info when a
957 * response is received. */
958 static int cb_getattr_update(struct obd_info *oinfo, int rc)
960 struct lov_request *lovreq;
/* oinfo is the rq_oi member embedded in a lov_request */
961 lovreq = container_of(oinfo, struct lov_request, rq_oi);
962 return lov_update_common_set(lovreq->rq_rqset, lovreq, rc);
/* Build the request set for a striped getattr.
 * For each stripe a sub-request is prepared after a target-active test:
 * it carries a private copy of the caller's obdo with o_id replaced by
 * the stripe's object id, and uses cb_getattr_update as its completion
 * callback.  An empty set yields -EIO.  Failures jump to out_set; the
 * visible call at the end hands the set to lov_fini_getattr_set().
 * NOTE(review): what happens for an inactive stripe is not visible in
 * this listing. */
965 int lov_prep_getattr_set(struct obd_export *exp, struct obd_info *oinfo,
966 struct lov_request_set **reqset)
968 struct lov_request_set *set;
969 struct lov_oinfo *loi = NULL;
970 struct lov_obd *lov = &exp->exp_obd->u.lov;
974 OBD_ALLOC(set, sizeof(*set));
982 for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) {
983 struct lov_request *req;
985 loi = oinfo->oi_md->lsm_oinfo[i];
986 if (!lov->lov_tgts[loi->loi_ost_idx] ||
987 !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) {
988 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
992 OBD_ALLOC(req, sizeof(*req));
994 GOTO(out_set, rc = -ENOMEM);
997 req->rq_idx = loi->loi_ost_idx;
999 OBDO_ALLOC(req->rq_oi.oi_oa);
1000 if (req->rq_oi.oi_oa == NULL) {
1001 OBD_FREE(req, sizeof(*req));
1002 GOTO(out_set, rc = -ENOMEM);
1004 memcpy(req->rq_oi.oi_oa, oinfo->oi_oa,
1005 sizeof(*req->rq_oi.oi_oa));
1006 req->rq_oi.oi_oa->o_id = loi->loi_id;
1007 req->rq_oi.oi_cb_up = cb_getattr_update;
1009 lov_set_add_req(req, set);
1011 if (!set->set_count)
1012 GOTO(out_set, rc = -EIO);
1016 lov_fini_getattr_set(set);
/* Finish a destroy request set: drop one reference and release the set
 * with lov_finish_set() when that was the last one.  Nothing is done yet
 * for completed requests (see the FIXME). */
1020 int lov_fini_destroy_set(struct lov_request_set *set)
1026 LASSERT(set->set_exp);
1027 if (set->set_completes) {
1028 /* FIXME update qos data here */
1031 if (atomic_dec_and_test(&set->set_refcount))
1032 lov_finish_set(set);
/* Build the request set for destroying a striped object.
 * The caller's obd_info is attached to the set and pointed at lsm and
 * src_oa.  When src_oa carries OBD_MD_FLCOOKIE, the start of the
 * log-cookie array is remembered in set_cookies.  For each stripe a
 * sub-request with a private copy of src_oa (o_id replaced by the stripe's
 * object id) is prepared after a target-active test.  An empty set yields
 * -EIO.  Failures jump to out_set; the visible call at the end hands the
 * set to lov_fini_destroy_set().
 * NOTE(review): what happens for an inactive stripe is not visible in
 * this listing. */
1037 int lov_prep_destroy_set(struct obd_export *exp, struct obd_info *oinfo,
1038 struct obdo *src_oa, struct lov_stripe_md *lsm,
1039 struct obd_trans_info *oti,
1040 struct lov_request_set **reqset)
1042 struct lov_request_set *set;
1043 struct lov_oinfo *loi = NULL;
1044 struct lov_obd *lov = &exp->exp_obd->u.lov;
1048 OBD_ALLOC(set, sizeof(*set));
1054 set->set_oi = oinfo;
1055 set->set_oi->oi_md = lsm;
1056 set->set_oi->oi_oa = src_oa;
1058 if (oti != NULL && src_oa->o_valid & OBD_MD_FLCOOKIE)
1059 set->set_cookies = oti->oti_logcookies;
1061 for (i = 0; i < lsm->lsm_stripe_count; i++) {
1062 struct lov_request *req;
1064 loi = lsm->lsm_oinfo[i];
1065 if (!lov->lov_tgts[loi->loi_ost_idx] ||
1066 !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) {
1067 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
1071 OBD_ALLOC(req, sizeof(*req));
1073 GOTO(out_set, rc = -ENOMEM);
1076 req->rq_idx = loi->loi_ost_idx;
1078 OBDO_ALLOC(req->rq_oi.oi_oa);
1079 if (req->rq_oi.oi_oa == NULL) {
1080 OBD_FREE(req, sizeof(*req));
1081 GOTO(out_set, rc = -ENOMEM);
1083 memcpy(req->rq_oi.oi_oa, src_oa, sizeof(*req->rq_oi.oi_oa));
1084 req->rq_oi.oi_oa->o_id = loi->loi_id;
1085 lov_set_add_req(req, set);
1087 if (!set->set_count)
1088 GOTO(out_set, rc = -EIO);
1092 lov_fini_destroy_set(set);
/* Finish a setattr request set: when any sub-request completed, merge the
 * returned attributes with common_attr_done(); then drop one reference
 * and release the set with lov_finish_set() when that was the last one. */
1096 int lov_fini_setattr_set(struct lov_request_set *set)
1103 LASSERT(set->set_exp);
1104 if (set->set_completes) {
1105 rc = common_attr_done(set);
1106 /* FIXME update qos data here */
1109 if (atomic_dec_and_test(&set->set_refcount))
1110 lov_finish_set(set);
/* Record the result of one per-stripe setattr and refresh the cached
 * timestamps of that stripe.
 * After lov_update_set(), a failure on a missing or inactive target is
 * tested for ("grace error"); ctime, mtime and atime are then copied from
 * the reply obdo into the stripe's lvb for each field flagged valid.
 * NOTE(review): no lov_stripe_lock()/unlock() pair is visible around the
 * lvb updates here, whereas lov_update_punch_set() takes the stripe lock
 * for its lvb update -- confirm whether that is intentional.  The
 * statement controlled by the grace check is not visible either. */
1114 int lov_update_setattr_set(struct lov_request_set *set,
1115 struct lov_request *req, int rc)
1117 struct lov_obd *lov = &req->rq_rqset->set_exp->exp_obd->u.lov;
1118 struct lov_stripe_md *lsm = req->rq_rqset->set_oi->oi_md;
1121 lov_update_set(set, req, rc);
1123 /* grace error on inactive ost */
1124 if (rc && !(lov->lov_tgts[req->rq_idx] &&
1125 lov->lov_tgts[req->rq_idx]->ltd_active))
1129 if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLCTIME)
1130 lsm->lsm_oinfo[req->rq_stripe]->loi_lvb.lvb_ctime =
1131 req->rq_oi.oi_oa->o_ctime;
1132 if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLMTIME)
1133 lsm->lsm_oinfo[req->rq_stripe]->loi_lvb.lvb_mtime =
1134 req->rq_oi.oi_oa->o_mtime;
1135 if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLATIME)
1136 lsm->lsm_oinfo[req->rq_stripe]->loi_lvb.lvb_atime =
1137 req->rq_oi.oi_oa->o_atime;
1143 /* The callback for osc_setattr_async that finalizes a request info when a
1144 * response is received. */
1145 static int cb_setattr_update(struct obd_info *oinfo, int rc)
1147 struct lov_request *lovreq;
/* oinfo is the rq_oi member embedded in a lov_request */
1148 lovreq = container_of(oinfo, struct lov_request, rq_oi);
1149 return lov_update_setattr_set(lovreq->rq_rqset, lovreq, rc);
/* Build the request set for a striped setattr.
 * When the caller's obdo carries OBD_MD_FLCOOKIE, the start of the
 * log-cookie array is remembered in set_cookies.  For each stripe a
 * sub-request with a private copy of the caller's obdo is prepared after
 * a target-active test; o_id and o_stripe_idx are set for the stripe and
 * cb_setattr_update is the completion callback.  When a size is being set
 * it is translated to a stripe-local size with lov_stripe_offset(); a
 * negative return with a non-zero size is decremented by one.  An empty
 * set yields -EIO.  Failures jump to out_set; the visible call at the end
 * hands the set to lov_fini_setattr_set().
 * NOTE(review): what happens for an inactive stripe is not visible in
 * this listing. */
1152 int lov_prep_setattr_set(struct obd_export *exp, struct obd_info *oinfo,
1153 struct obd_trans_info *oti,
1154 struct lov_request_set **reqset)
1156 struct lov_request_set *set;
1157 struct lov_oinfo *loi = NULL;
1158 struct lov_obd *lov = &exp->exp_obd->u.lov;
1162 OBD_ALLOC(set, sizeof(*set));
1169 set->set_oi = oinfo;
1170 if (oti != NULL && oinfo->oi_oa->o_valid & OBD_MD_FLCOOKIE)
1171 set->set_cookies = oti->oti_logcookies;
1173 for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) {
1174 struct lov_request *req;
1176 loi = oinfo->oi_md->lsm_oinfo[i];
1177 if (!lov->lov_tgts[loi->loi_ost_idx] ||
1178 !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) {
1179 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
1183 OBD_ALLOC(req, sizeof(*req));
1185 GOTO(out_set, rc = -ENOMEM);
1187 req->rq_idx = loi->loi_ost_idx;
1189 OBDO_ALLOC(req->rq_oi.oi_oa);
1190 if (req->rq_oi.oi_oa == NULL) {
1191 OBD_FREE(req, sizeof(*req));
1192 GOTO(out_set, rc = -ENOMEM);
1194 memcpy(req->rq_oi.oi_oa, oinfo->oi_oa,
1195 sizeof(*req->rq_oi.oi_oa));
1196 req->rq_oi.oi_oa->o_id = loi->loi_id;
1197 req->rq_oi.oi_oa->o_stripe_idx = i;
1198 req->rq_oi.oi_cb_up = cb_setattr_update;
1199 req->rq_rqset = set;
1201 if (oinfo->oi_oa->o_valid & OBD_MD_FLSIZE) {
1202 int off = lov_stripe_offset(oinfo->oi_md,
1203 oinfo->oi_oa->o_size, i,
1204 &req->rq_oi.oi_oa->o_size);
1206 if (off < 0 && req->rq_oi.oi_oa->o_size)
1207 req->rq_oi.oi_oa->o_size--;
1209 CDEBUG(D_INODE, "stripe %d has size "LPU64"/"LPU64"\n",
1210 i, req->rq_oi.oi_oa->o_size,
1211 oinfo->oi_oa->o_size);
1213 lov_set_add_req(req, set);
1215 if (!set->set_count)
1216 GOTO(out_set, rc = -EIO);
1220 lov_fini_setattr_set(set);
/* Finish a punch (truncate) request set: when sub-requests completed and
 * at least one succeeded, merge the returned attributes with
 * common_attr_done(); then drop one reference and release the set with
 * lov_finish_set() when that was the last one. */
1224 int lov_fini_punch_set(struct lov_request_set *set)
1231 LASSERT(set->set_exp);
1232 if (set->set_completes) {
1234 /* FIXME update qos data here */
1235 if (set->set_success)
1236 rc = common_attr_done(set);
1239 if (atomic_dec_and_test(&set->set_refcount))
1240 lov_finish_set(set);
/* Record the result of one per-stripe punch and, under the stripe lock,
 * refresh the cached block count of that stripe when the reply flags
 * OBD_MD_FLBLOCKS as valid.
 * NOTE(review): the statement controlled by the grace check is not visible
 * in this listing. */
1245 int lov_update_punch_set(struct lov_request_set *set,
1246 struct lov_request *req, int rc)
1248 struct lov_obd *lov = &req->rq_rqset->set_exp->exp_obd->u.lov;
1249 struct lov_stripe_md *lsm = req->rq_rqset->set_oi->oi_md;
1252 lov_update_set(set, req, rc);
1254 /* grace error on inactive ost */
/* NOTE(review): unlike lov_update_common_set() and lov_update_setattr_set(),
 * this test dereferences lov_tgts[req->rq_idx] without first checking the
 * entry for NULL -- confirm it cannot be NULL here. */
1255 if (rc && !lov->lov_tgts[req->rq_idx]->ltd_active)
1259 lov_stripe_lock(lsm);
1260 if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLBLOCKS) {
1261 lsm->lsm_oinfo[req->rq_stripe]->loi_lvb.lvb_blocks =
1262 req->rq_oi.oi_oa->o_blocks;
1265 /* Do we need to update lvb_size here? No, because
1266 * it has already been done in ll_truncate(). -jay */
1267 lov_stripe_unlock(lsm);
/* Completion callback installed by lov_prep_punch_set(): recovers the
 * lov_request that embeds oinfo and forwards rc to lov_update_punch_set(). */
1273 /* The callback for osc_punch that finalizes a request info when a response
1275 static int cb_update_punch(struct obd_info *oinfo, int rc)
1277 struct lov_request *lovreq;
1278 lovreq = container_of(oinfo, struct lov_request, rq_oi);
1279 return lov_update_punch_set(lovreq->rq_rqset, lovreq, rc);
/* Build the request set for a striped punch (truncate).
 * For each stripe a sub-request is prepared after a target-active test
 * and an extent-intersection test against oinfo->oi_policy.  It carries a
 * private copy of the caller's obdo (o_id and o_stripe_idx set for the
 * stripe), the stripe-local extent [rs, re] with gid -1, and
 * cb_update_punch as completion callback.  An empty set yields -EIO.
 * Failures jump to out_set; the visible call at the end hands the set to
 * lov_fini_punch_set().
 * NOTE(review): what happens for an inactive or non-intersecting stripe
 * is not visible in this listing. */
1282 int lov_prep_punch_set(struct obd_export *exp, struct obd_info *oinfo,
1283 struct obd_trans_info *oti,
1284 struct lov_request_set **reqset)
1286 struct lov_request_set *set;
1287 struct lov_oinfo *loi = NULL;
1288 struct lov_obd *lov = &exp->exp_obd->u.lov;
1292 OBD_ALLOC(set, sizeof(*set));
1297 set->set_oi = oinfo;
1300 for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) {
1301 struct lov_request *req;
1304 loi = oinfo->oi_md->lsm_oinfo[i];
1305 if (!lov->lov_tgts[loi->loi_ost_idx] ||
1306 !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) {
1307 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
1311 if (!lov_stripe_intersects(oinfo->oi_md, i,
1312 oinfo->oi_policy.l_extent.start,
1313 oinfo->oi_policy.l_extent.end,
1317 OBD_ALLOC(req, sizeof(*req));
1319 GOTO(out_set, rc = -ENOMEM);
1321 req->rq_idx = loi->loi_ost_idx;
1323 OBDO_ALLOC(req->rq_oi.oi_oa);
1324 if (req->rq_oi.oi_oa == NULL) {
1325 OBD_FREE(req, sizeof(*req));
1326 GOTO(out_set, rc = -ENOMEM);
1328 memcpy(req->rq_oi.oi_oa, oinfo->oi_oa,
1329 sizeof(*req->rq_oi.oi_oa));
1330 req->rq_oi.oi_oa->o_id = loi->loi_id;
1331 req->rq_oi.oi_oa->o_stripe_idx = i;
1332 req->rq_oi.oi_cb_up = cb_update_punch;
1333 req->rq_rqset = set;
1335 req->rq_oi.oi_policy.l_extent.start = rs;
1336 req->rq_oi.oi_policy.l_extent.end = re;
1337 req->rq_oi.oi_policy.l_extent.gid = -1;
1339 lov_set_add_req(req, set);
1341 if (!set->set_count)
1342 GOTO(out_set, rc = -EIO);
1346 lov_fini_punch_set(set);
/* Finish a sync request set: drop one reference and release the set with
 * lov_finish_set() when that was the last one.
 * NOTE(review): the statement run when sub-requests completed but none
 * succeeded is not visible in this listing. */
1350 int lov_fini_sync_set(struct lov_request_set *set)
1357 LASSERT(set->set_exp);
1358 if (set->set_completes) {
1359 if (!set->set_success)
1361 /* FIXME update qos data here */
1364 if (atomic_dec_and_test(&set->set_refcount))
1365 lov_finish_set(set);
/* Build the request set for syncing the byte range [start, end] of a
 * striped object.
 * The caller's obd_info is attached to the set and pointed at lsm and
 * src_oa.  For each stripe a sub-request is prepared after a target-active
 * test and an extent-intersection test: it carries a private copy of
 * src_oa (o_id and o_stripe_idx set for the stripe) and the stripe-local
 * extent [rs, re] with gid -1.  An empty set yields -EIO.  Failures jump
 * to out_set; the visible call at the end hands the set to
 * lov_fini_sync_set().
 * NOTE(review): what happens for an inactive or non-intersecting stripe
 * is not visible in this listing. */
1370 int lov_prep_sync_set(struct obd_export *exp, struct obd_info *oinfo,
1371 struct obdo *src_oa, struct lov_stripe_md *lsm,
1372 obd_off start, obd_off end,
1373 struct lov_request_set **reqset)
1375 struct lov_request_set *set;
1376 struct lov_oinfo *loi = NULL;
1377 struct lov_obd *lov = &exp->exp_obd->u.lov;
1381 OBD_ALLOC(set, sizeof(*set));
1387 set->set_oi = oinfo;
1388 set->set_oi->oi_md = lsm;
1389 set->set_oi->oi_oa = src_oa;
1391 for (i = 0; i < lsm->lsm_stripe_count; i++) {
1392 struct lov_request *req;
1395 loi = lsm->lsm_oinfo[i];
1396 if (!lov->lov_tgts[loi->loi_ost_idx] ||
1397 !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) {
1398 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
1402 if (!lov_stripe_intersects(lsm, i, start, end, &rs, &re))
1405 OBD_ALLOC(req, sizeof(*req));
1407 GOTO(out_set, rc = -ENOMEM);
1409 req->rq_idx = loi->loi_ost_idx;
1411 OBDO_ALLOC(req->rq_oi.oi_oa);
1412 if (req->rq_oi.oi_oa == NULL) {
1413 OBD_FREE(req, sizeof(*req));
1414 GOTO(out_set, rc = -ENOMEM);
1416 memcpy(req->rq_oi.oi_oa, src_oa, sizeof(*req->rq_oi.oi_oa));
1417 req->rq_oi.oi_oa->o_id = loi->loi_id;
1418 req->rq_oi.oi_oa->o_stripe_idx = i;
1420 req->rq_oi.oi_policy.l_extent.start = rs;
1421 req->rq_oi.oi_policy.l_extent.end = re;
1422 req->rq_oi.oi_policy.l_extent.gid = -1;
1424 lov_set_add_req(req, set);
1426 if (!set->set_count)
1427 GOTO(out_set, rc = -EIO);
1431 lov_fini_sync_set(set);
/* LOV_U64_MAX is the all-ones __u64 value.  In this file it marks a
 * counter that has saturated: lov_fini_statfs() skips such values.
 * LOV_SUM_MAX(tot, add): when tot + add wraps around (the sum is smaller
 * than tot), tot is pinned to LOV_U64_MAX.
 * NOTE(review): the non-overflow branch (presumably tot += add) is not
 * visible in this listing.  Both arguments are evaluated more than once,
 * so they must be free of side effects. */
1435 #define LOV_U64_MAX ((__u64)~0ULL)
1436 #define LOV_SUM_MAX(tot, add) \
1438 if ((tot) + (add) < (tot)) \
1439 (tot) = LOV_U64_MAX; \
/* Post-process aggregated statfs data and cache it on the LOV device.
 * os_files and os_ffree are divided by the expected stripe count unless
 * they hold the saturated value LOV_U64_MAX.  The result is then copied
 * into obd->obd_osfs and time-stamped, both under obd_osfs_lock.
 * NOTE(review): no guard against expected_stripes being 0 and no use of
 * the success argument is visible in this listing -- confirm against the
 * complete source before relying on the division being safe. */
1444 int lov_fini_statfs(struct obd_device *obd, struct obd_statfs *osfs,int success)
1449 __u32 expected_stripes = lov_get_stripecnt(&obd->u.lov, 0);
1451 if (osfs->os_files != LOV_U64_MAX)
1452 do_div(osfs->os_files, expected_stripes);
1453 if (osfs->os_ffree != LOV_U64_MAX)
1454 do_div(osfs->os_ffree, expected_stripes);
1456 spin_lock(&obd->obd_osfs_lock);
1457 memcpy(&obd->obd_osfs, osfs, sizeof(*osfs));
1458 obd->obd_osfs_age = get_jiffies_64();
1459 spin_unlock(&obd->obd_osfs_lock);
/* Finish a statfs request set: when any sub-request completed, finalise
 * the aggregated statistics with lov_fini_statfs(); then drop one
 * reference and release the set with lov_finish_set() when that was the
 * last one. */
1466 int lov_fini_statfs_set(struct lov_request_set *set)
1474 if (set->set_completes) {
1475 rc = lov_fini_statfs(set->set_obd, set->set_oi->oi_osfs,
1479 if (atomic_dec_and_test(&set->set_refcount))
1480 lov_finish_set(set);
/* Fold the statfs reply of one target (lov_sfs) into the aggregate osfs.
 *
 * obd      device whose cached obd_osfs is refreshed from lov_sfs, under
 *          obd_osfs_lock (cb_statfs_update() passes the target's device)
 * osfs     running aggregate for the whole LOV
 * lov_sfs  reply from one target
 * success  number of targets already folded in
 *
 * Visible aggregation lines: a plain copy of lov_sfs into osfs; a
 * "minimum" rule that takes bfree/bavail from the target with the smaller
 * bavail; a "sum" rule for bfree/bavail; a sum for os_blocks; and
 * saturating sums for os_files and os_ffree.
 * NOTE(review): the conditions choosing between copy, minimum and sum are
 * not visible in this listing (presumably the copy is used for the first
 * reply, i.e. success == 0, and minimum vs. sum is a build-time choice) --
 * confirm against the complete source. */
1485 void lov_update_statfs(struct obd_device *obd, struct obd_statfs *osfs,
1486 struct obd_statfs *lov_sfs, int success)
1488 spin_lock(&obd->obd_osfs_lock);
1489 memcpy(&obd->obd_osfs, lov_sfs, sizeof(*lov_sfs));
1490 obd->obd_osfs_age = get_jiffies_64();
1491 spin_unlock(&obd->obd_osfs_lock);
1494 memcpy(osfs, lov_sfs, sizeof(*lov_sfs));
1497 /* Sandia requested that df (and so, statfs) only
1498 returned minimal available space on
1499 a single OST, so people would be able to
1500 write this much data guaranteed. */
1501 if (osfs->os_bavail > lov_sfs->os_bavail) {
1502 /* Presumably if new bavail is smaller,
1503 new bfree is bigger as well */
1504 osfs->os_bfree = lov_sfs->os_bfree;
1505 osfs->os_bavail = lov_sfs->os_bavail;
1508 osfs->os_bfree += lov_sfs->os_bfree;
1509 osfs->os_bavail += lov_sfs->os_bavail;
1511 osfs->os_blocks += lov_sfs->os_blocks;
1512 /* XXX not sure about this one - depends on policy.
1513 * - could be minimum if we always stripe on all OBDs
1514 * (but that would be wrong for any other policy,
1515 * if one of the OBDs has no more objects left)
1516 * - could be sum if we stripe whole objects
1517 * - could be average, just to give a nice number
1519 * To give a "reasonable" (if not wholly accurate)
1520 * number, we divide the total number of free objects
1521 * by expected stripe count (watch out for overflow).
1523 LOV_SUM_MAX(osfs->os_files, lov_sfs->os_files);
1524 LOV_SUM_MAX(osfs->os_ffree, lov_sfs->os_ffree);
1528 /* The callback for osc_statfs_async that finalizes a request info when a
1529 * response is received. */
/* The success count is sampled before this reply is recorded, so the value
 * handed to lov_update_statfs() is the number of earlier successes.
 * NOTE(review): lov_tgts[rq_idx]->ltd_exp is dereferenced when computing
 * obd, before the NULL test on the same entry further down -- confirm the
 * entry cannot be NULL.  The statements between the grace check and the
 * lov_update_statfs() call are not visible in this listing. */
1530 static int cb_statfs_update(struct obd_info *oinfo, int rc)
1532 struct lov_request *lovreq;
1533 struct obd_statfs *osfs, *lov_sfs;
1534 struct obd_device *obd;
1535 struct lov_obd *lov;
1539 lovreq = container_of(oinfo, struct lov_request, rq_oi);
1540 lov = &lovreq->rq_rqset->set_obd->u.lov;
1541 obd = class_exp2obd(lov->lov_tgts[lovreq->rq_idx]->ltd_exp);
1543 osfs = lovreq->rq_rqset->set_oi->oi_osfs;
1544 lov_sfs = oinfo->oi_osfs;
1546 success = lovreq->rq_rqset->set_success;
1548 /* XXX: the same is done in lov_update_common_set, however
1549 lovset->set_exp is not initialized. */
1550 lov_update_set(lovreq->rq_rqset, lovreq, rc);
1552 if (rc && !(lov->lov_tgts[lovreq->rq_idx] &&
1553 lov->lov_tgts[lovreq->rq_idx]->ltd_active))
1558 lov_update_statfs(obd, osfs, lov_sfs, success);
/* Build the request set for a statfs across the LOV's targets.
 * The loop runs over the first ld_tgt_count entries of the target table
 * (not over the stripes of an object).  For each entry a sub-request with
 * its own obd_statfs buffer and cb_statfs_update as completion callback
 * is prepared after a target-active test.  An empty set yields -EIO.
 * Failures jump to out_set; the visible call at the end hands the set to
 * lov_fini_statfs_set().
 * NOTE(review): what happens for an inactive target and the assignment of
 * rq_idx are not visible in this listing. */
1564 int lov_prep_statfs_set(struct obd_device *obd, struct obd_info *oinfo,
1565 struct lov_request_set **reqset)
1567 struct lov_request_set *set;
1568 struct lov_obd *lov = &obd->u.lov;
1572 OBD_ALLOC(set, sizeof(*set));
1578 set->set_oi = oinfo;
1580 /* We only get block data from the OBD */
1581 for (i = 0; i < lov->desc.ld_tgt_count; i++) {
1582 struct lov_request *req;
1584 if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_active) {
1585 CDEBUG(D_HA, "lov idx %d inactive\n", i);
1589 OBD_ALLOC(req, sizeof(*req));
1591 GOTO(out_set, rc = -ENOMEM);
1593 OBD_ALLOC(req->rq_oi.oi_osfs, sizeof(*req->rq_oi.oi_osfs));
1594 if (req->rq_oi.oi_osfs == NULL) {
1595 OBD_FREE(req, sizeof(*req));
1596 GOTO(out_set, rc = -ENOMEM);
1600 req->rq_oi.oi_cb_up = cb_statfs_update;
1601 req->rq_rqset = set;
1603 lov_set_add_req(req, set);
1605 if (!set->set_count)
1606 GOTO(out_set, rc = -EIO);
1610 lov_fini_statfs_set(set);