4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, Whamcloud, Inc.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
37 #define DEBUG_SUBSYSTEM S_LOV
40 #include <libcfs/libcfs.h>
42 #include <liblustre.h>
45 #include <obd_class.h>
47 #include <lustre/lustre_idl.h>
49 #include "lov_internal.h"
/*
 * Initialize the bookkeeping fields of a request set: zero the
 * completion, success and finish-checked counters, start with an empty
 * request list, set the reference count to 1, and set up the wait queue
 * and the spinlock.
 *
 * NOTE(review): some original lines of this function are not visible in
 * this view, so other fields may be initialized here as well.
 */
51 static void lov_init_set(struct lov_request_set *set)
54 cfs_atomic_set(&set->set_completes, 0);
55 cfs_atomic_set(&set->set_success, 0);
56 cfs_atomic_set(&set->set_finish_checked, 0);
58 CFS_INIT_LIST_HEAD(&set->set_list);
/* The set starts out holding a single reference. */
59 cfs_atomic_set(&set->set_refcount, 1);
60 cfs_waitq_init(&set->set_waitq);
61 spin_lock_init(&set->set_lock);
/*
 * Tear down a request set and free everything hanging off it.
 *
 * Every request on set->set_list is unlinked and freed together with its
 * per-request obdo (rq_oi.oi_oa), its private stripe metadata copy
 * (rq_oi.oi_md, rq_buflen bytes) and, if present, its statfs buffer
 * (rq_oi.oi_osfs).  Then the sorted page array (set_pga, set_oabufs
 * entries) is freed, the lock-handle reference is dropped with
 * lov_llh_put(), and finally the set itself is freed.
 *
 * NOTE(review): the guards around several of these frees (oi_oa, oi_md,
 * set_pga, set_lockh) are on lines not visible in this view; presumably
 * each one is conditional on the pointer being set -- confirm.
 */
64 void lov_finish_set(struct lov_request_set *set)
/* The "safe" iterator is used because entries are deleted while walking. */
70 cfs_list_for_each_safe(pos, n, &set->set_list) {
71 struct lov_request *req = cfs_list_entry(pos,
74 cfs_list_del_init(&req->rq_link);
77 OBDO_FREE(req->rq_oi.oi_oa);
/* rq_buflen records the size the metadata copy was allocated with. */
79 OBD_FREE_LARGE(req->rq_oi.oi_md, req->rq_buflen);
80 if (req->rq_oi.oi_osfs)
81 OBD_FREE(req->rq_oi.oi_osfs,
82 sizeof(*req->rq_oi.oi_osfs));
83 OBD_FREE(req, sizeof(*req));
87 int len = set->set_oabufs * sizeof(*set->set_pga);
88 OBD_FREE_LARGE(set->set_pga, len);
91 lov_llh_put(set->set_lockh);
93 OBD_FREE(set, sizeof(*set));
/*
 * Check whether every request in the set has completed.
 *
 * Compares the current completion counter with set->set_count.  When they
 * match, set_finish_checked is atomically incremented and tested for the
 * value 1, i.e. whether this caller is the first one to observe the
 * finished state.
 *
 * NOTE(review): the use of 'idempotent' and the actual return statements
 * are on lines not visible in this view.
 */
97 int lov_set_finished(struct lov_request_set *set, int idempotent)
99 int completes = cfs_atomic_read(&set->set_completes);
101 CDEBUG(D_INFO, "check set %d/%d\n", completes, set->set_count);
103 if (completes == set->set_count) {
106 if (cfs_atomic_inc_return(&set->set_finish_checked) == 1)
/*
 * Record the completion of one request in its set: mark the request
 * complete, bump the set's completion counter, bump the success counter,
 * and wake up anyone sleeping on the set's wait queue.
 *
 * NOTE(review): the condition guarding the success increment is on a line
 * not visible here; presumably it is only taken when rc == 0 -- confirm.
 */
112 void lov_update_set(struct lov_request_set *set,
113 struct lov_request *req, int rc)
115 req->rq_complete = 1;
118 cfs_atomic_inc(&set->set_completes);
120 cfs_atomic_inc(&set->set_success);
122 cfs_waitq_signal(&set->set_waitq);
/*
 * Common per-request completion handler.
 *
 * Records the completion via lov_update_set() and then special-cases a
 * non-zero rc coming from a target that is missing from lov_tgts[] or is
 * not active ("grace error on inactive ost").
 *
 * NOTE(review): the statement executed for the graced error and the
 * return statement are on lines not visible in this view.
 */
125 int lov_update_common_set(struct lov_request_set *set,
126 struct lov_request *req, int rc)
128 struct lov_obd *lov = &set->set_exp->exp_obd->u.lov;
131 lov_update_set(set, req, rc);
133 /* grace error on inactive ost */
134 if (rc && !(lov->lov_tgts[req->rq_idx] &&
135 lov->lov_tgts[req->rq_idx]->ltd_active))
138 /* FIXME in raid1 regime, should return 0 */
/*
 * Append a request to the tail of the set's request list.
 *
 * NOTE(review): the remaining lines of this function are not visible in
 * this view; any further bookkeeping (e.g. counting the request or
 * linking it back to the set) would be there.
 */
142 void lov_set_add_req(struct lov_request *req, struct lov_request_set *set)
144 cfs_list_add_tail(&req->rq_link, &set->set_list);
/*
 * Declared here but defined outside this view; called from
 * lov_update_enqueue_set() with the per-stripe lock handle, the stripe's
 * lov_oinfo, the enqueue flags, an lvb, the lock mode and the enqueue rc.
 */
149 extern void osc_update_enqueue(struct lustre_handle *lov_lockhp,
150 struct lov_oinfo *loi, int flags,
151 struct ost_lvb *lvb, __u32 mode, int rc);
/*
 * LOV-level post-processing of a single stripe enqueue result.
 *
 * A result counts as a failure when rc is not ELDLM_OK, unless it is
 * ELDLM_LOCK_ABORTED on an intent lock (LDLM_FL_HAS_INTENT).  On failure
 * the per-stripe lock handle is zeroed.  If the target at 'idx' exists
 * and is active, the error is logged, except for -EINTR and -EUSERS
 * (the latter is how an OST reports file contention).
 *
 * NOTE(review): the tail of the parameter list (which supplies 'oid' and
 * 'rc'), the branch taken for a missing/inactive target, and the return
 * statement are on lines not visible in this view.
 */
153 static int lov_update_enqueue_lov(struct obd_export *exp,
154 struct lustre_handle *lov_lockhp,
155 struct lov_oinfo *loi, int flags, int idx,
158 struct lov_obd *lov = &exp->exp_obd->u.lov;
160 if (rc != ELDLM_OK &&
161 !(rc == ELDLM_LOCK_ABORTED && (flags & LDLM_FL_HAS_INTENT))) {
/* No usable lock was obtained for this stripe: invalidate the handle. */
162 memset(lov_lockhp, 0, sizeof(*lov_lockhp));
163 if (lov->lov_tgts[idx] && lov->lov_tgts[idx]->ltd_active) {
164 /* -EUSERS used by OST to report file contention */
165 if (rc != -EINTR && rc != -EUSERS)
166 CERROR("enqueue objid "LPX64" subobj "
167 LPX64" on OST idx %d: rc %d\n",
168 oid, loi->loi_id, loi->loi_ost_idx, rc);
/*
 * Process the result of one stripe's enqueue.
 *
 * Looks up the per-stripe lock handle and the stripe's lov_oinfo from
 * the set-wide obd_info.  Then, with the stripe lock held, it:
 *   - passes the result to osc_update_enqueue(), handing it the lvb from
 *     the request's private metadata copy;
 *   - zeroes the lock handle if the enqueue was aborted on an intent lock;
 *   - runs the LOV-level handling in lov_update_enqueue_lov(), whose
 *     return value replaces rc.
 * After dropping the stripe lock the completion is recorded with
 * lov_update_set().
 *
 * NOTE(review): the return statement is on a line not visible here.
 */
175 int lov_update_enqueue_set(struct lov_request *req, __u32 mode, int rc)
177 struct lov_request_set *set = req->rq_rqset;
178 struct lustre_handle *lov_lockhp;
179 struct obd_info *oi = set->set_oi;
180 struct lov_oinfo *loi;
/* Per-stripe handle lives in the array that trails the lov_lock_handles. */
185 lov_lockhp = set->set_lockh->llh_handles + req->rq_stripe;
186 loi = oi->oi_md->lsm_oinfo[req->rq_stripe];
188 /* XXX LOV STACKING: OSC gets a copy, created in lov_prep_enqueue_set
189 * and that copy can be arbitrarily out of date.
191 * The LOV API is due for a serious rewriting anyways, and this
192 * can be addressed then. */
194 lov_stripe_lock(oi->oi_md);
195 osc_update_enqueue(lov_lockhp, loi, oi->oi_flags,
196 &req->rq_oi.oi_md->lsm_oinfo[0]->loi_lvb, mode, rc);
197 if (rc == ELDLM_LOCK_ABORTED && (oi->oi_flags & LDLM_FL_HAS_INTENT))
198 memset(lov_lockhp, 0, sizeof *lov_lockhp);
199 rc = lov_update_enqueue_lov(set->set_exp, lov_lockhp, loi, oi->oi_flags,
200 req->rq_idx, oi->oi_md->lsm_object_id, rc);
201 lov_stripe_unlock(oi->oi_md);
202 lov_update_set(set, req, rc);
206 /* The callback for osc_enqueue that updates lov info for every OSC request. */
/*
 * 'cookie' is the obd_info embedded in a lov_request (rq_oi).  The
 * enclosing request is recovered with container_of(), the lock mode is
 * read from the owning set's enqueue info (set_ei), and the result is
 * forwarded to lov_update_enqueue_set().
 */
207 static int cb_update_enqueue(void *cookie, int rc)
209 struct obd_info *oinfo = cookie;
210 struct ldlm_enqueue_info *einfo;
211 struct lov_request *lovreq;
213 lovreq = container_of(oinfo, struct lov_request, rq_oi);
214 einfo = lovreq->rq_rqset->set_ei;
215 return lov_update_enqueue_set(lovreq, einfo->ei_mode, rc);
/*
 * Finish an enqueue/match operation on a set.
 *
 * If at least one request completed and every completed request
 * succeeded, nothing needs undoing.  Otherwise the locks that were
 * obtained are cancelled: each request that completed without error and
 * whose per-stripe lock handle is in use is passed to obd_cancel() on its
 * target's export, and a failure is logged when that target is active.
 * At the end a reference on the set's lock handles is dropped.
 *
 * NOTE(review): the bodies of the skip conditions (presumably 'continue'),
 * the declaration of 'rc' and the return statements are on lines not
 * visible in this view.
 *
 * NOTE(review): obd_cancel() dereferences lov->lov_tgts[req->rq_idx]
 * before the NULL check in the following 'if'; if the target slot can be
 * NULL here the dereference comes too early -- confirm.
 */
218 static int enqueue_done(struct lov_request_set *set, __u32 mode)
220 struct lov_request *req;
221 struct lov_obd *lov = &set->set_exp->exp_obd->u.lov;
222 int completes = cfs_atomic_read(&set->set_completes);
226 /* enqueue/match success, just return */
227 if (completes && completes == cfs_atomic_read(&set->set_success))
230 /* cancel enqueued/matched locks */
231 cfs_list_for_each_entry(req, &set->set_list, rq_link) {
232 struct lustre_handle *lov_lockhp;
/* Only requests that completed successfully can hold a lock. */
234 if (!req->rq_complete || req->rq_rc)
237 lov_lockhp = set->set_lockh->llh_handles + req->rq_stripe;
239 if (!lustre_handle_is_used(lov_lockhp))
242 rc = obd_cancel(lov->lov_tgts[req->rq_idx]->ltd_exp,
243 req->rq_oi.oi_md, mode, lov_lockhp);
244 if (rc && lov->lov_tgts[req->rq_idx] &&
245 lov->lov_tgts[req->rq_idx]->ltd_active)
246 CERROR("cancelling obdjid "LPX64" on OST "
247 "idx %d error: rc = %d\n",
248 req->rq_oi.oi_md->lsm_object_id,
252 lov_llh_put(set->set_lockh);
/*
 * Finish an enqueue request set.
 *
 * Asserts that the set has an export.  In one branch the completion
 * counter is reset to 0 and enqueue_done() is run; otherwise, if the set
 * has lock handles, a reference on them is dropped.  The function returns
 * 'rc' when it is non-zero and the enqueue_done() result otherwise.
 *
 * NOTE(review): the conditions selecting the enqueue_done() branch (the
 * original comment says it is only for sync requests), the NULL-set check
 * and the release of the set are on lines not visible in this view.
 */
256 int lov_fini_enqueue_set(struct lov_request_set *set, __u32 mode, int rc,
257 struct ptlrpc_request_set *rqset)
264 LASSERT(set->set_exp);
265 /* Do enqueue_done only for sync requests and if any request
269 cfs_atomic_set(&set->set_completes, 0);
270 ret = enqueue_done(set, mode);
271 } else if (set->set_lockh)
272 lov_llh_put(set->set_lockh);
276 RETURN(rc ? rc : ret);
/*
 * Take a reference on a lov_lock_handles object and log the new count.
 * Installed as the hop_addref operation of lov_handle_ops, hence the
 * untyped argument.
 */
279 static void lov_llh_addref(void *llhp)
281 struct lov_lock_handles *llh = llhp;
283 cfs_atomic_inc(&llh->llh_refcount);
284 CDEBUG(D_INFO, "GETting llh %p : new refcount %d\n", llh,
285 cfs_atomic_read(&llh->llh_refcount));
/*
 * Handle operations for lov_lock_handles objects; passed to
 * class_handle_hash() when a new object is created in lov_llh_new().
 */
288 static struct portals_handle_ops lov_handle_ops = {
289 .hop_addref = lov_llh_addref,
/*
 * Allocate a lov_lock_handles object with room for one lock handle per
 * stripe of 'lsm', record the stripe count, and register it in the handle
 * hash with lov_handle_ops.
 *
 * The reference count starts at 2.
 * NOTE(review): presumably one reference is for the handle hash and one
 * for the caller -- confirm.  The allocation-failure check and the return
 * statement are on lines not visible in this view.
 */
293 static struct lov_lock_handles *lov_llh_new(struct lov_stripe_md *lsm)
295 struct lov_lock_handles *llh;
/* Header plus a trailing array of lsm_stripe_count handles. */
297 OBD_ALLOC(llh, sizeof *llh +
298 sizeof(*llh->llh_handles) * lsm->lsm_stripe_count);
302 cfs_atomic_set(&llh->llh_refcount, 2);
303 llh->llh_stripe_count = lsm->lsm_stripe_count;
304 CFS_INIT_LIST_HEAD(&llh->llh_handle.h_link);
305 class_handle_hash(&llh->llh_handle, &lov_handle_ops);
/*
 * Build the request set for a striped lock enqueue.
 *
 * Allocates the set and a lov_lock_handles object for the file, and
 * publishes the latter's cookie through oinfo->oi_lockh.  Then, for every
 * stripe whose range intersects the requested extent and whose target is
 * present and active, a lov_request is allocated and queued with:
 *   - a private stripe-metadata buffer holding the lsm header, one
 *     lov_oinfo pointer and one lov_oinfo (lsm_oinfo[0] is pointed at
 *     that trailing lov_oinfo);
 *   - the per-stripe lock handle slot, the enqueue callback and flags;
 *   - the stripe-local extent and the caller's gid;
 *   - the sub-object id/seq and a copy of the stripe's kms/lvb state.
 *
 * Allocation failures leave through out_set with -ENOMEM; another path
 * leaves with -EIO.  The error path finishes the set with
 * lov_fini_enqueue_set().
 *
 * NOTE(review): several lines are not visible in this view, including the
 * declarations of i/rc/start/end, the set initialization, the handling of
 * non-intersecting or inactive stripes (presumably 'continue'), the
 * condition for -EIO (presumably no request was queued) and the success
 * return that stores the set in *reqset.
 */
310 int lov_prep_enqueue_set(struct obd_export *exp, struct obd_info *oinfo,
311 struct ldlm_enqueue_info *einfo,
312 struct lov_request_set **reqset)
314 struct lov_obd *lov = &exp->exp_obd->u.lov;
315 struct lov_request_set *set;
319 OBD_ALLOC(set, sizeof(*set));
327 set->set_lockh = lov_llh_new(oinfo->oi_md);
328 if (set->set_lockh == NULL)
329 GOTO(out_set, rc = -ENOMEM);
/* Hand the caller a cookie that identifies the whole handle group. */
330 oinfo->oi_lockh->cookie = set->set_lockh->llh_handle.h_cookie;
332 for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) {
333 struct lov_oinfo *loi;
334 struct lov_request *req;
337 loi = oinfo->oi_md->lsm_oinfo[i];
338 if (!lov_stripe_intersects(oinfo->oi_md, i,
339 oinfo->oi_policy.l_extent.start,
340 oinfo->oi_policy.l_extent.end,
344 if (!lov->lov_tgts[loi->loi_ost_idx] ||
345 !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) {
346 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
350 OBD_ALLOC(req, sizeof(*req));
352 GOTO(out_set, rc = -ENOMEM);
/* Single-stripe metadata copy: header + one pointer + one lov_oinfo. */
354 req->rq_buflen = sizeof(*req->rq_oi.oi_md) +
355 sizeof(struct lov_oinfo *) +
356 sizeof(struct lov_oinfo);
357 OBD_ALLOC_LARGE(req->rq_oi.oi_md, req->rq_buflen);
358 if (req->rq_oi.oi_md == NULL) {
/* The request is not on the list yet, so free it by hand. */
359 OBD_FREE(req, sizeof(*req));
360 GOTO(out_set, rc = -ENOMEM);
/* Point lsm_oinfo[0] at the lov_oinfo stored at the end of the buffer. */
362 req->rq_oi.oi_md->lsm_oinfo[0] =
363 ((void *)req->rq_oi.oi_md) + sizeof(*req->rq_oi.oi_md) +
364 sizeof(struct lov_oinfo *);
366 /* Set lov request specific parameters. */
367 req->rq_oi.oi_lockh = set->set_lockh->llh_handles + i;
368 req->rq_oi.oi_cb_up = cb_update_enqueue;
369 req->rq_oi.oi_flags = oinfo->oi_flags;
371 LASSERT(req->rq_oi.oi_lockh);
373 req->rq_oi.oi_policy.l_extent.gid =
374 oinfo->oi_policy.l_extent.gid;
375 req->rq_oi.oi_policy.l_extent.start = start;
376 req->rq_oi.oi_policy.l_extent.end = end;
378 req->rq_idx = loi->loi_ost_idx;
381 /* XXX LOV STACKING: submd should be from the subobj */
382 req->rq_oi.oi_md->lsm_object_id = loi->loi_id;
383 req->rq_oi.oi_md->lsm_object_seq = loi->loi_seq;
384 req->rq_oi.oi_md->lsm_stripe_count = 0;
385 req->rq_oi.oi_md->lsm_oinfo[0]->loi_kms_valid =
387 req->rq_oi.oi_md->lsm_oinfo[0]->loi_kms = loi->loi_kms;
388 req->rq_oi.oi_md->lsm_oinfo[0]->loi_lvb = loi->loi_lvb;
390 lov_set_add_req(req, set);
393 GOTO(out_set, rc = -EIO);
397 lov_fini_enqueue_set(set, einfo->ei_mode, rc, NULL);
/*
 * Finish a lock-match request set.
 *
 * Asserts that the set has an export and runs enqueue_done().  When every
 * request in the set succeeded and the caller was only testing for a lock
 * (LDLM_FL_TEST_LOCK), an additional reference on the lock handles is
 * dropped.
 *
 * NOTE(review): the NULL-set check, the release of the set and the return
 * statement are on lines not visible in this view.
 */
401 int lov_fini_match_set(struct lov_request_set *set, __u32 mode, int flags)
408 LASSERT(set->set_exp);
409 rc = enqueue_done(set, mode);
410 if ((set->set_count == cfs_atomic_read(&set->set_success)) &&
411 (flags & LDLM_FL_TEST_LOCK))
412 lov_llh_put(set->set_lockh);
/*
 * Build the request set for a striped lock match.
 *
 * Allocates the set, attaches 'lsm' and a new lov_lock_handles object,
 * and returns the latter's cookie through 'lockh'.  For every stripe
 * intersecting the policy extent a request is allocated with a private
 * header-only stripe-metadata copy, the stripe-local extent, the caller's
 * gid, the target index and the sub-object id/seq, and is queued on the
 * set.
 *
 * Unlike the enqueue variant, an intersecting stripe whose target is
 * missing or inactive fails the whole operation with -EIO.  Allocation
 * failures give -ENOMEM.  The error path finishes the set with
 * lov_fini_match_set().
 *
 * NOTE(review): the declarations of i/rc/start/end, the set
 * initialization, the handling of non-intersecting stripes, the condition
 * for the final -EIO and the success return are on lines not visible in
 * this view.
 */
419 int lov_prep_match_set(struct obd_export *exp, struct obd_info *oinfo,
420 struct lov_stripe_md *lsm, ldlm_policy_data_t *policy,
421 __u32 mode, struct lustre_handle *lockh,
422 struct lov_request_set **reqset)
424 struct lov_obd *lov = &exp->exp_obd->u.lov;
425 struct lov_request_set *set;
429 OBD_ALLOC(set, sizeof(*set));
436 set->set_oi->oi_md = lsm;
437 set->set_lockh = lov_llh_new(lsm);
438 if (set->set_lockh == NULL)
439 GOTO(out_set, rc = -ENOMEM);
440 lockh->cookie = set->set_lockh->llh_handle.h_cookie;
442 for (i = 0; i < lsm->lsm_stripe_count; i++){
443 struct lov_oinfo *loi;
444 struct lov_request *req;
447 loi = lsm->lsm_oinfo[i];
448 if (!lov_stripe_intersects(lsm, i, policy->l_extent.start,
449 policy->l_extent.end, &start, &end))
452 /* FIXME raid1 should grace this error */
453 if (!lov->lov_tgts[loi->loi_ost_idx] ||
454 !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) {
455 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
456 GOTO(out_set, rc = -EIO);
459 OBD_ALLOC(req, sizeof(*req));
461 GOTO(out_set, rc = -ENOMEM);
/* Header-only metadata copy; no per-stripe lov_oinfo is attached. */
463 req->rq_buflen = sizeof(*req->rq_oi.oi_md);
464 OBD_ALLOC_LARGE(req->rq_oi.oi_md, req->rq_buflen);
465 if (req->rq_oi.oi_md == NULL) {
466 OBD_FREE(req, sizeof(*req));
467 GOTO(out_set, rc = -ENOMEM);
470 req->rq_oi.oi_policy.l_extent.start = start;
471 req->rq_oi.oi_policy.l_extent.end = end;
472 req->rq_oi.oi_policy.l_extent.gid = policy->l_extent.gid;
474 req->rq_idx = loi->loi_ost_idx;
477 /* XXX LOV STACKING: submd should be from the subobj */
478 req->rq_oi.oi_md->lsm_object_id = loi->loi_id;
479 req->rq_oi.oi_md->lsm_object_seq = loi->loi_seq;
480 req->rq_oi.oi_md->lsm_stripe_count = 0;
482 lov_set_add_req(req, set);
485 GOTO(out_set, rc = -EIO);
489 lov_fini_match_set(set, mode, 0);
/*
 * Finish a lock-cancel request set: asserts that the set has an export
 * and drops a reference on the set's lock handles.
 *
 * NOTE(review): most of this function (NULL-set check, any guard around
 * the lov_llh_put() call, release of the set, return value) is on lines
 * not visible in this view.
 */
493 int lov_fini_cancel_set(struct lov_request_set *set)
501 LASSERT(set->set_exp);
503 lov_llh_put(set->set_lockh);
/*
 * Build the request set for cancelling a striped lock.
 *
 * The existing lov_lock_handles object is looked up from 'lockh' with
 * lov_handle2llh(); an unknown handle fails with -EINVAL.  For each
 * stripe whose per-stripe lock handle is in use, a request with a private
 * header-only stripe-metadata copy, the target index and the sub-object
 * id/seq is allocated and queued.  Stripes without a lock are only
 * logged.  Allocation failures give -ENOMEM; the error path finishes the
 * set with lov_fini_cancel_set().
 *
 * NOTE(review): the declarations of i/rc, the set initialization, the
 * skip statement for stripes without a lock (presumably 'continue'), the
 * condition for the final -EIO and the success return are on lines not
 * visible in this view.
 */
510 int lov_prep_cancel_set(struct obd_export *exp, struct obd_info *oinfo,
511 struct lov_stripe_md *lsm, __u32 mode,
512 struct lustre_handle *lockh,
513 struct lov_request_set **reqset)
515 struct lov_request_set *set;
519 OBD_ALLOC(set, sizeof(*set));
526 set->set_oi->oi_md = lsm;
527 set->set_lockh = lov_handle2llh(lockh);
528 if (set->set_lockh == NULL) {
529 CERROR("LOV: invalid lov lock handle %p\n", lockh);
530 GOTO(out_set, rc = -EINVAL);
532 lockh->cookie = set->set_lockh->llh_handle.h_cookie;
534 for (i = 0; i < lsm->lsm_stripe_count; i++){
535 struct lov_request *req;
536 struct lustre_handle *lov_lockhp;
537 struct lov_oinfo *loi = lsm->lsm_oinfo[i];
539 lov_lockhp = set->set_lockh->llh_handles + i;
540 if (!lustre_handle_is_used(lov_lockhp)) {
541 CDEBUG(D_INFO, "lov idx %d subobj "LPX64" no lock\n",
542 loi->loi_ost_idx, loi->loi_id);
546 OBD_ALLOC(req, sizeof(*req));
548 GOTO(out_set, rc = -ENOMEM);
550 req->rq_buflen = sizeof(*req->rq_oi.oi_md);
551 OBD_ALLOC_LARGE(req->rq_oi.oi_md, req->rq_buflen);
552 if (req->rq_oi.oi_md == NULL) {
553 OBD_FREE(req, sizeof(*req));
554 GOTO(out_set, rc = -ENOMEM);
557 req->rq_idx = loi->loi_ost_idx;
560 /* XXX LOV STACKING: submd should be from the subobj */
561 req->rq_oi.oi_md->lsm_object_id = loi->loi_id;
562 req->rq_oi.oi_md->lsm_object_seq = loi->loi_seq;
563 req->rq_oi.oi_md->lsm_stripe_count = 0;
565 lov_set_add_req(req, set);
568 GOTO(out_set, rc = -EIO);
572 lov_fini_cancel_set(set);
/*
 * Merge the per-stripe attribute replies of a set into the caller's obdo
 * (set->set_oi->oi_oa).
 *
 * Requests that did not complete, failed, or returned an obdo with no
 * valid bits (inactive stripe) are excluded.  The rest are folded into a
 * temporary obdo with lov_merge_attrs(), which also counts the
 * contributing stripes in 'attrset'.  If the caller asked for
 * epoch-based attributes (OBD_MD_FLEPOCH) every stripe must have
 * contributed, otherwise -EIO.  On success the caller's object id is
 * preserved and the merged attributes are copied over the caller's obdo.
 *
 * NOTE(review): the allocation of tmp_oa, the early-return bodies, the
 * skip statements in the loop (presumably 'continue'), the condition for
 * the "No stripes had valid attrs" error and the cleanup/return code are
 * on lines not visible in this view.
 */
575 static int common_attr_done(struct lov_request_set *set)
578 struct lov_request *req;
580 int rc = 0, attrset = 0;
583 LASSERT(set->set_oi != NULL);
585 if (set->set_oi->oi_oa == NULL)
588 if (!cfs_atomic_read(&set->set_success))
593 GOTO(out, rc = -ENOMEM);
595 cfs_list_for_each (pos, &set->set_list) {
596 req = cfs_list_entry(pos, struct lov_request, rq_link);
598 if (!req->rq_complete || req->rq_rc)
600 if (req->rq_oi.oi_oa->o_valid == 0) /* inactive stripe */
602 lov_merge_attrs(tmp_oa, req->rq_oi.oi_oa,
603 req->rq_oi.oi_oa->o_valid,
604 set->set_oi->oi_md, req->rq_stripe, &attrset);
607 CERROR("No stripes had valid attrs\n");
610 if ((set->set_oi->oi_oa->o_valid & OBD_MD_FLEPOCH) &&
611 (set->set_oi->oi_md->lsm_stripe_count != attrset)) {
612 /* When we take attributes of some epoch, we require all the
613 * ost to be active. */
614 CERROR("Not all the stripes had valid attrs\n");
615 GOTO(out, rc = -EIO);
/* Keep the caller's object id; the merge result carries a stripe-level one. */
618 tmp_oa->o_id = set->set_oi->oi_oa->o_id;
619 memcpy(set->set_oi->oi_oa, tmp_oa, sizeof(*set->set_oi->oi_oa));
/*
 * Post-process a finished bulk read/write set: for every request that
 * completed without error, if the reply carries a valid block count
 * (OBD_MD_FLBLOCKS), store it in the corresponding stripe's cached lvb.
 *
 * NOTE(review): the skip statement for incomplete/failed requests
 * (presumably 'continue') and the return statement are on lines not
 * visible in this view.
 */
627 static int brw_done(struct lov_request_set *set)
629 struct lov_stripe_md *lsm = set->set_oi->oi_md;
630 struct lov_oinfo *loi = NULL;
632 struct lov_request *req;
635 cfs_list_for_each (pos, &set->set_list) {
636 req = cfs_list_entry(pos, struct lov_request, rq_link);
638 if (!req->rq_complete || req->rq_rc)
641 loi = lsm->lsm_oinfo[req->rq_stripe];
643 if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLBLOCKS)
644 loi->loi_lvb.lvb_blocks = req->rq_oi.oi_oa->o_blocks;
/*
 * Finish a bulk read/write request set.  Asserts that the set has an
 * export and performs completion work only when at least one request
 * completed.
 *
 * NOTE(review): the completion call itself (presumably brw_done()), the
 * NULL-set check, the release of the set and the return statement are on
 * lines not visible in this view.
 */
650 int lov_fini_brw_set(struct lov_request_set *set)
657 LASSERT(set->set_exp);
658 if (cfs_atomic_read(&set->set_completes)) {
660 /* FIXME update qos data here */
/*
 * Build the request set for a striped bulk read/write.
 *
 * Steps, as far as visible here:
 *   1. Allocate the set, a private page array (set_pga, oa_bufs entries)
 *      and a per-stripe scratch array 'info'.
 *   2. Count how many of the caller's pages fall on each stripe.
 *   3. For each stripe with at least one page, require an active target
 *      (else -EIO) and allocate a request with its own copy of the
 *      caller's obdo (retargeted at the sub-object via o_id/o_seq/
 *      o_stripe_idx) and a header-only stripe-metadata copy.  The request
 *      records its page count (rq_oabufs) and where its pages start in
 *      set_pga (rq_pgaidx); 'shift' is the running start index.
 *   4. Copy the caller's pages into set_pga grouped by stripe, converting
 *      each file offset to a stripe-object offset.
 *   5. Free the scratch array.
 * On error the set is finished with lov_fini_brw_set().
 *
 * NOTE(review): several lines are not visible in this view, including the
 * declaration of 'info', the set initialization, the initialization of
 * 'shift' before the request loop, the skip for empty stripes (presumably
 * 'continue'), the advance of info[stripe].off in the sort loop, the
 * condition for the final -EIO and the success return.
 */
667 int lov_prep_brw_set(struct obd_export *exp, struct obd_info *oinfo,
668 obd_count oa_bufs, struct brw_page *pga,
669 struct obd_trans_info *oti,
670 struct lov_request_set **reqset)
677 struct lov_request_set *set;
678 struct lov_obd *lov = &exp->exp_obd->u.lov;
679 int rc = 0, i, shift;
682 OBD_ALLOC(set, sizeof(*set));
690 set->set_oabufs = oa_bufs;
691 OBD_ALLOC_LARGE(set->set_pga, oa_bufs * sizeof(*set->set_pga));
693 GOTO(out, rc = -ENOMEM);
695 OBD_ALLOC_LARGE(info, sizeof(*info) * oinfo->oi_md->lsm_stripe_count);
697 GOTO(out, rc = -ENOMEM);
699 /* calculate the page count for each stripe */
700 for (i = 0; i < oa_bufs; i++) {
701 int stripe = lov_stripe_number(oinfo->oi_md, pga[i].off);
702 info[stripe].count++;
705 /* alloc and initialize lov request */
707 for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++){
708 struct lov_oinfo *loi = NULL;
709 struct lov_request *req;
711 if (info[i].count == 0)
714 loi = oinfo->oi_md->lsm_oinfo[i];
715 if (!lov->lov_tgts[loi->loi_ost_idx] ||
716 !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) {
717 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
718 GOTO(out, rc = -EIO);
721 OBD_ALLOC(req, sizeof(*req));
723 GOTO(out, rc = -ENOMEM);
725 OBDO_ALLOC(req->rq_oi.oi_oa);
726 if (req->rq_oi.oi_oa == NULL) {
727 OBD_FREE(req, sizeof(*req));
728 GOTO(out, rc = -ENOMEM);
732 memcpy(req->rq_oi.oi_oa, oinfo->oi_oa,
733 sizeof(*req->rq_oi.oi_oa));
735 req->rq_oi.oi_oa->o_id = loi->loi_id;
736 req->rq_oi.oi_oa->o_seq = loi->loi_seq;
737 req->rq_oi.oi_oa->o_stripe_idx = i;
739 req->rq_buflen = sizeof(*req->rq_oi.oi_md);
740 OBD_ALLOC_LARGE(req->rq_oi.oi_md, req->rq_buflen);
741 if (req->rq_oi.oi_md == NULL) {
/* Not yet on the set's list: undo both allocations by hand. */
742 OBDO_FREE(req->rq_oi.oi_oa);
743 OBD_FREE(req, sizeof(*req));
744 GOTO(out, rc = -ENOMEM);
747 req->rq_idx = loi->loi_ost_idx;
750 /* XXX LOV STACKING */
751 req->rq_oi.oi_md->lsm_object_id = loi->loi_id;
752 req->rq_oi.oi_md->lsm_object_seq = loi->loi_seq;
/* This request owns set_pga[rq_pgaidx .. rq_pgaidx + rq_oabufs - 1]. */
753 req->rq_oabufs = info[i].count;
754 req->rq_pgaidx = shift;
755 shift += req->rq_oabufs;
757 /* remember the index for sort brw_page array */
758 info[i].index = req->rq_pgaidx;
760 req->rq_oi.oi_capa = oinfo->oi_capa;
762 lov_set_add_req(req, set);
765 GOTO(out, rc = -EIO);
767 /* rotate & sort the brw_page array */
768 for (i = 0; i < oa_bufs; i++) {
769 int stripe = lov_stripe_number(oinfo->oi_md, pga[i].off);
/* Destination slot: start of this stripe's run plus pages placed so far. */
771 shift = info[stripe].index + info[stripe].off;
772 LASSERT(shift < oa_bufs);
773 set->set_pga[shift] = pga[i];
774 lov_stripe_offset(oinfo->oi_md, pga[i].off, stripe,
775 &set->set_pga[shift].off);
781 sizeof(*info) * oinfo->oi_md->lsm_stripe_count);
786 lov_fini_brw_set(set);
/*
 * Finish a getattr request set.  Asserts that the set has an export and,
 * if at least one request completed, merges the per-stripe attributes
 * into the caller's obdo with common_attr_done().
 *
 * NOTE(review): the NULL-set check, the release of the set and the return
 * statement are on lines not visible in this view.
 */
791 int lov_fini_getattr_set(struct lov_request_set *set)
798 LASSERT(set->set_exp);
799 if (cfs_atomic_read(&set->set_completes))
800 rc = common_attr_done(set);
807 /* The callback for osc_getattr_async that finalizes a request info when a
808 * response is received. */
/*
 * 'cookie' is the obd_info embedded in a lov_request (rq_oi); the
 * enclosing request is recovered with container_of() and the result is
 * handed to lov_update_common_set().
 */
809 static int cb_getattr_update(void *cookie, int rc)
811 struct obd_info *oinfo = cookie;
812 struct lov_request *lovreq;
813 lovreq = container_of(oinfo, struct lov_request, rq_oi);
814 return lov_update_common_set(lovreq->rq_rqset, lovreq, rc);
/*
 * Build the request set for a striped getattr.
 *
 * For every stripe with an active target a request is allocated with its
 * own copy of the caller's obdo, retargeted at the sub-object
 * (o_id/o_seq), the getattr completion callback and the caller's
 * capability.  If a target is missing or inactive and the caller asked
 * for epoch-based attributes (OBD_MD_FLEPOCH, used by SOM), the whole
 * operation fails with -EIO.  Allocation failures give -ENOMEM; the error
 * path finishes the set with lov_fini_getattr_set().
 *
 * NOTE(review): the declarations of i/rc, the set initialization, what
 * happens to an inactive stripe without OBD_MD_FLEPOCH (presumably it is
 * skipped), the condition for the final -EIO and the success return are
 * on lines not visible in this view.
 */
817 int lov_prep_getattr_set(struct obd_export *exp, struct obd_info *oinfo,
818 struct lov_request_set **reqset)
820 struct lov_request_set *set;
821 struct lov_obd *lov = &exp->exp_obd->u.lov;
825 OBD_ALLOC(set, sizeof(*set));
833 for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) {
834 struct lov_oinfo *loi;
835 struct lov_request *req;
837 loi = oinfo->oi_md->lsm_oinfo[i];
838 if (!lov->lov_tgts[loi->loi_ost_idx] ||
839 !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) {
840 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
841 if (oinfo->oi_oa->o_valid & OBD_MD_FLEPOCH)
842 /* SOM requires all the OSTs to be active. */
843 GOTO(out_set, rc = -EIO);
847 OBD_ALLOC(req, sizeof(*req));
849 GOTO(out_set, rc = -ENOMEM);
852 req->rq_idx = loi->loi_ost_idx;
854 OBDO_ALLOC(req->rq_oi.oi_oa);
855 if (req->rq_oi.oi_oa == NULL) {
856 OBD_FREE(req, sizeof(*req));
857 GOTO(out_set, rc = -ENOMEM);
859 memcpy(req->rq_oi.oi_oa, oinfo->oi_oa,
860 sizeof(*req->rq_oi.oi_oa));
861 req->rq_oi.oi_oa->o_id = loi->loi_id;
862 req->rq_oi.oi_oa->o_seq = loi->loi_seq;
863 req->rq_oi.oi_cb_up = cb_getattr_update;
864 req->rq_oi.oi_capa = oinfo->oi_capa;
866 lov_set_add_req(req, set);
869 GOTO(out_set, rc = -EIO);
873 lov_fini_getattr_set(set);
/*
 * Finish a destroy request set.  Asserts that the set has an export; the
 * branch taken when at least one request completed currently only holds
 * a FIXME placeholder in the visible lines.
 *
 * NOTE(review): the NULL-set check, the release of the set and the return
 * statement are on lines not visible in this view.
 */
877 int lov_fini_destroy_set(struct lov_request_set *set)
883 LASSERT(set->set_exp);
884 if (cfs_atomic_read(&set->set_completes)) {
885 /* FIXME update qos data here */
/*
 * Build the request set for destroying a striped object.
 *
 * The set remembers 'lsm' and 'src_oa'; if a transaction info is supplied
 * and the source obdo carries OBD_MD_FLCOOKIE, the set also points at the
 * caller's log cookies.  For every stripe with an active target a request
 * is allocated with its own copy of 'src_oa' retargeted at the sub-object
 * (o_id/o_seq).  Allocation failures give -ENOMEM; the error path
 * finishes the set with lov_fini_destroy_set().
 *
 * NOTE(review): the declarations of i/rc, the set initialization, the
 * handling of inactive stripes (presumably skipped), the condition for
 * the final -EIO and the success return are on lines not visible in this
 * view.
 */
893 int lov_prep_destroy_set(struct obd_export *exp, struct obd_info *oinfo,
894 struct obdo *src_oa, struct lov_stripe_md *lsm,
895 struct obd_trans_info *oti,
896 struct lov_request_set **reqset)
898 struct lov_request_set *set;
899 struct lov_obd *lov = &exp->exp_obd->u.lov;
903 OBD_ALLOC(set, sizeof(*set));
910 set->set_oi->oi_md = lsm;
911 set->set_oi->oi_oa = src_oa;
913 if (oti != NULL && src_oa->o_valid & OBD_MD_FLCOOKIE)
914 set->set_cookies = oti->oti_logcookies;
916 for (i = 0; i < lsm->lsm_stripe_count; i++) {
917 struct lov_oinfo *loi;
918 struct lov_request *req;
920 loi = lsm->lsm_oinfo[i];
921 if (!lov->lov_tgts[loi->loi_ost_idx] ||
922 !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) {
923 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
927 OBD_ALLOC(req, sizeof(*req));
929 GOTO(out_set, rc = -ENOMEM);
932 req->rq_idx = loi->loi_ost_idx;
934 OBDO_ALLOC(req->rq_oi.oi_oa);
935 if (req->rq_oi.oi_oa == NULL) {
936 OBD_FREE(req, sizeof(*req));
937 GOTO(out_set, rc = -ENOMEM);
939 memcpy(req->rq_oi.oi_oa, src_oa, sizeof(*req->rq_oi.oi_oa));
940 req->rq_oi.oi_oa->o_id = loi->loi_id;
941 req->rq_oi.oi_oa->o_seq = loi->loi_seq;
942 lov_set_add_req(req, set);
945 GOTO(out_set, rc = -EIO);
949 lov_fini_destroy_set(set);
/*
 * Finish a setattr request set.  Asserts that the set has an export and,
 * if at least one request completed, merges the per-stripe attribute
 * replies with common_attr_done().
 *
 * NOTE(review): the NULL-set check, the release of the set and the return
 * statement are on lines not visible in this view.
 */
953 int lov_fini_setattr_set(struct lov_request_set *set)
960 LASSERT(set->set_exp);
961 if (cfs_atomic_read(&set->set_completes)) {
962 rc = common_attr_done(set);
963 /* FIXME update qos data here */
/*
 * Per-request completion handler for setattr.
 *
 * Records the completion with lov_update_set() and special-cases an
 * error from a missing or inactive target.  The stripe's cached lvb
 * times (ctime, mtime, atime) are then refreshed from the reply obdo for
 * each time field the reply marks valid.
 *
 * NOTE(review): the statement executed for the graced error, the
 * condition guarding the time updates (presumably rc == 0) and the return
 * statement are on lines not visible in this view.
 */
970 int lov_update_setattr_set(struct lov_request_set *set,
971 struct lov_request *req, int rc)
973 struct lov_obd *lov = &req->rq_rqset->set_exp->exp_obd->u.lov;
974 struct lov_stripe_md *lsm = req->rq_rqset->set_oi->oi_md;
977 lov_update_set(set, req, rc);
979 /* grace error on inactive ost */
980 if (rc && !(lov->lov_tgts[req->rq_idx] &&
981 lov->lov_tgts[req->rq_idx]->ltd_active))
985 if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLCTIME)
986 lsm->lsm_oinfo[req->rq_stripe]->loi_lvb.lvb_ctime =
987 req->rq_oi.oi_oa->o_ctime;
988 if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLMTIME)
989 lsm->lsm_oinfo[req->rq_stripe]->loi_lvb.lvb_mtime =
990 req->rq_oi.oi_oa->o_mtime;
991 if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLATIME)
992 lsm->lsm_oinfo[req->rq_stripe]->loi_lvb.lvb_atime =
993 req->rq_oi.oi_oa->o_atime;
999 /* The callback for osc_setattr_async that finalizes a request info when a
1000 * response is received. */
/*
 * 'cookie' is the obd_info embedded in a lov_request (rq_oi); the
 * enclosing request is recovered with container_of() and the result is
 * handed to lov_update_setattr_set().
 */
1001 static int cb_setattr_update(void *cookie, int rc)
1003 struct obd_info *oinfo = cookie;
1004 struct lov_request *lovreq;
1005 lovreq = container_of(oinfo, struct lov_request, rq_oi);
1006 return lov_update_setattr_set(lovreq->rq_rqset, lovreq, rc);
/*
 * Build the request set for a striped setattr.
 *
 * The set points at the caller's obd_info and, if a transaction info is
 * supplied and the obdo carries OBD_MD_FLCOOKIE, at the caller's log
 * cookies.  For every stripe with an active target a request is
 * allocated with its own copy of the caller's obdo retargeted at the
 * sub-object (o_id/o_seq/o_stripe_idx), the setattr callback and the
 * caller's capability.
 *
 * When a size is being set (OBD_MD_FLSIZE) the file size is converted to
 * this stripe's object size with lov_stripe_offset(); if that returns a
 * negative value and the resulting size is non-zero, the size is reduced
 * by one.
 *
 * A set with no requests fails with -EIO; allocation failures give
 * -ENOMEM.  The error path finishes the set with lov_fini_setattr_set().
 *
 * NOTE(review): the declarations of i/rc, the set initialization, the
 * handling of inactive stripes (presumably skipped) and the success
 * return are on lines not visible in this view.
 */
1009 int lov_prep_setattr_set(struct obd_export *exp, struct obd_info *oinfo,
1010 struct obd_trans_info *oti,
1011 struct lov_request_set **reqset)
1013 struct lov_request_set *set;
1014 struct lov_obd *lov = &exp->exp_obd->u.lov;
1018 OBD_ALLOC(set, sizeof(*set));
1025 set->set_oi = oinfo;
1026 if (oti != NULL && oinfo->oi_oa->o_valid & OBD_MD_FLCOOKIE)
1027 set->set_cookies = oti->oti_logcookies;
1029 for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) {
1030 struct lov_oinfo *loi = oinfo->oi_md->lsm_oinfo[i];
1031 struct lov_request *req;
1033 if (!lov->lov_tgts[loi->loi_ost_idx] ||
1034 !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) {
1035 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
1039 OBD_ALLOC(req, sizeof(*req));
1041 GOTO(out_set, rc = -ENOMEM);
1043 req->rq_idx = loi->loi_ost_idx;
1045 OBDO_ALLOC(req->rq_oi.oi_oa);
1046 if (req->rq_oi.oi_oa == NULL) {
1047 OBD_FREE(req, sizeof(*req));
1048 GOTO(out_set, rc = -ENOMEM);
1050 memcpy(req->rq_oi.oi_oa, oinfo->oi_oa,
1051 sizeof(*req->rq_oi.oi_oa));
1052 req->rq_oi.oi_oa->o_id = loi->loi_id;
1053 req->rq_oi.oi_oa->o_seq= loi->loi_seq;
1054 req->rq_oi.oi_oa->o_stripe_idx = i;
1055 req->rq_oi.oi_cb_up = cb_setattr_update;
1056 req->rq_oi.oi_capa = oinfo->oi_capa;
1058 if (oinfo->oi_oa->o_valid & OBD_MD_FLSIZE) {
/* Translate the file-level size into this stripe object's size. */
1059 int off = lov_stripe_offset(oinfo->oi_md,
1060 oinfo->oi_oa->o_size, i,
1061 &req->rq_oi.oi_oa->o_size);
1063 if (off < 0 && req->rq_oi.oi_oa->o_size)
1064 req->rq_oi.oi_oa->o_size--;
1066 CDEBUG(D_INODE, "stripe %d has size "LPU64"/"LPU64"\n",
1067 i, req->rq_oi.oi_oa->o_size,
1068 oinfo->oi_oa->o_size);
1070 lov_set_add_req(req, set);
1072 if (!set->set_count)
1073 GOTO(out_set, rc = -EIO);
1077 lov_fini_setattr_set(set);
/*
 * Finish a punch (truncate) request set.  Asserts that the set has an
 * export; if at least one request completed and at least one succeeded,
 * the per-stripe attribute replies are merged with common_attr_done().
 * The set reference is then dropped with lov_put_reqset().
 *
 * NOTE(review): the NULL-set check, the error assignment on the line
 * missing between the two atomic reads, and the return statement are not
 * visible in this view.
 */
1081 int lov_fini_punch_set(struct lov_request_set *set)
1088 LASSERT(set->set_exp);
1089 if (cfs_atomic_read(&set->set_completes)) {
1091 /* FIXME update qos data here */
1092 if (cfs_atomic_read(&set->set_success))
1093 rc = common_attr_done(set);
1096 lov_put_reqset(set);
/*
 * Per-request completion handler for punch.
 *
 * Records the completion with lov_update_set() and special-cases an
 * error from an inactive target.  Then, under the stripe lock, the
 * stripe's cached block count is refreshed from the reply obdo when the
 * reply marks it valid (OBD_MD_FLBLOCKS).
 *
 * NOTE(review): unlike lov_update_common_set() and
 * lov_update_setattr_set(), the inactive-target test here dereferences
 * lov->lov_tgts[req->rq_idx] without first checking it for NULL --
 * confirm whether the slot can be NULL at this point.
 *
 * NOTE(review): the statement executed for the graced error, the
 * condition guarding the locked update (presumably rc == 0) and the
 * return statement are on lines not visible in this view.
 */
1101 int lov_update_punch_set(struct lov_request_set *set,
1102 struct lov_request *req, int rc)
1104 struct lov_obd *lov = &req->rq_rqset->set_exp->exp_obd->u.lov;
1105 struct lov_stripe_md *lsm = req->rq_rqset->set_oi->oi_md;
1108 lov_update_set(set, req, rc);
1110 /* grace error on inactive ost */
1111 if (rc && !lov->lov_tgts[req->rq_idx]->ltd_active)
1115 lov_stripe_lock(lsm);
1116 if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLBLOCKS) {
1117 lsm->lsm_oinfo[req->rq_stripe]->loi_lvb.lvb_blocks =
1118 req->rq_oi.oi_oa->o_blocks;
1121 lov_stripe_unlock(lsm);
1127 /* The callback for osc_punch that finalizes a request info when a response
 * is received. */
/*
 * 'cookie' is the obd_info embedded in a lov_request (rq_oi); the
 * enclosing request is recovered with container_of() and the result is
 * handed to lov_update_punch_set().
 */
1129 static int cb_update_punch(void *cookie, int rc)
1131 struct obd_info *oinfo = cookie;
1132 struct lov_request *lovreq;
1133 lovreq = container_of(oinfo, struct lov_request, rq_oi);
1134 return lov_update_punch_set(lovreq->rq_rqset, lovreq, rc);
/*
 * Build the request set for a striped punch (truncate).
 *
 * For every stripe whose range intersects the requested extent, the
 * target must be present and active (otherwise the whole operation fails
 * with -EIO).  Each such stripe gets a request with its own copy of the
 * caller's obdo retargeted at the sub-object (o_id/o_seq/o_stripe_idx,
 * with OBD_MD_FLGROUP marked valid), the punch callback, the
 * stripe-local extent [rs, re] with gid -1, and the caller's capability.
 *
 * A set with no requests fails with -EIO; allocation failures give
 * -ENOMEM.  The error path finishes the set with lov_fini_punch_set().
 *
 * NOTE(review): the declarations of i/rc/rs/re, the set initialization,
 * the skip for non-intersecting stripes (presumably 'continue') and the
 * success return are on lines not visible in this view.
 */
1137 int lov_prep_punch_set(struct obd_export *exp, struct obd_info *oinfo,
1138 struct obd_trans_info *oti,
1139 struct lov_request_set **reqset)
1141 struct lov_request_set *set;
1142 struct lov_obd *lov = &exp->exp_obd->u.lov;
1146 OBD_ALLOC(set, sizeof(*set));
1151 set->set_oi = oinfo;
1154 for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) {
1155 struct lov_oinfo *loi = oinfo->oi_md->lsm_oinfo[i];
1156 struct lov_request *req;
1159 if (!lov_stripe_intersects(oinfo->oi_md, i,
1160 oinfo->oi_policy.l_extent.start,
1161 oinfo->oi_policy.l_extent.end,
1165 if (!lov->lov_tgts[loi->loi_ost_idx] ||
1166 !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) {
1167 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
1168 GOTO(out_set, rc = -EIO);
1171 OBD_ALLOC(req, sizeof(*req));
1173 GOTO(out_set, rc = -ENOMEM);
1175 req->rq_idx = loi->loi_ost_idx;
1177 OBDO_ALLOC(req->rq_oi.oi_oa);
1178 if (req->rq_oi.oi_oa == NULL) {
1179 OBD_FREE(req, sizeof(*req));
1180 GOTO(out_set, rc = -ENOMEM);
1182 memcpy(req->rq_oi.oi_oa, oinfo->oi_oa,
1183 sizeof(*req->rq_oi.oi_oa));
1184 req->rq_oi.oi_oa->o_id = loi->loi_id;
1185 req->rq_oi.oi_oa->o_seq = loi->loi_seq;
1186 req->rq_oi.oi_oa->o_valid |= OBD_MD_FLGROUP;
1188 req->rq_oi.oi_oa->o_stripe_idx = i;
1189 req->rq_oi.oi_cb_up = cb_update_punch;
1191 req->rq_oi.oi_policy.l_extent.start = rs;
1192 req->rq_oi.oi_policy.l_extent.end = re;
1193 req->rq_oi.oi_policy.l_extent.gid = -1;
1195 req->rq_oi.oi_capa = oinfo->oi_capa;
1197 lov_set_add_req(req, set);
1199 if (!set->set_count)
1200 GOTO(out_set, rc = -EIO);
1204 lov_fini_punch_set(set);
/*
 * Finish a sync request set.  Asserts that the set has an export, checks
 * whether any completed request succeeded, and drops the set reference
 * with lov_put_reqset().
 *
 * NOTE(review): the NULL-set check, the statement executed when no
 * request succeeded (presumably an error assignment) and the return
 * statement are on lines not visible in this view.
 */
1208 int lov_fini_sync_set(struct lov_request_set *set)
1215 LASSERT(set->set_exp);
1216 if (cfs_atomic_read(&set->set_completes)) {
1217 if (!cfs_atomic_read(&set->set_success))
1219 /* FIXME update qos data here */
1222 lov_put_reqset(set);
1227 /* The callback for osc_sync that finalizes a request info when a
1228 * response is received. */
/*
 * 'cookie' is the obd_info embedded in a lov_request (rq_oi); the
 * enclosing request is recovered with container_of() and the result is
 * handed to lov_update_common_set().
 */
1229 static int cb_sync_update(void *cookie, int rc)
1231 struct obd_info *oinfo = cookie;
1232 struct lov_request *lovreq;
1234 lovreq = container_of(oinfo, struct lov_request, rq_oi);
1235 return lov_update_common_set(lovreq->rq_rqset, lovreq, rc);
/*
 * Build the request set for a striped sync over [start, end].
 *
 * Each stripe is checked for an active target and for intersection with
 * the range.  A stripe that passes both checks gets a request with its
 * own copy of the caller's obdo retargeted at the sub-object
 * (o_id/o_seq/o_stripe_idx), the stripe-local extent [rs, re] with gid
 * -1 and the sync callback.
 *
 * A set with no requests fails with -EIO; allocation failures give
 * -ENOMEM.  The error path finishes the set with lov_fini_sync_set().
 *
 * NOTE(review): the declarations of i/rc/rs/re, the allocation and
 * initialization of the set, the allocation of each request, the
 * handling of inactive or non-intersecting stripes (presumably skipped)
 * and the success return are on lines not visible in this view.
 */
1238 int lov_prep_sync_set(struct obd_export *exp, struct obd_info *oinfo,
1239 obd_off start, obd_off end,
1240 struct lov_request_set **reqset)
1242 struct lov_request_set *set;
1243 struct lov_obd *lov = &exp->exp_obd->u.lov;
1253 set->set_oi = oinfo;
1255 for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) {
1256 struct lov_oinfo *loi = oinfo->oi_md->lsm_oinfo[i];
1257 struct lov_request *req;
1260 if (!lov->lov_tgts[loi->loi_ost_idx] ||
1261 !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) {
1262 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
1266 if (!lov_stripe_intersects(oinfo->oi_md, i, start, end, &rs,
1272 GOTO(out_set, rc = -ENOMEM);
1274 req->rq_idx = loi->loi_ost_idx;
1276 OBDO_ALLOC(req->rq_oi.oi_oa);
1277 if (req->rq_oi.oi_oa == NULL) {
1278 OBD_FREE(req, sizeof(*req));
1279 GOTO(out_set, rc = -ENOMEM);
1281 *req->rq_oi.oi_oa = *oinfo->oi_oa;
1282 req->rq_oi.oi_oa->o_id = loi->loi_id;
1283 req->rq_oi.oi_oa->o_seq = loi->loi_seq;
1284 req->rq_oi.oi_oa->o_stripe_idx = i;
1286 req->rq_oi.oi_policy.l_extent.start = rs;
1287 req->rq_oi.oi_policy.l_extent.end = re;
1288 req->rq_oi.oi_policy.l_extent.gid = -1;
1289 req->rq_oi.oi_cb_up = cb_sync_update;
1291 lov_set_add_req(req, set);
1293 if (!set->set_count)
1294 GOTO(out_set, rc = -EIO);
1298 lov_fini_sync_set(set);
/*
 * LOV_U64_MAX is the all-ones 64-bit value; the statfs code uses it as a
 * saturation marker.
 *
 * LOV_SUM_MAX(tot, add) is the overflow-aware accumulation used by
 * lov_update_statfs(): if tot + add wraps around (the sum is smaller
 * than tot), tot is pinned to LOV_U64_MAX.
 *
 * NOTE(review): the non-overflow branch (presumably tot += add) and the
 * macro's wrapper lines are not visible in this view.
 */
1302 #define LOV_U64_MAX ((__u64)~0ULL)
1303 #define LOV_SUM_MAX(tot, add) \
1305 if ((tot) + (add) < (tot)) \
1306 (tot) = LOV_U64_MAX; \
/*
 * Finalize an aggregated statfs result.
 *
 * The summed file counts (os_files, os_ffree) are divided by the expected
 * stripe count, unless they were saturated to LOV_U64_MAX.  The result is
 * then cached in obd->obd_osfs, together with the current time in
 * obd_osfs_age, under obd_osfs_lock.
 *
 * NOTE(review): the condition on 'success', the second argument of
 * lov_get_stripecnt() and the return statement are on lines not visible
 * in this view.
 */
1311 int lov_fini_statfs(struct obd_device *obd, struct obd_statfs *osfs,int success)
1316 __u32 expected_stripes = lov_get_stripecnt(&obd->u.lov,
1318 if (osfs->os_files != LOV_U64_MAX)
1319 lov_do_div64(osfs->os_files, expected_stripes);
1320 if (osfs->os_ffree != LOV_U64_MAX)
1321 lov_do_div64(osfs->os_ffree, expected_stripes);
1323 spin_lock(&obd->obd_osfs_lock);
1324 memcpy(&obd->obd_osfs, osfs, sizeof(*osfs));
1325 obd->obd_osfs_age = cfs_time_current_64();
1326 spin_unlock(&obd->obd_osfs_lock);
/*
 * Finish a statfs request set: if at least one request completed,
 * finalize the aggregated result with lov_fini_statfs(), passing the
 * number of successful requests, and then drop the set reference with
 * lov_put_reqset().
 *
 * NOTE(review): the NULL-set check and the return statement are on lines
 * not visible in this view.
 */
1333 int lov_fini_statfs_set(struct lov_request_set *set)
1341 if (cfs_atomic_read(&set->set_completes)) {
1342 rc = lov_fini_statfs(set->set_obd, set->set_oi->oi_osfs,
1343 cfs_atomic_read(&set->set_success));
1345 lov_put_reqset(set);
/*
 * Fold one target's statfs reply (lov_sfs) into the aggregate (osfs).
 *
 * Visible behaviour:
 *   - In one branch the reply is simply copied into the aggregate.
 *   - Otherwise, if the block sizes differ, a bit-shift is derived from
 *     the two sizes (assumed to be powers of two) and the block counters
 *     of whichever side has the smaller block size are shifted right, so
 *     both sides use the larger block size.
 *   - Free/available blocks are then either replaced by the reply's
 *     values when the reply has less space available (minimum-space
 *     policy) or summed; total blocks are summed.
 *   - File counts are accumulated with LOV_SUM_MAX, which saturates on
 *     overflow.
 *
 * NOTE(review): the tail of the parameter list, the condition selecting
 * copy vs. merge (presumably "first successful reply"), the body of the
 * shift-finding loop and the condition choosing between the minimum and
 * the sum policy are on lines not visible in this view.  Both osfs and
 * lov_sfs may be modified.
 */
1349 void lov_update_statfs(struct obd_statfs *osfs, struct obd_statfs *lov_sfs,
1352 int shift = 0, quit = 0;
1356 memcpy(osfs, lov_sfs, sizeof(*lov_sfs));
1358 if (osfs->os_bsize != lov_sfs->os_bsize) {
1359 /* assume all block sizes are always powers of 2 */
1360 /* get the bits difference */
1361 tmp = osfs->os_bsize | lov_sfs->os_bsize;
1362 for (shift = 0; shift <= 64; ++shift) {
1374 if (osfs->os_bsize < lov_sfs->os_bsize) {
1375 osfs->os_bsize = lov_sfs->os_bsize;
1377 osfs->os_bfree >>= shift;
1378 osfs->os_bavail >>= shift;
1379 osfs->os_blocks >>= shift;
1380 } else if (shift != 0) {
1381 lov_sfs->os_bfree >>= shift;
1382 lov_sfs->os_bavail >>= shift;
1383 lov_sfs->os_blocks >>= shift;
1386 /* Sandia requested that df (and so, statfs) only
1387 returned minimal available space on
1388 a single OST, so people would be able to
1389 write this much data guaranteed. */
1390 if (osfs->os_bavail > lov_sfs->os_bavail) {
1391 /* Presumably if new bavail is smaller,
1392 new bfree is bigger as well */
1393 osfs->os_bfree = lov_sfs->os_bfree;
1394 osfs->os_bavail = lov_sfs->os_bavail;
1397 osfs->os_bfree += lov_sfs->os_bfree;
1398 osfs->os_bavail += lov_sfs->os_bavail;
1400 osfs->os_blocks += lov_sfs->os_blocks;
1401 /* XXX not sure about this one - depends on policy.
1402 * - could be minimum if we always stripe on all OBDs
1403 * (but that would be wrong for any other policy,
1404 * if one of the OBDs has no more objects left)
1405 * - could be sum if we stripe whole objects
1406 * - could be average, just to give a nice number
1408 * To give a "reasonable" (if not wholly accurate)
1409 * number, we divide the total number of free objects
1410 * by expected stripe count (watch out for overflow).
1412 LOV_SUM_MAX(osfs->os_files, lov_sfs->os_files);
1413 LOV_SUM_MAX(osfs->os_ffree, lov_sfs->os_ffree);
1417 /* The callback for osc_statfs_async that finalizes a request info when a
1418 * response is received. */
/*
 * 'cookie' is the obd_info embedded in a lov_request (rq_oi).
 *
 * The success count is sampled before this reply is recorded with
 * lov_update_set(), and that sampled value is what is later passed to
 * lov_update_statfs().  If the target is missing or inactive the
 * per-target cache update is skipped (jump to out_update).  Otherwise
 * the reply is stored in the target device's obd_osfs under its
 * obd_osfs_lock, and obd_osfs_age is refreshed unless the reply came
 * from the cache (OBD_STATFS_FROM_CACHE).  The reply is then merged into
 * the set-wide result.
 *
 * When the set was issued with OBD_STATFS_PTLRPCD and
 * lov_set_finished() reports that this was the last outstanding
 * request, the set is interpreted here via lov_statfs_interpret(), with
 * a non-zero status if not every request succeeded.
 *
 * NOTE(review): the declaration of 'success', the early-exit check
 * between lov_update_set() and the target lookup, the label positions
 * and the return statement are on lines not visible in this view.
 */
1419 static int cb_statfs_update(void *cookie, int rc)
1421 struct obd_info *oinfo = cookie;
1422 struct lov_request *lovreq;
1423 struct lov_request_set *set;
1424 struct obd_statfs *osfs, *lov_sfs;
1425 struct lov_obd *lov;
1426 struct lov_tgt_desc *tgt;
1427 struct obd_device *lovobd, *tgtobd;
1431 lovreq = container_of(oinfo, struct lov_request, rq_oi);
1432 set = lovreq->rq_rqset;
1433 lovobd = set->set_obd;
1434 lov = &lovobd->u.lov;
1435 osfs = set->set_oi->oi_osfs;
1436 lov_sfs = oinfo->oi_osfs;
/* Sampled before lov_update_set() below changes the counters. */
1437 success = cfs_atomic_read(&set->set_success);
1438 /* XXX: the same is done in lov_update_common_set, however
1439 lovset->set_exp is not initialized. */
1440 lov_update_set(set, lovreq, rc);
1445 tgt = lov->lov_tgts[lovreq->rq_idx];
1446 if (!tgt || !tgt->ltd_active)
1447 GOTO(out_update, rc);
1449 tgtobd = class_exp2obd(tgt->ltd_exp);
1450 spin_lock(&tgtobd->obd_osfs_lock);
1451 memcpy(&tgtobd->obd_osfs, lov_sfs, sizeof(*lov_sfs));
1452 if ((oinfo->oi_flags & OBD_STATFS_FROM_CACHE) == 0)
1453 tgtobd->obd_osfs_age = cfs_time_current_64();
1454 spin_unlock(&tgtobd->obd_osfs_lock);
1457 lov_update_statfs(osfs, lov_sfs, success);
1461 if (set->set_oi->oi_flags & OBD_STATFS_PTLRPCD &&
1462 lov_set_finished(set, 0)) {
1463 lov_statfs_interpret(NULL, set, set->set_count !=
1464 cfs_atomic_read(&set->set_success));
1470 int lov_prep_statfs_set(struct obd_device *obd, struct obd_info *oinfo,
1471 struct lov_request_set **reqset)
1473 struct lov_request_set *set;
1474 struct lov_obd *lov = &obd->u.lov;
1478 OBD_ALLOC(set, sizeof(*set));
1484 set->set_oi = oinfo;
1486 /* We only get block data from the OBD */
1487 for (i = 0; i < lov->desc.ld_tgt_count; i++) {
1488 struct lov_request *req;
1490 if (!lov->lov_tgts[i] || (!lov->lov_tgts[i]->ltd_active
1491 && (oinfo->oi_flags & OBD_STATFS_NODELAY))) {
1492 CDEBUG(D_HA, "lov idx %d inactive\n", i);
1496 /* skip targets that have been explicitely disabled by the
1498 if (!lov->lov_tgts[i]->ltd_exp) {
1499 CDEBUG(D_HA, "lov idx %d administratively disabled\n", i);
1503 OBD_ALLOC(req, sizeof(*req));
1505 GOTO(out_set, rc = -ENOMEM);
1507 OBD_ALLOC(req->rq_oi.oi_osfs, sizeof(*req->rq_oi.oi_osfs));
1508 if (req->rq_oi.oi_osfs == NULL) {
1509 OBD_FREE(req, sizeof(*req));
1510 GOTO(out_set, rc = -ENOMEM);
1514 req->rq_oi.oi_cb_up = cb_statfs_update;
1515 req->rq_oi.oi_flags = oinfo->oi_flags;
1517 lov_set_add_req(req, set);
1519 if (!set->set_count)
1520 GOTO(out_set, rc = -EIO);
1524 lov_fini_statfs_set(set);