4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2012, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
37 #define DEBUG_SUBSYSTEM S_LOV
40 #include <libcfs/libcfs.h>
42 #include <liblustre.h>
45 #include <obd_class.h>
47 #include <lustre/lustre_idl.h>
49 #include "lov_internal.h"
/*
 * Initialise the bookkeeping fields of a request set: the completion,
 * success and finish-checked counters start at 0, the request list starts
 * empty, the reference count starts at 1, and the wait queue and spinlock
 * are set up.
 */
51 static void lov_init_set(struct lov_request_set *set)
54 cfs_atomic_set(&set->set_completes, 0);
55 cfs_atomic_set(&set->set_success, 0);
56 cfs_atomic_set(&set->set_finish_checked, 0);
58 CFS_INIT_LIST_HEAD(&set->set_list);
59 cfs_atomic_set(&set->set_refcount, 1);
60 cfs_waitq_init(&set->set_waitq);
61 spin_lock_init(&set->set_lock);
/*
 * Release a request set and everything hanging off it.
 *
 * Every request is unlinked from set_list and its per-request buffers are
 * freed: the obdo copy (oi_oa), the stripe-md copy (oi_md, rq_buflen bytes),
 * the statfs buffer (oi_osfs, only if set) and finally the request itself.
 * Afterwards the brw page array (set_oabufs entries), the reference on the
 * lock-handle block and the set structure itself are released.
 *
 * NOTE(review): the guards around some of these frees (oi_oa, oi_md,
 * set_pga, set_lockh) are not visible in this listing - confirm they exist.
 */
64 void lov_finish_set(struct lov_request_set *set)
/* The "safe" iterator is needed because entries are deleted while walking. */
70 cfs_list_for_each_safe(pos, n, &set->set_list) {
71 struct lov_request *req = cfs_list_entry(pos,
74 cfs_list_del_init(&req->rq_link);
77 OBDO_FREE(req->rq_oi.oi_oa);
79 OBD_FREE_LARGE(req->rq_oi.oi_md, req->rq_buflen);
80 if (req->rq_oi.oi_osfs)
81 OBD_FREE(req->rq_oi.oi_osfs,
82 sizeof(*req->rq_oi.oi_osfs));
83 OBD_FREE(req, sizeof(*req));
87 int len = set->set_oabufs * sizeof(*set->set_pga);
88 OBD_FREE_LARGE(set->set_pga, len);
91 lov_llh_put(set->set_lockh);
93 OBD_FREE(set, sizeof(*set));
/*
 * Check whether all requests of a set have completed.
 *
 * Reads set_completes, logs it against set_count, and when the two match
 * increments set_finish_checked; the "== 1" comparison singles out the first
 * caller that observes the completed state.
 *
 * NOTE(review): how `idempotent` is used and what is returned is not visible
 * in this listing.
 */
97 int lov_set_finished(struct lov_request_set *set, int idempotent)
99 int completes = cfs_atomic_read(&set->set_completes);
101 CDEBUG(D_INFO, "check set %d/%d\n", completes, set->set_count);
103 if (completes == set->set_count) {
106 if (cfs_atomic_inc_return(&set->set_finish_checked) == 1)
/*
 * Record that one request of a set has finished.
 *
 * Marks the request complete, increments the set's completion counter,
 * increments the success counter and wakes any waiter on set_waitq.
 *
 * NOTE(review): the condition guarding the set_success increment and the
 * place where rc is stored are not visible here - presumably success is
 * counted only when rc == 0; confirm.
 */
112 void lov_update_set(struct lov_request_set *set,
113 struct lov_request *req, int rc)
115 req->rq_complete = 1;
118 cfs_atomic_inc(&set->set_completes);
120 cfs_atomic_inc(&set->set_success);
122 cfs_waitq_signal(&set->set_waitq);
/*
 * Common completion handler for a single request.
 *
 * Records the completion through lov_update_set() and then inspects rc: a
 * non-zero rc that comes from a target which is missing or not active takes
 * the "grace" branch.
 *
 * NOTE(review): the body of the grace branch and the return statement are
 * not visible here - presumably rc is cleared for inactive targets; confirm.
 */
125 int lov_update_common_set(struct lov_request_set *set,
126 struct lov_request *req, int rc)
128 struct lov_obd *lov = &set->set_exp->exp_obd->u.lov;
131 lov_update_set(set, req, rc);
133 /* grace error on inactive ost */
134 if (rc && !(lov->lov_tgts[req->rq_idx] &&
135 lov->lov_tgts[req->rq_idx]->ltd_active))
138 /* FIXME in raid1 regime, should return 0 */
/*
 * Append a request to the tail of the set's request list.
 *
 * NOTE(review): any further bookkeeping (e.g. counting the request or
 * linking it back to the set) is not visible in this listing.
 */
142 void lov_set_add_req(struct lov_request *req, struct lov_request_set *set)
144 cfs_list_add_tail(&req->rq_link, &set->set_list);
/*
 * Defined outside this file; called from lov_update_enqueue_set() with the
 * per-stripe lock handle, the stripe's lov_oinfo, the enqueue flags, an LVB,
 * the lock mode and the enqueue result.
 */
149 extern void osc_update_enqueue(struct lustre_handle *lov_lockhp,
150 struct lov_oinfo *loi, int flags,
151 struct ost_lvb *lvb, __u32 mode, int rc);
/*
 * LOV-side handling of one stripe's enqueue result.
 *
 * Any rc other than ELDLM_OK - except ELDLM_LOCK_ABORTED on an intent lock -
 * is treated as a failure: the stripe's lock handle is zeroed and, if the
 * target is present and active, the failure is logged unless rc is -EINTR
 * or -EUSERS.
 *
 * NOTE(review): the handling for inactive targets and the return value are
 * not visible in this listing.
 */
153 static int lov_update_enqueue_lov(struct obd_export *exp,
154 struct lustre_handle *lov_lockhp,
155 struct lov_oinfo *loi, int flags, int idx,
156 struct ost_id *oi, int rc)
158 struct lov_obd *lov = &exp->exp_obd->u.lov;
160 if (rc != ELDLM_OK &&
161 !(rc == ELDLM_LOCK_ABORTED && (flags & LDLM_FL_HAS_INTENT))) {
162 memset(lov_lockhp, 0, sizeof(*lov_lockhp));
163 if (lov->lov_tgts[idx] && lov->lov_tgts[idx]->ltd_active) {
164 /* -EUSERS used by OST to report file contention */
165 if (rc != -EINTR && rc != -EUSERS)
166 CERROR("%s: enqueue objid "DOSTID" subobj"
167 DOSTID" on OST idx %d: rc %d\n",
168 exp->exp_obd->obd_name,
169 POSTID(oi), POSTID(&loi->loi_oi),
170 loi->loi_ost_idx, rc);
/*
 * Process the result of one stripe's enqueue.
 *
 * Looks up the stripe's slot in the set's lock-handle block and the stripe's
 * lov_oinfo in the parent stripe md. Then, with the parent md's stripe lock
 * held:
 *  - hands the request's private LVB copy to osc_update_enqueue();
 *  - zeroes the handle when an intent lock was aborted;
 *  - runs lov_update_enqueue_lov(), whose result replaces rc.
 * After dropping the lock, the completion is recorded in the set.
 *
 * NOTE(review): the return statement is not visible in this listing.
 */
177 int lov_update_enqueue_set(struct lov_request *req, __u32 mode, int rc)
179 struct lov_request_set *set = req->rq_rqset;
180 struct lustre_handle *lov_lockhp;
181 struct obd_info *oi = set->set_oi;
182 struct lov_oinfo *loi;
187 lov_lockhp = set->set_lockh->llh_handles + req->rq_stripe;
188 loi = oi->oi_md->lsm_oinfo[req->rq_stripe];
190 /* XXX LOV STACKING: OSC gets a copy, created in lov_prep_enqueue_set
191 * and that copy can be arbitrarily out of date.
193 * The LOV API is due for a serious rewriting anyways, and this
194 * can be addressed then. */
196 lov_stripe_lock(oi->oi_md);
197 osc_update_enqueue(lov_lockhp, loi, oi->oi_flags,
198 &req->rq_oi.oi_md->lsm_oinfo[0]->loi_lvb, mode, rc);
199 if (rc == ELDLM_LOCK_ABORTED && (oi->oi_flags & LDLM_FL_HAS_INTENT))
200 memset(lov_lockhp, 0, sizeof *lov_lockhp);
201 rc = lov_update_enqueue_lov(set->set_exp, lov_lockhp, loi, oi->oi_flags,
202 req->rq_idx, &oi->oi_md->lsm_oi, rc);
203 lov_stripe_unlock(oi->oi_md);
204 lov_update_set(set, req, rc);
/*
 * `cookie` is the obd_info embedded in a lov_request (field rq_oi); the
 * enclosing request is recovered with container_of() and the lock mode is
 * taken from the enqueue info stored on the request's set.
 * Returns whatever lov_update_enqueue_set() returns.
 */
208 /* The callback for osc_enqueue that updates lov info for every OSC request. */
209 static int cb_update_enqueue(void *cookie, int rc)
211 struct obd_info *oinfo = cookie;
212 struct ldlm_enqueue_info *einfo;
213 struct lov_request *lovreq;
215 lovreq = container_of(oinfo, struct lov_request, rq_oi);
216 einfo = lovreq->rq_rqset->set_ei;
217 return lov_update_enqueue_set(lovreq, einfo->ei_mode, rc);
/*
 * Finish an enqueue/match set.
 *
 * If at least one request completed and every completed request succeeded,
 * there is nothing to undo. Otherwise each request is visited and the lock
 * it obtained is cancelled; requests that did not complete or that failed,
 * and stripes whose handle is unused, are skipped. At the end a reference on
 * the set's lock-handle block is dropped.
 *
 * NOTE(review): lov->lov_tgts[req->rq_idx] is dereferenced for the
 * obd_cancel() call before the NULL check in the following `if` - confirm
 * the target cannot be NULL here.
 * NOTE(review): the adjacent literals "on OST" "idx %d" concatenate to
 * "on OSTidx %d" (missing space), and the message says "obdjid".
 * NOTE(review): the format has four conversions (%s, DOSTID, %d, %d) but
 * only two arguments are visible - confirm the rest are on elided lines.
 */
220 static int enqueue_done(struct lov_request_set *set, __u32 mode)
222 struct lov_request *req;
223 struct lov_obd *lov = &set->set_exp->exp_obd->u.lov;
224 int completes = cfs_atomic_read(&set->set_completes);
228 /* enqueue/match success, just return */
229 if (completes && completes == cfs_atomic_read(&set->set_success))
232 /* cancel enqueued/matched locks */
233 cfs_list_for_each_entry(req, &set->set_list, rq_link) {
234 struct lustre_handle *lov_lockhp;
236 if (!req->rq_complete || req->rq_rc)
239 lov_lockhp = set->set_lockh->llh_handles + req->rq_stripe;
241 if (!lustre_handle_is_used(lov_lockhp))
244 rc = obd_cancel(lov->lov_tgts[req->rq_idx]->ltd_exp,
245 req->rq_oi.oi_md, mode, lov_lockhp);
246 if (rc && lov->lov_tgts[req->rq_idx] &&
247 lov->lov_tgts[req->rq_idx]->ltd_active)
248 CERROR("%s: cancelling obdjid "DOSTID" on OST"
249 "idx %d error: rc = %d\n",
250 set->set_exp->exp_obd->obd_name,
251 POSTID(&req->rq_oi.oi_md->lsm_oi),
255 lov_llh_put(set->set_lockh);
/*
 * Tear down an enqueue set.
 *
 * Asserts that the set has an export. On one path the completion counter is
 * reset to 0 and enqueue_done() is run; on the other path, if a lock-handle
 * block exists, a reference on it is dropped. Returns rc when it is
 * non-zero, otherwise the result stored in ret.
 *
 * NOTE(review): the condition selecting between the two paths and the use of
 * `rqset` are not visible in this listing.
 */
259 int lov_fini_enqueue_set(struct lov_request_set *set, __u32 mode, int rc,
260 struct ptlrpc_request_set *rqset)
267 LASSERT(set->set_exp);
268 /* Do enqueue_done only for sync requests and if any request
272 cfs_atomic_set(&set->set_completes, 0);
273 ret = enqueue_done(set, mode);
274 } else if (set->set_lockh)
275 lov_llh_put(set->set_lockh);
279 RETURN(rc ? rc : ret);
/*
 * Take one more reference on a lov_lock_handles block and log the new
 * count. Installed as the hop_addref callback of lov_handle_ops, hence the
 * untyped `llhp` argument.
 */
282 static void lov_llh_addref(void *llhp)
284 struct lov_lock_handles *llh = llhp;
286 cfs_atomic_inc(&llh->llh_refcount);
287 CDEBUG(D_INFO, "GETting llh %p : new refcount %d\n", llh,
288 cfs_atomic_read(&llh->llh_refcount));
/*
 * Handle operations passed to class_handle_hash() when a lov_lock_handles
 * block is registered (see lov_llh_new()).
 */
291 static struct portals_handle_ops lov_handle_ops = {
292 .hop_addref = lov_llh_addref,
/*
 * Allocate a lov_lock_handles block with room for one handle per stripe of
 * `lsm`, record the stripe count and register the block in the handle hash.
 *
 * The reference count starts at 2.
 * NOTE(review): presumably one reference is for the caller and one for the
 * handle hash - confirm. The allocation-failure check and the return
 * statement are not visible in this listing.
 */
296 static struct lov_lock_handles *lov_llh_new(struct lov_stripe_md *lsm)
298 struct lov_lock_handles *llh;
300 OBD_ALLOC(llh, sizeof *llh +
301 sizeof(*llh->llh_handles) * lsm->lsm_stripe_count);
305 cfs_atomic_set(&llh->llh_refcount, 2);
306 llh->llh_stripe_count = lsm->lsm_stripe_count;
307 CFS_INIT_LIST_HEAD(&llh->llh_handle.h_link);
308 class_handle_hash(&llh->llh_handle, &lov_handle_ops);
/*
 * Build the request set for a striped lock enqueue.
 *
 * Allocates the set and a lock-handle block sized for the file's stripes,
 * and publishes the block's cookie through oinfo->oi_lockh. For each stripe
 * the requested extent is tested with lov_stripe_intersects(), the target's
 * presence and activity are checked, and a lov_request is allocated together
 * with a private single-stripe md. The request gets the stripe's handle
 * slot, cb_update_enqueue as completion callback, the caller's flags and
 * gid, the per-stripe extent, and a copy of the stripe's object id, kms and
 * LVB before it is added to the set.
 *
 * Failure paths visible here use -ENOMEM for allocation failures and -EIO;
 * the out_set path calls lov_fini_enqueue_set().
 *
 * NOTE(review): what happens to non-intersecting or inactive stripes, the
 * condition guarding the final -EIO, and the success return are not visible
 * in this listing.
 */
313 int lov_prep_enqueue_set(struct obd_export *exp, struct obd_info *oinfo,
314 struct ldlm_enqueue_info *einfo,
315 struct lov_request_set **reqset)
317 struct lov_obd *lov = &exp->exp_obd->u.lov;
318 struct lov_request_set *set;
322 OBD_ALLOC(set, sizeof(*set));
330 set->set_lockh = lov_llh_new(oinfo->oi_md);
331 if (set->set_lockh == NULL)
332 GOTO(out_set, rc = -ENOMEM);
333 oinfo->oi_lockh->cookie = set->set_lockh->llh_handle.h_cookie;
335 for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) {
336 struct lov_oinfo *loi;
337 struct lov_request *req;
340 loi = oinfo->oi_md->lsm_oinfo[i];
341 if (!lov_stripe_intersects(oinfo->oi_md, i,
342 oinfo->oi_policy.l_extent.start,
343 oinfo->oi_policy.l_extent.end,
347 if (!lov->lov_tgts[loi->loi_ost_idx] ||
348 !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) {
349 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
353 OBD_ALLOC(req, sizeof(*req));
355 GOTO(out_set, rc = -ENOMEM);
/* One buffer holds the md, one lov_oinfo pointer slot and one lov_oinfo. */
357 req->rq_buflen = sizeof(*req->rq_oi.oi_md) +
358 sizeof(struct lov_oinfo *) +
359 sizeof(struct lov_oinfo);
360 OBD_ALLOC_LARGE(req->rq_oi.oi_md, req->rq_buflen);
361 if (req->rq_oi.oi_md == NULL) {
362 OBD_FREE(req, sizeof(*req));
363 GOTO(out_set, rc = -ENOMEM);
/* Slot 0 points at the lov_oinfo stored right after the pointer slot. */
365 req->rq_oi.oi_md->lsm_oinfo[0] =
366 ((void *)req->rq_oi.oi_md) + sizeof(*req->rq_oi.oi_md) +
367 sizeof(struct lov_oinfo *);
369 /* Set lov request specific parameters. */
370 req->rq_oi.oi_lockh = set->set_lockh->llh_handles + i;
371 req->rq_oi.oi_cb_up = cb_update_enqueue;
372 req->rq_oi.oi_flags = oinfo->oi_flags;
374 LASSERT(req->rq_oi.oi_lockh);
376 req->rq_oi.oi_policy.l_extent.gid =
377 oinfo->oi_policy.l_extent.gid;
378 req->rq_oi.oi_policy.l_extent.start = start;
379 req->rq_oi.oi_policy.l_extent.end = end;
381 req->rq_idx = loi->loi_ost_idx;
384 /* XXX LOV STACKING: submd should be from the subobj */
385 req->rq_oi.oi_md->lsm_oi = loi->loi_oi;
386 req->rq_oi.oi_md->lsm_stripe_count = 0;
387 req->rq_oi.oi_md->lsm_oinfo[0]->loi_kms_valid =
389 req->rq_oi.oi_md->lsm_oinfo[0]->loi_kms = loi->loi_kms;
390 req->rq_oi.oi_md->lsm_oinfo[0]->loi_lvb = loi->loi_lvb;
392 lov_set_add_req(req, set);
395 GOTO(out_set, rc = -EIO);
399 lov_fini_enqueue_set(set, einfo->ei_mode, rc, NULL);
/*
 * Tear down a lock-match set.
 *
 * Asserts that the set has an export and runs enqueue_done(). When every
 * request in the set succeeded and the caller only tested for a lock
 * (LDLM_FL_TEST_LOCK), an additional reference on the lock-handle block is
 * dropped.
 *
 * NOTE(review): the final release of the set and the return statement are
 * not visible in this listing.
 */
403 int lov_fini_match_set(struct lov_request_set *set, __u32 mode, int flags)
410 LASSERT(set->set_exp);
411 rc = enqueue_done(set, mode);
412 if ((set->set_count == cfs_atomic_read(&set->set_success)) &&
413 (flags & LDLM_FL_TEST_LOCK))
414 lov_llh_put(set->set_lockh);
/*
 * Build the request set for a striped lock match.
 *
 * Allocates the set and a lock-handle block for `lsm`, and publishes the
 * block's cookie through `lockh`. For each stripe the policy extent is
 * tested with lov_stripe_intersects(); a missing or inactive target fails
 * the whole operation with -EIO. Otherwise a lov_request with a private md
 * is allocated, given the per-stripe extent, the caller's gid and the
 * stripe's object id, and added to the set.
 *
 * The out_set path calls lov_fini_match_set() with flags 0.
 *
 * NOTE(review): the handling of non-intersecting stripes, the condition
 * guarding the final -EIO and the success return are not visible here.
 */
421 int lov_prep_match_set(struct obd_export *exp, struct obd_info *oinfo,
422 struct lov_stripe_md *lsm, ldlm_policy_data_t *policy,
423 __u32 mode, struct lustre_handle *lockh,
424 struct lov_request_set **reqset)
426 struct lov_obd *lov = &exp->exp_obd->u.lov;
427 struct lov_request_set *set;
431 OBD_ALLOC(set, sizeof(*set));
438 set->set_oi->oi_md = lsm;
439 set->set_lockh = lov_llh_new(lsm);
440 if (set->set_lockh == NULL)
441 GOTO(out_set, rc = -ENOMEM);
442 lockh->cookie = set->set_lockh->llh_handle.h_cookie;
444 for (i = 0; i < lsm->lsm_stripe_count; i++){
445 struct lov_oinfo *loi;
446 struct lov_request *req;
449 loi = lsm->lsm_oinfo[i];
450 if (!lov_stripe_intersects(lsm, i, policy->l_extent.start,
451 policy->l_extent.end, &start, &end))
454 /* FIXME raid1 should grace this error */
455 if (!lov->lov_tgts[loi->loi_ost_idx] ||
456 !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) {
457 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
458 GOTO(out_set, rc = -EIO);
461 OBD_ALLOC(req, sizeof(*req));
463 GOTO(out_set, rc = -ENOMEM);
465 req->rq_buflen = sizeof(*req->rq_oi.oi_md);
466 OBD_ALLOC_LARGE(req->rq_oi.oi_md, req->rq_buflen);
467 if (req->rq_oi.oi_md == NULL) {
468 OBD_FREE(req, sizeof(*req));
469 GOTO(out_set, rc = -ENOMEM);
472 req->rq_oi.oi_policy.l_extent.start = start;
473 req->rq_oi.oi_policy.l_extent.end = end;
474 req->rq_oi.oi_policy.l_extent.gid = policy->l_extent.gid;
476 req->rq_idx = loi->loi_ost_idx;
479 /* XXX LOV STACKING: submd should be from the subobj */
480 req->rq_oi.oi_md->lsm_oi = loi->loi_oi;
481 req->rq_oi.oi_md->lsm_stripe_count = 0;
483 lov_set_add_req(req, set);
486 GOTO(out_set, rc = -EIO);
490 lov_fini_match_set(set, mode, 0);
/*
 * Tear down a lock-cancel set: asserts that the set has an export and drops
 * a reference on its lock-handle block.
 *
 * NOTE(review): any guard on set_lockh, the release of the set itself and
 * the return statement are not visible in this listing.
 */
494 int lov_fini_cancel_set(struct lov_request_set *set)
502 LASSERT(set->set_exp);
504 lov_llh_put(set->set_lockh);
/*
 * Build the request set for cancelling a striped lock.
 *
 * Resolves `lockh` to its lov_lock_handles block (failing with -EINVAL when
 * the handle is unknown) and rewrites the cookie from that block. For each
 * stripe whose handle slot is in use, a lov_request with a private md is
 * allocated, given the stripe's target index and object id, and added to the
 * set; stripes with an unused slot are only logged.
 *
 * The out_set path calls lov_fini_cancel_set().
 *
 * NOTE(review): the condition guarding the final -EIO and the success return
 * are not visible in this listing.
 */
511 int lov_prep_cancel_set(struct obd_export *exp, struct obd_info *oinfo,
512 struct lov_stripe_md *lsm, __u32 mode,
513 struct lustre_handle *lockh,
514 struct lov_request_set **reqset)
516 struct lov_request_set *set;
520 OBD_ALLOC(set, sizeof(*set));
527 set->set_oi->oi_md = lsm;
528 set->set_lockh = lov_handle2llh(lockh);
529 if (set->set_lockh == NULL) {
530 CERROR("LOV: invalid lov lock handle %p\n", lockh);
531 GOTO(out_set, rc = -EINVAL);
533 lockh->cookie = set->set_lockh->llh_handle.h_cookie;
535 for (i = 0; i < lsm->lsm_stripe_count; i++){
536 struct lov_request *req;
537 struct lustre_handle *lov_lockhp;
538 struct lov_oinfo *loi = lsm->lsm_oinfo[i];
540 lov_lockhp = set->set_lockh->llh_handles + i;
541 if (!lustre_handle_is_used(lov_lockhp)) {
542 CDEBUG(D_INFO, "lov idx %d subobj "DOSTID" no lock\n",
543 loi->loi_ost_idx, POSTID(&loi->loi_oi));
547 OBD_ALLOC(req, sizeof(*req));
549 GOTO(out_set, rc = -ENOMEM);
551 req->rq_buflen = sizeof(*req->rq_oi.oi_md);
552 OBD_ALLOC_LARGE(req->rq_oi.oi_md, req->rq_buflen);
553 if (req->rq_oi.oi_md == NULL) {
554 OBD_FREE(req, sizeof(*req));
555 GOTO(out_set, rc = -ENOMEM);
558 req->rq_idx = loi->loi_ost_idx;
561 /* XXX LOV STACKING: submd should be from the subobj */
562 req->rq_oi.oi_md->lsm_oi = loi->loi_oi;
563 req->rq_oi.oi_md->lsm_stripe_count = 0;
565 lov_set_add_req(req, set);
568 GOTO(out_set, rc = -EIO);
572 lov_fini_cancel_set(set);
/*
 * Merge the per-stripe attribute replies of a set into the caller's obdo.
 *
 * Requires set_oi; bails out early when the caller supplied no obdo or when
 * no request succeeded. The replies of all completed, successful requests
 * with a non-zero o_valid are merged into a temporary obdo via
 * lov_merge_attrs(), which also counts the contributing stripes in
 * `attrset`. If the caller asked for epoch attributes (OBD_MD_FLEPOCH),
 * every stripe must have contributed, otherwise -EIO is returned. On success
 * the temporary obdo, carrying the caller's original o_oi, is copied over
 * the caller's obdo.
 *
 * NOTE(review): the allocation and release of tmp_oa and the return values
 * of the early exits are not visible in this listing.
 */
575 static int common_attr_done(struct lov_request_set *set)
578 struct lov_request *req;
580 int rc = 0, attrset = 0;
583 LASSERT(set->set_oi != NULL);
585 if (set->set_oi->oi_oa == NULL)
588 if (!cfs_atomic_read(&set->set_success))
593 GOTO(out, rc = -ENOMEM);
595 cfs_list_for_each (pos, &set->set_list) {
596 req = cfs_list_entry(pos, struct lov_request, rq_link);
598 if (!req->rq_complete || req->rq_rc)
600 if (req->rq_oi.oi_oa->o_valid == 0) /* inactive stripe */
602 lov_merge_attrs(tmp_oa, req->rq_oi.oi_oa,
603 req->rq_oi.oi_oa->o_valid,
604 set->set_oi->oi_md, req->rq_stripe, &attrset);
607 CERROR("No stripes had valid attrs\n");
610 if ((set->set_oi->oi_oa->o_valid & OBD_MD_FLEPOCH) &&
611 (set->set_oi->oi_md->lsm_stripe_count != attrset)) {
612 /* When we take attributes of some epoch, we require all the
613 * ost to be active. */
614 CERROR("Not all the stripes had valid attrs\n");
615 GOTO(out, rc = -EIO);
618 tmp_oa->o_oi = set->set_oi->oi_oa->o_oi;
619 memcpy(set->set_oi->oi_oa, tmp_oa, sizeof(*set->set_oi->oi_oa));
/*
 * Post-process a finished bulk read/write set: for every completed,
 * successful request whose reply carries a valid block count
 * (OBD_MD_FLBLOCKS), store that count in the LVB of the matching stripe of
 * the parent stripe md.
 *
 * NOTE(review): the return value is not visible in this listing.
 */
627 static int brw_done(struct lov_request_set *set)
629 struct lov_stripe_md *lsm = set->set_oi->oi_md;
630 struct lov_oinfo *loi = NULL;
632 struct lov_request *req;
635 cfs_list_for_each (pos, &set->set_list) {
636 req = cfs_list_entry(pos, struct lov_request, rq_link);
638 if (!req->rq_complete || req->rq_rc)
641 loi = lsm->lsm_oinfo[req->rq_stripe];
643 if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLBLOCKS)
644 loi->loi_lvb.lvb_blocks = req->rq_oi.oi_oa->o_blocks;
/*
 * Tear down a bulk read/write set. Asserts that the set has an export and
 * does its post-processing only when at least one request completed.
 *
 * NOTE(review): the post-processing call, the release of the set and the
 * return statement are not visible in this listing.
 */
650 int lov_fini_brw_set(struct lov_request_set *set)
657 LASSERT(set->set_exp);
658 if (cfs_atomic_read(&set->set_completes)) {
660 /* FIXME update qos data here */
/*
 * Build the request set for a striped bulk read/write.
 *
 * Steps visible here:
 *  1. allocate the set, a page array of oa_bufs entries (set_pga) and a
 *     per-stripe scratch array `info`;
 *  2. count how many pages fall on each stripe;
 *  3. for every stripe with pages: fail with -EIO if its target is missing
 *     or inactive, otherwise allocate a request with a copy of the caller's
 *     obdo (object id and stripe index overridden) and a private md, and
 *     assign it a contiguous window [rq_pgaidx, rq_pgaidx + rq_oabufs) of
 *     set_pga;
 *  4. copy each caller page into its stripe's window and convert its file
 *     offset to a stripe-local offset with lov_stripe_offset().
 * The scratch array is released afterwards; on failure the set is torn down
 * with lov_fini_brw_set().
 *
 * NOTE(review): the initial value of `shift`, the update of info[].off
 * during the sort pass, the condition guarding the -EIO after the request
 * loop and the success return are not visible in this listing.
 */
667 int lov_prep_brw_set(struct obd_export *exp, struct obd_info *oinfo,
668 obd_count oa_bufs, struct brw_page *pga,
669 struct obd_trans_info *oti,
670 struct lov_request_set **reqset)
677 struct lov_request_set *set;
678 struct lov_obd *lov = &exp->exp_obd->u.lov;
679 int rc = 0, i, shift;
682 OBD_ALLOC(set, sizeof(*set));
690 set->set_oabufs = oa_bufs;
691 OBD_ALLOC_LARGE(set->set_pga, oa_bufs * sizeof(*set->set_pga));
693 GOTO(out, rc = -ENOMEM);
695 OBD_ALLOC_LARGE(info, sizeof(*info) * oinfo->oi_md->lsm_stripe_count);
697 GOTO(out, rc = -ENOMEM);
699 /* calculate the page count for each stripe */
700 for (i = 0; i < oa_bufs; i++) {
701 int stripe = lov_stripe_number(oinfo->oi_md, pga[i].off);
702 info[stripe].count++;
705 /* alloc and initialize lov request */
707 for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++){
708 struct lov_oinfo *loi = NULL;
709 struct lov_request *req;
711 if (info[i].count == 0)
714 loi = oinfo->oi_md->lsm_oinfo[i];
715 if (!lov->lov_tgts[loi->loi_ost_idx] ||
716 !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) {
717 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
718 GOTO(out, rc = -EIO);
721 OBD_ALLOC(req, sizeof(*req));
723 GOTO(out, rc = -ENOMEM);
725 OBDO_ALLOC(req->rq_oi.oi_oa);
726 if (req->rq_oi.oi_oa == NULL) {
727 OBD_FREE(req, sizeof(*req));
728 GOTO(out, rc = -ENOMEM);
732 memcpy(req->rq_oi.oi_oa, oinfo->oi_oa,
733 sizeof(*req->rq_oi.oi_oa));
735 req->rq_oi.oi_oa->o_oi = loi->loi_oi;
736 req->rq_oi.oi_oa->o_stripe_idx = i;
738 req->rq_buflen = sizeof(*req->rq_oi.oi_md);
739 OBD_ALLOC_LARGE(req->rq_oi.oi_md, req->rq_buflen);
740 if (req->rq_oi.oi_md == NULL) {
741 OBDO_FREE(req->rq_oi.oi_oa);
742 OBD_FREE(req, sizeof(*req));
743 GOTO(out, rc = -ENOMEM);
746 req->rq_idx = loi->loi_ost_idx;
749 /* XXX LOV STACKING */
750 req->rq_oi.oi_md->lsm_oi = loi->loi_oi;
/* `shift` is the running start index of this request's window in set_pga. */
751 req->rq_oabufs = info[i].count;
752 req->rq_pgaidx = shift;
753 shift += req->rq_oabufs;
755 /* remember the index for sort brw_page array */
756 info[i].index = req->rq_pgaidx;
758 req->rq_oi.oi_capa = oinfo->oi_capa;
760 lov_set_add_req(req, set);
763 GOTO(out, rc = -EIO);
765 /* rotate & sort the brw_page array */
766 for (i = 0; i < oa_bufs; i++) {
767 int stripe = lov_stripe_number(oinfo->oi_md, pga[i].off);
769 shift = info[stripe].index + info[stripe].off;
770 LASSERT(shift < oa_bufs);
771 set->set_pga[shift] = pga[i];
772 lov_stripe_offset(oinfo->oi_md, pga[i].off, stripe,
773 &set->set_pga[shift].off);
779 sizeof(*info) * oinfo->oi_md->lsm_stripe_count);
784 lov_fini_brw_set(set);
/*
 * Tear down a getattr set. Asserts that the set has an export and, when at
 * least one request completed, merges the per-stripe attributes with
 * common_attr_done().
 *
 * NOTE(review): the release of the set and the return statement are not
 * visible in this listing.
 */
789 int lov_fini_getattr_set(struct lov_request_set *set)
796 LASSERT(set->set_exp);
797 if (cfs_atomic_read(&set->set_completes))
798 rc = common_attr_done(set);
/*
 * `cookie` is the obd_info embedded in a lov_request (field rq_oi); the
 * enclosing request is recovered with container_of().
 * Returns whatever lov_update_common_set() returns.
 */
805 /* The callback for osc_getattr_async that finalizes a request info when a
806 * response is received. */
807 static int cb_getattr_update(void *cookie, int rc)
809 struct obd_info *oinfo = cookie;
810 struct lov_request *lovreq;
811 lovreq = container_of(oinfo, struct lov_request, rq_oi);
812 return lov_update_common_set(lovreq->rq_rqset, lovreq, rc);
/*
 * Build the request set for a striped getattr.
 *
 * For each stripe of oinfo->oi_md: a missing or inactive target is logged
 * and, when epoch attributes are requested (OBD_MD_FLEPOCH), fails the whole
 * operation with -EIO. Otherwise a lov_request is allocated with a copy of
 * the caller's obdo (object id replaced by the stripe's), cb_getattr_update
 * as completion callback and the caller's capability, and added to the set.
 *
 * The out_set path calls lov_fini_getattr_set().
 *
 * NOTE(review): how inactive stripes are skipped in the non-epoch case, the
 * condition guarding the final -EIO and the success return are not visible
 * in this listing.
 */
815 int lov_prep_getattr_set(struct obd_export *exp, struct obd_info *oinfo,
816 struct lov_request_set **reqset)
818 struct lov_request_set *set;
819 struct lov_obd *lov = &exp->exp_obd->u.lov;
823 OBD_ALLOC(set, sizeof(*set));
831 for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) {
832 struct lov_oinfo *loi;
833 struct lov_request *req;
835 loi = oinfo->oi_md->lsm_oinfo[i];
836 if (!lov->lov_tgts[loi->loi_ost_idx] ||
837 !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) {
838 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
839 if (oinfo->oi_oa->o_valid & OBD_MD_FLEPOCH)
840 /* SOM requires all the OSTs to be active. */
841 GOTO(out_set, rc = -EIO);
845 OBD_ALLOC(req, sizeof(*req));
847 GOTO(out_set, rc = -ENOMEM);
850 req->rq_idx = loi->loi_ost_idx;
852 OBDO_ALLOC(req->rq_oi.oi_oa);
853 if (req->rq_oi.oi_oa == NULL) {
854 OBD_FREE(req, sizeof(*req));
855 GOTO(out_set, rc = -ENOMEM);
857 memcpy(req->rq_oi.oi_oa, oinfo->oi_oa,
858 sizeof(*req->rq_oi.oi_oa));
859 req->rq_oi.oi_oa->o_oi = loi->loi_oi;
860 req->rq_oi.oi_cb_up = cb_getattr_update;
861 req->rq_oi.oi_capa = oinfo->oi_capa;
863 lov_set_add_req(req, set);
866 GOTO(out_set, rc = -EIO);
870 lov_fini_getattr_set(set);
/*
 * Tear down a destroy set. Asserts that the set has an export; the branch
 * taken when at least one request completed currently holds only a FIXME.
 *
 * NOTE(review): the release of the set and the return statement are not
 * visible in this listing.
 */
874 int lov_fini_destroy_set(struct lov_request_set *set)
880 LASSERT(set->set_exp);
881 if (cfs_atomic_read(&set->set_completes)) {
882 /* FIXME update qos data here */
/*
 * Build the request set for destroying a striped object.
 *
 * Records `lsm` and `src_oa` in the set's obd_info and, when a transaction
 * info is supplied and src_oa carries OBD_MD_FLCOOKIE, remembers the log
 * cookies. For each stripe whose target is present and active, a
 * lov_request is allocated with a copy of src_oa (object id replaced by the
 * stripe's) and added to the set.
 *
 * The out_set path calls lov_fini_destroy_set().
 *
 * NOTE(review): how inactive stripes are skipped, the condition guarding the
 * final -EIO and the success return are not visible in this listing.
 */
890 int lov_prep_destroy_set(struct obd_export *exp, struct obd_info *oinfo,
891 struct obdo *src_oa, struct lov_stripe_md *lsm,
892 struct obd_trans_info *oti,
893 struct lov_request_set **reqset)
895 struct lov_request_set *set;
896 struct lov_obd *lov = &exp->exp_obd->u.lov;
900 OBD_ALLOC(set, sizeof(*set));
907 set->set_oi->oi_md = lsm;
908 set->set_oi->oi_oa = src_oa;
910 if (oti != NULL && src_oa->o_valid & OBD_MD_FLCOOKIE)
911 set->set_cookies = oti->oti_logcookies;
913 for (i = 0; i < lsm->lsm_stripe_count; i++) {
914 struct lov_oinfo *loi;
915 struct lov_request *req;
917 loi = lsm->lsm_oinfo[i];
918 if (!lov->lov_tgts[loi->loi_ost_idx] ||
919 !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) {
920 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
924 OBD_ALLOC(req, sizeof(*req));
926 GOTO(out_set, rc = -ENOMEM);
929 req->rq_idx = loi->loi_ost_idx;
931 OBDO_ALLOC(req->rq_oi.oi_oa);
932 if (req->rq_oi.oi_oa == NULL) {
933 OBD_FREE(req, sizeof(*req));
934 GOTO(out_set, rc = -ENOMEM);
936 memcpy(req->rq_oi.oi_oa, src_oa, sizeof(*req->rq_oi.oi_oa));
937 req->rq_oi.oi_oa->o_oi = loi->loi_oi;
938 lov_set_add_req(req, set);
941 GOTO(out_set, rc = -EIO);
945 lov_fini_destroy_set(set);
/*
 * Tear down a setattr set. Asserts that the set has an export and, when at
 * least one request completed, merges the per-stripe attributes with
 * common_attr_done().
 *
 * NOTE(review): the release of the set and the return statement are not
 * visible in this listing.
 */
949 int lov_fini_setattr_set(struct lov_request_set *set)
956 LASSERT(set->set_exp);
957 if (cfs_atomic_read(&set->set_completes)) {
958 rc = common_attr_done(set);
959 /* FIXME update qos data here */
/*
 * Completion handler for one stripe's setattr.
 *
 * Records the completion in the set, gives special treatment to a non-zero
 * rc from a missing or inactive target, and copies whichever of ctime,
 * mtime and atime the reply marks valid into the LVB of the matching stripe
 * of the parent stripe md.
 *
 * NOTE(review): the body of the "grace" branch, the condition under which
 * the timestamps are copied (presumably rc == 0) and the return statement
 * are not visible in this listing.
 */
966 int lov_update_setattr_set(struct lov_request_set *set,
967 struct lov_request *req, int rc)
969 struct lov_obd *lov = &req->rq_rqset->set_exp->exp_obd->u.lov;
970 struct lov_stripe_md *lsm = req->rq_rqset->set_oi->oi_md;
973 lov_update_set(set, req, rc);
975 /* grace error on inactive ost */
976 if (rc && !(lov->lov_tgts[req->rq_idx] &&
977 lov->lov_tgts[req->rq_idx]->ltd_active))
981 if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLCTIME)
982 lsm->lsm_oinfo[req->rq_stripe]->loi_lvb.lvb_ctime =
983 req->rq_oi.oi_oa->o_ctime;
984 if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLMTIME)
985 lsm->lsm_oinfo[req->rq_stripe]->loi_lvb.lvb_mtime =
986 req->rq_oi.oi_oa->o_mtime;
987 if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLATIME)
988 lsm->lsm_oinfo[req->rq_stripe]->loi_lvb.lvb_atime =
989 req->rq_oi.oi_oa->o_atime;
/*
 * `cookie` is the obd_info embedded in a lov_request (field rq_oi); the
 * enclosing request is recovered with container_of().
 * Returns whatever lov_update_setattr_set() returns.
 */
995 /* The callback for osc_setattr_async that finalizes a request info when a
996 * response is received. */
997 static int cb_setattr_update(void *cookie, int rc)
999 struct obd_info *oinfo = cookie;
1000 struct lov_request *lovreq;
1001 lovreq = container_of(oinfo, struct lov_request, rq_oi);
1002 return lov_update_setattr_set(lovreq->rq_rqset, lovreq, rc);
/*
 * Build the request set for a striped setattr.
 *
 * Stores `oinfo` as the set's obd_info and, when a transaction info is
 * supplied and the obdo carries OBD_MD_FLCOOKIE, remembers the log cookies.
 * For each stripe whose target is present and active, a lov_request is
 * allocated with a copy of the caller's obdo (object id and stripe index
 * overridden), cb_setattr_update as completion callback and the caller's
 * capability. When a size is being set (OBD_MD_FLSIZE) the file size is
 * translated to a per-stripe size with lov_stripe_offset(); a negative
 * result together with a non-zero stripe size decrements that size by one.
 *
 * An empty set fails with -EIO; the out_set path calls
 * lov_fini_setattr_set().
 *
 * NOTE(review): how inactive stripes are skipped and the success return are
 * not visible in this listing.
 */
1005 int lov_prep_setattr_set(struct obd_export *exp, struct obd_info *oinfo,
1006 struct obd_trans_info *oti,
1007 struct lov_request_set **reqset)
1009 struct lov_request_set *set;
1010 struct lov_obd *lov = &exp->exp_obd->u.lov;
1014 OBD_ALLOC(set, sizeof(*set));
1021 set->set_oi = oinfo;
1022 if (oti != NULL && oinfo->oi_oa->o_valid & OBD_MD_FLCOOKIE)
1023 set->set_cookies = oti->oti_logcookies;
1025 for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) {
1026 struct lov_oinfo *loi = oinfo->oi_md->lsm_oinfo[i];
1027 struct lov_request *req;
1029 if (!lov->lov_tgts[loi->loi_ost_idx] ||
1030 !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) {
1031 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
1035 OBD_ALLOC(req, sizeof(*req));
1037 GOTO(out_set, rc = -ENOMEM);
1039 req->rq_idx = loi->loi_ost_idx;
1041 OBDO_ALLOC(req->rq_oi.oi_oa);
1042 if (req->rq_oi.oi_oa == NULL) {
1043 OBD_FREE(req, sizeof(*req));
1044 GOTO(out_set, rc = -ENOMEM);
1046 memcpy(req->rq_oi.oi_oa, oinfo->oi_oa,
1047 sizeof(*req->rq_oi.oi_oa));
1048 req->rq_oi.oi_oa->o_oi = loi->loi_oi;
1049 req->rq_oi.oi_oa->o_stripe_idx = i;
1050 req->rq_oi.oi_cb_up = cb_setattr_update;
1051 req->rq_oi.oi_capa = oinfo->oi_capa;
1053 if (oinfo->oi_oa->o_valid & OBD_MD_FLSIZE) {
1054 int off = lov_stripe_offset(oinfo->oi_md,
1055 oinfo->oi_oa->o_size, i,
1056 &req->rq_oi.oi_oa->o_size);
1058 if (off < 0 && req->rq_oi.oi_oa->o_size)
1059 req->rq_oi.oi_oa->o_size--;
1061 CDEBUG(D_INODE, "stripe %d has size "LPU64"/"LPU64"\n",
1062 i, req->rq_oi.oi_oa->o_size,
1063 oinfo->oi_oa->o_size);
1065 lov_set_add_req(req, set);
1067 if (!set->set_count)
1068 GOTO(out_set, rc = -EIO);
1072 lov_fini_setattr_set(set);
/*
 * Tear down a punch (truncate) set. Asserts that the set has an export;
 * when at least one request completed and at least one succeeded, the
 * per-stripe attributes are merged with common_attr_done(). The set is then
 * released through lov_put_reqset().
 *
 * NOTE(review): the rc assigned when nothing succeeded and the return
 * statement are not visible in this listing.
 */
1076 int lov_fini_punch_set(struct lov_request_set *set)
1083 LASSERT(set->set_exp);
1084 if (cfs_atomic_read(&set->set_completes)) {
1086 /* FIXME update qos data here */
1087 if (cfs_atomic_read(&set->set_success))
1088 rc = common_attr_done(set);
1091 lov_put_reqset(set);
/*
 * Completion handler for one stripe's punch.
 *
 * Records the completion in the set, gives special treatment to a non-zero
 * rc from an inactive target, and - with the stripe lock of the parent
 * stripe md held - copies the reply's block count into the stripe's LVB when
 * OBD_MD_FLBLOCKS is valid.
 *
 * NOTE(review): unlike lov_update_common_set() and lov_update_setattr_set(),
 * the inactive-target test here dereferences lov_tgts[req->rq_idx] without
 * first checking it for NULL - confirm the target cannot be NULL on this
 * path.
 * NOTE(review): the body of the "grace" branch, the condition under which
 * the LVB is updated and the return statement are not visible here.
 */
1096 int lov_update_punch_set(struct lov_request_set *set,
1097 struct lov_request *req, int rc)
1099 struct lov_obd *lov = &req->rq_rqset->set_exp->exp_obd->u.lov;
1100 struct lov_stripe_md *lsm = req->rq_rqset->set_oi->oi_md;
1103 lov_update_set(set, req, rc);
1105 /* grace error on inactive ost */
1106 if (rc && !lov->lov_tgts[req->rq_idx]->ltd_active)
1110 lov_stripe_lock(lsm);
1111 if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLBLOCKS) {
1112 lsm->lsm_oinfo[req->rq_stripe]->loi_lvb.lvb_blocks =
1113 req->rq_oi.oi_oa->o_blocks;
1116 lov_stripe_unlock(lsm);
/*
 * `cookie` is the obd_info embedded in a lov_request (field rq_oi); the
 * enclosing request is recovered with container_of().
 * Returns whatever lov_update_punch_set() returns.
 */
1122 /* The callback for osc_punch that finalizes a request info when a response
1124 static int cb_update_punch(void *cookie, int rc)
1126 struct obd_info *oinfo = cookie;
1127 struct lov_request *lovreq;
1128 lovreq = container_of(oinfo, struct lov_request, rq_oi);
1129 return lov_update_punch_set(lovreq->rq_rqset, lovreq, rc);
/*
 * Build the request set for a striped punch (truncate).
 *
 * For each stripe the caller's extent is tested with
 * lov_stripe_intersects(); a missing or inactive target fails the whole
 * operation with -EIO. Otherwise a lov_request is allocated with a copy of
 * the caller's obdo (object id and stripe index overridden, OBD_MD_FLGROUP
 * set), cb_update_punch as completion callback, the per-stripe extent
 * [rs, re] with gid -1, and the caller's capability.
 *
 * An empty set fails with -EIO; the out_set path calls
 * lov_fini_punch_set().
 *
 * NOTE(review): the handling of non-intersecting stripes and the success
 * return are not visible in this listing.
 */
1132 int lov_prep_punch_set(struct obd_export *exp, struct obd_info *oinfo,
1133 struct obd_trans_info *oti,
1134 struct lov_request_set **reqset)
1136 struct lov_request_set *set;
1137 struct lov_obd *lov = &exp->exp_obd->u.lov;
1141 OBD_ALLOC(set, sizeof(*set));
1146 set->set_oi = oinfo;
1149 for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) {
1150 struct lov_oinfo *loi = oinfo->oi_md->lsm_oinfo[i];
1151 struct lov_request *req;
1154 if (!lov_stripe_intersects(oinfo->oi_md, i,
1155 oinfo->oi_policy.l_extent.start,
1156 oinfo->oi_policy.l_extent.end,
1160 if (!lov->lov_tgts[loi->loi_ost_idx] ||
1161 !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) {
1162 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
1163 GOTO(out_set, rc = -EIO);
1166 OBD_ALLOC(req, sizeof(*req));
1168 GOTO(out_set, rc = -ENOMEM);
1170 req->rq_idx = loi->loi_ost_idx;
1172 OBDO_ALLOC(req->rq_oi.oi_oa);
1173 if (req->rq_oi.oi_oa == NULL) {
1174 OBD_FREE(req, sizeof(*req));
1175 GOTO(out_set, rc = -ENOMEM);
1177 memcpy(req->rq_oi.oi_oa, oinfo->oi_oa,
1178 sizeof(*req->rq_oi.oi_oa));
1179 req->rq_oi.oi_oa->o_oi = loi->loi_oi;
1180 req->rq_oi.oi_oa->o_valid |= OBD_MD_FLGROUP;
1182 req->rq_oi.oi_oa->o_stripe_idx = i;
1183 req->rq_oi.oi_cb_up = cb_update_punch;
1185 req->rq_oi.oi_policy.l_extent.start = rs;
1186 req->rq_oi.oi_policy.l_extent.end = re;
1187 req->rq_oi.oi_policy.l_extent.gid = -1;
1189 req->rq_oi.oi_capa = oinfo->oi_capa;
1191 lov_set_add_req(req, set);
1193 if (!set->set_count)
1194 GOTO(out_set, rc = -EIO);
1198 lov_fini_punch_set(set);
/*
 * Tear down a sync set. Asserts that the set has an export, checks whether
 * any completed request succeeded, and releases the set through
 * lov_put_reqset().
 *
 * NOTE(review): the rc assigned when no request succeeded and the return
 * statement are not visible in this listing.
 */
1202 int lov_fini_sync_set(struct lov_request_set *set)
1209 LASSERT(set->set_exp);
1210 if (cfs_atomic_read(&set->set_completes)) {
1211 if (!cfs_atomic_read(&set->set_success))
1213 /* FIXME update qos data here */
1216 lov_put_reqset(set);
/*
 * `cookie` is the obd_info embedded in a lov_request (field rq_oi); the
 * enclosing request is recovered with container_of().
 * Returns whatever lov_update_common_set() returns.
 */
1221 /* The callback for osc_sync that finalizes a request info when a
1222 * response is received. */
1223 static int cb_sync_update(void *cookie, int rc)
1225 struct obd_info *oinfo = cookie;
1226 struct lov_request *lovreq;
1228 lovreq = container_of(oinfo, struct lov_request, rq_oi);
1229 return lov_update_common_set(lovreq->rq_rqset, lovreq, rc);
/*
 * Build the request set for syncing the byte range [start, end] of a
 * striped object.
 *
 * For each stripe whose target is present and active and which intersects
 * the range (lov_stripe_intersects() yields the per-stripe range rs..re), a
 * lov_request is set up with a copy of the caller's obdo (object id and
 * stripe index overridden), the per-stripe extent with gid -1 and
 * cb_sync_update as completion callback, and added to the set.
 *
 * An empty set fails with -EIO; the out_set path calls lov_fini_sync_set().
 *
 * NOTE(review): the allocation of the set and of each request, how inactive
 * or non-intersecting stripes are skipped, and the success return are not
 * visible in this listing.
 */
1232 int lov_prep_sync_set(struct obd_export *exp, struct obd_info *oinfo,
1233 obd_off start, obd_off end,
1234 struct lov_request_set **reqset)
1236 struct lov_request_set *set;
1237 struct lov_obd *lov = &exp->exp_obd->u.lov;
1247 set->set_oi = oinfo;
1249 for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) {
1250 struct lov_oinfo *loi = oinfo->oi_md->lsm_oinfo[i];
1251 struct lov_request *req;
1254 if (!lov->lov_tgts[loi->loi_ost_idx] ||
1255 !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) {
1256 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
1260 if (!lov_stripe_intersects(oinfo->oi_md, i, start, end, &rs,
1266 GOTO(out_set, rc = -ENOMEM);
1268 req->rq_idx = loi->loi_ost_idx;
1270 OBDO_ALLOC(req->rq_oi.oi_oa);
1271 if (req->rq_oi.oi_oa == NULL) {
1272 OBD_FREE(req, sizeof(*req));
1273 GOTO(out_set, rc = -ENOMEM);
1275 *req->rq_oi.oi_oa = *oinfo->oi_oa;
1276 req->rq_oi.oi_oa->o_oi = loi->loi_oi;
1277 req->rq_oi.oi_oa->o_stripe_idx = i;
1279 req->rq_oi.oi_policy.l_extent.start = rs;
1280 req->rq_oi.oi_policy.l_extent.end = re;
1281 req->rq_oi.oi_policy.l_extent.gid = -1;
1282 req->rq_oi.oi_cb_up = cb_sync_update;
1284 lov_set_add_req(req, set);
1286 if (!set->set_count)
1287 GOTO(out_set, rc = -EIO);
1291 lov_fini_sync_set(set);
/*
 * LOV_U64_MAX is the all-ones 64-bit value; lov_fini_statfs() treats it as
 * "saturated" and skips the division for counters holding it.
 *
 * LOV_SUM_MAX(tot, add) detects unsigned wrap-around of tot + add (the sum
 * being smaller than tot) and pins tot to LOV_U64_MAX in that case.
 * NOTE(review): the non-overflow branch is not visible in this listing -
 * presumably it adds `add` to `tot`; confirm. Both arguments are evaluated
 * more than once, so they must be free of side effects.
 */
1295 #define LOV_U64_MAX ((__u64)~0ULL)
1296 #define LOV_SUM_MAX(tot, add) \
1298 if ((tot) + (add) < (tot)) \
1299 (tot) = LOV_U64_MAX; \
/*
 * Finalise an aggregated statfs result.
 *
 * The file counters (os_files, os_ffree) are divided by the expected stripe
 * count unless they are saturated at LOV_U64_MAX. The result is then copied
 * into obd->obd_osfs and its age stamped with the current time, both under
 * obd_osfs_lock.
 *
 * NOTE(review): the role of `success` in guarding this work and the return
 * value are not visible in this listing.
 */
1304 int lov_fini_statfs(struct obd_device *obd, struct obd_statfs *osfs,int success)
1309 __u32 expected_stripes = lov_get_stripecnt(&obd->u.lov,
1311 if (osfs->os_files != LOV_U64_MAX)
1312 lov_do_div64(osfs->os_files, expected_stripes);
1313 if (osfs->os_ffree != LOV_U64_MAX)
1314 lov_do_div64(osfs->os_ffree, expected_stripes);
1316 spin_lock(&obd->obd_osfs_lock);
1317 memcpy(&obd->obd_osfs, osfs, sizeof(*osfs));
1318 obd->obd_osfs_age = cfs_time_current_64();
1319 spin_unlock(&obd->obd_osfs_lock);
/*
 * Tear down a statfs set. When at least one request completed, the
 * aggregated result is finalised with lov_fini_statfs(), passing the number
 * of successful requests. The set is then released through
 * lov_put_reqset().
 *
 * NOTE(review): any NULL check on `set` and the return statement are not
 * visible in this listing.
 */
1326 int lov_fini_statfs_set(struct lov_request_set *set)
1334 if (cfs_atomic_read(&set->set_completes)) {
1335 rc = lov_fini_statfs(set->set_obd, set->set_oi->oi_osfs,
1336 cfs_atomic_read(&set->set_success));
1338 lov_put_reqset(set);
/*
 * Fold one target's statfs reply (lov_sfs) into the running aggregate
 * (osfs).
 *
 * One path simply copies lov_sfs into osfs. On the other path:
 *  - if the block sizes differ, a shift is derived from the OR of both
 *    sizes, and the block counters of whichever side has the smaller block
 *    size are right-shifted by it (osfs also adopts the larger block size);
 *  - free/available blocks are combined - both a variant that keeps the
 *    smaller os_bavail and a variant that sums are present;
 *  - os_blocks is summed, and os_files / os_ffree are accumulated with
 *    LOV_SUM_MAX.
 *
 * NOTE(review): the third parameter, the condition selecting the copy path
 * (presumably "first successful reply"), the body of the shift loop and the
 * selector between the two os_bavail variants are not visible here.
 * NOTE(review): the loop bound allows shift to reach 64; shifting a 64-bit
 * value by 64 is undefined in C - confirm the loop always exits earlier.
 */
1342 void lov_update_statfs(struct obd_statfs *osfs, struct obd_statfs *lov_sfs,
1345 int shift = 0, quit = 0;
1349 memcpy(osfs, lov_sfs, sizeof(*lov_sfs));
1351 if (osfs->os_bsize != lov_sfs->os_bsize) {
1352 /* assume all block sizes are always powers of 2 */
1353 /* get the bits difference */
1354 tmp = osfs->os_bsize | lov_sfs->os_bsize;
1355 for (shift = 0; shift <= 64; ++shift) {
1367 if (osfs->os_bsize < lov_sfs->os_bsize) {
1368 osfs->os_bsize = lov_sfs->os_bsize;
1370 osfs->os_bfree >>= shift;
1371 osfs->os_bavail >>= shift;
1372 osfs->os_blocks >>= shift;
1373 } else if (shift != 0) {
1374 lov_sfs->os_bfree >>= shift;
1375 lov_sfs->os_bavail >>= shift;
1376 lov_sfs->os_blocks >>= shift;
1379 /* Sandia requested that df (and so, statfs) only
1380 returned minimal available space on
1381 a single OST, so people would be able to
1382 write this much data guaranteed. */
1383 if (osfs->os_bavail > lov_sfs->os_bavail) {
1384 /* Presumably if new bavail is smaller,
1385 new bfree is bigger as well */
1386 osfs->os_bfree = lov_sfs->os_bfree;
1387 osfs->os_bavail = lov_sfs->os_bavail;
1390 osfs->os_bfree += lov_sfs->os_bfree;
1391 osfs->os_bavail += lov_sfs->os_bavail;
1393 osfs->os_blocks += lov_sfs->os_blocks;
1394 /* XXX not sure about this one - depends on policy.
1395 * - could be minimum if we always stripe on all OBDs
1396 * (but that would be wrong for any other policy,
1397 * if one of the OBDs has no more objects left)
1398 * - could be sum if we stripe whole objects
1399 * - could be average, just to give a nice number
1401 * To give a "reasonable" (if not wholly accurate)
1402 * number, we divide the total number of free objects
1403 * by expected stripe count (watch out for overflow).
1405 LOV_SUM_MAX(osfs->os_files, lov_sfs->os_files);
1406 LOV_SUM_MAX(osfs->os_ffree, lov_sfs->os_ffree);
/*
 * Per-target statfs completion.
 *
 * `cookie` is the obd_info embedded in a lov_request. The success count is
 * sampled before lov_update_set() records this completion, so the value
 * passed to lov_update_statfs() is the number of earlier successes. For a
 * target that is present and active, the reply is cached in the target
 * device's obd_osfs under its obd_osfs_lock, and the cache age is refreshed
 * unless the reply itself came from cache (OBD_STATFS_FROM_CACHE); the reply
 * is then folded into the set-wide aggregate. When the set was issued with
 * OBD_STATFS_PTLRPCD and lov_set_finished() reports completion,
 * lov_statfs_interpret() is invoked with a non-zero status if not every
 * request succeeded.
 *
 * NOTE(review): the early exit taken on a non-zero rc and the return
 * statement are not visible in this listing.
 */
1410 /* The callback for osc_statfs_async that finalizes a request info when a
1411 * response is received. */
1412 static int cb_statfs_update(void *cookie, int rc)
1414 struct obd_info *oinfo = cookie;
1415 struct lov_request *lovreq;
1416 struct lov_request_set *set;
1417 struct obd_statfs *osfs, *lov_sfs;
1418 struct lov_obd *lov;
1419 struct lov_tgt_desc *tgt;
1420 struct obd_device *lovobd, *tgtobd;
1424 lovreq = container_of(oinfo, struct lov_request, rq_oi);
1425 set = lovreq->rq_rqset;
1426 lovobd = set->set_obd;
1427 lov = &lovobd->u.lov;
1428 osfs = set->set_oi->oi_osfs;
1429 lov_sfs = oinfo->oi_osfs;
1430 success = cfs_atomic_read(&set->set_success);
1431 /* XXX: the same is done in lov_update_common_set, however
1432 lovset->set_exp is not initialized. */
1433 lov_update_set(set, lovreq, rc);
1438 tgt = lov->lov_tgts[lovreq->rq_idx];
1439 if (!tgt || !tgt->ltd_active)
1440 GOTO(out_update, rc);
1442 tgtobd = class_exp2obd(tgt->ltd_exp);
1443 spin_lock(&tgtobd->obd_osfs_lock);
1444 memcpy(&tgtobd->obd_osfs, lov_sfs, sizeof(*lov_sfs));
1445 if ((oinfo->oi_flags & OBD_STATFS_FROM_CACHE) == 0)
1446 tgtobd->obd_osfs_age = cfs_time_current_64();
1447 spin_unlock(&tgtobd->obd_osfs_lock);
1450 lov_update_statfs(osfs, lov_sfs, success);
1454 if (set->set_oi->oi_flags & OBD_STATFS_PTLRPCD &&
1455 lov_set_finished(set, 0)) {
1456 lov_statfs_interpret(NULL, set, set->set_count !=
1457 cfs_atomic_read(&set->set_success));
/*
 * Build the request set for a statfs across all LOV targets.
 *
 * Iterates over the ld_tgt_count target slots. A slot is logged as inactive
 * when it is empty, or when the target is not active and the caller asked
 * for OBD_STATFS_NODELAY; a target without an export is logged as
 * administratively disabled. For the remaining targets a lov_request with
 * its own obd_statfs buffer is allocated, given cb_statfs_update as
 * completion callback and the caller's flags, and added to the set.
 *
 * An empty set fails with -EIO; the out_set path calls
 * lov_fini_statfs_set().
 *
 * NOTE(review): how the logged targets are skipped, where the request's
 * target index is recorded, and the success return are not visible in this
 * listing.
 */
1463 int lov_prep_statfs_set(struct obd_device *obd, struct obd_info *oinfo,
1464 struct lov_request_set **reqset)
1466 struct lov_request_set *set;
1467 struct lov_obd *lov = &obd->u.lov;
1471 OBD_ALLOC(set, sizeof(*set));
1477 set->set_oi = oinfo;
1479 /* We only get block data from the OBD */
1480 for (i = 0; i < lov->desc.ld_tgt_count; i++) {
1481 struct lov_request *req;
1483 if (!lov->lov_tgts[i] || (!lov->lov_tgts[i]->ltd_active
1484 && (oinfo->oi_flags & OBD_STATFS_NODELAY))) {
1485 CDEBUG(D_HA, "lov idx %d inactive\n", i);
1489 /* skip targets that have been explicitly disabled by the
1491 if (!lov->lov_tgts[i]->ltd_exp) {
1492 CDEBUG(D_HA, "lov idx %d administratively disabled\n", i);
1496 OBD_ALLOC(req, sizeof(*req));
1498 GOTO(out_set, rc = -ENOMEM);
1500 OBD_ALLOC(req->rq_oi.oi_osfs, sizeof(*req->rq_oi.oi_osfs));
1501 if (req->rq_oi.oi_osfs == NULL) {
1502 OBD_FREE(req, sizeof(*req));
1503 GOTO(out_set, rc = -ENOMEM);
1507 req->rq_oi.oi_cb_up = cb_statfs_update;
1508 req->rq_oi.oi_flags = oinfo->oi_flags;
1510 lov_set_add_req(req, set);
1512 if (!set->set_count)
1513 GOTO(out_set, rc = -EIO);
1517 lov_fini_statfs_set(set);