Whamcloud - gitweb
c7293e52609876ee6e83da37863063b402777726
[fs/lustre-release.git] / lustre / lov / lov_request.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  *
30  * Copyright (c) 2011, Whamcloud, Inc.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  */
36
37 #ifndef EXPORT_SYMTAB
38 # define EXPORT_SYMTAB
39 #endif
40 #define DEBUG_SUBSYSTEM S_LOV
41
42 #ifdef __KERNEL__
43 #include <libcfs/libcfs.h>
44 #else
45 #include <liblustre.h>
46 #endif
47
48 #include <obd_class.h>
49 #include <obd_lov.h>
50 #include <lustre/lustre_idl.h>
51
52 #include "lov_internal.h"
53
54 static void lov_init_set(struct lov_request_set *set)
55 {
56         set->set_count = 0;
57         cfs_atomic_set(&set->set_completes, 0);
58         cfs_atomic_set(&set->set_success, 0);
59         set->set_cookies = 0;
60         CFS_INIT_LIST_HEAD(&set->set_list);
61         cfs_atomic_set(&set->set_refcount, 1);
62         cfs_waitq_init(&set->set_waitq);
63         cfs_spin_lock_init(&set->set_lock);
64 }
65
66 void lov_finish_set(struct lov_request_set *set)
67 {
68         cfs_list_t *pos, *n;
69         ENTRY;
70
71         LASSERT(set);
72         cfs_list_for_each_safe(pos, n, &set->set_list) {
73                 struct lov_request *req = cfs_list_entry(pos,
74                                                          struct lov_request,
75                                                          rq_link);
76                 cfs_list_del_init(&req->rq_link);
77
78                 if (req->rq_oi.oi_oa)
79                         OBDO_FREE(req->rq_oi.oi_oa);
80                 if (req->rq_oi.oi_md)
81                         OBD_FREE_LARGE(req->rq_oi.oi_md, req->rq_buflen);
82                 if (req->rq_oi.oi_osfs)
83                         OBD_FREE(req->rq_oi.oi_osfs,
84                                  sizeof(*req->rq_oi.oi_osfs));
85                 OBD_FREE(req, sizeof(*req));
86         }
87
88         if (set->set_pga) {
89                 int len = set->set_oabufs * sizeof(*set->set_pga);
90                 OBD_FREE_LARGE(set->set_pga, len);
91         }
92         if (set->set_lockh)
93                 lov_llh_put(set->set_lockh);
94
95         OBD_FREE(set, sizeof(*set));
96         EXIT;
97 }
98
99 int lov_finished_set(struct lov_request_set *set)
100 {
101         int completes = cfs_atomic_read(&set->set_completes);
102
103         CDEBUG(D_INFO, "check set %d/%d\n", completes,
104                set->set_count);
105         return completes == set->set_count;
106 }
107
108 void lov_update_set(struct lov_request_set *set,
109                     struct lov_request *req, int rc)
110 {
111         req->rq_complete = 1;
112         req->rq_rc = rc;
113
114         cfs_atomic_inc(&set->set_completes);
115         if (rc == 0)
116                 cfs_atomic_inc(&set->set_success);
117
118         cfs_waitq_signal(&set->set_waitq);
119 }
120
121 int lov_update_common_set(struct lov_request_set *set,
122                           struct lov_request *req, int rc)
123 {
124         struct lov_obd *lov = &set->set_exp->exp_obd->u.lov;
125         ENTRY;
126
127         lov_update_set(set, req, rc);
128
129         /* grace error on inactive ost */
130         if (rc && !(lov->lov_tgts[req->rq_idx] &&
131                     lov->lov_tgts[req->rq_idx]->ltd_active))
132                 rc = 0;
133
134         /* FIXME in raid1 regime, should return 0 */
135         RETURN(rc);
136 }
137
138 void lov_set_add_req(struct lov_request *req, struct lov_request_set *set)
139 {
140         cfs_list_add_tail(&req->rq_link, &set->set_list);
141         set->set_count++;
142         req->rq_rqset = set;
143 }
144
/*
 * Defined outside this file.  lov_update_enqueue_set() below calls it with
 * the per-stripe lock handle, the stripe's lov_oinfo, the enqueue flags, the
 * LVB held in the request's private stripe md, the lock mode and the enqueue
 * result code.
 */
extern void osc_update_enqueue(struct lustre_handle *lov_lockhp,
                               struct lov_oinfo *loi, int flags,
                               struct ost_lvb *lvb, __u32 mode, int rc);
148
149 static int lov_update_enqueue_lov(struct obd_export *exp,
150                                   struct lustre_handle *lov_lockhp,
151                                   struct lov_oinfo *loi, int flags, int idx,
152                                   __u64 oid, int rc)
153 {
154         struct lov_obd *lov = &exp->exp_obd->u.lov;
155
156         if (rc != ELDLM_OK &&
157             !(rc == ELDLM_LOCK_ABORTED && (flags & LDLM_FL_HAS_INTENT))) {
158                 memset(lov_lockhp, 0, sizeof(*lov_lockhp));
159                 if (lov->lov_tgts[idx] && lov->lov_tgts[idx]->ltd_active) {
160                         /* -EUSERS used by OST to report file contention */
161                         if (rc != -EINTR && rc != -EUSERS)
162                                 CERROR("enqueue objid "LPX64" subobj "
163                                        LPX64" on OST idx %d: rc %d\n",
164                                        oid, loi->loi_id, loi->loi_ost_idx, rc);
165                 } else
166                         rc = ELDLM_OK;
167         }
168         return rc;
169 }
170
171 int lov_update_enqueue_set(struct lov_request *req, __u32 mode, int rc)
172 {
173         struct lov_request_set *set = req->rq_rqset;
174         struct lustre_handle *lov_lockhp;
175         struct obd_info *oi = set->set_oi;
176         struct lov_oinfo *loi;
177         ENTRY;
178
179         LASSERT(oi != NULL);
180
181         lov_lockhp = set->set_lockh->llh_handles + req->rq_stripe;
182         loi = oi->oi_md->lsm_oinfo[req->rq_stripe];
183
184         /* XXX LOV STACKING: OSC gets a copy, created in lov_prep_enqueue_set
185          * and that copy can be arbitrarily out of date.
186          *
187          * The LOV API is due for a serious rewriting anyways, and this
188          * can be addressed then. */
189
190         lov_stripe_lock(oi->oi_md);
191         osc_update_enqueue(lov_lockhp, loi, oi->oi_flags,
192                            &req->rq_oi.oi_md->lsm_oinfo[0]->loi_lvb, mode, rc);
193         if (rc == ELDLM_LOCK_ABORTED && (oi->oi_flags & LDLM_FL_HAS_INTENT))
194                 memset(lov_lockhp, 0, sizeof *lov_lockhp);
195         rc = lov_update_enqueue_lov(set->set_exp, lov_lockhp, loi, oi->oi_flags,
196                                     req->rq_idx, oi->oi_md->lsm_object_id, rc);
197         lov_stripe_unlock(oi->oi_md);
198         lov_update_set(set, req, rc);
199         RETURN(rc);
200 }
201
202 /* The callback for osc_enqueue that updates lov info for every OSC request. */
203 static int cb_update_enqueue(void *cookie, int rc)
204 {
205         struct obd_info *oinfo = cookie;
206         struct ldlm_enqueue_info *einfo;
207         struct lov_request *lovreq;
208
209         lovreq = container_of(oinfo, struct lov_request, rq_oi);
210         einfo = lovreq->rq_rqset->set_ei;
211         return lov_update_enqueue_set(lovreq, einfo->ei_mode, rc);
212 }
213
214 static int enqueue_done(struct lov_request_set *set, __u32 mode)
215 {
216         struct lov_request *req;
217         struct lov_obd *lov = &set->set_exp->exp_obd->u.lov;
218         int completes = cfs_atomic_read(&set->set_completes);
219         int rc = 0;
220         ENTRY;
221
222         /* enqueue/match success, just return */
223         if (completes && completes == cfs_atomic_read(&set->set_success))
224                 RETURN(0);
225
226         /* cancel enqueued/matched locks */
227         cfs_list_for_each_entry(req, &set->set_list, rq_link) {
228                 struct lustre_handle *lov_lockhp;
229
230                 if (!req->rq_complete || req->rq_rc)
231                         continue;
232
233                 lov_lockhp = set->set_lockh->llh_handles + req->rq_stripe;
234                 LASSERT(lov_lockhp);
235                 if (!lustre_handle_is_used(lov_lockhp))
236                         continue;
237
238                 rc = obd_cancel(lov->lov_tgts[req->rq_idx]->ltd_exp,
239                                 req->rq_oi.oi_md, mode, lov_lockhp);
240                 if (rc && lov->lov_tgts[req->rq_idx] &&
241                     lov->lov_tgts[req->rq_idx]->ltd_active)
242                         CERROR("cancelling obdjid "LPX64" on OST "
243                                "idx %d error: rc = %d\n",
244                                req->rq_oi.oi_md->lsm_object_id,
245                                req->rq_idx, rc);
246         }
247         if (set->set_lockh)
248                 lov_llh_put(set->set_lockh);
249         RETURN(rc);
250 }
251
252 int lov_fini_enqueue_set(struct lov_request_set *set, __u32 mode, int rc,
253                          struct ptlrpc_request_set *rqset)
254 {
255         int ret = 0;
256         ENTRY;
257
258         if (set == NULL)
259                 RETURN(0);
260         LASSERT(set->set_exp);
261         /* Do enqueue_done only for sync requests and if any request
262          * succeeded. */
263         if (!rqset) {
264                 if (rc)
265                         cfs_atomic_set(&set->set_completes, 0);
266                 ret = enqueue_done(set, mode);
267         } else if (set->set_lockh)
268                 lov_llh_put(set->set_lockh);
269
270         lov_put_reqset(set);
271
272         RETURN(rc ? rc : ret);
273 }
274
275 static void lov_llh_addref(void *llhp)
276 {
277         struct lov_lock_handles *llh = llhp;
278
279         cfs_atomic_inc(&llh->llh_refcount);
280         CDEBUG(D_INFO, "GETting llh %p : new refcount %d\n", llh,
281                cfs_atomic_read(&llh->llh_refcount));
282 }
283
/*
 * Handle operations passed to class_handle_hash() by lov_llh_new().  Only
 * the addref hook is provided; hop_free is left NULL.
 */
static struct portals_handle_ops lov_handle_ops = {
        .hop_addref = lov_llh_addref,
        .hop_free   = NULL,
};
288
289 static struct lov_lock_handles *lov_llh_new(struct lov_stripe_md *lsm)
290 {
291         struct lov_lock_handles *llh;
292
293         OBD_ALLOC(llh, sizeof *llh +
294                   sizeof(*llh->llh_handles) * lsm->lsm_stripe_count);
295         if (llh == NULL)
296                 return NULL;
297
298         cfs_atomic_set(&llh->llh_refcount, 2);
299         llh->llh_stripe_count = lsm->lsm_stripe_count;
300         CFS_INIT_LIST_HEAD(&llh->llh_handle.h_link);
301         class_handle_hash(&llh->llh_handle, &lov_handle_ops);
302
303         return llh;
304 }
305
306 int lov_prep_enqueue_set(struct obd_export *exp, struct obd_info *oinfo,
307                          struct ldlm_enqueue_info *einfo,
308                          struct lov_request_set **reqset)
309 {
310         struct lov_obd *lov = &exp->exp_obd->u.lov;
311         struct lov_request_set *set;
312         int i, rc = 0;
313         ENTRY;
314
315         OBD_ALLOC(set, sizeof(*set));
316         if (set == NULL)
317                 RETURN(-ENOMEM);
318         lov_init_set(set);
319
320         set->set_exp = exp;
321         set->set_oi = oinfo;
322         set->set_ei = einfo;
323         set->set_lockh = lov_llh_new(oinfo->oi_md);
324         if (set->set_lockh == NULL)
325                 GOTO(out_set, rc = -ENOMEM);
326         oinfo->oi_lockh->cookie = set->set_lockh->llh_handle.h_cookie;
327
328         for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) {
329                 struct lov_oinfo *loi;
330                 struct lov_request *req;
331                 obd_off start, end;
332
333                 loi = oinfo->oi_md->lsm_oinfo[i];
334                 if (!lov_stripe_intersects(oinfo->oi_md, i,
335                                            oinfo->oi_policy.l_extent.start,
336                                            oinfo->oi_policy.l_extent.end,
337                                            &start, &end))
338                         continue;
339
340                 if (!lov->lov_tgts[loi->loi_ost_idx] ||
341                     !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) {
342                         CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
343                         continue;
344                 }
345
346                 OBD_ALLOC(req, sizeof(*req));
347                 if (req == NULL)
348                         GOTO(out_set, rc = -ENOMEM);
349
350                 req->rq_buflen = sizeof(*req->rq_oi.oi_md) +
351                         sizeof(struct lov_oinfo *) +
352                         sizeof(struct lov_oinfo);
353                 OBD_ALLOC_LARGE(req->rq_oi.oi_md, req->rq_buflen);
354                 if (req->rq_oi.oi_md == NULL) {
355                         OBD_FREE(req, sizeof(*req));
356                         GOTO(out_set, rc = -ENOMEM);
357                 }
358                 req->rq_oi.oi_md->lsm_oinfo[0] =
359                         ((void *)req->rq_oi.oi_md) + sizeof(*req->rq_oi.oi_md) +
360                         sizeof(struct lov_oinfo *);
361
362                 /* Set lov request specific parameters. */
363                 req->rq_oi.oi_lockh = set->set_lockh->llh_handles + i;
364                 req->rq_oi.oi_cb_up = cb_update_enqueue;
365                 req->rq_oi.oi_flags = oinfo->oi_flags;
366
367                 LASSERT(req->rq_oi.oi_lockh);
368
369                 req->rq_oi.oi_policy.l_extent.gid =
370                         oinfo->oi_policy.l_extent.gid;
371                 req->rq_oi.oi_policy.l_extent.start = start;
372                 req->rq_oi.oi_policy.l_extent.end = end;
373
374                 req->rq_idx = loi->loi_ost_idx;
375                 req->rq_stripe = i;
376
377                 /* XXX LOV STACKING: submd should be from the subobj */
378                 req->rq_oi.oi_md->lsm_object_id = loi->loi_id;
379                 req->rq_oi.oi_md->lsm_object_seq = loi->loi_seq;
380                 req->rq_oi.oi_md->lsm_stripe_count = 0;
381                 req->rq_oi.oi_md->lsm_oinfo[0]->loi_kms_valid =
382                         loi->loi_kms_valid;
383                 req->rq_oi.oi_md->lsm_oinfo[0]->loi_kms = loi->loi_kms;
384                 req->rq_oi.oi_md->lsm_oinfo[0]->loi_lvb = loi->loi_lvb;
385
386                 lov_set_add_req(req, set);
387         }
388         if (!set->set_count)
389                 GOTO(out_set, rc = -EIO);
390         *reqset = set;
391         RETURN(0);
392 out_set:
393         lov_fini_enqueue_set(set, einfo->ei_mode, rc, NULL);
394         RETURN(rc);
395 }
396
397 int lov_fini_match_set(struct lov_request_set *set, __u32 mode, int flags)
398 {
399         int rc = 0;
400         ENTRY;
401
402         if (set == NULL)
403                 RETURN(0);
404         LASSERT(set->set_exp);
405         rc = enqueue_done(set, mode);
406         if ((set->set_count == cfs_atomic_read(&set->set_success)) &&
407             (flags & LDLM_FL_TEST_LOCK))
408                 lov_llh_put(set->set_lockh);
409
410         lov_put_reqset(set);
411
412         RETURN(rc);
413 }
414
415 int lov_prep_match_set(struct obd_export *exp, struct obd_info *oinfo,
416                        struct lov_stripe_md *lsm, ldlm_policy_data_t *policy,
417                        __u32 mode, struct lustre_handle *lockh,
418                        struct lov_request_set **reqset)
419 {
420         struct lov_obd *lov = &exp->exp_obd->u.lov;
421         struct lov_request_set *set;
422         int i, rc = 0;
423         ENTRY;
424
425         OBD_ALLOC(set, sizeof(*set));
426         if (set == NULL)
427                 RETURN(-ENOMEM);
428         lov_init_set(set);
429
430         set->set_exp = exp;
431         set->set_oi = oinfo;
432         set->set_oi->oi_md = lsm;
433         set->set_lockh = lov_llh_new(lsm);
434         if (set->set_lockh == NULL)
435                 GOTO(out_set, rc = -ENOMEM);
436         lockh->cookie = set->set_lockh->llh_handle.h_cookie;
437
438         for (i = 0; i < lsm->lsm_stripe_count; i++){
439                 struct lov_oinfo *loi;
440                 struct lov_request *req;
441                 obd_off start, end;
442
443                 loi = lsm->lsm_oinfo[i];
444                 if (!lov_stripe_intersects(lsm, i, policy->l_extent.start,
445                                            policy->l_extent.end, &start, &end))
446                         continue;
447
448                 /* FIXME raid1 should grace this error */
449                 if (!lov->lov_tgts[loi->loi_ost_idx] ||
450                     !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) {
451                         CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
452                         GOTO(out_set, rc = -EIO);
453                 }
454
455                 OBD_ALLOC(req, sizeof(*req));
456                 if (req == NULL)
457                         GOTO(out_set, rc = -ENOMEM);
458
459                 req->rq_buflen = sizeof(*req->rq_oi.oi_md);
460                 OBD_ALLOC_LARGE(req->rq_oi.oi_md, req->rq_buflen);
461                 if (req->rq_oi.oi_md == NULL) {
462                         OBD_FREE(req, sizeof(*req));
463                         GOTO(out_set, rc = -ENOMEM);
464                 }
465
466                 req->rq_oi.oi_policy.l_extent.start = start;
467                 req->rq_oi.oi_policy.l_extent.end = end;
468                 req->rq_oi.oi_policy.l_extent.gid = policy->l_extent.gid;
469
470                 req->rq_idx = loi->loi_ost_idx;
471                 req->rq_stripe = i;
472
473                 /* XXX LOV STACKING: submd should be from the subobj */
474                 req->rq_oi.oi_md->lsm_object_id = loi->loi_id;
475                 req->rq_oi.oi_md->lsm_object_seq = loi->loi_seq;
476                 req->rq_oi.oi_md->lsm_stripe_count = 0;
477
478                 lov_set_add_req(req, set);
479         }
480         if (!set->set_count)
481                 GOTO(out_set, rc = -EIO);
482         *reqset = set;
483         RETURN(rc);
484 out_set:
485         lov_fini_match_set(set, mode, 0);
486         RETURN(rc);
487 }
488
489 int lov_fini_cancel_set(struct lov_request_set *set)
490 {
491         int rc = 0;
492         ENTRY;
493
494         if (set == NULL)
495                 RETURN(0);
496
497         LASSERT(set->set_exp);
498         if (set->set_lockh)
499                 lov_llh_put(set->set_lockh);
500
501         lov_put_reqset(set);
502
503         RETURN(rc);
504 }
505
506 int lov_prep_cancel_set(struct obd_export *exp, struct obd_info *oinfo,
507                         struct lov_stripe_md *lsm, __u32 mode,
508                         struct lustre_handle *lockh,
509                         struct lov_request_set **reqset)
510 {
511         struct lov_request_set *set;
512         int i, rc = 0;
513         ENTRY;
514
515         OBD_ALLOC(set, sizeof(*set));
516         if (set == NULL)
517                 RETURN(-ENOMEM);
518         lov_init_set(set);
519
520         set->set_exp = exp;
521         set->set_oi = oinfo;
522         set->set_oi->oi_md = lsm;
523         set->set_lockh = lov_handle2llh(lockh);
524         if (set->set_lockh == NULL) {
525                 CERROR("LOV: invalid lov lock handle %p\n", lockh);
526                 GOTO(out_set, rc = -EINVAL);
527         }
528         lockh->cookie = set->set_lockh->llh_handle.h_cookie;
529
530         for (i = 0; i < lsm->lsm_stripe_count; i++){
531                 struct lov_request *req;
532                 struct lustre_handle *lov_lockhp;
533                 struct lov_oinfo *loi = lsm->lsm_oinfo[i];
534
535                 lov_lockhp = set->set_lockh->llh_handles + i;
536                 if (!lustre_handle_is_used(lov_lockhp)) {
537                         CDEBUG(D_INFO, "lov idx %d subobj "LPX64" no lock\n",
538                                loi->loi_ost_idx, loi->loi_id);
539                         continue;
540                 }
541
542                 OBD_ALLOC(req, sizeof(*req));
543                 if (req == NULL)
544                         GOTO(out_set, rc = -ENOMEM);
545
546                 req->rq_buflen = sizeof(*req->rq_oi.oi_md);
547                 OBD_ALLOC_LARGE(req->rq_oi.oi_md, req->rq_buflen);
548                 if (req->rq_oi.oi_md == NULL) {
549                         OBD_FREE(req, sizeof(*req));
550                         GOTO(out_set, rc = -ENOMEM);
551                 }
552
553                 req->rq_idx = loi->loi_ost_idx;
554                 req->rq_stripe = i;
555
556                 /* XXX LOV STACKING: submd should be from the subobj */
557                 req->rq_oi.oi_md->lsm_object_id = loi->loi_id;
558                 req->rq_oi.oi_md->lsm_object_seq = loi->loi_seq;
559                 req->rq_oi.oi_md->lsm_stripe_count = 0;
560
561                 lov_set_add_req(req, set);
562         }
563         if (!set->set_count)
564                 GOTO(out_set, rc = -EIO);
565         *reqset = set;
566         RETURN(rc);
567 out_set:
568         lov_fini_cancel_set(set);
569         RETURN(rc);
570 }
571
572 static int lov_update_create_set(struct lov_request_set *set,
573                                  struct lov_request *req, int rc)
574 {
575         struct obd_trans_info *oti = set->set_oti;
576         struct lov_stripe_md *lsm = set->set_oi->oi_md;
577         struct lov_oinfo *loi;
578         struct lov_obd *lov = &set->set_exp->exp_obd->u.lov;
579         ENTRY;
580
581         if (rc && lov->lov_tgts[req->rq_idx] &&
582             lov->lov_tgts[req->rq_idx]->ltd_active) {
583                 /* Pre-creating objects may timeout via -ETIMEDOUT or
584                  * -ENOTCONN both are always non-critical events. */
585                 CDEBUG(rc == -ETIMEDOUT || rc == -ENOTCONN ? D_HA : D_ERROR,
586                        "error creating fid "LPX64" sub-object "
587                        "on OST idx %d/%d: rc = %d\n",
588                        set->set_oi->oi_oa->o_id, req->rq_idx,
589                        lsm->lsm_stripe_count, rc);
590                 if (rc > 0) {
591                         CERROR("obd_create returned invalid err %d\n", rc);
592                         rc = -EIO;
593                 }
594         }
595
596         cfs_spin_lock(&set->set_lock);
597         req->rq_stripe = cfs_atomic_read(&set->set_success);
598         loi = lsm->lsm_oinfo[req->rq_stripe];
599
600
601         if (rc) {
602                 lov_update_set(set, req, rc);
603                 cfs_spin_unlock(&set->set_lock);
604                 RETURN(rc);
605         }
606
607         loi->loi_id = req->rq_oi.oi_oa->o_id;
608         loi->loi_seq = req->rq_oi.oi_oa->o_seq;
609         loi->loi_ost_idx = req->rq_idx;
610         loi_init(loi);
611
612         if (oti && set->set_cookies)
613                 ++oti->oti_logcookies;
614         if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLCOOKIE)
615                 set->set_cookie_sent++;
616
617         lov_update_set(set, req, rc);
618         cfs_spin_unlock(&set->set_lock);
619
620         CDEBUG(D_INODE, "objid "LPX64" has subobj "LPX64"/"LPU64" at idx %d\n",
621                lsm->lsm_object_id, loi->loi_id, loi->loi_id, req->rq_idx);
622         RETURN(rc);
623 }
624
625 static int create_done(struct obd_export *exp, struct lov_request_set *set,
626                        struct lov_stripe_md **lsmp)
627 {
628         struct lov_obd *lov = &exp->exp_obd->u.lov;
629         struct obd_trans_info *oti = set->set_oti;
630         struct obdo *src_oa = set->set_oi->oi_oa;
631         struct lov_request *req;
632         struct obdo *ret_oa = NULL;
633         int success, attrset = 0, rc = 0;
634         ENTRY;
635
636         LASSERT(cfs_atomic_read(&set->set_completes));
637
638         /* try alloc objects on other osts if osc_create fails for
639          * exceptions: RPC failure, ENOSPC, etc */
640         if (set->set_count != cfs_atomic_read(&set->set_success)) {
641                 cfs_list_for_each_entry (req, &set->set_list, rq_link) {
642                         if (req->rq_rc == 0)
643                                 continue;
644
645                         cfs_atomic_dec(&set->set_completes);
646                         req->rq_complete = 0;
647
648                         rc = qos_remedy_create(set, req);
649                         lov_update_create_set(set, req, rc);
650                 }
651         }
652
653         success = cfs_atomic_read(&set->set_success);
654         /* no successful creates */
655         if (success == 0)
656                 GOTO(cleanup, rc);
657
658         if (set->set_count != success) {
659                 set->set_count = success;
660                 qos_shrink_lsm(set);
661         }
662
663         OBDO_ALLOC(ret_oa);
664         if (ret_oa == NULL)
665                 GOTO(cleanup, rc = -ENOMEM);
666
667         cfs_list_for_each_entry(req, &set->set_list, rq_link) {
668                 if (!req->rq_complete || req->rq_rc)
669                         continue;
670                 lov_merge_attrs(ret_oa, req->rq_oi.oi_oa,
671                                 req->rq_oi.oi_oa->o_valid, set->set_oi->oi_md,
672                                 req->rq_stripe, &attrset);
673         }
674         if (src_oa->o_valid & OBD_MD_FLSIZE &&
675             ret_oa->o_size != src_oa->o_size) {
676                 CERROR("original size "LPU64" isn't new object size "LPU64"\n",
677                        src_oa->o_size, ret_oa->o_size);
678                 LBUG();
679         }
680         ret_oa->o_id = src_oa->o_id;
681         ret_oa->o_seq = src_oa->o_seq;
682         ret_oa->o_valid |= OBD_MD_FLID | OBD_MD_FLGROUP;
683         memcpy(src_oa, ret_oa, sizeof(*src_oa));
684         OBDO_FREE(ret_oa);
685
686         *lsmp = set->set_oi->oi_md;
687         GOTO(done, rc = 0);
688
689 cleanup:
690         cfs_list_for_each_entry(req, &set->set_list, rq_link) {
691                 struct obd_export *sub_exp;
692                 int err = 0;
693
694                 if (!req->rq_complete || req->rq_rc)
695                         continue;
696
697                 sub_exp = lov->lov_tgts[req->rq_idx]->ltd_exp;
698                 err = obd_destroy(NULL, sub_exp, req->rq_oi.oi_oa, NULL, oti,
699                                   NULL, NULL);
700                 if (err)
701                         CERROR("Failed to uncreate objid "LPX64" subobj "
702                                LPX64" on OST idx %d: rc = %d\n",
703                                src_oa->o_id, req->rq_oi.oi_oa->o_id,
704                                req->rq_idx, rc);
705         }
706         if (*lsmp == NULL)
707                 obd_free_memmd(exp, &set->set_oi->oi_md);
708 done:
709         if (oti && set->set_cookies) {
710                 oti->oti_logcookies = set->set_cookies;
711                 if (!set->set_cookie_sent) {
712                         oti_free_cookies(oti);
713                         src_oa->o_valid &= ~OBD_MD_FLCOOKIE;
714                 } else {
715                         src_oa->o_valid |= OBD_MD_FLCOOKIE;
716                 }
717         }
718         RETURN(rc);
719 }
720
721 int lov_fini_create_set(struct lov_request_set *set,struct lov_stripe_md **lsmp)
722 {
723         int rc = 0;
724         ENTRY;
725
726         if (set == NULL)
727                 RETURN(0);
728         LASSERT(set->set_exp);
729         if (cfs_atomic_read(&set->set_completes))
730                 rc = create_done(set->set_exp, set, lsmp);
731
732         lov_put_reqset(set);
733         RETURN(rc);
734 }
735
736 int cb_create_update(void *cookie, int rc)
737 {
738         struct obd_info *oinfo = cookie;
739         struct lov_request *lovreq;
740
741         lovreq = container_of(oinfo, struct lov_request, rq_oi);
742
743         if (CFS_FAIL_CHECK(OBD_FAIL_MDS_OSC_CREATE_FAIL))
744                 if (lovreq->rq_idx == cfs_fail_val)
745                         rc = -ENOTCONN;
746
747         rc= lov_update_create_set(lovreq->rq_rqset, lovreq, rc);
748         if (lov_finished_set(lovreq->rq_rqset))
749                 lov_put_reqset(lovreq->rq_rqset);
750         return rc;
751 }
752
753 int lov_prep_create_set(struct obd_export *exp, struct obd_info *oinfo,
754                         struct lov_stripe_md **lsmp, struct obdo *src_oa,
755                         struct obd_trans_info *oti,
756                         struct lov_request_set **reqset)
757 {
758         struct lov_request_set *set;
759         int rc = 0;
760         ENTRY;
761
762         OBD_ALLOC(set, sizeof(*set));
763         if (set == NULL)
764                 RETURN(-ENOMEM);
765         lov_init_set(set);
766
767         set->set_exp = exp;
768         set->set_oi = oinfo;
769         set->set_oi->oi_md = *lsmp;
770         set->set_oi->oi_oa = src_oa;
771         set->set_oti = oti;
772         lov_get_reqset(set);
773
774         rc = qos_prep_create(exp, set);
775         /* qos_shrink_lsm() may have allocated a new lsm */
776         *lsmp = oinfo->oi_md;
777         if (rc) {
778                 lov_fini_create_set(set, lsmp);
779                 lov_put_reqset(set);
780         } else {
781                 *reqset = set;
782         }
783         RETURN(rc);
784 }
785
786 static int common_attr_done(struct lov_request_set *set)
787 {
788         cfs_list_t *pos;
789         struct lov_request *req;
790         struct obdo *tmp_oa;
791         int rc = 0, attrset = 0;
792         ENTRY;
793
794         LASSERT(set->set_oi != NULL);
795
796         if (set->set_oi->oi_oa == NULL)
797                 RETURN(0);
798
799         if (!cfs_atomic_read(&set->set_success))
800                 RETURN(-EIO);
801
802         OBDO_ALLOC(tmp_oa);
803         if (tmp_oa == NULL)
804                 GOTO(out, rc = -ENOMEM);
805
806         cfs_list_for_each (pos, &set->set_list) {
807                 req = cfs_list_entry(pos, struct lov_request, rq_link);
808
809                 if (!req->rq_complete || req->rq_rc)
810                         continue;
811                 if (req->rq_oi.oi_oa->o_valid == 0)   /* inactive stripe */
812                         continue;
813                 lov_merge_attrs(tmp_oa, req->rq_oi.oi_oa,
814                                 req->rq_oi.oi_oa->o_valid,
815                                 set->set_oi->oi_md, req->rq_stripe, &attrset);
816         }
817         if (!attrset) {
818                 CERROR("No stripes had valid attrs\n");
819                 rc = -EIO;
820         }
821         if ((set->set_oi->oi_oa->o_valid & OBD_MD_FLEPOCH) &&
822             (set->set_oi->oi_md->lsm_stripe_count != attrset)) {
823                 /* When we take attributes of some epoch, we require all the
824                  * ost to be active. */
825                 CERROR("Not all the stripes had valid attrs\n");
826                 GOTO(out, rc = -EIO);
827         }
828
829         tmp_oa->o_id = set->set_oi->oi_oa->o_id;
830         memcpy(set->set_oi->oi_oa, tmp_oa, sizeof(*set->set_oi->oi_oa));
831 out:
832         if (tmp_oa)
833                 OBDO_FREE(tmp_oa);
834         RETURN(rc);
835
836 }
837
838 static int brw_done(struct lov_request_set *set)
839 {
840         struct lov_stripe_md *lsm = set->set_oi->oi_md;
841         struct lov_oinfo     *loi = NULL;
842         cfs_list_t *pos;
843         struct lov_request *req;
844         ENTRY;
845
846         cfs_list_for_each (pos, &set->set_list) {
847                 req = cfs_list_entry(pos, struct lov_request, rq_link);
848
849                 if (!req->rq_complete || req->rq_rc)
850                         continue;
851
852                 loi = lsm->lsm_oinfo[req->rq_stripe];
853
854                 if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLBLOCKS)
855                         loi->loi_lvb.lvb_blocks = req->rq_oi.oi_oa->o_blocks;
856         }
857
858         RETURN(0);
859 }
860
861 int lov_fini_brw_set(struct lov_request_set *set)
862 {
863         int rc = 0;
864         ENTRY;
865
866         if (set == NULL)
867                 RETURN(0);
868         LASSERT(set->set_exp);
869         if (cfs_atomic_read(&set->set_completes)) {
870                 rc = brw_done(set);
871                 /* FIXME update qos data here */
872         }
873         lov_put_reqset(set);
874
875         RETURN(rc);
876 }
877
878 int lov_prep_brw_set(struct obd_export *exp, struct obd_info *oinfo,
879                      obd_count oa_bufs, struct brw_page *pga,
880                      struct obd_trans_info *oti,
881                      struct lov_request_set **reqset)
882 {
883         struct {
884                 obd_count       index;
885                 obd_count       count;
886                 obd_count       off;
887         } *info = NULL;
888         struct lov_request_set *set;
889         struct lov_obd *lov = &exp->exp_obd->u.lov;
890         int rc = 0, i, shift;
891         ENTRY;
892
893         OBD_ALLOC(set, sizeof(*set));
894         if (set == NULL)
895                 RETURN(-ENOMEM);
896         lov_init_set(set);
897
898         set->set_exp = exp;
899         set->set_oti = oti;
900         set->set_oi = oinfo;
901         set->set_oabufs = oa_bufs;
902         OBD_ALLOC_LARGE(set->set_pga, oa_bufs * sizeof(*set->set_pga));
903         if (!set->set_pga)
904                 GOTO(out, rc = -ENOMEM);
905
906         OBD_ALLOC_LARGE(info, sizeof(*info) * oinfo->oi_md->lsm_stripe_count);
907         if (!info)
908                 GOTO(out, rc = -ENOMEM);
909
910         /* calculate the page count for each stripe */
911         for (i = 0; i < oa_bufs; i++) {
912                 int stripe = lov_stripe_number(oinfo->oi_md, pga[i].off);
913                 info[stripe].count++;
914         }
915
916         /* alloc and initialize lov request */
917         shift = 0;
918         for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++){
919                 struct lov_oinfo *loi = NULL;
920                 struct lov_request *req;
921
922                 if (info[i].count == 0)
923                         continue;
924
925                 loi = oinfo->oi_md->lsm_oinfo[i];
926                 if (!lov->lov_tgts[loi->loi_ost_idx] ||
927                     !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) {
928                         CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
929                         GOTO(out, rc = -EIO);
930                 }
931
932                 OBD_ALLOC(req, sizeof(*req));
933                 if (req == NULL)
934                         GOTO(out, rc = -ENOMEM);
935
936                 OBDO_ALLOC(req->rq_oi.oi_oa);
937                 if (req->rq_oi.oi_oa == NULL) {
938                         OBD_FREE(req, sizeof(*req));
939                         GOTO(out, rc = -ENOMEM);
940                 }
941
942                 if (oinfo->oi_oa) {
943                         memcpy(req->rq_oi.oi_oa, oinfo->oi_oa,
944                                sizeof(*req->rq_oi.oi_oa));
945                 }
946                 req->rq_oi.oi_oa->o_id = loi->loi_id;
947                 req->rq_oi.oi_oa->o_seq = loi->loi_seq;
948                 req->rq_oi.oi_oa->o_stripe_idx = i;
949
950                 req->rq_buflen = sizeof(*req->rq_oi.oi_md);
951                 OBD_ALLOC_LARGE(req->rq_oi.oi_md, req->rq_buflen);
952                 if (req->rq_oi.oi_md == NULL) {
953                         OBDO_FREE(req->rq_oi.oi_oa);
954                         OBD_FREE(req, sizeof(*req));
955                         GOTO(out, rc = -ENOMEM);
956                 }
957
958                 req->rq_idx = loi->loi_ost_idx;
959                 req->rq_stripe = i;
960
961                 /* XXX LOV STACKING */
962                 req->rq_oi.oi_md->lsm_object_id = loi->loi_id;
963                 req->rq_oi.oi_md->lsm_object_seq = loi->loi_seq;
964                 req->rq_oabufs = info[i].count;
965                 req->rq_pgaidx = shift;
966                 shift += req->rq_oabufs;
967
968                 /* remember the index for sort brw_page array */
969                 info[i].index = req->rq_pgaidx;
970
971                 req->rq_oi.oi_capa = oinfo->oi_capa;
972
973                 lov_set_add_req(req, set);
974         }
975         if (!set->set_count)
976                 GOTO(out, rc = -EIO);
977
978         /* rotate & sort the brw_page array */
979         for (i = 0; i < oa_bufs; i++) {
980                 int stripe = lov_stripe_number(oinfo->oi_md, pga[i].off);
981
982                 shift = info[stripe].index + info[stripe].off;
983                 LASSERT(shift < oa_bufs);
984                 set->set_pga[shift] = pga[i];
985                 lov_stripe_offset(oinfo->oi_md, pga[i].off, stripe,
986                                   &set->set_pga[shift].off);
987                 info[stripe].off++;
988         }
989 out:
990         if (info)
991                 OBD_FREE_LARGE(info,
992                                sizeof(*info) * oinfo->oi_md->lsm_stripe_count);
993
994         if (rc == 0)
995                 *reqset = set;
996         else
997                 lov_fini_brw_set(set);
998
999         RETURN(rc);
1000 }
1001
1002 int lov_fini_getattr_set(struct lov_request_set *set)
1003 {
1004         int rc = 0;
1005         ENTRY;
1006
1007         if (set == NULL)
1008                 RETURN(0);
1009         LASSERT(set->set_exp);
1010         if (cfs_atomic_read(&set->set_completes))
1011                 rc = common_attr_done(set);
1012
1013         lov_put_reqset(set);
1014
1015         RETURN(rc);
1016 }
1017
1018 /* The callback for osc_getattr_async that finilizes a request info when a
1019  * response is received. */
1020 static int cb_getattr_update(void *cookie, int rc)
1021 {
1022         struct obd_info *oinfo = cookie;
1023         struct lov_request *lovreq;
1024         lovreq = container_of(oinfo, struct lov_request, rq_oi);
1025         return lov_update_common_set(lovreq->rq_rqset, lovreq, rc);
1026 }
1027
1028 int lov_prep_getattr_set(struct obd_export *exp, struct obd_info *oinfo,
1029                          struct lov_request_set **reqset)
1030 {
1031         struct lov_request_set *set;
1032         struct lov_obd *lov = &exp->exp_obd->u.lov;
1033         int rc = 0, i;
1034         ENTRY;
1035
1036         OBD_ALLOC(set, sizeof(*set));
1037         if (set == NULL)
1038                 RETURN(-ENOMEM);
1039         lov_init_set(set);
1040
1041         set->set_exp = exp;
1042         set->set_oi = oinfo;
1043
1044         for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) {
1045                 struct lov_oinfo *loi;
1046                 struct lov_request *req;
1047
1048                 loi = oinfo->oi_md->lsm_oinfo[i];
1049                 if (!lov->lov_tgts[loi->loi_ost_idx] ||
1050                     !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) {
1051                         CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
1052                         if (oinfo->oi_oa->o_valid & OBD_MD_FLEPOCH)
1053                                 /* SOM requires all the OSTs to be active. */
1054                                 GOTO(out_set, rc = -EIO);
1055                         continue;
1056                 }
1057
1058                 OBD_ALLOC(req, sizeof(*req));
1059                 if (req == NULL)
1060                         GOTO(out_set, rc = -ENOMEM);
1061
1062                 req->rq_stripe = i;
1063                 req->rq_idx = loi->loi_ost_idx;
1064
1065                 OBDO_ALLOC(req->rq_oi.oi_oa);
1066                 if (req->rq_oi.oi_oa == NULL) {
1067                         OBD_FREE(req, sizeof(*req));
1068                         GOTO(out_set, rc = -ENOMEM);
1069                 }
1070                 memcpy(req->rq_oi.oi_oa, oinfo->oi_oa,
1071                        sizeof(*req->rq_oi.oi_oa));
1072                 req->rq_oi.oi_oa->o_id = loi->loi_id;
1073                 req->rq_oi.oi_oa->o_seq = loi->loi_seq;
1074                 req->rq_oi.oi_cb_up = cb_getattr_update;
1075                 req->rq_oi.oi_capa = oinfo->oi_capa;
1076
1077                 lov_set_add_req(req, set);
1078         }
1079         if (!set->set_count)
1080                 GOTO(out_set, rc = -EIO);
1081         *reqset = set;
1082         RETURN(rc);
1083 out_set:
1084         lov_fini_getattr_set(set);
1085         RETURN(rc);
1086 }
1087
1088 int lov_fini_destroy_set(struct lov_request_set *set)
1089 {
1090         ENTRY;
1091
1092         if (set == NULL)
1093                 RETURN(0);
1094         LASSERT(set->set_exp);
1095         if (cfs_atomic_read(&set->set_completes)) {
1096                 /* FIXME update qos data here */
1097         }
1098
1099         lov_put_reqset(set);
1100
1101         RETURN(0);
1102 }
1103
1104 int lov_prep_destroy_set(struct obd_export *exp, struct obd_info *oinfo,
1105                          struct obdo *src_oa, struct lov_stripe_md *lsm,
1106                          struct obd_trans_info *oti,
1107                          struct lov_request_set **reqset)
1108 {
1109         struct lov_request_set *set;
1110         struct lov_obd *lov = &exp->exp_obd->u.lov;
1111         int rc = 0, i;
1112         ENTRY;
1113
1114         OBD_ALLOC(set, sizeof(*set));
1115         if (set == NULL)
1116                 RETURN(-ENOMEM);
1117         lov_init_set(set);
1118
1119         set->set_exp = exp;
1120         set->set_oi = oinfo;
1121         set->set_oi->oi_md = lsm;
1122         set->set_oi->oi_oa = src_oa;
1123         set->set_oti = oti;
1124         if (oti != NULL && src_oa->o_valid & OBD_MD_FLCOOKIE)
1125                 set->set_cookies = oti->oti_logcookies;
1126
1127         for (i = 0; i < lsm->lsm_stripe_count; i++) {
1128                 struct lov_oinfo *loi;
1129                 struct lov_request *req;
1130
1131                 loi = lsm->lsm_oinfo[i];
1132                 if (!lov->lov_tgts[loi->loi_ost_idx] ||
1133                     !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) {
1134                         CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
1135                         continue;
1136                 }
1137
1138                 OBD_ALLOC(req, sizeof(*req));
1139                 if (req == NULL)
1140                         GOTO(out_set, rc = -ENOMEM);
1141
1142                 req->rq_stripe = i;
1143                 req->rq_idx = loi->loi_ost_idx;
1144
1145                 OBDO_ALLOC(req->rq_oi.oi_oa);
1146                 if (req->rq_oi.oi_oa == NULL) {
1147                         OBD_FREE(req, sizeof(*req));
1148                         GOTO(out_set, rc = -ENOMEM);
1149                 }
1150                 memcpy(req->rq_oi.oi_oa, src_oa, sizeof(*req->rq_oi.oi_oa));
1151                 req->rq_oi.oi_oa->o_id = loi->loi_id;
1152                 req->rq_oi.oi_oa->o_seq = loi->loi_seq;
1153                 lov_set_add_req(req, set);
1154         }
1155         if (!set->set_count)
1156                 GOTO(out_set, rc = -EIO);
1157         *reqset = set;
1158         RETURN(rc);
1159 out_set:
1160         lov_fini_destroy_set(set);
1161         RETURN(rc);
1162 }
1163
1164 int lov_fini_setattr_set(struct lov_request_set *set)
1165 {
1166         int rc = 0;
1167         ENTRY;
1168
1169         if (set == NULL)
1170                 RETURN(0);
1171         LASSERT(set->set_exp);
1172         if (cfs_atomic_read(&set->set_completes)) {
1173                 rc = common_attr_done(set);
1174                 /* FIXME update qos data here */
1175         }
1176
1177         lov_put_reqset(set);
1178         RETURN(rc);
1179 }
1180
1181 int lov_update_setattr_set(struct lov_request_set *set,
1182                            struct lov_request *req, int rc)
1183 {
1184         struct lov_obd *lov = &req->rq_rqset->set_exp->exp_obd->u.lov;
1185         struct lov_stripe_md *lsm = req->rq_rqset->set_oi->oi_md;
1186         ENTRY;
1187
1188         lov_update_set(set, req, rc);
1189
1190         /* grace error on inactive ost */
1191         if (rc && !(lov->lov_tgts[req->rq_idx] &&
1192                     lov->lov_tgts[req->rq_idx]->ltd_active))
1193                 rc = 0;
1194
1195         if (rc == 0) {
1196                 if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLCTIME)
1197                         lsm->lsm_oinfo[req->rq_stripe]->loi_lvb.lvb_ctime =
1198                                 req->rq_oi.oi_oa->o_ctime;
1199                 if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLMTIME)
1200                         lsm->lsm_oinfo[req->rq_stripe]->loi_lvb.lvb_mtime =
1201                                 req->rq_oi.oi_oa->o_mtime;
1202                 if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLATIME)
1203                         lsm->lsm_oinfo[req->rq_stripe]->loi_lvb.lvb_atime =
1204                                 req->rq_oi.oi_oa->o_atime;
1205         }
1206
1207         RETURN(rc);
1208 }
1209
1210 /* The callback for osc_setattr_async that finilizes a request info when a
1211  * response is received. */
1212 static int cb_setattr_update(void *cookie, int rc)
1213 {
1214         struct obd_info *oinfo = cookie;
1215         struct lov_request *lovreq;
1216         lovreq = container_of(oinfo, struct lov_request, rq_oi);
1217         return lov_update_setattr_set(lovreq->rq_rqset, lovreq, rc);
1218 }
1219
1220 int lov_prep_setattr_set(struct obd_export *exp, struct obd_info *oinfo,
1221                          struct obd_trans_info *oti,
1222                          struct lov_request_set **reqset)
1223 {
1224         struct lov_request_set *set;
1225         struct lov_obd *lov = &exp->exp_obd->u.lov;
1226         int rc = 0, i;
1227         ENTRY;
1228
1229         OBD_ALLOC(set, sizeof(*set));
1230         if (set == NULL)
1231                 RETURN(-ENOMEM);
1232         lov_init_set(set);
1233
1234         set->set_exp = exp;
1235         set->set_oti = oti;
1236         set->set_oi = oinfo;
1237         if (oti != NULL && oinfo->oi_oa->o_valid & OBD_MD_FLCOOKIE)
1238                 set->set_cookies = oti->oti_logcookies;
1239
1240         for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) {
1241                 struct lov_oinfo *loi = oinfo->oi_md->lsm_oinfo[i];
1242                 struct lov_request *req;
1243
1244                 if (!lov->lov_tgts[loi->loi_ost_idx] ||
1245                     !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) {
1246                         CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
1247                         continue;
1248                 }
1249
1250                 OBD_ALLOC(req, sizeof(*req));
1251                 if (req == NULL)
1252                         GOTO(out_set, rc = -ENOMEM);
1253                 req->rq_stripe = i;
1254                 req->rq_idx = loi->loi_ost_idx;
1255
1256                 OBDO_ALLOC(req->rq_oi.oi_oa);
1257                 if (req->rq_oi.oi_oa == NULL) {
1258                         OBD_FREE(req, sizeof(*req));
1259                         GOTO(out_set, rc = -ENOMEM);
1260                 }
1261                 memcpy(req->rq_oi.oi_oa, oinfo->oi_oa,
1262                        sizeof(*req->rq_oi.oi_oa));
1263                 req->rq_oi.oi_oa->o_id = loi->loi_id;
1264                 req->rq_oi.oi_oa->o_seq= loi->loi_seq;
1265                 req->rq_oi.oi_oa->o_stripe_idx = i;
1266                 req->rq_oi.oi_cb_up = cb_setattr_update;
1267                 req->rq_oi.oi_capa = oinfo->oi_capa;
1268
1269                 if (oinfo->oi_oa->o_valid & OBD_MD_FLSIZE) {
1270                         int off = lov_stripe_offset(oinfo->oi_md,
1271                                                     oinfo->oi_oa->o_size, i,
1272                                                     &req->rq_oi.oi_oa->o_size);
1273
1274                         if (off < 0 && req->rq_oi.oi_oa->o_size)
1275                                 req->rq_oi.oi_oa->o_size--;
1276
1277                         CDEBUG(D_INODE, "stripe %d has size "LPU64"/"LPU64"\n",
1278                                i, req->rq_oi.oi_oa->o_size,
1279                                oinfo->oi_oa->o_size);
1280                 }
1281                 lov_set_add_req(req, set);
1282         }
1283         if (!set->set_count)
1284                 GOTO(out_set, rc = -EIO);
1285         *reqset = set;
1286         RETURN(rc);
1287 out_set:
1288         lov_fini_setattr_set(set);
1289         RETURN(rc);
1290 }
1291
1292 int lov_fini_punch_set(struct lov_request_set *set)
1293 {
1294         int rc = 0;
1295         ENTRY;
1296
1297         if (set == NULL)
1298                 RETURN(0);
1299         LASSERT(set->set_exp);
1300         if (cfs_atomic_read(&set->set_completes)) {
1301                 rc = -EIO;
1302                 /* FIXME update qos data here */
1303                 if (cfs_atomic_read(&set->set_success))
1304                         rc = common_attr_done(set);
1305         }
1306
1307         lov_put_reqset(set);
1308
1309         RETURN(rc);
1310 }
1311
1312 int lov_update_punch_set(struct lov_request_set *set,
1313                          struct lov_request *req, int rc)
1314 {
1315         struct lov_obd *lov = &req->rq_rqset->set_exp->exp_obd->u.lov;
1316         struct lov_stripe_md *lsm = req->rq_rqset->set_oi->oi_md;
1317         ENTRY;
1318
1319         lov_update_set(set, req, rc);
1320
1321         /* grace error on inactive ost */
1322         if (rc && !lov->lov_tgts[req->rq_idx]->ltd_active)
1323                 rc = 0;
1324
1325         if (rc == 0) {
1326                 lov_stripe_lock(lsm);
1327                 if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLBLOCKS) {
1328                         lsm->lsm_oinfo[req->rq_stripe]->loi_lvb.lvb_blocks =
1329                                 req->rq_oi.oi_oa->o_blocks;
1330                 }
1331
1332                 /* Do we need to update lvb_size here? It needn't because
1333                  * it have been done in ll_truncate(). -jay */
1334                 lov_stripe_unlock(lsm);
1335         }
1336
1337         RETURN(rc);
1338 }
1339
1340 /* The callback for osc_punch that finilizes a request info when a response
1341  * is received. */
1342 static int cb_update_punch(void *cookie, int rc)
1343 {
1344         struct obd_info *oinfo = cookie;
1345         struct lov_request *lovreq;
1346         lovreq = container_of(oinfo, struct lov_request, rq_oi);
1347         return lov_update_punch_set(lovreq->rq_rqset, lovreq, rc);
1348 }
1349
1350 int lov_prep_punch_set(struct obd_export *exp, struct obd_info *oinfo,
1351                        struct obd_trans_info *oti,
1352                        struct lov_request_set **reqset)
1353 {
1354         struct lov_request_set *set;
1355         struct lov_obd *lov = &exp->exp_obd->u.lov;
1356         int rc = 0, i;
1357         ENTRY;
1358
1359         OBD_ALLOC(set, sizeof(*set));
1360         if (set == NULL)
1361                 RETURN(-ENOMEM);
1362         lov_init_set(set);
1363
1364         set->set_oi = oinfo;
1365         set->set_exp = exp;
1366
1367         for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) {
1368                 struct lov_oinfo *loi = oinfo->oi_md->lsm_oinfo[i];
1369                 struct lov_request *req;
1370                 obd_off rs, re;
1371
1372                 if (!lov_stripe_intersects(oinfo->oi_md, i,
1373                                            oinfo->oi_policy.l_extent.start,
1374                                            oinfo->oi_policy.l_extent.end,
1375                                            &rs, &re))
1376                         continue;
1377
1378                 if (!lov->lov_tgts[loi->loi_ost_idx] ||
1379                     !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) {
1380                         CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
1381                         GOTO(out_set, rc = -EIO);
1382                 }
1383
1384                 OBD_ALLOC(req, sizeof(*req));
1385                 if (req == NULL)
1386                         GOTO(out_set, rc = -ENOMEM);
1387                 req->rq_stripe = i;
1388                 req->rq_idx = loi->loi_ost_idx;
1389
1390                 OBDO_ALLOC(req->rq_oi.oi_oa);
1391                 if (req->rq_oi.oi_oa == NULL) {
1392                         OBD_FREE(req, sizeof(*req));
1393                         GOTO(out_set, rc = -ENOMEM);
1394                 }
1395                 memcpy(req->rq_oi.oi_oa, oinfo->oi_oa,
1396                        sizeof(*req->rq_oi.oi_oa));
1397                 req->rq_oi.oi_oa->o_id = loi->loi_id;
1398                 req->rq_oi.oi_oa->o_seq = loi->loi_seq;
1399                 req->rq_oi.oi_oa->o_valid |= OBD_MD_FLGROUP;
1400
1401                 req->rq_oi.oi_oa->o_stripe_idx = i;
1402                 req->rq_oi.oi_cb_up = cb_update_punch;
1403
1404                 req->rq_oi.oi_policy.l_extent.start = rs;
1405                 req->rq_oi.oi_policy.l_extent.end = re;
1406                 req->rq_oi.oi_policy.l_extent.gid = -1;
1407
1408                 req->rq_oi.oi_capa = oinfo->oi_capa;
1409
1410                 lov_set_add_req(req, set);
1411         }
1412         if (!set->set_count)
1413                 GOTO(out_set, rc = -EIO);
1414         *reqset = set;
1415         RETURN(rc);
1416 out_set:
1417         lov_fini_punch_set(set);
1418         RETURN(rc);
1419 }
1420
1421 int lov_fini_sync_set(struct lov_request_set *set)
1422 {
1423         int rc = 0;
1424         ENTRY;
1425
1426         if (set == NULL)
1427                 RETURN(0);
1428         LASSERT(set->set_exp);
1429         if (cfs_atomic_read(&set->set_completes)) {
1430                 if (!cfs_atomic_read(&set->set_success))
1431                         rc = -EIO;
1432                 /* FIXME update qos data here */
1433         }
1434
1435         lov_put_reqset(set);
1436
1437         RETURN(rc);
1438 }
1439
1440 /* The callback for osc_sync that finilizes a request info when a
1441  * response is recieved. */
1442 static int cb_sync_update(void *cookie, int rc)
1443 {
1444         struct obd_info *oinfo = cookie;
1445         struct lov_request *lovreq;
1446
1447         lovreq = container_of(oinfo, struct lov_request, rq_oi);
1448         return lov_update_common_set(lovreq->rq_rqset, lovreq, rc);
1449 }
1450
1451 int lov_prep_sync_set(struct obd_export *exp, struct obd_info *oinfo,
1452                       obd_off start, obd_off end,
1453                       struct lov_request_set **reqset)
1454 {
1455         struct lov_request_set *set;
1456         struct lov_obd *lov = &exp->exp_obd->u.lov;
1457         int rc = 0, i;
1458         ENTRY;
1459
1460         OBD_ALLOC_PTR(set);
1461         if (set == NULL)
1462                 RETURN(-ENOMEM);
1463         lov_init_set(set);
1464
1465         set->set_exp = exp;
1466         set->set_oi = oinfo;
1467
1468         for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) {
1469                 struct lov_oinfo *loi = oinfo->oi_md->lsm_oinfo[i];
1470                 struct lov_request *req;
1471                 obd_off rs, re;
1472
1473                 if (!lov->lov_tgts[loi->loi_ost_idx] ||
1474                     !lov->lov_tgts[loi->loi_ost_idx]->ltd_active) {
1475                         CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
1476                         continue;
1477                 }
1478
1479                 if (!lov_stripe_intersects(oinfo->oi_md, i, start, end, &rs,
1480                                            &re))
1481                         continue;
1482
1483                 OBD_ALLOC_PTR(req);
1484                 if (req == NULL)
1485                         GOTO(out_set, rc = -ENOMEM);
1486                 req->rq_stripe = i;
1487                 req->rq_idx = loi->loi_ost_idx;
1488
1489                 OBDO_ALLOC(req->rq_oi.oi_oa);
1490                 if (req->rq_oi.oi_oa == NULL) {
1491                         OBD_FREE(req, sizeof(*req));
1492                         GOTO(out_set, rc = -ENOMEM);
1493                 }
1494                 *req->rq_oi.oi_oa = *oinfo->oi_oa;
1495                 req->rq_oi.oi_oa->o_id = loi->loi_id;
1496                 req->rq_oi.oi_oa->o_seq = loi->loi_seq;
1497                 req->rq_oi.oi_oa->o_stripe_idx = i;
1498
1499                 req->rq_oi.oi_policy.l_extent.start = rs;
1500                 req->rq_oi.oi_policy.l_extent.end = re;
1501                 req->rq_oi.oi_policy.l_extent.gid = -1;
1502                 req->rq_oi.oi_cb_up = cb_sync_update;
1503
1504                 lov_set_add_req(req, set);
1505         }
1506         if (!set->set_count)
1507                 GOTO(out_set, rc = -EIO);
1508         *reqset = set;
1509         RETURN(rc);
1510 out_set:
1511         lov_fini_sync_set(set);
1512         RETURN(rc);
1513 }
1514
/* All-ones __u64, used as the saturation value for the sums below. */
#define LOV_U64_MAX (~(__u64)0)

/*
 * Saturating accumulate: add @add to @tot, pinning @tot at LOV_U64_MAX when
 * the addition wraps around.  Both arguments are expanded more than once,
 * so they must be free of side effects.
 */
#define LOV_SUM_MAX(tot, add)                                           \
        do {                                                            \
                (tot) = ((tot) + (add) < (tot)) ?                       \
                        LOV_U64_MAX : (tot) + (add);                    \
        } while (0)
1523
1524 int lov_fini_statfs(struct obd_device *obd, struct obd_statfs *osfs,int success)
1525 {
1526         ENTRY;
1527
1528         if (success) {
1529                 __u32 expected_stripes = lov_get_stripecnt(&obd->u.lov,
1530                                                            LOV_MAGIC, 0);
1531                 if (osfs->os_files != LOV_U64_MAX)
1532                         lov_do_div64(osfs->os_files, expected_stripes);
1533                 if (osfs->os_ffree != LOV_U64_MAX)
1534                         lov_do_div64(osfs->os_ffree, expected_stripes);
1535
1536                 cfs_spin_lock(&obd->obd_osfs_lock);
1537                 memcpy(&obd->obd_osfs, osfs, sizeof(*osfs));
1538                 obd->obd_osfs_age = cfs_time_current_64();
1539                 cfs_spin_unlock(&obd->obd_osfs_lock);
1540                 RETURN(0);
1541         }
1542
1543         RETURN(-EIO);
1544 }
1545
1546 int lov_fini_statfs_set(struct lov_request_set *set)
1547 {
1548         int rc = 0;
1549         ENTRY;
1550
1551         if (set == NULL)
1552                 RETURN(0);
1553
1554         if (cfs_atomic_read(&set->set_completes)) {
1555                 rc = lov_fini_statfs(set->set_obd, set->set_oi->oi_osfs,
1556                                      cfs_atomic_read(&set->set_success));
1557         }
1558         lov_put_reqset(set);
1559         RETURN(rc);
1560 }
1561
/*
 * Fold one target's statfs reply @lov_sfs into the running aggregate @osfs.
 *
 * When @success is 0 the reply is simply copied into @osfs.  Otherwise the
 * block counters of whichever side has the smaller block size are scaled
 * down to the larger block size, and the counters are then accumulated
 * (or, with MIN_DF, bfree/bavail take the smaller bavail's values).  The
 * file counters are added with saturation at LOV_U64_MAX.
 *
 * Note that @lov_sfs may be modified (its block counters are shifted) when
 * its block size is the smaller one.
 */
void lov_update_statfs(struct obd_statfs *osfs, struct obd_statfs *lov_sfs,
                       int success)
{
        int shift = 0, quit = 0;
        __u64 tmp;

        if (success == 0) {
                memcpy(osfs, lov_sfs, sizeof(*lov_sfs));
        } else {
                if (osfs->os_bsize != lov_sfs->os_bsize) {
                        /* assume all block sizes are always powers of 2 */
                        /* get the bits difference */
                        /* tmp has one bit set per block size; the loop walks
                         * up from bit 0, restarts the count at the first set
                         * bit and stops at the second, leaving in shift the
                         * distance between the two bits.
                         * NOTE(review): if tmp does not contain two set bits
                         * (e.g. one block size is 0) the loop runs out and
                         * leaves shift at 65, which is then used as a shift
                         * count below - confirm this cannot happen. */
                        tmp = osfs->os_bsize | lov_sfs->os_bsize;
                        for (shift = 0; shift <= 64; ++shift) {
                                if (tmp & 1) {
                                        if (quit)
                                                break;
                                        else
                                                quit = 1;
                                        shift = 0;
                                }
                                tmp >>= 1;
                        }
                }

                /* express both sides in units of the larger block size */
                if (osfs->os_bsize < lov_sfs->os_bsize) {
                        osfs->os_bsize = lov_sfs->os_bsize;

                        osfs->os_bfree  >>= shift;
                        osfs->os_bavail >>= shift;
                        osfs->os_blocks >>= shift;
                } else if (shift != 0) {
                        lov_sfs->os_bfree  >>= shift;
                        lov_sfs->os_bavail >>= shift;
                        lov_sfs->os_blocks >>= shift;
                }
#ifdef MIN_DF
                /* Sandia requested that df (and so, statfs) only
                   returned minimal available space on
                   a single OST, so people would be able to
                   write this much data guaranteed. */
                if (osfs->os_bavail > lov_sfs->os_bavail) {
                        /* Presumably if new bavail is smaller,
                           new bfree is bigger as well */
                        osfs->os_bfree = lov_sfs->os_bfree;
                        osfs->os_bavail = lov_sfs->os_bavail;
                }
#else
                osfs->os_bfree += lov_sfs->os_bfree;
                osfs->os_bavail += lov_sfs->os_bavail;
#endif
                osfs->os_blocks += lov_sfs->os_blocks;
                /* XXX not sure about this one - depends on policy.
                 *   - could be minimum if we always stripe on all OBDs
                 *     (but that would be wrong for any other policy,
                 *     if one of the OBDs has no more objects left)
                 *   - could be sum if we stripe whole objects
                 *   - could be average, just to give a nice number
                 *
                 * To give a "reasonable" (if not wholly accurate)
                 * number, we divide the total number of free objects
                 * by expected stripe count (watch out for overflow).
                 */
                /* The sums saturate at LOV_U64_MAX here; the division
                 * mentioned above is not performed in this function. */
                LOV_SUM_MAX(osfs->os_files, lov_sfs->os_files);
                LOV_SUM_MAX(osfs->os_ffree, lov_sfs->os_ffree);
        }
}
1629
/* The callback for osc_statfs_async that finalizes a request info when a
 * response is received.
 *
 * @cookie is the obd_info embedded in a lov_request; @rc is the result of
 * that request.  The result is recorded in the owning set and, when @rc is
 * 0, the reply is cached on the target device (if the target is present and
 * active) and folded into the set's aggregate statfs.  When the set was
 * issued with OBD_STATFS_PTLRPCD and is finished, the set is interpreted
 * here.  Always returns 0. */
static int cb_statfs_update(void *cookie, int rc)
{
        struct obd_info *oinfo = cookie;
        struct lov_request *lovreq;
        struct lov_request_set *set;
        struct obd_statfs *osfs, *lov_sfs;
        struct lov_obd *lov;
        struct lov_tgt_desc *tgt;
        struct obd_device *lovobd, *tgtobd;
        int success;
        ENTRY;

        lovreq = container_of(oinfo, struct lov_request, rq_oi);
        set = lovreq->rq_rqset;
        lovobd = set->set_obd;
        lov = &lovobd->u.lov;
        /* osfs is the aggregate owned by the set, lov_sfs this reply */
        osfs = set->set_oi->oi_osfs;
        lov_sfs = oinfo->oi_osfs;
        /* Sampled before lov_update_set() below, so that lov_update_statfs()
         * sees the count as it was before this reply (0 makes it copy
         * instead of accumulate); presumably lov_update_set() bumps
         * set_success - verify against its definition. */
        success = cfs_atomic_read(&set->set_success);
        /* XXX: the same is done in lov_update_common_set, however
           lovset->set_exp is not initialized. */
        lov_update_set(set, lovreq, rc);
        if (rc)
                GOTO(out, rc);

        obd_getref(lovobd);
        tgt = lov->lov_tgts[lovreq->rq_idx];
        /* a missing or inactive target is still aggregated, but its
         * per-device cache is left untouched */
        if (!tgt || !tgt->ltd_active)
                GOTO(out_update, rc);

        /* refresh the target's cached statfs; the age is only bumped when
         * the reply did not itself come from a cache */
        tgtobd = class_exp2obd(tgt->ltd_exp);
        cfs_spin_lock(&tgtobd->obd_osfs_lock);
        memcpy(&tgtobd->obd_osfs, lov_sfs, sizeof(*lov_sfs));
        if ((oinfo->oi_flags & OBD_STATFS_FROM_CACHE) == 0)
                tgtobd->obd_osfs_age = cfs_time_current_64();
        cfs_spin_unlock(&tgtobd->obd_osfs_lock);

out_update:
        lov_update_statfs(osfs, lov_sfs, success);
        qos_update(lov);
        obd_putref(lovobd);

out:
        /* the third argument is non-zero when not every request in the set
         * succeeded */
        if (set->set_oi->oi_flags & OBD_STATFS_PTLRPCD &&
            lov_finished_set(set)) {
                lov_statfs_interpret(NULL, set, set->set_count !=
                                     cfs_atomic_read(&set->set_success));
                if (lov->lov_qos.lq_statfs_in_progress)
                        qos_statfs_done(lov);
        }

        RETURN(0);
}
1685
1686 int lov_prep_statfs_set(struct obd_device *obd, struct obd_info *oinfo,
1687                         struct lov_request_set **reqset)
1688 {
1689         struct lov_request_set *set;
1690         struct lov_obd *lov = &obd->u.lov;
1691         int rc = 0, i;
1692         ENTRY;
1693
1694         OBD_ALLOC(set, sizeof(*set));
1695         if (set == NULL)
1696                 RETURN(-ENOMEM);
1697         lov_init_set(set);
1698
1699         set->set_obd = obd;
1700         set->set_oi = oinfo;
1701
1702         /* We only get block data from the OBD */
1703         for (i = 0; i < lov->desc.ld_tgt_count; i++) {
1704                 struct lov_request *req;
1705
1706                 if (!lov->lov_tgts[i] || (!lov->lov_tgts[i]->ltd_active
1707                                           && (oinfo->oi_flags & OBD_STATFS_NODELAY))) {
1708                         CDEBUG(D_HA, "lov idx %d inactive\n", i);
1709                         continue;
1710                 }
1711
1712                 /* skip targets that have been explicitely disabled by the
1713                  * administrator */
1714                 if (!lov->lov_tgts[i]->ltd_exp) {
1715                         CDEBUG(D_HA, "lov idx %d administratively disabled\n", i);
1716                         continue;
1717                 }
1718
1719                 OBD_ALLOC(req, sizeof(*req));
1720                 if (req == NULL)
1721                         GOTO(out_set, rc = -ENOMEM);
1722
1723                 OBD_ALLOC(req->rq_oi.oi_osfs, sizeof(*req->rq_oi.oi_osfs));
1724                 if (req->rq_oi.oi_osfs == NULL) {
1725                         OBD_FREE(req, sizeof(*req));
1726                         GOTO(out_set, rc = -ENOMEM);
1727                 }
1728
1729                 req->rq_idx = i;
1730                 req->rq_oi.oi_cb_up = cb_statfs_update;
1731                 req->rq_oi.oi_flags = oinfo->oi_flags;
1732
1733                 lov_set_add_req(req, set);
1734         }
1735         if (!set->set_count)
1736                 GOTO(out_set, rc = -EIO);
1737         *reqset = set;
1738         RETURN(rc);
1739 out_set:
1740         lov_fini_statfs_set(set);
1741         RETURN(rc);
1742 }