4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2013, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
37 #define DEBUG_SUBSYSTEM S_LOV
39 #include <libcfs/libcfs.h>
41 #include <obd_class.h>
42 #include <lustre/lustre_idl.h>
44 #include "lov_internal.h"
/*
 * Put a request set into its initial state: zero the completion, success
 * and finish-checked counters, make the request list empty, start the
 * reference count at 1 and initialise the wait queue.
 */
46 static void lov_init_set(struct lov_request_set *set)
49 atomic_set(&set->set_completes, 0);
50 atomic_set(&set->set_success, 0);
51 atomic_set(&set->set_finish_checked, 0);
53 INIT_LIST_HEAD(&set->set_list);
54 atomic_set(&set->set_refcount, 1);
55 init_waitqueue_head(&set->set_waitq);
/*
 * Tear down the requests queued on a set: every entry on set_list is
 * unlinked and its per-request obdo (oi_oa) and statfs buffer (oi_osfs)
 * are released when they were allocated.
 * NOTE(review): the release of the request structures and of the set
 * itself is not visible in this block - confirm before relying on it.
 */
58 void lov_finish_set(struct lov_request_set *set)
60 struct list_head *pos, *n;
61 struct lov_request *req;
/* _safe iterator: entries are removed from the list while it is walked */
65 list_for_each_safe(pos, n, &set->set_list) {
66 req = list_entry(pos, struct lov_request, rq_link);
67 list_del_init(&req->rq_link);
69 if (req->rq_oi.oi_oa != NULL)
70 OBDO_FREE(req->rq_oi.oi_oa);
72 if (req->rq_oi.oi_osfs != NULL)
73 OBD_FREE_PTR(req->rq_oi.oi_osfs);
/*
 * Check whether every request of the set has completed.
 *
 * The set_completes counter is compared with set_count. Once they match,
 * set_finish_checked is incremented atomically and the new value is tested
 * against 1, so only the first caller reaching that point sees a match.
 * NOTE(review): how "idempotent" influences the result is not visible in
 * this block - presumably it bypasses the first-caller test; confirm.
 */
82 int lov_set_finished(struct lov_request_set *set, int idempotent)
84 int completes = atomic_read(&set->set_completes);
86 CDEBUG(D_INFO, "check set %d/%d\n", completes, set->set_count);
88 if (completes == set->set_count) {
91 if (atomic_inc_return(&set->set_finish_checked) == 1)
/*
 * Account for one finished request: flag the request complete, increment
 * set_completes and wake up anybody sleeping on set_waitq.
 * NOTE(review): set_success is presumably incremented only when rc == 0;
 * the guarding condition is not visible in this block - confirm.
 */
97 void lov_update_set(struct lov_request_set *set,
98 struct lov_request *req, int rc)
100 req->rq_complete = 1;
103 atomic_inc(&set->set_completes);
105 atomic_inc(&set->set_success);
107 wake_up(&set->set_waitq);
/*
 * Completion handler called from cb_getattr_update(): the result is first
 * recorded with lov_update_set(), then the state of the target addressed
 * by req->rq_idx is looked up in lov->lov_tgts[] (reached through
 * set->set_exp).
 * NOTE(review): what is done with rc when the target test matches, and the
 * value returned, are not visible in this block.
 */
110 int lov_update_common_set(struct lov_request_set *set,
111 struct lov_request *req, int rc)
113 struct lov_obd *lov = &set->set_exp->exp_obd->u.lov;
116 lov_update_set(set, req, rc);
118 /* grace error on inactive ost: rc is non-zero and the target slot is
 * empty or the target is not active */
119 if (rc && !(lov->lov_tgts[req->rq_idx] &&
120 lov->lov_tgts[req->rq_idx]->ltd_active))
123 /* FIXME in raid1 regime, should return 0 */
/*
 * Queue a request at the tail of the set's request list (set_list).
 * NOTE(review): any further bookkeeping done when a request is added is
 * not visible in this block.
 */
127 void lov_set_add_req(struct lov_request *req, struct lov_request_set *set)
129 list_add_tail(&req->rq_link, &set->set_list);
/*
 * Wait condition evaluated by l_wait_event() in
 * lov_check_and_wait_active().  With lov_lock held it tests target "idx"
 * for any of: the slot is empty, the target is active, or the target has
 * an export whose client import has imp_connect_tried set.
 * NOTE(review): the value returned for each outcome is not visible in this
 * block.
 */
134 static int lov_check_set(struct lov_obd *lov, int idx)
137 mutex_lock(&lov->lov_lock);
139 if (lov->lov_tgts[idx] == NULL ||
140 lov->lov_tgts[idx]->ltd_active ||
141 (lov->lov_tgts[idx]->ltd_exp != NULL &&
142 class_exp2cliimp(lov->lov_tgts[idx]->ltd_exp)->imp_connect_tried))
145 mutex_unlock(&lov->lov_lock);
149 /* Check if the OSC connection exists and is active.
150 * If the OSC has not yet had a chance to connect to the OST the first time,
151 * wait once for it to connect instead of returning an error. */
/*
 * Looks up target "ost_idx" under lov_lock and tests, in order: empty
 * slot, target already active, export present with imp_connect_tried set.
 * Afterwards lov_lock is dropped and the caller sleeps in l_wait_event()
 * on lov_check_set(), with a timeout built from obd_timeout and a
 * one-second interval.
 * NOTE(review): the statements taken by each early test and the final
 * return value are not visible in this block; the callers in this file
 * treat a zero return as "target inactive".
 */
153 int lov_check_and_wait_active(struct lov_obd *lov, int ost_idx)
155 wait_queue_head_t waitq;
156 struct l_wait_info lwi;
157 struct lov_tgt_desc *tgt;
160 mutex_lock(&lov->lov_lock);
162 tgt = lov->lov_tgts[ost_idx];
164 if (unlikely(tgt == NULL))
167 if (likely(tgt->ltd_active))
170 if (tgt->ltd_exp && class_exp2cliimp(tgt->ltd_exp)->imp_connect_tried)
173 mutex_unlock(&lov->lov_lock);
/* waitq is local to this call; presumably nothing ever wakes it and the
 * condition is simply re-evaluated on the lwi interval - confirm against
 * l_wait_event() */
175 init_waitqueue_head(&waitq);
176 lwi = LWI_TIMEOUT_INTERVAL(cfs_time_seconds(obd_timeout),
177 cfs_time_seconds(1), NULL, NULL);
179 rc = l_wait_event(waitq, lov_check_set(lov, ost_idx), &lwi);
186 mutex_unlock(&lov->lov_lock);
/*
 * Merge the attributes returned by the individual stripe requests into
 * the caller's obdo (set->set_oi->oi_oa).
 *
 * The loop over set_list tests each request for "not complete or failed"
 * and for an empty o_valid; lov_merge_attrs() accumulates into tmp_oa and
 * counts the merged stripes in attrset.  When the caller asked for
 * OBD_MD_FLEPOCH, attrset must equal the stripe count, otherwise rc is set
 * to -EIO.  Finally the merged obdo, carrying the caller's original o_oi,
 * is copied over the caller's obdo.
 * NOTE(review): the early exits after the first tests and the handling at
 * the "out" label are not visible in this block.
 */
190 static int common_attr_done(struct lov_request_set *set)
192 struct list_head *pos;
193 struct lov_request *req;
195 int rc = 0, attrset = 0;
198 LASSERT(set->set_oi != NULL);
200 if (set->set_oi->oi_oa == NULL)
203 if (!atomic_read(&set->set_success))
/* presumably reached when allocating tmp_oa failed - the allocation
 * itself is not visible here */
208 GOTO(out, rc = -ENOMEM);
210 list_for_each(pos, &set->set_list) {
211 req = list_entry(pos, struct lov_request, rq_link);
213 if (!req->rq_complete || req->rq_rc)
215 if (req->rq_oi.oi_oa->o_valid == 0) /* inactive stripe */
217 lov_merge_attrs(tmp_oa, req->rq_oi.oi_oa,
218 req->rq_oi.oi_oa->o_valid,
219 set->set_oi->oi_md, req->rq_stripe, &attrset);
222 CERROR("No stripes had valid attrs\n");
225 if ((set->set_oi->oi_oa->o_valid & OBD_MD_FLEPOCH) &&
226 (set->set_oi->oi_md->lsm_stripe_count != attrset)) {
227 /* When we take attributes of some epoch, we require all the
228 * ost to be active. */
229 CERROR("Not all the stripes had valid attrs\n");
230 GOTO(out, rc = -EIO);
/* carry the caller's o_oi over before its obdo is overwritten with the
 * merged one */
233 tmp_oa->o_oi = set->set_oi->oi_oa->o_oi;
234 memcpy(set->set_oi->oi_oa, tmp_oa, sizeof(*set->set_oi->oi_oa));
/*
 * Finish a getattr request set.  The set must have an export attached;
 * when at least one request has completed, the per-stripe attributes are
 * merged with common_attr_done() and its result is stored in rc.
 * NOTE(review): the remaining cleanup and the return statement are not
 * visible in this block.
 */
242 int lov_fini_getattr_set(struct lov_request_set *set)
249 LASSERT(set->set_exp);
250 if (atomic_read(&set->set_completes))
251 rc = common_attr_done(set);
258 /* The callback for osc_getattr_async that finalizes a request info when a
259 * response is received. */
260 static int cb_getattr_update(void *cookie, int rc)
/* cookie is the obd_info embedded in a lov_request as rq_oi; recover the
 * enclosing request from it */
262 struct obd_info *oinfo = cookie;
263 struct lov_request *lovreq;
264 lovreq = container_of(oinfo, struct lov_request, rq_oi);
265 return lov_update_common_set(lovreq->rq_rqset, lovreq, rc);
/*
 * Build a getattr request set with one request per stripe of
 * oinfo->oi_md.
 *
 * For every stripe the target is checked with
 * lov_check_and_wait_active(); an inactive target is logged and, when the
 * caller asked for OBD_MD_FLEPOCH, aborts the whole operation with -EIO.
 * Each request gets its own copy of the caller's obdo with o_oi replaced
 * by the stripe's object id, cb_getattr_update() as completion callback
 * and the caller's capability, and is queued with lov_set_add_req().
 * Allocation failures jump to out_set with -ENOMEM.
 * NOTE(review): the initialisation of the new set, the handling of dummy
 * and inactive stripes, the condition guarding the final -EIO and the
 * success path that hands the set back through "reqset" are not visible
 * in this block; lov_fini_getattr_set() appears to be the error cleanup.
 */
268 int lov_prep_getattr_set(struct obd_export *exp, struct obd_info *oinfo,
269 struct lov_request_set **reqset)
271 struct lov_request_set *set;
272 struct lov_obd *lov = &exp->exp_obd->u.lov;
276 OBD_ALLOC(set, sizeof(*set));
284 for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) {
285 struct lov_oinfo *loi;
286 struct lov_request *req;
288 loi = oinfo->oi_md->lsm_oinfo[i];
289 if (lov_oinfo_is_dummy(loi))
292 if (!lov_check_and_wait_active(lov, loi->loi_ost_idx)) {
293 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
294 if (oinfo->oi_oa->o_valid & OBD_MD_FLEPOCH)
295 /* SOM requires all the OSTs to be active. */
296 GOTO(out_set, rc = -EIO);
300 OBD_ALLOC(req, sizeof(*req));
302 GOTO(out_set, rc = -ENOMEM);
305 req->rq_idx = loi->loi_ost_idx;
307 OBDO_ALLOC(req->rq_oi.oi_oa);
308 if (req->rq_oi.oi_oa == NULL) {
/* req is not on the set's list yet, so it is freed here by hand */
309 OBD_FREE(req, sizeof(*req));
310 GOTO(out_set, rc = -ENOMEM);
/* start from the caller's obdo, then address this stripe's object */
312 memcpy(req->rq_oi.oi_oa, oinfo->oi_oa,
313 sizeof(*req->rq_oi.oi_oa));
314 req->rq_oi.oi_oa->o_oi = loi->loi_oi;
315 req->rq_oi.oi_cb_up = cb_getattr_update;
316 req->rq_oi.oi_capa = oinfo->oi_capa;
318 lov_set_add_req(req, set);
321 GOTO(out_set, rc = -EIO);
325 lov_fini_getattr_set(set);
/*
 * Finish a setattr request set.  The set must have an export attached;
 * when at least one request has completed, the per-stripe attributes are
 * merged with common_attr_done() and its result is stored in rc.
 * NOTE(review): the remaining cleanup and the return statement are not
 * visible in this block.
 */
329 int lov_fini_setattr_set(struct lov_request_set *set)
336 LASSERT(set->set_exp);
337 if (atomic_read(&set->set_completes)) {
338 rc = common_attr_done(set);
339 /* FIXME update qos data here */
/*
 * Completion handler for one setattr request.  The result is recorded
 * with lov_update_set(); afterwards the ctime, mtime and atime that the
 * reply obdo flags as valid are copied into the lvb kept for the
 * request's stripe (lsm_oinfo[rq_stripe]->loi_lvb).
 * NOTE(review): the time updates are presumably limited to rc == 0, and
 * the inactive-target test presumably clears rc; neither the guarding
 * condition nor the return value is visible in this block.
 */
346 int lov_update_setattr_set(struct lov_request_set *set,
347 struct lov_request *req, int rc)
349 struct lov_obd *lov = &req->rq_rqset->set_exp->exp_obd->u.lov;
350 struct lov_stripe_md *lsm = req->rq_rqset->set_oi->oi_md;
353 lov_update_set(set, req, rc);
355 /* grace error on inactive ost */
356 if (rc && !(lov->lov_tgts[req->rq_idx] &&
357 lov->lov_tgts[req->rq_idx]->ltd_active))
361 if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLCTIME)
362 lsm->lsm_oinfo[req->rq_stripe]->loi_lvb.lvb_ctime =
363 req->rq_oi.oi_oa->o_ctime;
364 if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLMTIME)
365 lsm->lsm_oinfo[req->rq_stripe]->loi_lvb.lvb_mtime =
366 req->rq_oi.oi_oa->o_mtime;
367 if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLATIME)
368 lsm->lsm_oinfo[req->rq_stripe]->loi_lvb.lvb_atime =
369 req->rq_oi.oi_oa->o_atime;
375 /* The callback for osc_setattr_async that finalizes a request info when a
376 * response is received. */
377 static int cb_setattr_update(void *cookie, int rc)
/* cookie is the obd_info embedded in a lov_request as rq_oi; recover the
 * enclosing request from it */
379 struct obd_info *oinfo = cookie;
380 struct lov_request *lovreq;
381 lovreq = container_of(oinfo, struct lov_request, rq_oi);
382 return lov_update_setattr_set(lovreq->rq_rqset, lovreq, rc);
/*
 * Build a setattr request set with one request per stripe of
 * oinfo->oi_md.
 *
 * When a transaction info is supplied and the caller's obdo has
 * OBD_MD_FLCOOKIE set, the set borrows oti->oti_logcookies.  For every
 * stripe the target is checked with lov_check_and_wait_active() and an
 * inactive one is logged.  Each request gets its own copy of the caller's
 * obdo with o_oi and o_stripe_idx set for the stripe,
 * cb_setattr_update() as completion callback and the caller's capability.
 * When OBD_MD_FLSIZE is set, the per-stripe size is computed with
 * lov_stripe_offset().  Allocation failures jump to out_set with -ENOMEM.
 * NOTE(review): the initialisation of the new set, the handling of dummy
 * and inactive stripes, the condition guarding the final -EIO and the
 * success path that hands the set back through "reqset" are not visible
 * in this block; lov_fini_setattr_set() appears to be the error cleanup.
 */
385 int lov_prep_setattr_set(struct obd_export *exp, struct obd_info *oinfo,
386 struct obd_trans_info *oti,
387 struct lov_request_set **reqset)
389 struct lov_request_set *set;
390 struct lov_obd *lov = &exp->exp_obd->u.lov;
394 OBD_ALLOC(set, sizeof(*set));
401 if (oti != NULL && oinfo->oi_oa->o_valid & OBD_MD_FLCOOKIE)
402 set->set_cookies = oti->oti_logcookies;
404 for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) {
405 struct lov_oinfo *loi = oinfo->oi_md->lsm_oinfo[i];
406 struct lov_request *req;
408 if (lov_oinfo_is_dummy(loi))
411 if (!lov_check_and_wait_active(lov, loi->loi_ost_idx)) {
412 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
416 OBD_ALLOC(req, sizeof(*req));
418 GOTO(out_set, rc = -ENOMEM);
420 req->rq_idx = loi->loi_ost_idx;
422 OBDO_ALLOC(req->rq_oi.oi_oa);
423 if (req->rq_oi.oi_oa == NULL) {
/* req is not on the set's list yet, so it is freed here by hand */
424 OBD_FREE(req, sizeof(*req));
425 GOTO(out_set, rc = -ENOMEM);
/* start from the caller's obdo, then address this stripe's object */
427 memcpy(req->rq_oi.oi_oa, oinfo->oi_oa,
428 sizeof(*req->rq_oi.oi_oa));
429 req->rq_oi.oi_oa->o_oi = loi->loi_oi;
430 req->rq_oi.oi_oa->o_stripe_idx = i;
431 req->rq_oi.oi_cb_up = cb_setattr_update;
432 req->rq_oi.oi_capa = oinfo->oi_capa;
434 if (oinfo->oi_oa->o_valid & OBD_MD_FLSIZE) {
/* lov_stripe_offset() writes the per-stripe value into the request's
 * o_size */
435 int off = lov_stripe_offset(oinfo->oi_md,
436 oinfo->oi_oa->o_size, i,
437 &req->rq_oi.oi_oa->o_size);
/* a negative return combined with a non-zero result has the stripe size
 * reduced by one - NOTE(review): meaning of the sign to be confirmed
 * against lov_stripe_offset() */
439 if (off < 0 && req->rq_oi.oi_oa->o_size)
440 req->rq_oi.oi_oa->o_size--;
442 CDEBUG(D_INODE, "stripe %d has size "LPU64"/"LPU64"\n",
443 i, req->rq_oi.oi_oa->o_size,
444 oinfo->oi_oa->o_size);
446 lov_set_add_req(req, set);
449 GOTO(out_set, rc = -EIO);
453 lov_fini_setattr_set(set);
/* Largest value a __u64 can hold; LOV_SUM_MAX() pins overflowing totals
 * to it. */
457 #define LOV_U64_MAX ((__u64)~0ULL)
/*
 * Saturating accumulation: when the unsigned sum (tot) + (add) wraps
 * around, i.e. compares smaller than (tot), (tot) is set to LOV_U64_MAX.
 * Both arguments are evaluated more than once, so they must be free of
 * side effects.
 * NOTE(review): the non-overflow branch is not visible in this block.
 */
458 #define LOV_SUM_MAX(tot, add) \
460 if ((tot) + (add) < (tot)) \
461 (tot) = LOV_U64_MAX; \
/*
 * Finalise an aggregated statfs result.
 *
 * os_files and os_ffree are divided by the expected stripe count unless
 * they hold LOV_U64_MAX (the saturation value used by LOV_SUM_MAX()).
 * The result is then copied into obd->obd_osfs under obd_osfs_lock and
 * obd_osfs_age is set to the current time.
 * NOTE(review): presumably all of this only runs when "success" is
 * non-zero; the guard, the remaining arguments of lov_get_stripecnt() and
 * the return value are not visible in this block.
 */
466 int lov_fini_statfs(struct obd_device *obd, struct obd_statfs *osfs,int success)
471 __u32 expected_stripes = lov_get_stripecnt(&obd->u.lov,
473 if (osfs->os_files != LOV_U64_MAX)
474 lov_do_div64(osfs->os_files, expected_stripes);
475 if (osfs->os_ffree != LOV_U64_MAX)
476 lov_do_div64(osfs->os_ffree, expected_stripes);
478 spin_lock(&obd->obd_osfs_lock);
479 memcpy(&obd->obd_osfs, osfs, sizeof(*osfs));
480 obd->obd_osfs_age = cfs_time_current_64();
481 spin_unlock(&obd->obd_osfs_lock);
/*
 * Finish a statfs request set: when at least one request has completed,
 * the aggregate in set->set_oi->oi_osfs is finalised with
 * lov_fini_statfs(), which is passed the current number of successful
 * requests.
 * NOTE(review): the remaining cleanup and the return statement are not
 * visible in this block.
 */
488 int lov_fini_statfs_set(struct lov_request_set *set)
496 if (atomic_read(&set->set_completes)) {
497 rc = lov_fini_statfs(set->set_obd, set->set_oi->oi_osfs,
498 atomic_read(&set->set_success));
/*
 * Fold one target's statfs reply (lov_sfs) into the running aggregate
 * (osfs).
 *
 * The reply can be copied wholesale into osfs - NOTE(review): presumably
 * only for the first successful reply; the guarding condition and the
 * third parameter are not visible in this block.  Otherwise, when the two
 * block sizes differ, the block counts of the side with the smaller block
 * size are shifted right by "shift" so that both sides use the larger
 * block size; note that this can modify lov_sfs in place.  Block counts
 * are then added up and os_files / os_ffree are accumulated with the
 * saturating LOV_SUM_MAX().
 */
504 void lov_update_statfs(struct obd_statfs *osfs, struct obd_statfs *lov_sfs,
507 int shift = 0, quit = 0;
511 memcpy(osfs, lov_sfs, sizeof(*lov_sfs));
513 if (osfs->os_bsize != lov_sfs->os_bsize) {
514 /* assume all block sizes are always powers of 2 */
515 /* get the bits difference */
516 tmp = osfs->os_bsize | lov_sfs->os_bsize;
/* NOTE(review): the loop body that derives "shift" from tmp is not
 * visible in this block */
517 for (shift = 0; shift <= 64; ++shift) {
/* aggregate has the smaller block size: adopt the reply's block size and
 * rescale the aggregate's block counts */
529 if (osfs->os_bsize < lov_sfs->os_bsize) {
530 osfs->os_bsize = lov_sfs->os_bsize;
532 osfs->os_bfree >>= shift;
533 osfs->os_bavail >>= shift;
534 osfs->os_blocks >>= shift;
535 } else if (shift != 0) {
/* reply has the smaller block size: rescale the reply's block counts */
536 lov_sfs->os_bfree >>= shift;
537 lov_sfs->os_bavail >>= shift;
538 lov_sfs->os_blocks >>= shift;
541 /* Sandia requested that df (and so, statfs) only
542 returned minimal available space on
543 a single OST, so people would be able to
544 write this much data guaranteed. */
545 if (osfs->os_bavail > lov_sfs->os_bavail) {
546 /* Presumably if new bavail is smaller,
547 new bfree is bigger as well */
548 osfs->os_bfree = lov_sfs->os_bfree;
549 osfs->os_bavail = lov_sfs->os_bavail;
552 osfs->os_bfree += lov_sfs->os_bfree;
553 osfs->os_bavail += lov_sfs->os_bavail;
555 osfs->os_blocks += lov_sfs->os_blocks;
556 /* XXX not sure about this one - depends on policy.
557 * - could be minimum if we always stripe on all OBDs
558 * (but that would be wrong for any other policy,
559 * if one of the OBDs has no more objects left)
560 * - could be sum if we stripe whole objects
561 * - could be average, just to give a nice number
563 * To give a "reasonable" (if not wholly accurate)
564 * number, we divide the total number of free objects
565 * by expected stripe count (watch out for overflow).
567 LOV_SUM_MAX(osfs->os_files, lov_sfs->os_files);
568 LOV_SUM_MAX(osfs->os_ffree, lov_sfs->os_ffree);
572 /* The callback for osc_statfs_async that finalizes a request info when a
573 * response is received. */
/*
 * Steps visible in this block: recover the lov_request and its set from
 * the cookie, record the completion with lov_update_set(), cache the
 * reply in the target's obd_osfs when the target is present and active,
 * merge the reply into the set's aggregate with lov_update_statfs(), and,
 * for OBD_STATFS_PTLRPCD requests, call lov_statfs_interpret() once
 * lov_set_finished() reports the set as done.
 * NOTE(review): the out/out_update labels, any test of rc before the
 * target lookup and the return value are not visible in this block.
 */
574 static int cb_statfs_update(void *cookie, int rc)
576 struct obd_info *oinfo = cookie;
577 struct lov_request *lovreq;
578 struct lov_request_set *set;
579 struct obd_statfs *osfs, *lov_sfs;
581 struct lov_tgt_desc *tgt;
582 struct obd_device *lovobd, *tgtobd;
586 lovreq = container_of(oinfo, struct lov_request, rq_oi);
587 set = lovreq->rq_rqset;
588 lovobd = set->set_obd;
589 lov = &lovobd->u.lov;
590 osfs = set->set_oi->oi_osfs;
591 lov_sfs = oinfo->oi_osfs;
/* sampled before lov_update_set() below gets a chance to increment
 * set_success */
592 success = atomic_read(&set->set_success);
593 /* XXX: the same is done in lov_update_common_set, however
594 lovset->set_exp is not initialized. */
595 lov_update_set(set, lovreq, rc);
600 tgt = lov->lov_tgts[lovreq->rq_idx];
601 if (!tgt || !tgt->ltd_active)
602 GOTO(out_update, rc);
/* cache the reply in the target's obd; the cache age is refreshed only
 * when the reply did not itself come from a cache */
604 tgtobd = class_exp2obd(tgt->ltd_exp);
605 spin_lock(&tgtobd->obd_osfs_lock);
606 memcpy(&tgtobd->obd_osfs, lov_sfs, sizeof(*lov_sfs));
607 if ((oinfo->oi_flags & OBD_STATFS_FROM_CACHE) == 0)
608 tgtobd->obd_osfs_age = cfs_time_current_64();
609 spin_unlock(&tgtobd->obd_osfs_lock);
612 lov_update_statfs(osfs, lov_sfs, success);
/* the last argument is non-zero when not every request succeeded */
616 if (set->set_oi->oi_flags & OBD_STATFS_PTLRPCD &&
617 lov_set_finished(set, 0)) {
618 lov_statfs_interpret(NULL, set, set->set_count !=
619 atomic_read(&set->set_success));
/*
 * Build a statfs request set with one request per usable target of the
 * LOV (indices 0 .. desc.ld_tgt_count - 1).
 *
 * A target is logged as inactive when its slot is empty, or when it is
 * not active and the caller passed OBD_STATFS_NODELAY.  Other inactive
 * targets are given a chance to connect through
 * lov_check_and_wait_active().  A target without an export is logged as
 * administratively disabled.  Each request gets its own obd_statfs
 * buffer, cb_statfs_update() as completion callback and the caller's
 * flags, and is queued with lov_set_add_req().  Allocation failures jump
 * to out_set with -ENOMEM.
 * NOTE(review): the initialisation of the new set, the statements that
 * skip a logged target, the condition guarding the final -EIO and the
 * success path that hands the set back through "reqset" are not visible
 * in this block; lov_fini_statfs_set() appears to be the error cleanup.
 */
625 int lov_prep_statfs_set(struct obd_device *obd, struct obd_info *oinfo,
626 struct lov_request_set **reqset)
628 struct lov_request_set *set;
629 struct lov_obd *lov = &obd->u.lov;
633 OBD_ALLOC(set, sizeof(*set));
641 /* We only get block data from the OBD */
642 for (i = 0; i < lov->desc.ld_tgt_count; i++) {
643 struct lov_request *req;
645 if (lov->lov_tgts[i] == NULL ||
646 (oinfo->oi_flags & OBD_STATFS_NODELAY &&
647 !lov->lov_tgts[i]->ltd_active)) {
648 CDEBUG(D_HA, "lov idx %d inactive\n", i);
/* the result of the wait is deliberately not looked at here */
652 if (!lov->lov_tgts[i]->ltd_active)
653 lov_check_and_wait_active(lov, i);
655 /* skip targets that have been explicitely disabled by the
657 if (!lov->lov_tgts[i]->ltd_exp) {
658 CDEBUG(D_HA, "lov idx %d administratively disabled\n", i);
662 OBD_ALLOC(req, sizeof(*req));
664 GOTO(out_set, rc = -ENOMEM);
666 OBD_ALLOC(req->rq_oi.oi_osfs, sizeof(*req->rq_oi.oi_osfs));
667 if (req->rq_oi.oi_osfs == NULL) {
668 OBD_FREE(req, sizeof(*req));
669 GOTO(out_set, rc = -ENOMEM);
673 req->rq_oi.oi_cb_up = cb_statfs_update;
674 req->rq_oi.oi_flags = oinfo->oi_flags;
676 lov_set_add_req(req, set);
679 GOTO(out_set, rc = -EIO);
683 lov_fini_statfs_set(set);