4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2014, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
37 #define DEBUG_SUBSYSTEM S_LOV
39 #include <libcfs/libcfs.h>
41 #include <obd_class.h>
42 #include <lustre/lustre_idl.h>
44 #include "lov_internal.h"
/*
 * Put a request set into its initial state: the progress counters are
 * zeroed, the request list is empty, the reference count is 1 and the
 * wait queue is ready for use.
 */
46 static void lov_init_set(struct lov_request_set *set)
/* Nothing has completed, succeeded or been finish-checked yet. */
49 atomic_set(&set->set_completes, 0);
50 atomic_set(&set->set_success, 0);
51 atomic_set(&set->set_finish_checked, 0);
52 INIT_LIST_HEAD(&set->set_list);
/* The reference count starts at 1. */
53 atomic_set(&set->set_refcount, 1);
54 init_waitqueue_head(&set->set_waitq);
/*
 * Tear down the requests queued on a set: every entry of set->set_list is
 * unlinked and the per-request buffers hanging off rq_oi are released.
 * NOTE(review): the release of each request and of the set itself is not
 * visible in this view - confirm against the full function.
 */
57 void lov_finish_set(struct lov_request_set *set)
59 struct list_head *pos, *n;
60 struct lov_request *req;
/* The _safe iterator is required because entries are unlinked mid-walk. */
64 list_for_each_safe(pos, n, &set->set_list) {
65 req = list_entry(pos, struct lov_request, rq_link);
66 list_del_init(&req->rq_link);
/* Both per-request buffers are optional; free only what is present. */
68 if (req->rq_oi.oi_oa != NULL)
69 OBDO_FREE(req->rq_oi.oi_oa);
71 if (req->rq_oi.oi_osfs != NULL)
72 OBD_FREE_PTR(req->rq_oi.oi_osfs);
/*
 * Check whether every request of the set has completed.
 * The current value of set_completes is logged against set_count; when the
 * two match, set_finish_checked is bumped and the "== 1" test singles out
 * the first caller to do so.
 * NOTE(review): the statements selected by these conditions, the return
 * value and any use of 'idempotent' are not visible in this view.
 */
81 int lov_set_finished(struct lov_request_set *set, int idempotent)
83 int completes = atomic_read(&set->set_completes);
85 CDEBUG(D_INFO, "check set %d/%d\n", completes, set->set_count);
/* All queued requests have reported completion. */
87 if (completes == set->set_count) {
/* atomic_inc_return() yields 1 only for the first caller to get here. */
90 if (atomic_inc_return(&set->set_finish_checked) == 1)
/*
 * Record the completion of one request of the set and wake anybody
 * sleeping on the set's wait queue.
 * NOTE(review): the condition guarding the set_success increment is not
 * visible in this view (presumably rc == 0 - confirm), and 'req' is not
 * referenced by the visible lines.
 */
96 void lov_update_set(struct lov_request_set *set,
97 struct lov_request *req, int rc)
/* One more request has finished. */
102 atomic_inc(&set->set_completes);
104 atomic_inc(&set->set_success);
/* Let waiters on set_waitq re-evaluate their condition. */
106 wake_up(&set->set_waitq);
/*
 * Queue a request on a set by appending it to the tail of set->set_list.
 * NOTE(review): any further bookkeeping done by this function is not
 * visible in this view.
 */
109 void lov_set_add_req(struct lov_request *req, struct lov_request_set *set)
111 list_add_tail(&req->rq_link, &set->set_list);
/*
 * Test, under lov_lock, the state of target slot 'idx'.
 * The condition holds when the slot is empty, when the target is active,
 * or when the target has an export whose client import has
 * imp_connect_tried set.
 * It is used as the wake-up condition of l_wait_event() in
 * lov_check_and_wait_active().
 * NOTE(review): how the condition maps to the return value is not visible
 * in this view.
 */
116 static int lov_check_set(struct lov_obd *lov, int idx)
119 mutex_lock(&lov->lov_lock);
121 if (lov->lov_tgts[idx] == NULL ||
122 lov->lov_tgts[idx]->ltd_active ||
123 (lov->lov_tgts[idx]->ltd_exp != NULL &&
124 class_exp2cliimp(lov->lov_tgts[idx]->ltd_exp)->imp_connect_tried))
127 mutex_unlock(&lov->lov_lock);
131 /* Check if the OSC connection exists and is active.
132 * If the OSC has not yet had a chance to connect to the OST the first time,
133 * wait once for it to connect instead of returning an error.
135 int lov_check_and_wait_active(struct lov_obd *lov, int ost_idx)
137 wait_queue_head_t waitq;
138 struct l_wait_info lwi;
139 struct lov_tgt_desc *tgt;
/* The target slot is examined with lov_lock held. */
142 mutex_lock(&lov->lov_lock);
144 tgt = lov->lov_tgts[ost_idx];
/*
 * Three quick checks before any waiting: empty slot (unexpected), target
 * already active (the common case), or a connection attempt already made.
 * NOTE(review): the statements these checks select are not visible in
 * this view.
 */
146 if (unlikely(tgt == NULL))
149 if (likely(tgt->ltd_active))
152 if (tgt->ltd_exp && class_exp2cliimp(tgt->ltd_exp)->imp_connect_tried)
/* Drop the lock before sleeping; lov_check_set() takes it again itself. */
155 mutex_unlock(&lov->lov_lock);
/*
 * Wait on a private wait queue until lov_check_set() holds.
 * The wait info is built from obd_timeout seconds and a one-second value,
 * presumably the overall timeout and the re-check interval - confirm
 * against the LWI_TIMEOUT_INTERVAL definition.
 */
157 init_waitqueue_head(&waitq);
158 lwi = LWI_TIMEOUT_INTERVAL(cfs_time_seconds(obd_timeout),
159 cfs_time_seconds(1), NULL, NULL);
161 rc = l_wait_event(waitq, lov_check_set(lov, ost_idx), &lwi);
/*
 * NOTE(review): second unlock of lov_lock; presumably the shared exit
 * path of the quick checks above, which run with the lock held - confirm.
 */
168 mutex_unlock(&lov->lov_lock);
/*
 * Overflow handling for 64-bit statfs counters.
 * LOV_U64_MAX is the all-ones __u64 value. LOV_SUM_MAX pins (tot) to it
 * when (tot) + (add) wraps around, i.e. when the sum is smaller than (tot).
 * NOTE(review): the rest of the macro body (the non-overflow case) is not
 * visible in this view.
 */
172 #define LOV_U64_MAX ((__u64)~0ULL)
173 #define LOV_SUM_MAX(tot, add) \
175 if ((tot) + (add) < (tot)) \
176 (tot) = LOV_U64_MAX; \
/*
 * Finalize an aggregated statfs result and cache it on the LOV device.
 * The object counters os_files and os_ffree are divided by the expected
 * stripe count unless they hold LOV_U64_MAX, the value LOV_SUM_MAX stores
 * on overflow. The result is then copied into obd->obd_osfs and
 * obd_osfs_age is set to the current time, both under obd_osfs_lock.
 * NOTE(review): the use of 'success', the remaining arguments of
 * lov_get_stripecnt() and the return value are not visible in this view.
 */
181 int lov_fini_statfs(struct obd_device *obd, struct obd_statfs *osfs,int success)
186 __u32 expected_stripes = lov_get_stripecnt(&obd->u.lov,
188 if (osfs->os_files != LOV_U64_MAX)
189 lov_do_div64(osfs->os_files, expected_stripes);
190 if (osfs->os_ffree != LOV_U64_MAX)
191 lov_do_div64(osfs->os_ffree, expected_stripes);
/* Publish the result and its timestamp together under the spinlock. */
193 spin_lock(&obd->obd_osfs_lock);
194 memcpy(&obd->obd_osfs, osfs, sizeof(*osfs));
195 obd->obd_osfs_age = cfs_time_current_64();
196 spin_unlock(&obd->obd_osfs_lock);
/*
 * Finish a statfs request set.
 * When at least one request has completed, the aggregate held in
 * set->set_oi->oi_osfs is passed to lov_fini_statfs() together with the
 * number of successful requests.
 * NOTE(review): release of the set and the return path are not visible in
 * this view.
 */
203 int lov_fini_statfs_set(struct lov_request_set *set)
/* Nothing to finalize if no request ever completed. */
211 if (atomic_read(&set->set_completes)) {
212 rc = lov_fini_statfs(set->set_obd, set->set_oi->oi_osfs,
213 atomic_read(&set->set_success));
/*
 * Fold one target's statfs reply (lov_sfs) into the running aggregate
 * (osfs).
 * The visible steps are: a wholesale copy of the reply into the aggregate,
 * a block-size reconciliation that right-shifts the block counters of
 * whichever side uses the smaller block size, an update of the free and
 * available block counts, a sum of the total block count, and overflow-
 * checked sums of the object counters.
 * NOTE(review): the remainder of the parameter list and several guards
 * are not visible in this view; the caller passes a success count as the
 * third argument.
 */
219 void lov_update_statfs(struct obd_statfs *osfs, struct obd_statfs *lov_sfs,
222 int shift = 0, quit = 0;
/*
 * NOTE(review): the guard for this copy is not visible; presumably it
 * applies only to the first successful reply - confirm.
 */
226 memcpy(osfs, lov_sfs, sizeof(*lov_sfs));
/* Block sizes differ: bring both sides to the larger block size. */
228 if (osfs->os_bsize != lov_sfs->os_bsize) {
229 /* assume all block sizes are always powers of 2 */
230 /* get the bits difference */
231 tmp = osfs->os_bsize | lov_sfs->os_bsize;
/* NOTE(review): the loop body that derives 'shift' is not visible here. */
232 for (shift = 0; shift <= 64; ++shift) {
/* Aggregate has the smaller blocks: adopt the larger size, rescale it. */
244 if (osfs->os_bsize < lov_sfs->os_bsize) {
245 osfs->os_bsize = lov_sfs->os_bsize;
247 osfs->os_bfree >>= shift;
248 osfs->os_bavail >>= shift;
249 osfs->os_blocks >>= shift;
/* Reply has the smaller blocks: rescale the reply instead. */
250 } else if (shift != 0) {
251 lov_sfs->os_bfree >>= shift;
252 lov_sfs->os_bavail >>= shift;
253 lov_sfs->os_blocks >>= shift;
256 /* Sandia requested that df (and so, statfs) only
257 returned minimal available space on
258 a single OST, so people would be able to
259 write this much data guaranteed. */
260 if (osfs->os_bavail > lov_sfs->os_bavail) {
261 /* Presumably if new bavail is smaller,
262 new bfree is bigger as well */
263 osfs->os_bfree = lov_sfs->os_bfree;
264 osfs->os_bavail = lov_sfs->os_bavail;
/*
 * NOTE(review): whatever separates the minimum-based update above from
 * the summing update below is not visible in this view.
 */
267 osfs->os_bfree += lov_sfs->os_bfree;
268 osfs->os_bavail += lov_sfs->os_bavail;
/* The total block count is always summed. */
270 osfs->os_blocks += lov_sfs->os_blocks;
271 /* XXX not sure about this one - depends on policy.
272 * - could be minimum if we always stripe on all OBDs
273 * (but that would be wrong for any other policy,
274 * if one of the OBDs has no more objects left)
275 * - could be sum if we stripe whole objects
276 * - could be average, just to give a nice number
278 * To give a "reasonable" (if not wholly accurate)
279 * number, we divide the total number of free objects
280 * by expected stripe count (watch out for overflow).
282 LOV_SUM_MAX(osfs->os_files, lov_sfs->os_files);
283 LOV_SUM_MAX(osfs->os_ffree, lov_sfs->os_ffree);
287 /* The callback for osc_statfs_async that finalizes a request info when a
288 * response is received. */
/*
 * 'cookie' is the obd_info embedded in a lov_request as rq_oi; 'rc' is
 * forwarded to lov_update_set().
 * NOTE(review): some declarations, the out_update label, the guard between
 * lov_update_set() and the target lookup, and the return statement are not
 * visible in this view.
 */
289 static int cb_statfs_update(void *cookie, int rc)
291 struct obd_info *oinfo = cookie;
292 struct lov_request *lovreq;
293 struct lov_request_set *set;
294 struct obd_statfs *osfs, *lov_sfs;
296 struct lov_tgt_desc *tgt;
297 struct obd_device *lovobd, *tgtobd;
/* Recover the enclosing request, then its set and the LOV device. */
301 lovreq = container_of(oinfo, struct lov_request, rq_oi);
302 set = lovreq->rq_rqset;
303 lovobd = set->set_obd;
304 lov = &lovobd->u.lov;
/* osfs is the set-wide aggregate, lov_sfs this request's own reply. */
305 osfs = set->set_oi->oi_osfs;
306 lov_sfs = oinfo->oi_osfs;
/* Read before lov_update_set(), so this request is not counted yet. */
307 success = atomic_read(&set->set_success);
308 /* XXX: the same is done in lov_update_common_set, however
309 lovset->set_exp is not initialized. */
310 lov_update_set(set, lovreq, rc);
/* A missing or inactive target contributes nothing to the aggregate. */
315 tgt = lov->lov_tgts[lovreq->rq_idx];
316 if (!tgt || !tgt->ltd_active)
317 GOTO(out_update, rc);
/*
 * Cache the reply on the target's own obd under its spinlock; the age is
 * refreshed only when the reply is not flagged OBD_STATFS_FROM_CACHE.
 */
319 tgtobd = class_exp2obd(tgt->ltd_exp);
320 spin_lock(&tgtobd->obd_osfs_lock);
321 memcpy(&tgtobd->obd_osfs, lov_sfs, sizeof(*lov_sfs));
322 if ((oinfo->oi_flags & OBD_STATFS_FROM_CACHE) == 0)
323 tgtobd->obd_osfs_age = cfs_time_current_64();
324 spin_unlock(&tgtobd->obd_osfs_lock);
/* Fold this reply into the set-wide aggregate. */
327 lov_update_statfs(osfs, lov_sfs, success);
/*
 * With OBD_STATFS_PTLRPCD set and the set finished, run the interpret
 * step from here; its last argument is non-zero when set_count differs
 * from the number of successes.
 */
331 if (set->set_oi->oi_flags & OBD_STATFS_PTLRPCD &&
332 lov_set_finished(set, 0)) {
333 lov_statfs_interpret(NULL, set, set->set_count !=
334 atomic_read(&set->set_success));
340 int lov_prep_statfs_set(struct obd_device *obd, struct obd_info *oinfo,
341 struct lov_request_set **reqset)
343 struct lov_request_set *set;
344 struct lov_obd *lov = &obd->u.lov;
348 OBD_ALLOC(set, sizeof(*set));
356 /* We only get block data from the OBD */
357 for (i = 0; i < lov->desc.ld_tgt_count; i++) {
358 struct lov_request *req;
360 if (lov->lov_tgts[i] == NULL ||
361 (oinfo->oi_flags & OBD_STATFS_NODELAY &&
362 !lov->lov_tgts[i]->ltd_active)) {
363 CDEBUG(D_HA, "lov idx %d inactive\n", i);
367 if (!lov->lov_tgts[i]->ltd_active)
368 lov_check_and_wait_active(lov, i);
370 /* skip targets that have been explicitely disabled by the
372 if (!lov->lov_tgts[i]->ltd_exp) {
373 CDEBUG(D_HA, "lov idx %d administratively disabled\n", i);
377 OBD_ALLOC(req, sizeof(*req));
379 GOTO(out_set, rc = -ENOMEM);
381 OBD_ALLOC(req->rq_oi.oi_osfs, sizeof(*req->rq_oi.oi_osfs));
382 if (req->rq_oi.oi_osfs == NULL) {
383 OBD_FREE(req, sizeof(*req));
384 GOTO(out_set, rc = -ENOMEM);
388 req->rq_oi.oi_cb_up = cb_statfs_update;
389 req->rq_oi.oi_flags = oinfo->oi_flags;
391 lov_set_add_req(req, set);
394 GOTO(out_set, rc = -EIO);
398 lov_fini_statfs_set(set);