4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright (c) 2011, 2016, Intel Corporation.
29 * This file is part of Lustre, http://www.lustre.org/
32 #define DEBUG_SUBSYSTEM S_LOV
34 #include <linux/delay.h>
35 #include <libcfs/libcfs.h>
37 #include <obd_class.h>
38 #include "lov_internal.h"
/*
 * Put a request set into its initial state: the completed-request and
 * successful-request counters are zeroed and the request list is made
 * empty.
 *
 * NOTE(review): this listing has gaps (the braces and possibly further
 * field initialisation are not shown) - confirm against the full file.
 */
40 static void lov_init_set(struct lov_request_set *set)
43 atomic_set(&set->set_completes, 0);
44 atomic_set(&set->set_success, 0);
45 INIT_LIST_HEAD(&set->set_list);
/*
 * Tear down a request set: requests are taken from the head of
 * set->set_list one at a time, unlinked, and their statfs buffer is
 * released.
 *
 * NOTE(review): the rest of the loop condition and whatever follows the
 * buffer release (presumably freeing the request and the set itself) are
 * not part of this listing - confirm against the full file.
 */
48 static void lov_finish_set(struct lov_request_set *set)
50 struct lov_request *req;
54 while ((req = list_first_entry_or_null(&set->set_list,
57 list_del_init(&req->rq_link);
/* The per-request statfs buffer is optional; free it only when present. */
58 if (req->rq_oi.oi_osfs)
59 OBD_FREE_PTR(req->rq_oi.oi_osfs);
/*
 * Account for one finished request in its set: set_completes is
 * incremented, and set_success is incremented as well.
 *
 * NOTE(review): a line is missing between the two increments in this
 * listing; the success increment is presumably conditional on rc
 * (e.g. only when rc is 0) - confirm against the full file.  req is not
 * referenced by any visible line.
 */
68 lov_update_set(struct lov_request_set *set, struct lov_request *req, int rc)
70 atomic_inc(&set->set_completes);
72 atomic_inc(&set->set_success);
/*
 * Queue a request on a request set by appending it to the tail of
 * set->set_list.
 *
 * NOTE(review): further statements appear to be missing from this
 * listing.  cb_statfs_update() reads req->rq_rqset, so the back pointer
 * is presumably assigned here - confirm against the full file.
 */
76 lov_set_add_req(struct lov_request *req, struct lov_request_set *set)
78 list_add_tail(&req->rq_link, &set->set_list);
/*
 * Under lov->lov_lock, test whether target slot idx satisfies any of:
 *  - the slot is empty,
 *  - the target is marked active,
 *  - the target has an export whose client import has imp_connect_tried
 *    set.
 *
 * NOTE(review): the statements that record and return the outcome are not
 * part of this listing.  The visible caller negates the return value in a
 * wait condition, so non-zero presumably means "condition met" - confirm.
 */
83 static int lov_check_set(struct lov_obd *lov, int idx)
87 mutex_lock(&lov->lov_lock);
89 if (!lov->lov_tgts[idx] || lov->lov_tgts[idx]->ltd_active ||
90 (lov->lov_tgts[idx]->ltd_exp &&
91 class_exp2cliimp(lov->lov_tgts[idx]->ltd_exp)->imp_connect_tried))
94 mutex_unlock(&lov->lov_lock);
99 * Check if the OSC connection exists and is active.
100 * If the OSC has not yet had a chance to connect to the OST the first time,
101 * wait once for it to connect instead of returning an error.
/*
 * NOTE(review): this listing has gaps - the result variable, the early
 * exits taken by the tests below, the wait loop header and the return
 * statements are not shown.  The comments here describe only the visible
 * lines; confirm the rest against the full file.
 */
103 static int lov_check_and_wait_active(struct lov_obd *lov, int ost_idx)
105 struct lov_tgt_desc *tgt;
106 struct obd_import *imp = NULL;
/* The target slot is looked up and examined with lov_lock held. */
110 mutex_lock(&lov->lov_lock);
112 tgt = lov->lov_tgts[ost_idx];
/* Expected common case: the target is already active. */
117 if (likely(tgt->ltd_active))
/*
 * Look at the client import behind the target's export: an import that
 * has already tried to connect, or one in the IDLE state, is tested for
 * explicitly (what each test leads to is not visible here).
 */
121 imp = class_exp2cliimp(tgt->ltd_exp);
122 if (imp && imp->imp_connect_tried)
124 if (imp && imp->imp_state == LUSTRE_IMP_IDLE)
/*
 * Release lov_lock before the lov_check_set() test below, since
 * lov_check_set() takes lov_lock itself.
 */
127 mutex_unlock(&lov->lov_lock);
131 !lov_check_set(lov, ost_idx)) {
/*
 * Second unlock: presumably the shared exit path for the tests made with
 * the lock held above (its label is not shown) - confirm.
 */
141 mutex_unlock(&lov->lov_lock);
/* All-ones 64-bit value, used as the "saturated" marker for counters. */
145 #define LOV_U64_MAX ((__u64)~0ULL)
/*
 * Overflow-aware accumulation of add into tot: when (tot) + (add) wraps
 * around (the sum compares smaller than tot), tot is pinned to
 * LOV_U64_MAX.  Both arguments are expanded more than once, so they must
 * be free of side effects.
 *
 * NOTE(review): the remaining lines of the macro (presumably the plain
 * addition for the non-overflow case) are not part of this listing.
 */
146 #define LOV_SUM_MAX(tot, add) \
148 if ((tot) + (add) < (tot)) \
149 (tot) = LOV_U64_MAX; \
/*
 * Finalise an aggregated statfs result and publish it as the device's
 * cached statfs.
 *
 * obd:     LOV device whose cache (obd_osfs / obd_osfs_age) is updated
 * osfs:    aggregated statfs; os_files and os_ffree are modified in place
 * success: not referenced by any visible line
 *
 * NOTE(review): this listing has gaps - the second argument of
 * lov_get_stripe_count(), any guard on success, and the return statement
 * are not shown.  Confirm against the full file.
 */
155 lov_fini_statfs(struct obd_device *obd, struct obd_statfs *osfs, int success)
160 __u32 expected_stripes = lov_get_stripe_count(&obd->u.lov,
/*
 * Scale the object counts by the expected stripe count (lov_do_div64()
 * presumably divides its first argument in place - confirm).  Counts that
 * are at LOV_U64_MAX are left untouched.
 */
162 if (osfs->os_files != LOV_U64_MAX)
163 lov_do_div64(osfs->os_files, expected_stripes);
164 if (osfs->os_ffree != LOV_U64_MAX)
165 lov_do_div64(osfs->os_ffree, expected_stripes);
/* Store the result and its timestamp (seconds) under obd_osfs_lock. */
167 spin_lock(&obd->obd_osfs_lock);
168 memcpy(&obd->obd_osfs, osfs, sizeof(*osfs));
169 obd->obd_osfs_age = ktime_get_seconds();
170 spin_unlock(&obd->obd_osfs_lock);
/*
 * Finish a statfs request set.  When at least one request has completed,
 * the aggregated statfs of the set (set->set_oi->oi_osfs) is passed to
 * lov_fini_statfs() together with the current success count.
 *
 * NOTE(review): the declaration of rc, the release of the set and the
 * return statement are not part of this listing - confirm against the
 * full file.
 */
177 int lov_fini_statfs_set(struct lov_request_set *set)
185 if (atomic_read(&set->set_completes)) {
186 rc = lov_fini_statfs(set->set_obd, set->set_oi->oi_osfs,
187 atomic_read(&set->set_success));
/*
 * Merge one target's statfs reply (lov_sfs) into the aggregate (osfs).
 * Note that lov_sfs may be modified too: its block counts are rescaled
 * when its block size is the smaller one.
 *
 * NOTE(review): this listing has gaps - the last parameter, the
 * declaration of tmp, the guard around the initial copy, the body of the
 * shift loop and the selection between the two bfree/bavail merge forms
 * are not shown.  Confirm against the full file.
 */
196 lov_update_statfs(struct obd_statfs *osfs, struct obd_statfs *lov_sfs,
199 int shift = 0, quit = 0;
/*
 * Take the target's reply as the aggregate wholesale (presumably only for
 * the first successful reply - the condition is not shown).
 */
203 memcpy(osfs, lov_sfs, sizeof(*lov_sfs));
/* Bring both sides to a common block size before combining block counts. */
205 if (osfs->os_bsize != lov_sfs->os_bsize) {
206 /* assume all block sizes are always powers of 2 */
207 /* get the bits difference */
208 tmp = osfs->os_bsize | lov_sfs->os_bsize;
209 for (shift = 0; shift <= 64; ++shift) {
/*
 * The larger block size wins: whichever side was counted in the smaller
 * blocks has its block counts shifted right by shift.
 */
220 if (osfs->os_bsize < lov_sfs->os_bsize) {
221 osfs->os_bsize = lov_sfs->os_bsize;
223 osfs->os_bfree >>= shift;
224 osfs->os_bavail >>= shift;
225 osfs->os_blocks >>= shift;
226 } else if (shift != 0) {
227 lov_sfs->os_bfree >>= shift;
228 lov_sfs->os_bavail >>= shift;
229 lov_sfs->os_blocks >>= shift;
233 * Sandia requested that df (and so, statfs) only
234 * returned minimal available space on
235 * a single OST, so people would be able to
236 * write this much data guaranteed.
238 if (osfs->os_bavail > lov_sfs->os_bavail) {
240 * Presumably if new bavail is smaller,
241 * new bfree is bigger as well
243 osfs->os_bfree = lov_sfs->os_bfree;
244 osfs->os_bavail = lov_sfs->os_bavail;
/*
 * Summing form of the bfree/bavail merge.  NOTE(review): what selects
 * between this and the minimum form above is not visible here.
 */
247 osfs->os_bfree += lov_sfs->os_bfree;
248 osfs->os_bavail += lov_sfs->os_bavail;
250 osfs->os_blocks += lov_sfs->os_blocks;
252 * XXX not sure about this one - depends on policy.
253 * - could be minimum if we always stripe on all OBDs
254 * (but that would be wrong for any other policy,
255 * if one of the OBDs has no more objects left)
256 * - could be sum if we stripe whole objects
257 * - could be average, just to give a nice number
259 * To give a "reasonable" (if not wholly accurate)
260 * number, we divide the total number of free objects
261 * by expected stripe count (watch out for overflow).
/* Object counts are accumulated with the overflow check of LOV_SUM_MAX. */
263 LOV_SUM_MAX(osfs->os_files, lov_sfs->os_files);
264 LOV_SUM_MAX(osfs->os_ffree, lov_sfs->os_ffree);
269 * The callback for osc_statfs_async that finalizes a request info when a
270 * response is received.
/*
 * cookie: the struct obd_info embedded (as rq_oi) in the lov_request
 * rc:     result reported for that request
 *
 * NOTE(review): this listing has gaps - the declarations of lov and
 * success, the out_update label and the return statement are not shown.
 * Confirm against the full file.
 */
272 static int cb_statfs_update(void *cookie, int rc)
274 struct obd_info *oinfo = cookie;
275 struct lov_request *lovreq;
276 struct lov_request_set *set;
277 struct obd_statfs *osfs, *lov_sfs;
279 struct lov_tgt_desc *tgt;
280 struct obd_device *lovobd, *tgtobd;
/*
 * Recover the request from its embedded obd_info and, through it, the
 * owning set, the LOV device, the aggregate buffer (osfs) and this
 * request's own reply buffer (lov_sfs).
 */
285 lovreq = container_of(oinfo, struct lov_request, rq_oi);
286 set = lovreq->rq_rqset;
287 lovobd = set->set_obd;
288 lov = &lovobd->u.lov;
289 osfs = set->set_oi->oi_osfs;
290 lov_sfs = oinfo->oi_osfs;
/*
 * Sample the success count before lov_update_set() below touches the
 * set's counters, so lov_update_statfs() is handed the number of
 * successes recorded before this reply.
 */
291 success = atomic_read(&set->set_success);
293 * XXX: the same is done in lov_update_common_set, however
294 * lovset->set_exp is not initialized.
296 lov_update_set(set, lovreq, rc);
/* lov_tgts[] is accessed between lov_tgts_getref() and lov_tgts_putref(). */
300 lov_tgts_getref(lovobd);
301 tgt = lov->lov_tgts[lovreq->rq_idx];
/* A missing or inactive target gets no refresh of its cached statfs. */
302 if (!tgt || !tgt->ltd_active)
303 GOTO(out_update, rc);
/*
 * Refresh the target device's cached statfs under its obd_osfs_lock; the
 * age stamp is renewed only when the reply did not come from cache.
 */
305 tgtobd = class_exp2obd(tgt->ltd_exp);
306 spin_lock(&tgtobd->obd_osfs_lock);
307 memcpy(&tgtobd->obd_osfs, lov_sfs, sizeof(*lov_sfs));
308 if ((oinfo->oi_flags & OBD_STATFS_FROM_CACHE) == 0)
309 tgtobd->obd_osfs_age = ktime_get_seconds();
310 spin_unlock(&tgtobd->obd_osfs_lock);
/* Fold this reply into the set's aggregate. */
313 lov_update_statfs(osfs, lov_sfs, success);
314 lov_tgts_putref(lovobd);
/*
 * Build a statfs request set for a LOV device: a set is allocated and the
 * target table is walked, creating one request (with its own statfs
 * buffer) for each target that is not skipped.
 *
 * obd:    LOV device
 * oinfo:  caller's request info; its oi_flags are copied into every
 *         request
 * reqset: presumably receives the new set on success (the assignment is
 *         not visible in this listing - confirm)
 *
 * Visible error codes: -ENOMEM when a request or its buffer cannot be
 * allocated, -EIO on a condition that is not shown.
 *
 * NOTE(review): this listing has gaps - the declarations of i and rc, the
 * initialisation of the set, the skip conditions, the out_set label and
 * the return statement are not shown.  Confirm against the full file.
 */
319 int lov_prep_statfs_set(struct obd_device *obd, struct obd_info *oinfo,
320 struct lov_request_set **reqset)
322 struct lov_request_set *set;
323 struct lov_obd *lov = &obd->u.lov;
328 OBD_ALLOC(set, sizeof(*set));
336 /* We only get block data from the OBD */
337 for (i = 0; i < lov->desc.ld_tgt_count; i++) {
338 struct lov_tgt_desc *ltd = lov->lov_tgts[i];
339 struct lov_request *req;
342 CDEBUG(D_HA, "lov idx %d inactive\n", i);
347 * skip targets that have been explicitely disabled by the
351 CDEBUG(D_HA, "lov idx %d administratively disabled\n",
/*
 * With OBD_STATFS_NODELAY set, an inactive target whose import is not in
 * the IDLE state is logged as inactive (and presumably skipped rather
 * than waited for - the statement after the message is not shown).
 */
356 if (oinfo->oi_flags & OBD_STATFS_NODELAY &&
357 class_exp2cliimp(ltd->ltd_exp)->imp_state !=
358 LUSTRE_IMP_IDLE && !ltd->ltd_active) {
359 CDEBUG(D_HA, "lov idx %d inactive\n", i);
/*
 * Give a target that is not active yet a chance to connect; the return
 * value of the wait is not used.
 */
363 if (!ltd->ltd_active)
364 lov_check_and_wait_active(lov, i);
/*
 * Allocate the request and its private statfs buffer.  Either failure
 * leaves through out_set with -ENOMEM; the buffer failure frees the
 * request first.
 */
366 OBD_ALLOC(req, sizeof(*req));
368 GOTO(out_set, rc = -ENOMEM);
370 OBD_ALLOC(req->rq_oi.oi_osfs, sizeof(*req->rq_oi.oi_osfs));
371 if (!req->rq_oi.oi_osfs) {
372 OBD_FREE(req, sizeof(*req));
373 GOTO(out_set, rc = -ENOMEM);
/* Replies are delivered to cb_statfs_update(); caller's flags carry over. */
377 req->rq_oi.oi_cb_up = cb_statfs_update;
378 req->rq_oi.oi_flags = oinfo->oi_flags;
380 lov_set_add_req(req, set);
/* NOTE(review): the condition guarding this -EIO exit is not shown. */
383 GOTO(out_set, rc = -EIO);
/* Cleanup of the set; presumably the out_set path (label not shown). */
387 lov_fini_statfs_set(set);