X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Flov%2Flov_obd.c;h=1b31a17e2fd135b33c8a583db5575868bbd55c34;hb=53d5b14f0dd4fda046bb1d0ce578a1849cec59db;hp=919e698c76c72e9b6082a455dd1eb50ac167cfa2;hpb=4883eb606e69f4e44a9ecd0b729d255d2ede4b63;p=fs%2Flustre-release.git diff --git a/lustre/lov/lov_obd.c b/lustre/lov/lov_obd.c index 919e698..1b31a17 100644 --- a/lustre/lov/lov_obd.c +++ b/lustre/lov/lov_obd.c @@ -1,6 +1,4 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * +/* * GPL HEADER START * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -28,6 +26,8 @@ /* * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. + * + * Copyright (c) 2011, 2013, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -41,9 +41,6 @@ * Author: Nathan Rutman */ -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif #define DEBUG_SUBSYSTEM S_LOV #ifdef __KERNEL__ #include @@ -52,19 +49,19 @@ #endif #include +#include #include #include #include #include #include -#include #include -#include -#include #include #include #include +#include #include +#include #include "lov_internal.h" @@ -72,30 +69,31 @@ Any function that expects lov_tgts to remain stationary must take a ref. 
*/ static void lov_getref(struct obd_device *obd) { - struct lov_obd *lov = &obd->u.lov; + struct lov_obd *lov = &obd->u.lov; - /* nobody gets through here until lov_putref is done */ - cfs_mutex_down(&lov->lov_lock); - cfs_atomic_inc(&lov->lov_refcount); - cfs_mutex_up(&lov->lov_lock); - return; + /* nobody gets through here until lov_putref is done */ + mutex_lock(&lov->lov_lock); + atomic_inc(&lov->lov_refcount); + mutex_unlock(&lov->lov_lock); + return; } static void __lov_del_obd(struct obd_device *obd, struct lov_tgt_desc *tgt); static void lov_putref(struct obd_device *obd) { - struct lov_obd *lov = &obd->u.lov; - - cfs_mutex_down(&lov->lov_lock); - /* ok to dec to 0 more than once -- ltd_exp's will be null */ - if (cfs_atomic_dec_and_test(&lov->lov_refcount) && lov->lov_death_row) { - CFS_LIST_HEAD(kill); - int i; - struct lov_tgt_desc *tgt, *n; - CDEBUG(D_CONFIG, "destroying %d lov targets\n", - lov->lov_death_row); - for (i = 0; i < lov->desc.ld_tgt_count; i++) { + struct lov_obd *lov = &obd->u.lov; + + mutex_lock(&lov->lov_lock); + /* ok to dec to 0 more than once -- ltd_exp's will be null */ + if (atomic_dec_and_test(&lov->lov_refcount) && lov->lov_death_row) { + struct list_head kill = LIST_HEAD_INIT(kill); + struct lov_tgt_desc *tgt, *n; + int i; + + CDEBUG(D_CONFIG, "destroying %d lov targets\n", + lov->lov_death_row); + for (i = 0; i < lov->desc.ld_tgt_count; i++) { tgt = lov->lov_tgts[i]; if (!tgt || !tgt->ltd_reap) @@ -108,7 +106,7 @@ static void lov_putref(struct obd_device *obd) lov->lov_tgts[i] = NULL; lov->lov_death_row--; } - cfs_mutex_up(&lov->lov_lock); + mutex_unlock(&lov->lov_lock); cfs_list_for_each_entry_safe(tgt, n, &kill, ltd_kill) { cfs_list_del(&tgt->ltd_kill); @@ -116,33 +114,28 @@ static void lov_putref(struct obd_device *obd) __lov_del_obd(obd, tgt); } } else { - cfs_mutex_up(&lov->lov_lock); + mutex_unlock(&lov->lov_lock); } } static int lov_set_osc_active(struct obd_device *obd, struct obd_uuid *uuid, - int activate); + enum 
obd_notify_event ev); static int lov_notify(struct obd_device *obd, struct obd_device *watched, enum obd_notify_event ev, void *data); - -#define MAX_STRING_SIZE 128 int lov_connect_obd(struct obd_device *obd, __u32 index, int activate, struct obd_connect_data *data) { - struct lov_obd *lov = &obd->u.lov; - struct obd_uuid *tgt_uuid; - struct obd_device *tgt_obd; - static struct obd_uuid lov_osc_uuid = { "LOV_OSC_UUID" }; - struct obd_import *imp; -#ifdef __KERNEL__ - cfs_proc_dir_entry_t *lov_proc_dir; -#endif - int rc; - ENTRY; + struct lov_obd *lov = &obd->u.lov; + struct obd_uuid *tgt_uuid; + struct obd_device *tgt_obd; + static struct obd_uuid lov_osc_uuid = { "LOV_OSC_UUID" }; + struct obd_import *imp; + int rc; + ENTRY; - if (!lov->lov_tgts[index]) - RETURN(-EINVAL); + if (lov->lov_tgts[index] == NULL) + RETURN(-EINVAL); tgt_uuid = &lov->lov_tgts[index]->ltd_uuid; tgt_obd = lov->lov_tgts[index]->ltd_obd; @@ -179,7 +172,7 @@ int lov_connect_obd(struct obd_device *obd, __u32 index, int activate, if (imp->imp_invalid) { - CERROR("not connecting OSC %s; administratively " + CDEBUG(D_CONFIG, "not connecting OSC %s; administratively " "disabled\n", obd_uuid2str(tgt_uuid)); RETURN(0); } @@ -197,35 +190,29 @@ int lov_connect_obd(struct obd_device *obd, __u32 index, int activate, CDEBUG(D_CONFIG, "Connected tgt idx %d %s (%s) %sactive\n", index, obd_uuid2str(tgt_uuid), tgt_obd->obd_name, activate ? 
"":"in"); -#ifdef __KERNEL__ - lov_proc_dir = lprocfs_srch(obd->obd_proc_entry, "target_obds"); - if (lov_proc_dir) { - struct obd_device *osc_obd = lov->lov_tgts[index]->ltd_exp->exp_obd; - cfs_proc_dir_entry_t *osc_symlink; - - LASSERT(osc_obd != NULL); - LASSERT(osc_obd->obd_magic == OBD_DEVICE_MAGIC); - LASSERT(osc_obd->obd_type->typ_name != NULL); - - osc_symlink = lprocfs_add_symlink(osc_obd->obd_name, - lov_proc_dir, - "../../../%s/%s", - osc_obd->obd_type->typ_name, - osc_obd->obd_name); - if (osc_symlink == NULL) { - CERROR("could not register LOV target " - "/proc/fs/lustre/%s/%s/target_obds/%s.", - obd->obd_type->typ_name, obd->obd_name, - osc_obd->obd_name); - lprocfs_remove(&lov_proc_dir); + if (obd->obd_type->typ_procsym != NULL) { + struct proc_dir_entry *osc_symlink; + struct obd_device *osc_obd; + + osc_obd = lov->lov_tgts[index]->ltd_exp->exp_obd; + + LASSERT(osc_obd != NULL); + LASSERT(osc_obd->obd_magic == OBD_DEVICE_MAGIC); + LASSERT(osc_obd->obd_type->typ_name != NULL); + + osc_symlink = lprocfs_add_symlink(osc_obd->obd_name, + obd->obd_type->typ_procsym, + "../../../%s/%s", + osc_obd->obd_type->typ_name, + osc_obd->obd_name); + if (osc_symlink == NULL) { + CERROR("could not register LOV target " + "/proc/fs/lustre/%s/%s/target_obds/%s.", + obd->obd_type->typ_name, obd->obd_name, + osc_obd->obd_name); + lprocfs_remove(&obd->obd_type->typ_procsym); } } -#endif - - rc = qos_add_tgt(obd, index); - if (rc) - CERROR("qos_add_tgt failed %d\n", rc); - RETURN(0); } @@ -256,6 +243,15 @@ static int lov_connect(const struct lu_env *env, if (data) lov->lov_ocd = *data; + obd->obd_type->typ_procsym = lprocfs_seq_register("target_obds", + obd->obd_proc_entry, + NULL, NULL); + if (IS_ERR(obd->obd_type->typ_procsym)) { + CERROR("%s: could not register /proc/fs/lustre/%s/%s/target_obds.", + obd->obd_name, obd->obd_type->typ_name, obd->obd_name); + obd->obd_type->typ_procsym = NULL; + } + obd_getref(obd); for (i = 0; i < lov->desc.ld_tgt_count; i++) { tgt = 
lov->lov_tgts[i]; @@ -286,7 +282,6 @@ static int lov_connect(const struct lu_env *env, static int lov_disconnect_obd(struct obd_device *obd, struct lov_tgt_desc *tgt) { - cfs_proc_dir_entry_t *lov_proc_dir; struct lov_obd *lov = &obd->u.lov; struct obd_device *osc_obd; int rc; @@ -302,29 +297,19 @@ static int lov_disconnect_obd(struct obd_device *obd, struct lov_tgt_desc *tgt) tgt->ltd_exp->exp_obd->obd_inactive = 1; } - lov_proc_dir = lprocfs_srch(obd->obd_proc_entry, "target_obds"); - if (lov_proc_dir) { - cfs_proc_dir_entry_t *osc_symlink; - - osc_symlink = lprocfs_srch(lov_proc_dir, osc_obd->obd_name); - if (osc_symlink) { - lprocfs_remove(&osc_symlink); - } else { - CERROR("/proc/fs/lustre/%s/%s/target_obds/%s missing.", - obd->obd_type->typ_name, obd->obd_name, - osc_obd->obd_name); - } - } + if (osc_obd) { + /* Pass it on to our clients. + * XXX This should be an argument to disconnect, + * XXX not a back-door flag on the OBD. Ah well. + */ + osc_obd->obd_force = obd->obd_force; + osc_obd->obd_fail = obd->obd_fail; + osc_obd->obd_no_recov = obd->obd_no_recov; - if (osc_obd) { - /* Pass it on to our clients. - * XXX This should be an argument to disconnect, - * XXX not a back-door flag on the OBD. Ah well. 
- */ - osc_obd->obd_force = obd->obd_force; - osc_obd->obd_fail = obd->obd_fail; - osc_obd->obd_no_recov = obd->obd_no_recov; - } + if (obd->obd_type->typ_procsym) + lprocfs_remove_proc_entry(osc_obd->obd_name, + obd->obd_type->typ_procsym); + } obd_register_observer(osc_obd, NULL); @@ -335,8 +320,6 @@ static int lov_disconnect_obd(struct obd_device *obd, struct lov_tgt_desc *tgt) rc = 0; } - qos_del_tgt(obd, tgt); - tgt->ltd_exp = NULL; RETURN(0); } @@ -371,6 +354,9 @@ static int lov_disconnect(struct obd_export *exp) } obd_putref(obd); + if (obd->obd_type->typ_procsym) + lprocfs_remove(&obd->obd_type->typ_procsym); + out: rc = class_disconnect(exp); /* bz 9811 */ RETURN(rc); @@ -384,21 +370,34 @@ out: * any >= 0 : is log target index */ static int lov_set_osc_active(struct obd_device *obd, struct obd_uuid *uuid, - int activate) + enum obd_notify_event ev) { struct lov_obd *lov = &obd->u.lov; struct lov_tgt_desc *tgt; - int index; + int index, activate, active; ENTRY; - CDEBUG(D_INFO, "Searching in lov %p for uuid %s (activate=%d)\n", - lov, uuid->uuid, activate); - - obd_getref(obd); - for (index = 0; index < lov->desc.ld_tgt_count; index++) { - tgt = lov->lov_tgts[index]; - if (!tgt || !tgt->ltd_exp) - continue; + CDEBUG(D_INFO, "Searching in lov %p for uuid %s event(%d)\n", + lov, uuid->uuid, ev); + + obd_getref(obd); + for (index = 0; index < lov->desc.ld_tgt_count; index++) { + tgt = lov->lov_tgts[index]; + if (!tgt) + continue; + /* + * LU-642, initially inactive OSC could miss the obd_connect, + * we make up for it here. 
+ */ + if (ev == OBD_NOTIFY_ACTIVATE && tgt->ltd_exp == NULL && + obd_uuid_equals(uuid, &tgt->ltd_uuid)) { + struct obd_uuid lov_osc_uuid = {"LOV_OSC_UUID"}; + + obd_connect(NULL, &tgt->ltd_exp, tgt->ltd_obd, + &lov_osc_uuid, &lov->lov_ocd, NULL); + } + if (!tgt->ltd_exp) + continue; CDEBUG(D_INFO, "lov idx %d is %s conn "LPX64"\n", index, obd_uuid2str(&tgt->ltd_uuid), @@ -410,26 +409,41 @@ static int lov_set_osc_active(struct obd_device *obd, struct obd_uuid *uuid, if (index == lov->desc.ld_tgt_count) GOTO(out, index = -EINVAL); - if (lov->lov_tgts[index]->ltd_active == activate) { - CDEBUG(D_INFO, "OSC %s already %sactive!\n", uuid->uuid, - activate ? "" : "in"); - GOTO(out, index); - } + if (ev == OBD_NOTIFY_DEACTIVATE || ev == OBD_NOTIFY_ACTIVATE) { + activate = (ev == OBD_NOTIFY_ACTIVATE) ? 1 : 0; - CDEBUG(D_CONFIG, "Marking OSC %s %sactive\n", obd_uuid2str(uuid), - activate ? "" : "in"); + if (lov->lov_tgts[index]->ltd_activate == activate) { + CDEBUG(D_INFO, "OSC %s already %sactivate!\n", + uuid->uuid, activate ? "" : "de"); + } else { + lov->lov_tgts[index]->ltd_activate = activate; + CDEBUG(D_CONFIG, "%sactivate OSC %s\n", + activate ? "" : "de", obd_uuid2str(uuid)); + } - lov->lov_tgts[index]->ltd_active = activate; + } else if (ev == OBD_NOTIFY_INACTIVE || ev == OBD_NOTIFY_ACTIVE) { + active = (ev == OBD_NOTIFY_ACTIVE) ? 1 : 0; - if (activate) { - lov->desc.ld_active_tgt_count++; - lov->lov_tgts[index]->ltd_exp->exp_obd->obd_inactive = 0; + if (lov->lov_tgts[index]->ltd_active == active) { + CDEBUG(D_INFO, "OSC %s already %sactive!\n", + uuid->uuid, active ? "" : "in"); + GOTO(out, index); + } else { + CDEBUG(D_CONFIG, "Marking OSC %s %sactive\n", + obd_uuid2str(uuid), active ? 
"" : "in"); + } + + lov->lov_tgts[index]->ltd_active = active; + if (active) { + lov->desc.ld_active_tgt_count++; + lov->lov_tgts[index]->ltd_exp->exp_obd->obd_inactive = 0; + } else { + lov->desc.ld_active_tgt_count--; + lov->lov_tgts[index]->ltd_exp->exp_obd->obd_inactive = 1; + } } else { - lov->desc.ld_active_tgt_count--; - lov->lov_tgts[index]->ltd_exp->exp_obd->obd_inactive = 1; + CERROR("Unknown event(%d) for uuid %s", ev, uuid->uuid); } - /* remove any old qos penalty */ - lov->lov_tgts[index]->ltd_qos.ltq_penalty = 0; out: obd_putref(obd); @@ -440,14 +454,23 @@ static int lov_notify(struct obd_device *obd, struct obd_device *watched, enum obd_notify_event ev, void *data) { int rc = 0; + struct lov_obd *lov = &obd->u.lov; ENTRY; - if (ev == OBD_NOTIFY_ACTIVE || ev == OBD_NOTIFY_INACTIVE) { + down_read(&lov->lov_notify_lock); + if (!lov->lov_connects) { + up_read(&lov->lov_notify_lock); + RETURN(rc); + } + + if (ev == OBD_NOTIFY_ACTIVE || ev == OBD_NOTIFY_INACTIVE || + ev == OBD_NOTIFY_ACTIVATE || ev == OBD_NOTIFY_DEACTIVATE) { struct obd_uuid *uuid; LASSERT(watched); if (strcmp(watched->obd_type->typ_name, LUSTRE_OSC_NAME)) { + up_read(&lov->lov_notify_lock); CERROR("unexpected notification of %s %s!\n", watched->obd_type->typ_name, watched->obd_name); @@ -458,10 +481,10 @@ static int lov_notify(struct obd_device *obd, struct obd_device *watched, /* Set OSC as active before notifying the observer, so the * observer can use the OSC normally. */ - rc = lov_set_osc_active(obd, uuid, ev == OBD_NOTIFY_ACTIVE); + rc = lov_set_osc_active(obd, uuid, ev); if (rc < 0) { - CERROR("%sactivation of %s failed: %d\n", - (ev == OBD_NOTIFY_ACTIVE) ? 
"" : "de", + up_read(&lov->lov_notify_lock); + CERROR("event(%d) of %s failed: %d\n", ev, obd_uuid2str(uuid), rc); RETURN(rc); } @@ -476,39 +499,41 @@ static int lov_notify(struct obd_device *obd, struct obd_device *watched, /* NULL watched means all osc's in the lov (only for syncs) */ /* sync event should be send lov idx as data */ struct lov_obd *lov = &obd->u.lov; - struct obd_device *tgt_obd; - int i; + int i, is_sync; + + data = &i; + is_sync = (ev == OBD_NOTIFY_SYNC) || + (ev == OBD_NOTIFY_SYNC_NONBLOCK); + obd_getref(obd); for (i = 0; i < lov->desc.ld_tgt_count; i++) { + if (!lov->lov_tgts[i]) + continue; + /* don't send sync event if target not * connected/activated */ - if (!lov->lov_tgts[i] || - !lov->lov_tgts[i]->ltd_active) - continue; - - if ((ev == OBD_NOTIFY_SYNC) || - (ev == OBD_NOTIFY_SYNC_NONBLOCK)) - data = &i; - - tgt_obd = class_exp2obd(lov->lov_tgts[i]->ltd_exp); + if (is_sync && !lov->lov_tgts[i]->ltd_active) + continue; - rc = obd_notify_observer(obd, tgt_obd, ev, data); + rc = obd_notify_observer(obd, lov->lov_tgts[i]->ltd_obd, + ev, data); if (rc) { CERROR("%s: notify %s of %s failed %d\n", obd->obd_name, obd->obd_observer->obd_name, - tgt_obd->obd_name, rc); - break; + lov->lov_tgts[i]->ltd_obd->obd_name, + rc); } } obd_putref(obd); } + up_read(&lov->lov_notify_lock); RETURN(rc); } -int lov_add_target(struct obd_device *obd, struct obd_uuid *uuidp, - __u32 index, int gen, int active) +static int lov_add_target(struct obd_device *obd, struct obd_uuid *uuidp, + __u32 index, int gen, int active) { struct lov_obd *lov = &obd->u.lov; struct lov_tgt_desc *tgt; @@ -530,13 +555,13 @@ int lov_add_target(struct obd_device *obd, struct obd_uuid *uuidp, if (tgt_obd == NULL) RETURN(-EINVAL); - cfs_mutex_down(&lov->lov_lock); + mutex_lock(&lov->lov_lock); if ((index < lov->lov_tgt_size) && (lov->lov_tgts[index] != NULL)) { tgt = lov->lov_tgts[index]; CERROR("UUID %s already assigned at LOV target index %d\n", obd_uuid2str(&tgt->ltd_uuid), index); - 
cfs_mutex_up(&lov->lov_lock); + mutex_unlock(&lov->lov_lock); RETURN(-EEXIST); } @@ -550,7 +575,7 @@ int lov_add_target(struct obd_device *obd, struct obd_uuid *uuidp, newsize = newsize << 1; OBD_ALLOC(newtgts, sizeof(*newtgts) * newsize); if (newtgts == NULL) { - cfs_mutex_up(&lov->lov_lock); + mutex_unlock(&lov->lov_lock); RETURN(-ENOMEM); } @@ -561,13 +586,11 @@ int lov_add_target(struct obd_device *obd, struct obd_uuid *uuidp, oldsize = lov->lov_tgt_size; } - lov->lov_tgts = newtgts; - lov->lov_tgt_size = newsize; -#ifdef __KERNEL__ - smp_rmb(); -#endif - if (old) - OBD_FREE(old, sizeof(*old) * oldsize); + lov->lov_tgts = newtgts; + lov->lov_tgt_size = newsize; + smp_rmb(); + if (old) + OBD_FREE(old, sizeof(*old) * oldsize); CDEBUG(D_CONFIG, "tgts: %p size: %d\n", lov->lov_tgts, lov->lov_tgt_size); @@ -575,18 +598,17 @@ int lov_add_target(struct obd_device *obd, struct obd_uuid *uuidp, OBD_ALLOC_PTR(tgt); if (!tgt) { - cfs_mutex_up(&lov->lov_lock); + mutex_unlock(&lov->lov_lock); RETURN(-ENOMEM); } rc = lov_ost_pool_add(&lov->lov_packed, index, lov->lov_tgt_size); if (rc) { - cfs_mutex_up(&lov->lov_lock); + mutex_unlock(&lov->lov_lock); OBD_FREE_PTR(tgt); RETURN(rc); } - memset(tgt, 0, sizeof(*tgt)); tgt->ltd_uuid = *uuidp; tgt->ltd_obd = tgt_obd; /* XXX - add a sanity check on the generation number. */ @@ -597,11 +619,13 @@ int lov_add_target(struct obd_device *obd, struct obd_uuid *uuidp, if (index >= lov->desc.ld_tgt_count) lov->desc.ld_tgt_count = index + 1; - cfs_mutex_up(&lov->lov_lock); + mutex_unlock(&lov->lov_lock); CDEBUG(D_CONFIG, "idx=%d ltd_gen=%d ld_tgt_count=%d\n", index, tgt->ltd_gen, lov->desc.ld_tgt_count); + rc = obd_notify(obd, tgt_obd, OBD_NOTIFY_CREATE, &index); + if (lov->lov_connects == 0) { /* lov_connect hasn't been called yet. 
We'll do the lov_connect_obd on this target when that fn first runs, @@ -619,6 +643,15 @@ int lov_add_target(struct obd_device *obd, struct obd_uuid *uuidp, if (!tgt->ltd_exp) GOTO(out, rc = 0); + if (lov->lov_cache != NULL) { + rc = obd_set_info_async(NULL, tgt->ltd_exp, + sizeof(KEY_CACHE_SET), KEY_CACHE_SET, + sizeof(struct cl_client_cache), lov->lov_cache, + NULL); + if (rc < 0) + GOTO(out, rc); + } + rc = lov_notify(obd, tgt->ltd_exp->exp_obd, active ? OBD_NOTIFY_CONNECT : OBD_NOTIFY_INACTIVE, (void *)&index); @@ -648,6 +681,8 @@ int lov_del_target(struct obd_device *obd, __u32 index, RETURN(-EINVAL); } + /* to make sure there's no ongoing lov_notify() now */ + down_write(&lov->lov_notify_lock); obd_getref(obd); if (!lov->lov_tgts[index]) { @@ -672,6 +707,7 @@ int lov_del_target(struct obd_device *obd, __u32 index, /* we really delete it from obd_putref */ out: obd_putref(obd); + up_write(&lov->lov_notify_lock); RETURN(rc); } @@ -703,16 +739,18 @@ static void __lov_del_obd(struct obd_device *obd, struct lov_tgt_desc *tgt) void lov_fix_desc_stripe_size(__u64 *val) { - if (*val < PTLRPC_MAX_BRW_SIZE) { - LCONSOLE_WARN("Increasing default stripe size to min %u\n", - PTLRPC_MAX_BRW_SIZE); - *val = PTLRPC_MAX_BRW_SIZE; - } else if (*val & (LOV_MIN_STRIPE_SIZE - 1)) { - *val &= ~(LOV_MIN_STRIPE_SIZE - 1); - LCONSOLE_WARN("Changing default stripe size to "LPU64" (a " - "multiple of %u)\n", - *val, LOV_MIN_STRIPE_SIZE); - } + if (*val < LOV_MIN_STRIPE_SIZE) { + if (*val != 0) + LCONSOLE_INFO("Increasing default stripe size to " + "minimum %u\n", + LOV_DESC_STRIPE_SIZE_DEFAULT); + *val = LOV_DESC_STRIPE_SIZE_DEFAULT; + } else if (*val & (LOV_MIN_STRIPE_SIZE - 1)) { + *val &= ~(LOV_MIN_STRIPE_SIZE - 1); + LCONSOLE_WARN("Changing default stripe size to "LPU64" (a " + "multiple of %u)\n", + *val, LOV_MIN_STRIPE_SIZE); + } } void lov_fix_desc_stripe_count(__u32 *val) @@ -732,9 +770,8 @@ void lov_fix_desc_pattern(__u32 *val) void lov_fix_desc_qos_maxage(__u32 *val) { - /* 
fix qos_maxage */ - if (*val == 0) - *val = QOS_DEFAULT_MAXAGE; + if (*val == 0) + *val = LOV_DESC_QOS_MAXAGE_DEFAULT; } void lov_fix_desc(struct lov_desc *desc) @@ -747,11 +784,13 @@ void lov_fix_desc(struct lov_desc *desc) int lov_setup(struct obd_device *obd, struct lustre_cfg *lcfg) { - struct lprocfs_static_vars lvars = { 0 }; - struct lov_desc *desc; - struct lov_obd *lov = &obd->u.lov; - int rc; - ENTRY; + struct lov_desc *desc; + struct lov_obd *lov = &obd->u.lov; +#ifdef LPROCFS + struct obd_type *type; +#endif + int rc; + ENTRY; if (LUSTRE_CFG_BUFLEN(lcfg, 1) < 1) { CERROR("LOV setup requires a descriptor\n"); @@ -780,59 +819,71 @@ int lov_setup(struct obd_device *obd, struct lustre_cfg *lcfg) lov_fix_desc(desc); - desc->ld_active_tgt_count = 0; - lov->desc = *desc; - lov->lov_tgt_size = 0; - - cfs_sema_init(&lov->lov_lock, 1); - cfs_atomic_set(&lov->lov_refcount, 0); - CFS_INIT_LIST_HEAD(&lov->lov_qos.lq_oss_list); - cfs_init_rwsem(&lov->lov_qos.lq_rw_sem); - lov->lov_sp_me = LUSTRE_SP_CLI; - lov->lov_qos.lq_dirty = 1; - lov->lov_qos.lq_rr.lqr_dirty = 1; - lov->lov_qos.lq_reset = 1; - /* Default priority is toward free space balance */ - lov->lov_qos.lq_prio_free = 232; - /* Default threshold for rr (roughly 17%) */ - lov->lov_qos.lq_threshold_rr = 43; - /* Init statfs fields */ - OBD_ALLOC_PTR(lov->lov_qos.lq_statfs_data); - if (NULL == lov->lov_qos.lq_statfs_data) - RETURN(-ENOMEM); - cfs_waitq_init(&lov->lov_qos.lq_statfs_waitq); + desc->ld_active_tgt_count = 0; + lov->desc = *desc; + lov->lov_tgt_size = 0; + + mutex_init(&lov->lov_lock); + atomic_set(&lov->lov_refcount, 0); + lov->lov_sp_me = LUSTRE_SP_CLI; + + init_rwsem(&lov->lov_notify_lock); lov->lov_pools_hash_body = cfs_hash_create("POOLS", HASH_POOLS_CUR_BITS, HASH_POOLS_MAX_BITS, - &pool_hash_operations, CFS_HASH_REHASH); + HASH_POOLS_BKT_BITS, 0, + CFS_HASH_MIN_THETA, + CFS_HASH_MAX_THETA, + &pool_hash_operations, + CFS_HASH_DEFAULT); CFS_INIT_LIST_HEAD(&lov->lov_pool_list); 
lov->lov_pool_count = 0; rc = lov_ost_pool_init(&lov->lov_packed, 0); if (rc) - RETURN(rc); - rc = lov_ost_pool_init(&lov->lov_qos.lq_rr.lqr_pool, 0); - if (rc) { - lov_ost_pool_free(&lov->lov_packed); - RETURN(rc); - } + GOTO(out, rc); - lprocfs_lov_init_vars(&lvars); - lprocfs_obd_setup(obd, lvars.obd_vars); #ifdef LPROCFS - { - int rc; - - rc = lprocfs_seq_create(obd->obd_proc_entry, "target_obd", - 0444, &lov_proc_target_fops, obd); - if (rc) - CWARN("Error adding the target_obd file\n"); - } + obd->obd_vars = lprocfs_lov_obd_vars; + /* If this is true then both client (lov) and server + * (lod) are on the same node. The lod layer if loaded + * first will register the lov proc directory. In that + * case obd->obd_type->typ_procroot will be not set. + * Instead we use type->typ_procsym as the parent. */ + type = class_search_type(LUSTRE_LOD_NAME); + if (type != NULL && type->typ_procsym != NULL) { + obd->obd_proc_entry = lprocfs_seq_register(obd->obd_name, + type->typ_procsym, + obd->obd_vars, obd); + if (IS_ERR(obd->obd_proc_entry)) { + rc = PTR_ERR(obd->obd_proc_entry); + CERROR("error %d setting up lprocfs for %s\n", rc, + obd->obd_name); + obd->obd_proc_entry = NULL; + } + } else { + rc = lprocfs_seq_obd_setup(obd); + } + + if (rc == 0) { + rc = lprocfs_seq_create(obd->obd_proc_entry, "target_obd", + 0444, &lov_proc_target_fops, obd); + if (rc) + CWARN("Error adding the target_obd file\n"); + + lov->lov_pool_proc_entry = lprocfs_seq_register("pools", + obd->obd_proc_entry, + NULL, NULL); + if (IS_ERR(lov->lov_pool_proc_entry)) { + rc = PTR_ERR(lov->lov_pool_proc_entry); + CERROR("error %d setting up lprocfs for pools\n", rc); + lov->lov_pool_proc_entry = NULL; + } + } #endif - lov->lov_pool_proc_entry = lprocfs_register("pools", - obd->obd_proc_entry, - NULL, NULL); + RETURN(0); - RETURN(0); +out: + return rc; } static int lov_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage) @@ -852,14 +903,12 @@ static int lov_precleanup(struct obd_device 
*obd, enum obd_cleanup_stage stage) OBD_CLEANUP_EARLY); } break; - } - case OBD_CLEANUP_EXPORTS: - rc = obd_llog_finish(obd, 0); - if (rc != 0) - CERROR("failed to cleanup llogging subsystems\n"); - break; - } - RETURN(rc); + } + default: + break; + } + + RETURN(rc); } static int lov_cleanup(struct obd_device *obd) @@ -867,46 +916,45 @@ static int lov_cleanup(struct obd_device *obd) struct lov_obd *lov = &obd->u.lov; cfs_list_t *pos, *tmp; struct pool_desc *pool; + ENTRY; cfs_list_for_each_safe(pos, tmp, &lov->lov_pool_list) { pool = cfs_list_entry(pos, struct pool_desc, pool_list); /* free pool structs */ CDEBUG(D_INFO, "delete pool %p\n", pool); + /* In the function below, .hs_keycmp resolves to + * pool_hashkey_keycmp() */ + /* coverity[overrun-buffer-val] */ lov_pool_del(obd, pool->pool_name); } cfs_hash_putref(lov->lov_pools_hash_body); - lov_ost_pool_free(&(lov->lov_qos.lq_rr.lqr_pool)); lov_ost_pool_free(&lov->lov_packed); + lprocfs_obd_cleanup(obd); if (lov->lov_tgts) { int i; obd_getref(obd); for (i = 0; i < lov->desc.ld_tgt_count; i++) { - if (!lov->lov_tgts[i]) - continue; - - /* Inactive targets may never have connected */ - if (lov->lov_tgts[i]->ltd_active || - cfs_atomic_read(&lov->lov_refcount)) - /* We should never get here - these - should have been removed in the - disconnect. */ - CERROR("lov tgt %d not cleaned!" - " deathrow=%d, lovrc=%d\n", - i, lov->lov_death_row, - cfs_atomic_read(&lov->lov_refcount)); - lov_del_target(obd, i, 0, 0); - } + if (!lov->lov_tgts[i]) + continue; + + /* Inactive targets may never have connected */ + if (lov->lov_tgts[i]->ltd_active || + atomic_read(&lov->lov_refcount)) + /* We should never get here - these + * should have been removed in the + * disconnect. */ + CERROR("%s: lov tgt %d not cleaned! 
" + "deathrow=%d, lovrc=%d\n", + obd->obd_name, i, lov->lov_death_row, + atomic_read(&lov->lov_refcount)); + lov_del_target(obd, i, 0, 0); + } obd_putref(obd); OBD_FREE(lov->lov_tgts, sizeof(*lov->lov_tgts) * lov->lov_tgt_size); lov->lov_tgt_size = 0; } - - /* clear pools parent proc entry only after all pools is killed */ - lprocfs_obd_cleanup(obd); - - OBD_FREE_PTR(lov->lov_qos.lq_statfs_data); RETURN(0); } @@ -930,7 +978,7 @@ int lov_process_config_base(struct obd_device *obd, struct lustre_cfg *lcfg, obd_str2uuid(&obd_uuid, lustre_cfg_buf(lcfg, 1)); - if (sscanf(lustre_cfg_buf(lcfg, 2), "%d", indexp) != 1) + if (sscanf(lustre_cfg_buf(lcfg, 2), "%u", indexp) != 1) GOTO(out, rc = -EINVAL); if (sscanf(lustre_cfg_buf(lcfg, 3), "%d", genp) != 1) GOTO(out, rc = -EINVAL); @@ -945,18 +993,15 @@ int lov_process_config_base(struct obd_device *obd, struct lustre_cfg *lcfg, GOTO(out, rc); } case LCFG_PARAM: { - struct lprocfs_static_vars lvars = { 0 }; - struct lov_desc *desc = &(obd->u.lov.desc); + struct lov_desc *desc = &(obd->u.lov.desc); - if (!desc) - GOTO(out, rc = -EINVAL); - - lprocfs_lov_init_vars(&lvars); + if (!desc) + GOTO(out, rc = -EINVAL); - rc = class_process_proc_param(PARAM_LOV, lvars.obd_vars, - lcfg, obd); - if (rc > 0) - rc = 0; + rc = class_process_proc_seq_param(PARAM_LOV, obd->obd_vars, + lcfg, obd); + if (rc > 0) + rc = 0; GOTO(out, rc); } case LCFG_POOL_NEW: @@ -975,82 +1020,6 @@ out: RETURN(rc); } -#ifndef log2 -#define log2(n) cfs_ffz(~(n)) -#endif - -static int lov_clear_orphans(struct obd_export *export, struct obdo *src_oa, - struct lov_stripe_md **ea, - struct obd_trans_info *oti) -{ - struct lov_obd *lov; - struct obdo *tmp_oa; - struct obd_uuid *ost_uuid = NULL; - int rc = 0, i; - ENTRY; - - LASSERT(src_oa->o_valid & OBD_MD_FLFLAGS && - src_oa->o_flags == OBD_FL_DELORPHAN); - - lov = &export->exp_obd->u.lov; - - OBDO_ALLOC(tmp_oa); - if (tmp_oa == NULL) - RETURN(-ENOMEM); - - if (oti->oti_ost_uuid) { - ost_uuid = oti->oti_ost_uuid; - 
CDEBUG(D_HA, "clearing orphans only for %s\n", - ost_uuid->uuid); - } - - obd_getref(export->exp_obd); - for (i = 0; i < lov->desc.ld_tgt_count; i++) { - struct lov_stripe_md obj_md; - struct lov_stripe_md *obj_mdp = &obj_md; - struct lov_tgt_desc *tgt; - int err; - - tgt = lov->lov_tgts[i]; - if (!tgt) - continue; - - /* if called for a specific target, we don't - care if it is not active. */ - if (!lov->lov_tgts[i]->ltd_active && ost_uuid == NULL) { - CDEBUG(D_HA, "lov idx %d inactive\n", i); - continue; - } - - if (ost_uuid && !obd_uuid_equals(ost_uuid, &tgt->ltd_uuid)) - continue; - - CDEBUG(D_CONFIG,"Clear orphans for %d:%s\n", i, - obd_uuid2str(ost_uuid)); - - memcpy(tmp_oa, src_oa, sizeof(*tmp_oa)); - - LASSERT(lov->lov_tgts[i]->ltd_exp); - /* XXX: LOV STACKING: use real "obj_mdp" sub-data */ - err = obd_create(lov->lov_tgts[i]->ltd_exp, - tmp_oa, &obj_mdp, oti); - if (err) { - /* This export will be disabled until it is recovered, - and then orphan recovery will be completed. */ - CERROR("error in orphan recovery on OST idx %d/%d: " - "rc = %d\n", i, lov->desc.ld_tgt_count, err); - rc = err; - } - - if (ost_uuid) - break; - } - obd_putref(export->exp_obd); - - OBDO_FREE(tmp_oa); - RETURN(rc); -} - static int lov_recreate(struct obd_export *exp, struct obdo *src_oa, struct lov_stripe_md **ea, struct obd_trans_info *oti) { @@ -1075,31 +1044,30 @@ static int lov_recreate(struct obd_export *exp, struct obdo *src_oa, !lov->lov_tgts[ost_idx]) GOTO(out, rc = -EINVAL); - for (i = 0; i < lsm->lsm_stripe_count; i++) { - if (lsm->lsm_oinfo[i]->loi_ost_idx == ost_idx) { - if (lsm->lsm_oinfo[i]->loi_id != src_oa->o_id) - GOTO(out, rc = -EINVAL); - break; - } - } + for (i = 0; i < lsm->lsm_stripe_count; i++) { + if (lsm->lsm_oinfo[i]->loi_ost_idx == ost_idx) { + if (ostid_id(&lsm->lsm_oinfo[i]->loi_oi) != + ostid_id(&src_oa->o_oi)) + GOTO(out, rc = -EINVAL); + break; + } + } if (i == lsm->lsm_stripe_count) GOTO(out, rc = -EINVAL); - rc = 
obd_create(lov->lov_tgts[ost_idx]->ltd_exp, src_oa, &obj_mdp, oti); + rc = obd_create(NULL, lov->lov_tgts[ost_idx]->ltd_exp, + src_oa, &obj_mdp, oti); out: OBD_FREE(obj_mdp, sizeof(*obj_mdp)); RETURN(rc); } /* the LOV expects oa->o_id to be set to the LOV object id */ -static int lov_create(struct obd_export *exp, struct obdo *src_oa, - struct lov_stripe_md **ea, struct obd_trans_info *oti) +static int lov_create(const struct lu_env *env, struct obd_export *exp, + struct obdo *src_oa, struct lov_stripe_md **ea, + struct obd_trans_info *oti) { struct lov_obd *lov; - struct obd_info oinfo; - struct lov_request_set *set = NULL; - struct lov_request *req; - struct l_wait_info lwi = { 0 }; int rc = 0; ENTRY; @@ -1109,8 +1077,8 @@ static int lov_create(struct obd_export *exp, struct obdo *src_oa, if ((src_oa->o_valid & OBD_MD_FLFLAGS) && src_oa->o_flags == OBD_FL_DELORPHAN) { - rc = lov_clear_orphans(exp, src_oa, ea, oti); - RETURN(rc); + /* should be used with LOV anymore */ + LBUG(); } lov = &exp->exp_obd->u.lov; @@ -1122,37 +1090,8 @@ static int lov_create(struct obd_export *exp, struct obdo *src_oa, if ((src_oa->o_valid & OBD_MD_FLFLAGS) && (src_oa->o_flags & OBD_FL_RECREATE_OBJS)) { rc = lov_recreate(exp, src_oa, ea, oti); - GOTO(out, rc); - } - - /* issue statfs rpcs if the osfs data is older than qos_maxage - 1s, - * later in alloc_qos(), we will wait for those rpcs to complete if - * the osfs age is older than 2 * qos_maxage */ - qos_statfs_update(exp->exp_obd, - cfs_time_shift_64(-lov->desc.ld_qos_maxage + - OBD_STATFS_CACHE_SECONDS), - 0); - - rc = lov_prep_create_set(exp, &oinfo, ea, src_oa, oti, &set); - if (rc) - GOTO(out, rc); - - cfs_list_for_each_entry(req, &set->set_list, rq_link) { - /* XXX: LOV STACKING: use real "obj_mdp" sub-data */ - rc = obd_create_async(lov->lov_tgts[req->rq_idx]->ltd_exp, - &req->rq_oi, &req->rq_oi.oi_md, oti); } - /* osc_create have timeout equ obd_timeout/2 so waiting don't be - * longer then this */ - 
l_wait_event(set->set_waitq, lov_finished_set(set), &lwi); - - /* we not have ptlrpc set for assign set->interpret and should - * be call interpret function himself. calling from cb_create_update - * not permited because lov_fini_create_set can sleep for long time, - * but we must avoid sleeping in ptlrpcd interpret function. */ - rc = lov_fini_create_set(set, ea); -out: obd_putref(exp->exp_obd); RETURN(rc); } @@ -1165,9 +1104,10 @@ do { "%p->lsm_magic=%x\n", (lsmp), (lsmp)->lsm_magic); \ } while (0) -static int lov_destroy(struct obd_export *exp, struct obdo *oa, - struct lov_stripe_md *lsm, struct obd_trans_info *oti, - struct obd_export *md_exp, void *capa) +static int lov_destroy(const struct lu_env *env, struct obd_export *exp, + struct obdo *oa, struct lov_stripe_md *lsm, + struct obd_trans_info *oti, struct obd_export *md_exp, + void *capa) { struct lov_request_set *set; struct obd_info oinfo; @@ -1199,17 +1139,18 @@ static int lov_destroy(struct obd_export *exp, struct obdo *oa, if (oa->o_valid & OBD_MD_FLCOOKIE) oti->oti_logcookies = set->set_cookies + req->rq_stripe; - err = obd_destroy(lov->lov_tgts[req->rq_idx]->ltd_exp, - req->rq_oi.oi_oa, NULL, oti, NULL, capa); - err = lov_update_common_set(set, req, err); - if (err) { - CERROR("error: destroying objid "LPX64" subobj " - LPX64" on OST idx %d: rc = %d\n", - oa->o_id, req->rq_oi.oi_oa->o_id, - req->rq_idx, err); - if (!rc) - rc = err; - } + err = obd_destroy(env, lov->lov_tgts[req->rq_idx]->ltd_exp, + req->rq_oi.oi_oa, NULL, oti, NULL, capa); + err = lov_update_common_set(set, req, err); + if (err) { + CERROR("%s: destroying objid "DOSTID" subobj " + DOSTID" on OST idx %d: rc = %d\n", + exp->exp_obd->obd_name, POSTID(&oa->o_oi), + POSTID(&req->rq_oi.oi_oa->o_oi), + req->rq_idx, err); + if (!rc) + rc = err; + } } if (rc == 0) { @@ -1222,64 +1163,18 @@ out: RETURN(rc ? 
rc : err); } -static int lov_getattr(struct obd_export *exp, struct obd_info *oinfo) -{ - struct lov_request_set *set; - struct lov_request *req; - cfs_list_t *pos; - struct lov_obd *lov; - int err = 0, rc = 0; - ENTRY; - - LASSERT(oinfo); - ASSERT_LSM_MAGIC(oinfo->oi_md); - - if (!exp || !exp->exp_obd) - RETURN(-ENODEV); - - lov = &exp->exp_obd->u.lov; - - rc = lov_prep_getattr_set(exp, oinfo, &set); - if (rc) - RETURN(rc); - - cfs_list_for_each (pos, &set->set_list) { - req = cfs_list_entry(pos, struct lov_request, rq_link); - - CDEBUG(D_INFO, "objid "LPX64"[%d] has subobj "LPX64" at idx " - "%u\n", oinfo->oi_oa->o_id, req->rq_stripe, - req->rq_oi.oi_oa->o_id, req->rq_idx); - - rc = obd_getattr(lov->lov_tgts[req->rq_idx]->ltd_exp, - &req->rq_oi); - err = lov_update_common_set(set, req, rc); - if (err) { - CERROR("error: getattr objid "LPX64" subobj " - LPX64" on OST idx %d: rc = %d\n", - oinfo->oi_oa->o_id, req->rq_oi.oi_oa->o_id, - req->rq_idx, err); - break; - } - } - - rc = lov_fini_getattr_set(set); - if (err) - rc = err; - RETURN(rc); -} - static int lov_getattr_interpret(struct ptlrpc_request_set *rqset, - void *data, int rc) + void *data, int rc) { - struct lov_request_set *lovset = (struct lov_request_set *)data; - int err; - ENTRY; - - /* don't do attribute merge if this aysnc op failed */ - if (rc) - lovset->set_completes = 0; - err = lov_fini_getattr_set(lovset); - RETURN(rc ? rc : err); + struct lov_request_set *lovset = (struct lov_request_set *)data; + int err; + ENTRY; + + /* don't do attribute merge if this async op failed */ + if (rc) + atomic_set(&lovset->set_completes, 0); + err = lov_fini_getattr_set(lovset); + RETURN(rc ? 
rc : err); } static int lov_getattr_async(struct obd_export *exp, struct obd_info *oinfo, @@ -1304,26 +1199,28 @@ static int lov_getattr_async(struct obd_export *exp, struct obd_info *oinfo, if (rc) RETURN(rc); - CDEBUG(D_INFO, "objid "LPX64": %ux%u byte stripes\n", - oinfo->oi_md->lsm_object_id, oinfo->oi_md->lsm_stripe_count, - oinfo->oi_md->lsm_stripe_size); - - cfs_list_for_each (pos, &lovset->set_list) { - req = cfs_list_entry(pos, struct lov_request, rq_link); - - CDEBUG(D_INFO, "objid "LPX64"[%d] has subobj "LPX64" at idx " - "%u\n", oinfo->oi_oa->o_id, req->rq_stripe, - req->rq_oi.oi_oa->o_id, req->rq_idx); - rc = obd_getattr_async(lov->lov_tgts[req->rq_idx]->ltd_exp, - &req->rq_oi, rqset); - if (rc) { - CERROR("error: getattr objid "LPX64" subobj " - LPX64" on OST idx %d: rc = %d\n", - oinfo->oi_oa->o_id, req->rq_oi.oi_oa->o_id, - req->rq_idx, rc); - GOTO(out, rc); - } - } + CDEBUG(D_INFO, "objid "DOSTID": %ux%u byte stripes\n", + POSTID(&oinfo->oi_md->lsm_oi), oinfo->oi_md->lsm_stripe_count, + oinfo->oi_md->lsm_stripe_size); + + cfs_list_for_each(pos, &lovset->set_list) { + req = cfs_list_entry(pos, struct lov_request, rq_link); + + CDEBUG(D_INFO, "objid "DOSTID"[%d] has subobj "DOSTID" at idx" + "%u\n", POSTID(&oinfo->oi_oa->o_oi), req->rq_stripe, + POSTID(&req->rq_oi.oi_oa->o_oi), req->rq_idx); + rc = obd_getattr_async(lov->lov_tgts[req->rq_idx]->ltd_exp, + &req->rq_oi, rqset); + if (rc) { + CERROR("%s: getattr objid "DOSTID" subobj" + DOSTID" on OST idx %d: rc = %d\n", + exp->exp_obd->obd_name, + POSTID(&oinfo->oi_oa->o_oi), + POSTID(&req->rq_oi.oi_oa->o_oi), + req->rq_idx, rc); + GOTO(out, rc); + } + } if (!cfs_list_empty(&rqset->set_requests)) { LASSERT(rc == 0); @@ -1333,73 +1230,23 @@ static int lov_getattr_async(struct obd_export *exp, struct obd_info *oinfo, RETURN(rc); } out: - if (rc) - lovset->set_completes = 0; - err = lov_fini_getattr_set(lovset); - RETURN(rc ? 
rc : err); -} - -static int lov_setattr(struct obd_export *exp, struct obd_info *oinfo, - struct obd_trans_info *oti) -{ - struct lov_request_set *set; - struct lov_obd *lov; - cfs_list_t *pos; - struct lov_request *req; - int err = 0, rc = 0; - ENTRY; - - LASSERT(oinfo); - ASSERT_LSM_MAGIC(oinfo->oi_md); - - if (!exp || !exp->exp_obd) - RETURN(-ENODEV); - - /* for now, we only expect the following updates here */ - LASSERT(!(oinfo->oi_oa->o_valid & ~(OBD_MD_FLID | OBD_MD_FLTYPE | - OBD_MD_FLMODE | OBD_MD_FLATIME | - OBD_MD_FLMTIME | OBD_MD_FLCTIME | - OBD_MD_FLFLAGS | OBD_MD_FLSIZE | - OBD_MD_FLGROUP | OBD_MD_FLUID | - OBD_MD_FLGID | OBD_MD_FLFID | - OBD_MD_FLGENER))); - lov = &exp->exp_obd->u.lov; - rc = lov_prep_setattr_set(exp, oinfo, oti, &set); - if (rc) - RETURN(rc); - - cfs_list_for_each (pos, &set->set_list) { - req = cfs_list_entry(pos, struct lov_request, rq_link); - - rc = obd_setattr(lov->lov_tgts[req->rq_idx]->ltd_exp, - &req->rq_oi, NULL); - err = lov_update_setattr_set(set, req, rc); - if (err) { - CERROR("error: setattr objid "LPX64" subobj " - LPX64" on OST idx %d: rc = %d\n", - set->set_oi->oi_oa->o_id, - req->rq_oi.oi_oa->o_id, req->rq_idx, err); - if (!rc) - rc = err; - } - } - err = lov_fini_setattr_set(set); - if (!rc) - rc = err; - RETURN(rc); + if (rc) + atomic_set(&lovset->set_completes, 0); + err = lov_fini_getattr_set(lovset); + RETURN(rc ? rc : err); } static int lov_setattr_interpret(struct ptlrpc_request_set *rqset, - void *data, int rc) + void *data, int rc) { - struct lov_request_set *lovset = (struct lov_request_set *)data; - int err; - ENTRY; - - if (rc) - lovset->set_completes = 0; - err = lov_fini_setattr_set(lovset); - RETURN(rc ? rc : err); + struct lov_request_set *lovset = (struct lov_request_set *)data; + int err; + ENTRY; + + if (rc) + atomic_set(&lovset->set_completes, 0); + err = lov_fini_setattr_set(lovset); + RETURN(rc ? 
rc : err); } /* If @oti is given, the request goes from MDS and responses from OSTs are not @@ -1430,322 +1277,79 @@ static int lov_setattr_async(struct obd_export *exp, struct obd_info *oinfo, if (rc) RETURN(rc); - CDEBUG(D_INFO, "objid "LPX64": %ux%u byte stripes\n", - oinfo->oi_md->lsm_object_id, oinfo->oi_md->lsm_stripe_count, - oinfo->oi_md->lsm_stripe_size); + CDEBUG(D_INFO, "objid "DOSTID": %ux%u byte stripes\n", + POSTID(&oinfo->oi_md->lsm_oi), + oinfo->oi_md->lsm_stripe_count, + oinfo->oi_md->lsm_stripe_size); + + cfs_list_for_each(pos, &set->set_list) { + req = cfs_list_entry(pos, struct lov_request, rq_link); + + if (oinfo->oi_oa->o_valid & OBD_MD_FLCOOKIE) + oti->oti_logcookies = set->set_cookies + req->rq_stripe; + + CDEBUG(D_INFO, "objid "DOSTID"[%d] has subobj "DOSTID" at idx" + "%u\n", POSTID(&oinfo->oi_oa->o_oi), req->rq_stripe, + POSTID(&req->rq_oi.oi_oa->o_oi), req->rq_idx); + + rc = obd_setattr_async(lov->lov_tgts[req->rq_idx]->ltd_exp, + &req->rq_oi, oti, rqset); + if (rc) { + CERROR("error: setattr objid "DOSTID" subobj" + DOSTID" on OST idx %d: rc = %d\n", + POSTID(&set->set_oi->oi_oa->o_oi), + POSTID(&req->rq_oi.oi_oa->o_oi), + req->rq_idx, rc); + break; + } + } + + /* If we are not waiting for responses on async requests, return. */ + if (rc || !rqset || cfs_list_empty(&rqset->set_requests)) { + int err; + if (rc) + atomic_set(&set->set_completes, 0); + err = lov_fini_setattr_set(set); + RETURN(rc ? 
rc : err); + } + + LASSERT(rqset->set_interpret == NULL); + rqset->set_interpret = lov_setattr_interpret; + rqset->set_arg = (void *)set; + + RETURN(0); +} - cfs_list_for_each (pos, &set->set_list) { - req = cfs_list_entry(pos, struct lov_request, rq_link); +static int lov_change_cbdata(struct obd_export *exp, + struct lov_stripe_md *lsm, ldlm_iterator_t it, + void *data) +{ + struct lov_obd *lov; + int rc = 0, i; + ENTRY; - if (oinfo->oi_oa->o_valid & OBD_MD_FLCOOKIE) - oti->oti_logcookies = set->set_cookies + req->rq_stripe; + ASSERT_LSM_MAGIC(lsm); + + if (!exp || !exp->exp_obd) + RETURN(-ENODEV); - CDEBUG(D_INFO, "objid "LPX64"[%d] has subobj "LPX64" at idx " - "%u\n", oinfo->oi_oa->o_id, req->rq_stripe, - req->rq_oi.oi_oa->o_id, req->rq_idx); + lov = &exp->exp_obd->u.lov; + for (i = 0; i < lsm->lsm_stripe_count; i++) { + struct lov_stripe_md submd; + struct lov_oinfo *loi = lsm->lsm_oinfo[i]; - rc = obd_setattr_async(lov->lov_tgts[req->rq_idx]->ltd_exp, - &req->rq_oi, oti, rqset); - if (rc) { - CERROR("error: setattr objid "LPX64" subobj " - LPX64" on OST idx %d: rc = %d\n", - set->set_oi->oi_oa->o_id, - req->rq_oi.oi_oa->o_id, - req->rq_idx, rc); - break; + if (!lov->lov_tgts[loi->loi_ost_idx]) { + CDEBUG(D_HA, "lov idx %d NULL \n", loi->loi_ost_idx); + continue; } - } - - /* If we are not waiting for responses on async requests, return. */ - if (rc || !rqset || cfs_list_empty(&rqset->set_requests)) { - int err; - if (rc) - set->set_completes = 0; - err = lov_fini_setattr_set(set); - RETURN(rc ? rc : err); - } - LASSERT(rqset->set_interpret == NULL); - rqset->set_interpret = lov_setattr_interpret; - rqset->set_arg = (void *)set; - - RETURN(0); -} - -static int lov_punch_interpret(struct ptlrpc_request_set *rqset, - void *data, int rc) -{ - struct lov_request_set *lovset = (struct lov_request_set *)data; - int err; - ENTRY; - - if (rc) - lovset->set_completes = 0; - err = lov_fini_punch_set(lovset); - RETURN(rc ? 
rc : err); -} - -/* FIXME: maybe we'll just make one node the authoritative attribute node, then - * we can send this 'punch' to just the authoritative node and the nodes - * that the punch will affect. */ -static int lov_punch(struct obd_export *exp, struct obd_info *oinfo, - struct obd_trans_info *oti, - struct ptlrpc_request_set *rqset) -{ - struct lov_request_set *set; - struct lov_obd *lov; - cfs_list_t *pos; - struct lov_request *req; - int rc = 0; - ENTRY; - - LASSERT(oinfo); - ASSERT_LSM_MAGIC(oinfo->oi_md); - - if (!exp || !exp->exp_obd) - RETURN(-ENODEV); - - lov = &exp->exp_obd->u.lov; - rc = lov_prep_punch_set(exp, oinfo, oti, &set); - if (rc) - RETURN(rc); - - cfs_list_for_each (pos, &set->set_list) { - req = cfs_list_entry(pos, struct lov_request, rq_link); - - rc = obd_punch(lov->lov_tgts[req->rq_idx]->ltd_exp, - &req->rq_oi, NULL, rqset); - if (rc) { - CERROR("error: punch objid "LPX64" subobj "LPX64 - " on OST idx %d: rc = %d\n", - set->set_oi->oi_oa->o_id, - req->rq_oi.oi_oa->o_id, req->rq_idx, rc); - break; - } - } - - if (rc || cfs_list_empty(&rqset->set_requests)) { - int err; - err = lov_fini_punch_set(set); - RETURN(rc ? 
rc : err); - } - - LASSERT(rqset->set_interpret == NULL); - rqset->set_interpret = lov_punch_interpret; - rqset->set_arg = (void *)set; - - RETURN(0); -} - -static int lov_sync(struct obd_export *exp, struct obdo *oa, - struct lov_stripe_md *lsm, obd_off start, obd_off end, - void *capa) -{ - struct lov_request_set *set; - struct obd_info oinfo; - struct lov_obd *lov; - cfs_list_t *pos; - struct lov_request *req; - int err = 0, rc = 0; - ENTRY; - - ASSERT_LSM_MAGIC(lsm); - - if (!exp->exp_obd) - RETURN(-ENODEV); - - lov = &exp->exp_obd->u.lov; - rc = lov_prep_sync_set(exp, &oinfo, oa, lsm, start, end, &set); - if (rc) - RETURN(rc); - - cfs_list_for_each (pos, &set->set_list) { - req = cfs_list_entry(pos, struct lov_request, rq_link); - - rc = obd_sync(lov->lov_tgts[req->rq_idx]->ltd_exp, - req->rq_oi.oi_oa, NULL, - req->rq_oi.oi_policy.l_extent.start, - req->rq_oi.oi_policy.l_extent.end, capa); - err = lov_update_common_set(set, req, rc); - if (err) { - CERROR("error: fsync objid "LPX64" subobj "LPX64 - " on OST idx %d: rc = %d\n", - set->set_oi->oi_oa->o_id, - req->rq_oi.oi_oa->o_id, req->rq_idx, rc); - if (!rc) - rc = err; - } - } - err = lov_fini_sync_set(set); - if (!rc) - rc = err; - RETURN(rc); -} - -static int lov_brw_check(struct lov_obd *lov, struct obd_info *lov_oinfo, - obd_count oa_bufs, struct brw_page *pga) -{ - struct obd_info oinfo = { { { 0 } } }; - int i, rc = 0; - - oinfo.oi_oa = lov_oinfo->oi_oa; - - /* The caller just wants to know if there's a chance that this - * I/O can succeed */ - for (i = 0; i < oa_bufs; i++) { - int stripe = lov_stripe_number(lov_oinfo->oi_md, pga[i].off); - int ost = lov_oinfo->oi_md->lsm_oinfo[stripe]->loi_ost_idx; - obd_off start, end; - - if (!lov_stripe_intersects(lov_oinfo->oi_md, i, pga[i].off, - pga[i].off + pga[i].count - 1, - &start, &end)) - continue; - - if (!lov->lov_tgts[ost] || !lov->lov_tgts[ost]->ltd_active) { - CDEBUG(D_HA, "lov idx %d inactive\n", ost); - return -EIO; - } - - rc = 
obd_brw(OBD_BRW_CHECK, lov->lov_tgts[ost]->ltd_exp, &oinfo, - 1, &pga[i], NULL); - if (rc) - break; - } - return rc; -} - -static int lov_brw(int cmd, struct obd_export *exp, struct obd_info *oinfo, - obd_count oa_bufs, struct brw_page *pga, - struct obd_trans_info *oti) -{ - struct lov_request_set *set; - struct lov_request *req; - cfs_list_t *pos; - struct lov_obd *lov = &exp->exp_obd->u.lov; - int err, rc = 0; - ENTRY; - - ASSERT_LSM_MAGIC(oinfo->oi_md); - - if (cmd == OBD_BRW_CHECK) { - rc = lov_brw_check(lov, oinfo, oa_bufs, pga); - RETURN(rc); - } - - rc = lov_prep_brw_set(exp, oinfo, oa_bufs, pga, oti, &set); - if (rc) - RETURN(rc); - - cfs_list_for_each (pos, &set->set_list) { - struct obd_export *sub_exp; - struct brw_page *sub_pga; - req = cfs_list_entry(pos, struct lov_request, rq_link); - - sub_exp = lov->lov_tgts[req->rq_idx]->ltd_exp; - sub_pga = set->set_pga + req->rq_pgaidx; - rc = obd_brw(cmd, sub_exp, &req->rq_oi, req->rq_oabufs, - sub_pga, oti); - if (rc) - break; - lov_update_common_set(set, req, rc); - } - - err = lov_fini_brw_set(set); - if (!rc) - rc = err; - RETURN(rc); -} - -static int lov_enqueue_interpret(struct ptlrpc_request_set *rqset, - void *data, int rc) -{ - struct lov_request_set *lovset = (struct lov_request_set *)data; - ENTRY; - rc = lov_fini_enqueue_set(lovset, lovset->set_ei->ei_mode, rc, rqset); - RETURN(rc); -} - -static int lov_enqueue(struct obd_export *exp, struct obd_info *oinfo, - struct ldlm_enqueue_info *einfo, - struct ptlrpc_request_set *rqset) -{ - ldlm_mode_t mode = einfo->ei_mode; - struct lov_request_set *set; - struct lov_request *req; - cfs_list_t *pos; - struct lov_obd *lov; - ldlm_error_t rc; - ENTRY; - - LASSERT(oinfo); - ASSERT_LSM_MAGIC(oinfo->oi_md); - LASSERT(mode == (mode & -mode)); - - /* we should never be asked to replay a lock this way. 
*/ - LASSERT((oinfo->oi_flags & LDLM_FL_REPLAY) == 0); - - if (!exp || !exp->exp_obd) - RETURN(-ENODEV); - - lov = &exp->exp_obd->u.lov; - rc = lov_prep_enqueue_set(exp, oinfo, einfo, &set); - if (rc) - RETURN(rc); - - cfs_list_for_each (pos, &set->set_list) { - req = cfs_list_entry(pos, struct lov_request, rq_link); - - rc = obd_enqueue(lov->lov_tgts[req->rq_idx]->ltd_exp, - &req->rq_oi, einfo, rqset); - if (rc != ELDLM_OK) - GOTO(out, rc); - } - - if (rqset && !cfs_list_empty(&rqset->set_requests)) { - LASSERT(rc == 0); - LASSERT(rqset->set_interpret == NULL); - rqset->set_interpret = lov_enqueue_interpret; - rqset->set_arg = (void *)set; - RETURN(rc); - } -out: - rc = lov_fini_enqueue_set(set, mode, rc, rqset); - RETURN(rc); -} - -static int lov_change_cbdata(struct obd_export *exp, - struct lov_stripe_md *lsm, ldlm_iterator_t it, - void *data) -{ - struct lov_obd *lov; - int rc = 0, i; - ENTRY; - - ASSERT_LSM_MAGIC(lsm); - - if (!exp || !exp->exp_obd) - RETURN(-ENODEV); - - lov = &exp->exp_obd->u.lov; - for (i = 0; i < lsm->lsm_stripe_count; i++) { - struct lov_stripe_md submd; - struct lov_oinfo *loi = lsm->lsm_oinfo[i]; - - if (!lov->lov_tgts[loi->loi_ost_idx]) { - CDEBUG(D_HA, "lov idx %d NULL \n", loi->loi_ost_idx); - continue; - } - - LASSERT_SEQ_IS_MDT(loi->loi_seq); - submd.lsm_object_id = loi->loi_id; - submd.lsm_object_seq = loi->loi_seq; - submd.lsm_stripe_count = 0; - rc = obd_change_cbdata(lov->lov_tgts[loi->loi_ost_idx]->ltd_exp, - &submd, it, data); - } - RETURN(rc); -} + submd.lsm_oi = loi->loi_oi; + submd.lsm_stripe_count = 0; + rc = obd_change_cbdata(lov->lov_tgts[loi->loi_ost_idx]->ltd_exp, + &submd, it, data); + } + RETURN(rc); +} /* find any ldlm lock of the inode in lov * return 0 not find @@ -1773,138 +1377,33 @@ static int lov_find_cbdata(struct obd_export *exp, CDEBUG(D_HA, "lov idx %d NULL \n", loi->loi_ost_idx); continue; } - - LASSERT_SEQ_IS_MDT(loi->loi_seq); - submd.lsm_object_id = loi->loi_id; - submd.lsm_object_seq = loi->loi_seq; 
- submd.lsm_stripe_count = 0; - rc = obd_find_cbdata(lov->lov_tgts[loi->loi_ost_idx]->ltd_exp, - &submd, it, data); - if (rc != 0) - RETURN(rc); - } - RETURN(rc); -} - -static int lov_cancel(struct obd_export *exp, struct lov_stripe_md *lsm, - __u32 mode, struct lustre_handle *lockh) -{ - struct lov_request_set *set; - struct obd_info oinfo; - struct lov_request *req; - cfs_list_t *pos; - struct lov_obd *lov; - struct lustre_handle *lov_lockhp; - int err = 0, rc = 0; - ENTRY; - - ASSERT_LSM_MAGIC(lsm); - - if (!exp || !exp->exp_obd) - RETURN(-ENODEV); - - LASSERT_SEQ_IS_MDT(lsm->lsm_object_seq); - LASSERT(lockh); - lov = &exp->exp_obd->u.lov; - rc = lov_prep_cancel_set(exp, &oinfo, lsm, mode, lockh, &set); - if (rc) - RETURN(rc); - - cfs_list_for_each (pos, &set->set_list) { - req = cfs_list_entry(pos, struct lov_request, rq_link); - lov_lockhp = set->set_lockh->llh_handles + req->rq_stripe; - - rc = obd_cancel(lov->lov_tgts[req->rq_idx]->ltd_exp, - req->rq_oi.oi_md, mode, lov_lockhp); - rc = lov_update_common_set(set, req, rc); - if (rc) { - CERROR("error: cancel objid "LPX64" subobj " - LPX64" on OST idx %d: rc = %d\n", - lsm->lsm_object_id, - req->rq_oi.oi_md->lsm_object_id, - req->rq_idx, rc); - err = rc; - } - - } - lov_fini_cancel_set(set); - RETURN(err); -} - -static int lov_cancel_unused(struct obd_export *exp, - struct lov_stripe_md *lsm, - ldlm_cancel_flags_t flags, void *opaque) -{ - struct lov_obd *lov; - int rc = 0, i; - ENTRY; - - if (!exp || !exp->exp_obd) - RETURN(-ENODEV); - - lov = &exp->exp_obd->u.lov; - if (lsm == NULL) { - for (i = 0; i < lov->desc.ld_tgt_count; i++) { - int err; - if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_exp) - continue; - - err = obd_cancel_unused(lov->lov_tgts[i]->ltd_exp, NULL, - flags, opaque); - if (!rc) - rc = err; - } - RETURN(rc); - } - - ASSERT_LSM_MAGIC(lsm); - - LASSERT_SEQ_IS_MDT(lsm->lsm_object_seq); - for (i = 0; i < lsm->lsm_stripe_count; i++) { - struct lov_stripe_md submd; - struct lov_oinfo *loi = 
lsm->lsm_oinfo[i]; - int err; - - if (!lov->lov_tgts[loi->loi_ost_idx]) { - CDEBUG(D_HA, "lov idx %d NULL\n", loi->loi_ost_idx); - continue; - } - - if (!lov->lov_tgts[loi->loi_ost_idx]->ltd_active) - CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx); - - submd.lsm_object_id = loi->loi_id; - submd.lsm_object_seq = loi->loi_seq; - submd.lsm_stripe_count = 0; - err = obd_cancel_unused(lov->lov_tgts[loi->loi_ost_idx]->ltd_exp, - &submd, flags, opaque); - if (err && lov->lov_tgts[loi->loi_ost_idx]->ltd_active) { - CERROR("error: cancel unused objid "LPX64" subobj "LPX64 - " on OST idx %d: rc = %d\n", lsm->lsm_object_id, - loi->loi_id, loi->loi_ost_idx, err); - if (!rc) - rc = err; - } - } - RETURN(rc); + submd.lsm_oi = loi->loi_oi; + submd.lsm_stripe_count = 0; + rc = obd_find_cbdata(lov->lov_tgts[loi->loi_ost_idx]->ltd_exp, + &submd, it, data); + if (rc != 0) + RETURN(rc); + } + RETURN(rc); } int lov_statfs_interpret(struct ptlrpc_request_set *rqset, void *data, int rc) { - struct lov_request_set *lovset = (struct lov_request_set *)data; - int err; - ENTRY; + struct lov_request_set *lovset = (struct lov_request_set *)data; + int err; + ENTRY; - if (rc) - lovset->set_completes = 0; + if (rc) + atomic_set(&lovset->set_completes, 0); - err = lov_fini_statfs_set(lovset); - RETURN(rc ? rc : err); + err = lov_fini_statfs_set(lovset); + RETURN(rc ? 
rc : err); } -static int lov_statfs_async(struct obd_device *obd, struct obd_info *oinfo, +static int lov_statfs_async(struct obd_export *exp, struct obd_info *oinfo, __u64 max_age, struct ptlrpc_request_set *rqset) { + struct obd_device *obd = class_exp2obd(exp); struct lov_request_set *set; struct lov_request *req; cfs_list_t *pos; @@ -1921,32 +1420,29 @@ static int lov_statfs_async(struct obd_device *obd, struct obd_info *oinfo, RETURN(rc); cfs_list_for_each (pos, &set->set_list) { - struct obd_device *osc_obd; - req = cfs_list_entry(pos, struct lov_request, rq_link); - - osc_obd = class_exp2obd(lov->lov_tgts[req->rq_idx]->ltd_exp); - rc = obd_statfs_async(osc_obd, &req->rq_oi, max_age, rqset); + rc = obd_statfs_async(lov->lov_tgts[req->rq_idx]->ltd_exp, + &req->rq_oi, max_age, rqset); if (rc) break; } - if (rc || cfs_list_empty(&rqset->set_requests)) { - int err; - if (rc) - set->set_completes = 0; - err = lov_fini_statfs_set(set); - RETURN(rc ? rc : err); - } + if (rc || cfs_list_empty(&rqset->set_requests)) { + int err; + if (rc) + atomic_set(&set->set_completes, 0); + err = lov_fini_statfs_set(set); + RETURN(rc ? 
rc : err); + } - LASSERT(rqset->set_interpret == NULL); - rqset->set_interpret = lov_statfs_interpret; - rqset->set_arg = (void *)set; - RETURN(0); + LASSERT(rqset->set_interpret == NULL); + rqset->set_interpret = lov_statfs_interpret; + rqset->set_arg = (void *)set; + RETURN(0); } -static int lov_statfs(struct obd_device *obd, struct obd_statfs *osfs, - __u64 max_age, __u32 flags) +static int lov_statfs(const struct lu_env *env, struct obd_export *exp, + struct obd_statfs *osfs, __u64 max_age, __u32 flags) { struct ptlrpc_request_set *set = NULL; struct obd_info oinfo = { { { 0 } } }; @@ -1962,7 +1458,7 @@ static int lov_statfs(struct obd_device *obd, struct obd_statfs *osfs, oinfo.oi_osfs = osfs; oinfo.oi_flags = flags; - rc = lov_statfs_async(obd, &oinfo, max_age, set); + rc = lov_statfs_async(exp, &oinfo, max_age, set); if (rc == 0) rc = ptlrpc_set_wait(set); ptlrpc_set_destroy(set); @@ -1985,6 +1481,7 @@ static int lov_iocontrol(unsigned int cmd, struct obd_export *exp, int len, struct obd_device *osc_obd; struct obd_statfs stat_buf = {0}; __u32 index; + __u32 flags; memcpy(&index, data->ioc_inlbuf2, sizeof(__u32)); if ((index >= count)) @@ -2001,18 +1498,19 @@ static int lov_iocontrol(unsigned int cmd, struct obd_export *exp, int len, RETURN(-EINVAL); /* copy UUID */ - if (cfs_copy_to_user(data->ioc_pbuf2, obd2cli_tgt(osc_obd), - min((int) data->ioc_plen2, - (int) sizeof(struct obd_uuid)))) + if (copy_to_user(data->ioc_pbuf2, obd2cli_tgt(osc_obd), + min((int)data->ioc_plen2, + (int)sizeof(struct obd_uuid)))) RETURN(-EFAULT); + flags = uarg ? 
*(__u32*)uarg : 0; /* got statfs data */ - rc = obd_statfs(osc_obd, &stat_buf, + rc = obd_statfs(NULL, lov->lov_tgts[index]->ltd_exp, &stat_buf, cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS), - 0); + flags); if (rc) RETURN(rc); - if (cfs_copy_to_user(data->ioc_pbuf1, &stat_buf, + if (copy_to_user(data->ioc_pbuf1, &stat_buf, min((int) data->ioc_plen1, (int) sizeof(stat_buf)))) RETURN(-EFAULT); @@ -2058,32 +1556,26 @@ static int lov_iocontrol(unsigned int cmd, struct obd_export *exp, int len, *genp = lov->lov_tgts[i]->ltd_gen; } - if (cfs_copy_to_user((void *)uarg, buf, len)) + if (copy_to_user((void *)uarg, buf, len)) rc = -EFAULT; obd_ioctl_freedata(buf, len); break; } - case LL_IOC_LOV_SETSTRIPE: - rc = lov_setstripe(exp, len, karg, uarg); - break; - case LL_IOC_LOV_GETSTRIPE: - rc = lov_getstripe(exp, karg, uarg); - break; - case LL_IOC_LOV_SETEA: - rc = lov_setea(exp, karg, uarg); - break; + case LL_IOC_LOV_GETSTRIPE: + rc = lov_getstripe(exp, karg, uarg); + break; case OBD_IOC_QUOTACTL: { struct if_quotactl *qctl = karg; struct lov_tgt_desc *tgt = NULL; struct obd_quotactl *oqctl; - if (qctl->qc_valid == QC_OSTIDX) { - if (qctl->qc_idx < 0 || count <= qctl->qc_idx) - RETURN(-EINVAL); + if (qctl->qc_valid == QC_OSTIDX) { + if (count <= qctl->qc_idx) + RETURN(-EINVAL); - tgt = lov->lov_tgts[qctl->qc_idx]; - if (!tgt || !tgt->ltd_exp) - RETURN(-EINVAL); + tgt = lov->lov_tgts[qctl->qc_idx]; + if (!tgt || !tgt->ltd_exp) + RETURN(-EINVAL); } else if (qctl->qc_valid == QC_UUID) { for (i = 0; i < count; i++) { tgt = lov->lov_tgts[i]; @@ -2127,11 +1619,16 @@ static int lov_iocontrol(unsigned int cmd, struct obd_export *exp, int len, for (i = 0; i < count; i++) { int err; + struct obd_device *osc_obd; /* OST was disconnected */ if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_exp) continue; + /* ll_umount_begin() sets force flag but for lov, not + * osc. 
Let's pass it through */ + osc_obd = class_exp2obd(lov->lov_tgts[i]->ltd_exp); + osc_obd->obd_force = obddev->obd_force; err = obd_iocontrol(cmd, lov->lov_tgts[i]->ltd_exp, len, karg, uarg); if (err == -ENODATA && cmd == OBD_IOC_POLL_QUOTACHECK) { @@ -2199,6 +1696,8 @@ obd_size fiemap_calc_fm_end_offset(struct ll_user_fiemap *fiemap, break; } } + if (stripe_no == -1) + return -EINVAL; /* If we have finished mapping on previous device, shift logical * offset to start of next device */ @@ -2302,7 +1801,7 @@ static int lov_fiemap(struct lov_obd *lov, __u32 keylen, void *key, int count_local; unsigned int get_num_extents = 0; int ost_index = 0, actual_start_stripe, start_stripe; - obd_size fm_start, fm_end, fm_length, fm_end_offset = 0; + obd_size fm_start, fm_end, fm_length, fm_end_offset; obd_size curr_loc; int current_extent = 0, rc = 0, i; int ost_eof = 0; /* EOF for object */ @@ -2311,13 +1810,34 @@ static int lov_fiemap(struct lov_obd *lov, __u32 keylen, void *key, int cur_stripe = 0, cur_stripe_wrap = 0, stripe_count; unsigned int buffer_size = FIEMAP_BUFFER_SIZE; - if (lsm == NULL) - GOTO(out, rc = 0); + if (!lsm_has_objects(lsm)) { + if (lsm && lsm_is_released(lsm) && (fm_key->fiemap.fm_start < + fm_key->oa.o_size)) { + /* released file, return a minimal FIEMAP if + * request fits in file-size. 
+ */ + fiemap->fm_mapped_extents = 1; + fiemap->fm_extents[0].fe_logical = + fm_key->fiemap.fm_start; + if (fm_key->fiemap.fm_start + fm_key->fiemap.fm_length < + fm_key->oa.o_size) + fiemap->fm_extents[0].fe_length = + fm_key->fiemap.fm_length; + else + fiemap->fm_extents[0].fe_length = + fm_key->oa.o_size - + fm_key->fiemap.fm_start; + fiemap->fm_extents[0].fe_flags |= + (FIEMAP_EXTENT_UNKNOWN | + FIEMAP_EXTENT_LAST); + } + GOTO(out, rc = 0); + } if (fiemap_count_to_size(fm_key->fiemap.fm_extent_count) < buffer_size) buffer_size = fiemap_count_to_size(fm_key->fiemap.fm_extent_count); - OBD_ALLOC(fm_local, buffer_size); + OBD_ALLOC_LARGE(fm_local, buffer_size); if (fm_local == NULL) GOTO(out, rc = -ENOMEM); lcl_fm_ext = &fm_local->fm_extents[0]; @@ -2338,14 +1858,17 @@ static int lov_fiemap(struct lov_obd *lov, __u32 keylen, void *key, last_stripe = fiemap_calc_last_stripe(lsm, fm_start, fm_end, actual_start_stripe, &stripe_count); - fm_end_offset = fiemap_calc_fm_end_offset(fiemap, lsm, fm_start, fm_end, - &start_stripe); - - if (fiemap->fm_extent_count == 0) { - get_num_extents = 1; - count_local = 0; - } - + fm_end_offset = fiemap_calc_fm_end_offset(fiemap, lsm, fm_start, + fm_end, &start_stripe); + if (fm_end_offset == -EINVAL) + GOTO(out, rc = -EINVAL); + + if (fiemap_count_to_size(fiemap->fm_extent_count) > *vallen) + fiemap->fm_extent_count = fiemap_size_to_count(*vallen); + if (fiemap->fm_extent_count == 0) { + get_num_extents = 1; + count_local = 0; + } /* Check each stripe */ for (cur_stripe = start_stripe, i = 0; i < stripe_count; i++, cur_stripe = (cur_stripe + 1) % lsm->lsm_stripe_count) { @@ -2404,8 +1927,7 @@ static int lov_fiemap(struct lov_obd *lov, __u32 keylen, void *key, fm_local->fm_mapped_extents = 0; fm_local->fm_flags = fiemap->fm_flags; - fm_key->oa.o_id = lsm->lsm_oinfo[cur_stripe]->loi_id; - fm_key->oa.o_seq = lsm->lsm_oinfo[cur_stripe]->loi_seq; + fm_key->oa.o_oi = lsm->lsm_oinfo[cur_stripe]->loi_oi; ost_index = 
lsm->lsm_oinfo[cur_stripe]->loi_ost_idx; if (ost_index < 0 || ost_index >=lov->desc.ld_tgt_count) @@ -2428,7 +1950,8 @@ static int lov_fiemap(struct lov_obd *lov, __u32 keylen, void *key, fm_local->fm_flags &= ~FIEMAP_FLAG_DEVICE_ORDER; memcpy(&fm_key->fiemap, fm_local, sizeof(*fm_local)); *vallen=fiemap_count_to_size(fm_local->fm_extent_count); - rc = obd_get_info(lov->lov_tgts[ost_index]->ltd_exp, + rc = obd_get_info(NULL, + lov->lov_tgts[ost_index]->ltd_exp, keylen, key, vallen, fm_local, lsm); if (rc != 0) GOTO(out, rc); @@ -2508,12 +2031,13 @@ skip_last_device_calc: fiemap->fm_mapped_extents = current_extent; out: - OBD_FREE(fm_local, buffer_size); + if (fm_local) + OBD_FREE_LARGE(fm_local, buffer_size); return rc; } -static int lov_get_info(struct obd_export *exp, __u32 keylen, - void *key, __u32 *vallen, void *val, +static int lov_get_info(const struct lu_env *env, struct obd_export *exp, + __u32 keylen, void *key, __u32 *vallen, void *val, struct lov_stripe_md *lsm) { struct obd_device *obddev = class_exp2obd(exp); @@ -2549,12 +2073,12 @@ static int lov_get_info(struct obd_export *exp, __u32 keylen, loi = lsm->lsm_oinfo[i]; if (!lov->lov_tgts[loi->loi_ost_idx]) continue; - if (lov->lov_tgts[loi->loi_ost_idx]->ltd_exp == - data->lock->l_conn_export && - osc_res_name_eq(loi->loi_id, loi->loi_seq, res_id)) { - *stripe = i; - GOTO(out, rc = 0); - } + if (lov->lov_tgts[loi->loi_ost_idx]->ltd_exp == + data->lock->l_conn_export && + ostid_res_name_eq(&loi->loi_oi, res_id)) { + *stripe = i; + GOTO(out, rc = 0); + } } LDLM_ERROR(data->lock, "lock on inode without such object"); dump_lsm(D_ERROR, lsm); @@ -2570,7 +2094,8 @@ static int lov_get_info(struct obd_export *exp, __u32 keylen, if (!tgt || !tgt->ltd_active) GOTO(out, rc = -ESRCH); - rc = obd_get_info(tgt->ltd_exp, keylen, key, &size, info->data, NULL); + rc = obd_get_info(env, tgt->ltd_exp, keylen, key, + &size, info->data, NULL); GOTO(out, rc = 0); } else if (KEY_IS(KEY_LOVDESC)) { struct lov_desc *desc_ret = 
val; @@ -2591,7 +2116,10 @@ static int lov_get_info(struct obd_export *exp, __u32 keylen, if (!tgt || !tgt->ltd_exp) GOTO(out, rc = -ESRCH); - *((__u64*)val) = tgt->ltd_exp->exp_connect_flags; + *((__u64 *)val) = exp_connect_flags(tgt->ltd_exp); + GOTO(out, rc = 0); + } else if (KEY_IS(KEY_TGT_COUNT)) { + *((int *)val) = lov->desc.ld_tgt_count; GOTO(out, rc = 0); } @@ -2602,9 +2130,9 @@ out: RETURN(rc); } -static int lov_set_info_async(struct obd_export *exp, obd_count keylen, - void *key, obd_count vallen, void *val, - struct ptlrpc_request_set *set) +static int lov_set_info_async(const struct lu_env *env, struct obd_export *exp, + obd_count keylen, void *key, obd_count vallen, + void *val, struct ptlrpc_request_set *set) { struct obd_device *obddev = class_exp2obd(exp); struct lov_obd *lov = &obddev->u.lov; @@ -2641,7 +2169,11 @@ static int lov_set_info_async(struct obd_export *exp, obd_count keylen, mds_con = 1; } else if (KEY_IS(KEY_CAPA_KEY)) { capa = 1; - } + } else if (KEY_IS(KEY_CACHE_SET)) { + LASSERT(lov->lov_cache == NULL); + lov->lov_cache = val; + do_inactive = 1; + } for (i = 0; i < count; i++, val = (char *)val + incr) { if (next_id) { @@ -2668,11 +2200,11 @@ static int lov_set_info_async(struct obd_export *exp, obd_count keylen, &tgt->ltd_uuid)) continue; - err = obd_set_info_async(tgt->ltd_exp, + err = obd_set_info_async(env, tgt->ltd_exp, keylen, key, sizeof(int), &mgi->group, set); } else if (next_id) { - err = obd_set_info_async(tgt->ltd_exp, + err = obd_set_info_async(env, tgt->ltd_exp, keylen, key, vallen, ((struct obd_id_info*)val)->data, set); } else if (capa) { @@ -2685,17 +2217,16 @@ static int lov_set_info_async(struct obd_export *exp, obd_count keylen, !obd_uuid_equals(info->uuid, &tgt->ltd_uuid)) continue; - err = obd_set_info_async(tgt->ltd_exp, keylen, key, - sizeof(*info->capa), + err = obd_set_info_async(env, tgt->ltd_exp, keylen, + key, sizeof(*info->capa), info->capa, set); - } else { /* Only want a specific OSC */ if (check_uuid 
&& !obd_uuid_equals(val, &tgt->ltd_uuid)) continue; - err = obd_set_info_async(tgt->ltd_exp, + err = obd_set_info_async(env, tgt->ltd_exp, keylen, key, vallen, val, set); } @@ -2713,150 +2244,198 @@ static int lov_set_info_async(struct obd_export *exp, obd_count keylen, RETURN(rc); } -int lov_test_and_clear_async_rc(struct lov_stripe_md *lsm) +void lov_stripe_lock(struct lov_stripe_md *md) +{ + LASSERT(md->lsm_lock_owner != current_pid()); + spin_lock(&md->lsm_lock); + LASSERT(md->lsm_lock_owner == 0); + md->lsm_lock_owner = current_pid(); +} + +void lov_stripe_unlock(struct lov_stripe_md *md) { - int i, rc = 0; + LASSERT(md->lsm_lock_owner == current_pid()); + md->lsm_lock_owner = 0; + spin_unlock(&md->lsm_lock); +} + +static int lov_quotactl(struct obd_device *obd, struct obd_export *exp, + struct obd_quotactl *oqctl) +{ + struct lov_obd *lov = &obd->u.lov; + struct lov_tgt_desc *tgt; + __u64 curspace = 0; + __u64 bhardlimit = 0; + int i, rc = 0; ENTRY; - for (i = 0; i < lsm->lsm_stripe_count; i++) { - struct lov_oinfo *loi = lsm->lsm_oinfo[i]; - if (loi->loi_ar.ar_rc && !rc) - rc = loi->loi_ar.ar_rc; - loi->loi_ar.ar_rc = 0; + if (oqctl->qc_cmd != LUSTRE_Q_QUOTAON && + oqctl->qc_cmd != LUSTRE_Q_QUOTAOFF && + oqctl->qc_cmd != Q_GETOQUOTA && + oqctl->qc_cmd != Q_INITQUOTA && + oqctl->qc_cmd != LUSTRE_Q_SETQUOTA && + oqctl->qc_cmd != Q_FINVALIDATE) { + CERROR("bad quota opc %x for lov obd", oqctl->qc_cmd); + RETURN(-EFAULT); } - RETURN(rc); -} -EXPORT_SYMBOL(lov_test_and_clear_async_rc); + /* for lov tgt */ + obd_getref(obd); + for (i = 0; i < lov->desc.ld_tgt_count; i++) { + int err; + + tgt = lov->lov_tgts[i]; -static int lov_extent_calc(struct obd_export *exp, struct lov_stripe_md *lsm, - int cmd, __u64 *offset) -{ - __u32 ssize = lsm->lsm_stripe_size; - __u64 start; - - start = *offset; - do_div(start, ssize); - start = start * ssize; - - CDEBUG(D_DLMTRACE, "offset "LPU64", stripe %u, start "LPU64 - ", end "LPU64"\n", *offset, ssize, start, - start + ssize - 
1); - if (cmd == OBD_CALC_STRIPE_END) { - *offset = start + ssize - 1; - } else if (cmd == OBD_CALC_STRIPE_START) { - *offset = start; - } else { - LBUG(); + if (!tgt) + continue; + + if (!tgt->ltd_active || tgt->ltd_reap) { + if (oqctl->qc_cmd == Q_GETOQUOTA && + lov->lov_tgts[i]->ltd_activate) { + rc = -ENETDOWN; + CERROR("ost %d is inactive\n", i); + } else { + CDEBUG(D_HA, "ost %d is inactive\n", i); + } + continue; + } + + err = obd_quotactl(tgt->ltd_exp, oqctl); + if (err) { + if (tgt->ltd_active && !rc) + rc = err; + continue; + } + + if (oqctl->qc_cmd == Q_GETOQUOTA) { + curspace += oqctl->qc_dqblk.dqb_curspace; + bhardlimit += oqctl->qc_dqblk.dqb_bhardlimit; + } } + obd_putref(obd); - RETURN(0); + if (oqctl->qc_cmd == Q_GETOQUOTA) { + oqctl->qc_dqblk.dqb_curspace = curspace; + oqctl->qc_dqblk.dqb_bhardlimit = bhardlimit; + } + RETURN(rc); } -void lov_stripe_lock(struct lov_stripe_md *md) +static int lov_quotacheck(struct obd_device *obd, struct obd_export *exp, + struct obd_quotactl *oqctl) { - LASSERT(md->lsm_lock_owner != cfs_curproc_pid()); - cfs_spin_lock(&md->lsm_lock); - LASSERT(md->lsm_lock_owner == 0); - md->lsm_lock_owner = cfs_curproc_pid(); -} -EXPORT_SYMBOL(lov_stripe_lock); + struct lov_obd *lov = &obd->u.lov; + int i, rc = 0; + ENTRY; -void lov_stripe_unlock(struct lov_stripe_md *md) -{ - LASSERT(md->lsm_lock_owner == cfs_curproc_pid()); - md->lsm_lock_owner = 0; - cfs_spin_unlock(&md->lsm_lock); + obd_getref(obd); + + for (i = 0; i < lov->desc.ld_tgt_count; i++) { + if (!lov->lov_tgts[i]) + continue; + + /* Skip quota check on the administratively disabled OSTs. 
*/ + if (!lov->lov_tgts[i]->ltd_activate) { + CWARN("lov idx %d was administratively disabled, " + "skip quotacheck on it.\n", i); + continue; + } + + if (!lov->lov_tgts[i]->ltd_active) { + CERROR("lov idx %d inactive\n", i); + rc = -EIO; + goto out; + } + } + + for (i = 0; i < lov->desc.ld_tgt_count; i++) { + int err; + + if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_activate) + continue; + + err = obd_quotacheck(lov->lov_tgts[i]->ltd_exp, oqctl); + if (err && !rc) + rc = err; + } + +out: + obd_putref(obd); + + RETURN(rc); } -EXPORT_SYMBOL(lov_stripe_unlock); - - -struct obd_ops lov_obd_ops = { - .o_owner = THIS_MODULE, - .o_setup = lov_setup, - .o_precleanup = lov_precleanup, - .o_cleanup = lov_cleanup, - //.o_process_config = lov_process_config, - .o_connect = lov_connect, - .o_disconnect = lov_disconnect, - .o_statfs = lov_statfs, - .o_statfs_async = lov_statfs_async, - .o_packmd = lov_packmd, - .o_unpackmd = lov_unpackmd, - .o_create = lov_create, - .o_destroy = lov_destroy, - .o_getattr = lov_getattr, - .o_getattr_async = lov_getattr_async, - .o_setattr = lov_setattr, - .o_setattr_async = lov_setattr_async, - .o_brw = lov_brw, - .o_merge_lvb = lov_merge_lvb, - .o_adjust_kms = lov_adjust_kms, - .o_punch = lov_punch, - .o_sync = lov_sync, - .o_enqueue = lov_enqueue, - .o_change_cbdata = lov_change_cbdata, - .o_find_cbdata = lov_find_cbdata, - .o_cancel = lov_cancel, - .o_cancel_unused = lov_cancel_unused, - .o_iocontrol = lov_iocontrol, - .o_get_info = lov_get_info, - .o_set_info_async = lov_set_info_async, - .o_extent_calc = lov_extent_calc, - .o_llog_init = lov_llog_init, - .o_llog_finish = lov_llog_finish, - .o_notify = lov_notify, - .o_pool_new = lov_pool_new, - .o_pool_rem = lov_pool_remove, - .o_pool_add = lov_pool_add, - .o_pool_del = lov_pool_del, - .o_getref = lov_getref, - .o_putref = lov_putref, -}; -static quota_interface_t *quota_interface; -extern quota_interface_t lov_quota_interface; +static struct obd_ops lov_obd_ops = { + .o_owner = 
THIS_MODULE, + .o_setup = lov_setup, + .o_precleanup = lov_precleanup, + .o_cleanup = lov_cleanup, + .o_connect = lov_connect, + .o_disconnect = lov_disconnect, + .o_statfs = lov_statfs, + .o_statfs_async = lov_statfs_async, + .o_packmd = lov_packmd, + .o_unpackmd = lov_unpackmd, + .o_create = lov_create, + .o_destroy = lov_destroy, + .o_getattr_async = lov_getattr_async, + .o_setattr_async = lov_setattr_async, + .o_change_cbdata = lov_change_cbdata, + .o_find_cbdata = lov_find_cbdata, + .o_iocontrol = lov_iocontrol, + .o_get_info = lov_get_info, + .o_set_info_async = lov_set_info_async, + .o_notify = lov_notify, + .o_pool_new = lov_pool_new, + .o_pool_rem = lov_pool_remove, + .o_pool_add = lov_pool_add, + .o_pool_del = lov_pool_del, + .o_getref = lov_getref, + .o_putref = lov_putref, + .o_quotactl = lov_quotactl, + .o_quotacheck = lov_quotacheck, +}; -cfs_mem_cache_t *lov_oinfo_slab; +struct kmem_cache *lov_oinfo_slab; extern struct lu_kmem_descr lov_caches[]; int __init lov_init(void) { - struct lprocfs_static_vars lvars = { 0 }; - int rc, rc2; - ENTRY; + bool enable_proc = true; + struct obd_type *type; + int rc; + ENTRY; /* print an address of _any_ initialized kernel symbol from this * module, to allow debugging with gdb that doesn't support data * symbols from modules.*/ - CDEBUG(D_CONSOLE, "Lustre LOV module (%p).\n", &lov_caches); + CDEBUG(D_INFO, "Lustre LOV module (%p).\n", &lov_caches); rc = lu_kmem_init(lov_caches); if (rc) return rc; - lov_oinfo_slab = cfs_mem_cache_create("lov_oinfo", - sizeof(struct lov_oinfo), - 0, CFS_SLAB_HWCACHE_ALIGN); + lov_oinfo_slab = kmem_cache_create("lov_oinfo", + sizeof(struct lov_oinfo), 0, + SLAB_HWCACHE_ALIGN, NULL); if (lov_oinfo_slab == NULL) { lu_kmem_fini(lov_caches); return -ENOMEM; } - lprocfs_lov_init_vars(&lvars); - cfs_request_module("lquota"); - quota_interface = PORTAL_SYMBOL_GET(lov_quota_interface); - init_obd_quota_ops(quota_interface, &lov_obd_ops); + type = class_search_type(LUSTRE_LOD_NAME); + if (type 
!= NULL && type->typ_procsym != NULL) + enable_proc = false; - rc = class_register_type(&lov_obd_ops, NULL, lvars.module_vars, - LUSTRE_LOV_NAME, &lov_device_type); + rc = class_register_type(&lov_obd_ops, NULL, enable_proc, NULL, +#ifndef HAVE_ONLY_PROCFS_SEQ + NULL, +#endif + LUSTRE_LOV_NAME, &lov_device_type); if (rc) { - if (quota_interface) - PORTAL_SYMBOL_PUT(lov_quota_interface); - rc2 = cfs_mem_cache_destroy(lov_oinfo_slab); - LASSERT(rc2 == 0); + kmem_cache_destroy(lov_oinfo_slab); lu_kmem_fini(lov_caches); } @@ -2866,17 +2445,9 @@ int __init lov_init(void) #ifdef __KERNEL__ static void /*__exit*/ lov_exit(void) { - int rc; - - lu_device_type_fini(&lov_device_type); - lu_kmem_fini(lov_caches); - - if (quota_interface) - PORTAL_SYMBOL_PUT(lov_quota_interface); - - class_unregister_type(LUSTRE_LOV_NAME); - rc = cfs_mem_cache_destroy(lov_oinfo_slab); - LASSERT(rc == 0); + class_unregister_type(LUSTRE_LOV_NAME); + kmem_cache_destroy(lov_oinfo_slab); + lu_kmem_fini(lov_caches); } MODULE_AUTHOR("Sun Microsystems, Inc. ");