X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Flov%2Flov_obd.c;h=61fdb7b6b0a7e747ebac72360624852c1a8c7b52;hp=ca0f68c6a2edaba662275cf6cc3a7ef9fd7585ed;hb=8ea6a840e00e40220ab769ec329e10f0ad190c09;hpb=f2e5761d5b64f360b114d434f46fdc50d607a55c diff --git a/lustre/lov/lov_obd.c b/lustre/lov/lov_obd.c index ca0f68c..61fdb7b 100644 --- a/lustre/lov/lov_obd.c +++ b/lustre/lov/lov_obd.c @@ -26,7 +26,7 @@ * GPL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. */ /* @@ -68,44 +68,60 @@ #include "lov_internal.h" - /* Keep a refcount of lov->tgt usage to prevent racing with addition/deletion. Any function that expects lov_tgts to remain stationary must take a ref. */ -void lov_getref(struct obd_device *obd) +static void lov_getref(struct obd_device *obd) { struct lov_obd *lov = &obd->u.lov; /* nobody gets through here until lov_putref is done */ - mutex_down(&lov->lov_lock); - atomic_inc(&lov->lov_refcount); - mutex_up(&lov->lov_lock); + cfs_mutex_down(&lov->lov_lock); + cfs_atomic_inc(&lov->lov_refcount); + cfs_mutex_up(&lov->lov_lock); return; } -static void __lov_del_obd(struct obd_device *obd, __u32 index); +static void __lov_del_obd(struct obd_device *obd, struct lov_tgt_desc *tgt); -void lov_putref(struct obd_device *obd) +static void lov_putref(struct obd_device *obd) { struct lov_obd *lov = &obd->u.lov; - mutex_down(&lov->lov_lock); + + cfs_mutex_down(&lov->lov_lock); /* ok to dec to 0 more than once -- ltd_exp's will be null */ - if (atomic_dec_and_test(&lov->lov_refcount) && lov->lov_death_row) { + if (cfs_atomic_dec_and_test(&lov->lov_refcount) && lov->lov_death_row) { + CFS_LIST_HEAD(kill); int i; + struct lov_tgt_desc *tgt, *n; CDEBUG(D_CONFIG, "destroying %d lov targets\n", lov->lov_death_row); for (i = 0; i < lov->desc.ld_tgt_count; i++) { - if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_reap) + tgt = lov->lov_tgts[i]; + + if (!tgt || !tgt->ltd_reap) continue; - /* Disconnect and delete from list */ - __lov_del_obd(obd, i); + cfs_list_add(&tgt->ltd_kill, &kill); + /* XXX - right now there is a dependency on ld_tgt_count + * being the maximum tgt index for computing the + * mds_max_easize. So we can't shrink it. */ + lov_ost_pool_remove(&lov->lov_packed, i); + lov->lov_tgts[i] = NULL; lov->lov_death_row--; } + cfs_mutex_up(&lov->lov_lock); + + cfs_list_for_each_entry_safe(tgt, n, &kill, ltd_kill) { + cfs_list_del(&tgt->ltd_kill); + /* Disconnect */ + __lov_del_obd(obd, tgt); + } + } else { + cfs_mutex_up(&lov->lov_lock); } - mutex_up(&lov->lov_lock); } static int lov_set_osc_active(struct obd_device *obd, struct obd_uuid *uuid, - int activate); + enum obd_notify_event ev); static int lov_notify(struct obd_device *obd, struct obd_device *watched, enum obd_notify_event ev, void *data); @@ -115,12 +131,10 @@ int lov_connect_obd(struct obd_device *obd, __u32 index, int activate, struct obd_connect_data *data) { struct lov_obd *lov = &obd->u.lov; - struct obd_uuid tgt_uuid; + struct obd_uuid *tgt_uuid; struct obd_device *tgt_obd; - struct obd_uuid lov_osc_uuid = { "LOV_OSC_UUID" }; - struct lustre_handle conn = {0, }; + static struct obd_uuid lov_osc_uuid = { "LOV_OSC_UUID" }; struct obd_import *imp; - #ifdef __KERNEL__ cfs_proc_dir_entry_t *lov_proc_dir; #endif @@ -130,20 +144,17 @@ int lov_connect_obd(struct obd_device *obd, __u32 index, int activate, if (!lov->lov_tgts[index]) RETURN(-EINVAL); - tgt_uuid = lov->lov_tgts[index]->ltd_uuid; + tgt_uuid = &lov->lov_tgts[index]->ltd_uuid; + tgt_obd = lov->lov_tgts[index]->ltd_obd; - tgt_obd = class_find_client_obd(&tgt_uuid, LUSTRE_OSC_NAME, - &obd->obd_uuid); - - if (!tgt_obd) { - CERROR("Target %s not attached\n", obd_uuid2str(&tgt_uuid)); - RETURN(-EINVAL); - } if (!tgt_obd->obd_set_up) { - CERROR("Target %s not set up\n", obd_uuid2str(&tgt_uuid)); + CERROR("Target %s not set up\n", obd_uuid2str(tgt_uuid)); RETURN(-EINVAL); } + /* override the sp_me from lov */ + tgt_obd->u.cli.cl_sp_me = lov->lov_sp_me; + if (data && (data->ocd_connect_flags & OBD_CONNECT_INDEX)) data->ocd_index = index; @@ -159,68 +170,53 @@ int lov_connect_obd(struct obd_device *obd, __u32 index, int activate, ptlrpc_activate_import(imp); } + rc = obd_register_observer(tgt_obd, obd); + if (rc) { + CERROR("Target %s register_observer error %d\n", + obd_uuid2str(tgt_uuid), rc); + RETURN(rc); + } + + if (imp->imp_invalid) { CERROR("not connecting OSC %s; administratively " - "disabled\n", obd_uuid2str(&tgt_uuid)); - rc = obd_register_observer(tgt_obd, obd); - if (rc) { - CERROR("Target %s register_observer error %d; " - "will not be able to reactivate\n", - obd_uuid2str(&tgt_uuid), rc); - } + "disabled\n", obd_uuid2str(tgt_uuid)); RETURN(0); } - rc = obd_connect(NULL, &conn, tgt_obd, &lov_osc_uuid, data, NULL); - if (rc) { + rc = obd_connect(NULL, &lov->lov_tgts[index]->ltd_exp, tgt_obd, + &lov_osc_uuid, data, NULL); + if (rc || !lov->lov_tgts[index]->ltd_exp) { CERROR("Target %s connect error %d\n", - obd_uuid2str(&tgt_uuid), rc); - RETURN(rc); - } - lov->lov_tgts[index]->ltd_exp = class_conn2export(&conn); - if (!lov->lov_tgts[index]->ltd_exp) { - CERROR("Target %s: null export!\n", obd_uuid2str(&tgt_uuid)); + obd_uuid2str(tgt_uuid), rc); RETURN(-ENODEV); } - rc = obd_register_observer(tgt_obd, obd); - if (rc) { - CERROR("Target %s register_observer error %d\n", - obd_uuid2str(&tgt_uuid), rc); - obd_disconnect(lov->lov_tgts[index]->ltd_exp); - lov->lov_tgts[index]->ltd_exp = NULL; - RETURN(rc); - } - lov->lov_tgts[index]->ltd_reap = 0; - if (activate) { - lov->lov_tgts[index]->ltd_active = 1; - lov->desc.ld_active_tgt_count++; - lov->lov_tgts[index]->ltd_exp->exp_obd->obd_inactive = 0; - } + CDEBUG(D_CONFIG, "Connected tgt idx %d %s (%s) %sactive\n", index, - obd_uuid2str(&tgt_uuid), tgt_obd->obd_name, activate ? "":"in"); + obd_uuid2str(tgt_uuid), tgt_obd->obd_name, activate ? "":"in"); #ifdef __KERNEL__ lov_proc_dir = lprocfs_srch(obd->obd_proc_entry, "target_obds"); if (lov_proc_dir) { - struct obd_device *osc_obd = class_conn2obd(&conn); + struct obd_device *osc_obd = lov->lov_tgts[index]->ltd_exp->exp_obd; cfs_proc_dir_entry_t *osc_symlink; - char name[MAX_STRING_SIZE]; LASSERT(osc_obd != NULL); LASSERT(osc_obd->obd_magic == OBD_DEVICE_MAGIC); LASSERT(osc_obd->obd_type->typ_name != NULL); - snprintf(name, MAX_STRING_SIZE, "../../../%s/%s", - osc_obd->obd_type->typ_name, - osc_obd->obd_name); - osc_symlink = lprocfs_add_symlink(osc_obd->obd_name, lov_proc_dir, - name); + + osc_symlink = lprocfs_add_symlink(osc_obd->obd_name, + lov_proc_dir, + "../../../%s/%s", + osc_obd->obd_type->typ_name, + osc_obd->obd_name); if (osc_symlink == NULL) { CERROR("could not register LOV target " - "/proc/fs/lustre/%s/%s/target_obds/%s.", - obd->obd_type->typ_name, obd->obd_name, - osc_obd->obd_name); + "/proc/fs/lustre/%s/%s/target_obds/%s.", + obd->obd_type->typ_name, obd->obd_name, + osc_obd->obd_name); lprocfs_remove(&lov_proc_dir); } } @@ -234,21 +230,24 @@ int lov_connect_obd(struct obd_device *obd, __u32 index, int activate, } static int lov_connect(const struct lu_env *env, - struct lustre_handle *conn, struct obd_device *obd, + struct obd_export **exp, struct obd_device *obd, struct obd_uuid *cluuid, struct obd_connect_data *data, void *localdata) { struct lov_obd *lov = &obd->u.lov; struct lov_tgt_desc *tgt; + struct lustre_handle conn; int i, rc; ENTRY; CDEBUG(D_CONFIG, "connect #%d\n", lov->lov_connects); - rc = class_connect(conn, obd, cluuid); + rc = class_connect(&conn, obd, cluuid); if (rc) RETURN(rc); + *exp = class_conn2export(&conn); + /* Why should there ever be more than 1 connect? */ lov->lov_connects++; LASSERT(lov->lov_connects == 1); @@ -257,7 +256,7 @@ static int lov_connect(const struct lu_env *env, if (data) lov->lov_ocd = *data; - lov_getref(obd); + obd_getref(obd); for (i = 0; i < lov->desc.ld_tgt_count; i++) { tgt = lov->lov_tgts[i]; if (!tgt || obd_uuid_empty(&tgt->ltd_uuid)) @@ -274,37 +273,33 @@ static int lov_connect(const struct lu_env *env, continue; rc = lov_notify(obd, lov->lov_tgts[i]->ltd_exp->exp_obd, - OBD_NOTIFY_ACTIVE, (void *)&i); + OBD_NOTIFY_CONNECT, (void *)&i); if (rc) { CERROR("%s error sending notify %d\n", obd->obd_name, rc); } } - lov_putref(obd); + obd_putref(obd); RETURN(0); } -static int lov_disconnect_obd(struct obd_device *obd, __u32 index) +static int lov_disconnect_obd(struct obd_device *obd, struct lov_tgt_desc *tgt) { cfs_proc_dir_entry_t *lov_proc_dir; struct lov_obd *lov = &obd->u.lov; struct obd_device *osc_obd; int rc; - ENTRY; - if (lov->lov_tgts[index] == NULL) - RETURN(-EINVAL); - - osc_obd = class_exp2obd(lov->lov_tgts[index]->ltd_exp); + osc_obd = class_exp2obd(tgt->ltd_exp); CDEBUG(D_CONFIG, "%s: disconnecting target %s\n", obd->obd_name, osc_obd->obd_name); - if (lov->lov_tgts[index]->ltd_active) { - lov->lov_tgts[index]->ltd_active = 0; + if (tgt->ltd_active) { + tgt->ltd_active = 0; lov->desc.ld_active_tgt_count--; - lov->lov_tgts[index]->ltd_exp->exp_obd->obd_inactive = 1; + tgt->ltd_exp->exp_obd->obd_inactive = 1; } lov_proc_dir = lprocfs_srch(obd->obd_proc_entry, "target_obds"); @@ -333,16 +328,16 @@ static int lov_disconnect_obd(struct obd_device *obd, __u32 index) obd_register_observer(osc_obd, NULL); - rc = obd_disconnect(lov->lov_tgts[index]->ltd_exp); + rc = obd_disconnect(tgt->ltd_exp); if (rc) { CERROR("Target %s disconnect error %d\n", - lov_uuid2str(lov, index), rc); + tgt->ltd_uuid.uuid, rc); rc = 0; } - qos_del_tgt(obd, index); + qos_del_tgt(obd, tgt); - lov->lov_tgts[index]->ltd_exp = NULL; + tgt->ltd_exp = NULL; RETURN(0); } @@ -366,7 +361,7 @@ static int lov_disconnect(struct obd_export *exp) /* Let's hold another reference so lov_del_obd doesn't spin through putref every time */ - lov_getref(obd); + obd_getref(obd); for (i = 0; i < lov->desc.ld_tgt_count; i++) { if (lov->lov_tgts[i] && lov->lov_tgts[i]->ltd_exp) { @@ -374,7 +369,7 @@ static int lov_disconnect(struct obd_export *exp) lov_del_target(obd, i, 0, lov->lov_tgts[i]->ltd_gen); } } - lov_putref(obd); + obd_putref(obd); out: rc = class_disconnect(exp); /* bz 9811 */ @@ -389,17 +384,17 @@ out: * any >= 0 : is log target index */ static int lov_set_osc_active(struct obd_device *obd, struct obd_uuid *uuid, - int activate) + enum obd_notify_event ev) { struct lov_obd *lov = &obd->u.lov; struct lov_tgt_desc *tgt; - int index; + int index, activate, active; ENTRY; - CDEBUG(D_INFO, "Searching in lov %p for uuid %s (activate=%d)\n", - lov, uuid->uuid, activate); + CDEBUG(D_INFO, "Searching in lov %p for uuid %s event(%d)\n", + lov, uuid->uuid, ev); - lov_getref(obd); + obd_getref(obd); for (index = 0; index < lov->desc.ld_tgt_count; index++) { tgt = lov->lov_tgts[index]; if (!tgt || !tgt->ltd_exp) @@ -415,29 +410,46 @@ static int lov_set_osc_active(struct obd_device *obd, struct obd_uuid *uuid, if (index == lov->desc.ld_tgt_count) GOTO(out, index = -EINVAL); - if (lov->lov_tgts[index]->ltd_active == activate) { - CDEBUG(D_INFO, "OSC %s already %sactive!\n", uuid->uuid, - activate ? "" : "in"); - GOTO(out, index); - } + if (ev == OBD_NOTIFY_DEACTIVATE || ev == OBD_NOTIFY_ACTIVATE) { + activate = (ev == OBD_NOTIFY_ACTIVATE) ? 1 : 0; - CDEBUG(D_CONFIG, "Marking OSC %s %sactive\n", obd_uuid2str(uuid), - activate ? "" : "in"); + if (lov->lov_tgts[index]->ltd_activate == activate) { + CDEBUG(D_INFO, "OSC %s already %sactivate!\n", + uuid->uuid, activate ? "" : "de"); + } else { + lov->lov_tgts[index]->ltd_activate = activate; + CDEBUG(D_CONFIG, "%sactivate OSC %s\n", + activate ? "" : "de", obd_uuid2str(uuid)); + } - lov->lov_tgts[index]->ltd_active = activate; + } else if (ev == OBD_NOTIFY_INACTIVE || ev == OBD_NOTIFY_ACTIVE) { + active = (ev == OBD_NOTIFY_ACTIVE) ? 1 : 0; - if (activate) { - lov->desc.ld_active_tgt_count++; - lov->lov_tgts[index]->ltd_exp->exp_obd->obd_inactive = 0; + if (lov->lov_tgts[index]->ltd_active == active) { + CDEBUG(D_INFO, "OSC %s already %sactive!\n", + uuid->uuid, active ? "" : "in"); + GOTO(out, index); + } else { + CDEBUG(D_CONFIG, "Marking OSC %s %sactive\n", + obd_uuid2str(uuid), active ? "" : "in"); + } + + lov->lov_tgts[index]->ltd_active = active; + if (active) { + lov->desc.ld_active_tgt_count++; + lov->lov_tgts[index]->ltd_exp->exp_obd->obd_inactive = 0; + } else { + lov->desc.ld_active_tgt_count--; + lov->lov_tgts[index]->ltd_exp->exp_obd->obd_inactive = 1; + } + /* remove any old qos penalty */ + lov->lov_tgts[index]->ltd_qos.ltq_penalty = 0; } else { - lov->desc.ld_active_tgt_count--; - lov->lov_tgts[index]->ltd_exp->exp_obd->obd_inactive = 1; + CERROR("Unknown event(%d) for uuid %s", ev, uuid->uuid); } - /* remove any old qos penalty */ - lov->lov_tgts[index]->ltd_qos.ltq_penalty = 0; out: - lov_putref(obd); + obd_putref(obd); RETURN(index); } @@ -446,8 +458,9 @@ static int lov_notify(struct obd_device *obd, struct obd_device *watched, { int rc = 0; ENTRY; - - if (ev == OBD_NOTIFY_ACTIVE || ev == OBD_NOTIFY_INACTIVE) { + + if (ev == OBD_NOTIFY_ACTIVE || ev == OBD_NOTIFY_INACTIVE || + ev == OBD_NOTIFY_ACTIVATE || ev == OBD_NOTIFY_DEACTIVATE) { struct obd_uuid *uuid; LASSERT(watched); @@ -463,10 +476,9 @@ static int lov_notify(struct obd_device *obd, struct obd_device *watched, /* Set OSC as active before notifying the observer, so the * observer can use the OSC normally. */ - rc = lov_set_osc_active(obd, uuid, ev == OBD_NOTIFY_ACTIVE); + rc = lov_set_osc_active(obd, uuid, ev); if (rc < 0) { - CERROR("%sactivation of %s failed: %d\n", - (ev == OBD_NOTIFY_ACTIVE) ? "" : "de", + CERROR("event(%d) of %s failed: %d\n", ev, obd_uuid2str(uuid), rc); RETURN(rc); } @@ -481,39 +493,44 @@ static int lov_notify(struct obd_device *obd, struct obd_device *watched, /* NULL watched means all osc's in the lov (only for syncs) */ /* sync event should be send lov idx as data */ struct lov_obd *lov = &obd->u.lov; - struct obd_device *tgt_obd; - int i; - lov_getref(obd); + int i, is_sync; + + data = &i; + is_sync = (ev == OBD_NOTIFY_SYNC) || + (ev == OBD_NOTIFY_SYNC_NONBLOCK); + + obd_getref(obd); for (i = 0; i < lov->desc.ld_tgt_count; i++) { if (!lov->lov_tgts[i]) continue; - if ((ev == OBD_NOTIFY_SYNC) || - (ev == OBD_NOTIFY_SYNC_NONBLOCK)) - data = &i; - - tgt_obd = class_exp2obd(lov->lov_tgts[i]->ltd_exp); + /* don't send sync event if target not + * connected/activated */ + if (is_sync && !lov->lov_tgts[i]->ltd_active) + continue; - rc = obd_notify_observer(obd, tgt_obd, ev, data); + rc = obd_notify_observer(obd, lov->lov_tgts[i]->ltd_obd, + ev, data); if (rc) { CERROR("%s: notify %s of %s failed %d\n", obd->obd_name, obd->obd_observer->obd_name, - tgt_obd->obd_name, rc); - break; + lov->lov_tgts[i]->ltd_obd->obd_name, + rc); } } - lov_putref(obd); + obd_putref(obd); } RETURN(rc); } -int lov_add_target(struct obd_device *obd, struct obd_uuid *uuidp, - __u32 index, int gen, int active) +static int lov_add_target(struct obd_device *obd, struct obd_uuid *uuidp, + __u32 index, int gen, int active) { struct lov_obd *lov = &obd->u.lov; struct lov_tgt_desc *tgt; + struct obd_device *tgt_obd; int rc; ENTRY; @@ -526,13 +543,18 @@ int lov_add_target(struct obd_device *obd, struct obd_uuid *uuidp, RETURN(-EINVAL); } - mutex_down(&lov->lov_lock); + tgt_obd = class_find_client_obd(uuidp, LUSTRE_OSC_NAME, + &obd->obd_uuid); + if (tgt_obd == NULL) + RETURN(-EINVAL); + + cfs_mutex_down(&lov->lov_lock); if ((index < lov->lov_tgt_size) && (lov->lov_tgts[index] != NULL)) { tgt = lov->lov_tgts[index]; CERROR("UUID %s already assigned at LOV target index %d\n", obd_uuid2str(&tgt->ltd_uuid), index); - mutex_up(&lov->lov_lock); + cfs_mutex_up(&lov->lov_lock); RETURN(-EEXIST); } @@ -546,7 +568,7 @@ int lov_add_target(struct obd_device *obd, struct obd_uuid *uuidp, newsize = newsize << 1; OBD_ALLOC(newtgts, sizeof(*newtgts) * newsize); if (newtgts == NULL) { - mutex_up(&lov->lov_lock); + cfs_mutex_up(&lov->lov_lock); RETURN(-ENOMEM); } @@ -571,12 +593,19 @@ int lov_add_target(struct obd_device *obd, struct obd_uuid *uuidp, OBD_ALLOC_PTR(tgt); if (!tgt) { - mutex_up(&lov->lov_lock); + cfs_mutex_up(&lov->lov_lock); RETURN(-ENOMEM); } - memset(tgt, 0, sizeof(*tgt)); + rc = lov_ost_pool_add(&lov->lov_packed, index, lov->lov_tgt_size); + if (rc) { + cfs_mutex_up(&lov->lov_lock); + OBD_FREE_PTR(tgt); + RETURN(rc); + } + tgt->ltd_uuid = *uuidp; + tgt->ltd_obd = tgt_obd; /* XXX - add a sanity check on the generation number. */ tgt->ltd_gen = gen; tgt->ltd_index = index; @@ -585,15 +614,13 @@ int lov_add_target(struct obd_device *obd, struct obd_uuid *uuidp, if (index >= lov->desc.ld_tgt_count) lov->desc.ld_tgt_count = index + 1; - rc = lov_ost_pool_add(&lov->lov_packed, index, lov->lov_tgt_size); - if (rc) - RETURN(rc); - - mutex_up(&lov->lov_lock); + cfs_mutex_up(&lov->lov_lock); CDEBUG(D_CONFIG, "idx=%d ltd_gen=%d ld_tgt_count=%d\n", index, tgt->ltd_gen, lov->desc.ld_tgt_count); + rc = obd_notify(obd, tgt_obd, OBD_NOTIFY_CREATE, &index); + if (lov->lov_connects == 0) { /* lov_connect hasn't been called yet. We'll do the lov_connect_obd on this target when that fn first runs, @@ -601,7 +628,7 @@ int lov_add_target(struct obd_device *obd, struct obd_uuid *uuidp, RETURN(0); } - lov_getref(obd); + obd_getref(obd); rc = lov_connect_obd(obd, index, active, &lov->lov_ocd); if (rc) @@ -612,7 +639,7 @@ int lov_add_target(struct obd_device *obd, struct obd_uuid *uuidp, GOTO(out, rc = 0); rc = lov_notify(obd, tgt->ltd_exp->exp_obd, - active ? OBD_NOTIFY_ACTIVE : OBD_NOTIFY_INACTIVE, + active ? OBD_NOTIFY_CONNECT : OBD_NOTIFY_INACTIVE, (void *)&index); out: @@ -621,7 +648,7 @@ out: obd_uuid2str(&tgt->ltd_uuid)); lov_del_target(obd, index, 0, 0); } - lov_putref(obd); + obd_putref(obd); RETURN(rc); } @@ -640,7 +667,7 @@ int lov_del_target(struct obd_device *obd, __u32 index, RETURN(-EINVAL); } - lov_getref(obd); + obd_getref(obd); if (!lov->lov_tgts[index]) { CERROR("LOV target at index %d is not setup.\n", index); @@ -661,19 +688,16 @@ int lov_del_target(struct obd_device *obd, __u32 index, lov->lov_tgts[index]->ltd_reap = 1; lov->lov_death_row++; - /* we really delete it from lov_putref */ + /* we really delete it from obd_putref */ out: - lov_putref(obd); + obd_putref(obd); RETURN(rc); } -/* We are holding lov_lock */ -static void __lov_del_obd(struct obd_device *obd, __u32 index) +static void __lov_del_obd(struct obd_device *obd, struct lov_tgt_desc *tgt) { - struct lov_obd *lov = &obd->u.lov; struct obd_device *osc_obd; - struct lov_tgt_desc *tgt = lov->lov_tgts[index]; LASSERT(tgt); LASSERT(tgt->ltd_reap); @@ -681,18 +705,12 @@ static void __lov_del_obd(struct obd_device *obd, __u32 index) osc_obd = class_exp2obd(tgt->ltd_exp); CDEBUG(D_CONFIG, "Removing tgt %s : %s\n", - lov_uuid2str(lov, index), + tgt->ltd_uuid.uuid, osc_obd ? osc_obd->obd_name : ""); if (tgt->ltd_exp) - lov_disconnect_obd(obd, index); - - /* XXX - right now there is a dependency on ld_tgt_count being the - * maximum tgt index for computing the mds_max_easize. So we can't - * shrink it. */ + lov_disconnect_obd(obd, tgt); - lov_ost_pool_remove(&lov->lov_packed, index); - lov->lov_tgts[index] = NULL; OBD_FREE_PTR(tgt); /* Manual cleanup - no cleanup logs to clean up the osc's. We must @@ -751,7 +769,6 @@ int lov_setup(struct obd_device *obd, struct lustre_cfg *lcfg) struct lprocfs_static_vars lvars = { 0 }; struct lov_desc *desc; struct lov_obd *lov = &obd->u.lov; - int count; int rc; ENTRY; @@ -782,43 +799,43 @@ int lov_setup(struct obd_device *obd, struct lustre_cfg *lcfg) lov_fix_desc(desc); - /* Because of 64-bit divide/mod operations only work with a 32-bit - * divisor in a 32-bit kernel, we cannot support a stripe width - * of 4GB or larger on 32-bit CPUs. */ - count = desc->ld_default_stripe_count; - if ((count > 0 ? count : desc->ld_tgt_count) * - desc->ld_default_stripe_size > 0xffffffff) { - CERROR("LOV: stripe width "LPU64"x%u > 4294967295 bytes\n", - desc->ld_default_stripe_size, count); - RETURN(-EINVAL); - } - desc->ld_active_tgt_count = 0; lov->desc = *desc; lov->lov_tgt_size = 0; - sema_init(&lov->lov_lock, 1); - atomic_set(&lov->lov_refcount, 0); + cfs_sema_init(&lov->lov_lock, 1); + cfs_atomic_set(&lov->lov_refcount, 0); CFS_INIT_LIST_HEAD(&lov->lov_qos.lq_oss_list); - init_rwsem(&lov->lov_qos.lq_rw_sem); + cfs_init_rwsem(&lov->lov_qos.lq_rw_sem); + lov->lov_sp_me = LUSTRE_SP_CLI; lov->lov_qos.lq_dirty = 1; lov->lov_qos.lq_rr.lqr_dirty = 1; lov->lov_qos.lq_reset = 1; /* Default priority is toward free space balance */ lov->lov_qos.lq_prio_free = 232; - - lov->lov_pools_hash_body = lustre_hash_init("POOLS", 7, 7, - &pool_hash_operations, 0); + /* Default threshold for rr (roughly 17%) */ + lov->lov_qos.lq_threshold_rr = 43; + /* Init statfs fields */ + OBD_ALLOC_PTR(lov->lov_qos.lq_statfs_data); + if (NULL == lov->lov_qos.lq_statfs_data) + RETURN(-ENOMEM); + cfs_waitq_init(&lov->lov_qos.lq_statfs_waitq); + + lov->lov_pools_hash_body = cfs_hash_create("POOLS", HASH_POOLS_CUR_BITS, + HASH_POOLS_MAX_BITS, + HASH_POOLS_BKT_BITS, 0, + CFS_HASH_MIN_THETA, + CFS_HASH_MAX_THETA, + &pool_hash_operations, + CFS_HASH_DEFAULT); CFS_INIT_LIST_HEAD(&lov->lov_pool_list); lov->lov_pool_count = 0; rc = lov_ost_pool_init(&lov->lov_packed, 0); if (rc) - RETURN(rc); + GOTO(out_free_statfs, rc); rc = lov_ost_pool_init(&lov->lov_qos.lq_rr.lqr_pool, 0); - if (rc) { - lov_ost_pool_free(&lov->lov_packed); - RETURN(rc); - } + if (rc) + GOTO(out_free_lov_packed, rc); lprocfs_lov_init_vars(&lvars); lprocfs_obd_setup(obd, lvars.obd_vars); @@ -837,16 +854,23 @@ int lov_setup(struct obd_device *obd, struct lustre_cfg *lcfg) NULL, NULL); RETURN(0); + +out_free_lov_packed: + lov_ost_pool_free(&lov->lov_packed); +out_free_statfs: + OBD_FREE_PTR(lov->lov_qos.lq_statfs_data); + return rc; } static int lov_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage) { int rc = 0; + struct lov_obd *lov = &obd->u.lov; + ENTRY; switch (stage) { case OBD_CLEANUP_EARLY: { - struct lov_obd *lov = &obd->u.lov; int i; for (i = 0; i < lov->desc.ld_tgt_count; i++) { if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_active) @@ -868,45 +892,48 @@ static int lov_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage) static int lov_cleanup(struct obd_device *obd) { struct lov_obd *lov = &obd->u.lov; - struct list_head *pos, *tmp; + cfs_list_t *pos, *tmp; struct pool_desc *pool; - lprocfs_obd_cleanup(obd); - - list_for_each_safe(pos, tmp, &lov->lov_pool_list) { - pool = list_entry(pos, struct pool_desc, pool_list); - list_del(&pool->pool_list); - lov_ost_pool_free(&(pool->pool_rr.lqr_pool)); - lov_ost_pool_free(&(pool->pool_obds)); - OBD_FREE_PTR(pool); + cfs_list_for_each_safe(pos, tmp, &lov->lov_pool_list) { + pool = cfs_list_entry(pos, struct pool_desc, pool_list); + /* free pool structs */ + CDEBUG(D_INFO, "delete pool %p\n", pool); + lov_pool_del(obd, pool->pool_name); } + cfs_hash_putref(lov->lov_pools_hash_body); lov_ost_pool_free(&(lov->lov_qos.lq_rr.lqr_pool)); lov_ost_pool_free(&lov->lov_packed); - lustre_hash_exit(lov->lov_pools_hash_body); if (lov->lov_tgts) { int i; + obd_getref(obd); for (i = 0; i < lov->desc.ld_tgt_count; i++) { if (!lov->lov_tgts[i]) continue; /* Inactive targets may never have connected */ if (lov->lov_tgts[i]->ltd_active || - atomic_read(&lov->lov_refcount)) + cfs_atomic_read(&lov->lov_refcount)) /* We should never get here - these should have been removed in the disconnect. */ CERROR("lov tgt %d not cleaned!" " deathrow=%d, lovrc=%d\n", i, lov->lov_death_row, - atomic_read(&lov->lov_refcount)); + cfs_atomic_read(&lov->lov_refcount)); lov_del_target(obd, i, 0, 0); } + obd_putref(obd); OBD_FREE(lov->lov_tgts, sizeof(*lov->lov_tgts) * lov->lov_tgt_size); lov->lov_tgt_size = 0; } + /* clear pools parent proc entry only after all pools is killed */ + lprocfs_obd_cleanup(obd); + + OBD_FREE_PTR(lov->lov_qos.lq_statfs_data); RETURN(0); } @@ -955,8 +982,8 @@ int lov_process_config_base(struct obd_device *obd, struct lustre_cfg *lcfg, rc = class_process_proc_param(PARAM_LOV, lvars.obd_vars, lcfg, obd); - if (rc > 0) - rc = 0; + if (rc > 0) + rc = 0; GOTO(out, rc); } case LCFG_POOL_NEW: @@ -976,7 +1003,7 @@ out: } #ifndef log2 -#define log2(n) ffz(~(n)) +#define log2(n) cfs_ffz(~(n)) #endif static int lov_clear_orphans(struct obd_export *export, struct obdo *src_oa, @@ -1004,7 +1031,7 @@ static int lov_clear_orphans(struct obd_export *export, struct obdo *src_oa, ost_uuid->uuid); } - lov_getref(export->exp_obd); + obd_getref(export->exp_obd); for (i = 0; i < lov->desc.ld_tgt_count; i++) { struct lov_stripe_md obj_md; struct lov_stripe_md *obj_mdp = &obj_md; @@ -1045,7 +1072,7 @@ static int lov_clear_orphans(struct obd_export *export, struct obdo *src_oa, if (ost_uuid) break; } - lov_putref(export->exp_obd); + obd_putref(export->exp_obd); OBDO_FREE(tmp_oa); RETURN(rc); @@ -1099,8 +1126,7 @@ static int lov_create(struct obd_export *exp, struct obdo *src_oa, struct obd_info oinfo; struct lov_request_set *set = NULL; struct lov_request *req; - struct obd_statfs osfs; - __u64 maxage; + struct l_wait_info lwi = { 0 }; int rc = 0; ENTRY; @@ -1118,7 +1144,7 @@ static int lov_create(struct obd_export *exp, struct obdo *src_oa, if (!lov->desc.ld_active_tgt_count) RETURN(-EIO); - lov_getref(exp->exp_obd); + obd_getref(exp->exp_obd); /* Recreate a specific object id at the given OST index */ if ((src_oa->o_valid & OBD_MD_FLFLAGS) && (src_oa->o_flags & OBD_FL_RECREATE_OBJS)) { @@ -1126,22 +1152,35 @@ static int lov_create(struct obd_export *exp, struct obdo *src_oa, GOTO(out, rc); } - maxage = cfs_time_shift_64(-lov->desc.ld_qos_maxage); - obd_statfs_rqset(exp->exp_obd, &osfs, maxage, OBD_STATFS_NODELAY); + /* issue statfs rpcs if the osfs data is older than qos_maxage - 1s, + * later in alloc_qos(), we will wait for those rpcs to complete if + * the osfs age is older than 2 * qos_maxage */ + qos_statfs_update(exp->exp_obd, + cfs_time_shift_64(-lov->desc.ld_qos_maxage + + OBD_STATFS_CACHE_SECONDS), + 0); rc = lov_prep_create_set(exp, &oinfo, ea, src_oa, oti, &set); if (rc) GOTO(out, rc); - list_for_each_entry(req, &set->set_list, rq_link) { + cfs_list_for_each_entry(req, &set->set_list, rq_link) { /* XXX: LOV STACKING: use real "obj_mdp" sub-data */ - rc = obd_create(lov->lov_tgts[req->rq_idx]->ltd_exp, - req->rq_oi.oi_oa, &req->rq_oi.oi_md, oti); - lov_update_create_set(set, req, rc); + rc = obd_create_async(lov->lov_tgts[req->rq_idx]->ltd_exp, + &req->rq_oi, &req->rq_oi.oi_md, oti); } + + /* osc_create have timeout equ obd_timeout/2 so waiting don't be + * longer then this */ + l_wait_event(set->set_waitq, lov_finished_set(set), &lwi); + + /* we not have ptlrpc set for assign set->interpret and should + * be call interpret function himself. calling from cb_create_update + * not permited because lov_fini_create_set can sleep for long time, + * but we must avoid sleeping in ptlrpcd interpret function. */ rc = lov_fini_create_set(set, ea); out: - lov_putref(exp->exp_obd); + obd_putref(exp->exp_obd); RETURN(rc); } @@ -1149,9 +1188,8 @@ out: do { \ LASSERT((lsmp) != NULL); \ LASSERTF(((lsmp)->lsm_magic == LOV_MAGIC_V1 || \ - (lsmp)->lsm_magic == LOV_MAGIC_V3 || \ - (lsmp)->lsm_magic == LOV_MAGIC_JOIN), "%p->lsm_magic=%x\n", \ - (lsmp), (lsmp)->lsm_magic); \ + (lsmp)->lsm_magic == LOV_MAGIC_V3), \ + "%p->lsm_magic=%x\n", (lsmp), (lsmp)->lsm_magic); \ } while (0) static int lov_destroy(struct obd_export *exp, struct obdo *oa, @@ -1161,7 +1199,7 @@ static int lov_destroy(struct obd_export *exp, struct obdo *oa, struct lov_request_set *set; struct obd_info oinfo; struct lov_request *req; - struct list_head *pos; + cfs_list_t *pos; struct lov_obd *lov; int rc = 0, err = 0; ENTRY; @@ -1177,13 +1215,13 @@ static int lov_destroy(struct obd_export *exp, struct obdo *oa, } lov = &exp->exp_obd->u.lov; - lov_getref(exp->exp_obd); + obd_getref(exp->exp_obd); rc = lov_prep_destroy_set(exp, &oinfo, oa, lsm, oti, &set); if (rc) GOTO(out, rc); - list_for_each (pos, &set->set_list) { - req = list_entry(pos, struct lov_request, rq_link); + cfs_list_for_each (pos, &set->set_list) { + req = cfs_list_entry(pos, struct lov_request, rq_link); if (oa->o_valid & OBD_MD_FLCOOKIE) oti->oti_logcookies = set->set_cookies + req->rq_stripe; @@ -1207,7 +1245,7 @@ static int lov_destroy(struct obd_export *exp, struct obdo *oa, } err = lov_fini_destroy_set(set); out: - lov_putref(exp->exp_obd); + obd_putref(exp->exp_obd); RETURN(rc ? rc : err); } @@ -1215,7 +1253,7 @@ static int lov_getattr(struct obd_export *exp, struct obd_info *oinfo) { struct lov_request_set *set; struct lov_request *req; - struct list_head *pos; + cfs_list_t *pos; struct lov_obd *lov; int err = 0, rc = 0; ENTRY; @@ -1232,8 +1270,8 @@ static int lov_getattr(struct obd_export *exp, struct obd_info *oinfo) if (rc) RETURN(rc); - list_for_each (pos, &set->set_list) { - req = list_entry(pos, struct lov_request, rq_link); + cfs_list_for_each (pos, &set->set_list) { + req = cfs_list_entry(pos, struct lov_request, rq_link); CDEBUG(D_INFO, "objid "LPX64"[%d] has subobj "LPX64" at idx " "%u\n", oinfo->oi_oa->o_id, req->rq_stripe, @@ -1276,7 +1314,7 @@ static int lov_getattr_async(struct obd_export *exp, struct obd_info *oinfo, { struct lov_request_set *lovset; struct lov_obd *lov; - struct list_head *pos; + cfs_list_t *pos; struct lov_request *req; int rc = 0, err; ENTRY; @@ -1297,8 +1335,8 @@ static int lov_getattr_async(struct obd_export *exp, struct obd_info *oinfo, oinfo->oi_md->lsm_object_id, oinfo->oi_md->lsm_stripe_count, oinfo->oi_md->lsm_stripe_size); - list_for_each (pos, &lovset->set_list) { - req = list_entry(pos, struct lov_request, rq_link); + cfs_list_for_each (pos, &lovset->set_list) { + req = cfs_list_entry(pos, struct lov_request, rq_link); CDEBUG(D_INFO, "objid "LPX64"[%d] has subobj "LPX64" at idx " "%u\n", oinfo->oi_oa->o_id, req->rq_stripe, @@ -1314,7 +1352,7 @@ static int lov_getattr_async(struct obd_export *exp, struct obd_info *oinfo, } } - if (!list_empty(&rqset->set_requests)) { + if (!cfs_list_empty(&rqset->set_requests)) { LASSERT(rc == 0); LASSERT (rqset->set_interpret == NULL); rqset->set_interpret = lov_getattr_interpret; @@ -1333,7 +1371,7 @@ static int lov_setattr(struct obd_export *exp, struct obd_info *oinfo, { struct lov_request_set *set; struct lov_obd *lov; - struct list_head *pos; + cfs_list_t *pos; struct lov_request *req; int err = 0, rc = 0; ENTRY; @@ -1357,8 +1395,8 @@ static int lov_setattr(struct obd_export *exp, struct obd_info *oinfo, if (rc) RETURN(rc); - list_for_each (pos, &set->set_list) { - req = list_entry(pos, struct lov_request, rq_link); + cfs_list_for_each (pos, &set->set_list) { + req = cfs_list_entry(pos, struct lov_request, rq_link); rc = obd_setattr(lov->lov_tgts[req->rq_idx]->ltd_exp, &req->rq_oi, NULL); @@ -1399,7 +1437,7 @@ static int lov_setattr_async(struct obd_export *exp, struct obd_info *oinfo, { struct lov_request_set *set; struct lov_request *req; - struct list_head *pos; + cfs_list_t *pos; struct lov_obd *lov; int rc = 0; ENTRY; @@ -1423,8 +1461,8 @@ static int lov_setattr_async(struct obd_export *exp, struct obd_info *oinfo, oinfo->oi_md->lsm_object_id, oinfo->oi_md->lsm_stripe_count, oinfo->oi_md->lsm_stripe_size); - list_for_each (pos, &set->set_list) { - req = list_entry(pos, struct lov_request, rq_link); + cfs_list_for_each (pos, &set->set_list) { + req = cfs_list_entry(pos, struct lov_request, rq_link); if (oinfo->oi_oa->o_valid & OBD_MD_FLCOOKIE) oti->oti_logcookies = set->set_cookies + req->rq_stripe; @@ -1446,7 +1484,7 @@ static int lov_setattr_async(struct obd_export *exp, struct obd_info *oinfo, } /* If we are not waiting for responses on async requests, return. */ - if (rc || !rqset || list_empty(&rqset->set_requests)) { + if (rc || !rqset || cfs_list_empty(&rqset->set_requests)) { int err; if (rc) set->set_completes = 0; @@ -1483,7 +1521,7 @@ static int lov_punch(struct obd_export *exp, struct obd_info *oinfo, { struct lov_request_set *set; struct lov_obd *lov; - struct list_head *pos; + cfs_list_t *pos; struct lov_request *req; int rc = 0; ENTRY; @@ -1499,8 +1537,8 @@ static int lov_punch(struct obd_export *exp, struct obd_info *oinfo, if (rc) RETURN(rc); - list_for_each (pos, &set->set_list) { - req = list_entry(pos, struct lov_request, rq_link); + cfs_list_for_each (pos, &set->set_list) { + req = cfs_list_entry(pos, struct lov_request, rq_link); rc = obd_punch(lov->lov_tgts[req->rq_idx]->ltd_exp, &req->rq_oi, NULL, rqset); @@ -1513,7 +1551,7 @@ static int lov_punch(struct obd_export *exp, struct obd_info *oinfo, } } - if (rc || list_empty(&rqset->set_requests)) { + if (rc || cfs_list_empty(&rqset->set_requests)) { int err; err = lov_fini_punch_set(set); RETURN(rc ? rc : err); @@ -1526,49 +1564,71 @@ static int lov_punch(struct obd_export *exp, struct obd_info *oinfo, RETURN(0); } -static int lov_sync(struct obd_export *exp, struct obdo *oa, - struct lov_stripe_md *lsm, obd_off start, obd_off end, - void *capa) +static int lov_sync_interpret(struct ptlrpc_request_set *rqset, + void *data, int rc) { - struct lov_request_set *set; - struct obd_info oinfo; + struct lov_request_set *lovset = data; + int err; + ENTRY; + + if (rc) + lovset->set_completes = 0; + err = lov_fini_sync_set(lovset); + RETURN(rc ?: err); +} + +static int lov_sync(struct obd_export *exp, struct obd_info *oinfo, + obd_off start, obd_off end, + struct ptlrpc_request_set *rqset) +{ + struct lov_request_set *set = NULL; struct lov_obd *lov; - struct list_head *pos; + cfs_list_t *pos; struct lov_request *req; - int err = 0, rc = 0; + int rc = 0; ENTRY; - ASSERT_LSM_MAGIC(lsm); + ASSERT_LSM_MAGIC(oinfo->oi_md); + LASSERT(rqset != NULL); if (!exp->exp_obd) RETURN(-ENODEV); lov = &exp->exp_obd->u.lov; - rc = lov_prep_sync_set(exp, &oinfo, oa, lsm, start, end, &set); + rc = lov_prep_sync_set(exp, oinfo, start, end, &set); if (rc) RETURN(rc); - list_for_each (pos, &set->set_list) { - req = list_entry(pos, struct lov_request, rq_link); + CDEBUG(D_INFO, "fsync objid "LPX64" ["LPX64", "LPX64"]\n", + set->set_oi->oi_oa->o_id, start, end); - rc = obd_sync(lov->lov_tgts[req->rq_idx]->ltd_exp, - req->rq_oi.oi_oa, NULL, + cfs_list_for_each (pos, &set->set_list) { + req = cfs_list_entry(pos, struct lov_request, rq_link); + + rc = obd_sync(lov->lov_tgts[req->rq_idx]->ltd_exp, &req->rq_oi, req->rq_oi.oi_policy.l_extent.start, - req->rq_oi.oi_policy.l_extent.end, capa); - err = lov_update_common_set(set, req, rc); - if (err) { + req->rq_oi.oi_policy.l_extent.end, rqset); + if (rc) { CERROR("error: fsync objid "LPX64" subobj "LPX64 " on OST idx %d: rc = %d\n", set->set_oi->oi_oa->o_id, req->rq_oi.oi_oa->o_id, req->rq_idx, rc); - if (!rc) - rc = err; + break; } } - err = lov_fini_sync_set(set); - if (!rc) - rc = err; - RETURN(rc); + + /* If we are not waiting for responses on async requests, return. */ + if (rc || cfs_list_empty(&rqset->set_requests)) { + int err = lov_fini_sync_set(set); + + RETURN(rc ?: err); + } + + LASSERT(rqset->set_interpret == NULL); + rqset->set_interpret = lov_sync_interpret; + rqset->set_arg = (void *)set; + + RETURN(0); } static int lov_brw_check(struct lov_obd *lov, struct obd_info *lov_oinfo, @@ -1610,7 +1670,7 @@ static int lov_brw(int cmd, struct obd_export *exp, struct obd_info *oinfo, { struct lov_request_set *set; struct lov_request *req; - struct list_head *pos; + cfs_list_t *pos; struct lov_obd *lov = &exp->exp_obd->u.lov; int err, rc = 0; ENTRY; @@ -1626,10 +1686,10 @@ static int lov_brw(int cmd, struct obd_export *exp, struct obd_info *oinfo, if (rc) RETURN(rc); - list_for_each (pos, &set->set_list) { + cfs_list_for_each (pos, &set->set_list) { struct obd_export *sub_exp; struct brw_page *sub_pga; - req = list_entry(pos, struct lov_request, rq_link); + req = cfs_list_entry(pos, struct lov_request, rq_link); sub_exp = lov->lov_tgts[req->rq_idx]->ltd_exp; sub_pga = set->set_pga + req->rq_pgaidx; @@ -1662,7 +1722,7 @@ static int lov_enqueue(struct obd_export *exp, struct obd_info *oinfo, ldlm_mode_t mode = einfo->ei_mode; struct lov_request_set *set; struct lov_request *req; - struct list_head *pos; + cfs_list_t *pos; struct lov_obd *lov; ldlm_error_t rc; ENTRY; @@ -1682,8 +1742,8 @@ static int lov_enqueue(struct obd_export *exp, struct obd_info *oinfo, if (rc) RETURN(rc); - list_for_each (pos, &set->set_list) { - req = list_entry(pos, struct lov_request, rq_link); + cfs_list_for_each (pos, &set->set_list) { + req = cfs_list_entry(pos, struct lov_request, rq_link); rc = obd_enqueue(lov->lov_tgts[req->rq_idx]->ltd_exp, &req->rq_oi, einfo, rqset); @@ -1691,7 +1751,7 @@ static int lov_enqueue(struct obd_export *exp, struct obd_info *oinfo, GOTO(out, rc); } - if (rqset && !list_empty(&rqset->set_requests)) { + if (rqset && !cfs_list_empty(&rqset->set_requests)) { LASSERT(rc == 0); LASSERT(rqset->set_interpret == NULL); rqset->set_interpret = lov_enqueue_interpret; @@ -1716,8 +1776,6 @@ static int lov_change_cbdata(struct obd_export *exp, if (!exp || !exp->exp_obd) RETURN(-ENODEV); - LASSERT_MDS_GROUP(lsm->lsm_object_gr); - lov = &exp->exp_obd->u.lov; for (i = 0; i < lsm->lsm_stripe_count; i++) { struct lov_stripe_md submd; @@ -1728,8 +1786,9 @@ static int lov_change_cbdata(struct obd_export *exp, continue; } + LASSERT_SEQ_IS_MDT(loi->loi_seq); submd.lsm_object_id = loi->loi_id; - submd.lsm_object_gr = lsm->lsm_object_gr; + submd.lsm_object_seq = loi->loi_seq; submd.lsm_stripe_count = 0; rc = obd_change_cbdata(lov->lov_tgts[loi->loi_ost_idx]->ltd_exp, &submd, it, data); @@ -1737,14 +1796,53 @@ static int lov_change_cbdata(struct obd_export *exp, RETURN(rc); } +/* find any ldlm lock of the inode in lov + * return 0 not find + * 1 find one + * < 0 error */ +static int lov_find_cbdata(struct obd_export *exp, + struct lov_stripe_md *lsm, ldlm_iterator_t it, + void *data) +{ + struct lov_obd *lov; + int rc = 0, i; + ENTRY; + + ASSERT_LSM_MAGIC(lsm); + + if (!exp || !exp->exp_obd) + RETURN(-ENODEV); + + lov = &exp->exp_obd->u.lov; + for (i = 0; i < lsm->lsm_stripe_count; i++) { + struct lov_stripe_md submd; + struct lov_oinfo *loi = lsm->lsm_oinfo[i]; + + if (!lov->lov_tgts[loi->loi_ost_idx]) { + CDEBUG(D_HA, "lov idx %d NULL \n", loi->loi_ost_idx); + continue; + } + + LASSERT_SEQ_IS_MDT(loi->loi_seq); + submd.lsm_object_id = loi->loi_id; + submd.lsm_object_seq = loi->loi_seq; + submd.lsm_stripe_count = 0; + rc = obd_find_cbdata(lov->lov_tgts[loi->loi_ost_idx]->ltd_exp, + &submd, it, data); + if (rc != 0) + RETURN(rc); + } + RETURN(rc); +} + static int lov_cancel(struct obd_export *exp, struct lov_stripe_md *lsm, __u32 mode, struct lustre_handle *lockh) { struct lov_request_set *set; struct obd_info oinfo; struct lov_request *req; - struct list_head *pos; - struct lov_obd *lov = &exp->exp_obd->u.lov; + cfs_list_t *pos; + struct lov_obd *lov; struct lustre_handle *lov_lockhp; int err = 0, rc = 0; ENTRY; @@ -1754,15 +1852,15 @@ static int lov_cancel(struct obd_export *exp, struct lov_stripe_md *lsm, if (!exp || !exp->exp_obd) RETURN(-ENODEV); - LASSERT_MDS_GROUP(lsm->lsm_object_gr); + LASSERT_SEQ_IS_MDT(lsm->lsm_object_seq); LASSERT(lockh); lov = &exp->exp_obd->u.lov; rc = lov_prep_cancel_set(exp, &oinfo, lsm, mode, lockh, &set); if (rc) RETURN(rc); - list_for_each (pos, &set->set_list) { - req = list_entry(pos, struct lov_request, rq_link); + cfs_list_for_each (pos, &set->set_list) { + req = cfs_list_entry(pos, struct lov_request, rq_link); lov_lockhp = set->set_lockh->llh_handles + req->rq_stripe; rc = obd_cancel(lov->lov_tgts[req->rq_idx]->ltd_exp, @@ -1784,7 +1882,7 @@ static int lov_cancel(struct obd_export *exp, struct lov_stripe_md *lsm, static int lov_cancel_unused(struct obd_export *exp, struct lov_stripe_md *lsm, - int flags, void *opaque) + ldlm_cancel_flags_t flags, void *opaque) { struct lov_obd *lov; int rc = 0, i; @@ -1810,7 +1908,7 @@ static int lov_cancel_unused(struct obd_export *exp, ASSERT_LSM_MAGIC(lsm); - LASSERT_MDS_GROUP(lsm->lsm_object_gr); + LASSERT_SEQ_IS_MDT(lsm->lsm_object_seq); for (i = 0; i < lsm->lsm_stripe_count; i++) { struct lov_stripe_md submd; struct lov_oinfo *loi = lsm->lsm_oinfo[i]; @@ -1825,7 +1923,7 @@ static int lov_cancel_unused(struct obd_export *exp, CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx); submd.lsm_object_id = loi->loi_id; - submd.lsm_object_gr = lsm->lsm_object_gr; + submd.lsm_object_seq = loi->loi_seq; submd.lsm_stripe_count = 0; err = obd_cancel_unused(lov->lov_tgts[loi->loi_ost_idx]->ltd_exp, &submd, flags, opaque); @@ -1840,8 +1938,7 @@ static int lov_cancel_unused(struct obd_export *exp, RETURN(rc); } -static int lov_statfs_interpret(struct ptlrpc_request_set *rqset, - void *data, int rc) +int lov_statfs_interpret(struct ptlrpc_request_set *rqset, void *data, int rc) { struct lov_request_set *lovset = (struct lov_request_set *)data; int err; @@ -1859,7 +1956,7 @@ static int lov_statfs_async(struct obd_device *obd, struct obd_info *oinfo, { struct lov_request_set *set; struct lov_request *req; - struct list_head *pos; + cfs_list_t *pos; struct lov_obd *lov; int rc = 0; ENTRY; @@ -1872,10 +1969,10 @@ static int lov_statfs_async(struct obd_device *obd, struct obd_info *oinfo, if (rc) RETURN(rc); - list_for_each (pos, &set->set_list) { + cfs_list_for_each (pos, &set->set_list) { struct obd_device *osc_obd; - req = list_entry(pos, struct lov_request, rq_link); + req = cfs_list_entry(pos, struct lov_request, rq_link); osc_obd = class_exp2obd(lov->lov_tgts[req->rq_idx]->ltd_exp); rc = obd_statfs_async(osc_obd, &req->rq_oi, max_age, rqset); @@ -1883,7 +1980,7 @@ static int lov_statfs_async(struct obd_device *obd, struct obd_info *oinfo, break; } - if (rc || list_empty(&rqset->set_requests)) { + if (rc || cfs_list_empty(&rqset->set_requests)) { int err; if (rc) set->set_completes = 0; @@ -1939,8 +2036,6 @@ static int lov_iocontrol(unsigned int cmd, struct obd_export *exp, int len, __u32 index; memcpy(&index, data->ioc_inlbuf2, sizeof(__u32)); - LASSERT(data->ioc_plen1 == sizeof(struct obd_statfs)); - if ((index >= count)) RETURN(-ENODEV); @@ -1954,16 +2049,21 @@ static int lov_iocontrol(unsigned int cmd, struct obd_export *exp, int len, if (!osc_obd) RETURN(-EINVAL); + /* copy UUID */ + if (cfs_copy_to_user(data->ioc_pbuf2, obd2cli_tgt(osc_obd), + min((int) data->ioc_plen2, + (int) sizeof(struct obd_uuid)))) + RETURN(-EFAULT); + /* got statfs data */ rc = obd_statfs(osc_obd, &stat_buf, - cfs_time_current_64() - HZ, 0); + cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS), + 0); if (rc) RETURN(rc); - if (copy_to_user(data->ioc_pbuf1, &stat_buf, data->ioc_plen1)) - RETURN(-EFAULT); - /* copy UUID */ - if (copy_to_user(data->ioc_pbuf2, obd2cli_tgt(osc_obd), - data->ioc_plen2)) + if (cfs_copy_to_user(data->ioc_pbuf1, &stat_buf, + min((int) data->ioc_plen1, + (int) sizeof(stat_buf)))) RETURN(-EFAULT); break; } @@ -2007,13 +2107,13 @@ static int lov_iocontrol(unsigned int cmd, struct obd_export *exp, int len, *genp = lov->lov_tgts[i]->ltd_gen; } - if (copy_to_user((void *)uarg, buf, len)) + if (cfs_copy_to_user((void *)uarg, buf, len)) rc = -EFAULT; obd_ioctl_freedata(buf, len); break; } case LL_IOC_LOV_SETSTRIPE: - rc = lov_setstripe(exp, karg, uarg); + rc = lov_setstripe(exp, len, karg, uarg); break; case LL_IOC_LOV_GETSTRIPE: rc = lov_getstripe(exp, karg, uarg); @@ -2076,11 +2176,16 @@ static int lov_iocontrol(unsigned int cmd, struct obd_export *exp, int len, for (i = 0; i < count; i++) { int err; + struct obd_device *osc_obd; /* OST was disconnected */ if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_exp) continue; + /* ll_umount_begin() sets force flag but for lov, not + * osc. Let's pass it through */ + osc_obd = class_exp2obd(lov->lov_tgts[i]->ltd_exp); + osc_obd->obd_force = obddev->obd_force; err = obd_iocontrol(cmd, lov->lov_tgts[i]->ltd_exp, len, karg, uarg); if (err == -ENODATA && cmd == OBD_IOC_POLL_QUOTACHECK) { @@ -2266,7 +2371,7 @@ static int lov_fiemap(struct lov_obd *lov, __u32 keylen, void *key, if (fiemap_count_to_size(fm_key->fiemap.fm_extent_count) < buffer_size) buffer_size = fiemap_count_to_size(fm_key->fiemap.fm_extent_count); - OBD_ALLOC(fm_local, buffer_size); + OBD_ALLOC_LARGE(fm_local, buffer_size); if (fm_local == NULL) GOTO(out, rc = -ENOMEM); lcl_fm_ext = &fm_local->fm_extents[0]; @@ -2354,13 +2459,14 @@ static int lov_fiemap(struct lov_obd *lov, __u32 keylen, void *key, fm_local->fm_flags = fiemap->fm_flags; fm_key->oa.o_id = lsm->lsm_oinfo[cur_stripe]->loi_id; + fm_key->oa.o_seq = lsm->lsm_oinfo[cur_stripe]->loi_seq; ost_index = lsm->lsm_oinfo[cur_stripe]->loi_ost_idx; if (ost_index < 0 || ost_index >=lov->desc.ld_tgt_count) GOTO(out, rc = -EINVAL); /* If OST is inactive, return extent with UNKNOWN flag */ - if (lov && !lov->lov_tgts[ost_index]->ltd_active) { + if (!lov->lov_tgts[ost_index]->ltd_active) { fm_local->fm_flags |= FIEMAP_EXTENT_LAST; fm_local->fm_mapped_extents = 1; @@ -2456,7 +2562,7 @@ skip_last_device_calc: fiemap->fm_mapped_extents = current_extent; out: - OBD_FREE(fm_local, buffer_size); + OBD_FREE_LARGE(fm_local, buffer_size); return rc; } @@ -2472,7 +2578,7 @@ static int lov_get_info(struct obd_export *exp, __u32 keylen, if (!vallen || !val) RETURN(-EFAULT); - lov_getref(obddev); + obd_getref(obddev); if (KEY_IS(KEY_LOCK_TO_STRIPE)) { struct { @@ -2499,7 +2605,7 @@ static int lov_get_info(struct obd_export *exp, __u32 keylen, continue; if (lov->lov_tgts[loi->loi_ost_idx]->ltd_exp == data->lock->l_conn_export && - osc_res_name_eq(loi->loi_id, loi->loi_gr, res_id)) { + osc_res_name_eq(loi->loi_id, loi->loi_seq, res_id)) { *stripe = i; GOTO(out, rc = 0); } @@ -2528,12 +2634,25 @@ static int lov_get_info(struct obd_export *exp, __u32 keylen, } else if (KEY_IS(KEY_FIEMAP)) { rc = lov_fiemap(lov, keylen, key, vallen, val, lsm); GOTO(out, rc); + } else if (KEY_IS(KEY_CONNECT_FLAG)) { + struct lov_tgt_desc *tgt; + __u64 ost_idx = *((__u64*)val); + + LASSERT(*vallen == sizeof(__u64)); + LASSERT(ost_idx < lov->desc.ld_tgt_count); + tgt = lov->lov_tgts[ost_idx]; + + if (!tgt || !tgt->ltd_exp) + GOTO(out, rc = -ESRCH); + + *((__u64*)val) = tgt->ltd_exp->exp_connect_flags; + GOTO(out, rc = 0); } rc = -EINVAL; out: - lov_putref(obddev); + obd_putref(obddev); RETURN(rc); } @@ -2548,7 +2667,7 @@ static int lov_set_info_async(struct obd_export *exp, obd_count keylen, struct lov_tgt_desc *tgt; unsigned incr, check_uuid, do_inactive, no_set; - unsigned next_id = 0, mds_con = 0; + unsigned next_id = 0, mds_con = 0, capa = 0; ENTRY; incr = check_uuid = do_inactive = no_set = 0; @@ -2559,7 +2678,7 @@ static int lov_set_info_async(struct obd_export *exp, obd_count keylen, RETURN(-ENOMEM); } - lov_getref(obddev); + obd_getref(obddev); count = lov->desc.ld_tgt_count; if (KEY_IS(KEY_NEXT_ID)) { @@ -2570,12 +2689,12 @@ static int lov_set_info_async(struct obd_export *exp, obd_count keylen, next_id = 1; } else if (KEY_IS(KEY_CHECKSUM)) { do_inactive = 1; - } else if (KEY_IS(KEY_UNLINKED)) { - check_uuid = val ? 1 : 0; } else if (KEY_IS(KEY_EVICT_BY_NID)) { /* use defaults: do_inactive = incr = 0; */ } else if (KEY_IS(KEY_MDS_CONN)) { mds_con = 1; + } else if (KEY_IS(KEY_CAPA_KEY)) { + capa = 1; } for (i = 0; i < count; i++, val = (char *)val + incr) { @@ -2610,7 +2729,21 @@ static int lov_set_info_async(struct obd_export *exp, obd_count keylen, err = obd_set_info_async(tgt->ltd_exp, keylen, key, vallen, ((struct obd_id_info*)val)->data, set); - } else { + } else if (capa) { + struct mds_capa_info *info = (struct mds_capa_info*)val; + + LASSERT(vallen == sizeof(*info)); + + /* Only want a specific OSC */ + if (info->uuid && + !obd_uuid_equals(info->uuid, &tgt->ltd_uuid)) + continue; + + err = obd_set_info_async(tgt->ltd_exp, keylen, key, + sizeof(*info->capa), + info->capa, set); + + } else { /* Only want a specific OSC */ if (check_uuid && !obd_uuid_equals(val, &tgt->ltd_uuid)) @@ -2624,7 +2757,7 @@ static int lov_set_info_async(struct obd_export *exp, obd_count keylen, rc = err; } - lov_putref(obddev); + obd_putref(obddev); if (no_set) { err = ptlrpc_set_wait(set); if (!rc) @@ -2634,21 +2767,6 @@ static int lov_set_info_async(struct obd_export *exp, obd_count keylen, RETURN(rc); } -static int lov_checkmd(struct obd_export *exp, struct obd_export *md_exp, - struct lov_stripe_md *lsm) -{ - int rc; - ENTRY; - - if (!lsm) - RETURN(0); - LASSERT(md_exp); - LASSERT(lsm_op_find(lsm->lsm_magic) != NULL); - rc = lsm_op_find(lsm->lsm_magic)->lsm_revalidate(lsm, md_exp->exp_obd); - - RETURN(rc); -} - int lov_test_and_clear_async_rc(struct lov_stripe_md *lsm) { int i, rc = 0; @@ -2689,99 +2807,10 @@ static int lov_extent_calc(struct obd_export *exp, struct lov_stripe_md *lsm, RETURN(0); } - -#if 0 -struct lov_multi_wait { - struct ldlm_lock *lock; - wait_queue_t wait; - int completed; - int generation; -}; - -int lov_complete_many(struct obd_export *exp, struct lov_stripe_md *lsm, - struct lustre_handle *lockh) -{ - struct lov_lock_handles *lov_lockh = NULL; - struct lustre_handle *lov_lockhp; - struct lov_obd *lov; - struct lov_oinfo *loi; - struct lov_multi_wait *queues; - int rc = 0, i; - ENTRY; - - ASSERT_LSM_MAGIC(lsm); - - if (!exp || !exp->exp_obd) - RETURN(-ENODEV); - - LASSERT(lockh != NULL); - if (lsm->lsm_stripe_count > 1) { - lov_lockh = lov_handle2llh(lockh); - if (lov_lockh == NULL) { - CERROR("LOV: invalid lov lock handle %p\n", lockh); - RETURN(-EINVAL); - } - - lov_lockhp = lov_lockh->llh_handles; - } else { - lov_lockhp = lockh; - } - - OBD_ALLOC(queues, lsm->lsm_stripe_count * sizeof(*queues)); - if (queues == NULL) - GOTO(out, rc = -ENOMEM); - - lov = &exp->exp_obd->u.lov; - for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; - i++, loi++, lov_lockhp++) { - struct ldlm_lock *lock; - struct obd_device *obd; - - lock = ldlm_handle2lock_long(lov_lockhp, 0); - if (lock == NULL) { - CDEBUG(D_HA, "lov idx %d subobj "LPX64" no lock?\n", - loi->loi_ost_idx, loi->loi_id); - queues[i].completed = 1; - continue; - } - - queues[i].lock = lock; - init_waitqueue_entry(&(queues[i].wait), current); - add_wait_queue(lock->l_waitq, &(queues[i].wait)); - - obd = class_exp2obd(lock->l_conn_export); - if (obd != NULL) - imp = obd->u.cli.cl_import; - if (imp != NULL) { - spin_lock(&imp->imp_lock); - queues[i].generation = imp->imp_generation; - spin_unlock(&imp->imp_lock); - } - } - - lwi = LWI_TIMEOUT_INTR(obd_timeout * HZ, ldlm_expired_completion_wait, - interrupted_completion_wait, &lwd); - rc = l_wait_event_added(check_multi_complete(queues, lsm), &lwi); - - for (i = 0; i < lsm->lsm_stripe_count; i++) - remove_wait_queue(lock->l_waitq, &(queues[i].wait)); - - if (rc == -EINTR || rc == -ETIMEDOUT) { - - - } - - out: - if (lov_lockh != NULL) - lov_llh_put(lov_lockh); - RETURN(rc); -} -#endif - void lov_stripe_lock(struct lov_stripe_md *md) { LASSERT(md->lsm_lock_owner != cfs_curproc_pid()); - spin_lock(&md->lsm_lock); + cfs_spin_lock(&md->lsm_lock); LASSERT(md->lsm_lock_owner == 0); md->lsm_lock_owner = cfs_curproc_pid(); } @@ -2791,7 +2820,7 @@ void lov_stripe_unlock(struct lov_stripe_md *md) { LASSERT(md->lsm_lock_owner == cfs_curproc_pid()); md->lsm_lock_owner = 0; - spin_unlock(&md->lsm_lock); + cfs_spin_unlock(&md->lsm_lock); } EXPORT_SYMBOL(lov_stripe_unlock); @@ -2808,7 +2837,6 @@ struct obd_ops lov_obd_ops = { .o_statfs_async = lov_statfs_async, .o_packmd = lov_packmd, .o_unpackmd = lov_unpackmd, - .o_checkmd = lov_checkmd, .o_create = lov_create, .o_destroy = lov_destroy, .o_getattr = lov_getattr, @@ -2822,6 +2850,7 @@ struct obd_ops lov_obd_ops = { .o_sync = lov_sync, .o_enqueue = lov_enqueue, .o_change_cbdata = lov_change_cbdata, + .o_find_cbdata = lov_find_cbdata, .o_cancel = lov_cancel, .o_cancel_unused = lov_cancel_unused, .o_iocontrol = lov_iocontrol, @@ -2835,6 +2864,8 @@ struct obd_ops lov_obd_ops = { .o_pool_rem = lov_pool_remove, .o_pool_add = lov_pool_add, .o_pool_del = lov_pool_del, + .o_getref = lov_getref, + .o_putref = lov_putref, }; static quota_interface_t *quota_interface; @@ -2861,14 +2892,14 @@ int __init lov_init(void) lov_oinfo_slab = cfs_mem_cache_create("lov_oinfo", sizeof(struct lov_oinfo), - 0, SLAB_HWCACHE_ALIGN); + 0, CFS_SLAB_HWCACHE_ALIGN); if (lov_oinfo_slab == NULL) { lu_kmem_fini(lov_caches); return -ENOMEM; } lprocfs_lov_init_vars(&lvars); - request_module("lquota"); + cfs_request_module("lquota"); quota_interface = PORTAL_SYMBOL_GET(lov_quota_interface); init_obd_quota_ops(quota_interface, &lov_obd_ops);