X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fosc%2Fosc_create.c;h=53d69121e8dfca01ba2dd27ea6a6176ccf783e97;hb=3dcf18d3;hp=cd0d3730ef9d7e321d037a5d4ae156323b2e78f0;hpb=d2d56f38da01001c92a09afc6b52b5acbd9bc13c;p=fs%2Flustre-release.git diff --git a/lustre/osc/osc_create.c b/lustre/osc/osc_create.c index cd0d373..53d6912 100644 --- a/lustre/osc/osc_create.c +++ b/lustre/osc/osc_create.c @@ -1,32 +1,45 @@ /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: * - * Copyright (C) 2001-2003 Cluster File Systems, Inc. - * Author Peter Braam + * GPL HEADER START * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * - * You may have signed or agreed to another license before downloading - * this software. If so, you are bound by the terms and conditions - * of that agreement, and the following does not apply to you. See the - * LICENSE file included with this distribution for more information. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. * - * If you did not agree to a different license, then this copy of Lustre - * is open source software; you can redistribute it and/or modify it - * under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). * - * In either case, Lustre is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * license text for more details. + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf * - * For testing and management it is treated as an obd_device, - * although * it does not export a full OBD method table (the - * requests are coming * in over the wire, so object target modules - * do not have a full * method table.) + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. * + * GPL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * lustre/osc/osc_create.c + * For testing and management it is treated as an obd_device, + * although * it does not export a full OBD method table (the + * requests are coming * in over the wire, so object target modules + * do not have a full * method table.) + * + * Author: Peter Braam */ #ifndef EXPORT_SYMTAB @@ -44,14 +57,28 @@ # include #endif -# include +#include #include #include "osc_internal.h" -static int osc_interpret_create(struct ptlrpc_request *req, void *data, int rc) +/* XXX need AT adjust ? */ +#define osc_create_timeout (obd_timeout / 2) + +struct osc_create_async_args { + struct osc_creator *rq_oscc; + struct lov_stripe_md *rq_lsm; + struct obd_info *rq_oinfo; +}; + +static int oscc_internal_create(struct osc_creator *oscc); +static int handle_async_create(struct ptlrpc_request *req, int rc); + +static int osc_interpret_create(const struct lu_env *env, + struct ptlrpc_request *req, void *data, int rc) { struct osc_creator *oscc; struct ost_body *body = NULL; + struct ptlrpc_request *fake_req, *pos; ENTRY; if (req->rq_repmsg) { @@ -63,10 +90,42 @@ static int osc_interpret_create(struct ptlrpc_request *req, void *data, int rc) oscc = req->rq_async_args.pointer_arg[0]; LASSERT(oscc && (oscc->oscc_obd != LP_POISON)); - + spin_lock(&oscc->oscc_lock); oscc->oscc_flags &= ~OSCC_FLAG_CREATING; - if (rc == -ENOSPC || rc == -EROFS) { + switch (rc) { + case 0: { + if (body) { + int diff = body->oa.o_id - oscc->oscc_last_id; + + /* oscc_internal_create() stores the original value of + * grow_count in rq_async_args.space[0]. + * We can't compare against oscc_grow_count directly, + * because it may have been increased while the RPC + * is in flight, so we would always find ourselves + * having created fewer objects and decreasing the + * precreate request size. b=18577 */ + if (diff < (int) req->rq_async_args.space[0]) { + /* the OST has not managed to create all the + * objects we asked for */ + oscc->oscc_grow_count = max(diff, + OST_MIN_PRECREATE); + /* don't bump grow_count next time */ + oscc->oscc_flags |= OSCC_FLAG_LOW; + } else { + /* the OST is able to keep up with the work, + * we could consider increasing grow_count + * next time if needed */ + oscc->oscc_flags &= ~OSCC_FLAG_LOW; + } + oscc->oscc_last_id = body->oa.o_id; + } + spin_unlock(&oscc->oscc_lock); + break; + } + case -ENOSPC: + case -EROFS: + case -EFBIG: { oscc->oscc_flags |= OSCC_FLAG_NOSPC; if (body && rc == -ENOSPC) { oscc->oscc_grow_count = OST_MIN_PRECREATE; @@ -74,7 +133,26 @@ static int osc_interpret_create(struct ptlrpc_request *req, void *data, int rc) } spin_unlock(&oscc->oscc_lock); DEBUG_REQ(D_INODE, req, "OST out of space, flagging"); - } else if (rc != 0 && rc != -EIO) { + break; + } + case -EIO: { + /* filter always set body->oa.o_id as the last_id + * of filter (see filter_handle_precreate for detail)*/ + if (body && body->oa.o_id > oscc->oscc_last_id) + oscc->oscc_last_id = body->oa.o_id; + spin_unlock(&oscc->oscc_lock); + break; + } + case -EWOULDBLOCK: { + /* aka EAGAIN we should not delay create if import failed - + * this avoid client stick in create and avoid race with + * delorphan */ + oscc->oscc_flags |= OSCC_FLAG_RECOVERING; + /* oscc->oscc_grow_count = OST_MIN_PRECREATE; */ + spin_unlock(&oscc->oscc_lock); + break; + } + default: { oscc->oscc_flags |= OSCC_FLAG_RECOVERING; oscc->oscc_grow_count = OST_MIN_PRECREATE; spin_unlock(&oscc->oscc_lock); @@ -82,31 +160,25 @@ static int osc_interpret_create(struct ptlrpc_request *req, void *data, int rc) "Unknown rc %d from async create: failing oscc", rc); ptlrpc_fail_import(req->rq_import, lustre_msg_get_conn_cnt(req->rq_reqmsg)); - } else { - if (rc == 0) { - if (body) { - int diff = body->oa.o_id - oscc->oscc_last_id; - - if (diff < oscc->oscc_grow_count) - oscc->oscc_grow_count = - max(diff/3, OST_MIN_PRECREATE); - else - oscc->oscc_flags &= ~OSCC_FLAG_LOW; - oscc->oscc_last_id = body->oa.o_id; - } - } else { - /* filter always set body->oa.o_id as the last_id - * of filter (see filter_handle_precreate for detail)*/ - if (body && body->oa.o_id > oscc->oscc_last_id) - oscc->oscc_last_id = body->oa.o_id; - } - spin_unlock(&oscc->oscc_lock); - + } } CDEBUG(D_HA, "preallocated through id "LPU64" (next to use "LPU64")\n", oscc->oscc_last_id, oscc->oscc_next_id); + spin_lock(&oscc->oscc_lock); + list_for_each_entry_safe(fake_req, pos, + &oscc->oscc_wait_create_list, rq_list) { + if (handle_async_create(fake_req, rc) == -EAGAIN) { + oscc_internal_create(oscc); + /* sending request should be never fail because + * osc use preallocated requests pool */ + GOTO(exit_wakeup, rc); + } + } + spin_unlock(&oscc->oscc_lock); + +exit_wakeup: cfs_waitq_signal(&oscc->oscc_waitq); RETURN(rc); } @@ -115,26 +187,34 @@ static int oscc_internal_create(struct osc_creator *oscc) { struct ptlrpc_request *request; struct ost_body *body; - int size[] = { sizeof(struct ptlrpc_body), sizeof(*body) }; + __u32 size[] = { sizeof(struct ptlrpc_body), sizeof(*body) }; ENTRY; - spin_lock(&oscc->oscc_lock); - if (oscc->oscc_grow_count < OST_MAX_PRECREATE && - !(oscc->oscc_flags & (OSCC_FLAG_LOW | OSCC_FLAG_RECOVERING)) && + LASSERT_SPIN_LOCKED(&oscc->oscc_lock); + + if(oscc->oscc_flags & OSCC_FLAG_RECOVERING) { + spin_unlock(&oscc->oscc_lock); + RETURN(0); + } + + /* we need check it before OSCC_FLAG_CREATING - because need + * see lower number of precreate objects */ + if (oscc->oscc_grow_count < oscc->oscc_max_grow_count && + ((oscc->oscc_flags & OSCC_FLAG_LOW) == 0) && (__s64)(oscc->oscc_last_id - oscc->oscc_next_id) <= (oscc->oscc_grow_count / 4 + 1)) { oscc->oscc_flags |= OSCC_FLAG_LOW; oscc->oscc_grow_count *= 2; } - if (oscc->oscc_grow_count > OST_MAX_PRECREATE / 2) - oscc->oscc_grow_count = OST_MAX_PRECREATE / 2; - - if (oscc->oscc_flags & OSCC_FLAG_CREATING || - oscc->oscc_flags & OSCC_FLAG_RECOVERING) { + if (oscc->oscc_flags & OSCC_FLAG_CREATING) { spin_unlock(&oscc->oscc_lock); RETURN(0); } + + if (oscc->oscc_grow_count > oscc->oscc_max_grow_count / 2) + oscc->oscc_grow_count = oscc->oscc_max_grow_count / 2; + oscc->oscc_flags |= OSCC_FLAG_CREATING; spin_unlock(&oscc->oscc_lock); @@ -148,37 +228,46 @@ static int oscc_internal_create(struct osc_creator *oscc) RETURN(-ENOMEM); } - request->rq_request_portal = OST_CREATE_PORTAL; //XXX FIXME bug 249 + request->rq_request_portal = OST_CREATE_PORTAL; + ptlrpc_at_set_req_timeout(request); body = lustre_msg_buf(request->rq_reqmsg, REQ_REC_OFF, sizeof(*body)); spin_lock(&oscc->oscc_lock); body->oa.o_id = oscc->oscc_last_id + oscc->oscc_grow_count; body->oa.o_gr = oscc->oscc_oa.o_gr; - LASSERT(body->oa.o_gr > 0); + LASSERT_MDS_GROUP(body->oa.o_gr); body->oa.o_valid |= OBD_MD_FLID | OBD_MD_FLGROUP; + request->rq_async_args.space[0] = oscc->oscc_grow_count; spin_unlock(&oscc->oscc_lock); - CDEBUG(D_HA, "preallocating through id "LPU64" (last seen "LPU64")\n", + CDEBUG(D_RPCTRACE, "prealloc through id "LPU64" (last seen "LPU64")\n", body->oa.o_id, oscc->oscc_last_id); + /* we should not resend create request - anyway we will have delorphan + * and kill these objects */ + request->rq_no_delay = request->rq_no_resend = 1; ptlrpc_req_set_repsize(request, 2, size); request->rq_async_args.pointer_arg[0] = oscc; request->rq_interpret_reply = osc_interpret_create; - ptlrpcd_add_req(request); + ptlrpcd_add_req(request, PSCOPE_OTHER); RETURN(0); } +static int oscc_has_objects_nolock(struct osc_creator *oscc, int count) +{ + return ((__s64)(oscc->oscc_last_id - oscc->oscc_next_id) >= count); +} + + static int oscc_has_objects(struct osc_creator *oscc, int count) { int have_objs; + spin_lock(&oscc->oscc_lock); - have_objs = ((__s64)(oscc->oscc_last_id - oscc->oscc_next_id) >= count); + have_objs = oscc_has_objects_nolock(oscc, count); spin_unlock(&oscc->oscc_lock); - if (!have_objs) - oscc_internal_create(oscc); - return have_objs; } @@ -188,33 +277,39 @@ static int oscc_wait_for_objects(struct osc_creator *oscc, int count) int ost_full; int osc_invalid; - have_objs = oscc_has_objects(oscc, count); + osc_invalid = oscc->oscc_obd->u.cli.cl_import->imp_invalid; spin_lock(&oscc->oscc_lock); ost_full = (oscc->oscc_flags & OSCC_FLAG_NOSPC); - spin_unlock(&oscc->oscc_lock); + have_objs = oscc_has_objects_nolock(oscc, count); + osc_invalid |= oscc->oscc_flags & OSCC_FLAG_EXITING; - osc_invalid = oscc->oscc_obd->u.cli.cl_import->imp_invalid; + if (!ost_full && !osc_invalid) + /* they release lock himself */ + oscc_internal_create(oscc); + else + spin_unlock(&oscc->oscc_lock); return have_objs || ost_full || osc_invalid; } -static int oscc_precreate(struct osc_creator *oscc, int wait) +static int oscc_precreate(struct osc_creator *oscc) { - struct l_wait_info lwi = { 0 }; + struct l_wait_info lwi; int rc = 0; ENTRY; if (oscc_has_objects(oscc, oscc->oscc_grow_count / 2)) RETURN(0); - if (!wait) - RETURN(0); + /* we should be not block forever - because client's create rpc can + * stick in mds for long time and forbid client reconnect */ + lwi = LWI_TIMEOUT(cfs_timeout_cap(cfs_time_seconds(osc_create_timeout)), + NULL, NULL); - /* no rc check -- a no-INTR, no-TIMEOUT wait can't fail */ - l_wait_event(oscc->oscc_waitq, oscc_wait_for_objects(oscc, 1), &lwi); + rc = l_wait_event(oscc->oscc_waitq, oscc_wait_for_objects(oscc, 1), &lwi); - if (!oscc_has_objects(oscc, 1) && (oscc->oscc_flags & OSCC_FLAG_NOSPC)) + if (!oscc_has_objects(oscc, 1) || (oscc->oscc_flags & OSCC_FLAG_NOSPC)) rc = -ENOSPC; if (oscc->oscc_obd->u.cli.cl_import->imp_invalid) @@ -223,9 +318,9 @@ static int oscc_precreate(struct osc_creator *oscc, int wait) RETURN(rc); } -int oscc_recovering(struct osc_creator *oscc) +static int oscc_recovering(struct osc_creator *oscc) { - int recov = 0; + int recov; spin_lock(&oscc->oscc_lock); recov = oscc->oscc_flags & OSCC_FLAG_RECOVERING; @@ -234,17 +329,192 @@ int oscc_recovering(struct osc_creator *oscc) return recov; } +static int oscc_in_sync(struct osc_creator *oscc) +{ + int sync; + + spin_lock(&oscc->oscc_lock); + sync = oscc->oscc_flags & OSCC_FLAG_SYNC_IN_PROGRESS; + spin_unlock(&oscc->oscc_lock); + + return sync; +} + +/* decide if the OST has remaining object, return value : + 0 : the OST has remaining object, and don't need to do precreate. + 1 : the OST has no remaining object, and will send a RPC for precreate. + 2 : the OST has no remaining object, and will not get any for + a potentially very long time + 1000 : unusable + */ +int osc_precreate(struct obd_export *exp) +{ + struct osc_creator *oscc = &exp->exp_obd->u.cli.cl_oscc; + struct obd_import *imp = exp->exp_imp_reverse; + ENTRY; + + LASSERT(oscc != NULL); + if (imp != NULL && imp->imp_deactive) + RETURN(1000); + + /* until oscc in recovery - other flags is wrong */ + if (oscc_recovering(oscc)) + RETURN(2); + + if (oscc->oscc_flags & OSCC_FLAG_NOSPC) + RETURN(1000); + + if (oscc_has_objects(oscc, oscc->oscc_grow_count / 2)) + RETURN(0); + + spin_lock(&oscc->oscc_lock); + if ((oscc->oscc_flags & OSCC_FLAG_SYNC_IN_PROGRESS) || + (oscc->oscc_flags & OSCC_FLAG_CREATING)) { + spin_unlock(&oscc->oscc_lock); + RETURN(1); + } + + oscc_internal_create(oscc); + RETURN(1); +} + +static int handle_async_create(struct ptlrpc_request *req, int rc) +{ + struct osc_create_async_args *args = ptlrpc_req_async_args(req); + struct osc_creator *oscc = args->rq_oscc; + struct lov_stripe_md *lsm = args->rq_lsm; + struct obd_info *oinfo = args->rq_oinfo; + struct obdo *oa = oinfo->oi_oa; + + LASSERT_SPIN_LOCKED(&oscc->oscc_lock); + + if(rc) + GOTO(out_wake, rc); + + if ((oscc->oscc_flags & OSCC_FLAG_EXITING)) + GOTO(out_wake, rc = -EIO); + + if (oscc_has_objects_nolock(oscc, 1)) { + memcpy(oa, &oscc->oscc_oa, sizeof(*oa)); + oa->o_id = oscc->oscc_next_id; + lsm->lsm_object_id = oscc->oscc_next_id; + oscc->oscc_next_id++; + + CDEBUG(D_RPCTRACE, " set oscc_next_id = "LPU64"\n", + oscc->oscc_next_id); + GOTO(out_wake, rc = 0); + } + + /* should be try wait until recovery finished */ + if(oscc->oscc_flags & OSCC_FLAG_RECOVERING) + RETURN(-EAGAIN); + + if (oscc->oscc_flags & OSCC_FLAG_NOSPC) + GOTO(out_wake, rc = -ENOSPC); + + /* we not have objects now - continue wait */ + RETURN(-EAGAIN); + +out_wake: + + rc = oinfo->oi_cb_up(oinfo, rc); + ptlrpc_fakereq_finished(req); + + RETURN(rc); +} + +static int async_create_interpret(const struct lu_env *env, + struct ptlrpc_request *req, void *data, int rc) +{ + struct osc_create_async_args *args = ptlrpc_req_async_args(req); + struct osc_creator *oscc = args->rq_oscc; + int ret; + + spin_lock(&oscc->oscc_lock); + ret = handle_async_create(req, rc); + spin_unlock(&oscc->oscc_lock); + + return ret; +} + +int osc_create_async(struct obd_export *exp, struct obd_info *oinfo, + struct lov_stripe_md **ea, struct obd_trans_info *oti) +{ + int rc; + struct ptlrpc_request *fake_req; + struct osc_create_async_args *args; + struct osc_creator *oscc = &exp->exp_obd->u.cli.cl_oscc; + struct obdo *oa = oinfo->oi_oa; + ENTRY; + + if ((oa->o_valid & OBD_MD_FLGROUP) && (oa->o_gr != 0)){ + rc = osc_real_create(exp, oinfo->oi_oa, ea, oti); + rc = oinfo->oi_cb_up(oinfo, rc); + RETURN(rc); + } + + if ((oa->o_valid & OBD_MD_FLFLAGS) && + oa->o_flags == OBD_FL_RECREATE_OBJS) { + rc = osc_real_create(exp, oinfo->oi_oa, ea, oti); + rc = oinfo->oi_cb_up(oinfo, rc); + RETURN(rc); + } + + LASSERT((*ea) != NULL); + + fake_req = ptlrpc_prep_fakereq(oscc->oscc_obd->u.cli.cl_import, + osc_create_timeout, + async_create_interpret); + if (fake_req == NULL) { + rc = oinfo->oi_cb_up(oinfo, -ENOMEM); + RETURN(-ENOMEM); + } + + args = ptlrpc_req_async_args(fake_req); + CLASSERT(sizeof(*args) <= sizeof(fake_req->rq_async_args)); + + args->rq_oscc = oscc; + args->rq_lsm = *ea; + args->rq_oinfo = oinfo; + + spin_lock(&oscc->oscc_lock); + /* try fast path */ + rc = handle_async_create(fake_req, 0); + if (rc == -EAGAIN) { + int is_add; + /* we not have objects - try wait */ + is_add = ptlrpcd_add_req(fake_req, PSCOPE_OTHER); + if (!is_add) + list_add(&fake_req->rq_list, + &oscc->oscc_wait_create_list); + else + rc = is_add; + } + spin_unlock(&oscc->oscc_lock); + + if (rc != -EAGAIN) + /* need free request if was error hit or + * objects already allocated */ + ptlrpc_req_finished(fake_req); + else + /* EAGAIN mean - request is delayed */ + rc = 0; + + RETURN(rc); +} + int osc_create(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md **ea, struct obd_trans_info *oti) { struct osc_creator *oscc = &exp->exp_obd->u.cli.cl_oscc; + struct obd_import *imp = exp->exp_obd->u.cli.cl_import; struct lov_stripe_md *lsm; - int try_again = 1, rc = 0; + int rc = 0; ENTRY; LASSERT(oa); LASSERT(ea); - LASSERT(oa->o_gr > 0); + LASSERT_MDS_GROUP(oa->o_gr); LASSERT(oa->o_valid & OBD_MD_FLGROUP); if ((oa->o_valid & OBD_MD_FLFLAGS) && @@ -267,39 +537,54 @@ int osc_create(struct obd_export *exp, struct obdo *oa, spin_unlock(&oscc->oscc_lock); RETURN(0); } + oscc->oscc_flags |= OSCC_FLAG_SYNC_IN_PROGRESS; + /* seting flag LOW we prevent extra grow precreate size + * and enforce use last assigned size */ + oscc->oscc_flags |= OSCC_FLAG_LOW; spin_unlock(&oscc->oscc_lock); - CDEBUG(D_HA, "%s: oscc recovery started\n", - oscc->oscc_obd->obd_name); + CDEBUG(D_HA, "%s: oscc recovery started - delete to "LPU64"\n", + oscc->oscc_obd->obd_name, oscc->oscc_next_id - 1); /* delete from next_id on up */ oa->o_valid |= OBD_MD_FLID; oa->o_id = oscc->oscc_next_id - 1; - CDEBUG(D_HA, "%s: deleting to next_id: "LPU64"\n", - oscc->oscc_obd->obd_name, oa->o_id); - rc = osc_real_create(exp, oa, ea, NULL); spin_lock(&oscc->oscc_lock); oscc->oscc_flags &= ~OSCC_FLAG_SYNC_IN_PROGRESS; if (rc == 0 || rc == -ENOSPC) { + struct obd_connect_data *ocd; + if (rc == -ENOSPC) oscc->oscc_flags |= OSCC_FLAG_NOSPC; oscc->oscc_flags &= ~OSCC_FLAG_RECOVERING; + oscc->oscc_last_id = oa->o_id; + ocd = &imp->imp_connect_data; + if (ocd->ocd_connect_flags & OBD_CONNECT_SKIP_ORPHAN) { + CDEBUG(D_HA, "%s: Skip orphan set, reset last " + "objid\n", oscc->oscc_obd->obd_name); + oscc->oscc_next_id = oa->o_id + 1; + } + + /* sanity check for next objid. see bug 17025 */ + LASSERT(oscc->oscc_next_id == oa->o_id + 1); + CDEBUG(D_HA, "%s: oscc recovery finished, last_id: " LPU64", rc: %d\n", oscc->oscc_obd->obd_name, oscc->oscc_last_id, rc); - cfs_waitq_signal(&oscc->oscc_waitq); } else { CDEBUG(D_ERROR, "%s: oscc recovery failed: %d\n", oscc->oscc_obd->obd_name, rc); } - spin_unlock(&oscc->oscc_lock); + cfs_waitq_signal(&oscc->oscc_waitq); + spin_unlock(&oscc->oscc_lock); - RETURN(rc); + if (rc < 0) + RETURN(rc); } lsm = *ea; @@ -309,27 +594,16 @@ int osc_create(struct obd_export *exp, struct obdo *oa, RETURN(rc); } - while (try_again) { - /* If orphans are being recovered, then we must wait until - it is finished before we can continue with create. */ - if (oscc_recovering(oscc)) { - struct l_wait_info lwi; - - CDEBUG(D_HA,"%p: oscc recovery in progress, waiting\n", - oscc); - - lwi = LWI_TIMEOUT(cfs_timeout_cap(cfs_time_seconds(obd_timeout/4)), - NULL, NULL); - rc = l_wait_event(oscc->oscc_waitq, - !oscc_recovering(oscc), &lwi); - LASSERT(rc == 0 || rc == -ETIMEDOUT); - if (rc == -ETIMEDOUT) { - CDEBUG(D_HA,"%p: timeout waiting on recovery\n", - oscc); - RETURN(rc); - } - CDEBUG(D_HA, "%p: oscc recovery over, waking up\n", - oscc); + while (1) { + if (oscc_in_sync(oscc)) + CDEBUG(D_HA,"%s: oscc recovery in progress, waiting\n", + oscc->oscc_obd->obd_name); + + rc = oscc_precreate(oscc); + if (rc) { + CDEBUG(D_HA,"%s: error create %d\n", + oscc->oscc_obd->obd_name, rc); + break; } spin_lock(&oscc->oscc_lock); @@ -337,30 +611,35 @@ int osc_create(struct obd_export *exp, struct obdo *oa, spin_unlock(&oscc->oscc_lock); break; } + /* wakeup but recovery not finished */ + if (oscc->oscc_flags & OSCC_FLAG_RECOVERING) { + rc = -EIO; + spin_unlock(&oscc->oscc_lock); + break; + } - if (oscc->oscc_last_id >= oscc->oscc_next_id) { + if (oscc_has_objects_nolock(oscc, 1)) { memcpy(oa, &oscc->oscc_oa, sizeof(*oa)); oa->o_id = oscc->oscc_next_id; lsm->lsm_object_id = oscc->oscc_next_id; *ea = lsm; oscc->oscc_next_id++; - try_again = 0; + spin_unlock(&oscc->oscc_lock); - CDEBUG(D_HA, "%s: set oscc_next_id = "LPU64"\n", + CDEBUG(D_RPCTRACE, "%s: set oscc_next_id = "LPU64"\n", exp->exp_obd->obd_name, oscc->oscc_next_id); + break; } else if (oscc->oscc_flags & OSCC_FLAG_NOSPC) { rc = -ENOSPC; spin_unlock(&oscc->oscc_lock); break; } + spin_unlock(&oscc->oscc_lock); - rc = oscc_precreate(oscc, try_again); - if (rc) - break; } if (rc == 0) - CDEBUG(D_HA, "%s: returning objid "LPU64"\n", + CDEBUG(D_INFO, "%s: returning objid "LPU64"\n", obd2cli_tgt(oscc->oscc_obd), lsm->lsm_object_id); else if (*ea == NULL) obd_free_memmd(exp, &lsm); @@ -377,15 +656,31 @@ void oscc_init(struct obd_device *obd) oscc = &obd->u.cli.cl_oscc; memset(oscc, 0, sizeof(*oscc)); - CFS_INIT_LIST_HEAD(&oscc->oscc_list); + cfs_waitq_init(&oscc->oscc_waitq); spin_lock_init(&oscc->oscc_lock); oscc->oscc_obd = obd; oscc->oscc_grow_count = OST_MIN_PRECREATE; + oscc->oscc_max_grow_count = OST_MAX_PRECREATE; oscc->oscc_next_id = 2; oscc->oscc_last_id = 1; oscc->oscc_flags |= OSCC_FLAG_RECOVERING; + + CFS_INIT_LIST_HEAD(&oscc->oscc_wait_create_list); + /* XXX the export handle should give the oscc the last object */ /* oed->oed_oscc.oscc_last_id = exph->....; */ } + +void oscc_fini(struct obd_device *obd) +{ + struct osc_creator *oscc = &obd->u.cli.cl_oscc; + ENTRY; + + + spin_lock(&oscc->oscc_lock); + oscc->oscc_flags &= ~OSCC_FLAG_RECOVERING; + oscc->oscc_flags |= OSCC_FLAG_EXITING; + spin_unlock(&oscc->oscc_lock); +}