X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fosc%2Fosc_create.c;h=f3eb6bc1a19e4e70a3942969a6512300ee87a3bd;hp=a3ebcc55e4783b3002c5e5dee3c1a2dbd388129e;hb=6869932b552ac705f411de3362f01bd50c1f6f7d;hpb=23de47e82bd999ec651f927097922413527cca71 diff --git a/lustre/osc/osc_create.c b/lustre/osc/osc_create.c index a3ebcc5..f3eb6bc 100644 --- a/lustre/osc/osc_create.c +++ b/lustre/osc/osc_create.c @@ -1,29 +1,45 @@ /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: * - * Copyright (C) 2001-2003 Cluster File Systems, Inc. - * Author Peter Braam + * GPL HEADER START * - * This file is part of Lustre, http://www.lustre.org. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf * - * For testing and management it is treated as an obd_device, - * although * it does not export a full OBD method table (the - * requests are coming * in over the wire, so object target modules - * do not have a full * method table.) + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. * + * GPL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * lustre/osc/osc_create.c + * For testing and management it is treated as an obd_device, + * although * it does not export a full OBD method table (the + * requests are coming * in over the wire, so object target modules + * do not have a full * method table.) + * + * Author: Peter Braam */ #ifndef EXPORT_SYMTAB @@ -32,18 +48,7 @@ #define DEBUG_SUBSYSTEM S_OSC #ifdef __KERNEL__ -# include -# include -# include -# include -# include -# include -# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -# include -# include -# else -# include -# endif +# include #else /* __KERNEL__ */ # include #endif @@ -52,45 +57,83 @@ # include #endif -# include -#include +# include +#include #include "osc_internal.h" -static int osc_interpret_create(struct ptlrpc_request *req, void *data, - int rc) +static int osc_interpret_create(struct ptlrpc_request *req, void *data, int rc) { struct osc_creator *oscc; struct ost_body *body = NULL; ENTRY; if (req->rq_repmsg) { - body = lustre_swab_repbuf(req, 0, sizeof(*body), + body = lustre_swab_repbuf(req, REPLY_REC_OFF, sizeof(*body), lustre_swab_ost_body); if (body == NULL && rc == 0) rc = -EPROTO; } oscc = req->rq_async_args.pointer_arg[0]; + LASSERT(oscc && (oscc->oscc_obd != LP_POISON)); + spin_lock(&oscc->oscc_lock); - if (body) - oscc->oscc_last_id = body->oa.o_id; - if (rc == -ENOSPC) { - DEBUG_REQ(D_INODE, req, "OST out of space, flagging"); + oscc->oscc_flags &= ~OSCC_FLAG_CREATING; + switch (rc) { + case 0: { + if (body) { + int diff = body->oa.o_id - oscc->oscc_last_id; + + if (diff < oscc->oscc_grow_count) + oscc->oscc_grow_count = + max(diff/3, OST_MIN_PRECREATE); + else + oscc->oscc_flags &= ~OSCC_FLAG_LOW; + oscc->oscc_last_id = body->oa.o_id; + } + spin_unlock(&oscc->oscc_lock); + break; + } + case -EAGAIN: + /* valid race delorphan vs create, or somthing after resend */ + spin_unlock(&oscc->oscc_lock); + DEBUG_REQ(D_INODE, req, "Got EGAIN - resend \n"); + break; + case -ENOSPC: + case -EROFS: + case -EFBIG: { oscc->oscc_flags |= OSCC_FLAG_NOSPC; - } else if (rc != 0 && rc != -EIO) { - DEBUG_REQ(D_ERROR, req, - "unknown rc %d from async create: failing oscc", - rc); + if (body && rc == -ENOSPC) { + oscc->oscc_grow_count = OST_MIN_PRECREATE; + oscc->oscc_last_id = body->oa.o_id; + } + spin_unlock(&oscc->oscc_lock); + DEBUG_REQ(D_INODE, req, "OST out of space, flagging"); + break; + } + case -EIO: { + /* filter always set body->oa.o_id as the last_id + * of filter (see filter_handle_precreate for detail)*/ + if (body && body->oa.o_id > oscc->oscc_last_id) + oscc->oscc_last_id = body->oa.o_id; + spin_unlock(&oscc->oscc_lock); + break; + } + default: { oscc->oscc_flags |= OSCC_FLAG_RECOVERING; - ptlrpc_fail_import(req->rq_import, req->rq_import_generation); + oscc->oscc_grow_count = OST_MIN_PRECREATE; + spin_unlock(&oscc->oscc_lock); + DEBUG_REQ(D_ERROR, req, + "Unknown rc %d from async create: failing oscc", rc); + ptlrpc_fail_import(req->rq_import, + lustre_msg_get_conn_cnt(req->rq_reqmsg)); + } } - oscc->oscc_flags &= ~OSCC_FLAG_CREATING; - spin_unlock(&oscc->oscc_lock); - CDEBUG(D_HA, "preallocated through id "LPU64" (last used "LPU64")\n", + CDEBUG(D_HA, "preallocated through id "LPU64" (next to use "LPU64")\n", oscc->oscc_last_id, oscc->oscc_next_id); - wake_up(&oscc->oscc_waitq); + cfs_waitq_signal(&oscc->oscc_waitq); RETURN(rc); } @@ -98,20 +141,34 @@ static int oscc_internal_create(struct osc_creator *oscc) { struct ptlrpc_request *request; struct ost_body *body; - int size = sizeof(*body); + __u32 size[] = { sizeof(struct ptlrpc_body), sizeof(*body) }; ENTRY; - spin_lock(&oscc->oscc_lock); + LASSERT_SPIN_LOCKED(&oscc->oscc_lock); + if (oscc->oscc_flags & OSCC_FLAG_CREATING || oscc->oscc_flags & OSCC_FLAG_RECOVERING) { spin_unlock(&oscc->oscc_lock); RETURN(0); } + + if (oscc->oscc_grow_count < oscc->oscc_max_grow_count && + ((oscc->oscc_flags & OSCC_FLAG_LOW) == 0) && + (__s64)(oscc->oscc_last_id - oscc->oscc_next_id) <= + (oscc->oscc_grow_count / 4 + 1)) { + oscc->oscc_flags |= OSCC_FLAG_LOW; + oscc->oscc_grow_count *= 2; + } + + if (oscc->oscc_grow_count > oscc->oscc_max_grow_count / 2) + oscc->oscc_grow_count = oscc->oscc_max_grow_count / 2; + oscc->oscc_flags |= OSCC_FLAG_CREATING; spin_unlock(&oscc->oscc_lock); - request = ptlrpc_prep_req(oscc->oscc_obd->u.cli.cl_import, OST_CREATE, - 1, &size, NULL); + request = ptlrpc_prep_req(oscc->oscc_obd->u.cli.cl_import, + LUSTRE_OST_VERSION, OST_CREATE, 2, + size, NULL); if (request == NULL) { spin_lock(&oscc->oscc_lock); oscc->oscc_flags &= ~OSCC_FLAG_CREATING; @@ -119,19 +176,20 @@ static int oscc_internal_create(struct osc_creator *oscc) RETURN(-ENOMEM); } - request->rq_request_portal = OST_CREATE_PORTAL; //XXX FIXME bug 249 - body = lustre_msg_buf(request->rq_reqmsg, 0, sizeof(*body)); + request->rq_request_portal = OST_CREATE_PORTAL; + ptlrpc_at_set_req_timeout(request); + body = lustre_msg_buf(request->rq_reqmsg, REQ_REC_OFF, sizeof(*body)); spin_lock(&oscc->oscc_lock); body->oa.o_id = oscc->oscc_last_id + oscc->oscc_grow_count; - body->oa.o_gr = oscc->oscc_gr; + body->oa.o_gr = oscc->oscc_oa.o_gr; LASSERT(body->oa.o_gr > 0); body->oa.o_valid |= OBD_MD_FLID | OBD_MD_FLGROUP; - CDEBUG(D_INFO, "preallocating through id "LPU64" (last used "LPU64")\n", - body->oa.o_id, oscc->oscc_next_id); spin_unlock(&oscc->oscc_lock); + CDEBUG(D_RPCTRACE, "prealloc through id "LPU64" (last seen "LPU64")\n", + body->oa.o_id, oscc->oscc_last_id); - request->rq_replen = lustre_msg_size(1, &size); + ptlrpc_req_set_repsize(request, 2, size); request->rq_async_args.pointer_arg[0] = oscc; request->rq_interpret_reply = osc_interpret_create; @@ -145,10 +203,12 @@ static int oscc_has_objects(struct osc_creator *oscc, int count) int have_objs; spin_lock(&oscc->oscc_lock); have_objs = ((__s64)(oscc->oscc_last_id - oscc->oscc_next_id) >= count); - spin_unlock(&oscc->oscc_lock); - if (!have_objs) + if (!have_objs) { oscc_internal_create(oscc); + } else { + spin_unlock(&oscc->oscc_lock); + } return have_objs; } @@ -166,7 +226,7 @@ static int oscc_wait_for_objects(struct osc_creator *oscc, int count) spin_unlock(&oscc->oscc_lock); osc_invalid = oscc->oscc_obd->u.cli.cl_import->imp_invalid; - + return have_objs || ost_full || osc_invalid; } @@ -176,7 +236,7 @@ static int oscc_precreate(struct osc_creator *oscc, int wait) int rc = 0; ENTRY; - if (oscc_has_objects(oscc, oscc->oscc_kick_barrier)) + if (oscc_has_objects(oscc, oscc->oscc_grow_count / 2)) RETURN(0); if (!wait) @@ -194,7 +254,7 @@ static int oscc_precreate(struct osc_creator *oscc, int wait) RETURN(rc); } -int oscc_recovering(struct osc_creator *oscc) +int oscc_recovering(struct osc_creator *oscc) { int recov = 0; @@ -205,112 +265,163 @@ int oscc_recovering(struct osc_creator *oscc) return recov; } +/* decide if the OST has remaining object, return value : + 0 : the OST has remaining object, and don't need to do precreate. + 1 : the OST has no remaining object, and will send a RPC for precreate. + 2 : the OST has no remaining object, and will not get any for + a potentially very long time + 1000 : unusable + */ +int osc_precreate(struct obd_export *exp) +{ + struct osc_creator *oscc = &exp->exp_obd->u.cli.cl_oscc; + struct obd_import *imp = exp->exp_imp_reverse; + ENTRY; + + LASSERT(oscc != NULL); + if (imp != NULL && imp->imp_deactive) + RETURN(1000); + + if (oscc->oscc_last_id < oscc->oscc_next_id) { + spin_lock(&oscc->oscc_lock); + if (oscc->oscc_flags & OSCC_FLAG_NOSPC) { + spin_unlock(&oscc->oscc_lock); + RETURN(1000); + } + if (oscc->oscc_flags & OSCC_FLAG_SYNC_IN_PROGRESS) { + spin_unlock(&oscc->oscc_lock); + RETURN(1); + } + if (oscc->oscc_flags & OSCC_FLAG_RECOVERING) { + spin_unlock(&oscc->oscc_lock); + RETURN(2); + } + + if (oscc->oscc_flags & OSCC_FLAG_CREATING) { + spin_unlock(&oscc->oscc_lock); + RETURN(1); + } + + oscc_internal_create(oscc); + RETURN(1); + } + RETURN(0); +} + int osc_create(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md **ea, struct obd_trans_info *oti) { - struct lov_stripe_md *lsm; struct osc_creator *oscc = &exp->exp_obd->u.cli.cl_oscc; + struct lov_stripe_md *lsm; int try_again = 1, rc = 0; ENTRY; + LASSERT(oa); LASSERT(ea); - LASSERT(oa->o_valid & OBD_MD_FLGROUP); LASSERT(oa->o_gr > 0); - - LASSERT(oscc->oscc_gr == 0 || oscc->oscc_gr == oa->o_gr); - oscc->oscc_gr = oa->o_gr; - - if (oa->o_gr == FILTER_GROUP_LLOG || oa->o_gr == FILTER_GROUP_ECHO) - RETURN(osc_real_create(exp, oa, ea, oti)); + LASSERT(oa->o_valid & OBD_MD_FLGROUP); if ((oa->o_valid & OBD_MD_FLFLAGS) && - oa->o_flags == OBD_FL_RECREATE_OBJS) { + oa->o_flags == OBD_FL_RECREATE_OBJS) { RETURN(osc_real_create(exp, oa, ea, oti)); } - lsm = *ea; - if (lsm == NULL) { - rc = obd_alloc_memmd(exp, &lsm); - if (rc < 0) - RETURN(rc); - } + if (oa->o_gr == FILTER_GROUP_LLOG || oa->o_gr == FILTER_GROUP_ECHO) + RETURN(osc_real_create(exp, oa, ea, oti)); - /* this is the special case where create removes orphans */ - if ((oa->o_valid & OBD_MD_FLFLAGS) && - oa->o_flags == OBD_FL_DELORPHAN) { - CDEBUG(D_HA, "%s; oscc recovery started\n", - exp->exp_obd->obd_name); - LASSERT(oscc->oscc_flags & OSCC_FLAG_RECOVERING); + /* this is the special case where create removes orphans */ + if ((oa->o_valid & OBD_MD_FLFLAGS) && + oa->o_flags == OBD_FL_DELORPHAN) { + spin_lock(&oscc->oscc_lock); + if (oscc->oscc_flags & OSCC_FLAG_SYNC_IN_PROGRESS) { + spin_unlock(&oscc->oscc_lock); + RETURN(-EBUSY); + } + if (!(oscc->oscc_flags & OSCC_FLAG_RECOVERING)) { + spin_unlock(&oscc->oscc_lock); + RETURN(0); + } + oscc->oscc_flags |= OSCC_FLAG_SYNC_IN_PROGRESS; + /* seting flag LOW we prevent extra grow precreate size + * and enforce use last assigned size */ + oscc->oscc_flags |= OSCC_FLAG_LOW; + spin_unlock(&oscc->oscc_lock); + CDEBUG(D_HA, "%s: oscc recovery started - delete to "LPU64"\n", + oscc->oscc_obd->obd_name, oscc->oscc_next_id - 1); /* delete from next_id on up */ oa->o_valid |= OBD_MD_FLID; oa->o_id = oscc->oscc_next_id - 1; - CDEBUG(D_HA, "%s: deleting to next_id: "LPU64"\n", - exp->exp_obd->obd_name, oa->o_id); - rc = osc_real_create(exp, oa, ea, NULL); - if (oscc->oscc_obd == NULL) { - CWARN("the obd for oscc %p has been freed\n", oscc); - RETURN(rc); - } spin_lock(&oscc->oscc_lock); + oscc->oscc_flags &= ~OSCC_FLAG_SYNC_IN_PROGRESS; if (rc == 0 || rc == -ENOSPC) { if (rc == -ENOSPC) oscc->oscc_flags |= OSCC_FLAG_NOSPC; oscc->oscc_flags &= ~OSCC_FLAG_RECOVERING; oscc->oscc_last_id = oa->o_id; - - /* recover happen in mds_setup, before cobd_setup, so - * reset oscc_gr = 0 here, it sould be no harm to CMD */ - oscc->oscc_gr = 0; - - CDEBUG(D_HA, "%s: oscc recovery finished: %d\n", - exp->exp_obd->obd_name, rc); - wake_up(&oscc->oscc_waitq); - + CDEBUG(D_HA, "%s: oscc recovery finished, last_id: " + LPU64", rc: %d\n", oscc->oscc_obd->obd_name, + oscc->oscc_last_id, rc); + cfs_waitq_signal(&oscc->oscc_waitq); } else { - CDEBUG(D_ERROR, "%s: oscc recovery failed: %d\n", - exp->exp_obd->obd_name, rc); + CDEBUG(D_ERROR, "%s: oscc recovery failed: %d\n", + oscc->oscc_obd->obd_name, rc); } spin_unlock(&oscc->oscc_lock); - RETURN(rc); - } + + RETURN(rc); + } + + lsm = *ea; + if (lsm == NULL) { + rc = obd_alloc_memmd(exp, &lsm); + if (rc < 0) + RETURN(rc); + } while (try_again) { - /* If orphans are being recovered, then we must wait until + /* If orphans are being recovered, then we must wait until it is finished before we can continue with create. */ if (oscc_recovering(oscc)) { struct l_wait_info lwi; - - CDEBUG(D_HA,"%s: oscc sync in progress, waiting\n", - exp->exp_obd->obd_name); - - lwi = LWI_TIMEOUT(MAX(obd_timeout * HZ, 1), NULL, NULL); - rc = l_wait_event(oscc->oscc_waitq, + + CDEBUG(D_HA,"%s: oscc recovery in progress, waiting\n", + oscc->oscc_obd->obd_name); + + lwi = LWI_TIMEOUT(cfs_timeout_cap(cfs_time_seconds( + obd_timeout / 4)), NULL, NULL); + rc = l_wait_event(oscc->oscc_waitq, !oscc_recovering(oscc), &lwi); LASSERT(rc == 0 || rc == -ETIMEDOUT); if (rc == -ETIMEDOUT) { - CDEBUG(D_HA, "%s: timed out waiting for sync\n", - exp->exp_obd->obd_name); + CDEBUG(D_HA,"%s: timeout waiting on recovery\n", + oscc->oscc_obd->obd_name); RETURN(rc); } - CDEBUG(D_HA, "%s: oscc sync over, waking up\n", - exp->exp_obd->obd_name); + CDEBUG(D_HA, "%s: oscc recovery over, waking up\n", + oscc->oscc_obd->obd_name); } - + spin_lock(&oscc->oscc_lock); + if (oscc->oscc_flags & OSCC_FLAG_EXITING) { + spin_unlock(&oscc->oscc_lock); + break; + } + if (oscc->oscc_last_id >= oscc->oscc_next_id) { memcpy(oa, &oscc->oscc_oa, sizeof(*oa)); oa->o_id = oscc->oscc_next_id; - oa->o_gr = oscc->oscc_gr; lsm->lsm_object_id = oscc->oscc_next_id; - lsm->lsm_object_gr = oscc->oscc_gr; *ea = lsm; oscc->oscc_next_id++; try_again = 0; + + CDEBUG(D_RPCTRACE, "%s: set oscc_next_id = "LPU64"\n", + exp->exp_obd->obd_name, oscc->oscc_next_id); } else if (oscc->oscc_flags & OSCC_FLAG_NOSPC) { rc = -ENOSPC; spin_unlock(&oscc->oscc_lock); @@ -318,13 +429,13 @@ int osc_create(struct obd_export *exp, struct obdo *oa, } spin_unlock(&oscc->oscc_lock); rc = oscc_precreate(oscc, try_again); - if (rc == -EIO) + if (rc) break; } if (rc == 0) - CDEBUG(D_INFO, "returning objid "LPU64"/"LPU64"\n", - lsm->lsm_object_id, lsm->lsm_object_gr); + CDEBUG(D_INFO, "%s: returning objid "LPU64"\n", + obd2cli_tgt(oscc->oscc_obd), lsm->lsm_object_id); else if (*ea == NULL) obd_free_memmd(exp, &lsm); RETURN(rc); @@ -340,13 +451,12 @@ void oscc_init(struct obd_device *obd) oscc = &obd->u.cli.cl_oscc; memset(oscc, 0, sizeof(*oscc)); - INIT_LIST_HEAD(&oscc->oscc_list); - init_waitqueue_head(&oscc->oscc_waitq); + CFS_INIT_LIST_HEAD(&oscc->oscc_list); + cfs_waitq_init(&oscc->oscc_waitq); spin_lock_init(&oscc->oscc_lock); oscc->oscc_obd = obd; - oscc->oscc_kick_barrier = 100; - oscc->oscc_grow_count = 2000; - oscc->oscc_initial_create_count = 2000; + oscc->oscc_grow_count = OST_MIN_PRECREATE; + oscc->oscc_max_grow_count = OST_MAX_PRECREATE; oscc->oscc_next_id = 2; oscc->oscc_last_id = 1;