X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fllite%2Fllite_close.c;h=26a2c8cc2322c05dbfd200429d3b3e3b0371e00d;hp=53b85140154a03b6112e66e9d31cf17bf57e3eed;hb=ea766bd959cc2347b223d597b3f8a31e010bed72;hpb=fbf5870b9848929d352460f1f005b79c0b5ccc5a diff --git a/lustre/llite/llite_close.c b/lustre/llite/llite_close.c index 53b8514..26a2c8c 100644 --- a/lustre/llite/llite_close.c +++ b/lustre/llite/llite_close.c @@ -1,6 +1,4 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * +/* * GPL HEADER START * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -26,8 +24,10 @@ * GPL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -42,40 +42,40 @@ #define DEBUG_SUBSYSTEM S_LLITE -//#include #include #include "llite_internal.h" /** records that a write is in flight */ void vvp_write_pending(struct ccc_object *club, struct ccc_page *page) { - struct ll_inode_info *lli = ll_i2info(club->cob_inode); - - ENTRY; - spin_lock(&lli->lli_lock); - lli->lli_flags |= LLIF_SOM_DIRTY; - if (page != NULL && list_empty(&page->cpg_pending_linkage)) - list_add(&page->cpg_pending_linkage, &club->cob_pending_list); - spin_unlock(&lli->lli_lock); - EXIT; + struct ll_inode_info *lli = ll_i2info(club->cob_inode); + + ENTRY; + spin_lock(&lli->lli_lock); + lli->lli_flags |= LLIF_SOM_DIRTY; + if (page != NULL && cfs_list_empty(&page->cpg_pending_linkage)) + cfs_list_add(&page->cpg_pending_linkage, + &club->cob_pending_list); + spin_unlock(&lli->lli_lock); + EXIT; } /** records that a write has completed */ void vvp_write_complete(struct ccc_object *club, struct ccc_page *page) { - struct ll_inode_info *lli = ll_i2info(club->cob_inode); - int rc = 0; - - ENTRY; - spin_lock(&lli->lli_lock); - if (page != NULL && !list_empty(&page->cpg_pending_linkage)) { - list_del_init(&page->cpg_pending_linkage); - rc = 1; - } - spin_unlock(&lli->lli_lock); - if (rc) - ll_queue_done_writing(club->cob_inode, 0); - EXIT; + struct ll_inode_info *lli = ll_i2info(club->cob_inode); + int rc = 0; + + ENTRY; + spin_lock(&lli->lli_lock); + if (page != NULL && !cfs_list_empty(&page->cpg_pending_linkage)) { + cfs_list_del_init(&page->cpg_pending_linkage); + rc = 1; + } + spin_unlock(&lli->lli_lock); + if (rc) + ll_queue_done_writing(club->cob_inode, 0); + EXIT; } /** Queues DONE_WRITING if @@ -83,27 +83,29 @@ void vvp_write_complete(struct ccc_object *club, struct ccc_page *page) * - inode has no no dirty pages; */ void ll_queue_done_writing(struct inode *inode, unsigned long flags) { - struct ll_inode_info *lli = ll_i2info(inode); + struct ll_inode_info *lli = ll_i2info(inode); + struct ccc_object *club = cl2ccc(ll_i2info(inode)->lli_clob); + ENTRY; - spin_lock(&lli->lli_lock); + spin_lock(&lli->lli_lock); lli->lli_flags |= flags; if ((lli->lli_flags & LLIF_DONE_WRITING) && - list_empty(&lli->lli_pending_write_llaps)) { + cfs_list_empty(&club->cob_pending_list)) { struct ll_close_queue *lcq = ll_i2sbi(inode)->ll_lcq; if (lli->lli_flags & LLIF_MDS_SIZE_LOCK) - CWARN("ino %lu/%u(flags %lu) som valid it just after " - "recovery\n", - inode->i_ino, inode->i_generation, - lli->lli_flags); + CWARN("ino %lu/%u(flags %u) som valid it just after " + "recovery\n", + inode->i_ino, inode->i_generation, + lli->lli_flags); /* DONE_WRITING is allowed and inode has no dirty page. */ - spin_lock(&lcq->lcq_lock); + spin_lock(&lcq->lcq_lock); - LASSERT(list_empty(&lli->lli_close_list)); + LASSERT(cfs_list_empty(&lli->lli_close_list)); CDEBUG(D_INODE, "adding inode %lu/%u to close list\n", inode->i_ino, inode->i_generation); - list_add_tail(&lli->lli_close_list, &lcq->lcq_head); + cfs_list_add_tail(&lli->lli_close_list, &lcq->lcq_head); /* Avoid a concurrent insertion into the close thread queue: * an inode is already in the close thread, open(), write(), @@ -113,30 +115,52 @@ void ll_queue_done_writing(struct inode *inode, unsigned long flags) * it. */ lli->lli_flags &= ~LLIF_DONE_WRITING; - wake_up(&lcq->lcq_waitq); - spin_unlock(&lcq->lcq_lock); - } - spin_unlock(&lli->lli_lock); + cfs_waitq_signal(&lcq->lcq_waitq); + spin_unlock(&lcq->lcq_lock); + } + spin_unlock(&lli->lli_lock); + EXIT; } -/** Closes epoch and sends Size-on-MDS attribute update if possible. Call - * this under ll_inode_info::lli_lock spinlock. */ -void ll_epoch_close(struct inode *inode, struct md_op_data *op_data, - struct obd_client_handle **och, unsigned long flags) +/** Pack SOM attributes info @opdata for CLOSE, DONE_WRITING rpc. */ +void ll_done_writing_attr(struct inode *inode, struct md_op_data *op_data) { struct ll_inode_info *lli = ll_i2info(inode); ENTRY; - spin_lock(&lli->lli_lock); - if (!(list_empty(&lli->lli_pending_write_llaps))) { - if (!(lli->lli_flags & LLIF_EPOCH_PENDING)) { - LASSERT(*och != NULL); - LASSERT(lli->lli_pending_och == NULL); - /* Inode is dirty and there is no pending write done - * request yet, DONE_WRITE is to be sent later. */ - lli->lli_flags |= LLIF_EPOCH_PENDING; - lli->lli_pending_och = *och; - spin_unlock(&lli->lli_lock); + op_data->op_flags |= MF_SOM_CHANGE; + /* Check if Size-on-MDS attributes are valid. */ + if (lli->lli_flags & LLIF_MDS_SIZE_LOCK) + CERROR("ino %lu/%u(flags %u) som valid it just after " + "recovery\n", inode->i_ino, inode->i_generation, + lli->lli_flags); + + if (!cl_local_size(inode)) { + /* Send Size-on-MDS Attributes if valid. */ + op_data->op_attr.ia_valid |= ATTR_MTIME_SET | ATTR_CTIME_SET | + ATTR_ATIME_SET | ATTR_SIZE | ATTR_BLOCKS; + } + EXIT; +} + +/** Closes ioepoch and packs Size-on-MDS attribute if needed into @op_data. */ +void ll_ioepoch_close(struct inode *inode, struct md_op_data *op_data, + struct obd_client_handle **och, unsigned long flags) +{ + struct ll_inode_info *lli = ll_i2info(inode); + struct ccc_object *club = cl2ccc(ll_i2info(inode)->lli_clob); + ENTRY; + + spin_lock(&lli->lli_lock); + if (!(cfs_list_empty(&club->cob_pending_list))) { + if (!(lli->lli_flags & LLIF_EPOCH_PENDING)) { + LASSERT(*och != NULL); + LASSERT(lli->lli_pending_och == NULL); + /* Inode is dirty and there is no pending write done + * request yet, DONE_WRITE is to be sent later. */ + lli->lli_flags |= LLIF_EPOCH_PENDING; + lli->lli_pending_och = *och; + spin_unlock(&lli->lli_lock); inode = igrab(inode); LASSERT(inode); @@ -148,7 +172,7 @@ void ll_epoch_close(struct inode *inode, struct md_op_data *op_data, * and try DONE_WRITE again later. */ LASSERT(!(lli->lli_flags & LLIF_DONE_WRITING)); lli->lli_flags |= LLIF_DONE_WRITING; - spin_unlock(&lli->lli_lock); + spin_unlock(&lli->lli_lock); inode = igrab(inode); LASSERT(inode); @@ -168,93 +192,102 @@ void ll_epoch_close(struct inode *inode, struct md_op_data *op_data, } else { /* Pack Size-on-MDS inode attributes only if they has changed */ if (!(lli->lli_flags & LLIF_SOM_DIRTY)) { - spin_unlock(&lli->lli_lock); - GOTO(out, 0); - } - - /* There is a pending DONE_WRITE -- close epoch with no - * attribute change. */ - if (lli->lli_flags & LLIF_EPOCH_PENDING) { - spin_unlock(&lli->lli_lock); - GOTO(out, 0); - } - } - - LASSERT(list_empty(&lli->lli_pending_write_llaps)); - lli->lli_flags &= ~LLIF_SOM_DIRTY; - spin_unlock(&lli->lli_lock); - op_data->op_flags |= MF_SOM_CHANGE; - - /* Check if Size-on-MDS attributes are valid. */ - if (lli->lli_flags & LLIF_MDS_SIZE_LOCK) - CWARN("ino %lu/%u(flags %lu) som valid it just after " - "recovery\n", - inode->i_ino, inode->i_generation, lli->lli_flags); - - if (!cl_local_size(inode)) { - /* Send Size-on-MDS Attributes if valid. Atime is sent along - * with all the attributes. */ - op_data->op_attr.ia_valid |= ATTR_MTIME_SET | ATTR_CTIME_SET | - ATTR_ATIME_SET | ATTR_SIZE | ATTR_BLOCKS; - } - EXIT; + spin_unlock(&lli->lli_lock); + GOTO(out, 0); + } + + /* There is a pending DONE_WRITE -- close epoch with no + * attribute change. */ + if (lli->lli_flags & LLIF_EPOCH_PENDING) { + spin_unlock(&lli->lli_lock); + GOTO(out, 0); + } + } + + LASSERT(cfs_list_empty(&club->cob_pending_list)); + lli->lli_flags &= ~LLIF_SOM_DIRTY; + spin_unlock(&lli->lli_lock); + ll_done_writing_attr(inode, op_data); + + EXIT; out: - return; + return; } -int ll_sizeonmds_update(struct inode *inode, struct md_open_data *mod, - struct lustre_handle *fh, __u64 ioepoch) +/** + * Cliens updates SOM attributes on MDS (including llog cookies): + * obd_getattr with no lock and md_setattr. + */ +int ll_som_update(struct inode *inode, struct md_op_data *op_data) { struct ll_inode_info *lli = ll_i2info(inode); - struct md_op_data *op_data; + struct ptlrpc_request *request = NULL; + __u32 old_flags; struct obdo *oa; int rc; ENTRY; - /* LASSERT(!(lli->lli_flags & LLIF_MDS_SIZE_LOCK)); */ - /* After recovery that can be valid. */ + LASSERT(op_data != NULL); if (lli->lli_flags & LLIF_MDS_SIZE_LOCK) - CWARN("ino %lu/%u(flags %lu) som valid it just after " - "recovery\n", inode->i_ino, inode->i_generation, - lli->lli_flags); + CERROR("ino %lu/%u(flags %u) som valid it just after " + "recovery\n", inode->i_ino, inode->i_generation, + lli->lli_flags); OBDO_ALLOC(oa); - OBD_ALLOC_PTR(op_data); - if (!oa || !op_data) { + if (!oa) { CERROR("can't allocate memory for Size-on-MDS update.\n"); RETURN(-ENOMEM); } - rc = ll_inode_getattr(inode, oa); - if (rc == -ENOENT) { - oa->o_valid = 0; - CDEBUG(D_INODE, "objid "LPX64" is already destroyed\n", - lli->lli_smd->lsm_object_id); - } else if (rc) { - CERROR("inode_getattr failed (%d): unable to send a " - "Size-on-MDS attribute update for inode %lu/%u\n", - rc, inode->i_ino, inode->i_generation); - GOTO(out, rc); + + old_flags = op_data->op_flags; + op_data->op_flags = MF_SOM_CHANGE; + + /* If inode is already in another epoch, skip getattr from OSTs. */ + if (lli->lli_ioepoch == op_data->op_ioepoch) { + rc = ll_inode_getattr(inode, oa, op_data->op_ioepoch, + old_flags & MF_GETATTR_LOCK); + if (rc) { + oa->o_valid = 0; + if (rc != -ENOENT) + CERROR("inode_getattr failed (%d): unable to " + "send a Size-on-MDS attribute update " + "for inode %lu/%u\n", rc, inode->i_ino, + inode->i_generation); + } else { + CDEBUG(D_INODE, "Size-on-MDS update on "DFID"\n", + PFID(&lli->lli_fid)); + } + /* Install attributes into op_data. */ + md_from_obdo(op_data, oa, oa->o_valid); } - CDEBUG(D_INODE, "Size-on-MDS update on "DFID"\n", PFID(&lli->lli_fid)); - md_from_obdo(op_data, oa, oa->o_valid); - memcpy(&op_data->op_handle, fh, sizeof(*fh)); + rc = md_setattr(ll_i2sbi(inode)->ll_md_exp, op_data, + NULL, 0, NULL, 0, &request, NULL); + ptlrpc_req_finished(request); - op_data->op_ioepoch = ioepoch; - op_data->op_flags |= MF_SOM_CHANGE; + OBDO_FREE(oa); + RETURN(rc); +} - rc = ll_md_setattr(inode, op_data, &mod); - EXIT; -out: - if (oa) - OBDO_FREE(oa); - if (op_data) - ll_finish_md_op_data(op_data); - return rc; +/** + * Closes the ioepoch and packs all the attributes into @op_data for + * DONE_WRITING rpc. + */ +static void ll_prepare_done_writing(struct inode *inode, + struct md_op_data *op_data, + struct obd_client_handle **och) +{ + ll_ioepoch_close(inode, op_data, och, LLIF_DONE_WRITING); + /* If there is no @och, we do not do D_W yet. */ + if (*och == NULL) + return; + + ll_pack_inode2opdata(inode, op_data, &(*och)->och_fh); + ll_prep_md_op_data(op_data, inode, NULL, NULL, + 0, 0, LUSTRE_OPC_ANY, NULL); } -/** Sends a DONE_WRITING rpc, packs Size-on-MDS attributes into it, if - * possible */ +/** Send a DONE_WRITING rpc. */ static void ll_done_writing(struct inode *inode) { struct obd_client_handle *och = NULL; @@ -262,7 +295,7 @@ static void ll_done_writing(struct inode *inode) int rc; ENTRY; - LASSERT(ll_i2mdexp(inode)->exp_connect_flags & OBD_CONNECT_SOM); + LASSERT(exp_connect_som(ll_i2mdexp(inode))); OBD_ALLOC_PTR(op_data); if (op_data == NULL) { @@ -271,19 +304,16 @@ static void ll_done_writing(struct inode *inode) return; } - ll_epoch_close(inode, op_data, &och, LLIF_DONE_WRITING); + ll_prepare_done_writing(inode, op_data, &och); /* If there is no @och, we do not do D_W yet. */ if (och == NULL) GOTO(out, 0); - ll_pack_inode2opdata(inode, op_data, &och->och_fh); - - rc = md_done_writing(ll_i2sbi(inode)->ll_md_exp, op_data, och->och_mod); + rc = md_done_writing(ll_i2sbi(inode)->ll_md_exp, op_data, NULL); if (rc == -EAGAIN) { /* MDS has instructed us to obtain Size-on-MDS attribute from * OSTs and send setattr to back to MDS. */ - rc = ll_sizeonmds_update(inode, och->och_mod, - &och->och_fh, op_data->op_ioepoch); + rc = ll_som_update(inode, op_data); } else if (rc) { CERROR("inode %lu mdc done_writing failed: rc = %d\n", inode->i_ino, rc); @@ -299,19 +329,19 @@ out: static struct ll_inode_info *ll_close_next_lli(struct ll_close_queue *lcq) { - struct ll_inode_info *lli = NULL; + struct ll_inode_info *lli = NULL; - spin_lock(&lcq->lcq_lock); + spin_lock(&lcq->lcq_lock); - if (!list_empty(&lcq->lcq_head)) { - lli = list_entry(lcq->lcq_head.next, struct ll_inode_info, - lli_close_list); - list_del_init(&lli->lli_close_list); - } else if (atomic_read(&lcq->lcq_stop)) + if (!cfs_list_empty(&lcq->lcq_head)) { + lli = cfs_list_entry(lcq->lcq_head.next, struct ll_inode_info, + lli_close_list); + cfs_list_del_init(&lli->lli_close_list); + } else if (cfs_atomic_read(&lcq->lcq_stop)) lli = ERR_PTR(-EALREADY); - spin_unlock(&lcq->lcq_lock); - return lli; + spin_unlock(&lcq->lcq_lock); + return lli; } static int ll_close_thread(void *arg) @@ -325,7 +355,7 @@ static int ll_close_thread(void *arg) cfs_daemonize(name); } - complete(&lcq->lcq_comp); + complete(&lcq->lcq_comp); while (1) { struct l_wait_info lwi = { 0 }; @@ -346,8 +376,8 @@ static int ll_close_thread(void *arg) } CDEBUG(D_INFO, "ll_close exiting\n"); - complete(&lcq->lcq_comp); - RETURN(0); + complete(&lcq->lcq_comp); + RETURN(0); } int ll_close_thread_start(struct ll_close_queue **lcq_ret) @@ -362,27 +392,27 @@ int ll_close_thread_start(struct ll_close_queue **lcq_ret) if (lcq == NULL) return -ENOMEM; - spin_lock_init(&lcq->lcq_lock); - INIT_LIST_HEAD(&lcq->lcq_head); - init_waitqueue_head(&lcq->lcq_waitq); - init_completion(&lcq->lcq_comp); + spin_lock_init(&lcq->lcq_lock); + CFS_INIT_LIST_HEAD(&lcq->lcq_head); + cfs_waitq_init(&lcq->lcq_waitq); + init_completion(&lcq->lcq_comp); - pid = kernel_thread(ll_close_thread, lcq, 0); - if (pid < 0) { - OBD_FREE(lcq, sizeof(*lcq)); - return pid; - } + pid = cfs_create_thread(ll_close_thread, lcq, 0); + if (pid < 0) { + OBD_FREE(lcq, sizeof(*lcq)); + return pid; + } - wait_for_completion(&lcq->lcq_comp); - *lcq_ret = lcq; - return 0; + wait_for_completion(&lcq->lcq_comp); + *lcq_ret = lcq; + return 0; } void ll_close_thread_shutdown(struct ll_close_queue *lcq) { - init_completion(&lcq->lcq_comp); - atomic_inc(&lcq->lcq_stop); - wake_up(&lcq->lcq_waitq); - wait_for_completion(&lcq->lcq_comp); - OBD_FREE(lcq, sizeof(*lcq)); + init_completion(&lcq->lcq_comp); + cfs_atomic_inc(&lcq->lcq_stop); + cfs_waitq_signal(&lcq->lcq_waitq); + wait_for_completion(&lcq->lcq_comp); + OBD_FREE(lcq, sizeof(*lcq)); }