Whamcloud - gitweb
31ac436739dcbe293967a8fdcf838dfcefa2fbd7
[fs/lustre-release.git] / lustre / llite / llite_close.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * Lustre Lite routines to issue a secondary close after writeback
5  *
6  *  Copyright (c) 2001-2003 Cluster File Systems, Inc.
7  *
8  *   This file is part of Lustre, http://www.lustre.org.
9  *
10  *   Lustre is free software; you can redistribute it and/or
11  *   modify it under the terms of version 2 of the GNU General Public
12  *   License as published by the Free Software Foundation.
13  *
14  *   Lustre is distributed in the hope that it will be useful,
15  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
16  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  *   GNU General Public License for more details.
18  *
19  *   You should have received a copy of the GNU General Public License
20  *   along with Lustre; if not, write to the Free Software
21  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22  */
23
24 #include <linux/module.h>
25
26 #define DEBUG_SUBSYSTEM S_LLITE
27
28 #include <linux/lustre_mds.h>
29 #include <linux/lustre_lite.h>
30 #include <linux/lustre_gs.h>
31 #include "llite_internal.h"
32
33 /* record that a write is in flight */
34 void llap_write_pending(struct inode *inode, struct ll_async_page *llap)
35 {
36         struct ll_inode_info *lli = ll_i2info(inode);
37         struct page *page = llap->llap_page;
38         spin_lock(&lli->lli_lock);
39         CDEBUG(D_INODE, "track page 0x%p/%lu %s\n",
40                page, (unsigned long) page->index,
41                !list_empty(&llap->llap_pending_write) ? "(already)" : "");
42         if (list_empty(&llap->llap_pending_write))
43                 list_add(&llap->llap_pending_write,
44                          &lli->lli_pending_write_llaps);
45         spin_unlock(&lli->lli_lock);
46 }
47
48 /* record that a write has completed */
49 void llap_write_complete(struct inode *inode, struct ll_async_page *llap)
50 {
51         struct ll_inode_info *lli = ll_i2info(inode);
52         spin_lock(&lli->lli_lock);
53         if (!list_empty(&llap->llap_pending_write))
54                 list_del_init(&llap->llap_pending_write);
55         spin_unlock(&lli->lli_lock);
56 }
57
58 void ll_open_complete(struct inode *inode)
59 {
60         struct ll_inode_info *lli = ll_i2info(inode);
61         spin_lock(&lli->lli_lock);
62         lli->lli_send_done_writing = 0;
63         spin_unlock(&lli->lli_lock);
64 }
65
66 /* if we close with writes in flight then we want the completion or cancelation
67  * of those writes to send a DONE_WRITING rpc to the MDS */
68 int ll_is_inode_dirty(struct inode *inode)
69 {
70         struct ll_inode_info *lli = ll_i2info(inode);
71         int rc = 0;
72         ENTRY;
73
74         spin_lock(&lli->lli_lock);
75         if (!list_empty(&lli->lli_pending_write_llaps))
76                 rc = 1;
77         spin_unlock(&lli->lli_lock);
78         RETURN(rc);
79 }
80
81 void ll_try_done_writing(struct inode *inode)
82 {
83         struct ll_inode_info *lli = ll_i2info(inode);
84         struct ll_close_queue *lcq = ll_i2sbi(inode)->ll_lcq;
85         int added = 0;
86
87         spin_lock(&lli->lli_lock);
88
89         if (lli->lli_send_done_writing &&
90             list_empty(&lli->lli_pending_write_llaps)) {
91                 spin_lock(&lcq->lcq_lock);
92                 if (list_empty(&lli->lli_close_item)) {
93                         CDEBUG(D_INODE, "adding inode %lu/%u to close list\n",
94                                inode->i_ino, inode->i_generation);
95                         list_add_tail(&lli->lli_close_item, &lcq->lcq_list);
96                         wake_up(&lcq->lcq_waitq);
97                         added = 1;
98                 }
99                 spin_unlock(&lcq->lcq_lock);
100         }
101
102         spin_unlock(&lli->lli_lock);
103        
104         /* 
105          * we can't grab inode under lli_lock, because:
106          * ll_try_done_writing:                 ll_prep_inode:
107          *   spin_lock(&lli_lock)                 spin_lock(&inode_lock)
108          *     igrab()                              ll_update_inode()
109          *       spin_lock(&inode_lock)               spin_lock(&lli_lock)
110          */
111         if (added)
112                 LASSERT(igrab(inode) == inode);
113 }
114
115 /* The MDS needs us to get the real file attributes, then send a DONE_WRITING */
116 void ll_queue_done_writing(struct inode *inode)
117 {
118         struct ll_inode_info *lli = ll_i2info(inode);
119         ENTRY;
120
121         CDEBUG(D_INODE, "queue closing for %lu/%u\n",
122                inode->i_ino, inode->i_generation);
123         spin_lock(&lli->lli_lock);
124         lli->lli_send_done_writing = 1;
125         spin_unlock(&lli->lli_lock);
126
127         ll_try_done_writing(inode);
128         EXIT;
129 }
130
131 /* If we know the file size and have the cookies:
132  *  - send a DONE_WRITING rpc
133  *
134  * Otherwise:
135  *  - get a whole-file lock
136  *  - get the authoritative size and all cookies with GETATTRs
137  *  - send a DONE_WRITING rpc
138  */
139 static void ll_try_to_close(struct inode *inode)
140 {
141         struct ll_sb_info *sbi = ll_i2sbi(inode);
142         ll_md_real_close(sbi->ll_md_exp, inode, FMODE_WRITE | FMODE_SYNC);
143 }
144
145 static struct ll_inode_info *ll_close_next_lli(struct ll_close_queue *lcq)
146 {
147         struct ll_inode_info *lli = NULL;
148
149         spin_lock(&lcq->lcq_lock);
150
151         if (lcq->lcq_list.next == NULL)
152                 lli = ERR_PTR(-1);
153         else if (!list_empty(&lcq->lcq_list)) {
154                 lli = list_entry(lcq->lcq_list.next, struct ll_inode_info,
155                                  lli_close_item);
156                 list_del_init(&lli->lli_close_item);
157         }
158
159         spin_unlock(&lcq->lcq_lock);
160         return lli;
161 }
162
163 static int ll_close_thread(void *arg)
164 {
165         struct ll_close_queue *lcq = arg;
166         ENTRY;
167
168         /* XXX boiler-plate */
169         {
170                 char name[sizeof(current->comm)];
171                 unsigned long flags;
172                 snprintf(name, sizeof(name) - 1, "ll_close");
173                 kportal_daemonize(name);
174                 SIGNAL_MASK_LOCK(current, flags);
175                 sigfillset(&current->blocked);
176                 RECALC_SIGPENDING;
177                 SIGNAL_MASK_UNLOCK(current, flags);
178         }
179
180         complete(&lcq->lcq_comp);
181
182         while (1) {
183                 struct l_wait_info lwi = { 0 };
184                 struct ll_inode_info *lli;
185                 struct inode *inode;
186
187                 l_wait_event_exclusive(lcq->lcq_waitq,
188                                        (lli = ll_close_next_lli(lcq)) != NULL,
189                                        &lwi);
190                 if (IS_ERR(lli))
191                         break;
192
193                 inode = ll_info2i(lli);
194                 ll_try_to_close(inode);
195                 iput(inode);
196         }
197
198         EXIT;
199
200         /* SMF-safe way to finish threads */
201         complete_and_exit(&lcq->lcq_comp, 0);
202 }
203
204 int ll_close_thread_start(struct ll_close_queue **lcq_ret)
205 {
206         struct ll_close_queue *lcq;
207         pid_t pid;
208
209         OBD_ALLOC(lcq, sizeof(*lcq));
210         if (lcq == NULL)
211                 return -ENOMEM;
212
213         spin_lock_init(&lcq->lcq_lock);
214         INIT_LIST_HEAD(&lcq->lcq_list);
215         init_waitqueue_head(&lcq->lcq_waitq);
216         init_completion(&lcq->lcq_comp);
217
218         pid = kernel_thread(ll_close_thread, lcq, 0);
219         if (pid < 0) {
220                 OBD_FREE(lcq, sizeof(*lcq));
221                 return pid;
222         }
223
224         wait_for_completion(&lcq->lcq_comp);
225         *lcq_ret = lcq;
226         return 0;
227 }
228
229 void ll_close_thread_stop(struct ll_close_queue *lcq)
230 {
231         init_completion(&lcq->lcq_comp);
232         lcq->lcq_list.next = NULL;
233         wake_up(&lcq->lcq_waitq);
234         wait_for_completion(&lcq->lcq_comp);
235         OBD_FREE(lcq, sizeof(*lcq));
236 }