Whamcloud - gitweb
Install and distribute lconf and lmc.
[fs/lustre-release.git] / lustre / lib / page.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
5  *
6  *   This file is part of Lustre, http://www.sf.net/projects/lustre/
7  *
8  *   Lustre is free software; you can redistribute it and/or
9  *   modify it under the terms of version 2 of the GNU General Public
10  *   License as published by the Free Software Foundation.
11  *
12  *   Lustre is distributed in the hope that it will be useful,
13  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *   GNU General Public License for more details.
16  *
17  *   You should have received a copy of the GNU General Public License
18  *   along with Lustre; if not, write to the Free Software
19  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20  *
21  */
22
23
24
25 #include <linux/config.h>
26 #include <linux/kernel.h>
27 #include <linux/mm.h>
28 #include <linux/string.h>
29 #include <linux/stat.h>
30 #include <linux/errno.h>
31 #include <linux/locks.h>
32 #include <linux/unistd.h>
33 #include <linux/version.h>
34
35 #include <asm/system.h>
36 #include <asm/uaccess.h>
37
38 #include <linux/fs.h>
39 #include <linux/stat.h>
40 #include <asm/uaccess.h>
41 #include <asm/segment.h>
42 #include <linux/mm.h>
43 #include <linux/pagemap.h>
44 #include <linux/smp_lock.h>
45
46 #define DEBUG_SUBSYSTEM S_OST
47
48 #include <linux/obd_class.h>
49 #include <linux/lustre_net.h>
50 #include <linux/lustre_lib.h>
51 #include <linux/lustre_ha.h>
52
53 static int sync_io_timeout(void *data)
54 {
55         struct io_cb_data *cbd = data;
56         struct ptlrpc_bulk_desc *desc = cbd->desc;
57
58         ENTRY;
59         desc->b_connection->c_level = LUSTRE_CONN_RECOVD;
60         desc->b_flags |= PTL_RPC_FL_TIMEOUT;
61         if (desc->b_connection && desc->b_connection->c_recovd &&
62             class_signal_connection_failure) {
63                 /* XXXshaver Do we need a resend strategy, or do we just
64                  * XXXshaver return -ERESTARTSYS and punt it?
65                  */
66                 CERROR("signalling failure of conn %p\n", desc->b_connection);
67                 class_signal_connection_failure(desc->b_connection);
68
69                 /* We go back to sleep, until we're resumed or interrupted. */
70                 RETURN(0);
71         }
72         
73         /* If we can't be recovered, just abort the syscall with -ETIMEDOUT. */
74         RETURN(1);
75 }
76
77 static int sync_io_intr(void *data)
78 {
79         struct io_cb_data *cbd = data;
80         struct ptlrpc_bulk_desc *desc = cbd->desc;
81
82         ENTRY;
83         desc->b_flags |= PTL_RPC_FL_INTR;
84         RETURN(1); /* ignored, as of this writing */
85 }
86
/* Two-phase callback driving a synchronous bulk I/O.
 *
 * CB_PHASE_START: the submitting thread parks here until the FINISH phase
 * sets data->complete, with timeout/interrupt handling supplied by
 * sync_io_timeout/sync_io_intr.  CB_PHASE_FINISH: the completion side
 * records the result, wakes the waiter, and returns @err.
 *
 * data->refcount starts at 2 (see ll_init_cb): each phase drops one
 * reference, and whichever side drops it to zero frees @data.  After the
 * atomic_dec_and_test in the START branch, @data must not be touched —
 * the FINISH side may already have freed it.
 *
 * NOTE(review): on a successful wait the START branch returns 0 without
 * propagating data->err (it could not safely read it after the refcount
 * drop anyway) — confirm callers obtain the I/O result elsewhere.
 */
int ll_sync_io_cb(struct io_cb_data *data, int err, int phase)
{
        int ret;
        ENTRY; 

        if (phase == CB_PHASE_START) { 
                struct l_wait_info lwi;
                lwi = LWI_TIMEOUT_INTR(obd_timeout * HZ, sync_io_timeout,
                                       sync_io_intr, data);
                ret = l_wait_event(data->waitq, data->complete, &lwi);
                /* Drop the waiter's reference; @data is off-limits below. */
                if (atomic_dec_and_test(&data->refcount))
                        OBD_FREE(data, sizeof(*data));
                if (ret == -ERESTARTSYS)
                        return ret;
        } else if (phase == CB_PHASE_FINISH) { 
                /* Publish the result before waking the waiter. */
                data->err = err;
                data->complete = 1;
                wake_up(&data->waitq); 
                /* Drop the completion-side reference. */
                if (atomic_dec_and_test(&data->refcount))
                        OBD_FREE(data, sizeof(*data));
                return err;
        } else 
                LBUG();
        EXIT;
        return 0;
}
113
114 struct io_cb_data *ll_init_cb(void)
115 {
116         struct io_cb_data *d;
117
118
119         OBD_ALLOC(d, sizeof(*d));
120         if (d) { 
121                 init_waitqueue_head(&d->waitq);
122                 atomic_set(&d->refcount, 2);
123         }
124         RETURN(d); 
125 }
126
127 /*
128  * Remove page from dirty list
129  */
130 static void __set_page_clean(struct page *page)
131 {
132         struct address_space *mapping = page->mapping;
133         struct inode *inode;
134         
135         if (!mapping)
136                 return;
137
138 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,4,9))
139         spin_lock(&pagecache_lock);
140 #endif
141
142         list_del(&page->list);
143         list_add(&page->list, &mapping->clean_pages);
144
145         inode = mapping->host;
146         if (list_empty(&mapping->dirty_pages)) { 
147                 CDEBUG(D_INODE, "inode clean\n");
148                 inode->i_state &= ~I_DIRTY_PAGES;
149         }
150 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,4,10))
151         spin_unlock(&pagecache_lock);
152 #endif
153         EXIT;
154 }
155
/* Clear a page's dirty bit and, if it was actually set, move the page to
 * its mapping's clean list. */
inline void set_page_clean(struct page *page)
{
        if (!PageDirty(page))
                return;

        ClearPageDirty(page);
        __set_page_clean(page);
}
163
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,10))
/*
 * Add a page to the dirty page list (pre-2.4.10 kernels).
 *
 * NOTE(review): unlike the 2.4.10+ variant below, this branch does not
 * check page->mapping for NULL before dereferencing it — confirm that all
 * callers on these kernels guarantee a mapping.
 */
void __set_page_dirty(struct page *page)
{
        struct address_space *mapping;
        spinlock_t *pg_lock;

        pg_lock = PAGECACHE_LOCK(page);
        spin_lock(pg_lock);

        mapping = page->mapping;
        spin_lock(&mapping->page_lock);

        /* Move the page onto its mapping's dirty list. */
        list_del(&page->list);
        list_add(&page->list, &mapping->dirty_pages);

        spin_unlock(&mapping->page_lock);
        spin_unlock(pg_lock);

        /* Propagate dirtiness to the owning inode, if any. */
        if (mapping->host)
                mark_inode_dirty_pages(mapping->host);
}
#else
/*
 * Add a page to the dirty page list (2.4.10+ kernels).
 *
 * Only acts when this call transitions PG_dirty from clear to set, so
 * repeated calls on an already-dirty page are cheap no-ops.
 */
void set_page_dirty(struct page *page)
{
        if (!test_and_set_bit(PG_dirty, &page->flags)) {
                struct address_space *mapping = page->mapping;

                if (mapping) {
                        spin_lock(&pagecache_lock);
                        /* Move the page onto its mapping's dirty list. */
                        list_del(&page->list);
                        list_add(&page->list, &mapping->dirty_pages);
                        spin_unlock(&pagecache_lock);

                        /* Propagate dirtiness to the owning inode. */
                        if (mapping->host)
                                mark_inode_dirty_pages(mapping->host);
                }
        }
}
#endif
209
/* Release a page obtained from lustre_get_page_read()/write(): undo the
 * kmap taken there and drop the page-cache reference. */
inline void lustre_put_page(struct page *page)
{
        kunmap(page);
        page_cache_release(page);
}
215
216 struct page *lustre_get_page_read(struct inode *inode, unsigned long index)
217 {
218         struct address_space *mapping = inode->i_mapping;
219         struct page *page;
220         int rc;
221
222         page = read_cache_page(mapping, index,
223                                (filler_t*)mapping->a_ops->readpage, NULL);
224         if (!IS_ERR(page)) {
225                 wait_on_page(page);
226                 kmap(page);
227                 if (!Page_Uptodate(page)) {
228                         CERROR("page index %lu not uptodate\n", index);
229                         GOTO(err_page, rc = -EIO);
230                 }
231                 if (PageError(page)) {
232                         CERROR("page index %lu has error\n", index);
233                         GOTO(err_page, rc = -EIO);
234                 }
235         }
236         return page;
237
238 err_page:
239         lustre_put_page(page);
240         return ERR_PTR(rc);
241 }
242
243 struct page *lustre_get_page_write(struct inode *inode, unsigned long index)
244 {
245         struct address_space *mapping = inode->i_mapping;
246         struct page *page;
247         int rc;
248
249         page = grab_cache_page(mapping, index); /* locked page */
250
251         if (!IS_ERR(page)) {
252                 kmap(page);
253                 /* Note: Called with "O" and "PAGE_SIZE" this is essentially
254                  * a no-op for most filesystems, because we write the whole
255                  * page.  For partial-page I/O this will read in the page.
256                  */
257                 rc = mapping->a_ops->prepare_write(NULL, page, 0, PAGE_SIZE);
258                 if (rc) {
259                         CERROR("page index %lu, rc = %d\n", index, rc);
260                         if (rc != -ENOSPC)
261                                 LBUG();
262                         GOTO(err_unlock, rc);
263                 }
264                 /* XXX not sure if we need this if we are overwriting page */
265                 if (PageError(page)) {
266                         CERROR("error on page index %lu, rc = %d\n", index, rc);
267                         LBUG();
268                         GOTO(err_unlock, rc = -EIO);
269                 }
270         }
271         return page;
272
273 err_unlock:
274         unlock_page(page);
275         lustre_put_page(page);
276         return ERR_PTR(rc);
277 }
278
279 int lustre_commit_write(struct page *page, unsigned from, unsigned to)
280 {
281         struct inode *inode = page->mapping->host;
282         int err;
283
284         err = page->mapping->a_ops->commit_write(NULL, page, from, to);
285         if (!err && IS_SYNC(inode))
286                 err = waitfor_one_page(page);
287
288         //SetPageUptodate(page); // the client commit_write will do this
289
290         SetPageReferenced(page);
291         unlock_page(page);
292         lustre_put_page(page);
293         return err;
294 }