Whamcloud - gitweb
Found one bug; testing for list_empty(slist) is ok, but you
[fs/lustre-release.git] / lustre / obdfs / flushd.c
1 /*
2  * OBDFS Super operations - also used for Lustre file system
3  *
4  *
5  *  Copyright (C) 1991, 1992  Linus Torvalds
6  * Copyright (C) 1999 Stelias Computing Inc. <braam@stelias.com>
7  * Copyright (C) 1999 Seagate Technology Inc.
8  *
9  */
10 #define __NO_VERSION__
11 #include <linux/module.h>
12 #include <linux/sched.h>
13 #include <linux/fs.h>
14 #include <linux/malloc.h>
15 #include <linux/locks.h>
16 #include <linux/errno.h>
17 #include <linux/swap.h>
18 #include <linux/smp_lock.h>
19 #include <linux/vmalloc.h>
20 #include <linux/blkdev.h>
21 #include <linux/sysrq.h>
22 #include <linux/file.h>
23 #include <linux/init.h>
24 #include <linux/quotaops.h>
25 #include <linux/iobuf.h>
26 #include <linux/highmem.h>
27
28 #include <asm/uaccess.h>
29 #include <asm/io.h>
30 #include <asm/bitops.h>
31 #include <asm/mmu_context.h>
32
33 #include <linux/obd_support.h>
34 #include <linux/obd_class.h>
35 #include <linux/obdfs.h>
36
37
38
39 struct {
40         int nfract;  /* Percentage of buffer cache dirty to 
41                         activate bdflush */
42         int ndirty;  /* Maximum number of dirty blocks to write out per
43                         wake-cycle */
44         int nrefill; /* Number of clean buffers to try to obtain
45                                 each time we call refill */
46         int nref_dirt; /* Dirty buffer threshold for activating bdflush
47                           when trying to refill buffers. */
48         int interval; /* jiffies delay between kupdate flushes */
49         int age_buffer;  /* Time for normal buffer to age before we flush it */
50         int age_super;  /* Time for superblock to age before we flush it */
51 } pupd_prm = {40, 500, 64, 256, 5*HZ, 30*HZ, 5*HZ }; 
52
53
54 static int obdfs_enqueue_pages(struct inode *inode, struct obdo **obdo,
55                                int nr_slots, struct page **pages, char **bufs,
56                                obd_size *counts, obd_off *offsets,
57                                obd_flag *flag, int check_time)
58 {
59         struct list_head *page_list = obdfs_iplist(inode);
60         struct list_head *tmp;
61         int i = 0;
62
63         ENTRY;
64         /* if there are no pages, remove from super block list */
65         if (list_empty(obdfs_iplist(inode))) {
66                 list_del(obdfs_islist(inode));
67                 /* we check for "empty" on this animal: must init it! */
68                 INIT_LIST_HEAD(obdfs_islist(inode));
69                 CDEBUG(D_INODE, "empty list\n");
70                 EXIT;
71                 return 0;
72         }
73
74         *obdo = obdo_fromid(IID(inode), inode->i_ino, OBD_MD_FLNOTOBD);
75         if ( IS_ERR(*obdo) ) {
76                 EXIT;
77                 return PTR_ERR(*obdo);
78         }
79
80         obdfs_from_inode(*obdo, inode);
81         *flag = OBD_BRW_CREATE;
82
83         tmp = page_list;
84         while ( (tmp = tmp->next) != page_list && (i < nr_slots) ) {
85                 struct obdfs_pgrq *req;
86                 struct page *page;
87                 
88                 req = list_entry(tmp, struct obdfs_pgrq, rq_plist);
89                 /* remove request from list before write to avoid conflict */
90                 obdfs_pgrq_del(req);
91                 page = req->rq_page;
92
93                 if ( !page  ) {
94                         CDEBUG(D_INODE, "no page \n");
95                         EXIT;
96                         return 0;
97                 }
98
99                 if (check_time && 
100                     req->rq_jiffies > (jiffies - pupd_prm.age_buffer))
101                         continue;
102                 
103                 CDEBUG(D_INODE, "adding page %p to vector\n", page);
104                 bufs[i] = (char *)page_address(page);
105                 pages[i] = page;
106                 counts[i] = PAGE_SIZE;
107                 offsets[i] = ((obd_off)page->index) << PAGE_SHIFT;
108                 i++;
109         }
110
111         /* If no more pages for this inode, remove from superblock list */
112         if ( list_empty(obdfs_iplist(inode)) )
113                 list_del(obdfs_islist(inode));
114
115         EXIT;
116         return i;  
117 }
118
119
120 /* Remove writeback requests for the superblock */
121 int obdfs_flush_reqs(struct list_head *inode_list, int flush_inode,
122                      int check_time)
123 {
124         struct list_head *tmp = inode_list;
125         obd_count         num_io = 0;
126         obd_count         num_obdos = 0;
127         struct inode     *inodes[MAX_IOVEC];
128         struct obdo      *obdos[MAX_IOVEC];
129         struct page      *pages[MAX_IOVEC];
130         char             *bufs[MAX_IOVEC];
131         obd_size          counts[MAX_IOVEC];
132         obd_off           offsets[MAX_IOVEC];
133         obd_flag          flags[MAX_IOVEC];
134         obd_count         bufs_per_obdo[MAX_IOVEC];
135         int               err = 0;
136         int i;
137
138         ENTRY;
139
140         if (!inode_list) {
141                 CDEBUG(D_INODE, "no list\n");
142                 EXIT;
143                 return 0;
144         }
145
146         if ( list_empty(inode_list)) {
147                 CDEBUG(D_INODE, "list empty\n");
148                 EXIT;
149                 return 0;
150         }
151
152
153         /* add all of the outstanding pages to a write vector, and write it */
154         while ( (tmp = tmp->next) != inode_list ) {
155                 struct obdfs_inode_info *ii;
156                 int res;
157
158                 ii = list_entry(tmp, struct obdfs_inode_info, oi_inodes);
159                 inodes[num_obdos] = list_entry(ii, struct inode, u);
160
161                 res = obdfs_enqueue_pages(inodes[num_obdos], &obdos[num_obdos],
162                                           MAX_IOVEC - num_io, &pages[num_io],
163                                           &bufs[num_io], &counts[num_io],
164                                           &offsets[num_io], &flags[num_obdos],1);
165                 if ( res < 0 ) {
166                         return -EIO;
167                 }
168                 
169                 bufs_per_obdo[num_obdos] = res;
170                 num_io += res;
171                 num_obdos++;
172
173                 if ( num_io == MAX_IOVEC ) {
174                         err = obdfs_do_vec_wr(inodes[0]->i_sb, num_io,
175                                               num_obdos, obdos, bufs_per_obdo,
176                                               pages, bufs, counts, offsets,
177                                               flags);
178                         for (i = 0 ; i < num_obdos ; i++) {
179                                 obdfs_to_inode(inodes[i], obdos[i]);
180                                 obdo_free(obdos[i]);
181                         }
182                         if ( err ) {
183                                 EXIT;
184                                 goto ERR;
185                         }
186                         num_io = 0;
187                         num_obdos = 0;
188                 }
189         } 
190
191         /* flush any remaining I/Os */
192         if ( num_io ) {
193                 err = obdfs_do_vec_wr(inodes[0]->i_sb, num_io, num_obdos, 
194                                       obdos, bufs_per_obdo, pages, bufs,
195                                       counts, offsets, flags);
196                 for (i = 0 ; i < num_obdos ; i++) {
197                         obdfs_to_inode(inodes[i], obdos[i]);
198                         obdo_free(obdos[i]);
199                 }
200         }
201         EXIT;
202 ERR:
203
204         return err;
205 } /* obdfs_remove_pages_from_cache */
206
207
208 static void obdfs_flush_dirty_pages(int check_time)
209 {
210         struct list_head *sl;
211
212         sl = &obdfs_super_list;
213         while ( (sl = sl->next) != &obdfs_super_list ) {
214                 struct obdfs_sb_info *sbi = 
215                         list_entry(sl, struct obdfs_sb_info, osi_list);
216
217                 /* walk write requests here, use the sb, check the time */
218                 obdfs_flush_reqs(&sbi->osi_inodes, 0, 1);
219         }
220
221 #if 0
222         /* again, but now we wait for completion */
223         sl = &obdfs_super_list;
224         while ( (sl = sl->next) != &obdfs_super_list ) {
225                 struct obdfs_sb_info *sbi = 
226                         list_entry(sl, struct obdfs_sb_info, sl_chain);
227
228                 /* walk write requests here */
229                 obdfs_flush_reqs(&sbi->osi_pages, 0, check_time);
230         }
231 #endif
232 }
233
234
235 static struct task_struct *pupdated;
236
237 static int pupdate(void *unused) 
238 {
239         struct task_struct * tsk = current;
240         int interval;
241         
242         pupdated = current;
243
244         exit_files(current);
245         exit_mm(current);
246
247         tsk->session = 1;
248         tsk->pgrp = 1;
249         sprintf(tsk->comm, "pupdated");
250         pupdated = current;
251
252         printk("pupdated activated...\n");
253
254         /* sigstop and sigcont will stop and wakeup pupdate */
255         spin_lock_irq(&tsk->sigmask_lock);
256         sigfillset(&tsk->blocked);
257         siginitsetinv(&tsk->blocked, sigmask(SIGTERM));
258         recalc_sigpending(tsk);
259         spin_unlock_irq(&tsk->sigmask_lock);
260
261         for (;;) {
262                 /* update interval */
263                 interval = pupd_prm.interval;
264                 if (interval)
265                 {
266                         tsk->state = TASK_INTERRUPTIBLE;
267                         schedule_timeout(interval);
268                 }
269                 else
270                 {
271                 stop_pupdate:
272                         tsk->state = TASK_STOPPED;
273                         /* MOD_DEC_USE_COUNT; */
274                         printk("pupdated stopped...\n");
275                         return 0;
276                 }
277                 /* check for sigstop */
278                 if (signal_pending(tsk))
279                 {
280                         int stopped = 0;
281                         spin_lock_irq(&tsk->sigmask_lock);
282                         if (sigismember(&tsk->signal, SIGTERM))
283                         {
284                                 sigdelset(&tsk->signal, SIGTERM);
285                                 stopped = 1;
286                         }
287                         recalc_sigpending(tsk);
288                         spin_unlock_irq(&tsk->sigmask_lock);
289                         if (stopped)
290                                 goto stop_pupdate;
291                 }
292                 /* asynchronous setattr etc for the future ... */
293                 /* flush_inodes(); */
294                 CDEBUG(D_INODE, "about to flush pages...\n");
295                 obdfs_flush_dirty_pages(1);
296                 CDEBUG(D_INODE, "done flushing pages...\n");
297         }
298 }
299
300
301 int flushd_init(void)
302 {
303         /*
304         kernel_thread(bdflush, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
305          */
306         /* MOD_INC_USE_COUNT; */
307         kernel_thread(pupdate, NULL, 0);
308         printk("flushd inited\n");
309         return 0;
310 }
311
312 int flushd_cleanup(void)
313 {
314         /* this should deliver a signal to */
315         
316
317         /* XXX Andreas, we will do this later, for now, you must kill
318            pupdated with a SIGTERM from userland, before unloading obdfs.o
319         */
320         if (pupdated) {
321                 /* then let it run at least once, before continuing */
322
323                 /* XXX need to do something like this here:
324                 send_sig(SIGTERM, current, 0);
325                  */
326                 1;
327         }
328
329         /* not reached */
330         return 0;
331
332 }