Whamcloud - gitweb
ext2obd.c: changed ext2obd_brw() to use vector I/O with brw_kiovec
[fs/lustre-release.git] / lustre / obdfs / flushd.c
1 /*
2  * OBDFS Super operations - also used for Lustre file system
3  *
4  *
5  *  Copyright (C) 1991, 1992  Linus Torvalds
6  * Copyright (C) 1999 Stelias Computing Inc. <braam@stelias.com>
7  * Copyright (C) 1999 Seagate Technology Inc.
8  *
9  */
10 #define __NO_VERSION__
11 #include <linux/module.h>
12 #include <linux/sched.h>
13 #include <linux/fs.h>
14 #include <linux/malloc.h>
15 #include <linux/locks.h>
16 #include <linux/errno.h>
17 #include <linux/swap.h>
18 #include <linux/smp_lock.h>
19 #include <linux/vmalloc.h>
20 #include <linux/blkdev.h>
21 #include <linux/sysrq.h>
22 #include <linux/file.h>
23 #include <linux/init.h>
24 #include <linux/quotaops.h>
25 #include <linux/iobuf.h>
26 #include <linux/highmem.h>
27
28 #include <asm/uaccess.h>
29 #include <asm/io.h>
30 #include <asm/bitops.h>
31 #include <asm/mmu_context.h>
32
33 #include <linux/obd_support.h>
34 #include <linux/obd_class.h>
35 #include <linux/obdfs.h>
36
37
38
39 struct {
40         int nfract;  /* Percentage of buffer cache dirty to 
41                         activate bdflush */
42         int ndirty;  /* Maximum number of dirty blocks to write out per
43                         wake-cycle */
44         int nrefill; /* Number of clean buffers to try to obtain
45                                 each time we call refill */
46         int nref_dirt; /* Dirty buffer threshold for activating bdflush
47                           when trying to refill buffers. */
48         int interval; /* jiffies delay between kupdate flushes */
49         int age_buffer;  /* Time for normal buffer to age before we flush it */
50         int age_super;  /* Time for superblock to age before we flush it */
51 } pupd_prm = {40, 500, 64, 256, 5*HZ, 30*HZ, 5*HZ }; 
52
53
54 /* Remove writeback requests from an inode */
55 int obdfs_flush_reqs(struct list_head *page_list, 
56                             int flush_inode, int check_time)
57 {
58         struct list_head *tmp = page_list;
59         obd_count         num_io = 0;
60         struct obdo      *oa = NULL;
61         struct obdo      *obdos[MAX_IOVEC];
62         struct page      *pages[MAX_IOVEC];
63         char             *bufs[MAX_IOVEC];
64         obd_size          counts[MAX_IOVEC];
65         obd_off           offsets[MAX_IOVEC];
66         obd_flag          flags[MAX_IOVEC];
67         int               err = 0;
68         int i;
69         struct inode *inode = NULL;
70
71         ENTRY;
72
73         if ( list_empty(page_list)) {
74                 CDEBUG(D_INODE, "list empty\n");
75                 EXIT;
76                 return 0;
77         }
78
79
80         /* add all of the outstanding pages to a write vector, and write it */
81         while ( (tmp = tmp->next) != page_list ) {
82                 struct obdfs_pgrq *pgrq;
83                 struct page       *page;
84
85                 if ( flush_inode ) 
86                         pgrq = list_entry(tmp, struct obdfs_pgrq, rq_ilist);
87                 else 
88                         pgrq = list_entry(tmp, struct obdfs_pgrq, rq_slist);
89                 page = pgrq->rq_page;
90                 inode = pgrq->rq_inode;
91
92                 if (check_time && 
93                     pgrq->rq_jiffies > (jiffies - pupd_prm.age_buffer))
94                         continue;
95                 
96                 oa = obdo_fromid(IID(inode), inode->i_ino, OBD_MD_FLNOTOBD);
97                 if ( IS_ERR(oa) ) {
98                         EXIT;
99                         return PTR_ERR(oa);
100                 }
101                 obdfs_from_inode(oa, inode);
102
103                 CDEBUG(D_INODE, "adding page %p to vector\n", page);
104                 obdos[num_io] = oa;
105                 bufs[num_io] = (char *)page_address(page);
106                 pages[num_io] = page;
107                 counts[num_io] = PAGE_SIZE;
108                 offsets[num_io] = ((obd_off)page->index) << PAGE_SHIFT;
109                 flags[num_io] = OBD_BRW_CREATE;
110                 num_io++;
111
112                 /* remove request from list before write to avoid conflict */
113                 obdfs_pgrq_del(pgrq);
114
115                 if ( num_io == MAX_IOVEC ) {
116                         err = obdfs_do_vec_wr(inode->i_sb, &num_io, obdos, 
117                                               pages,
118                                               bufs, counts, offsets, flags);
119                         for (i=0 ; i<MAX_IOVEC ; i++) {
120                                 obdo_free(obdos[i]);
121                         if ( err ) {
122                                 /* XXX Probably should handle error here -
123                                  *     discard other writes, or put
124                                  *     (MAX_IOVEC - num_io) I/Os back to list?
125                                  */
126                                 EXIT;
127                                 goto ERR;
128                         }
129                         }
130                         num_io = 0;
131                 }
132         } 
133
134         /* flush any remaining I/Os */
135         if ( num_io ) {
136                 i = num_io - 1;
137                 err = obdfs_do_vec_wr(inode->i_sb, &num_io, obdos, pages, bufs,
138                                       counts, offsets, flags);
139                 for (  ; i>=0 ; i-- ) {
140                         obdo_free(obdos[i]);
141                 }
142         }
143         EXIT;
144 ERR:
145
146         return err;
147 } /* obdfs_remove_pages_from_cache */
148
149
150 static void obdfs_flush_dirty_pages(int check_time)
151 {
152         struct list_head *sl;
153         struct obdfs_sb_info *sbi;
154
155         sl = &obdfs_super_list;
156         while ( (sl = sl->next) != &obdfs_super_list ) {
157                 struct obdfs_super_entry *entry = 
158                         list_entry(sl, struct obdfs_super_entry, sl_chain);
159                 sbi = entry->sl_sbi;
160
161                 /* walk write requests here, use the sb, check the time */
162                 obdfs_flush_reqs(&sbi->osi_pages, 0, 1);
163         }
164
165         /* again, but now we wait for completion */
166         sl = &obdfs_super_list;
167         while ( (sl = sl->next) != &obdfs_super_list ) {
168                 struct obdfs_super_entry *entry = 
169                         list_entry(sl, struct obdfs_super_entry, sl_chain);
170                 sbi = entry->sl_sbi;
171
172                 /* walk write requests here */
173                 obdfs_flush_reqs(&sbi->osi_pages, 0, check_time);
174         }
175 }
176
177
178 static struct task_struct *pupdated;
179
180 static int pupdate(void *unused) 
181 {
182         struct task_struct * tsk = current;
183         int interval;
184         
185         pupdated = current;
186
187         exit_files(current);
188         exit_mm(current);
189
190         tsk->session = 1;
191         tsk->pgrp = 1;
192         sprintf(tsk->comm, "pupdated");
193         pupdated = current;
194
195         printk("pupdated activated...\n");
196
197         /* sigstop and sigcont will stop and wakeup pupdate */
198         spin_lock_irq(&tsk->sigmask_lock);
199         sigfillset(&tsk->blocked);
200         siginitsetinv(&tsk->blocked, sigmask(SIGTERM));
201         recalc_sigpending(tsk);
202         spin_unlock_irq(&tsk->sigmask_lock);
203
204         for (;;) {
205                 /* update interval */
206                 interval = pupd_prm.interval;
207                 if (interval)
208                 {
209                         tsk->state = TASK_INTERRUPTIBLE;
210                         schedule_timeout(interval);
211                 }
212                 else
213                 {
214                 stop_pupdate:
215                         tsk->state = TASK_STOPPED;
216                         MOD_DEC_USE_COUNT;
217                         printk("pupdated stopped...\n");
218                         return 0;
219                 }
220                 /* check for sigstop */
221                 if (signal_pending(tsk))
222                 {
223                         int stopped = 0;
224                         spin_lock_irq(&tsk->sigmask_lock);
225                         if (sigismember(&tsk->signal, SIGTERM))
226                         {
227                                 sigdelset(&tsk->signal, SIGTERM);
228                                 stopped = 1;
229                         }
230                         recalc_sigpending(tsk);
231                         spin_unlock_irq(&tsk->sigmask_lock);
232                         if (stopped)
233                                 goto stop_pupdate;
234                 }
235                 /* asynchronous setattr etc for the future ... */
236                 /* flush_inodes(); */
237                 CDEBUG(D_INODE, "about to flush pages...\n");
238                 /*
239                 obdfs_flush_dirty_pages(1);
240                 */
241                 CDEBUG(D_INODE, "done flushing pages...\n");
242         }
243 }
244
245
246 int flushd_init(void)
247 {
248         /*      kernel_thread(bdflush, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND); */
249         MOD_INC_USE_COUNT;
250         kernel_thread(pupdate, NULL, 0);
251         printk("flushd inited\n");
252         return 0;
253 }
254
/*
 * Counterpart of flushd_init().  Currently does nothing and returns 0.
 *
 * XXX Andreas, we will do this later: if `pupdated' is set, this should
 *     signal the daemon and let it run at least once before continuing.
 *     For now pupdated must be killed from userland before unloading
 *     obdfs.o.  pupdate() leaves only SIGTERM unblocked and stops when it
 *     sees that signal.
 */
int flushd_cleanup(void)
{
        return 0;
}