2 * OBDFS Super operations - also used for Lustre file system
5 * Copyright (C) 1991, 1992 Linus Torvalds
6 * Copyright (C) 1999 Stelias Computing Inc. <braam@stelias.com>
7 * Copyright (C) 1999 Seagate Technology Inc.
10 #define __NO_VERSION__
11 #include <linux/module.h>
12 #include <linux/sched.h>
14 #include <linux/malloc.h>
15 #include <linux/locks.h>
16 #include <linux/errno.h>
17 #include <linux/swap.h>
18 #include <linux/smp_lock.h>
19 #include <linux/vmalloc.h>
20 #include <linux/blkdev.h>
21 #include <linux/sysrq.h>
22 #include <linux/file.h>
23 #include <linux/init.h>
24 #include <linux/quotaops.h>
25 #include <linux/iobuf.h>
26 #include <linux/highmem.h>
28 #include <asm/uaccess.h>
30 #include <asm/bitops.h>
31 #include <asm/mmu_context.h>
33 #include <linux/obd_support.h>
34 #include <linux/obd_class.h>
35 #include <linux/obdfs.h>
40 int nfract; /* Percentage of buffer cache dirty to activate bdflush */
42 int ndirty; /* Maximum number of dirty blocks to write out per wake-cycle */
44 int nrefill; /* Number of clean buffers to try to obtain
45 each time we call refill */
46 int nref_dirt; /* Dirty buffer threshold for activating bdflush
47 when trying to refill buffers. */
48 int interval; /* jiffies delay between kupdate flushes */
49 int age_buffer; /* Time for normal buffer to age before we flush it */
50 int age_super; /* Time for superblock to age before we flush it */
51 } pupd_prm = {40, 500, 64, 256, 5*HZ, 30*HZ, 5*HZ };
54 /*
 * Write out the page write-back requests queued on page_list.
 *
 * Only part of this function is visible, so these notes are limited to what
 * the visible lines show:
 *  - an empty list is logged ("list empty");
 *  - each list node is converted to a struct obdfs_pgrq through either the
 *    rq_ilist or the rq_slist member (the test selecting between the two is
 *    not visible; presumably it is flush_inode - TODO confirm);
 *  - a page (where `page` is assigned is not visible) is recorded in slot
 *    num_io of the bufs/pages/counts/offsets/flags arrays, and the request
 *    is removed with obdfs_pgrq_del();
 *  - obdfs_do_vec_wr() is called when num_io reaches MAX_IOVEC, and once
 *    more after the loop for the remaining entries.
 *
 * page_list:   list head whose entries are walked.
 * flush_inode: not referenced on any visible line (see note above).
 * check_time:  not referenced on any visible line; presumably it gates the
 *              rq_jiffies age test below - TODO confirm.
 */
55 int obdfs_flush_reqs(struct list_head *page_list,
56 int flush_inode, int check_time)
58 struct list_head *tmp = page_list;
60 struct obdo *oa = NULL;
/* Parallel per-I/O arrays, each MAX_IOVEC entries long. */
61 struct obdo *obdos[MAX_IOVEC];
62 struct page *pages[MAX_IOVEC];
63 char *bufs[MAX_IOVEC];
64 obd_size counts[MAX_IOVEC];
65 obd_off offsets[MAX_IOVEC];
66 obd_flag flags[MAX_IOVEC];
69 struct inode *inode = NULL;
73 if ( list_empty(page_list)) {
74 CDEBUG(D_INODE, "list empty\n");
80 /* add all of the outstanding pages to a write vector, and write it */
/*
 * NOTE(review): the loop advances with tmp->next while obdfs_pgrq_del() is
 * called on the current request further down. If obdfs_pgrq_del() unlinks or
 * frees the node tmp points at, the next advance reads a removed node -
 * confirm against obdfs_pgrq_del().
 */
81 while ( (tmp = tmp->next) != page_list ) {
82 struct obdfs_pgrq *pgrq;
/* Two alternative conversions; the condition choosing one is not visible. */
86 pgrq = list_entry(tmp, struct obdfs_pgrq, rq_ilist);
88 pgrq = list_entry(tmp, struct obdfs_pgrq, rq_slist);
90 inode = pgrq->rq_inode;
/*
 * Tail of a condition whose start is not visible: true when rq_jiffies is
 * greater than (jiffies - pupd_prm.age_buffer).
 * NOTE(review): a plain '>' on jiffies values does not account for counter
 * wrap-around; confirm whether a time_after()-style comparison is needed.
 */
93 pgrq->rq_jiffies > (jiffies - pupd_prm.age_buffer))
/*
 * Obtain an obdo for this inode number, then hand it and the inode to
 * obdfs_from_inode() (presumably copies inode attributes - TODO confirm).
 */
96 oa = obdo_fromid(IID(inode), inode->i_ino, OBD_MD_FLNOTOBD);
101 obdfs_from_inode(oa, inode);
103 CDEBUG(D_INODE, "adding page %p to vector\n", page);
/* One whole page per entry, at byte offset (page->index << PAGE_SHIFT). */
105 bufs[num_io] = (char *)page_address(page);
106 pages[num_io] = page;
107 counts[num_io] = PAGE_SIZE;
108 offsets[num_io] = ((obd_off)page->index) << PAGE_SHIFT;
109 flags[num_io] = OBD_BRW_CREATE;
112 /* remove request from list before write to avoid conflict */
113 obdfs_pgrq_del(pgrq);
/*
 * Vector full: write it out. num_io is passed by address, so the callee is
 * able to modify it.
 */
115 if ( num_io == MAX_IOVEC ) {
116 err = obdfs_do_vec_wr(inode->i_sb, &num_io, obdos,
118 bufs, counts, offsets, flags);
119 for (i=0 ; i<MAX_IOVEC ; i++) {
122 /* XXX Probably should handle error here -
123 * discard other writes, or put
124 * (MAX_IOVEC - num_io) I/Os back to list?
134 /* flush any remaining I/Os */
137 err = obdfs_do_vec_wr(inode->i_sb, &num_io, obdos, pages, bufs,
138 counts, offsets, flags);
/*
 * NOTE(review): this for statement has no initialiser for i; its starting
 * value comes from code that is not visible here.
 */
139 for ( ; i>=0 ; i-- ) {
147 } /* obdfs_flush_reqs */
/*
 * Walk obdfs_super_list twice, calling obdfs_flush_reqs() on sbi->osi_pages
 * for each entry (the assignment of sbi from the entry is not visible here).
 * The first pass always passes check_time = 1; the second pass forwards the
 * caller's check_time. flush_inode is 0 in both passes.
 *
 * check_time: forwarded to obdfs_flush_reqs() in the second pass only.
 */
150 static void obdfs_flush_dirty_pages(int check_time)
152 struct list_head *sl;
153 struct obdfs_sb_info *sbi;
155 sl = &obdfs_super_list;
156 while ( (sl = sl->next) != &obdfs_super_list ) {
157 struct obdfs_super_entry *entry =
158 list_entry(sl, struct obdfs_super_entry, sl_chain);
161 /* walk write requests here, use the sb, check the time */
162 obdfs_flush_reqs(&sbi->osi_pages, 0, 1);
165 /* again, but now we wait for completion */
/*
 * NOTE(review): no wait is visible in this second pass; the only visible
 * difference from the first pass is the check_time argument.
 */
166 sl = &obdfs_super_list;
167 while ( (sl = sl->next) != &obdfs_super_list ) {
168 struct obdfs_super_entry *entry =
169 list_entry(sl, struct obdfs_super_entry, sl_chain);
172 /* walk write requests here */
173 obdfs_flush_reqs(&sbi->osi_pages, 0, check_time);
/* Shares its name with the "pupdated" thread; no visible line assigns or reads it. */
178 static struct task_struct *pupdated;
/*
 * Thread function passed to kernel_thread() by flushd_init().
 *
 * Visible steps: set the task name to "pupdated", set up the blocked-signal
 * mask around SIGTERM, sleep for pupd_prm.interval jiffies, handle a pending
 * SIGTERM, then call obdfs_flush_dirty_pages(1). The loop structure, the
 * conditions between these steps and the return paths are not visible here.
 *
 * unused: not referenced on any visible line.
 */
180 static int pupdate(void *unused)
182 struct task_struct * tsk = current;
192 sprintf(tsk->comm, "pupdated");
195 printk("pupdated activated...\n");
197 /* block every signal except SIGTERM (siginitsetinv() sets the mask to the inverse of its argument - confirm against linux/signal.h); the earlier wording here spoke of sigstop/sigcont, which the code below does not use */
198 spin_lock_irq(&tsk->sigmask_lock);
199 sigfillset(&tsk->blocked);
200 siginitsetinv(&tsk->blocked, sigmask(SIGTERM));
201 recalc_sigpending(tsk);
202 spin_unlock_irq(&tsk->sigmask_lock);
205 /* update interval */
206 interval = pupd_prm.interval;
/* Sleep interruptibly for up to `interval` jiffies. */
209 tsk->state = TASK_INTERRUPTIBLE;
210 schedule_timeout(interval);
/* The condition selecting this stopped state is not visible here. */
215 tsk->state = TASK_STOPPED;
217 printk("pupdated stopped...\n");
220 /* check for a pending signal; the code below tests for SIGTERM */
221 if (signal_pending(tsk))
224 spin_lock_irq(&tsk->sigmask_lock);
225 if (sigismember(&tsk->signal, SIGTERM))
227 sigdelset(&tsk->signal, SIGTERM);
230 recalc_sigpending(tsk);
231 spin_unlock_irq(&tsk->sigmask_lock);
235 /* asynchronous setattr etc for the future ... */
236 /* flush_inodes(); */
237 CDEBUG(D_INODE, "about to flush pages...\n");
/* check_time is 1 for the periodic flush. */
239 obdfs_flush_dirty_pages(1);
241 CDEBUG(D_INODE, "done flushing pages...\n");
/*
 * Start the pupdate() thread with kernel_thread() and log "flushd inited".
 * NOTE(review): the value returned by kernel_thread() is not checked on the
 * visible line, so a failed thread creation would go unnoticed here.
 * The function's return statement is not visible.
 */
246 int flushd_init(void)
248 /* kernel_thread(bdflush, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND); */
250 kernel_thread(pupdate, NULL, 0);
251 printk("flushd inited\n");
255 int flushd_cleanup(void)
257 /* this should deliver a signal to */
260 /* XXX Andreas, we will do this later, for now, you must kill
261 pupdated with a SIGTERM (the signal pupdate() tests for; this note used to say SIGSTOP) from userland, before unloading obdfs.o
264 /* send updated a STOP signal */
265 /* then let it run at least once, before continuing */