2 * OBDFS Super operations - also used for Lustre file system
5 * Copyright (C) 1991, 1992 Linus Torvalds
6 * Copyright (C) 1999 Stelias Computing Inc. <braam@stelias.com>
7 * Copyright (C) 1999 Seagate Technology Inc.
10 #define __NO_VERSION__
11 #include <linux/module.h>
12 #include <linux/sched.h>
14 #include <linux/malloc.h>
15 #include <linux/locks.h>
16 #include <linux/errno.h>
17 #include <linux/swap.h>
18 #include <linux/smp_lock.h>
19 #include <linux/vmalloc.h>
20 #include <linux/blkdev.h>
21 #include <linux/sysrq.h>
22 #include <linux/file.h>
23 #include <linux/init.h>
24 #include <linux/quotaops.h>
25 #include <linux/iobuf.h>
26 #include <linux/highmem.h>
28 #include <asm/uaccess.h>
30 #include <asm/bitops.h>
31 #include <asm/mmu_context.h>
33 #include <linux/obd_support.h>
34 #include <linux/obd_class.h>
35 #include <linux/obdfs.h>
/*
 * Tunables for the pupdate flush thread.  The start of this declaration
 * lies outside the visible lines.  The seven initializer values on the
 * closing line appear to map, in order, to the seven fields shown here:
 * nfract=40, ndirty=500, nrefill=64, nref_dirt=256, interval=5*HZ,
 * age_buffer=30*HZ, age_super=5*HZ -- confirm no field is hidden.
 * In the visible code only `interval` (sleep time in pupdate) and
 * `age_buffer` (request age test in obdfs_enqueue_pages) are referenced.
 * NOTE(review): several of the field comments below look truncated;
 * restore them from the original file.
 */
40 int nfract; /* Percentage of buffer cache dirty to
42 int ndirty; /* Maximum number of dirty blocks to write out per
44 int nrefill; /* Number of clean buffers to try to obtain
45 each time we call refill */
46 int nref_dirt; /* Dirty buffer threshold for activating bdflush
47 when trying to refill buffers. */
48 int interval; /* jiffies delay between kupdate flushes */
49 int age_buffer; /* Time for normal buffer to age before we flush it */
50 int age_super; /* Time for superblock to age before we flush it */
51 } pupd_prm = {40, 500, 64, 256, 5*HZ, 30*HZ, 5*HZ };
/*
 * Collect queued writeback pages of one inode into the caller's I/O vectors.
 *
 * inode:      inode whose page-request list (obdfs_iplist) is walked
 * obdo:       out; receives the obdo returned by obdo_fromid(), which is
 *             then filled from the inode via obdfs_from_inode()
 * nr_slots:   upper bound on the number of vector entries used
 * bufs, counts, offsets:
 *             output arrays; for each queued page the visible code stores
 *             page_address(page), PAGE_SIZE and page->index << PAGE_SHIFT
 * pages:      presumably filled alongside the arrays above -- the store is
 *             not visible here; confirm
 * flag:       out; set to OBD_BRW_CREATE
 * check_time: presumably enables the age test against pupd_prm.age_buffer
 *             below -- the start of that condition is not visible; confirm
 *
 * Returns PTR_ERR(*obdo) when obdo_fromid() fails.  The caller in this file
 * stores the result as a per-obdo buffer count, so the success value is
 * presumably the number of pages queued -- TODO confirm, the return
 * statement is not visible here.
 */
54 static int obdfs_enqueue_pages(struct inode *inode, struct obdo **obdo,
55 int nr_slots, struct page **pages, char **bufs,
56 obd_size *counts, obd_off *offsets,
57 obd_flag *flag, int check_time)
59 struct list_head *page_list = obdfs_iplist(inode);
60 struct list_head *tmp;
64 /* if there are no pages, remove from super block list */
65 if (list_empty(obdfs_iplist(inode))) {
66 list_del(obdfs_islist(inode));
67 /* we check for "empty" on this animal: must init it! */
68 INIT_LIST_HEAD(obdfs_islist(inode));
69 CDEBUG(D_INODE, "empty list\n");
/* Look up the obdo for this inode; bail out with the error code on failure. */
74 *obdo = obdo_fromid(IID(inode), inode->i_ino, OBD_MD_FLNOTOBD);
75 if ( IS_ERR(*obdo) ) {
77 return PTR_ERR(*obdo);
80 obdfs_from_inode(*obdo, inode);
81 *flag = OBD_BRW_CREATE;
/* Walk the inode's request list until it wraps or the slots run out. */
84 while ( (tmp = tmp->next) != page_list && (i < nr_slots) ) {
85 struct obdfs_pgrq *req;
88 req = list_entry(tmp, struct obdfs_pgrq, rq_plist);
89 /* remove request from list before write to avoid conflict */
94 CDEBUG(D_INODE, "no page \n");
/*
 * Age test: true when the request was stamped more recently than
 * age_buffer jiffies ago.  Such requests are presumably skipped --
 * the statement controlled by this condition is not visible; confirm.
 */
100 req->rq_jiffies > (jiffies - pupd_prm.age_buffer))
103 CDEBUG(D_INODE, "adding page %p to vector\n", page);
104 bufs[i] = (char *)page_address(page);
106 counts[i] = PAGE_SIZE;
107 offsets[i] = ((obd_off)page->index) << PAGE_SHIFT;
111 /* If no more pages for this inode, remove from superblock list */
/*
 * NOTE(review): unlike the empty-list path near the top, no INIT_LIST_HEAD
 * is visible after this list_del, although the comment there says the
 * list head is later tested for emptiness -- confirm this is intended.
 */
112 if ( list_empty(obdfs_iplist(inode)) )
113 list_del(obdfs_islist(inode));
120 /* Remove writeback requests for the superblock */
/*
 * Walks inode_list, gathers each inode's queued pages into fixed-size
 * vectors of MAX_IOVEC entries via obdfs_enqueue_pages(), and submits them
 * with obdfs_do_vec_wr().  A write is issued whenever the vectors fill up
 * (num_io == MAX_IOVEC) and once more for whatever remains after the walk.
 * After each write the obdos are copied back into their inodes with
 * obdfs_to_inode().
 *
 * inode_list:  list whose entries are linked through the oi_inodes member
 *              of struct obdfs_inode_info
 * flush_inode: not referenced in the visible lines
 * The remaining parameter(s) are declared on a line not visible here;
 * callers in this file pass a check_time value as the third argument.
 *
 * NOTE(review): the superblock handed to obdfs_do_vec_wr() is always taken
 * from inodes[0], so all inodes on the list are assumed to share one
 * superblock -- confirm.
 * NOTE(review): eight MAX_IOVEC-sized arrays live on the stack; check
 * the frame size against the kernel stack limit for the configured
 * MAX_IOVEC.
 */
121 int obdfs_flush_reqs(struct list_head *inode_list, int flush_inode,
124 struct list_head *tmp = inode_list;
125 obd_count num_io = 0;
126 obd_count num_obdos = 0;
127 struct inode *inodes[MAX_IOVEC];
128 struct obdo *obdos[MAX_IOVEC];
129 struct page *pages[MAX_IOVEC];
130 char *bufs[MAX_IOVEC];
131 obd_size counts[MAX_IOVEC];
132 obd_off offsets[MAX_IOVEC];
133 obd_flag flags[MAX_IOVEC];
134 obd_count bufs_per_obdo[MAX_IOVEC];
141 CDEBUG(D_INODE, "no list\n");
146 if ( list_empty(inode_list)) {
147 CDEBUG(D_INODE, "list empty\n");
153 /* add all of the outstanding pages to a write vector, and write it */
154 while ( (tmp = tmp->next) != inode_list ) {
155 struct obdfs_inode_info *ii;
/* Map the list node to its obdfs_inode_info, then to the enclosing inode. */
158 ii = list_entry(tmp, struct obdfs_inode_info, oi_inodes);
159 inodes[num_obdos] = list_entry(ii, struct inode, u);
/*
 * Offer only the slots still free in the shared vectors.
 * NOTE(review): check_time is passed as the literal 1 here rather than
 * a value supplied by the caller -- confirm this is intended.
 */
161 res = obdfs_enqueue_pages(inodes[num_obdos], &obdos[num_obdos],
162 MAX_IOVEC - num_io, &pages[num_io],
163 &bufs[num_io], &counts[num_io],
164 &offsets[num_io], &flags[num_obdos],1);
169 bufs_per_obdo[num_obdos] = res;
/* Vectors are full: write them out and copy the obdos back to the inodes. */
173 if ( num_io == MAX_IOVEC ) {
174 err = obdfs_do_vec_wr(inodes[0]->i_sb, num_io,
175 num_obdos, obdos, bufs_per_obdo,
176 pages, bufs, counts, offsets,
178 for (i = 0 ; i < num_obdos ; i++) {
179 obdfs_to_inode(inodes[i], obdos[i]);
191 /* flush any remaining I/Os */
193 err = obdfs_do_vec_wr(inodes[0]->i_sb, num_io, num_obdos,
194 obdos, bufs_per_obdo, pages, bufs,
195 counts, offsets, flags);
196 for (i = 0 ; i < num_obdos ; i++) {
197 obdfs_to_inode(inodes[i], obdos[i]);
205 } /* obdfs_flush_reqs */
/*
 * Flush queued page writes for every superblock on obdfs_super_list.
 * The list is walked twice, calling obdfs_flush_reqs() once per
 * superblock in each pass.
 *
 * check_time: forwarded to obdfs_flush_reqs() in the second pass only.
 *
 * NOTE(review): the two passes are inconsistent and at least one looks
 * wrong -- confirm against struct obdfs_sb_info:
 *   - the first pass resolves the list node through member osi_list, the
 *     second through sl_chain, although both walk the same list;
 *   - the first pass flushes sbi->osi_inodes, the second sbi->osi_pages;
 *   - the first pass passes the literal 1 instead of check_time.
 */
208 static void obdfs_flush_dirty_pages(int check_time)
210 struct list_head *sl;
212 sl = &obdfs_super_list;
213 while ( (sl = sl->next) != &obdfs_super_list ) {
214 struct obdfs_sb_info *sbi =
215 list_entry(sl, struct obdfs_sb_info, osi_list);
217 /* walk write requests here, use the sb, check the time */
218 obdfs_flush_reqs(&sbi->osi_inodes, 0, 1);
222 /* again, but now we wait for completion */
223 sl = &obdfs_super_list;
224 while ( (sl = sl->next) != &obdfs_super_list ) {
225 struct obdfs_sb_info *sbi =
226 list_entry(sl, struct obdfs_sb_info, sl_chain);
228 /* walk write requests here */
229 obdfs_flush_reqs(&sbi->osi_pages, 0, check_time);
/* Not assigned anywhere in the visible lines. */
235 static struct task_struct *pupdated;
/*
 * Body of the "pupdated" kernel thread (started from flushd_init).
 * From the visible lines: it names itself "pupdated", adjusts its blocked
 * signal set, then sleeps for pupd_prm.interval jiffies at a time and
 * calls obdfs_flush_dirty_pages(1).  A pending SIGTERM is cleared under
 * sigmask_lock.  The loop structure and the exit path are on lines not
 * visible here.
 *
 * unused: thread argument, not referenced in the visible lines.
 */
238 static int pupdate(void *unused)
240 struct task_struct * tsk = current;
250 sprintf(tsk->comm, "pupdated");
253 printk("pupdated activated...\n");
/*
 * NOTE(review): the comment below mentions SIGSTOP/SIGCONT, but the code
 * builds the blocked set from sigmask(SIGTERM) and later tests for
 * SIGTERM.  With siginitsetinv() the blocked set presumably ends up as
 * "everything except SIGTERM" -- confirm and update the comment.
 */
255 /* sigstop and sigcont will stop and wakeup pupdate */
256 spin_lock_irq(&tsk->sigmask_lock);
257 sigfillset(&tsk->blocked);
258 siginitsetinv(&tsk->blocked, sigmask(SIGTERM));
259 recalc_sigpending(tsk);
260 spin_unlock_irq(&tsk->sigmask_lock);
263 /* update interval */
264 interval = pupd_prm.interval;
/* Interruptible sleep so that a signal can end the wait early. */
267 tsk->state = TASK_INTERRUPTIBLE;
268 schedule_timeout(interval);
/*
 * Stop path; the condition that leads here is not visible.
 */
273 tsk->state = TASK_STOPPED;
274 /* MOD_DEC_USE_COUNT; */
275 printk("pupdated stopped...\n");
278 /* check for sigstop */
279 if (signal_pending(tsk))
/* Consume a pending SIGTERM while holding the signal mask lock. */
282 spin_lock_irq(&tsk->sigmask_lock);
283 if (sigismember(&tsk->signal, SIGTERM))
285 sigdelset(&tsk->signal, SIGTERM);
288 recalc_sigpending(tsk);
289 spin_unlock_irq(&tsk->sigmask_lock);
293 /* asynchronous setattr etc for the future ... */
294 /* flush_inodes(); */
295 CDEBUG(D_INODE, "about to flush pages...\n");
297 obdfs_flush_dirty_pages(1);
299 CDEBUG(D_INODE, "done flushing pages...\n");
/*
 * Start the flush daemon thread(s) and log "flushd inited".
 * The return statement is not visible here.
 *
 * NOTE(review): the kernel_thread() results are not checked in the visible
 * lines, so a failed thread start would go unnoticed.
 * NOTE(review): the bdflush thread start may be disabled by surrounding
 * lines that are not visible here -- confirm.
 */
304 int flushd_init(void)
307 kernel_thread(bdflush, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
309 /* MOD_INC_USE_COUNT; */
310 kernel_thread(pupdate, NULL, 0);
311 printk("flushd inited\n");
315 int flushd_cleanup(void)
317 /* this should deliver a signal to */
320 /* XXX Andreas, we will do this later, for now, you must kill
321 pupdated with a SIGTERM from userland, before unloading obdfs.o
324 /* then let it run at least once, before continuing */
326 /* XXX need to do something like this here:
327 send_sig(SIGTERM, current, 0);