/* * OBDFS Super operations - also used for Lustre file system * * * Copyright (C) 1991, 1992 Linus Torvalds * Copryright (C) 1999 Stelias Computing Inc. * Copryright (C) 1999 Seagate Technology Inc. * */ #define __NO_VERSION__ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct { int nfract; /* Percentage of buffer cache dirty to activate bdflush */ int ndirty; /* Maximum number of dirty blocks to write out per wake-cycle */ int nrefill; /* Number of clean buffers to try to obtain each time we call refill */ int nref_dirt; /* Dirty buffer threshold for activating bdflush when trying to refill buffers. */ int interval; /* jiffies delay between kupdate flushes */ int age_buffer; /* Time for normal buffer to age before we flush it */ int age_super; /* Time for superblock to age before we flush it */ } pupd_prm = {40, 500, 64, 256, 5*HZ, 30*HZ, 5*HZ }; static int obdfs_enqueue_pages(struct inode *inode, struct obdo **obdo, int nr_slots, struct page **pages, char **bufs, obd_size *counts, obd_off *offsets, obd_flag *flag, int check_time) { struct list_head *page_list = obdfs_iplist(inode); struct list_head *tmp; int i = 0; ENTRY; /* if there are no pages, remove from super block list */ if (list_empty(obdfs_iplist(inode))) { list_del(obdfs_islist(inode)); /* we check for "empty" on this animal: must init it! */ INIT_LIST_HEAD(obdfs_islist(inode)); CDEBUG(D_INODE, "empty list\n"); EXIT; return 0; } *obdo = obdo_fromid(IID(inode), inode->i_ino, OBD_MD_FLNOTOBD); if ( IS_ERR(*obdo) ) { EXIT; return PTR_ERR(*obdo); } obdfs_from_inode(*obdo, inode); *flag = OBD_BRW_CREATE; tmp = page_list; while ( (tmp = tmp->next) != page_list && (i < nr_slots) ) { struct obdfs_pgrq *req; struct page *page; req = list_entry(tmp, struct obdfs_pgrq, rq_plist); /* remove request from list before write to avoid conflict */ obdfs_pgrq_del(req); page = req->rq_page; if ( !page ) { CDEBUG(D_INODE, "no page \n"); EXIT; return 0; } if (check_time && req->rq_jiffies > (jiffies - pupd_prm.age_buffer)) continue; CDEBUG(D_INODE, "adding page %p to vector\n", page); bufs[i] = (char *)page_address(page); pages[i] = page; counts[i] = PAGE_SIZE; offsets[i] = ((obd_off)page->index) << PAGE_SHIFT; i++; } /* If no more pages for this inode, remove from superblock list */ if ( list_empty(obdfs_iplist(inode)) ) list_del(obdfs_islist(inode)); EXIT; return i; } /* Remove writeback requests for the superblock */ int obdfs_flush_reqs(struct list_head *inode_list, int flush_inode, int check_time) { struct list_head *tmp = inode_list; obd_count num_io = 0; obd_count num_obdos = 0; struct inode *inodes[MAX_IOVEC]; struct obdo *obdos[MAX_IOVEC]; struct page *pages[MAX_IOVEC]; char *bufs[MAX_IOVEC]; obd_size counts[MAX_IOVEC]; obd_off offsets[MAX_IOVEC]; obd_flag flags[MAX_IOVEC]; obd_count bufs_per_obdo[MAX_IOVEC]; int err = 0; int i; ENTRY; if (!inode_list) { CDEBUG(D_INODE, "no list\n"); EXIT; return 0; } if ( list_empty(inode_list)) { CDEBUG(D_INODE, "list empty\n"); EXIT; return 0; } /* add all of the outstanding pages to a write vector, and write it */ while ( (tmp = tmp->next) != inode_list ) { struct obdfs_inode_info *ii; int res; ii = list_entry(tmp, struct obdfs_inode_info, oi_inodes); inodes[num_obdos] = list_entry(ii, struct inode, u); res = obdfs_enqueue_pages(inodes[num_obdos], &obdos[num_obdos], MAX_IOVEC - num_io, &pages[num_io], &bufs[num_io], &counts[num_io], &offsets[num_io], &flags[num_obdos],1); if ( res < 0 ) { return -EIO; } bufs_per_obdo[num_obdos] = res; num_io += res; num_obdos++; if ( num_io == MAX_IOVEC ) { err = obdfs_do_vec_wr(inodes[0]->i_sb, num_io, num_obdos, obdos, bufs_per_obdo, pages, bufs, counts, offsets, flags); for (i = 0 ; i < num_obdos ; i++) { obdfs_to_inode(inodes[i], obdos[i]); obdo_free(obdos[i]); } if ( err ) { EXIT; goto ERR; } num_io = 0; num_obdos = 0; } } /* flush any remaining I/Os */ if ( num_io ) { err = obdfs_do_vec_wr(inodes[0]->i_sb, num_io, num_obdos, obdos, bufs_per_obdo, pages, bufs, counts, offsets, flags); for (i = 0 ; i < num_obdos ; i++) { obdfs_to_inode(inodes[i], obdos[i]); obdo_free(obdos[i]); } } EXIT; ERR: return err; } /* obdfs_remove_pages_from_cache */ static void obdfs_flush_dirty_pages(int check_time) { struct list_head *sl; sl = &obdfs_super_list; while ( (sl = sl->next) != &obdfs_super_list ) { struct obdfs_sb_info *sbi = list_entry(sl, struct obdfs_sb_info, osi_list); /* walk write requests here, use the sb, check the time */ obdfs_flush_reqs(&sbi->osi_inodes, 0, 1); } #if 0 /* again, but now we wait for completion */ sl = &obdfs_super_list; while ( (sl = sl->next) != &obdfs_super_list ) { struct obdfs_sb_info *sbi = list_entry(sl, struct obdfs_sb_info, sl_chain); /* walk write requests here */ obdfs_flush_reqs(&sbi->osi_pages, 0, check_time); } #endif } static struct task_struct *pupdated; static int pupdate(void *unused) { struct task_struct * tsk = current; int interval; pupdated = current; exit_files(current); exit_mm(current); tsk->session = 1; tsk->pgrp = 1; sprintf(tsk->comm, "pupdated"); pupdated = current; printk("pupdated activated...\n"); /* sigstop and sigcont will stop and wakeup pupdate */ spin_lock_irq(&tsk->sigmask_lock); sigfillset(&tsk->blocked); siginitsetinv(&tsk->blocked, sigmask(SIGTERM)); recalc_sigpending(tsk); spin_unlock_irq(&tsk->sigmask_lock); for (;;) { /* update interval */ interval = pupd_prm.interval; if (interval) { tsk->state = TASK_INTERRUPTIBLE; schedule_timeout(interval); } else { stop_pupdate: tsk->state = TASK_STOPPED; /* MOD_DEC_USE_COUNT; */ printk("pupdated stopped...\n"); return 0; } /* check for sigstop */ if (signal_pending(tsk)) { int stopped = 0; spin_lock_irq(&tsk->sigmask_lock); if (sigismember(&tsk->signal, SIGTERM)) { sigdelset(&tsk->signal, SIGTERM); stopped = 1; } recalc_sigpending(tsk); spin_unlock_irq(&tsk->sigmask_lock); if (stopped) goto stop_pupdate; } /* asynchronous setattr etc for the future ... */ /* flush_inodes(); */ CDEBUG(D_INODE, "about to flush pages...\n"); /* obdfs_flush_dirty_pages(1); */ CDEBUG(D_INODE, "done flushing pages...\n"); } } int flushd_init(void) { /* kernel_thread(bdflush, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND); */ /* MOD_INC_USE_COUNT; */ kernel_thread(pupdate, NULL, 0); printk("flushd inited\n"); return 0; } int flushd_cleanup(void) { /* this should deliver a signal to */ /* XXX Andreas, we will do this later, for now, you must kill pupdated with a SIGTERM from userland, before unloading obdfs.o */ if (pupdated) { /* then let it run at least once, before continuing */ /* XXX need to do something like this here: send_sig(SIGTERM, current, 0); */ 1; } /* not reached */ return 0; }