1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * lustre/smfs/cache_space.c
5 * A library of functions to manage cache space based on ARC
6 * (modified LRU) replacement algorithm.
8 * Copyright (c) 2004 Cluster File Systems, Inc.
10 * This file is part of Lustre, http://www.lustre.org.
12 * Lustre is free software; you can redistribute it and/or
13 * modify it under the terms of version 2 of the GNU General Public
14 * License as published by the Free Software Foundation.
16 * Lustre is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with Lustre; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 #define DEBUG_SUBSYSTEM S_SM
27 #include <linux/lustre_log.h>
28 #include <linux/lustre_fsfilt.h>
29 #include <linux/lustre_smfs.h>
31 #include "smfs_internal.h"
/*
 * Tunables for the cache purge daemon, together with the one instance
 * cf_prm.  The initializer is positional: nfract = 30, ndirty = 512,
 * interval = 600 * HZ, nfract_sync = 60, nfract_stop_cpurge = 20.
 *
 * NOTE(review): in this copy of the file several per-field comments below
 * are cut off before their closing delimiter; restore them from a pristine
 * copy before building.
 * NOTE(review): cf_prm is declared without a storage-class specifier, so it
 * has external linkage; confirm whether anything outside this file uses it.
 */
33 struct cache_purge_param {
34 int nfract; /* percentage of cache dirty to activate
36 int ndirty; /* maximum number of objects to write out per
38 int interval; /* jiffies delay between cache purge */
39 int nfract_sync; /* percentage of cache dirty to activate cpurge
41 int nfract_stop_cpurge; /* percentage of cache dirty to stop cpurge */
42 } cf_prm = {30, 512, 600 * HZ, 60, 20};
/*
 * File-wide purge queue state.  There is a single statically allocated
 * queue (smfs_cpq); every function in this file reaches it through the
 * cpq pointer.
 */
44 static struct cache_purge_queue smfs_cpq;
45 static struct cache_purge_queue *cpq = &smfs_cpq;
/*
 * Decide whether @dentry counts as a "leaf" for the cache LRU.
 *
 * Directory: tests i_nlink against 2 and the name against "lost+found",
 * then asserts @active_entry is non-NULL, loads the directory's active
 * entry count with get_active_entry() and returns 1 when that count is 0,
 * otherwise 0.
 * Non-directory: tests i_nlink against 1 and the name against KML_LOG_NAME
 * and CACHE_LRU_LOG.  Callers in this file pass a NULL @active_entry when
 * they expect a non-directory.
 *
 * The statements executed when those tests fire, and the final return, are
 * not visible in this copy of the file -- presumably "return 0" for the
 * tests and "return 1" at the end; TODO confirm.
 *
 * NOTE(review): every strncmp() here is bounded by dentry->d_name.len, so a
 * name that is a proper prefix of the constant (e.g. "lost") also compares
 * equal.  Confirm whether an exact-name match was intended.
 */
47 static int cache_leaf_node(struct dentry *dentry, __u64 *active_entry)
49 struct inode *inode = dentry->d_inode;
54 if (S_ISDIR(inode->i_mode)) {
55 if (inode->i_nlink != 2)
57 if (!strncmp((char *)dentry->d_name.name,
58 "lost+found", dentry->d_name.len))
/* a directory is a leaf only when it has no active entries */
60 LASSERT(active_entry != NULL);
61 get_active_entry(inode, active_entry);
62 return(*active_entry > 0 ? 0 : 1);
64 if (inode->i_nlink != 1)
66 if (!strncmp((char *)dentry->d_name.name, KML_LOG_NAME, dentry->d_name.len) ||
67 !strncmp((char *)dentry->d_name.name, CACHE_LRU_LOG, dentry->d_name.len))
/*
 * Decide, from the link count, whether @dentry was a leaf before the
 * operation identified by @op.
 *
 * First test: (@op 0 with i_nlink 0) or (@op 1 with i_nlink 2), and the
 * name differs from both KML_LOG_NAME and CACHE_LRU_LOG.
 * Second test: (@op 2 with i_nlink 0) or (@op 3 with i_nlink 3); here
 * @active_entry must be non-NULL, the active entry count is loaded with
 * get_active_entry() and 1 is returned when it is 0, otherwise 0.
 *
 * As used in this file: op 0 comes from unlink/rename, op 1 from link,
 * op 2 from rmdir, op 3 from mkdir/rename (applied to the parent).
 *
 * The result of the first test and the fall-through return are not visible
 * in this copy of the file -- presumably 1 and 0 respectively; TODO confirm.
 */
73 static int cache_pre_leaf_node(struct dentry *dentry, __u64 *active_entry, int op)
75 if (((op == 0 && dentry->d_inode->i_nlink == 0) ||
76 (op == 1 && dentry->d_inode->i_nlink == 2)) &&
77 strncmp((char *)dentry->d_name.name, KML_LOG_NAME, dentry->d_name.len) &&
78 strncmp((char *)dentry->d_name.name, CACHE_LRU_LOG, dentry->d_name.len))
80 else if ((op == 2 && dentry->d_inode->i_nlink == 0) ||
81 (op == 3 && dentry->d_inode->i_nlink == 3)) {
82 LASSERT(active_entry != NULL);
83 get_active_entry(dentry->d_inode, active_entry);
84 return(*active_entry > 0 ? 0 : 1);
/*
 * Store *@logcookie on @inode as the XATTR_SMFS_CACHE_LOGCOOKIE extended
 * attribute, using the backing filesystem's fs_set_xattr method and the
 * transaction @handle supplied by the caller.
 * The return statement is not visible here; presumably rc from
 * fs_set_xattr is returned -- TODO confirm.
 */
89 static int set_lru_logcookie(struct inode *inode, void *handle,
90 struct llog_cookie *logcookie)
92 struct fsfilt_operations *fsops = I2CSB(inode)->sm_fsfilt;
96 rc = fsops->fs_set_xattr(inode, handle, XATTR_SMFS_CACHE_LOGCOOKIE,
97 logcookie, sizeof(*logcookie));
/*
 * Read the XATTR_SMFS_CACHE_LOGCOOKIE extended attribute of @inode into
 * *@logcookie through the backing filesystem's fs_get_xattr method.
 * The return statement is not visible here; presumably rc from
 * fs_get_xattr is returned -- TODO confirm.
 */
101 static int get_lru_logcookie(struct inode *inode, struct llog_cookie *logcookie)
103 struct fsfilt_operations *fsops = I2CSB(inode)->sm_fsfilt;
107 rc = fsops->fs_get_xattr(inode, XATTR_SMFS_CACHE_LOGCOOKIE,
108 logcookie, sizeof(*logcookie));
/*
 * Examine one LRU record (child id @cid, parent id @pid, both passed by
 * value) and try to purge the child from the cache.
 *
 * Visible steps: look up both inodes on cpq->cpq_sb with iget(), log an
 * error when a lookup fails, log the pair, read the child's hoard priority,
 * and log when the child is hoarded or busy (extra references or I_DIRTY).
 * The actual purge, the iput() calls and all return statements are on
 * lines not visible in this copy.  cache_lru_get_rec_cb() treats a return
 * value of 1 as "purged".
 *
 * NOTE(review): the test guarding the first CERROR is not visible; the
 * parent lookup is checked with IS_ERR() -- confirm both lookups use the
 * failure convention that iget() really has on the target kernel.
 * NOTE(review): rc from get_hoard_priority() is assigned but the next
 * visible line tests hoard_priority directly; confirm rc is checked.
 */
112 static int try2purge_from_cache(struct lustre_id cid,
113 struct lustre_id pid)
115 struct inode *inode, *parent;
116 struct super_block *sb = cpq->cpq_sb;
117 __u32 hoard_priority = 0;
121 inode = iget(sb, cid.li_stc.u.e3s.l3s_ino);
123 CERROR("not existent inode: "LPX64"/%u\n",
124 cid.li_stc.u.e3s.l3s_ino,
125 cid.li_stc.u.e3s.l3s_gen);
128 parent = iget(sb, pid.li_stc.u.e3s.l3s_ino);
129 if (IS_ERR(parent)) {
130 CERROR("not existent inode: "LPX64"/%u\n",
131 pid.li_stc.u.e3s.l3s_ino,
132 pid.li_stc.u.e3s.l3s_gen);
137 CWARN("inode/parent %lu:%lu on the lru list\n",
138 inode->i_ino, parent->i_ino);
/* hoarded objects are reported here; what happens next is not visible */
140 rc = get_hoard_priority(inode, &hoard_priority);
141 if (hoard_priority) {
142 CWARN("inode %lu set hoard\n", inode->i_ino);
/* more than one reference or dirty state marks the inode as busy */
145 if (atomic_read(&inode->i_count) > 1 || (inode->i_state & I_DIRTY)) {
146 CWARN("inode %lu is busy\n", inode->i_ino);
/*
 * Per-record callback handed to llog_cat_process() by purge_some_cache().
 *
 * @data points at an int holding the number of objects still to purge.
 * The callback rejects non-plain logs and records whose type is not
 * CACHE_LRU_REC, then offers the record's child/parent ids to
 * try2purge_from_cache().  A return of 1 from that call is logged as a
 * successful purge.  rc is set to LLOG_PROC_BREAK and the counter is
 * written back through @data; the conditions around those two statements
 * (and any decrement of count) are not visible in this copy.
 *
 * NOTE(review): llh_flags is converted with le32_to_cpu() but lrh_type is
 * compared without conversion -- confirm the record header's byte order.
 */
156 static int cache_lru_get_rec_cb(struct llog_handle *llh,
157 struct llog_rec_hdr *rec, void *data)
159 struct llog_lru_rec *llr;
160 int count = *(int *)data, rc = 0;
163 if (!(le32_to_cpu(llh->lgh_hdr->llh_flags) & LLOG_F_IS_PLAIN)) {
164 CERROR("log is not plain\n");
167 if (rec->lrh_type != CACHE_LRU_REC) {
168 CERROR("log record type error\n");
172 llr = (struct llog_lru_rec *)rec;
174 if (try2purge_from_cache(llr->llr_cid, llr->llr_pid)==1){
175 CDEBUG(D_INODE, "purge ino/gen "LPX64"/%u from cache\n",
176 llr->llr_cid.li_stc.u.e3s.l3s_ino,
177 llr->llr_cid.li_stc.u.e3s.l3s_gen);
180 rc = LLOG_PROC_BREAK;
181 *(int *)data = count;
/*
 * Compare free space on the cache filesystem with the stop threshold.
 *
 * Queries fs_statfs() on cpq->cpq_sb, scales the free block count by 100
 * and tests whether it is below nfract_stop_cpurge percent of all blocks.
 * The values returned for either outcome are not visible in this copy;
 * cache_purge_thread() treats a non-zero result as "go to sleep".
 */
187 static int cpurge_stop(void)
189 struct fsfilt_operations *fsops = S2SMI(cpq->cpq_sb)->sm_fsfilt;
190 struct obd_statfs osfs;
193 rc = fsops->fs_statfs(cpq->cpq_sb, &osfs);
196 free = osfs.os_bfree * 100;
197 if (free < cf_prm.nfract_stop_cpurge * osfs.os_blocks)
/*
 * Classify how full the cache filesystem is.
 *
 * Queries fs_statfs() on cpq->cpq_sb.  Despite its name, "free" holds the
 * USED block count (os_blocks - os_bfree) scaled by 100.  It is compared
 * first with nfract percent of all blocks and then with nfract_sync.
 * The values returned for each outcome are not visible in this copy.
 *
 * NOTE(review): the second comparison uses cf_prm.nfract_sync without the
 * "* osfs.os_blocks" scaling applied one line earlier, so the two sides are
 * in different units -- confirm against the intended threshold.
 */
202 static int cache_balance_state(void)
204 struct fsfilt_operations *fsops = S2SMI(cpq->cpq_sb)->sm_fsfilt;
205 struct obd_statfs osfs;
208 rc = fsops->fs_statfs(cpq->cpq_sb, &osfs);
211 free = (osfs.os_blocks - osfs.os_bfree) * 100;
212 if (free > cf_prm.nfract * osfs.os_blocks) {
213 if (free < cf_prm.nfract_sync)
/*
 * Wake whatever is sleeping on the purge queue's wait queue (the purge
 * thread waits there).  Not static, so it is callable from other files.
 */
220 void wakeup_cpurge(void)
222 wake_up(&cpq->cpq_waitq);
/*
 * Runs llog_cat_process() over cpq->cpq_loghandle with
 * cache_lru_get_rec_cb() as the per-record callback.  The remaining
 * arguments of that call are on lines not visible here; presumably @count
 * is passed as the callback data -- TODO confirm.  Callers pass the number
 * of objects to purge in *@count and inspect it afterwards.
 * The condition under which the "no enough objects available" message is
 * logged, and the return value, are not visible in this copy.
 */
225 /* walk the lru llog to purge count number of objects */
226 static int purge_some_cache(int *count)
231 rc = llog_cat_process(cpq->cpq_loghandle,
232 (llog_cb_t)cache_lru_get_rec_cb,
235 CDEBUG(D_INODE, "no enough objects available\n");
/* Number of objects check_cache_space() asks purge_some_cache() to purge. */
240 #define CFLUSH_NR 512
/*
 * Sample the cache fill state with cache_balance_state() and ask
 * purge_some_cache() for up to CFLUSH_NR objects.  How the state value
 * selects between actions is on lines not visible in this copy, and no
 * caller of this function is visible in this file.
 */
242 static void check_cache_space(void)
244 int state = cache_balance_state();
255 int count = CFLUSH_NR;
256 purge_some_cache(&count);
/*
 * Core LRU maintenance: remove @inode from and/or append it to the cache
 * LRU llog, according to the CACHE_SPACE_* bits in op.
 *
 * The tail of the parameter list is on a line not visible here; the body
 * uses "op" and "flags", and every caller passes (inode, parent, op, flags).
 *
 * CACHE_SPACE_DELETE: load the inode's log cookie from its xattr; warn when
 *   the cookie's lgl_oid is 0 (inode not on the list); when flags is set,
 *   consult llog_cat_half_bottom(); cancel the llog record; store a zeroed
 *   cookie back on the inode.
 * CACHE_SPACE_INSERT: @parent must be non-NULL; build a CACHE_LRU_REC
 *   record holding ino/generation/type of inode and parent, add it with
 *   llog_add() and store the resulting cookie on the inode.
 * CACHE_SPACE_COMMIT: commit through fs_commit().
 *
 * Both temporary allocations are released at the end.  Error checks between
 * the visible statements, and the return, are not visible in this copy.
 *
 * NOTE(review): "handle" is initialised to NULL and no assignment to it is
 * visible, yet it is passed to set_lru_logcookie() and fs_commit();
 * confirm whether a transaction is started on the hidden lines.
 */
261 static int cache_space_hook_lru(struct inode *inode, struct inode *parent,
264 struct fsfilt_operations *fsops = S2SMI(cpq->cpq_sb)->sm_fsfilt;
265 struct llog_ctxt *ctxt = cpq->cpq_loghandle->lgh_ctxt;
266 struct llog_lru_rec *llr = NULL;
267 struct llog_cookie *logcookie = NULL;
268 void * handle = NULL;
269 int cookie_size = sizeof(struct llog_cookie);
273 LASSERT(ctxt != NULL);
/* any bit outside DELETE/INSERT/COMMIT is detected here */
275 if (op & ~(CACHE_SPACE_DELETE | CACHE_SPACE_INSERT |CACHE_SPACE_COMMIT))
278 OBD_ALLOC(logcookie, cookie_size);
280 GOTO(out, rc = -ENOMEM);
282 if (op & CACHE_SPACE_DELETE) {
283 rc = get_lru_logcookie(inode, logcookie);
/* a zero object id in the cookie means no LRU record exists */
287 if (logcookie->lgc_lgl.lgl_oid == 0) {
288 CWARN("inode %lu/%u is not in lru list\n",
289 inode->i_ino, inode->i_generation);
294 if (flags && llog_cat_half_bottom(logcookie, ctxt->loc_handle))
297 rc = llog_cancel(ctxt, 1, logcookie, 0, NULL);
/* clear the stored cookie so the inode no longer references the record */
299 memset(logcookie, 0, cookie_size);
300 rc = set_lru_logcookie(inode, handle, logcookie);
307 if (op & CACHE_SPACE_INSERT) {
308 LASSERT(parent != NULL);
309 OBD_ALLOC(llr, sizeof(*llr));
311 GOTO(out, rc = -ENOMEM);
313 llr->llr_hdr.lrh_len = llr->llr_tail.lrt_len = sizeof(*llr);
314 llr->llr_hdr.lrh_type = CACHE_LRU_REC;
316 /* FIXME-UMKA: should we setup fid components here? */
317 id_ino(&llr->llr_cid) = inode->i_ino;
318 id_gen(&llr->llr_cid) = inode->i_generation;
319 id_type(&llr->llr_cid) = inode->i_mode & S_IFMT;
321 id_ino(&llr->llr_pid) = parent->i_ino;
322 id_gen(&llr->llr_pid) = parent->i_generation;
323 id_type(&llr->llr_pid) = parent->i_mode & S_IFMT;
325 rc = llog_add(ctxt, &llr->llr_hdr, NULL, logcookie, 1,
328 CERROR("failed at llog_add: %d\n", rc);
/* remember where the new record lives so it can be cancelled later */
331 rc = set_lru_logcookie(inode, handle, logcookie);
334 if (op & CACHE_SPACE_COMMIT) {
336 err = fsops->fs_commit(inode->i_sb, inode, handle, 0);
338 CERROR("error committing transaction: %d\n", err);
346 OBD_FREE(logcookie, cookie_size);
348 OBD_FREE(llr, sizeof(*llr));
/*
 * Body of the cache purge daemon started by smfs_start_lru().
 *
 * Daemonizes as "wb_cache_purge", blocks all signals, signals cpq_comp to
 * tell the starter it is running, then repeatedly asks purge_some_cache()
 * for cf_prm.ndirty objects.  When objects remain unpurged (ndirty > 0) or
 * cpurge_stop() is non-zero, it sleeps on cpq_waitq until SVC_STOPPING is
 * set (the timeout argument of l_wait_event is on a hidden line, presumably
 * &lwi).  On SVC_STOPPING it clears that flag; afterwards it marks the
 * queue SVC_STOPPED and signals cpq_comp again for smfs_stop_lru().
 * Loop construct, loop exit and return are not visible in this copy.
 *
 * NOTE(review): cf_prm.interval is initialised to 600 * HZ and is
 * multiplied by HZ again in the LWI_TIMEOUT() below -- this looks like the
 * timeout is scaled twice; confirm the intended unit.
 * NOTE(review): "sigfillset(¤t->blocked)" looks like a mis-encoded
 * "&current" (the "&curren" prefix rendered as a currency sign); it is not
 * valid C as shown and must be restored before building.
 */
352 static int cache_purge_thread(void *args)
355 struct l_wait_info lwi = LWI_TIMEOUT(cf_prm.interval * HZ, NULL, NULL);
359 kportal_daemonize("wb_cache_purge");
361 SIGNAL_MASK_LOCK(current, flags);
362 sigfillset(¤t->blocked);
364 SIGNAL_MASK_UNLOCK(current, flags);
/* startup handshake with smfs_start_lru() */
367 complete(&cpq->cpq_comp);
370 int ndirty = cf_prm.ndirty;
372 purge_some_cache(&ndirty);
373 if (ndirty > 0 || cpurge_stop())
374 l_wait_event(cpq->cpq_waitq,
375 cpq->cpq_flags & SVC_STOPPING,
377 if (cpq->cpq_flags & SVC_STOPPING) {
378 cpq->cpq_flags &= ~SVC_STOPPING;
/* shutdown handshake with smfs_stop_lru() */
383 cpq->cpq_flags = SVC_STOPPED;
384 complete(&cpq->cpq_comp);
/*
 * LRU bookkeeping after a new non-directory object @dentry was created in
 * @dir (also used for symlink and mknod).
 *
 * Asserts the new object is a leaf and inserts it into the LRU with @dir as
 * parent.  If the parent directory is itself currently a leaf (no active
 * entries), @dir is removed from the LRU.  Finally the active entry count
 * of @dir is read and written back; the modification applied in between
 * (presumably an increment) is on a line not visible here -- TODO confirm.
 */
389 static int cache_space_hook_create (struct inode *dir, struct dentry * dentry)
391 __u64 active_entry = 0;
395 LASSERT(cache_leaf_node(dentry, NULL));
396 rc = cache_space_hook_lru(dentry->d_inode, dir, CACHE_SPACE_INSERT, 0);
399 if (cache_leaf_node(dentry->d_parent, &active_entry)) {
400 rc = cache_space_hook_lru(dir, NULL, CACHE_SPACE_DELETE, 0);
405 rc = get_active_entry(dir, &active_entry);
408 rc = set_active_entry(dir, &active_entry, NULL);
/*
 * LRU bookkeeping after a lookup of @dentry in @dir: when the object is a
 * leaf, its LRU record is deleted and re-inserted in one call.  flags is 1
 * here, which makes cache_space_hook_lru() consult llog_cat_half_bottom()
 * before cancelling the old record.
 */
412 static int cache_space_hook_lookup(struct inode *dir, struct dentry *dentry)
418 if (cache_leaf_node(dentry, &active_entry))
419 rc = cache_space_hook_lru(dentry->d_inode, dir,
420 CACHE_SPACE_DELETE | CACHE_SPACE_INSERT, 1);
/*
 * LRU bookkeeping after a hard link @dentry was created in @dir.
 *
 * If the target was a leaf before the link (cache_pre_leaf_node op 1, i.e.
 * link count now 2) it is removed from the LRU.  If the parent directory
 * is currently a leaf, @dir is removed from the LRU too.  The active entry
 * count of @dir is then read and written back; the change applied in
 * between is on a line not visible here (presumably an increment).
 */
424 static int cache_space_hook_link(struct inode *dir, struct dentry *dentry)
426 __u64 active_entry = 0;
430 if (cache_pre_leaf_node(dentry, NULL, 1)) {
431 rc = cache_space_hook_lru(dentry->d_inode, NULL,
432 CACHE_SPACE_DELETE, 0);
437 if (cache_leaf_node(dentry->d_parent, &active_entry)) {
438 rc = cache_space_hook_lru(dir, NULL, CACHE_SPACE_DELETE, 0);
444 rc = get_active_entry(dir, &active_entry);
447 rc = set_active_entry(dir, &active_entry, NULL);
/*
 * LRU bookkeeping after @dentry was unlinked from @dir.
 *
 * If the object is now gone (cache_pre_leaf_node op 0, link count 0) its
 * LRU record is deleted; otherwise, if it has become a leaf, it is
 * inserted with @dir as parent.  The active entry count of @dir is read and
 * written back (the change in between is not visible; presumably a
 * decrement).  If that succeeded and the parent directory is now a leaf,
 * @dir is inserted into the LRU with its own parent directory as parent.
 */
451 static int cache_space_hook_unlink(struct inode *dir, struct dentry *dentry)
457 if (cache_pre_leaf_node(dentry, NULL, 0))
458 rc = cache_space_hook_lru(dentry->d_inode, NULL,
459 CACHE_SPACE_DELETE, 0);
460 else if (cache_leaf_node(dentry, NULL))
461 rc = cache_space_hook_lru(dentry->d_inode, dir,
462 CACHE_SPACE_INSERT,0);
466 rc = get_active_entry(dir, &active_entry);
469 rc = set_active_entry(dir, &active_entry, NULL);
470 if (!rc && cache_leaf_node(dentry->d_parent, &active_entry))
471 rc = cache_space_hook_lru(dir,
472 dentry->d_parent->d_parent->d_inode,
473 CACHE_SPACE_INSERT, 0);
/*
 * LRU bookkeeping after directory @dentry was created in @dir.
 *
 * Asserts the new directory is a leaf and inserts it into the LRU with
 * @dir as parent.  On success, if the parent was a leaf before the mkdir
 * (cache_pre_leaf_node op 3, link count now 3), @dir is removed from the
 * LRU.
 */
477 static int cache_space_hook_mkdir(struct inode *dir, struct dentry *dentry)
483 LASSERT(cache_leaf_node(dentry, &active_entry));
484 rc = cache_space_hook_lru(dentry->d_inode, dir, CACHE_SPACE_INSERT, 0);
486 if (!rc && cache_pre_leaf_node(dentry->d_parent, &active_entry, 3))
487 rc = cache_space_hook_lru(dir, NULL, CACHE_SPACE_DELETE, 0);
/*
 * LRU bookkeeping after directory @dentry was removed from @dir.
 *
 * Asserts the removed directory was a leaf (cache_pre_leaf_node op 2) and
 * deletes its LRU record.  On success, if the parent directory is now a
 * leaf, @dir is inserted into the LRU with its own parent directory as
 * parent.
 */
491 static int cache_space_hook_rmdir(struct inode *dir, struct dentry *dentry)
497 LASSERT(cache_pre_leaf_node(dentry, &active_entry, 2));
498 rc = cache_space_hook_lru(dentry->d_inode, NULL,
499 CACHE_SPACE_DELETE, 0);
501 if (!rc && cache_leaf_node(dentry->d_parent, &active_entry))
502 rc = cache_space_hook_lru(dir,
503 dentry->d_parent->d_parent->d_inode,
504 CACHE_SPACE_INSERT, 0);
/*
 * LRU bookkeeping after @old_dentry in @old_dir was renamed to @new_dentry
 * in @new_dir.
 *
 * 1. If the rename replaced an existing target, that target is treated as
 *    in unlink: its LRU record is deleted when it is gone, or it is
 *    inserted when it has become a leaf (the parent argument of that insert
 *    is on a line not visible here).
 * 2. On error, or when both directories are the same, a hidden statement
 *    runs -- presumably an early return; TODO confirm.
 * 3. Non-directory moved: a leaf @new_dir is removed from the LRU and the
 *    active entry counts of @new_dir and @old_dir are each read and written
 *    back (the adjustments in between are not visible).
 *    Directory moved: @new_dir is removed from the LRU when it was a leaf
 *    before the rename (cache_pre_leaf_node op 3).
 * 4. On success, an @old_dir that has become a leaf is inserted into the
 *    LRU with its own parent directory as parent.
 */
508 static int cache_space_hook_rename(struct inode *old_dir, struct dentry *old_dentry,
509 struct inode *new_dir, struct dentry *new_dentry)
515 if (new_dentry->d_inode) {
516 if (cache_pre_leaf_node(new_dentry, NULL, 0))
517 rc = cache_space_hook_lru(new_dentry->d_inode, NULL,
518 CACHE_SPACE_DELETE,0);
519 else if (cache_leaf_node(new_dentry, NULL))
520 rc = cache_space_hook_lru(new_dentry->d_inode,
522 CACHE_SPACE_INSERT,0);
525 if (rc || old_dir == new_dir)
528 if (!S_ISDIR(old_dentry->d_inode->i_mode)) {
529 if (cache_leaf_node(new_dentry->d_parent, &active_entry)) {
530 rc = cache_space_hook_lru(new_dir, NULL,
531 CACHE_SPACE_DELETE, 0);
536 rc = get_active_entry(new_dir, &active_entry);
539 rc = set_active_entry(new_dir, &active_entry, NULL);
542 rc = get_active_entry(old_dir, &active_entry);
545 rc = set_active_entry(old_dir, &active_entry, NULL);
546 } else if (cache_pre_leaf_node(new_dentry->d_parent, &active_entry, 3)) {
547 rc = cache_space_hook_lru(new_dir, NULL,
548 CACHE_SPACE_DELETE, 0);
551 if (!rc && cache_leaf_node(old_dentry->d_parent, &active_entry)) {
552 rc = cache_space_hook_lru(old_dir,
553 old_dentry->d_parent->d_parent->d_inode,
554 CACHE_SPACE_INSERT, 0);
/*
 * Post-op adapter for HOOK_CREATE and HOOK_MKNOD: unpacks the generic hook
 * message and forwards its dentry to cache_space_hook_create().
 */
560 static int lru_create (struct inode * inode, void * arg)
562 struct hook_msg * msg = arg;
563 return cache_space_hook_create(inode, msg->dentry);
/*
 * Post-op adapter for HOOK_LOOKUP: forwards the message's dentry to
 * cache_space_hook_lookup().
 */
565 static int lru_lookup (struct inode * inode, void * arg)
567 struct hook_msg * msg = arg;
568 return cache_space_hook_lookup(inode, msg->dentry);
/*
 * Post-op adapter for HOOK_LINK: @arg is a struct hook_link_msg; its dentry
 * is forwarded to cache_space_hook_link().
 */
570 static int lru_link (struct inode * inode, void * arg)
572 struct hook_link_msg * msg = arg;
573 return cache_space_hook_link(inode, msg->dentry);
/*
 * Post-op adapter for HOOK_UNLINK: @arg is a struct hook_unlink_msg; its
 * dentry is forwarded to cache_space_hook_unlink().
 */
575 static int lru_unlink (struct inode * inode, void * arg)
577 struct hook_unlink_msg * msg = arg;
578 return cache_space_hook_unlink(inode, msg->dentry);
/*
 * Post-op adapter for HOOK_SYMLINK: @arg is a struct hook_symlink_msg.  A
 * new symlink gets the same LRU treatment as any other newly created
 * object, so this forwards to cache_space_hook_create().
 */
580 static int lru_symlink (struct inode * inode, void * arg)
582 struct hook_symlink_msg * msg = arg;
583 return cache_space_hook_create(inode, msg->dentry);
/*
 * Post-op adapter for HOOK_MKDIR: forwards the message's dentry to
 * cache_space_hook_mkdir().
 */
585 static int lru_mkdir (struct inode * inode, void * arg)
587 struct hook_msg * msg = arg;
588 return cache_space_hook_mkdir(inode, msg->dentry);
/*
 * Post-op adapter for HOOK_RMDIR: @arg is a struct hook_unlink_msg (the
 * same message type as unlink); its dentry is forwarded to
 * cache_space_hook_rmdir().
 */
590 static int lru_rmdir (struct inode * inode, void * arg)
592 struct hook_unlink_msg * msg = arg;
593 return cache_space_hook_rmdir(inode, msg->dentry);
/*
 * Post-op adapter for HOOK_RENAME: @inode is the old directory; the old
 * dentry, new directory and new dentry come from the struct
 * hook_rename_msg in @arg and are forwarded to cache_space_hook_rename().
 */
595 static int lru_rename (struct inode * inode, void * arg)
597 struct hook_rename_msg * msg = arg;
598 return cache_space_hook_rename(inode, msg->dentry,
599 msg->new_dir, msg->new_dentry);
/*
 * Dispatch table from hook code to its post-operation LRU handler, used by
 * smfs_lru_post_op().  Codes without an entry stay NULL and are skipped
 * there.  HOOK_MKNOD shares lru_create with HOOK_CREATE.
 * The entries use the old GNU "[index] value" designated-initializer
 * spelling (no '='); the closing of the initializer is on a line not
 * visible in this copy.
 */
603 typedef int (*post_lru_op)(struct inode *inode, void * msg);
604 static post_lru_op smfs_lru_post[HOOK_MAX] = {
605 [HOOK_CREATE] lru_create,
606 [HOOK_LOOKUP] lru_lookup,
607 [HOOK_LINK] lru_link,
608 [HOOK_UNLINK] lru_unlink,
609 [HOOK_SYMLINK] lru_symlink,
610 [HOOK_MKDIR] lru_mkdir,
611 [HOOK_RMDIR] lru_rmdir,
612 [HOOK_MKNOD] lru_create,
613 [HOOK_RENAME] lru_rename,
/*
 * Plugin pre-operation callback, registered as plg_pre_op by
 * smfs_init_lru().  The rest of the parameter list and the body are on
 * lines not visible in this copy; the only visible statement is the FIXME
 * below, which says the op argument is not used yet.
 */
619 static int smfs_lru_pre_op(int op, struct inode *inode, void * msg, int ret,
625 /* FIXME have not used op */
/*
 * Plugin post-operation callback, registered as plg_post_op by
 * smfs_init_lru(): looks up the handler for @op in smfs_lru_post[] and, if
 * one is registered, calls it with @inode and @msg.
 * The rest of the parameter list, any early-exit checks and the return are
 * on lines not visible in this copy.
 *
 * NOTE(review): no range check of @op against HOOK_MAX is visible before
 * the table is indexed -- confirm one exists on the hidden lines or that
 * callers guarantee the range.
 */
631 static int smfs_lru_post_op(int op, struct inode *inode, void *msg, int ret,
640 if (smfs_lru_post[op])
641 rc = smfs_lru_post[op](inode, msg);
/*
 * PLG_EXIT helper: removes the LRU plugin registration (SMFS_PLG_LRU) from
 * @sb.  @arg and @priv are not used on the visible line.
 */
647 static int smfs_exit_lru(struct super_block *sb, void * arg, void * priv)
651 smfs_deregister_plugin(sb, SMFS_PLG_LRU);
/*
 * PLG_TRANS_SIZE helper.  The only visible statement sets a local "size"
 * to the hard-coded value 20; the trailing comment records the
 * ldiskfs-based expression it stands in for.  How size is handed back to
 * the caller is on lines not visible in this copy.
 */
657 static int smfs_trans_lru (struct super_block *sb, void *arg, void * priv)
663 size = 20;//LDISKFS_INDEX_EXTRA_TRANS_BLOCKS+LDISKFS_DATA_TRANS_BLOCKS;
/*
 * PLG_START helper: bring up the cache LRU on @sb.
 *
 * Checks whether SMFS_PLG_LRU is already set (the action taken then is not
 * visible; presumably an early return), sets up the CACHE_LRU_LOG catalog
 * with llog_catalog_setup(), records the catalog handle in
 * cpq->cpq_loghandle, initialises the wait queue and completion, starts
 * cache_purge_thread() and waits for the thread's startup completion
 * before setting SMFS_PLG_LRU.  The final two statements clean up and free
 * the llog context; they appear to be the error path, but the label and
 * control flow around them are not visible in this copy.
 *
 * NOTE(review): cpq->cpq_sb is dereferenced all over this file, but no
 * assignment to it is visible; confirm it is set on a hidden line before
 * the thread starts.
 */
668 static int smfs_start_lru(struct super_block *sb, void *arg, void * priv)
671 struct smfs_super_info * smb = S2SMI(sb);
672 struct llog_ctxt *ctxt;
676 if (SMFS_IS(smb->plg_flags, SMFS_PLG_LRU))
679 /* first to initialize the cache lru catalog on local fs */
680 rc = llog_catalog_setup(&ctxt, CACHE_LRU_LOG, smb->smsi_exp,
681 smb->smsi_ctxt, smb->sm_fsfilt,
683 smb->smsi_objects_dir);
685 CERROR("failed to initialize cache lru list catalog %d\n", rc);
689 cpq->cpq_loghandle = ctxt->loc_handle;
691 /* start cache purge daemon, only one daemon now */
692 init_waitqueue_head(&cpq->cpq_waitq);
693 init_completion(&cpq->cpq_comp);
696 rc = kernel_thread(cache_purge_thread, NULL, CLONE_VM | CLONE_FILES);
698 CERROR("cannot start thread: %d\n", rc);
/* block until cache_purge_thread() reports that it is running */
701 wait_for_completion(&cpq->cpq_comp);
703 SMFS_SET(smb->plg_flags, SMFS_PLG_LRU);
707 llog_catalog_cleanup(ctxt);
708 OBD_FREE(ctxt, sizeof(*ctxt));
/*
 * PLG_STOP helper: shut down the cache LRU on @sb.
 *
 * Checks that SMFS_PLG_LRU is set (the action taken otherwise is not
 * visible; presumably an early return), clears the flag, re-arms the
 * completion, sets SVC_STOPPING, wakes the purge thread and waits until it
 * signals the completion.  Afterwards the llog context behind
 * cpq->cpq_loghandle is cleaned up and freed.
 */
712 static int smfs_stop_lru(struct super_block *sb, void *arg, void * priv)
714 struct smfs_super_info * smb = S2SMI(sb);
715 struct llog_ctxt *ctxt;
719 if (!SMFS_IS(smb->plg_flags, SMFS_PLG_LRU))
722 SMFS_CLEAR(smb->plg_flags, SMFS_PLG_LRU);
/* re-arm the completion before asking the thread to stop */
724 init_completion(&cpq->cpq_comp);
725 cpq->cpq_flags = SVC_STOPPING;
726 wake_up(&cpq->cpq_waitq);
727 wait_for_completion(&cpq->cpq_comp);
729 ctxt = cpq->cpq_loghandle->lgh_ctxt;
730 rc = llog_catalog_cleanup(ctxt);
731 OBD_FREE(ctxt, sizeof(*ctxt));
/*
 * Dispatch table from plugin helper code to its handler, used by
 * smfs_lru_help_op().  PLG_TEST_INODE and PLG_SET_INODE are explicitly
 * NULL, so those codes are ignored by this plugin.
 * The entries use the old GNU "[index] value" designated-initializer
 * spelling (no '='); the closing of the initializer is on a line not
 * visible in this copy.
 */
735 typedef int (*lru_helper)(struct super_block * sb, void *msg, void *);
736 static lru_helper smfs_lru_helpers[PLG_HELPER_MAX] = {
737 [PLG_EXIT] smfs_exit_lru,
738 [PLG_START] smfs_start_lru,
739 [PLG_STOP] smfs_stop_lru,
740 [PLG_TRANS_SIZE] smfs_trans_lru,
741 [PLG_TEST_INODE] NULL,
742 [PLG_SET_INODE] NULL,
/*
 * Plugin helper entry point, registered as plg_helper by smfs_init_lru():
 * calls the handler registered for @code in smfs_lru_helpers[], if any,
 * with @sb, @arg and @priv.
 *
 * NOTE(review): on the visible line the handler's return value is
 * discarded, so failures of start/stop would not reach the caller; the
 * function's own return is on a hidden line -- confirm this is intended.
 * NOTE(review): no range check of @code against PLG_HELPER_MAX is visible.
 */
745 static int smfs_lru_help_op(int code, struct super_block * sb,
746 void * arg, void * priv)
749 if (smfs_lru_helpers[code])
750 smfs_lru_helpers[code](sb, arg, priv);
/*
 * Register the LRU plugin on @sb: fills a struct smfs_plugin on the stack
 * with type SMFS_PLG_LRU and this file's pre-op, post-op and helper
 * callbacks, then passes it to smfs_register_plugin().
 * Any further plugin fields and the remainder of the function (including
 * the return) lie beyond the visible part of this copy.
 */
754 int smfs_init_lru(struct super_block *sb)
756 struct smfs_plugin plg = {
757 .plg_type = SMFS_PLG_LRU,
758 .plg_pre_op = &smfs_lru_pre_op,
759 .plg_post_op = &smfs_lru_post_op,
760 .plg_helper = &smfs_lru_help_op,
767 rc = smfs_register_plugin(sb, &plg);