1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * lustre/smfs/cache_space.c
5 * A library of functions to manage cache space based on ARC
6 * (modified LRU) replacement algorithm.
8 * Copyright (c) 2004 Cluster File Systems, Inc.
10 * This file is part of Lustre, http://www.lustre.org.
12 * Lustre is free software; you can redistribute it and/or
13 * modify it under the terms of version 2 of the GNU General Public
14 * License as published by the Free Software Foundation.
16 * Lustre is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with Lustre; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 #define DEBUG_SUBSYSTEM S_SM
27 #include <linux/lustre_log.h>
28 #include <linux/lustre_fsfilt.h>
29 #include <linux/lustre_smfs.h>
31 #include "smfs_internal.h"
/* Tunables controlling the cache purge (write-back) daemon, plus the
 * single global purge queue. Initializer order matches declaration
 * order: nfract=30%, ndirty=512 objects/pass, interval=600*HZ jiffies,
 * nfract_sync=60%, nfract_stop_cpurge=20%.
 * NOTE(review): this chunk is a partial extraction -- several comment
 * terminators are missing below; confirm against the complete file. */
33 struct cache_purge_param {
34 int nfract; /* percentage of cache dirty to activate
36 int ndirty; /* maximum number of objects to write out per
38 int interval; /* jiffies delay between cache purge */
39 int nfract_sync; /* percentage of cache dirty to activate cpurge
41 int nfract_stop_cpurge; /* percentage of cache dirty to stop cpurge */
42 } cf_prm = {30, 512, 600 * HZ, 60, 20};
/* One purge queue per module; cpq is the shorthand used throughout. */
44 static struct cache_purge_queue smfs_cpq;
45 static struct cache_purge_queue *cpq = &smfs_cpq;
47 #define CACHE_HOOK "cache_hook"
/* Pre-operation hook registered with SMFS: when the cache hook is
 * enabled on @inode, start a KML_CACHE_NOOP transaction (the handle is
 * error-checked via PTR_ERR) and run cache_space_pre() for @op.
 * NOTE(review): body is visibly truncated in this extraction (missing
 * braces / return paths) -- verify against the complete source. */
48 int cache_space_pre_hook(struct inode *inode, void *dentry,
49 void *data1, void *data2, int op, void *handle)
54 if (smfs_cache_hook(inode)) {
56 handle = smfs_trans_start(inode, KML_CACHE_NOOP, NULL);
58 RETURN(PTR_ERR(handle));
61 cache_space_pre(inode, op);
/* Post-operation hook: unpack the opaque dentry/data pointers and
 * delegate to cache_space_post() under the already-open transaction
 * @handle (asserted non-NULL).
 * NOTE(review): truncated extraction -- trailing arguments of the
 * cache_space_post() call and the return path are missing here. */
66 int cache_space_post_hook(struct inode *inode, void *de, void *data1,
67 void *data2, int op, void *handle)
71 if (smfs_cache_hook(inode)) {
72 struct inode *new_inode = (struct inode*)data1;
73 struct dentry *new_dentry = (struct dentry*)data2;
74 struct dentry *dentry = (struct dentry *)de;
76 LASSERT(handle != NULL);
77 rc = cache_space_post(op, handle, inode, dentry, new_inode,
/* Allocate and register the CACHE_HOOK pre/post hook pair on @sb's
 * SMFS superblock info, then flag the superblock as cache-hooked.
 * On registration failure the hook ops are freed (error path lines
 * are partially missing from this extraction). */
83 int cache_space_hook_init(struct super_block *sb)
85 struct smfs_super_info *smfs_info = S2SMI(sb);
86 struct smfs_hook_ops *cache_hops;
90 cache_hops = smfs_alloc_hook_ops(CACHE_HOOK, cache_space_pre_hook,
91 cache_space_post_hook);
95 rc = smfs_register_hook_ops(smfs_info, cache_hops);
97 smfs_free_hook_ops(cache_hops);
100 SMFS_SET_CACHE_HOOK(smfs_info);
/* Inverse of cache_space_hook_init(): unregister and free the
 * CACHE_HOOK ops and clear the cache-hook flag on @smfs_info. */
105 int cache_space_hook_exit(struct smfs_super_info *smfs_info)
107 struct smfs_hook_ops *cache_hops;
109 cache_hops = smfs_unregister_hook_ops(smfs_info, CACHE_HOOK);
110 smfs_free_hook_ops(cache_hops);
112 SMFS_CLEAN_CACHE_HOOK(smfs_info);
/* Decide whether @dentry is a "leaf" for LRU purposes, i.e. eligible
 * for the cache LRU list. Negative dentries are excluded. For
 * directories: must have nlink == 2 (no subdirs), must not be
 * "lost+found", and must have no active entries (active count is
 * returned via @active_entry). For regular files: must have
 * nlink == 1 and must not be one of the internal KML/LRU log files.
 * NOTE(review): return statements for several branches are missing in
 * this extraction; the visible logic implies 0 = not a leaf. */
116 static int cache_leaf_node(struct dentry *dentry, __u64 *active_entry)
118 struct inode *inode = dentry->d_inode;
120 if (!dentry->d_inode)
122 if (S_ISDIR(inode->i_mode)) {
123 if (inode->i_nlink != 2)
125 if (!strncmp(dentry->d_name.name, "lost+found",
128 LASSERT(active_entry != NULL);
129 get_active_entry(inode, active_entry);
130 return(*active_entry > 0 ? 0 : 1);
132 if (inode->i_nlink != 1)
134 if (!strncmp(dentry->d_name.name, KML_LOG_NAME, dentry->d_name.len) ||
135 !strncmp(dentry->d_name.name, CACHE_LRU_LOG, dentry->d_name.len))
/* Like cache_leaf_node() but evaluated BEFORE an operation takes
 * effect, with @op encoding the pending change (from the visible
 * nlink checks: 0 = unlink of last link, 1 = link into a file with
 * nlink 2, 2 = rmdir, 3 = mkdir raising parent nlink to 3). Internal
 * KML/LRU log files are excluded; directory cases consult the active
 * entry count. NOTE(review): op encoding inferred from callers in
 * this file -- confirm against the complete source. */
141 static int cache_pre_leaf_node(struct dentry *dentry, __u64 *active_entry, int op)
143 if (((op == 0 && dentry->d_inode->i_nlink == 0) ||
144 (op == 1 && dentry->d_inode->i_nlink == 2)) &&
145 strncmp(dentry->d_name.name, KML_LOG_NAME, dentry->d_name.len) &&
146 strncmp(dentry->d_name.name, CACHE_LRU_LOG, dentry->d_name.len))
148 else if ((op == 2 && dentry->d_inode->i_nlink == 0) ||
149 (op == 3 && dentry->d_inode->i_nlink == 3)) {
150 LASSERT(active_entry != NULL);
151 get_active_entry(dentry->d_inode, active_entry);
152 return(*active_entry > 0 ? 0 : 1);
/* Persist @logcookie in the inode's XATTR_SMFS_CACHE_LOGCOOKIE
 * extended attribute under transaction @handle, via the backing
 * fsfilt operations. */
157 static int set_lru_logcookie(struct inode *inode, void *handle,
158 struct llog_cookie *logcookie)
160 struct fsfilt_operations *fsops = I2CSB(inode)->sm_fsfilt;
164 rc = fsops->fs_set_xattr(inode, handle, XATTR_SMFS_CACHE_LOGCOOKIE,
165 logcookie, sizeof(*logcookie));
/* Read the inode's stored LRU llog cookie from the
 * XATTR_SMFS_CACHE_LOGCOOKIE extended attribute into @logcookie. */
169 static int get_lru_logcookie(struct inode *inode, struct llog_cookie *logcookie)
171 struct fsfilt_operations *fsops = I2CSB(inode)->sm_fsfilt;
175 rc = fsops->fs_get_xattr(inode, XATTR_SMFS_CACHE_LOGCOOKIE,
176 logcookie, sizeof(*logcookie));
/* Attempt to purge the object identified by child id @cid (parent
 * @pid) from the cache. Both inodes are looked up with iget(); an
 * object is skipped if its hoard priority is set, or if the inode is
 * busy (extra references or dirty state).
 * NOTE(review): the error/cleanup paths (iput calls, return values)
 * are missing from this extraction -- do not infer the return
 * convention from what is visible here alone. */
180 static int try2purge_from_cache(struct lustre_id cid,
181 struct lustre_id pid)
183 struct inode *inode, *parent;
184 struct super_block *sb = cpq->cpq_sb;
185 __u32 hoard_priority = 0;
189 inode = iget(sb, cid.li_stc.u.e3s.l3s_ino);
191 CERROR("not existent inode: "LPX64"/%u\n",
192 cid.li_stc.u.e3s.l3s_ino,
193 cid.li_stc.u.e3s.l3s_gen);
196 parent = iget(sb, pid.li_stc.u.e3s.l3s_ino);
197 if (IS_ERR(parent)) {
198 CERROR("not existent inode: "LPX64"/%u\n",
199 pid.li_stc.u.e3s.l3s_ino,
200 pid.li_stc.u.e3s.l3s_gen);
205 CWARN("inode/parent %lu:%lu on the lru list\n",
206 inode->i_ino, parent->i_ino);
208 rc = get_hoard_priority(inode, &hoard_priority);
209 if (hoard_priority) {
210 CWARN("inode %lu set hoard\n", inode->i_ino);
213 if (atomic_read(&inode->i_count) > 1 || (inode->i_state & I_DIRTY)) {
214 CWARN("inode %lu is busy\n", inode->i_ino);
/* llog_cat_process() callback: for each CACHE_LRU_REC record in the
 * (plain) LRU log, try to purge the recorded object from the cache.
 * @data carries the remaining purge count in/out; once enough objects
 * have been purged the callback returns LLOG_PROC_BREAK to stop the
 * walk and writes the residual count back through @data.
 * NOTE(review): the count-decrement and intermediate return lines are
 * missing from this extraction. */
224 static int cache_lru_get_rec_cb(struct llog_handle *llh,
225 struct llog_rec_hdr *rec, void *data)
227 struct llog_lru_rec *llr;
228 int count = *(int *)data, rc = 0;
231 if (!(le32_to_cpu(llh->lgh_hdr->llh_flags) & LLOG_F_IS_PLAIN)) {
232 CERROR("log is not plain\n");
235 if (rec->lrh_type != CACHE_LRU_REC) {
236 CERROR("log record type error\n");
240 llr = (struct llog_lru_rec *)rec;
242 if (try2purge_from_cache(llr->llr_cid, llr->llr_pid)==1){
243 CDEBUG(D_INODE, "purge ino/gen "LPX64"/%u from cache\n",
244 llr->llr_cid.li_stc.u.e3s.l3s_ino,
245 llr->llr_cid.li_stc.u.e3s.l3s_gen);
248 rc = LLOG_PROC_BREAK;
249 *(int *)data = count;
/* Return whether the purge daemon should stop: true when free space
 * (os_bfree, scaled by 100) has dropped below nfract_stop_cpurge
 * percent of total blocks. Relies on fs_statfs() of the cache sb. */
255 static int cpurge_stop(void)
257 struct fsfilt_operations *fsops = S2SMI(cpq->cpq_sb)->sm_fsfilt;
258 struct obd_statfs osfs;
261 rc = fsops->fs_statfs(cpq->cpq_sb, &osfs);
264 free = osfs.os_bfree * 100;
265 if (free < cf_prm.nfract_stop_cpurge * osfs.os_blocks)
/* Classify how full the cache is: computes used space as a percentage
 * of total blocks and compares against the nfract / nfract_sync
 * thresholds from cf_prm. The distinct return values for each band
 * are missing from this extraction -- confirm the state encoding in
 * the complete source (check_cache_space() consumes it). */
270 static int cache_balance_state(void)
272 struct fsfilt_operations *fsops = S2SMI(cpq->cpq_sb)->sm_fsfilt;
273 struct obd_statfs osfs;
276 rc = fsops->fs_statfs(cpq->cpq_sb, &osfs);
279 free = (osfs.os_blocks - osfs.os_bfree) * 100;
280 if (free > cf_prm.nfract * osfs.os_blocks) {
281 if (free < cf_prm.nfract_sync)
/* Wake the cache purge daemon sleeping on cpq_waitq. */
288 void wakeup_cpurge(void)
290 wake_up(&cpq->cpq_waitq);
293 /* walk the lru llog to purge count number of objects */
/* @count is in/out: cache_lru_get_rec_cb() writes back how many
 * objects remain unpurged when the walk breaks early. */
294 static int purge_some_cache(int *count)
299 rc = llog_cat_process(cpq->cpq_loghandle,
300 (llog_cb_t)cache_lru_get_rec_cb,
303 CDEBUG(D_INODE, "no enough objects available\n");
308 #define CFLUSH_NR 512
/* Consult cache_balance_state() and, when the cache is over threshold,
 * synchronously purge up to CFLUSH_NR objects. The branch on @state
 * is missing from this extraction -- presumably some states only wake
 * the daemon; confirm against the full source. */
310 static void check_cache_space(void)
312 int state = cache_balance_state();
323 int count = CFLUSH_NR;
324 purge_some_cache(&count);
/* Called before a hooked operation to make room in the cache.
 * Visible body only notes that @op is currently unused; the actual
 * work (likely check_cache_space()) is missing from this extraction. */
329 void cache_space_pre(struct inode *inode, int op)
333 /* FIXME have not used op */
/* Core LRU maintenance primitive. @op is a bitmask of
 * CACHE_SPACE_DELETE (cancel the inode's existing LRU llog record,
 * located via its stored cookie xattr, then zero the cookie),
 * CACHE_SPACE_INSERT (append a new CACHE_LRU_REC carrying the
 * child/parent lustre_ids and store the returned cookie on the
 * inode), and CACHE_SPACE_COMMIT (commit the transaction). @flags on
 * delete allows skipping cancellation when the cookie sits in the
 * bottom half of the catalog (llog_cat_half_bottom).
 * Cleanup frees the cookie and record buffers.
 * NOTE(review): several intermediate lines (rc checks, GOTO labels,
 * llog_add tail arguments) are missing from this extraction. */
339 static int cache_space_hook_lru(struct inode *inode, struct inode *parent,
340 void *handle, int op, int flags)
342 struct fsfilt_operations *fsops = S2SMI(cpq->cpq_sb)->sm_fsfilt;
343 struct llog_ctxt *ctxt = cpq->cpq_loghandle->lgh_ctxt;
344 struct llog_lru_rec *llr = NULL;
345 struct llog_cookie *logcookie = NULL;
346 int cookie_size = sizeof(struct llog_cookie);
350 LASSERT(ctxt != NULL);
352 if (op & ~(CACHE_SPACE_DELETE | CACHE_SPACE_INSERT |CACHE_SPACE_COMMIT))
355 OBD_ALLOC(logcookie, cookie_size);
357 GOTO(out, rc = -ENOMEM);
359 if (op & CACHE_SPACE_DELETE) {
360 rc = get_lru_logcookie(inode, logcookie);
364 if (logcookie->lgc_lgl.lgl_oid == 0) {
365 CWARN("inode %lu/%u is not in lru list\n",
366 inode->i_ino, inode->i_generation);
367 GOTO(insert, rc = -ENOENT);
369 if (flags && llog_cat_half_bottom(logcookie, ctxt->loc_handle))
372 rc = llog_cancel(ctxt, 1, logcookie, 0, NULL);
374 memset(logcookie, 0, cookie_size);
375 rc = set_lru_logcookie(inode, handle, logcookie);
379 CERROR("failed at llog_cancel: %d\n", rc);
385 if (op & CACHE_SPACE_INSERT) {
386 LASSERT(parent != NULL);
387 OBD_ALLOC(llr, sizeof(*llr));
389 GOTO(out, rc = -ENOMEM);
391 llr->llr_hdr.lrh_len = llr->llr_tail.lrt_len = sizeof(*llr);
392 llr->llr_hdr.lrh_type = CACHE_LRU_REC;
394 /* FIXME-UMKA: should we setup fid components here? */
395 id_ino(&llr->llr_cid) = inode->i_ino;
396 id_gen(&llr->llr_cid) = inode->i_generation;
397 id_type(&llr->llr_cid) = inode->i_mode & S_IFMT;
399 id_ino(&llr->llr_pid) = parent->i_ino;
400 id_gen(&llr->llr_pid) = parent->i_generation;
401 id_type(&llr->llr_pid) = parent->i_mode & S_IFMT;
403 rc = llog_add(ctxt, &llr->llr_hdr, NULL, logcookie, 1,
406 CERROR("failed at llog_add: %d\n", rc);
409 rc = set_lru_logcookie(inode, handle, logcookie);
412 if (op & CACHE_SPACE_COMMIT) {
414 err = fsops->fs_commit(inode->i_sb, inode, handle, 0);
416 CERROR("error committing transaction: %d\n", err);
424 OBD_FREE(logcookie, cookie_size);
426 OBD_FREE(llr, sizeof(*llr));
/* Kernel daemon started by cache_space_hook_setup(): daemonizes as
 * "wb_cache_purge", blocks all signals, signals startup completion,
 * then loops purging up to cf_prm.ndirty objects per pass. Between
 * passes it waits (up to cf_prm.interval jiffies) for either a wakeup
 * or SVC_STOPPING; on stop request it clears the flag, sets
 * SVC_STOPPED, and completes cpq_comp so cleanup can proceed. */
430 static int cache_purge_thread(void *args)
433 struct l_wait_info lwi = LWI_TIMEOUT(cf_prm.interval * HZ, NULL, NULL);
437 kportal_daemonize("wb_cache_purge");
439 SIGNAL_MASK_LOCK(current, flags);
440 sigfillset(&current->blocked);
442 SIGNAL_MASK_UNLOCK(current, flags);
445 complete(&cpq->cpq_comp);
448 int ndirty = cf_prm.ndirty;
450 purge_some_cache(&ndirty);
451 if (ndirty > 0 || cpurge_stop())
452 l_wait_event(cpq->cpq_waitq,
453 cpq->cpq_flags & SVC_STOPPING,
455 if (cpq->cpq_flags & SVC_STOPPING) {
456 cpq->cpq_flags &= ~SVC_STOPPING;
461 cpq->cpq_flags = SVC_STOPPED;
462 complete(&cpq->cpq_comp);
/* Set up the cache LRU machinery for @sb: create the CACHE_LRU_LOG
 * catalog on the local fs, record its handle in cpq, then spawn the
 * purge daemon via kernel_thread() and wait for it to signal startup.
 * On thread-start failure the catalog is cleaned up and the ctxt
 * freed. NOTE(review): intermediate rc checks / labels are missing
 * from this extraction. */
466 int cache_space_hook_setup(struct super_block *sb)
468 struct llog_ctxt *ctxt;
472 /* first to initialize the cache lru catalog on local fs */
473 rc = llog_catalog_setup(&ctxt, CACHE_LRU_LOG,
475 S2SMI(sb)->smsi_ctxt,
476 S2SMI(sb)->sm_fsfilt,
477 S2SMI(sb)->smsi_logs_dir,
478 S2SMI(sb)->smsi_objects_dir);
480 CERROR("failed to initialize cache lru list catalog %d\n", rc);
484 cpq->cpq_loghandle = ctxt->loc_handle;
486 /* start cache purge daemon, only one daemon now */
487 init_waitqueue_head(&cpq->cpq_waitq);
488 init_completion(&cpq->cpq_comp);
491 rc = kernel_thread(cache_purge_thread, NULL, CLONE_VM | CLONE_FILES);
493 CERROR("cannot start thread: %d\n", rc);
496 wait_for_completion(&cpq->cpq_comp);
500 llog_catalog_cleanup(ctxt);
501 OBD_FREE(ctxt, sizeof(*ctxt));
/* Tear down the LRU machinery: ask the purge daemon to stop
 * (SVC_STOPPING + wakeup), wait for its completion, then clean up
 * the LRU llog catalog and free its context. */
505 int cache_space_hook_cleanup(void)
507 struct llog_ctxt *ctxt;
511 init_completion(&cpq->cpq_comp);
512 cpq->cpq_flags = SVC_STOPPING;
513 wake_up(&cpq->cpq_waitq);
514 wait_for_completion(&cpq->cpq_comp);
516 ctxt = cpq->cpq_loghandle->lgh_ctxt;
517 rc = llog_catalog_cleanup(ctxt);
518 OBD_FREE(ctxt, sizeof(*ctxt));
521 CERROR("failed to clean up cache lru list catalog %d\n", rc);
/* Post-create hook (also used for symlink/mknod via the dispatch
 * table): insert the new leaf inode into the LRU; if the parent
 * directory thereby stops being a leaf, remove it from the LRU.
 * The parent's active-entry count is then re-read and updated
 * (increment lines missing from this extraction). */
526 static int cache_space_hook_create(void *handle, struct inode *dir,
527 struct dentry *dentry, struct inode *new_dir,
528 struct dentry *new_dentry)
530 __u64 active_entry = 0;
534 LASSERT(cache_leaf_node(dentry, NULL));
535 rc = cache_space_hook_lru(dentry->d_inode, dir, handle,
536 CACHE_SPACE_INSERT, 0);
539 if (cache_leaf_node(dentry->d_parent, &active_entry)) {
540 rc = cache_space_hook_lru(dir,NULL,handle,CACHE_SPACE_DELETE,0);
545 rc = get_active_entry(dir, &active_entry);
548 rc = set_active_entry(dir, &active_entry, handle);
/* Post-lookup hook: touching a leaf moves it to the LRU tail by
 * deleting and re-inserting its record (flags=1 lets the delete be
 * skipped when the record is already in the catalog's bottom half). */
552 static int cache_space_hook_lookup(void *handle, struct inode *dir,
553 struct dentry *dentry, struct inode *new_dir,
554 struct dentry *new_dentry)
560 if (cache_leaf_node(dentry, &active_entry))
561 rc = cache_space_hook_lru(dentry->d_inode, dir, handle,
562 CACHE_SPACE_DELETE | CACHE_SPACE_INSERT,1);
/* Post-link hook: a file gaining a second link (pre-leaf op 1) leaves
 * the LRU; if the target directory stops being a leaf it is removed
 * too, and its active-entry count is updated (increment lines missing
 * from this extraction). */
566 static int cache_space_hook_link(void *handle, struct inode *dir,
567 struct dentry *dentry, struct inode *new_dir,
568 struct dentry *new_dentry)
570 __u64 active_entry = 0;
574 if (cache_pre_leaf_node(dentry, NULL, 1)) {
575 rc = cache_space_hook_lru(dentry->d_inode, NULL,
576 handle, CACHE_SPACE_DELETE, 0);
581 if (cache_leaf_node(dentry->d_parent, &active_entry)) {
582 rc = cache_space_hook_lru(dir, NULL, handle, CACHE_SPACE_DELETE, 0);
588 rc = get_active_entry(dir, &active_entry);
591 rc = set_active_entry(dir, &active_entry, handle);
/* Post-unlink hook: if the last link went away (pre-leaf op 0) remove
 * the inode from the LRU, otherwise (file became a leaf again) insert
 * it. The parent's active-entry count is updated, and if the parent
 * becomes a leaf it is inserted under its own parent.
 * NOTE(review): decrement of active_entry is among the lines missing
 * from this extraction. */
595 static int cache_space_hook_unlink(void *handle, struct inode *dir,
596 struct dentry *dentry, struct inode *new_dir,
597 struct dentry *new_dentry)
603 if (cache_pre_leaf_node(dentry, NULL, 0))
604 rc = cache_space_hook_lru(dentry->d_inode, NULL,
605 handle, CACHE_SPACE_DELETE, 0);
606 else if (cache_leaf_node(dentry, NULL))
607 rc = cache_space_hook_lru(dentry->d_inode, dir,
608 handle, CACHE_SPACE_INSERT,0);
612 rc = get_active_entry(dir, &active_entry);
615 rc = set_active_entry(dir, &active_entry, handle);
616 if (!rc && cache_leaf_node(dentry->d_parent, &active_entry))
617 rc = cache_space_hook_lru(dir,
618 dentry->d_parent->d_parent->d_inode,
619 handle, CACHE_SPACE_INSERT, 0);
/* Post-mkdir hook: insert the new directory (asserted to be a leaf)
 * into the LRU; if the parent is about to stop being a leaf
 * (pre-leaf op 3: nlink now 3), remove it from the LRU. */
623 static int cache_space_hook_mkdir(void *handle, struct inode *dir,
624 struct dentry *dentry, struct inode *new_dir,
625 struct dentry *new_dentry)
631 LASSERT(cache_leaf_node(dentry, &active_entry));
632 rc = cache_space_hook_lru(dentry->d_inode, dir, handle,
633 CACHE_SPACE_INSERT, 0);
635 if (!rc && cache_pre_leaf_node(dentry->d_parent, &active_entry, 3))
636 rc = cache_space_hook_lru(dir, NULL, handle, CACHE_SPACE_DELETE, 0);
/* Post-rmdir hook: remove the deleted directory (asserted pre-leaf,
 * op 2) from the LRU; if the parent thereby becomes a leaf, insert it
 * under the grandparent. */
640 static int cache_space_hook_rmdir(void *handle, struct inode *dir,
641 struct dentry *dentry, struct inode *new_dir,
642 struct dentry *new_dentry)
648 LASSERT(cache_pre_leaf_node(dentry, &active_entry, 2));
649 rc = cache_space_hook_lru(dentry->d_inode, NULL, handle,
650 CACHE_SPACE_DELETE, 0);
652 if (!rc && cache_leaf_node(dentry->d_parent, &active_entry))
653 rc = cache_space_hook_lru(dir,
654 dentry->d_parent->d_parent->d_inode,
655 handle, CACHE_SPACE_INSERT, 0);
/* Post-rename hook. First fix up the replaced target (if any):
 * delete it from the LRU when its last link went away, or re-insert
 * it when it became a leaf. Then, for cross-directory renames,
 * update both parents: for files, adjust the new parent's LRU/active
 * count and the old parent's active count; for directories, use the
 * pre-leaf op-3 check on the new parent and re-insert the old parent
 * under the grandparent if it became a leaf.
 * NOTE(review): active-entry increment/decrement lines and some rc
 * checks are missing from this extraction. */
659 static int cache_space_hook_rename(void *handle, struct inode *old_dir,
660 struct dentry *old_dentry, struct inode *new_dir,
661 struct dentry *new_dentry)
667 if (new_dentry->d_inode) {
668 if (cache_pre_leaf_node(new_dentry, NULL, 0))
669 rc = cache_space_hook_lru(new_dentry->d_inode, NULL,
670 handle, CACHE_SPACE_DELETE,0);
671 else if (cache_leaf_node(new_dentry, NULL))
672 rc = cache_space_hook_lru(new_dentry->d_inode,
674 CACHE_SPACE_INSERT,0);
677 if (rc || old_dir == new_dir)
680 if (!S_ISDIR(old_dentry->d_inode->i_mode)) {
681 if (cache_leaf_node(new_dentry->d_parent, &active_entry)) {
682 rc = cache_space_hook_lru(new_dir, NULL, handle,
683 CACHE_SPACE_DELETE, 0);
688 rc = get_active_entry(new_dir, &active_entry);
691 rc = set_active_entry(new_dir, &active_entry, handle);
694 rc = get_active_entry(old_dir, &active_entry);
697 rc = set_active_entry(old_dir, &active_entry, handle);
698 } else if (cache_pre_leaf_node(new_dentry->d_parent, &active_entry, 3)) {
699 rc = cache_space_hook_lru(new_dir, NULL, handle,
700 CACHE_SPACE_DELETE, 0);
703 if (!rc && cache_leaf_node(old_dentry->d_parent, &active_entry)) {
704 rc = cache_space_hook_lru(old_dir,
705 old_dentry->d_parent->d_parent->d_inode,
706 handle, CACHE_SPACE_INSERT, 0);
/* Dispatch table mapping HOOK_* operation codes to their post-op
 * handlers; create is shared by symlink and mknod. Slots not listed
 * remain NULL and are skipped by cache_space_post(). Uses the old
 * GCC designated-initializer syntax ("[IDX] value", no '='). */
712 typedef int (*cache_hook_op)(void *handle, struct inode *old_dir,
713 struct dentry *old_dentry, struct inode *new_dir,
714 struct dentry *new_dentry);
716 static cache_hook_op cache_space_hook_ops[HOOK_MAX + 1] = {
717 [HOOK_CREATE] cache_space_hook_create,
718 [HOOK_LOOKUP] cache_space_hook_lookup,
719 [HOOK_LINK] cache_space_hook_link,
720 [HOOK_UNLINK] cache_space_hook_unlink,
721 [HOOK_SYMLINK] cache_space_hook_create,
722 [HOOK_MKDIR] cache_space_hook_mkdir,
723 [HOOK_RMDIR] cache_space_hook_rmdir,
724 [HOOK_MKNOD] cache_space_hook_create,
725 [HOOK_RENAME] cache_space_hook_rename,
/* Entry point called from cache_space_post_hook(): bounds-check @op
 * and dispatch to the matching handler in cache_space_hook_ops (NULL
 * slots are no-ops). NOTE(review): the LASSERT bound "op <= HOOK_MAX
 * + 1" looks off by one versus the table size (HOOK_MAX + 1 entries,
 * valid indices 0..HOOK_MAX) -- worth checking in the full source. */
731 int cache_space_post(int op, void *handle, struct inode *old_dir,
732 struct dentry *old_dentry, struct inode *new_dir,
733 struct dentry *new_dentry)
738 LASSERT(op <= HOOK_MAX + 1);
740 if (cache_space_hook_ops[op])
741 rc = cache_space_hook_ops[op](handle, old_dir, old_dentry,
742 new_dir, new_dentry);