1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2004 Cluster File Systems, Inc.
6 * This file is part of Lustre, http://www.lustre.org.
8 * Lustre is free software; you can redistribute it and/or
9 * modify it under the terms of version 2 of the GNU General Public
10 * License as published by the Free Software Foundation.
12 * Lustre is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Lustre; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 #define DEBUG_SUBSYSTEM S_SM
25 #include <linux/module.h>
26 #include <linux/kernel.h>
27 #include <linux/pagemap.h>
28 #include <linux/string.h>
29 #include <linux/slab.h>
30 #include <linux/stat.h>
31 #include <linux/unistd.h>
32 #include <linux/smp_lock.h>
33 #include <linux/obd_class.h>
34 #include <linux/obd_support.h>
35 #include <linux/lustre_lib.h>
36 #include <linux/lustre_idl.h>
37 #include <linux/lustre_fsfilt.h>
39 #include <linux/lustre_snap.h>
40 #include <linux/lustre_smfs.h>
42 #include "smfs_internal.h"
/*
 * SNAPTABLE_SIZE - number of bytes occupied by a struct snap_table plus
 * @size entries of struct snap.
 *
 * The argument is parenthesized so that an expression such as "n + 1" is
 * multiplied as a whole instead of only its last operand.
 */
#define SNAPTABLE_SIZE(size) \
        (sizeof(struct snap_table) + (size) * sizeof(struct snap))
/*
 * smfs_init_snaptabe - build the in-memory snapshot table for @sb.
 *
 * Asks the snap fsfilt operations for MAX_SNAPTABLE_COUNT to learn how many
 * entries the table may hold, allocates SNAPTABLE_SIZE(size) bytes into
 * snap_info->sntbl, sets every slot's sn_index to -1, then reads
 * SNAPTABLE_INFO from the filesystem into the table and compares the stored
 * magic with SNAPTABLE_MAGIC.
 *
 * NOTE(review): braces, the conditions guarding the error paths and the
 * return statements are not visible in this copy, so the exact return codes
 * are not documented here.
 */
44 static int smfs_init_snaptabe(struct super_block *sb)
46 struct snap_info *snap_info = S2SNAPI(sb);
47 struct snap_table *snap_table = NULL;
48 struct fsfilt_operations *snapops = snap_info->snap_fsfilt;
49 int rc = 0, size, table_size, vallen, i;
53 init_MUTEX(&snap_info->sntbl_sema);
54 /*Initialized table */
55 /*get the maxsize of snaptable*/
57 rc = snapops->fs_get_snap_info(sb, NULL, MAX_SNAPTABLE_COUNT,
58 strlen(MAX_SNAPTABLE_COUNT), &size,
61 CERROR("the Max snaptable count should not be zero\n");
/* Room for the table structure plus "size" snapshot entries. */
65 table_size = SNAPTABLE_SIZE(size);
67 OBD_ALLOC(snap_info->sntbl, table_size);
69 if (!snap_info->sntbl) {
73 snap_table = snap_info->sntbl;
/* The magic is kept in little-endian form inside the table. */
75 snap_table->sntbl_magic = cpu_to_le32((__u32)SNAPTABLE_MAGIC);
76 snap_table->sntbl_max_count = size;
77 for (i = 0; i < snap_table->sntbl_max_count; i++) {
78 /*init sn_index to -1*/
79 snap_table->sntbl_items[i].sn_index = -1;
81 /*get snaptable info*/
82 rc = snapops->fs_get_snap_info(sb, NULL, SNAPTABLE_INFO,
83 strlen(SNAPTABLE_INFO),
84 snap_table, &table_size);
87 snap_table->sntbl_count = 0;
88 CDEBUG(D_INFO, "No snaptable here\n");
91 CERROR("Can not retrive the snaptable from this filesystem\n");
/*
 * NOTE(review): &table_size was handed to fs_get_snap_info() above; if that
 * call can change it, the size passed to OBD_FREE may no longer match the
 * allocation. No reset of snap_info->sntbl is visible after the free either
 * -- confirm against the full source.
 */
92 OBD_FREE(snap_table, table_size);
96 if (le32_to_cpu(snap_table->sntbl_magic) != SNAPTABLE_MAGIC) {
97 CERROR("On disk snaptable is not right \n");
98 OBD_FREE(snap_table, table_size);
/*
 * Buffer size for the cowed directory name built below: the 7 characters of
 * ".cowed_", 8 hexadecimal digits, and the terminating NUL.
 */
103 #define COWED_NAME_LEN (7 + 8 + 1)
/*
 * smfs_init_cowed_dir - create the ".cowed_<inode number in hex>" directory
 * under @cowed_dir and remember its dentry in the snap info of @sb.
 *
 * The directory is created with simple_mkdir() between push_ctxt() and
 * pop_ctxt() on the smfs context. If simple_mkdir() returns an error pointer
 * the error is logged; otherwise the dentry is stored in
 * snap_info->sn_cowed_dentry.
 *
 * NOTE(review): the declaration of rc and the return statements are not
 * visible in this copy.
 */
104 static int smfs_init_cowed_dir(struct super_block *sb, struct dentry* cowed_dir)
106 struct snap_info *snap_info = S2SNAPI(sb);
107 struct dentry *dentry = NULL;
108 struct lvfs_run_ctxt saved;
109 char name[COWED_NAME_LEN];
/* The name is derived from the inode number of the directory being cowed. */
113 sprintf(name, ".cowed_%08x", (__u32)cowed_dir->d_inode->i_ino);
114 push_ctxt(&saved, S2SMI(sb)->smsi_ctxt, NULL);
115 dentry = simple_mkdir(cowed_dir, name, 0777, 1);
116 pop_ctxt(&saved, S2SMI(sb)->smsi_ctxt, NULL);
117 if (IS_ERR(dentry)) {
118 rc = PTR_ERR(dentry);
119 CERROR("create cowed directory: rc = %d\n", rc);
122 snap_info->sn_cowed_dentry = dentry;
/*
 * smfs_start_cow - set up snapshot (COW) state for the super block @sb.
 *
 * Allocates the struct snap_info, looks up the "<cache ftype>_snap" and
 * "<ftype>_snap" fsfilt operations when they are not set yet, loads the
 * snapshot table with smfs_init_snaptabe() and finally creates the cowed
 * directory under sb->s_root with smfs_init_cowed_dir().
 *
 * NOTE(review): the error returns and the cleanup on failure are not visible
 * in this copy.
 */
125 int smfs_start_cow(struct super_block *sb)
127 struct smfs_super_info *smfs_info = S2SMI(sb);
131 OBD_ALLOC(smfs_info->smsi_snap_info, sizeof(struct snap_info));
133 if (!smfs_info->smsi_snap_info)
136 /*init snap fsfilt operations*/
137 if (!S2SNAPI(sb)->snap_cache_fsfilt) {
138 char *snap_cache_ftype = NULL;
139 int tmp = strlen(S2SMI(sb)->smsi_cache_ftype) + strlen("_snap");
/*
 * tmp + 1 leaves room for the terminating NUL.
 * NOTE(review): no NULL check on the allocation is visible before the
 * sprintf() below -- confirm.
 */
141 OBD_ALLOC(snap_cache_ftype, tmp + 1);
142 sprintf(snap_cache_ftype, "%s_snap", S2SMI(sb)->smsi_cache_ftype);
143 S2SNAPI(sb)->snap_cache_fsfilt = fsfilt_get_ops(snap_cache_ftype);
144 OBD_FREE(snap_cache_ftype, tmp + 1);
145 if (!S2SNAPI(sb)->snap_cache_fsfilt) {
146 CERROR("Can not get %s fsfilt ops needed by snap\n",
151 if (!S2SNAPI(sb)->snap_fsfilt) {
152 char *snap_ftype = NULL;
153 int tmp = strlen(S2SMI(sb)->smsi_ftype) + strlen("_snap");
/* Same pattern as above, for the non-cache filesystem type name. */
155 OBD_ALLOC(snap_ftype, tmp + 1);
156 sprintf(snap_ftype, "%s_snap", S2SMI(sb)->smsi_ftype);
157 S2SNAPI(sb)->snap_fsfilt = fsfilt_get_ops(snap_ftype);
158 OBD_FREE(snap_ftype, tmp + 1);
159 if (!S2SNAPI(sb)->snap_fsfilt) {
160 CERROR("Can not get %s fsfilt ops needed by snap\n",
165 rc = smfs_init_snaptabe(sb);
167 CERROR("can not init snaptable rc=%d\n", rc);
170 /*init cowed dir to put the primary cowed inode
171 *FIXME-WANGDI, later the s_root may not be the
172 *snap dir, we can indicate any dir to be cowed*/
173 rc = smfs_init_cowed_dir(sb, sb->s_root);
176 EXPORT_SYMBOL(smfs_start_cow);
/*
 * smfs_stop_cow - release the snapshot state attached to @sb.
 *
 * Drops the cowed directory dentry, puts both snap fsfilt operation tables
 * when they are set, frees the snapshot table using the size computed from
 * its sntbl_max_count, and frees the struct snap_info itself.
 *
 * NOTE(review): snap_table is dereferenced to compute table_size; whether a
 * NULL check guards that is not visible in this copy. No reset of the
 * pointer that S2SNAPI(sb) reads is visible after the final free either --
 * confirm.
 */
177 int smfs_stop_cow(struct super_block *sb)
179 struct snap_info *snap_info = S2SNAPI(sb);
180 struct snap_table *snap_table = snap_info->sntbl;
181 int rc = 0, table_size;
184 l_dput(snap_info->sn_cowed_dentry);
186 if (snap_info->snap_fsfilt)
187 fsfilt_put_ops(snap_info->snap_fsfilt);
188 if (snap_info->snap_cache_fsfilt)
189 fsfilt_put_ops(snap_info->snap_cache_fsfilt);
192 table_size = SNAPTABLE_SIZE(snap_table->sntbl_max_count);
193 OBD_FREE(snap_info->sntbl, table_size);
196 OBD_FREE(snap_info, sizeof(*snap_info));
200 EXPORT_SYMBOL(smfs_stop_cow);
/*
 * smfs_cow_init - mark the smfs super info of @sb with SMFS_SET_COW().
 *
 * NOTE(review): the rest of the body, including the return value, is not
 * visible in this copy.
 */
202 int smfs_cow_init(struct super_block *sb)
204 struct smfs_super_info *smfs_info = S2SMI(sb);
207 SMFS_SET_COW(smfs_info);
/*
 * smfs_cow_cleanup - undo smfs_cow_init() by applying SMFS_CLEAN_COW() to
 * the smfs super info of @sb.
 *
 * NOTE(review): the return statement is not visible in this copy.
 */
212 int smfs_cow_cleanup(struct super_block *sb)
215 SMFS_CLEAN_COW(S2SMI(sb));
/*
 * smfs_init_snap_inode_info - initialise the per-inode snapshot info.
 *
 * Only acts when the super block has COW enabled (SMFS_DO_COW) and @flags
 * contains SM_DO_COW. In that case @flags is stored in sn_flags and the
 * SNAP_GENERATION attribute of @inode is read into sn_gen through the snap
 * fsfilt operations.
 *
 * NOTE(review): the declarations of rc and vallen and the return statement
 * are not visible in this copy.
 */
219 /*FIXME Note indirect and primary inode
220 * should be recognized here*/
221 int smfs_init_snap_inode_info(struct inode *inode, int flags)
226 if (SMFS_DO_COW(S2SMI(inode->i_sb)) &&
227 (flags & SM_DO_COW)) {
228 struct snap_inode_info *sni_info = I2SNAPI(inode);
229 struct fsfilt_operations *snapops = I2SNAPOPS(inode);
231 sni_info->sn_flags = flags;
/* vallen tells the getter how large the destination field is. */
232 vallen = sizeof(sni_info->sn_gen);
234 rc = snapops->fs_get_snap_info(NULL, inode, SNAP_GENERATION,
235 strlen(SNAP_GENERATION),
236 &sni_info->sn_gen, &vallen);
/*
 * snap_last - copy the most recent entry of the snapshot table of @sb
 * into @snap.
 *
 * When the table is empty @snap is zeroed; otherwise sn_index, sn_time and
 * sn_gen of the entry at position sntbl_count - 1 are copied.
 *
 * NOTE(review): in the visible code "now" is only used by the debug
 * message; no comparison of snapshot times against it is visible. The
 * terminator of the original comment below also appears to be missing in
 * this copy.
 */
241 /* latest snap: returns
242 - the index of the latest snapshot before NOW
243 - hence it returns 0 in case all the volume snapshots lie in the future
244 - this is the index where a COW will land (will be created)
246 void snap_last(struct super_block *sb, struct snap *snap)
248 struct snap_info *snap_info = S2SNAPI(sb);
249 struct snap_table *table = snap_info->sntbl;
250 time_t now = CURRENT_TIME;
254 /* start at the highest index in the superblock snaptime array */
255 if (table->sntbl_count == 0) {
256 memset(snap, 0, sizeof(struct snap));
/* The last used slot holds the newest snapshot. */
258 i = table->sntbl_count - 1;
259 snap->sn_index = table->sntbl_items[i].sn_index;
260 snap->sn_time = table->sntbl_items[i].sn_time;
261 snap->sn_gen = table->sntbl_items[i].sn_gen;
263 CDEBUG(D_INFO, "index: %d, time[i]: %ld, now: %ld\n",
264 snap->sn_index, snap->sn_time, now);
/*
 * get_index_of_item - pick a snapshot index for a new entry called @name.
 *
 * The first loop compares @name with sn_name of every slot up to
 * sntbl_max_count and logs a duplicate. The second loop tries each
 * candidate index i and scans the first count + 1 entries to see whether
 * that index is already in use; "snaptable Full" is logged when the search
 * runs out.
 *
 * NOTE(review): the return statements are not visible in this copy. The
 * inner loop reads sntbl_items[count]; if count can equal sntbl_max_count
 * that is one entry past the table -- confirm.
 */
269 static int inline get_index_of_item(struct snap_table *table, char *name)
271 int count = table->sntbl_count;
275 for (i = 0; i < table->sntbl_max_count; i++) {
276 if (!strcmp(name, table->sntbl_items[i].sn_name)) {
277 CERROR("Duplicate name %s in snaptable\n", name);
282 for (i = 0; i < table->sntbl_max_count; i++) {
284 for (j = 0; j < (count + 1); j++) {
285 if (table->sntbl_items[j].sn_index == i) {
293 CERROR("snaptable Full\n");
/*
 * smfs_add_snap_item - append a snapshot called @name to the table of @sb.
 *
 * Fills the slot at position sntbl_count with the current time, an index
 * obtained from get_index_of_item(), flags 0, the next generation number
 * and the name, writes the whole table back through fs_set_snap_info(),
 * and then increments sntbl_count and sntbl_generation.
 *
 * NOTE(review): the error checks and the return statement are not visible
 * in this copy.
 */
297 int smfs_add_snap_item(struct super_block *sb, char *name)
299 struct snap_info *snap_info = S2SNAPI(sb);
300 struct fsfilt_operations *snapops = snap_info->snap_fsfilt;
301 struct snap_table *snap_table = snap_info->sntbl;
302 struct snap *snap_item;
303 int table_size, count = 0, index = 0, rc = 0;
/*
 * NOTE(review): count is read before the semaphore is taken, and the result
 * of down_interruptible() is not used in the visible code -- confirm whether
 * an interrupted wait is handled.
 */
305 count = snap_table->sntbl_count;
306 /* XXX Is down this sema necessary*/
307 down_interruptible(&snap_info->sntbl_sema);
308 snap_item = &snap_table->sntbl_items[count];
310 /*add item in snap_table set generation*/
311 snap_item->sn_time = CURRENT_TIME;
312 /* find table index */
313 index = get_index_of_item(snap_table, name);
315 GOTO(exit, rc = index);
317 snap_item->sn_index = index;
318 snap_item->sn_flags = 0;
319 snap_item->sn_gen = snap_table->sntbl_generation + 1;
/*
 * NOTE(review): always copies SNAP_MAX_NAMELEN bytes from name, whatever
 * the length of the string the caller passed -- confirm the caller
 * provides a buffer of at least that size.
 */
320 memcpy(snap_item->sn_name, name, SNAP_MAX_NAMELEN);
321 /* Wrote the whole snap_table to disk */
322 table_size = SNAPTABLE_SIZE(snap_table->sntbl_max_count);
324 rc = snapops->fs_set_snap_info(sb, NULL, SNAPTABLE_INFO,
325 strlen(SNAPTABLE_INFO),
326 snap_table, &table_size);
328 CERROR("Set snaptable error rc=%d\n", rc);
/* The counters are advanced after the table has been written. */
331 snap_table->sntbl_count++;
332 snap_table->sntbl_generation++;
334 up(&snap_info->sntbl_sema);
337 EXPORT_SYMBOL(smfs_add_snap_item);
339 * Note: this function is different from snap_do_cow.
340 * In smfs_do_cow, we check the EA to decide whether to do COW for that inode.
341 * In smfs_needs_cow, we check whether we actually need to do COW.
/*
 * smfs_needs_cow - decide whether a change to @inode requires a COW.
 *
 * Fetches the latest snapshot with snap_last() and compares its generation
 * with the generation recorded in the inode's snapshot info; when the
 * inode's generation is older, index is set to the snapshot's sn_index.
 *
 * NOTE(review): the declarations of snap and index and the return statement
 * are not visible in this copy. Callers in this file compare the result
 * with -1, so presumably -1 means "no COW needed" -- confirm.
 */
343 int smfs_needs_cow(struct inode *inode)
345 struct smfs_inode_info *smi_info = I2SMI(inode);
346 struct snap_inode_info *snap_info = NULL;
351 snap_info = &(smi_info->sm_sninfo);
353 snap_last(inode->i_sb, &snap);
354 /* decision .... if the snapshot is more recent than the object,
355 * then any change to the object should cause a COW.
357 if (snap_info->sn_gen < snap.sn_gen )
358 index = snap.sn_index;
360 CDEBUG(D_INFO, "snap_needs_cow, ino %lu , get index %d\n",
361 inode->i_ino, index);
364 } /* snap_needs_cow */
/*
 * link_cowed_inode - hard-link @inode into the cowed directory.
 *
 * The link name is the string produced by ll_fid2str() from the inode
 * number and generation. The cowed directory's i_sem is held around the
 * lookup and the link. If the name already has an inode behind it, the
 * function asserts that it is @inode and finishes with rc = 0.
 *
 * NOTE(review): the declaration of mode, the check of the vfs_link() result
 * and the cleanup labels are not visible in this copy.
 */
366 static int link_cowed_inode(struct inode *inode)
368 struct snap_info *snap_info = S2SNAPI(inode->i_sb);
369 struct dentry *cowed_dir = NULL;
370 char fidname[LL_FID_NAMELEN];
371 int fidlen = 0, rc = 0;
372 struct dentry *dchild = NULL;
373 struct dentry *tmp = NULL;
376 cowed_dir = snap_info->sn_cowed_dentry;
378 fidlen = ll_fid2str(fidname, inode->i_ino, inode->i_generation);
380 down(&cowed_dir->d_inode->i_sem);
381 dchild = ll_lookup_one_len(fidname, cowed_dir, fidlen);
382 if (IS_ERR(dchild)) {
383 rc = PTR_ERR(dchild);
/* Permission failures are not logged; every other lookup error is. */
384 if (rc != -EPERM && rc != -EACCES)
385 CERROR("child lookup error %d\n", rc);
388 if (dchild->d_inode != NULL) {
389 CERROR("re-cowed file %s?\n", dchild->d_name.name);
390 LASSERT(dchild->d_inode == inode);
391 GOTO(out_dput, rc = 0);
393 tmp = pre_smfs_dentry(NULL, inode, cowed_dir);
394 /* link() is semantically wrong for S_IFDIR, so we set S_IFREG
395 * for linking and restore the real mode afterwards -bzzz */
396 mode = inode->i_mode;
397 inode->i_mode = S_IFREG;
398 rc = vfs_link(tmp, cowed_dir->d_inode, dchild);
399 post_smfs_dentry(tmp);
401 CERROR("error linking cowed inode %s to COWED: rc = %d\n",
404 inode->i_mode = mode;
/* For a directory, the link counts of both inodes are bumped by hand. */
405 if ((mode & S_IFMT) == S_IFDIR) {
406 dchild->d_inode->i_nlink++;
407 cowed_dir->d_inode->i_nlink++;
409 mark_inode_dirty(dchild->d_inode);
413 up(&cowed_dir->d_inode->i_sem);
/*
 * snap_do_cow - create the indirect inode for @inode against the latest
 * snapshot.
 *
 * Requires the snap fsfilt operations to provide fs_create_indirect. The
 * latest snapshot is obtained with snap_last() and its index and generation
 * are passed to fs_create_indirect() together with the parent inode and
 * @del. An inode that is not yet marked as cowed is marked and linked into
 * the cowed directory. The new indirect inode gets sn_flags 0 and the
 * snapshot's generation.
 *
 * NOTE(review): the declaration of snap, the validation of ind and the
 * return statements are not visible in this copy. The result of
 * link_cowed_inode() is not used in the visible call.
 */
417 * Make a copy of the data and plug a redirector in between if there
418 * is no redirector yet.
420 int snap_do_cow(struct inode *inode, struct dentry *dparent, int del)
422 struct snap_info *snap_info = S2SNAPI(inode->i_sb);
423 struct fsfilt_operations *snapops = snap_info->snap_fsfilt;
425 struct inode *ind = NULL;
429 if (!snapops || !snapops->fs_create_indirect)
432 snap_last(inode->i_sb, &snap);
433 ind = snapops->fs_create_indirect(inode, snap.sn_index, snap.sn_gen,
434 dparent->d_inode, del);
437 if (!SMFS_DO_INODE_COWED(inode)) {
438 /*insert the inode to cowed inode*/
439 SMFS_SET_INODE_COWED(inode);
440 link_cowed_inode(inode);
443 I2SMI(ind)->sm_sninfo.sn_flags = 0;
444 I2SMI(ind)->sm_sninfo.sn_gen = snap.sn_gen;
/*
 * smfs_cow_create - COW hook for create: when @dir needs a COW, run
 * snap_do_cow() on it, passing the parent of @dentry's parent as the
 * parent dentry. @data1 and @data2 are not used in the visible code.
 *
 * NOTE(review): the declaration of rc and the return statements are not
 * visible in this copy.
 */
449 /*Dir inode will do cow*/
450 int smfs_cow_create(struct inode *dir, struct dentry *dentry,
451 void *data1, void *data2)
454 struct dentry *dparent;
457 if (smfs_needs_cow(dir) != -1) {
458 CDEBUG(D_INODE, "snap_needs_cow for ino %lu \n",dir->i_ino);
459 LASSERT(dentry->d_parent && dentry->d_parent->d_parent);
460 dparent = dentry->d_parent->d_parent;
461 if ((snap_do_cow(dir, dparent, 0))) {
462 CERROR("Do cow error\n");
/*
 * smfs_cow_setattr - COW hook for setattr: when @dir needs a COW, run
 * snap_do_cow() on it with @dentry's parent as the parent dentry.
 * @data1 and @data2 are not used in the visible code.
 *
 * NOTE(review): the return statements are not visible in this copy.
 */
469 int smfs_cow_setattr(struct inode *dir, struct dentry *dentry,
470 void *data1, void *data2)
474 if (smfs_needs_cow(dir) != -1) {
475 CDEBUG(D_INODE, "snap_needs_cow for ino %lu \n",dir->i_ino);
476 if ((snap_do_cow(dir, dentry->d_parent, 0))) {
477 CERROR("Do cow error\n");
/*
 * smfs_cow_link - COW hook for link: when @dir needs a COW, run
 * snap_do_cow() first on @dir (parent dentry: the parent of @dentry's
 * parent) and then on the inode behind @dentry (parent dentry: @dentry's
 * parent). Neither call asks for the delete variant (del = 0).
 *
 * NOTE(review): the return statements are not visible in this copy.
 */
484 int smfs_cow_link(struct inode *dir, struct dentry *dentry,
485 void *data1, void *data2)
488 struct dentry *dparent;
491 if (smfs_needs_cow(dir) != -1) {
492 CDEBUG(D_INODE, "snap_needs_cow for ino %lu \n",dir->i_ino);
493 LASSERT(dentry->d_parent && dentry->d_parent->d_parent);
494 dparent = dentry->d_parent->d_parent;
495 if ((snap_do_cow(dir, dparent, 0))) {
496 CERROR("Do cow error\n");
499 if ((snap_do_cow(dentry->d_inode, dentry->d_parent, 0))) {
500 CERROR("Do cow error\n");
/*
 * smfs_cow_unlink - COW hook for unlink: same sequence as smfs_cow_link(),
 * except that the COW of the inode behind @dentry is requested with
 * del = 1.
 *
 * NOTE(review): the return statements are not visible in this copy.
 */
507 int smfs_cow_unlink(struct inode *dir, struct dentry *dentry,
508 void *data1, void *data2)
510 struct dentry *dparent;
514 if (smfs_needs_cow(dir) != -1) {
515 CDEBUG(D_INODE, "snap_needs_cow for ino %lu \n",dir->i_ino);
516 LASSERT(dentry->d_parent && dentry->d_parent->d_parent);
517 dparent = dentry->d_parent->d_parent;
518 if ((snap_do_cow(dir, dparent, 0))) {
519 CERROR("Do cow error\n");
522 if ((snap_do_cow(dentry->d_inode, dentry->d_parent, 1))) {
523 CERROR("Do cow error\n");
/*
 * smfs_cow_rename - COW hook for rename.
 *
 * @data1 is interpreted as the target directory inode and @data2 as the
 * target dentry. When the source directory needs a COW, both it and the
 * inode being renamed are cowed. When the target directory needs a COW, it
 * is cowed unless it is the same inode as the source directory, and an
 * existing target inode is cowed when its link count is 1.
 *
 * NOTE(review): the return statements are not visible in this copy.
 */
531 int smfs_cow_rename(struct inode *dir, struct dentry *dentry,
532 void *data1, void *data2)
534 struct inode *new_dir = (struct inode *)data1;
535 struct dentry *new_dentry = (struct dentry *)data2;
536 struct dentry *dparent;
542 if (smfs_needs_cow(dir) != -1) {
543 CDEBUG(D_INODE, "snap_needs_cow for ino %lu \n", dir->i_ino);
544 LASSERT(dentry->d_parent && dentry->d_parent->d_parent);
545 dparent = dentry->d_parent->d_parent;
546 if ((snap_do_cow(dir, dparent, 0))) {
547 CERROR("Do cow error\n");
550 if ((snap_do_cow(dentry->d_inode, dentry->d_parent, 0))) {
551 CERROR("Do cow error\n");
555 if (smfs_needs_cow(new_dir) != -1) {
556 CDEBUG(D_INODE, "snap_needs_cow for ino %lu \n", new_dir->i_ino);
557 LASSERT(new_dentry->d_parent && new_dentry->d_parent->d_parent);
558 dparent = new_dentry->d_parent->d_parent;
/* Skip the second COW when source and target directory are the same inode. */
559 if ((new_dir != dir) && (snap_do_cow(new_dir, dparent, 0))){
560 CERROR("Do cow error\n");
563 if (new_dentry->d_inode && new_dentry->d_inode->i_nlink == 1) {
564 if ((snap_do_cow(new_dentry->d_inode,
565 new_dentry->d_parent, 0))) {
566 CERROR("Do cow error\n");
/*
 * smfs_cow_write - COW hook for write.
 *
 * @data1 is read as a pointer to the byte count (size_t) and the second
 * data pointer as a pointer to the file position (loff_t). The inode is
 * cowed when smfs_needs_cow() says so. Then, for up to two block numbers
 * derived from the write position, the snapshot table is walked from the
 * newest slot to the oldest, and fs_copy_block() is called with the first
 * indirect inode found.
 *
 * NOTE(review): several declarations, the handling of "result" and the exit
 * label are not visible in this copy. The result of snap_do_cow() is not
 * used in the visible call.
 */
574 int smfs_cow_write(struct inode *inode, struct dentry *dentry, void *data1,
577 struct snap_info *snap_info = S2SNAPI(inode->i_sb);
578 struct snap_table *table = snap_info->sntbl;
579 long blocks[2]={-1,-1};
580 int index = 0, i, rc = 0;
589 count = *(size_t *)data1;
590 pos = *(loff_t*)data2;
594 if (smfs_needs_cow(inode) != -1 ) {
595 CDEBUG(D_INFO, "snap_needs_cow for ino %lu \n",inode->i_ino);
596 snap_do_cow(inode, dentry->d_parent, 0);
599 CDEBUG(D_INFO, "write offset %lld count %u \n", pos, count);
/*
 * NOTE(review): in the visible code both blocks[0] and blocks[1] are
 * computed from pos alone and count is only used by the debug message;
 * the tests mask with PAGE_CACHE_SIZE while the shift uses the filesystem
 * block size. Confirm whether blocks[1] was meant to describe the end of
 * the write.
 */
601 if(pos & (PAGE_CACHE_SIZE - 1)){
602 blocks[0] = pos >> inode->i_sb->s_blocksize_bits;
605 if((pos + 1) & (PAGE_CACHE_SIZE - 1)){
606 blocks[1] = pos >> inode->i_sb->s_blocksize_bits;
609 if (blocks[0] == blocks[1])
612 for (i = 0; i < 2; i++) {
616 /*Find the nearest page in snaptable and copy back it*/
617 for (slot = table->sntbl_count - 1; slot >= 0; slot--) {
618 struct fsfilt_operations *snapops = snap_info->snap_fsfilt;
619 struct inode *cache_inode = NULL;
622 index = table->sntbl_items[slot].sn_index;
623 cache_inode = snapops->fs_get_indirect(inode, NULL, index);
625 if (!cache_inode) continue;
627 CDEBUG(D_INFO, "find cache_ino %lu\n", cache_inode->i_ino);
629 result = snapops->fs_copy_block(inode, cache_inode, blocks[i]);
638 GOTO(exit, rc = result);
647 EXPORT_SYMBOL(smfs_cow_write);
/*
 * smfs_cow_lookup - lookup hook for clone filesystems.
 *
 * When the parent dentry carries clonefs_info, the indirect inode of @inode
 * for clone_index is fetched, its cache inode is looked up through the
 * cache inode's own lookup operation using temporary smfs dentries, and the
 * inode found is attached to @dentry with d_add().
 *
 * NOTE(review): several declarations, checks and the exit label are not
 * visible in this copy.
 */
649 int smfs_cow_lookup(struct inode *inode, struct dentry *dentry, void *data1,
652 struct snap_info *snap_info = S2SNAPI(inode->i_sb);
653 struct fsfilt_operations *snapops = snap_info->snap_fsfilt;
654 struct dentry *dparent = dentry->d_parent;
655 struct clonefs_info *clone_info=(struct clonefs_info*)dparent->d_fsdata;
/*
 * NOTE(review): "clone_flags && SM_CLONE_FS" is a logical AND, so it is true
 * for any non-zero clone_flags as long as SM_CLONE_FS is non-zero; a bitwise
 * test may have been intended -- confirm.
 */
658 if (clone_info && clone_info->clone_flags && SM_CLONE_FS) {
659 struct inode *ind_inode = NULL;
660 struct inode *cache_ind = NULL;
661 struct dentry *cache_dentry = NULL;
662 struct dentry *cache_parent = NULL;
663 struct inode *cache_inode;
667 ind_inode = snapops->fs_get_indirect(inode, NULL, clone_info->clone_index);
671 if (!(cache_ind = I2CI(ind_inode)))
672 GOTO(exit, rc = -ENOENT);
674 cache_parent=pre_smfs_dentry(NULL, cache_ind, dentry->d_parent);
675 cache_dentry=pre_smfs_dentry(cache_parent, NULL, dentry);
677 tmp = cache_ind->i_op->lookup(cache_ind, cache_dentry);
680 GOTO(exit, rc = -ENOENT);
/* Prefer the dentry returned by lookup(); otherwise use the one passed in. */
682 if ((cache_inode = tmp ? tmp->d_inode : cache_dentry->d_inode)) {
/*
 * NOTE(review): on this path an error pointer is stored in dentry->d_inode
 * before leaving with -ENOENT -- confirm this is intended.
 */
683 if (IS_ERR(cache_inode)) {
684 dentry->d_inode = cache_inode;
685 GOTO(exit, rc = -ENOENT);
687 inode = iget4(inode->i_sb, cache_inode->i_ino, NULL,
688 &I2SMI(inode)->smi_flags);
691 GOTO(exit, rc = -ENOENT);
693 d_add(dentry, inode);
696 post_smfs_dentry(cache_dentry);
697 post_smfs_dentry(cache_parent);
/*
 * smfs_cow_get_ind - search the snapshots of @inode for one that maps a
 * given block.
 *
 * On entry @index is used to compute a block number (shifted left by
 * PAGE_CACHE_SHIFT, then right by the filesystem block-size bits). Inside
 * the loop the same variable is reused for the snapshot index of each table
 * slot, walking from the newest slot to the oldest. For every indirect
 * inode found, bmap() is asked whether the block is mapped.
 *
 * NOTE(review): the declaration of slot and the return statements are not
 * visible in this copy. The left shift is performed in int before being
 * widened to long -- confirm large values cannot overflow.
 */
703 struct inode *smfs_cow_get_ind(struct inode *inode, int index)
705 struct snap_info *snap_info = S2SNAPI(inode->i_sb);
706 struct fsfilt_operations *snapops = snap_info->snap_fsfilt;
707 struct snap_table *table = snap_info->sntbl;
708 long block=(index << PAGE_CACHE_SHIFT) >> inode->i_sb->s_blocksize_bits;
712 for (slot = table->sntbl_count - 1; slot >= 0; slot--) {
713 struct address_space_operations *aops = inode->i_mapping->a_ops;
714 struct inode *cache_inode = NULL;
717 index = table->sntbl_items[slot].sn_index;
718 cache_inode = snapops->fs_get_indirect(inode, NULL, index);
720 if (!cache_inode ) continue;
722 if (aops->bmap(cache_inode->i_mapping, block))
729 EXPORT_SYMBOL(smfs_cow_get_ind);
/*
 * Signature shared by all COW hooks in this file: the inode and dentry the
 * operation applies to, plus two operation-specific data pointers.
 */
731 typedef int (*cow_funcs)(struct inode *dir, struct dentry *dentry,
732 void *new_dir, void *new_dentry);
/*
 * Dispatch table from operation code to COW hook, written in the old GNU
 * "[index] value" designated-initializer form. The array has REINT_MAX + 2
 * slots; slots without an initializer are NULL.
 */
735 static cow_funcs smfs_cow_funcs[REINT_MAX + 2] = {
736 [REINT_SETATTR] smfs_cow_setattr,
737 [REINT_CREATE] smfs_cow_create,
738 [REINT_LINK] smfs_cow_link,
739 [REINT_UNLINK] smfs_cow_unlink,
740 [REINT_RENAME] smfs_cow_rename,
741 [REINT_WRITE] smfs_cow_write,
742 [SNAP_LOOKUP] smfs_cow_lookup,
/*
 * smfs_cow - run the COW hook registered for operation @op and return its
 * result. The remaining arguments are passed through unchanged.
 *
 * NOTE(review): @op is used to index smfs_cow_funcs without a range check,
 * and the selected slot is called without a NULL check in the visible
 * line -- confirm that callers only pass operation codes that have a hook.
 */
745 int smfs_cow(struct inode *dir, struct dentry *dentry, void *new_dir,
746 void *new_dentry, int op)
748 return smfs_cow_funcs[op](dir, dentry, new_dir, new_dentry);