Whamcloud - gitweb
01e00ce1f1c82d1f8304aef78f3f28c0c58c4b25
[fs/lustre-release.git] / lustre / llite / super.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * Lustre Light Super operations
5  *
6  * This code is issued under the GNU General Public License.
7  * See the file COPYING in this distribution
8  *
9  * Copyright (C) 2002 Cluster File Systems, Inc.
10  */
11
12 #define DEBUG_SUBSYSTEM S_LLITE
13
14 #include <linux/module.h>
15 #include <linux/random.h>
16 #include <linux/version.h>
17 #include <linux/lustre_lite.h>
18 #include <linux/lustre_ha.h>
19 #include <linux/lustre_dlm.h>
20 #include <linux/init.h>
21 #include <linux/fs.h>
22 #include <linux/lprocfs_status.h>
23
24 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
25 kmem_cache_t *ll_file_data_slab;
26 extern struct address_space_operations ll_aops;
27 extern struct address_space_operations ll_dir_aops;
28 struct super_operations ll_super_operations;
29
30 extern int ll_recover(struct recovd_data *, int);
31 extern int ll_commitcbd_setup(struct ll_sb_info *);
32 extern int ll_commitcbd_cleanup(struct ll_sb_info *);
33
34 extern void ll_proc_namespace(struct super_block* sb, char* osc, char* mdc);
35
36 static char *ll_read_opt(const char *opt, char *data)
37 {
38         char *value;
39         char *retval;
40         ENTRY;
41
42         CDEBUG(D_SUPER, "option: %s, data %s\n", opt, data);
43         if ( strncmp(opt, data, strlen(opt)) )
44                 RETURN(NULL);
45         if ( (value = strchr(data, '=')) == NULL )
46                 RETURN(NULL);
47
48         value++;
49         OBD_ALLOC(retval, strlen(value) + 1);
50         if ( !retval ) {
51                 CERROR("out of memory!\n");
52                 RETURN(NULL);
53         }
54
55         memcpy(retval, value, strlen(value)+1);
56         CDEBUG(D_SUPER, "Assigned option: %s, value %s\n", opt, retval);
57         RETURN(retval);
58 }
59
60 static int ll_set_opt(const char *opt, char *data, int fl)
61 {
62         ENTRY;
63
64         CDEBUG(D_SUPER, "option: %s, data %s\n", opt, data);
65         if ( strncmp(opt, data, strlen(opt)) )
66                 RETURN(0);
67         else
68                 RETURN(fl);
69 }
70
71 static void ll_options(char *options, char **ost, char **mds, int *flags)
72 {
73         char *this_char;
74         ENTRY;
75
76         if (!options) {
77                 EXIT;
78                 return;
79         }
80
81         for (this_char = strtok (options, ",");
82              this_char != NULL;
83              this_char = strtok (NULL, ",")) {
84                 CDEBUG(D_SUPER, "this_char %s\n", this_char);
85                 if ( (!*ost && (*ost = ll_read_opt("osc", this_char)))||
86                      (!*mds && (*mds = ll_read_opt("mdc", this_char)))||
87                      (!(*flags & LL_SBI_NOLCK) && ((*flags) = (*flags) |
88                       ll_set_opt("nolock", this_char, LL_SBI_NOLCK))) )
89                         continue;
90         }
91         EXIT;
92 }
93
/*
 * Fallback log2() used to derive s_blocksize_bits from the block size.
 * Built on ffz() of the inverted value -- presumably the index of the
 * lowest set bit, so only meaningful for a non-zero power of two
 * (verify against the kernel's ffz() definition).
 */
#if !defined(log2)
# define log2(n) ffz(~(n))
#endif
97
98 static struct super_block * ll_read_super(struct super_block *sb,
99                                           void *data, int silent)
100 {
101         struct inode *root = 0;
102         struct obd_device *obd;
103         struct ll_sb_info *sbi;
104         char *osc = NULL;
105         char *mdc = NULL;
106         int err;
107         struct ll_fid rootfid;
108         struct obd_statfs osfs;
109         struct ptlrpc_request *request = NULL;
110         struct ptlrpc_connection *mdc_conn;
111         struct ll_read_inode2_cookie lic;
112         class_uuid_t uuid;
113
114         ENTRY;
115         MOD_INC_USE_COUNT;
116
117         OBD_ALLOC(sbi, sizeof(*sbi));
118         if (!sbi) {
119                 MOD_DEC_USE_COUNT;
120                 RETURN(NULL);
121         }
122
123         INIT_LIST_HEAD(&sbi->ll_conn_chain);
124         INIT_LIST_HEAD(&sbi->ll_orphan_dentry_list);
125         generate_random_uuid(uuid);
126         class_uuid_unparse(uuid, sbi->ll_sb_uuid);
127
128         sb->u.generic_sbp = sbi;
129
130         ll_options(data, &osc, &mdc, &sbi->ll_flags);
131
132         if (!osc) {
133                 CERROR("no osc\n");
134                 GOTO(out_free, sb = NULL);
135         }
136
137         if (!mdc) {
138                 CERROR("no mdc\n");
139                 GOTO(out_free, sb = NULL);
140         }
141
142         obd = class_uuid2obd(mdc);
143         if (!obd) {
144                 CERROR("MDC %s: not setup or attached\n", mdc);
145                 GOTO(out_free, sb = NULL);
146         }
147
148         err = obd_connect(&sbi->ll_mdc_conn, obd, sbi->ll_sb_uuid,
149                           ptlrpc_recovd, ll_recover);
150         if (err) {
151                 CERROR("cannot connect to %s: rc = %d\n", mdc, err);
152                 GOTO(out_free, sb = NULL);
153         }
154
155 #warning Mike: is this the right place to raise the connection level?
156         mdc_conn = sbi2mdc(sbi)->cl_import.imp_connection;
157         mdc_conn->c_level = LUSTRE_CONN_FULL;
158         list_add(&mdc_conn->c_sb_chain, &sbi->ll_conn_chain);
159
160         obd = class_uuid2obd(osc);
161         if (!obd) {
162                 CERROR("OSC %s: not setup or attached\n", osc);
163                 GOTO(out_mdc, sb = NULL);
164         }
165
166         err = obd_connect(&sbi->ll_osc_conn, obd, sbi->ll_sb_uuid,
167                           ptlrpc_recovd, ll_recover);
168         if (err) {
169                 CERROR("cannot connect to %s: rc = %d\n", osc, err);
170                 GOTO(out_mdc, sb = NULL);
171         }
172
173         err = mdc_getstatus(&sbi->ll_mdc_conn, &rootfid);
174         if (err) {
175                 CERROR("cannot mds_connect: rc = %d\n", err);
176                 GOTO(out_mdc, sb = NULL);
177         }
178         CDEBUG(D_SUPER, "rootfid "LPU64"\n", rootfid.id);
179         sbi->ll_rootino = rootfid.id;
180
181         memset(&osfs, 0, sizeof(osfs));
182         err = obd_statfs(&sbi->ll_mdc_conn, &osfs);
183         sb->s_blocksize = osfs.os_bsize;
184         sb->s_blocksize_bits = log2(osfs.os_bsize);
185         sb->s_magic = LL_SUPER_MAGIC;
186         sb->s_maxbytes = (1ULL << (32 + 9)) - osfs.os_bsize;
187
188         sb->s_op = &ll_super_operations;
189
190         /* make root inode */
191         err = mdc_getattr(&sbi->ll_mdc_conn, sbi->ll_rootino, S_IFDIR,
192                           OBD_MD_FLNOTOBD|OBD_MD_FLBLOCKS, 0, &request);
193         if (err) {
194                 CERROR("mdc_getattr failed for root: rc = %d\n", err);
195                 GOTO(out_request, sb = NULL);
196         }
197
198         /* initialize committed transaction callback daemon */
199         spin_lock_init(&sbi->ll_commitcbd_lock);
200         init_waitqueue_head(&sbi->ll_commitcbd_waitq);
201         init_waitqueue_head(&sbi->ll_commitcbd_ctl_waitq);
202         sbi->ll_commitcbd_flags = 0;
203         err = ll_commitcbd_setup(sbi);
204         if (err) {
205                 CERROR("failed to start commit callback daemon: rc = %d\n",err);
206                 GOTO(out_request, sb = NULL);
207         }
208
209         lic.lic_body = lustre_msg_buf(request->rq_repmsg, 0);
210         lic.lic_lmm = NULL;
211         LASSERT(sbi->ll_rootino != 0);
212         root = iget4(sb, sbi->ll_rootino, NULL, &lic);
213
214         if (root) {
215                 sb->s_root = d_alloc_root(root);
216         } else {
217                 CERROR("lustre_lite: bad iget4 for root\n");
218                 GOTO(out_cdb, sb = NULL);
219         }
220
221         ptlrpc_req_finished(request);
222         request = NULL;
223         ll_proc_namespace(sb, osc, mdc);
224
225 out_dev:
226         if (mdc)
227                 OBD_FREE(mdc, strlen(mdc) + 1);
228         if (osc)
229                 OBD_FREE(osc, strlen(osc) + 1);
230
231         RETURN(sb);
232
233 out_cdb:
234         ll_commitcbd_cleanup(sbi);
235 out_request:
236         ptlrpc_req_finished(request);
237         obd_disconnect(&sbi->ll_osc_conn);
238 out_mdc:
239         obd_disconnect(&sbi->ll_mdc_conn);
240 out_free:
241         OBD_FREE(sbi, sizeof(*sbi));
242
243         MOD_DEC_USE_COUNT;
244         goto out_dev;
245 } /* ll_read_super */
246
247 static void ll_put_super(struct super_block *sb)
248 {
249         struct ll_sb_info *sbi = ll_s2sbi(sb);
250         struct list_head *tmp, *next;
251         struct ll_fid rootfid;
252         ENTRY;
253
254         list_del(&sbi->ll_conn_chain);
255         ll_commitcbd_cleanup(sbi);
256         obd_disconnect(&sbi->ll_osc_conn);
257
258         /* NULL request to force sync on the MDS, and get the last_committed
259          * value to flush remaining RPCs from the sending queue on client.
260          *
261          * XXX This should be an mdc_sync() call to sync the whole MDS fs,
262          *     which we can call for other reasons as well.
263          */
264         mdc_getstatus(&sbi->ll_mdc_conn, &rootfid);
265
266         lprocfs_dereg_mnt(sbi->ll_proc_root);
267         sbi->ll_proc_root = NULL;
268
269         obd_disconnect(&sbi->ll_mdc_conn);
270
271         spin_lock(&dcache_lock);
272         list_for_each_safe(tmp, next, &sbi->ll_orphan_dentry_list) {
273                 struct dentry *dentry = list_entry(tmp, struct dentry, d_hash);
274                 shrink_dcache_parent(dentry);
275         }
276         spin_unlock(&dcache_lock);
277
278         OBD_FREE(sbi, sizeof(*sbi));
279
280         MOD_DEC_USE_COUNT;
281         EXIT;
282 } /* ll_put_super */
283
284 static void ll_clear_inode(struct inode *inode)
285 {
286         struct ll_sb_info *sbi = ll_i2sbi(inode);
287         struct ll_inode_info *lli = ll_i2info(inode);
288         int rc;
289         ENTRY;
290
291         rc = mdc_cancel_unused(&sbi->ll_mdc_conn, inode, LDLM_FL_NO_CALLBACK);
292         if (rc < 0) {
293                 CERROR("mdc_cancel_unused: %d\n", rc);
294                 /* XXX FIXME do something dramatic */
295         }
296
297         if (lli->lli_smd) {
298                 rc = obd_cancel_unused(&sbi->ll_osc_conn, lli->lli_smd, 0);
299                 if (rc < 0) {
300                         CERROR("obd_cancel_unused: %d\n", rc);
301                         /* XXX FIXME do something dramatic */
302                 }
303         }
304
305         if (atomic_read(&inode->i_count) == 0) {
306                 char *symlink_name = lli->lli_symlink_name;
307
308                 if (lli->lli_smd)
309                         obd_free_memmd(&sbi->ll_osc_conn, &lli->lli_smd);
310
311                 if (symlink_name) {
312                         OBD_FREE(symlink_name, strlen(symlink_name) + 1);
313                         lli->lli_symlink_name = NULL;
314                 }
315         }
316
317         EXIT;
318 }
319
320 static void ll_delete_inode(struct inode *inode)
321 {
322         ENTRY;
323         if (S_ISREG(inode->i_mode)) {
324                 int err;
325                 struct obdo *oa;
326                 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
327
328                 if (!lsm)
329                         GOTO(out, -EINVAL);
330
331                 if (lsm->lsm_object_id == 0) {
332                         CERROR("This really happens\n");
333                         /* No obdo was ever created */
334                         GOTO(out, 0);
335                 }
336
337                 oa = obdo_alloc();
338                 if (oa == NULL)
339                         GOTO(out, -ENOMEM);
340
341                 oa->o_id = lsm->lsm_object_id;
342                 oa->o_mode = inode->i_mode;
343                 oa->o_valid = OBD_MD_FLID | OBD_MD_FLEASIZE | OBD_MD_FLTYPE;
344
345                 err = obd_destroy(ll_i2obdconn(inode), oa, lsm);
346                 obdo_free(oa);
347                 CDEBUG(D_SUPER, "obd destroy of objid "LPX64" error %d\n",
348                        lsm->lsm_object_id, err);
349         }
350 out:
351         clear_inode(inode);
352         EXIT;
353 }
354
355 /* like inode_setattr, but doesn't mark the inode dirty */
356 static int ll_attr2inode(struct inode * inode, struct iattr * attr, int trunc)
357 {
358         unsigned int ia_valid = attr->ia_valid;
359         int error = 0;
360
361         if ((ia_valid & ATTR_SIZE) && trunc) {
362                 error = vmtruncate(inode, attr->ia_size);
363                 if (error)
364                         goto out;
365         } else if (ia_valid & ATTR_SIZE)
366                 inode->i_size = attr->ia_size;
367
368         if (ia_valid & ATTR_UID)
369                 inode->i_uid = attr->ia_uid;
370         if (ia_valid & ATTR_GID)
371                 inode->i_gid = attr->ia_gid;
372         if (ia_valid & ATTR_ATIME)
373                 inode->i_atime = attr->ia_atime;
374         if (ia_valid & ATTR_MTIME)
375                 inode->i_mtime = attr->ia_mtime;
376         if (ia_valid & ATTR_CTIME)
377                 inode->i_ctime = attr->ia_ctime;
378         if (ia_valid & ATTR_MODE) {
379                 inode->i_mode = attr->ia_mode;
380                 if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
381                         inode->i_mode &= ~S_ISGID;
382         }
383 out:
384         return error;
385 }
386
387 int ll_inode_setattr(struct inode *inode, struct iattr *attr, int do_trunc)
388 {
389         struct ptlrpc_request *request = NULL;
390         struct ll_sb_info *sbi = ll_i2sbi(inode);
391         int err;
392
393         ENTRY;
394
395         /* change incore inode */
396         ll_attr2inode(inode, attr, do_trunc);
397
398         err = mdc_setattr(&sbi->ll_mdc_conn, inode, attr, &request);
399         if (err)
400                 CERROR("mdc_setattr fails (%d)\n", err);
401
402         ptlrpc_req_finished(request);
403
404         RETURN(err);
405 }
406
407 int ll_setattr(struct dentry *de, struct iattr *attr)
408 {
409         int rc = inode_change_ok(de->d_inode, attr);
410
411         if (rc)
412                 return rc;
413
414         return ll_inode_setattr(de->d_inode, attr, 1);
415 }
416
417 static int ll_statfs(struct super_block *sb, struct statfs *sfs)
418 {
419         struct ll_sb_info *sbi = ll_s2sbi(sb);
420         struct obd_statfs osfs;
421         int rc;
422         ENTRY;
423
424         memset(sfs, 0, sizeof(*sfs));
425         rc = obd_statfs(&sbi->ll_mdc_conn, &osfs);
426         statfs_unpack(sfs, &osfs);
427         if (rc)
428                 CERROR("mdc_statfs fails: rc = %d\n", rc);
429         else
430                 CDEBUG(D_SUPER, "mdc_statfs shows blocks "LPU64"/"LPU64
431                        " objects "LPU64"/"LPU64"\n",
432                        osfs.os_bavail, osfs.os_blocks,
433                        osfs.os_ffree, osfs.os_files);
434
435         /* temporary until mds_statfs returns statfs info for all OSTs */
436         if (!rc) {
437                 rc = obd_statfs(&sbi->ll_osc_conn, &osfs);
438                 if (rc) {
439                         CERROR("obd_statfs fails: rc = %d\n", rc);
440                         GOTO(out, rc);
441                 }
442                 CDEBUG(D_SUPER, "obd_statfs shows blocks "LPU64"/"LPU64
443                        " objects "LPU64"/"LPU64"\n",
444                        osfs.os_bavail, osfs.os_blocks,
445                        osfs.os_ffree, osfs.os_files);
446
447                 while (osfs.os_blocks > ~0UL) {
448                         sfs->f_bsize <<= 1;
449
450                         osfs.os_blocks >>= 1;
451                         osfs.os_bfree >>= 1;
452                         osfs.os_bavail >>= 1;
453                 }
454                 sfs->f_blocks = osfs.os_blocks;
455                 sfs->f_bfree = osfs.os_bfree;
456                 sfs->f_bavail = osfs.os_bavail;
457                 if (osfs.os_ffree < (__u64)sfs->f_ffree)
458                         sfs->f_ffree = osfs.os_ffree;
459         }
460
461 out:
462         RETURN(rc);
463 }
464
465 void ll_update_inode(struct inode *inode, struct mds_body *body)
466 {
467         if (body->valid & OBD_MD_FLID)
468                 inode->i_ino = body->ino;
469         if (body->valid & OBD_MD_FLATIME)
470                 inode->i_atime = body->atime;
471         if (body->valid & OBD_MD_FLMTIME)
472                 inode->i_mtime = body->mtime;
473         if (body->valid & OBD_MD_FLCTIME)
474                 inode->i_ctime = body->ctime;
475         if (body->valid & OBD_MD_FLMODE)
476                 inode->i_mode = (inode->i_mode & S_IFMT)|(body->mode & ~S_IFMT);
477         if (body->valid & OBD_MD_FLTYPE)
478                 inode->i_mode = (inode->i_mode & ~S_IFMT)|(body->mode & S_IFMT);
479         if (body->valid & OBD_MD_FLUID)
480                 inode->i_uid = body->uid;
481         if (body->valid & OBD_MD_FLGID)
482                 inode->i_gid = body->gid;
483         if (body->valid & OBD_MD_FLFLAGS)
484                 inode->i_flags = body->flags;
485         if (body->valid & OBD_MD_FLNLINK)
486                 inode->i_nlink = body->nlink;
487         if (body->valid & OBD_MD_FLGENER)
488                 inode->i_generation = body->generation;
489         if (body->valid & OBD_MD_FLRDEV)
490                 inode->i_rdev = body->rdev;
491         if (body->valid & OBD_MD_FLSIZE)
492                 inode->i_size = body->size;
493 }
494
495 static void ll_read_inode2(struct inode *inode, void *opaque)
496 {
497         struct ll_read_inode2_cookie *lic = opaque;
498         struct mds_body *body = lic->lic_body;
499         struct ll_inode_info *lli = ll_i2info(inode);
500         ENTRY;
501
502         sema_init(&lli->lli_open_sem, 1);
503         atomic_set(&lli->lli_open_count, 0);
504
505         /* core attributes first */
506         ll_update_inode(inode, body);
507
508         //if (body->valid & OBD_MD_FLEASIZE)
509         LASSERT(!lli->lli_smd);
510         if (lic && lic->lic_lmm)
511                 obd_unpackmd(ll_i2obdconn(inode), &lli->lli_smd, lic->lic_lmm);
512
513         /* Get the authoritative file size */
514         if (lli->lli_smd && (inode->i_mode & S_IFREG)) {
515                 int rc;
516                 LASSERT(lli->lli_smd->lsm_object_id != 0);
517                 rc = ll_file_size(inode, lli->lli_smd);
518                 if (rc) {
519                         CERROR("ll_file_size: %d\n", rc);
520                         /* FIXME: need to somehow prevent inode creation */
521                         LBUG();
522                         make_bad_inode(inode);
523                 }
524         }
525
526         /* OIDEBUG(inode); */
527
528         if (S_ISREG(inode->i_mode)) {
529                 inode->i_op = &ll_file_inode_operations;
530                 inode->i_fop = &ll_file_operations;
531                 inode->i_mapping->a_ops = &ll_aops;
532                 EXIT;
533         } else if (S_ISDIR(inode->i_mode)) {
534                 inode->i_op = &ll_dir_inode_operations;
535                 inode->i_fop = &ll_dir_operations;
536                 inode->i_mapping->a_ops = &ll_dir_aops;
537                 EXIT;
538         } else if (S_ISLNK(inode->i_mode)) {
539                 inode->i_op = &ll_fast_symlink_inode_operations;
540                 EXIT;
541         } else {
542                 init_special_inode(inode, inode->i_mode, inode->i_rdev);
543                 EXIT;
544         }
545 }
546
547 static inline void invalidate_request_list(struct list_head *req_list)
548 {
549         struct list_head *tmp, *n;
550         list_for_each_safe(tmp, n, req_list) {
551                 struct ptlrpc_request *req =
552                         list_entry(tmp, struct ptlrpc_request, rq_list);
553                 CERROR("invalidating req xid "LPD64" op %d to %s:%d\n",
554                        (unsigned long long)req->rq_xid, req->rq_reqmsg->opc,
555                        req->rq_connection->c_remote_uuid,
556                        req->rq_import->imp_client->cli_request_portal);
557                 req->rq_flags |= PTL_RPC_FL_ERR;
558                 wake_up(&req->rq_wait_for_rep);
559         }
560 }
561
562 void ll_umount_begin(struct super_block *sb)
563 {
564         struct ll_sb_info *sbi = ll_s2sbi(sb);
565         struct list_head *ctmp;
566
567         ENTRY;
568
569         list_for_each(ctmp, &sbi->ll_conn_chain) {
570                 struct ptlrpc_connection *conn;
571                 conn = list_entry(ctmp, struct ptlrpc_connection, c_sb_chain);
572
573                 spin_lock(&conn->c_lock);
574                 /* XXX should just be dealing with imports, probably through
575                  * XXX iocontrol, need next-gen recovery! */
576                 conn->c_flags |= CONN_INVALID;
577                 invalidate_request_list(&conn->c_sending_head);
578                 invalidate_request_list(&conn->c_delayed_head);
579                 spin_unlock(&conn->c_lock);
580         }
581
582         EXIT;
583 }
584
585 /* exported operations */
586 struct super_operations ll_super_operations =
587 {
588         read_inode2: ll_read_inode2,
589         clear_inode: ll_clear_inode,
590         delete_inode: ll_delete_inode,
591         put_super: ll_put_super,
592         statfs: ll_statfs,
593         umount_begin: ll_umount_begin
594 };
595
596 struct file_system_type lustre_lite_fs_type = {
597         "lustre_lite", 0, ll_read_super, NULL
598 };
599
600 static int __init init_lustre_lite(void)
601 {
602         printk(KERN_INFO "Lustre Lite 0.5.14, info@clusterfs.com\n");
603         ll_file_data_slab = kmem_cache_create("ll_file_data",
604                                               sizeof(struct ll_file_data), 0,
605                                               SLAB_HWCACHE_ALIGN, NULL, NULL);
606         if (ll_file_data_slab == NULL)
607                 return -ENOMEM;
608         return register_filesystem(&lustre_lite_fs_type);
609 }
610
611 static void __exit exit_lustre_lite(void)
612 {
613         unregister_filesystem(&lustre_lite_fs_type);
614         kmem_cache_destroy(ll_file_data_slab);
615 }
616
617 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
618 MODULE_DESCRIPTION("Lustre Lite Client File System v1.0");
619 MODULE_LICENSE("GPL");
620
621 module_init(init_lustre_lite);
622 module_exit(exit_lustre_lite);
623 #endif