Whamcloud - gitweb
34d2c7dc1da22f9f97b07e671219ce303a0cbabf
[fs/lustre-release.git] / lustre / llite / dir.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * Copyright (C) 1992, 1993, 1994, 1995
5  * Remy Card (card@masi.ibp.fr)
6  * Laboratoire MASI - Institut Blaise Pascal
7  * Universite Pierre et Marie Curie (Paris VI)
8  *
9  *  from
10  *
11  *  linux/fs/minix/dir.c
12  *  linux/fs/ext2/dir.c
13  *
14  *  Copyright (C) 1991, 1992  Linus Torvalds
15  *
16  *  ext2 directory handling functions
17  *
18  *  Big-endian to little-endian byte-swapping/bitmaps by
19  *        David S. Miller (davem@caip.rutgers.edu), 1995
20  *
21  *  All code that works with directory layout had been switched to pagecache
22  *  and moved here. AV
23  *
24  *  Adapted for Lustre Light
25  *  Copyright (C) 2002-2003, Cluster File Systems, Inc.
26  *
27  */
28
29 #include <linux/fs.h>
30 #include <linux/ext2_fs.h>
31 #include <linux/pagemap.h>
32 #include <linux/mm.h>
33 #include <linux/version.h>
34 #include <linux/smp_lock.h>
35 #include <asm/uaccess.h>
36 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
37 # include <linux/locks.h>   // for wait_on_buffer
38 #else
39 # include <linux/buffer_head.h>   // for wait_on_buffer
40 #endif
41
42 #define DEBUG_SUBSYSTEM S_LLITE
43
44 #include <obd_support.h>
45 #include <obd_class.h>
46 #include <lustre_lib.h>
47 #include <lustre/lustre_idl.h>
48 #include <lustre_lite.h>
49 #include <lustre_dlm.h>
50 #include "llite_internal.h"
51
/* Shorthand for the ext2 directory entry layout used when walking pages. */
typedef struct ext2_dir_entry_2 ext2_dirent;

#ifdef HAVE_PG_FS_MISC
/*
 * When HAVE_PG_FS_MISC is defined, the "checked" state of a directory page
 * is tracked with the PG_fs_misc bit of the page flags.
 */
#define PageChecked(pg)          test_bit(PG_fs_misc, &(pg)->flags)
#define SetPageChecked(pg)       set_bit(PG_fs_misc, &(pg)->flags)
#endif
58
59 /* returns the page unlocked, but with a reference */
60 static int ll_dir_readpage(struct file *file, struct page *page)
61 {
62         struct inode *inode = page->mapping->host;
63         struct ll_fid mdc_fid;
64         __u64 offset;
65         struct ptlrpc_request *request;
66         struct mds_body *body;
67         int rc = 0;
68         ENTRY;
69
70         offset = (__u64)page->index << CFS_PAGE_SHIFT;
71         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) off "LPU64"\n",
72                inode->i_ino, inode->i_generation, inode, offset);
73
74         mdc_pack_fid(&mdc_fid, inode->i_ino, inode->i_generation, S_IFDIR);
75
76         rc = mdc_readpage(ll_i2sbi(inode)->ll_mdc_exp, &mdc_fid,
77                           offset, page, &request);
78         if (!rc) {
79                 body = lustre_msg_buf(request->rq_repmsg, REPLY_REC_OFF,
80                                       sizeof(*body));
81                 LASSERT(body != NULL); /* checked by mdc_readpage() */
82                 /* swabbed by mdc_readpage() */
83                 LASSERT_REPSWABBED(request, REPLY_REC_OFF);
84
85                 inode->i_size = body->size;
86                 SetPageUptodate(page);
87         }
88         ptlrpc_req_finished(request);
89
90         unlock_page(page);
91         EXIT;
92         return rc;
93 }
94
95 struct address_space_operations ll_dir_aops = {
96         .readpage  = ll_dir_readpage,
97 };
98
99 /*
100  * ext2 uses block-sized chunks. Arguably, sector-sized ones would be
101  * more robust, but we have what we have
102  */
103 static inline unsigned ext2_chunk_size(struct inode *inode)
104 {
105         return inode->i_sb->s_blocksize;
106 }
107
/*
 * Undo what ll_get_dir_page() left on a page: drop the kmap() mapping first,
 * then release the page-cache reference.
 */
static inline void ext2_put_page(struct page *page)
{
        kunmap(page);
        page_cache_release(page);
}
113
114 static inline unsigned long dir_pages(struct inode *inode)
115 {
116         return (inode->i_size+CFS_PAGE_SIZE-1) >> CFS_PAGE_SHIFT;
117 }
118
119
120 static void ext2_check_page(struct inode *dir, struct page *page)
121 {
122         unsigned chunk_size = ext2_chunk_size(dir);
123         char *kaddr = page_address(page);
124         //      u32 max_inumber = le32_to_cpu(sb->u.ext2_sb.s_es->s_inodes_count);
125         unsigned rec_len;
126         __u64 offs, limit = CFS_PAGE_SIZE;
127         ext2_dirent *p;
128         char *error;
129
130         if ((dir->i_size >> CFS_PAGE_SHIFT) == (__u64)page->index) {
131                 limit = dir->i_size & ~CFS_PAGE_MASK;
132                 if (limit & (chunk_size - 1)) {
133                         CERROR("limit "LPU64" dir size %lld index "LPU64"\n",
134                                limit, dir->i_size, (__u64)page->index);
135                         goto Ebadsize;
136                 }
137                 for (offs = limit; offs < CFS_PAGE_SIZE; offs += chunk_size) {
138                         ext2_dirent *p = (ext2_dirent*)(kaddr + offs);
139                         p->rec_len = cpu_to_le16(chunk_size);
140                         p->name_len = 0;
141                         p->inode = 0;
142                 }
143                 if (!limit)
144                         goto out;
145         }
146         for (offs = 0; offs <= limit - EXT2_DIR_REC_LEN(1); offs += rec_len) {
147                 p = (ext2_dirent *)(kaddr + offs);
148                 rec_len = le16_to_cpu(p->rec_len);
149
150                 if (rec_len < EXT2_DIR_REC_LEN(1))
151                         goto Eshort;
152                 if (rec_len & 3)
153                         goto Ealign;
154                 if (rec_len < EXT2_DIR_REC_LEN(p->name_len))
155                         goto Enamelen;
156                 if (((offs + rec_len - 1) ^ offs) & ~(chunk_size-1))
157                         goto Espan;
158                 //              if (le32_to_cpu(p->inode) > max_inumber)
159                 //goto Einumber;
160         }
161         if (offs != limit)
162                 goto Eend;
163 out:
164         SetPageChecked(page);
165         return;
166
167         /* Too bad, we had an error */
168
169 Ebadsize:
170         CERROR("%s: directory %lu/%u size %llu is not a multiple of %u\n",
171                ll_i2mdcexp(dir)->exp_obd->obd_name, dir->i_ino,
172                dir->i_generation, dir->i_size, chunk_size);
173         goto fail;
174 Eshort:
175         error = "rec_len is smaller than minimal";
176         goto bad_entry;
177 Ealign:
178         error = "unaligned directory entry";
179         goto bad_entry;
180 Enamelen:
181         error = "rec_len is too small for name_len";
182         goto bad_entry;
183 Espan:
184         error = "directory entry across blocks";
185         goto bad_entry;
186         //Einumber:
187         // error = "inode out of bounds";
188 bad_entry:
189         CERROR("%s: bad entry in directory %lu/%u: %s - "
190                "offset="LPU64"+"LPU64", inode=%lu, rec_len=%d, name_len=%d\n",
191                ll_i2mdcexp(dir)->exp_obd->obd_name, dir->i_ino,
192                dir->i_generation, error, (__u64)page->index << CFS_PAGE_SHIFT,
193                offs, (unsigned long)le32_to_cpu(p->inode),
194                rec_len, p->name_len);
195         goto fail;
196 Eend:
197         p = (ext2_dirent *)(kaddr + offs);
198         CERROR("%s: entry in directory %lu/%u spans the page boundary "
199                "offset="LPU64"+"LPU64", inode=%lu\n",ll_i2mdcexp(dir)->exp_obd->obd_name,
200                dir->i_ino, dir->i_generation,
201                (__u64)page->index << CFS_PAGE_SHIFT,
202                offs, (unsigned long)le32_to_cpu(p->inode));
203 fail:
204         SetPageChecked(page);
205         SetPageError(page);
206 }
207
208 static struct page *ll_get_dir_page(struct inode *dir, unsigned long n)
209 {
210         struct ldlm_res_id res_id =
211                 { .name = { dir->i_ino, (__u64)dir->i_generation} };
212         struct lustre_handle lockh;
213         struct obd_device *obddev = class_exp2obd(ll_i2sbi(dir)->ll_mdc_exp);
214         struct address_space *mapping = dir->i_mapping;
215         struct page *page;
216         ldlm_policy_data_t policy = {.l_inodebits = {MDS_INODELOCK_UPDATE} };
217         int rc;
218
219         rc = ldlm_lock_match(obddev->obd_namespace, LDLM_FL_BLOCK_GRANTED,
220                              &res_id, LDLM_IBITS, &policy, LCK_CR, &lockh);
221         if (!rc) {
222                 struct lookup_intent it = { .it_op = IT_READDIR };
223                 struct ptlrpc_request *request;
224                 struct mdc_op_data data;
225
226                 ll_prepare_mdc_op_data(&data, dir, NULL, NULL, 0, 0);
227
228                 rc = mdc_enqueue(ll_i2sbi(dir)->ll_mdc_exp, LDLM_IBITS, &it,
229                                  LCK_CR, &data, &lockh, NULL, 0,
230                                  ldlm_completion_ast, ll_mdc_blocking_ast, dir,
231                                  0);
232
233                 request = (struct ptlrpc_request *)it.d.lustre.it_data;
234                 if (request)
235                         ptlrpc_req_finished(request);
236                 if (rc < 0) {
237                         CERROR("lock enqueue: rc: %d\n", rc);
238                         return ERR_PTR(rc);
239                 }
240         }
241         ldlm_lock_dump_handle(D_OTHER, &lockh);
242
243         page = read_cache_page(mapping, n,
244                                (filler_t*)mapping->a_ops->readpage, NULL);
245         if (IS_ERR(page))
246                 GOTO(out_unlock, page);
247
248         wait_on_page(page);
249         (void)kmap(page);
250         if (!PageUptodate(page))
251                 goto fail;
252         if (!PageChecked(page))
253                 ext2_check_page(dir, page);
254         if (PageError(page))
255                 goto fail;
256
257 out_unlock:
258         ldlm_lock_decref(&lockh, LCK_CR);
259         return page;
260
261 fail:
262         ext2_put_page(page);
263         page = ERR_PTR(-EIO);
264         goto out_unlock;
265 }
266
267 /*
268  * p is at least 6 bytes before the end of page
269  */
270 static inline ext2_dirent *ext2_next_entry(ext2_dirent *p)
271 {
272         return (ext2_dirent *)((char*)p + le16_to_cpu(p->rec_len));
273 }
274
275 static inline unsigned
276 ext2_validate_entry(char *base, unsigned offset, unsigned mask)
277 {
278         ext2_dirent *de = (ext2_dirent*)(base + offset);
279         ext2_dirent *p = (ext2_dirent*)(base + (offset&mask));
280         while ((char*)p < (char*)de)
281                 p = ext2_next_entry(p);
282         return (char *)p - base;
283 }
284
285 static unsigned char ext2_filetype_table[EXT2_FT_MAX] = {
286         [EXT2_FT_UNKNOWN]       DT_UNKNOWN,
287         [EXT2_FT_REG_FILE]      DT_REG,
288         [EXT2_FT_DIR]           DT_DIR,
289         [EXT2_FT_CHRDEV]        DT_CHR,
290         [EXT2_FT_BLKDEV]        DT_BLK,
291         [EXT2_FT_FIFO]          DT_FIFO,
292         [EXT2_FT_SOCK]          DT_SOCK,
293         [EXT2_FT_SYMLINK]       DT_LNK,
294 };
295
296
297 int ll_readdir(struct file *filp, void *dirent, filldir_t filldir)
298 {
299         struct inode *inode = filp->f_dentry->d_inode;
300         loff_t pos = filp->f_pos;
301         // XXX struct super_block *sb = inode->i_sb;
302         __u64 offset = pos & ~CFS_PAGE_MASK;
303         __u64 n = pos >> CFS_PAGE_SHIFT;
304         unsigned long npages = dir_pages(inode);
305         unsigned chunk_mask = ~(ext2_chunk_size(inode)-1);
306         unsigned char *types = ext2_filetype_table;
307         int need_revalidate = (filp->f_version != inode->i_version);
308         int rc = 0;
309         ENTRY;
310
311         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) pos %llu/%llu\n",
312                inode->i_ino, inode->i_generation, inode, pos, inode->i_size);
313
314         if (pos > inode->i_size - EXT2_DIR_REC_LEN(1))
315                 RETURN(0);
316
317         for ( ; n < npages; n++, offset = 0) {
318                 char *kaddr, *limit;
319                 ext2_dirent *de;
320                 struct page *page;
321
322                 CDEBUG(D_EXT2,"read %lu of dir %lu/%u page "LPU64"/%lu "
323                        "size %llu\n",
324                        CFS_PAGE_SIZE, inode->i_ino, inode->i_generation,
325                        n, npages, inode->i_size);
326                 page = ll_get_dir_page(inode, n);
327
328                 /* size might have been updated by mdc_readpage */
329                 npages = dir_pages(inode);
330
331                 if (IS_ERR(page)) {
332                         rc = PTR_ERR(page);
333                         CERROR("error reading dir %lu/%u page "LPU64": rc %d\n",
334                                inode->i_ino, inode->i_generation, n, rc);
335                         continue;
336                 }
337
338                 kaddr = page_address(page);
339                 if (need_revalidate) {
340                         /* page already checked from ll_get_dir_page() */
341                         offset = ext2_validate_entry(kaddr, offset, chunk_mask);
342                         need_revalidate = 0;
343                 }
344                 de = (ext2_dirent *)(kaddr+offset);
345                 limit = kaddr + CFS_PAGE_SIZE - EXT2_DIR_REC_LEN(1);
346                 for ( ;(char*)de <= limit; de = ext2_next_entry(de)) {
347                         if (de->inode) {
348                                 int over;
349
350                                 rc = 0; /* no error if we return something */
351
352                                 offset = (char *)de - kaddr;
353                                 over = filldir(dirent, de->name, de->name_len,
354                                                (n << CFS_PAGE_SHIFT) | offset,
355                                                le32_to_cpu(de->inode),
356                                                types[de->file_type &
357                                                      (EXT2_FT_MAX - 1)]);
358                                 if (over) {
359                                         ext2_put_page(page);
360                                         GOTO(done, rc);
361                                 }
362                         }
363                 }
364                 ext2_put_page(page);
365         }
366
367 done:
368         filp->f_pos = (n << CFS_PAGE_SHIFT) | offset;
369         filp->f_version = inode->i_version;
370         touch_atime(filp->f_vfsmnt, filp->f_dentry);
371
372         RETURN(rc);
373 }
374
/*
 * Apply Q_COPY() to each quota control member in turn: command, type, id,
 * status, dqinfo and dqblk.
 */
#define QCTL_COPY(dst, src)             \
do {                                    \
        Q_COPY(dst, src, qc_cmd);       \
        Q_COPY(dst, src, qc_type);      \
        Q_COPY(dst, src, qc_id);        \
        Q_COPY(dst, src, qc_stat);      \
        Q_COPY(dst, src, qc_dqinfo);    \
        Q_COPY(dst, src, qc_dqblk);     \
} while (0)
384
385 int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump)
386 {
387         struct ll_sb_info *sbi = ll_i2sbi(inode);
388         struct mdc_op_data data;
389         struct ptlrpc_request *req = NULL;
390
391         struct iattr attr = { 0 };
392         int rc = 0;
393
394         /*
395          * This is coming from userspace, so should be in
396          * local endian.  But the MDS would like it in little
397          * endian, so we swab it before we send it.
398          */
399         if (lump->lmm_magic != LOV_USER_MAGIC)
400                 RETURN(-EINVAL);
401
402         if (lump->lmm_magic != cpu_to_le32(LOV_USER_MAGIC))
403                 lustre_swab_lov_user_md(lump);
404
405         ll_prepare_mdc_op_data(&data, inode, NULL, NULL, 0, 0);
406
407         /* swabbing is done in lov_setstripe() on server side */
408         rc = mdc_setattr(sbi->ll_mdc_exp, &data,
409                          &attr, lump, sizeof(*lump), NULL, 0, &req);
410         if (rc) {
411                 ptlrpc_req_finished(req);
412                 if (rc != -EPERM && rc != -EACCES)
413                         CERROR("mdc_setattr fails: rc = %d\n", rc);
414                 return rc;
415         }
416         ptlrpc_req_finished(req);
417
418         return rc;
419
420 }
421
422 int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmmp, 
423                      int *lmm_size, struct ptlrpc_request **request) 
424 {
425         struct ll_sb_info *sbi = ll_i2sbi(inode);
426         struct ll_fid     fid;
427         struct mds_body   *body;
428         struct lov_mds_md *lmm = NULL;
429         struct ptlrpc_request *req = NULL;
430         int rc, lmmsize;
431
432         ll_inode2fid(&fid, inode);
433
434         rc = ll_get_max_mdsize(sbi, &lmmsize);
435         if (rc)
436                 RETURN(rc);
437
438         rc = mdc_getattr(sbi->ll_mdc_exp, &fid,
439                         OBD_MD_FLEASIZE|OBD_MD_FLDIREA,
440                         lmmsize, &req);
441         if (rc < 0) {
442                 CDEBUG(D_INFO, "mdc_getattr failed on inode "
443                        "%lu/%u: rc %d\n", inode->i_ino,
444                        inode->i_generation, rc);
445                 GOTO(out, rc);
446         }
447         body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF,
448                         sizeof(*body));
449         LASSERT(body != NULL); /* checked by mdc_getattr_name */
450         /* swabbed by mdc_getattr_name */
451         LASSERT_REPSWABBED(req, REPLY_REC_OFF);
452
453         lmmsize = body->eadatasize;
454
455         if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
456             lmmsize == 0) {
457                 GOTO(out, rc = -ENODATA);
458         }
459
460         lmm = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF + 1, lmmsize);
461         LASSERT(lmm != NULL);
462         LASSERT_REPSWABBED(req, REPLY_REC_OFF + 1);
463
464         /*
465          * This is coming from the MDS, so is probably in
466          * little endian.  We convert it to host endian before
467          * passing it to userspace.
468          */
469         if (lmm->lmm_magic == __swab32(LOV_MAGIC)) {
470                 lustre_swab_lov_user_md((struct lov_user_md *)lmm);
471                 lustre_swab_lov_user_md_objects((struct lov_user_md *)lmm);
472         }
473 out:
474         *lmmp = lmm;
475         *lmm_size = lmmsize;
476         *request = req;
477         return rc;
478 }
479
480 static int ll_dir_ioctl(struct inode *inode, struct file *file,
481                         unsigned int cmd, unsigned long arg)
482 {
483         struct ll_sb_info *sbi = ll_i2sbi(inode);
484         struct obd_ioctl_data *data;
485         ENTRY;
486
487         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), cmd=%#x\n",
488                inode->i_ino, inode->i_generation, inode, cmd);
489
490         /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
491         if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
492                 return -ENOTTY;
493
494         ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
495         switch(cmd) {
496         case EXT3_IOC_GETFLAGS:
497         case EXT3_IOC_SETFLAGS:
498                 RETURN(ll_iocontrol(inode, file, cmd, arg));
499         case EXT3_IOC_GETVERSION_OLD:
500         case EXT3_IOC_GETVERSION:
501                 RETURN(put_user(inode->i_generation, (int *)arg));
502         /* We need to special case any other ioctls we want to handle,
503          * to send them to the MDS/OST as appropriate and to properly
504          * network encode the arg field.
505         case EXT3_IOC_SETVERSION_OLD:
506         case EXT3_IOC_SETVERSION:
507         */
508         case IOC_MDC_LOOKUP: {
509                 struct ptlrpc_request *request = NULL;
510                 struct ll_fid fid;
511                 char *buf = NULL;
512                 char *filename;
513                 int namelen, rc, len = 0;
514
515                 rc = obd_ioctl_getdata(&buf, &len, (void *)arg);
516                 if (rc)
517                         RETURN(rc);
518                 data = (void *)buf;
519
520                 filename = data->ioc_inlbuf1;
521                 namelen = data->ioc_inllen1;
522
523                 if (namelen < 1) {
524                         CDEBUG(D_INFO, "IOC_MDC_LOOKUP missing filename\n");
525                         GOTO(out, rc = -EINVAL);
526                 }
527
528                 ll_inode2fid(&fid, inode);
529                 rc = mdc_getattr_name(sbi->ll_mdc_exp, &fid, filename, namelen,
530                                       OBD_MD_FLID, 0, &request);
531                 if (rc < 0) {
532                         CDEBUG(D_INFO, "mdc_getattr_name: %d\n", rc);
533                         GOTO(out, rc);
534                 }
535
536                 ptlrpc_req_finished(request);
537
538                 EXIT;
539         out:
540                 obd_ioctl_freedata(buf, len);
541                 return rc;
542         }
543         case LL_IOC_LOV_SETSTRIPE: {
544                 struct lov_user_md lum, *lump = (struct lov_user_md *)arg;
545                 int rc = 0;
546
547                 LASSERT(sizeof(lum) == sizeof(*lump));
548                 LASSERT(sizeof(lum.lmm_objects[0]) ==
549                         sizeof(lump->lmm_objects[0]));
550                 rc = copy_from_user(&lum, lump, sizeof(lum));
551                 if (rc)
552                         return(-EFAULT);
553
554                 rc = ll_dir_setstripe(inode, &lum);
555
556                 return rc;
557         }
558         case LL_IOC_OBD_STATFS:
559                 RETURN(ll_obd_statfs(inode, (void *)arg));
560         case LL_IOC_LOV_GETSTRIPE:
561         case LL_IOC_MDC_GETINFO:
562         case IOC_MDC_GETFILEINFO:
563         case IOC_MDC_GETFILESTRIPE: {
564                 struct ptlrpc_request *request = NULL;
565                 struct mds_body *body;
566                 struct lov_user_md *lump;
567                 struct lov_mds_md *lmm = NULL;
568                 char *filename = NULL;
569                 int rc, lmmsize;
570
571                 if (cmd == IOC_MDC_GETFILEINFO ||
572                     cmd == IOC_MDC_GETFILESTRIPE) {
573                         filename = getname((const char *)arg);
574                         if (IS_ERR(filename))
575                                 RETURN(PTR_ERR(filename));
576
577                         rc = ll_lov_getstripe_ea_info(inode, filename, &lmm, 
578                                                       &lmmsize, &request);
579                 } else {
580                         rc = ll_dir_getstripe(inode, &lmm, &lmmsize, &request);
581                 }
582
583                 if (request) {
584                         body = lustre_msg_buf(request->rq_repmsg, REPLY_REC_OFF,
585                                               sizeof(*body));
586                         LASSERT(body != NULL); /* checked by mdc_getattr_name */
587                         /* swabbed by mdc_getattr_name */
588                         LASSERT_REPSWABBED(request, REPLY_REC_OFF);
589                 } else {
590                         GOTO(out_req, rc);
591                 }
592
593                 if (rc < 0) {
594                         if (rc == -ENODATA && (cmd == IOC_MDC_GETFILEINFO || 
595                                                cmd == LL_IOC_MDC_GETINFO))
596                                 GOTO(skip_lmm, rc = 0);
597                         else
598                                 GOTO(out_req, rc);
599                 }
600
601                 if (cmd == IOC_MDC_GETFILESTRIPE ||
602                     cmd == LL_IOC_LOV_GETSTRIPE) {
603                         lump = (struct lov_user_md *)arg;
604                 } else {
605                         struct lov_user_mds_data *lmdp;
606                         lmdp = (struct lov_user_mds_data *)arg;
607                         lump = &lmdp->lmd_lmm;
608                 }
609                 rc = copy_to_user(lump, lmm, lmmsize);
610                 if (rc)
611                         GOTO(out_lmm, rc = -EFAULT);
612         skip_lmm:
613                 if (cmd == IOC_MDC_GETFILEINFO || cmd == LL_IOC_MDC_GETINFO) {
614                         struct lov_user_mds_data *lmdp;
615                         lstat_t st = { 0 };
616
617                         st.st_dev     = inode->i_sb->s_dev;
618                         st.st_mode    = body->mode;
619                         st.st_nlink   = body->nlink;
620                         st.st_uid     = body->uid;
621                         st.st_gid     = body->gid;
622                         st.st_rdev    = body->rdev;
623                         st.st_size    = body->size;
624                         st.st_blksize = CFS_PAGE_SIZE;
625                         st.st_blocks  = body->blocks;
626                         st.st_atime   = body->atime;
627                         st.st_mtime   = body->mtime;
628                         st.st_ctime   = body->ctime;
629                         st.st_ino     = body->ino;
630
631                         lmdp = (struct lov_user_mds_data *)arg;
632                         rc = copy_to_user(&lmdp->lmd_st, &st, sizeof(st));
633                         if (rc)
634                                 GOTO(out_lmm, rc = -EFAULT);
635                 }
636
637                 EXIT;
638         out_lmm:
639                 if (lmm && lmm->lmm_magic == LOV_MAGIC_JOIN)
640                         OBD_FREE(lmm, lmmsize);
641         out_req:
642                 ptlrpc_req_finished(request);
643                 if (filename)
644                         putname(filename);
645                 return rc;
646         }
647         case IOC_LOV_GETINFO: {
648                 struct lov_user_mds_data *lumd;
649                 struct lov_stripe_md *lsm;
650                 struct lov_user_md *lum;
651                 struct lov_mds_md *lmm;
652                 int lmmsize;
653                 lstat_t st;
654                 int rc;
655
656                 lumd = (struct lov_user_mds_data *)arg;
657                 lum = &lumd->lmd_lmm;
658
659                 rc = ll_get_max_mdsize(sbi, &lmmsize);
660                 if (rc)
661                         RETURN(rc);
662
663                 OBD_ALLOC(lmm, lmmsize);
664                 rc = copy_from_user(lmm, lum, lmmsize);
665                 if (rc)
666                         GOTO(free_lmm, rc = -EFAULT);
667
668                 rc = obd_unpackmd(sbi->ll_osc_exp, &lsm, lmm, lmmsize);
669                 if (rc < 0)
670                         GOTO(free_lmm, rc = -ENOMEM);
671
672                 rc = obd_checkmd(sbi->ll_osc_exp, sbi->ll_mdc_exp, lsm);
673                 if (rc)
674                         GOTO(free_lsm, rc);
675
676                 /* Perform glimpse_size operation. */
677                 memset(&st, 0, sizeof(st));
678
679                 rc = ll_glimpse_ioctl(sbi, lsm, &st);
680                 if (rc)
681                         GOTO(free_lsm, rc);
682
683                 rc = copy_to_user(&lumd->lmd_st, &st, sizeof(st));
684                 if (rc)
685                         GOTO(free_lsm, rc = -EFAULT);
686
687                 EXIT;
688         free_lsm:
689                 obd_free_memmd(sbi->ll_osc_exp, &lsm);
690         free_lmm:
691                 OBD_FREE(lmm, lmmsize);
692                 return rc;
693         }
694         case OBD_IOC_LLOG_CATINFO: {
695                 struct ptlrpc_request *req = NULL;
696                 char *buf = NULL;
697                 int rc, len = 0;
698                 char *bufs[3] = { NULL }, *str;
699                 int lens[3] = { sizeof(struct ptlrpc_body) };
700                 int size[2] = { sizeof(struct ptlrpc_body) };
701
702                 rc = obd_ioctl_getdata(&buf, &len, (void *)arg);
703                 if (rc)
704                         RETURN(rc);
705                 data = (void *)buf;
706
707                 if (!data->ioc_inlbuf1) {
708                         obd_ioctl_freedata(buf, len);
709                         RETURN(-EINVAL);
710                 }
711
712                 lens[REQ_REC_OFF] = data->ioc_inllen1;
713                 bufs[REQ_REC_OFF] = data->ioc_inlbuf1;
714                 if (data->ioc_inllen2) {
715                         lens[REQ_REC_OFF + 1] = data->ioc_inllen2;
716                         bufs[REQ_REC_OFF + 1] = data->ioc_inlbuf2;
717                 } else {
718                         lens[REQ_REC_OFF + 1] = 0;
719                         bufs[REQ_REC_OFF + 1] = NULL;
720                 }
721
722                 req = ptlrpc_prep_req(sbi2mdc(sbi)->cl_import,
723                                       LUSTRE_LOG_VERSION, LLOG_CATINFO, 3, lens,
724                                       bufs);
725                 if (!req)
726                         GOTO(out_catinfo, rc = -ENOMEM);
727
728                 size[REPLY_REC_OFF] = data->ioc_plen1;
729                 ptlrpc_req_set_repsize(req, 2, size);
730
731                 rc = ptlrpc_queue_wait(req);
732                 str = lustre_msg_string(req->rq_repmsg, REPLY_REC_OFF,
733                                         data->ioc_plen1);
734                 if (!rc)
735                         rc = copy_to_user(data->ioc_pbuf1, str,data->ioc_plen1);
736                 ptlrpc_req_finished(req);
737         out_catinfo:
738                 obd_ioctl_freedata(buf, len);
739                 RETURN(rc);
740         }
741         case OBD_IOC_QUOTACHECK: {
742                 struct obd_quotactl *oqctl;
743                 int rc, error = 0;
744
745                 if (!capable(CAP_SYS_ADMIN))
746                         RETURN(-EPERM);
747
748                 OBD_ALLOC_PTR(oqctl);
749                 if (!oqctl)
750                         RETURN(-ENOMEM);
751                 oqctl->qc_type = arg;
752                 rc = obd_quotacheck(sbi->ll_mdc_exp, oqctl);
753                 if (rc < 0) {
754                         CDEBUG(D_INFO, "mdc_quotacheck failed: rc %d\n", rc);
755                         error = rc;
756                 }
757
758                 rc = obd_quotacheck(sbi->ll_osc_exp, oqctl);
759                 if (rc < 0)
760                         CDEBUG(D_INFO, "osc_quotacheck failed: rc %d\n", rc);
761
762                 OBD_FREE_PTR(oqctl);
763                 return error ?: rc;
764         }
765         case OBD_IOC_POLL_QUOTACHECK: {
766                 struct if_quotacheck *check;
767                 int rc;
768
769                 if (!capable(CAP_SYS_ADMIN))
770                         RETURN(-EPERM);
771
772                 OBD_ALLOC_PTR(check);
773                 if (!check)
774                         RETURN(-ENOMEM);
775
776                 rc = obd_iocontrol(cmd, sbi->ll_mdc_exp, 0, (void *)check,
777                                    NULL);
778                 if (rc) {
779                         CDEBUG(D_QUOTA, "mdc ioctl %d failed: %d\n", cmd, rc);
780                         if (copy_to_user((void *)arg, check, sizeof(*check)))
781                                 rc = -EFAULT;
782                         GOTO(out_poll, rc);
783                 }
784
785                 rc = obd_iocontrol(cmd, sbi->ll_osc_exp, 0, (void *)check,
786                                    NULL);
787                 if (rc) {
788                         CDEBUG(D_QUOTA, "osc ioctl %d failed: %d\n", cmd, rc);
789                         if (copy_to_user((void *)arg, check, sizeof(*check)))
790                                 rc = -EFAULT;
791                         GOTO(out_poll, rc);
792                 }
793         out_poll:
794                 OBD_FREE_PTR(check);
795                 RETURN(rc);
796         }
797 #ifdef HAVE_QUOTA_SUPPORT
798         case OBD_IOC_QUOTACTL: {
799                 struct if_quotactl *qctl;
800                 struct obd_quotactl *oqctl;
801
802                 int cmd, type, id, rc = 0;
803
804                 OBD_ALLOC_PTR(qctl);
805                 if (!qctl)
806                         RETURN(-ENOMEM);
807
808                 OBD_ALLOC_PTR(oqctl);
809                 if (!oqctl) {
810                         OBD_FREE_PTR(qctl);
811                         RETURN(-ENOMEM);
812                 }
813                 if (copy_from_user(qctl, (void *)arg, sizeof(*qctl)))
814                         GOTO(out_quotactl, rc = -EFAULT);
815
816                 cmd = qctl->qc_cmd;
817                 type = qctl->qc_type;
818                 id = qctl->qc_id;
819                 switch (cmd) {
820                 case Q_QUOTAON:
821                 case Q_QUOTAOFF:
822                 case Q_SETQUOTA:
823                 case Q_SETINFO:
824                         if (!capable(CAP_SYS_ADMIN))
825                                 GOTO(out_quotactl, rc = -EPERM);
826                         break;
827                 case Q_GETQUOTA:
828                         if (((type == USRQUOTA && current->euid != id) ||
829                              (type == GRPQUOTA && !in_egroup_p(id))) &&
830                             !capable(CAP_SYS_ADMIN))
831                                 GOTO(out_quotactl, rc = -EPERM);
832
833                         /* XXX: dqb_valid is borrowed as a flag to mark that
834                          *      only mds quota is wanted */
835                         if (qctl->qc_dqblk.dqb_valid)
836                                 qctl->obd_uuid = sbi->ll_mdc_exp->exp_obd->
837                                                         u.cli.cl_target_uuid;
838                         break;
839                 case Q_GETINFO:
840                         break;
841                 default:
842                         CERROR("unsupported quotactl op: %#x\n", cmd);
843                         GOTO(out_quotactl, -ENOTTY);
844                 }
845
846                 QCTL_COPY(oqctl, qctl);
847
848                 if (qctl->obd_uuid.uuid[0]) {
849                         struct obd_device *obd;
850                         struct obd_uuid *uuid = &qctl->obd_uuid;
851
852                         obd = class_find_client_notype(uuid,
853                                          &sbi->ll_osc_exp->exp_obd->obd_uuid);
854                         if (!obd)
855                                 GOTO(out_quotactl, rc = -ENOENT);
856
857                         if (cmd == Q_GETINFO)
858                                 oqctl->qc_cmd = Q_GETOINFO;
859                         else if (cmd == Q_GETQUOTA)
860                                 oqctl->qc_cmd = Q_GETOQUOTA;
861                         else
862                                 GOTO(out_quotactl, rc = -EINVAL);
863
864                         if (sbi->ll_mdc_exp->exp_obd == obd) {
865                                 rc = obd_quotactl(sbi->ll_mdc_exp, oqctl);
866                         } else {
867                                 int i;
868                                 struct obd_export *exp;
869                                 struct lov_obd *lov = &sbi->ll_osc_exp->
870                                                             exp_obd->u.lov;
871
872                                 for (i = 0; i < lov->desc.ld_tgt_count; i++) {
873                                         if (!lov->lov_tgts[i] ||
874                                             !lov->lov_tgts[i]->ltd_active)
875                                                 continue;
876                                         exp = lov->lov_tgts[i]->ltd_exp;
877                                         if (exp->exp_obd == obd) {
878                                                 rc = obd_quotactl(exp, oqctl);
879                                                 break;
880                                         }
881                                 }
882                         }
883
884                         oqctl->qc_cmd = cmd;
885                         QCTL_COPY(qctl, oqctl);
886
887                         if (copy_to_user((void *)arg, qctl, sizeof(*qctl)))
888                                 rc = -EFAULT;
889
890                         GOTO(out_quotactl, rc);
891                 }
892
893                 rc = obd_quotactl(sbi->ll_mdc_exp, oqctl);
894                 if (rc && rc != -EBUSY && cmd == Q_QUOTAON) {
895                         oqctl->qc_cmd = Q_QUOTAOFF;
896                         obd_quotactl(sbi->ll_mdc_exp, oqctl);
897                 }
898
899                 QCTL_COPY(qctl, oqctl);
900
901                 if (copy_to_user((void *)arg, qctl, sizeof(*qctl)))
902                         rc = -EFAULT;
903         out_quotactl:
904                 OBD_FREE_PTR(qctl);
905                 OBD_FREE_PTR(oqctl);
906                 RETURN(rc);
907         }
908 #endif /* HAVE_QUOTA_SUPPORT */
909         case OBD_IOC_GETNAME: {
910                 struct obd_device *obd = class_exp2obd(sbi->ll_osc_exp);
911                 if (!obd)
912                         RETURN(-EFAULT);
913                 if (copy_to_user((void *)arg, obd->obd_name,
914                                 strlen(obd->obd_name) + 1))
915                         RETURN (-EFAULT);
916                 RETURN(0);
917         }
918         default:
919                 RETURN(obd_iocontrol(cmd, sbi->ll_osc_exp,0,NULL,(void *)arg));
920         }
921 }
922
923 int ll_dir_open(struct inode *inode, struct file *file)
924 {
925         ENTRY;
926         RETURN(ll_file_open(inode, file));
927 }
928
929 int ll_dir_release(struct inode *inode, struct file *file)
930 {
931         ENTRY;
932         RETURN(ll_file_release(inode, file));
933 }
934
935 struct file_operations ll_dir_operations = {
936         .open     = ll_dir_open,
937         .release  = ll_dir_release,
938         .read     = generic_read_dir,
939         .readdir  = ll_readdir,
940         .ioctl    = ll_dir_ioctl
941 };
942