Whamcloud - gitweb
Branch HEAD
[fs/lustre-release.git] / lustre / llite / dir.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * Copyright (C) 1992, 1993, 1994, 1995
5  * Remy Card (card@masi.ibp.fr)
6  * Laboratoire MASI - Institut Blaise Pascal
7  * Universite Pierre et Marie Curie (Paris VI)
8  *
9  *  from
10  *
11  *  linux/fs/minix/dir.c
12  *  linux/fs/ext2/dir.c
13  *
14  *  Copyright (C) 1991, 1992  Linus Torvalds
15  *
16  *  ext2 directory handling functions
17  *
18  *  Big-endian to little-endian byte-swapping/bitmaps by
19  *        David S. Miller (davem@caip.rutgers.edu), 1995
20  *
21  *  All code that works with directory layout had been switched to pagecache
22  *  and moved here. AV
23  *
24  *  Adapted for Lustre Light
25  *  Copyright (C) 2002-2003, Cluster File Systems, Inc.
26  *
27  */
28
29 #include <linux/fs.h>
30 #include <linux/ext2_fs.h>
31 #include <linux/pagemap.h>
32 #include <linux/mm.h>
33 #include <linux/version.h>
34 #include <linux/smp_lock.h>
35 #include <asm/uaccess.h>
36 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
37 # include <linux/locks.h>   // for wait_on_buffer
38 #else
39 # include <linux/buffer_head.h>   // for wait_on_buffer
40 #endif
41
42 #define DEBUG_SUBSYSTEM S_LLITE
43
44 #include <obd_support.h>
45 #include <obd_class.h>
46 #include <lustre_lib.h>
47 #include <lustre/lustre_idl.h>
48 #include <lustre_lite.h>
49 #include <lustre_dlm.h>
50 #include "llite_internal.h"
51
/* Short alias for the ext2 directory entry layout used in directory pages. */
typedef struct ext2_dir_entry_2 ext2_dirent;

/* Test / set the PG_checked bit in a page's flags word. */
#define PageChecked(pg)          test_bit(PG_checked, &(pg)->flags)
#define SetPageChecked(pg)       set_bit(PG_checked, &(pg)->flags)
56
57 /* returns the page unlocked, but with a reference */
58 static int ll_dir_readpage(struct file *file, struct page *page)
59 {
60         struct inode *inode = page->mapping->host;
61         struct ll_fid mdc_fid;
62         __u64 offset;
63         struct ptlrpc_request *request;
64         struct mds_body *body;
65         int rc = 0;
66         ENTRY;
67
68         offset = (__u64)page->index << CFS_PAGE_SHIFT;
69         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) off "LPU64"\n",
70                inode->i_ino, inode->i_generation, inode, offset);
71
72         mdc_pack_fid(&mdc_fid, inode->i_ino, inode->i_generation, S_IFDIR);
73
74         rc = mdc_readpage(ll_i2sbi(inode)->ll_mdc_exp, &mdc_fid,
75                           offset, page, &request);
76         if (!rc) {
77                 body = lustre_msg_buf(request->rq_repmsg, REPLY_REC_OFF,
78                                       sizeof(*body));
79                 LASSERT(body != NULL); /* checked by mdc_readpage() */
80                 /* swabbed by mdc_readpage() */
81                 LASSERT_REPSWABBED(request, REPLY_REC_OFF);
82
83                 inode->i_size = body->size;
84                 SetPageUptodate(page);
85         }
86         ptlrpc_req_finished(request);
87
88         unlock_page(page);
89         EXIT;
90         return rc;
91 }
92
93 struct address_space_operations ll_dir_aops = {
94         .readpage  = ll_dir_readpage,
95 };
96
97 /*
98  * ext2 uses block-sized chunks. Arguably, sector-sized ones would be
99  * more robust, but we have what we have
100  */
101 static inline unsigned ext2_chunk_size(struct inode *inode)
102 {
103         return inode->i_sb->s_blocksize;
104 }
105
/*
 * Undo what ll_get_dir_page() did to hand out @page: drop the kmap()
 * mapping, then release the page-cache reference.
 */
static inline void ext2_put_page(struct page *page)
{
        kunmap(page);                   /* mapping first ... */
        page_cache_release(page);       /* ... then the reference */
}
111
112 static inline unsigned long dir_pages(struct inode *inode)
113 {
114         return (inode->i_size+CFS_PAGE_SIZE-1) >> CFS_PAGE_SHIFT;
115 }
116
117
118 static void ext2_check_page(struct inode *dir, struct page *page)
119 {
120         unsigned chunk_size = ext2_chunk_size(dir);
121         char *kaddr = page_address(page);
122         //      u32 max_inumber = le32_to_cpu(sb->u.ext2_sb.s_es->s_inodes_count);
123         unsigned rec_len;
124         __u64 offs, limit = CFS_PAGE_SIZE;
125         ext2_dirent *p;
126         char *error;
127
128         if ((dir->i_size >> CFS_PAGE_SHIFT) == (__u64)page->index) {
129                 limit = dir->i_size & ~CFS_PAGE_MASK;
130                 if (limit & (chunk_size - 1)) {
131                         CERROR("limit "LPU64" dir size %lld index "LPU64"\n",
132                                limit, dir->i_size, (__u64)page->index);
133                         goto Ebadsize;
134                 }
135                 for (offs = limit; offs < CFS_PAGE_SIZE; offs += chunk_size) {
136                         ext2_dirent *p = (ext2_dirent*)(kaddr + offs);
137                         p->rec_len = cpu_to_le16(chunk_size);
138                         p->name_len = 0;
139                         p->inode = 0;
140                 }
141                 if (!limit)
142                         goto out;
143         }
144         for (offs = 0; offs <= limit - EXT2_DIR_REC_LEN(1); offs += rec_len) {
145                 p = (ext2_dirent *)(kaddr + offs);
146                 rec_len = le16_to_cpu(p->rec_len);
147
148                 if (rec_len < EXT2_DIR_REC_LEN(1))
149                         goto Eshort;
150                 if (rec_len & 3)
151                         goto Ealign;
152                 if (rec_len < EXT2_DIR_REC_LEN(p->name_len))
153                         goto Enamelen;
154                 if (((offs + rec_len - 1) ^ offs) & ~(chunk_size-1))
155                         goto Espan;
156                 //              if (le32_to_cpu(p->inode) > max_inumber)
157                 //goto Einumber;
158         }
159         if (offs != limit)
160                 goto Eend;
161 out:
162         SetPageChecked(page);
163         return;
164
165         /* Too bad, we had an error */
166
167 Ebadsize:
168         CERROR("%s: directory %lu/%u size %llu is not a multiple of %u\n",
169                ll_i2mdcexp(dir)->exp_obd->obd_name, dir->i_ino,
170                dir->i_generation, dir->i_size, chunk_size);
171         goto fail;
172 Eshort:
173         error = "rec_len is smaller than minimal";
174         goto bad_entry;
175 Ealign:
176         error = "unaligned directory entry";
177         goto bad_entry;
178 Enamelen:
179         error = "rec_len is too small for name_len";
180         goto bad_entry;
181 Espan:
182         error = "directory entry across blocks";
183         goto bad_entry;
184         //Einumber:
185         // error = "inode out of bounds";
186 bad_entry:
187         CERROR("%s: bad entry in directory %lu/%u: %s - "
188                "offset="LPU64"+"LPU64", inode=%lu, rec_len=%d, name_len=%d\n",
189                ll_i2mdcexp(dir)->exp_obd->obd_name, dir->i_ino,
190                dir->i_generation, error, (__u64)page->index << CFS_PAGE_SHIFT,
191                offs, (unsigned long)le32_to_cpu(p->inode),
192                rec_len, p->name_len);
193         goto fail;
194 Eend:
195         p = (ext2_dirent *)(kaddr + offs);
196         CERROR("%s: entry in directory %lu/%u spans the page boundary "
197                "offset="LPU64"+"LPU64", inode=%lu\n",ll_i2mdcexp(dir)->exp_obd->obd_name,
198                dir->i_ino, dir->i_generation,
199                (__u64)page->index << CFS_PAGE_SHIFT,
200                offs, (unsigned long)le32_to_cpu(p->inode));
201 fail:
202         SetPageChecked(page);
203         SetPageError(page);
204 }
205
206 static struct page *ll_get_dir_page(struct inode *dir, unsigned long n)
207 {
208         struct ldlm_res_id res_id =
209                 { .name = { dir->i_ino, (__u64)dir->i_generation} };
210         struct lustre_handle lockh;
211         struct obd_device *obddev = class_exp2obd(ll_i2sbi(dir)->ll_mdc_exp);
212         struct address_space *mapping = dir->i_mapping;
213         struct page *page;
214         ldlm_policy_data_t policy = {.l_inodebits = {MDS_INODELOCK_UPDATE} };
215         int rc;
216
217         rc = ldlm_lock_match(obddev->obd_namespace, LDLM_FL_BLOCK_GRANTED,
218                              &res_id, LDLM_IBITS, &policy, LCK_CR, &lockh);
219         if (!rc) {
220                 struct lookup_intent it = { .it_op = IT_READDIR };
221                 struct ptlrpc_request *request;
222                 struct mdc_op_data data;
223
224                 ll_prepare_mdc_op_data(&data, dir, NULL, NULL, 0, 0);
225
226                 rc = mdc_enqueue(ll_i2sbi(dir)->ll_mdc_exp, LDLM_IBITS, &it,
227                                  LCK_CR, &data, &lockh, NULL, 0,
228                                  ldlm_completion_ast, ll_mdc_blocking_ast, dir,
229                                  0);
230
231                 request = (struct ptlrpc_request *)it.d.lustre.it_data;
232                 if (request)
233                         ptlrpc_req_finished(request);
234                 if (rc < 0) {
235                         CERROR("lock enqueue: rc: %d\n", rc);
236                         return ERR_PTR(rc);
237                 }
238         }
239         ldlm_lock_dump_handle(D_OTHER, &lockh);
240
241         page = read_cache_page(mapping, n,
242                                (filler_t*)mapping->a_ops->readpage, NULL);
243         if (IS_ERR(page))
244                 GOTO(out_unlock, page);
245
246         wait_on_page(page);
247         (void)kmap(page);
248         if (!PageUptodate(page))
249                 goto fail;
250         if (!PageChecked(page))
251                 ext2_check_page(dir, page);
252         if (PageError(page))
253                 goto fail;
254
255 out_unlock:
256         ldlm_lock_decref(&lockh, LCK_CR);
257         return page;
258
259 fail:
260         ext2_put_page(page);
261         page = ERR_PTR(-EIO);
262         goto out_unlock;
263 }
264
265 /*
266  * p is at least 6 bytes before the end of page
267  */
268 static inline ext2_dirent *ext2_next_entry(ext2_dirent *p)
269 {
270         return (ext2_dirent *)((char*)p + le16_to_cpu(p->rec_len));
271 }
272
273 static inline unsigned
274 ext2_validate_entry(char *base, unsigned offset, unsigned mask)
275 {
276         ext2_dirent *de = (ext2_dirent*)(base + offset);
277         ext2_dirent *p = (ext2_dirent*)(base + (offset&mask));
278         while ((char*)p < (char*)de)
279                 p = ext2_next_entry(p);
280         return (char *)p - base;
281 }
282
283 static unsigned char ext2_filetype_table[EXT2_FT_MAX] = {
284         [EXT2_FT_UNKNOWN]       DT_UNKNOWN,
285         [EXT2_FT_REG_FILE]      DT_REG,
286         [EXT2_FT_DIR]           DT_DIR,
287         [EXT2_FT_CHRDEV]        DT_CHR,
288         [EXT2_FT_BLKDEV]        DT_BLK,
289         [EXT2_FT_FIFO]          DT_FIFO,
290         [EXT2_FT_SOCK]          DT_SOCK,
291         [EXT2_FT_SYMLINK]       DT_LNK,
292 };
293
294
295 int ll_readdir(struct file *filp, void *dirent, filldir_t filldir)
296 {
297         struct inode *inode = filp->f_dentry->d_inode;
298         loff_t pos = filp->f_pos;
299         // XXX struct super_block *sb = inode->i_sb;
300         __u64 offset = pos & ~CFS_PAGE_MASK;
301         __u64 n = pos >> CFS_PAGE_SHIFT;
302         unsigned long npages = dir_pages(inode);
303         unsigned chunk_mask = ~(ext2_chunk_size(inode)-1);
304         unsigned char *types = ext2_filetype_table;
305         int need_revalidate = (filp->f_version != inode->i_version);
306         int rc = 0;
307         ENTRY;
308
309         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) pos %llu/%llu\n",
310                inode->i_ino, inode->i_generation, inode, pos, inode->i_size);
311
312         if (pos > inode->i_size - EXT2_DIR_REC_LEN(1))
313                 RETURN(0);
314
315         for ( ; n < npages; n++, offset = 0) {
316                 char *kaddr, *limit;
317                 ext2_dirent *de;
318                 struct page *page;
319
320                 CDEBUG(D_EXT2,"read %lu of dir %lu/%u page "LPU64"/%lu "
321                        "size %llu\n",
322                        CFS_PAGE_SIZE, inode->i_ino, inode->i_generation,
323                        n, npages, inode->i_size);
324                 page = ll_get_dir_page(inode, n);
325
326                 /* size might have been updated by mdc_readpage */
327                 npages = dir_pages(inode);
328
329                 if (IS_ERR(page)) {
330                         rc = PTR_ERR(page);
331                         CERROR("error reading dir %lu/%u page "LPU64": rc %d\n",
332                                inode->i_ino, inode->i_generation, n, rc);
333                         continue;
334                 }
335
336                 kaddr = page_address(page);
337                 if (need_revalidate) {
338                         /* page already checked from ll_get_dir_page() */
339                         offset = ext2_validate_entry(kaddr, offset, chunk_mask);
340                         need_revalidate = 0;
341                 }
342                 de = (ext2_dirent *)(kaddr+offset);
343                 limit = kaddr + CFS_PAGE_SIZE - EXT2_DIR_REC_LEN(1);
344                 for ( ;(char*)de <= limit; de = ext2_next_entry(de)) {
345                         if (de->inode) {
346                                 int over;
347
348                                 rc = 0; /* no error if we return something */
349
350                                 offset = (char *)de - kaddr;
351                                 over = filldir(dirent, de->name, de->name_len,
352                                                (n << CFS_PAGE_SHIFT) | offset,
353                                                le32_to_cpu(de->inode),
354                                                types[de->file_type &
355                                                      (EXT2_FT_MAX - 1)]);
356                                 if (over) {
357                                         ext2_put_page(page);
358                                         GOTO(done, rc);
359                                 }
360                         }
361                 }
362                 ext2_put_page(page);
363         }
364
365 done:
366         filp->f_pos = (n << CFS_PAGE_SHIFT) | offset;
367         filp->f_version = inode->i_version;
368         touch_atime(filp->f_vfsmnt, filp->f_dentry);
369
370         RETURN(rc);
371 }
372
/* Apply Q_COPY() to each of the qc_* quota-control fields, src to dst. */
#define QCTL_COPY(dst, src)             \
do {                                    \
        Q_COPY(dst, src, qc_cmd);       \
        Q_COPY(dst, src, qc_type);      \
        Q_COPY(dst, src, qc_id);        \
        Q_COPY(dst, src, qc_stat);      \
        Q_COPY(dst, src, qc_dqinfo);    \
        Q_COPY(dst, src, qc_dqblk);     \
} while (0)
382
383 int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump)
384 {
385         struct ll_sb_info *sbi = ll_i2sbi(inode);
386         struct mdc_op_data data;
387         struct ptlrpc_request *req = NULL;
388
389         struct iattr attr = { 0 };
390         int rc = 0;
391
392         /*
393          * This is coming from userspace, so should be in
394          * local endian.  But the MDS would like it in little
395          * endian, so we swab it before we send it.
396          */
397         if (lump->lmm_magic != LOV_USER_MAGIC)
398                 RETURN(-EINVAL);
399
400         if (lump->lmm_magic != cpu_to_le32(LOV_USER_MAGIC))
401                 lustre_swab_lov_user_md(lump);
402
403         ll_prepare_mdc_op_data(&data, inode, NULL, NULL, 0, 0);
404
405         /* swabbing is done in lov_setstripe() on server side */
406         rc = mdc_setattr(sbi->ll_mdc_exp, &data,
407                          &attr, lump, sizeof(*lump), NULL, 0, &req);
408         if (rc) {
409                 ptlrpc_req_finished(req);
410                 if (rc != -EPERM && rc != -EACCES)
411                         CERROR("mdc_setattr fails: rc = %d\n", rc);
412                 return rc;
413         }
414         ptlrpc_req_finished(req);
415
416         return rc;
417
418 }
419
420 int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmmp, 
421                      int *lmm_size, struct ptlrpc_request **request) 
422 {
423         struct ll_sb_info *sbi = ll_i2sbi(inode);
424         struct ll_fid     fid;
425         struct mds_body   *body;
426         struct lov_mds_md *lmm = NULL;
427         struct ptlrpc_request *req = NULL;
428         int rc, lmmsize;
429
430         ll_inode2fid(&fid, inode);
431
432         rc = ll_get_max_mdsize(sbi, &lmmsize);
433         if (rc)
434                 RETURN(rc);
435
436         rc = mdc_getattr(sbi->ll_mdc_exp, &fid,
437                         OBD_MD_FLEASIZE|OBD_MD_FLDIREA,
438                         lmmsize, &req);
439         if (rc < 0) {
440                 CDEBUG(D_INFO, "mdc_getattr failed on inode "
441                        "%lu/%u: rc %d\n", inode->i_ino,
442                        inode->i_generation, rc);
443                 GOTO(out, rc);
444         }
445         body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF,
446                         sizeof(*body));
447         LASSERT(body != NULL); /* checked by mdc_getattr_name */
448         /* swabbed by mdc_getattr_name */
449         LASSERT_REPSWABBED(req, REPLY_REC_OFF);
450
451         lmmsize = body->eadatasize;
452
453         if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
454             lmmsize == 0) {
455                 GOTO(out, rc = -ENODATA);
456         }
457
458         lmm = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF + 1, lmmsize);
459         LASSERT(lmm != NULL);
460         LASSERT_REPSWABBED(req, REPLY_REC_OFF + 1);
461
462         /*
463          * This is coming from the MDS, so is probably in
464          * little endian.  We convert it to host endian before
465          * passing it to userspace.
466          */
467         if (lmm->lmm_magic == __swab32(LOV_MAGIC)) {
468                 lustre_swab_lov_user_md((struct lov_user_md *)lmm);
469                 lustre_swab_lov_user_md_objects((struct lov_user_md *)lmm);
470         }
471 out:
472         *lmmp = lmm;
473         *lmm_size = lmmsize;
474         *request = req;
475         return rc;
476 }
477
478 static int ll_dir_ioctl(struct inode *inode, struct file *file,
479                         unsigned int cmd, unsigned long arg)
480 {
481         struct ll_sb_info *sbi = ll_i2sbi(inode);
482         struct obd_ioctl_data *data;
483         ENTRY;
484
485         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), cmd=%#x\n",
486                inode->i_ino, inode->i_generation, inode, cmd);
487
488         /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
489         if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
490                 return -ENOTTY;
491
492         lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_IOCTL);
493         switch(cmd) {
494         case EXT3_IOC_GETFLAGS:
495         case EXT3_IOC_SETFLAGS:
496                 RETURN(ll_iocontrol(inode, file, cmd, arg));
497         case EXT3_IOC_GETVERSION_OLD:
498         case EXT3_IOC_GETVERSION:
499                 RETURN(put_user(inode->i_generation, (int *)arg));
500         /* We need to special case any other ioctls we want to handle,
501          * to send them to the MDS/OST as appropriate and to properly
502          * network encode the arg field.
503         case EXT3_IOC_SETVERSION_OLD:
504         case EXT3_IOC_SETVERSION:
505         */
506         case IOC_MDC_LOOKUP: {
507                 struct ptlrpc_request *request = NULL;
508                 struct ll_fid fid;
509                 char *buf = NULL;
510                 char *filename;
511                 int namelen, rc, len = 0;
512
513                 rc = obd_ioctl_getdata(&buf, &len, (void *)arg);
514                 if (rc)
515                         RETURN(rc);
516                 data = (void *)buf;
517
518                 filename = data->ioc_inlbuf1;
519                 namelen = data->ioc_inllen1;
520
521                 if (namelen < 1) {
522                         CDEBUG(D_INFO, "IOC_MDC_LOOKUP missing filename\n");
523                         GOTO(out, rc = -EINVAL);
524                 }
525
526                 ll_inode2fid(&fid, inode);
527                 rc = mdc_getattr_name(sbi->ll_mdc_exp, &fid, filename, namelen,
528                                       OBD_MD_FLID, 0, &request);
529                 if (rc < 0) {
530                         CDEBUG(D_INFO, "mdc_getattr_name: %d\n", rc);
531                         GOTO(out, rc);
532                 }
533
534                 ptlrpc_req_finished(request);
535
536                 EXIT;
537         out:
538                 obd_ioctl_freedata(buf, len);
539                 return rc;
540         }
541         case LL_IOC_LOV_SETSTRIPE: {
542                 struct lov_user_md lum, *lump = (struct lov_user_md *)arg;
543                 int rc = 0;
544
545                 LASSERT(sizeof(lum) == sizeof(*lump));
546                 LASSERT(sizeof(lum.lmm_objects[0]) ==
547                         sizeof(lump->lmm_objects[0]));
548                 rc = copy_from_user(&lum, lump, sizeof(lum));
549                 if (rc)
550                         return(-EFAULT);
551
552                 rc = ll_dir_setstripe(inode, &lum);
553
554                 return rc;
555         }
556         case LL_IOC_OBD_STATFS:
557                 RETURN(ll_obd_statfs(inode, (void *)arg));
558         case LL_IOC_LOV_GETSTRIPE:
559         case LL_IOC_MDC_GETINFO:
560         case IOC_MDC_GETFILEINFO:
561         case IOC_MDC_GETFILESTRIPE: {
562                 struct ptlrpc_request *request = NULL;
563                 struct mds_body *body;
564                 struct lov_user_md *lump;
565                 struct lov_mds_md *lmm = NULL;
566                 char *filename = NULL;
567                 int rc, lmmsize;
568
569                 if (cmd == IOC_MDC_GETFILEINFO ||
570                     cmd == IOC_MDC_GETFILESTRIPE) {
571                         filename = getname((const char *)arg);
572                         if (IS_ERR(filename))
573                                 RETURN(PTR_ERR(filename));
574
575                         rc = ll_lov_getstripe_ea_info(inode, filename, &lmm, 
576                                                       &lmmsize, &request);
577                 } else {
578                         rc = ll_dir_getstripe(inode, &lmm, &lmmsize, &request);
579                 }
580
581                 if (request) {
582                         body = lustre_msg_buf(request->rq_repmsg, REPLY_REC_OFF,
583                                               sizeof(*body));
584                         LASSERT(body != NULL); /* checked by mdc_getattr_name */
585                         /* swabbed by mdc_getattr_name */
586                         LASSERT_REPSWABBED(request, REPLY_REC_OFF);
587                 } else {
588                         GOTO(out_req, rc);
589                 }
590
591                 if (rc < 0) {
592                         if (rc == -ENODATA && (cmd == IOC_MDC_GETFILEINFO || 
593                                                cmd == LL_IOC_MDC_GETINFO))
594                                 GOTO(skip_lmm, rc = 0);
595                         else
596                                 GOTO(out_req, rc);
597                 }
598
599                 if (cmd == IOC_MDC_GETFILESTRIPE ||
600                     cmd == LL_IOC_LOV_GETSTRIPE) {
601                         lump = (struct lov_user_md *)arg;
602                 } else {
603                         struct lov_user_mds_data *lmdp;
604                         lmdp = (struct lov_user_mds_data *)arg;
605                         lump = &lmdp->lmd_lmm;
606                 }
607                 rc = copy_to_user(lump, lmm, lmmsize);
608                 if (rc)
609                         GOTO(out_lmm, rc = -EFAULT);
610         skip_lmm:
611                 if (cmd == IOC_MDC_GETFILEINFO || cmd == LL_IOC_MDC_GETINFO) {
612                         struct lov_user_mds_data *lmdp;
613                         lstat_t st = { 0 };
614
615                         st.st_dev     = inode->i_sb->s_dev;
616                         st.st_mode    = body->mode;
617                         st.st_nlink   = body->nlink;
618                         st.st_uid     = body->uid;
619                         st.st_gid     = body->gid;
620                         st.st_rdev    = body->rdev;
621                         st.st_size    = body->size;
622                         st.st_blksize = CFS_PAGE_SIZE;
623                         st.st_blocks  = body->blocks;
624                         st.st_atime   = body->atime;
625                         st.st_mtime   = body->mtime;
626                         st.st_ctime   = body->ctime;
627                         st.st_ino     = body->ino;
628
629                         lmdp = (struct lov_user_mds_data *)arg;
630                         rc = copy_to_user(&lmdp->lmd_st, &st, sizeof(st));
631                         if (rc)
632                                 GOTO(out_lmm, rc = -EFAULT);
633                 }
634
635                 EXIT;
636         out_lmm:
637                 if (lmm && lmm->lmm_magic == LOV_MAGIC_JOIN)
638                         OBD_FREE(lmm, lmmsize);
639         out_req:
640                 ptlrpc_req_finished(request);
641                 if (filename)
642                         putname(filename);
643                 return rc;
644         }
645         case IOC_LOV_GETINFO: {
646                 struct lov_user_mds_data *lumd;
647                 struct lov_stripe_md *lsm;
648                 struct lov_user_md *lum;
649                 struct lov_mds_md *lmm;
650                 int lmmsize;
651                 lstat_t st;
652                 int rc;
653
654                 lumd = (struct lov_user_mds_data *)arg;
655                 lum = &lumd->lmd_lmm;
656
657                 rc = ll_get_max_mdsize(sbi, &lmmsize);
658                 if (rc)
659                         RETURN(rc);
660
661                 OBD_ALLOC(lmm, lmmsize);
662                 rc = copy_from_user(lmm, lum, lmmsize);
663                 if (rc)
664                         GOTO(free_lmm, rc = -EFAULT);
665
666                 rc = obd_unpackmd(sbi->ll_osc_exp, &lsm, lmm, lmmsize);
667                 if (rc < 0)
668                         GOTO(free_lmm, rc = -ENOMEM);
669
670                 rc = obd_checkmd(sbi->ll_osc_exp, sbi->ll_mdc_exp, lsm);
671                 if (rc)
672                         GOTO(free_lsm, rc);
673
674                 /* Perform glimpse_size operation. */
675                 memset(&st, 0, sizeof(st));
676
677                 rc = ll_glimpse_ioctl(sbi, lsm, &st);
678                 if (rc)
679                         GOTO(free_lsm, rc);
680
681                 rc = copy_to_user(&lumd->lmd_st, &st, sizeof(st));
682                 if (rc)
683                         GOTO(free_lsm, rc = -EFAULT);
684
685                 EXIT;
686         free_lsm:
687                 obd_free_memmd(sbi->ll_osc_exp, &lsm);
688         free_lmm:
689                 OBD_FREE(lmm, lmmsize);
690                 return rc;
691         }
692         case OBD_IOC_LLOG_CATINFO: {
693                 struct ptlrpc_request *req = NULL;
694                 char *buf = NULL;
695                 int rc, len = 0;
696                 char *bufs[3] = { NULL }, *str;
697                 int lens[3] = { sizeof(struct ptlrpc_body) };
698                 int size[2] = { sizeof(struct ptlrpc_body) };
699
700                 rc = obd_ioctl_getdata(&buf, &len, (void *)arg);
701                 if (rc)
702                         RETURN(rc);
703                 data = (void *)buf;
704
705                 if (!data->ioc_inlbuf1) {
706                         obd_ioctl_freedata(buf, len);
707                         RETURN(-EINVAL);
708                 }
709
710                 lens[REQ_REC_OFF] = data->ioc_inllen1;
711                 bufs[REQ_REC_OFF] = data->ioc_inlbuf1;
712                 if (data->ioc_inllen2) {
713                         lens[REQ_REC_OFF + 1] = data->ioc_inllen2;
714                         bufs[REQ_REC_OFF + 1] = data->ioc_inlbuf2;
715                 } else {
716                         lens[REQ_REC_OFF + 1] = 0;
717                         bufs[REQ_REC_OFF + 1] = NULL;
718                 }
719
720                 req = ptlrpc_prep_req(sbi2mdc(sbi)->cl_import,
721                                       LUSTRE_LOG_VERSION, LLOG_CATINFO, 3, lens,
722                                       bufs);
723                 if (!req)
724                         GOTO(out_catinfo, rc = -ENOMEM);
725
726                 size[REPLY_REC_OFF] = data->ioc_plen1;
727                 ptlrpc_req_set_repsize(req, 2, size);
728
729                 rc = ptlrpc_queue_wait(req);
730                 str = lustre_msg_string(req->rq_repmsg, REPLY_REC_OFF,
731                                         data->ioc_plen1);
732                 if (!rc)
733                         rc = copy_to_user(data->ioc_pbuf1, str,data->ioc_plen1);
734                 ptlrpc_req_finished(req);
735         out_catinfo:
736                 obd_ioctl_freedata(buf, len);
737                 RETURN(rc);
738         }
739         case OBD_IOC_QUOTACHECK: {
740                 struct obd_quotactl *oqctl;
741                 int rc, error = 0;
742
743                 if (!capable(CAP_SYS_ADMIN))
744                         RETURN(-EPERM);
745
746                 OBD_ALLOC_PTR(oqctl);
747                 if (!oqctl)
748                         RETURN(-ENOMEM);
749                 oqctl->qc_type = arg;
750                 rc = obd_quotacheck(sbi->ll_mdc_exp, oqctl);
751                 if (rc < 0) {
752                         CDEBUG(D_INFO, "mdc_quotacheck failed: rc %d\n", rc);
753                         error = rc;
754                 }
755
756                 rc = obd_quotacheck(sbi->ll_osc_exp, oqctl);
757                 if (rc < 0)
758                         CDEBUG(D_INFO, "osc_quotacheck failed: rc %d\n", rc);
759
760                 OBD_FREE_PTR(oqctl);
761                 return error ?: rc;
762         }
763         case OBD_IOC_POLL_QUOTACHECK: {
764                 struct if_quotacheck *check;
765                 int rc;
766
767                 if (!capable(CAP_SYS_ADMIN))
768                         RETURN(-EPERM);
769
770                 OBD_ALLOC_PTR(check);
771                 if (!check)
772                         RETURN(-ENOMEM);
773
774                 rc = obd_iocontrol(cmd, sbi->ll_mdc_exp, 0, (void *)check,
775                                    NULL);
776                 if (rc) {
777                         CDEBUG(D_QUOTA, "mdc ioctl %d failed: %d\n", cmd, rc);
778                         if (copy_to_user((void *)arg, check, sizeof(*check)))
779                                 rc = -EFAULT;
780                         GOTO(out_poll, rc);
781                 }
782
783                 rc = obd_iocontrol(cmd, sbi->ll_osc_exp, 0, (void *)check,
784                                    NULL);
785                 if (rc) {
786                         CDEBUG(D_QUOTA, "osc ioctl %d failed: %d\n", cmd, rc);
787                         if (copy_to_user((void *)arg, check, sizeof(*check)))
788                                 rc = -EFAULT;
789                         GOTO(out_poll, rc);
790                 }
791         out_poll:
792                 OBD_FREE_PTR(check);
793                 RETURN(rc);
794         }
795 #ifdef HAVE_QUOTA_SUPPORT
796         case OBD_IOC_QUOTACTL: {
797                 struct if_quotactl *qctl;
798                 struct obd_quotactl *oqctl;
799
800                 int cmd, type, id, rc = 0;
801
802                 OBD_ALLOC_PTR(qctl);
803                 if (!qctl)
804                         RETURN(-ENOMEM);
805
806                 OBD_ALLOC_PTR(oqctl);
807                 if (!oqctl) {
808                         OBD_FREE_PTR(qctl);
809                         RETURN(-ENOMEM);
810                 }
811                 if (copy_from_user(qctl, (void *)arg, sizeof(*qctl)))
812                         GOTO(out_quotactl, rc = -EFAULT);
813
814                 cmd = qctl->qc_cmd;
815                 type = qctl->qc_type;
816                 id = qctl->qc_id;
817                 switch (cmd) {
818                 case Q_QUOTAON:
819                 case Q_QUOTAOFF:
820                 case Q_SETQUOTA:
821                 case Q_SETINFO:
822                         if (!capable(CAP_SYS_ADMIN))
823                                 GOTO(out_quotactl, rc = -EPERM);
824                         break;
825                 case Q_GETQUOTA:
826                         if (((type == USRQUOTA && current->euid != id) ||
827                              (type == GRPQUOTA && !in_egroup_p(id))) &&
828                             !capable(CAP_SYS_ADMIN))
829                                 GOTO(out_quotactl, rc = -EPERM);
830
831                         /* XXX: dqb_valid is borrowed as a flag to mark that
832                          *      only mds quota is wanted */
833                         if (qctl->qc_dqblk.dqb_valid)
834                                 qctl->obd_uuid = sbi->ll_mdc_exp->exp_obd->
835                                                         u.cli.cl_target_uuid;
836                         break;
837                 case Q_GETINFO:
838                         break;
839                 default:
840                         CERROR("unsupported quotactl op: %#x\n", cmd);
841                         GOTO(out_quotactl, -ENOTTY);
842                 }
843
844                 QCTL_COPY(oqctl, qctl);
845
846                 if (qctl->obd_uuid.uuid[0]) {
847                         struct obd_device *obd;
848                         struct obd_uuid *uuid = &qctl->obd_uuid;
849
850                         obd = class_find_client_notype(uuid,
851                                          &sbi->ll_osc_exp->exp_obd->obd_uuid);
852                         if (!obd)
853                                 GOTO(out_quotactl, rc = -ENOENT);
854
855                         if (cmd == Q_GETINFO)
856                                 oqctl->qc_cmd = Q_GETOINFO;
857                         else if (cmd == Q_GETQUOTA)
858                                 oqctl->qc_cmd = Q_GETOQUOTA;
859                         else
860                                 GOTO(out_quotactl, rc = -EINVAL);
861
862                         if (sbi->ll_mdc_exp->exp_obd == obd) {
863                                 rc = obd_quotactl(sbi->ll_mdc_exp, oqctl);
864                         } else {
865                                 int i;
866                                 struct obd_export *exp;
867                                 struct lov_obd *lov = &sbi->ll_osc_exp->
868                                                             exp_obd->u.lov;
869
870                                 for (i = 0; i < lov->desc.ld_tgt_count; i++) {
871                                         if (!lov->lov_tgts[i] ||
872                                             !lov->lov_tgts[i]->ltd_active)
873                                                 continue;
874                                         exp = lov->lov_tgts[i]->ltd_exp;
875                                         if (exp->exp_obd == obd) {
876                                                 rc = obd_quotactl(exp, oqctl);
877                                                 break;
878                                         }
879                                 }
880                         }
881
882                         oqctl->qc_cmd = cmd;
883                         QCTL_COPY(qctl, oqctl);
884
885                         if (copy_to_user((void *)arg, qctl, sizeof(*qctl)))
886                                 rc = -EFAULT;
887
888                         GOTO(out_quotactl, rc);
889                 }
890
891                 rc = obd_quotactl(sbi->ll_mdc_exp, oqctl);
892                 if (rc && rc != -EBUSY && cmd == Q_QUOTAON) {
893                         oqctl->qc_cmd = Q_QUOTAOFF;
894                         obd_quotactl(sbi->ll_mdc_exp, oqctl);
895                 }
896
897                 QCTL_COPY(qctl, oqctl);
898
899                 if (copy_to_user((void *)arg, qctl, sizeof(*qctl)))
900                         rc = -EFAULT;
901         out_quotactl:
902                 OBD_FREE_PTR(qctl);
903                 OBD_FREE_PTR(oqctl);
904                 RETURN(rc);
905         }
906 #endif /* HAVE_QUOTA_SUPPORT */
907         case OBD_IOC_GETNAME: {
908                 struct obd_device *obd = class_exp2obd(sbi->ll_osc_exp);
909                 if (!obd)
910                         RETURN(-EFAULT);
911                 if (copy_to_user((void *)arg, obd->obd_name,
912                                 strlen(obd->obd_name) + 1))
913                         RETURN (-EFAULT);
914                 RETURN(0);
915         }
916         default:
917                 RETURN(obd_iocontrol(cmd, sbi->ll_osc_exp,0,NULL,(void *)arg));
918         }
919 }
920
921 int ll_dir_open(struct inode *inode, struct file *file)
922 {
923         ENTRY;
924         RETURN(ll_file_open(inode, file));
925 }
926
927 int ll_dir_release(struct inode *inode, struct file *file)
928 {
929         ENTRY;
930         RETURN(ll_file_release(inode, file));
931 }
932
933 struct file_operations ll_dir_operations = {
934         .open     = ll_dir_open,
935         .release  = ll_dir_release,
936         .read     = generic_read_dir,
937         .readdir  = ll_readdir,
938         .ioctl    = ll_dir_ioctl
939 };
940