Whamcloud - gitweb
- fixes in llite with using mds_body vs. mdt_body
[fs/lustre-release.git] / lustre / llite / dir.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * Copyright (C) 1992, 1993, 1994, 1995
5  * Remy Card (card@masi.ibp.fr)
6  * Laboratoire MASI - Institut Blaise Pascal
7  * Universite Pierre et Marie Curie (Paris VI)
8  *
9  *  from
10  *
11  *  linux/fs/minix/dir.c
12  *  linux/fs/ext2/dir.c
13  *
14  *  Copyright (C) 1991, 1992  Linus Torvalds
15  *
16  *  ext2 directory handling functions
17  *
18  *  Big-endian to little-endian byte-swapping/bitmaps by
19  *        David S. Miller (davem@caip.rutgers.edu), 1995
20  *
21  *  All code that works with directory layout had been switched to pagecache
22  *  and moved here. AV
23  *
24  *  Adapted for Lustre Light
25  *  Copyright (C) 2002-2003, Cluster File Systems, Inc.
26  *
27  */
28
29 #include <linux/fs.h>
30 #include <linux/ext2_fs.h>
31 #include <linux/pagemap.h>
32 #include <linux/mm.h>
33 #include <linux/version.h>
34 #include <linux/smp_lock.h>
35 #include <asm/uaccess.h>
36 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
37 # include <linux/locks.h>   // for wait_on_buffer
38 #else
39 # include <linux/buffer_head.h>   // for wait_on_buffer
40 #endif
41
42 #define DEBUG_SUBSYSTEM S_LLITE
43
44 #include <linux/obd_support.h>
45 #include <linux/obd_class.h>
46 #include <linux/lustre_lib.h>
47 #include <linux/lustre_idl.h>
48 #include <linux/lustre_mdc.h>
49 #include <linux/lustre_lite.h>
50 #include <linux/lustre_dlm.h>
51 #include "llite_internal.h"
52
/* Short alias for the ext2 directory entry structure used throughout. */
typedef struct ext2_dir_entry_2 ext2_dirent;

/*
 * Test / set the PG_checked bit in a page's flags.  ext2_check_page() sets
 * it once a directory page has been examined so that ll_get_dir_page() does
 * not check the same page again.
 */
#define PageChecked(page)       test_bit(PG_checked, &(page)->flags)
#define SetPageChecked(page)    set_bit(PG_checked, &(page)->flags)
57
58 /* returns the page unlocked, but with a reference */
59 static int ll_dir_readpage(struct file *file, struct page *page)
60 {
61         struct inode *inode = page->mapping->host;
62         struct lu_fid fid;
63         __u64 offset;
64         struct ptlrpc_request *request;
65         struct mdt_body *body;
66         int rc = 0;
67         ENTRY;
68
69         offset = (__u64)page->index << PAGE_SHIFT;
70         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) off "LPU64"\n",
71                inode->i_ino, inode->i_generation, inode, offset);
72
73         ll_inode2fid(&fid, inode);
74
75         rc = mdc_readpage(ll_i2sbi(inode)->ll_mdc_exp, &fid,
76                           offset, page, &request);
77         if (!rc) {
78                 body = lustre_msg_buf(request->rq_repmsg, 0, sizeof (*body));
79                 LASSERT (body != NULL);         /* checked by mdc_readpage() */
80                 LASSERT_REPSWABBED (request, 0); /* swabbed by mdc_readpage() */
81
82                 inode->i_size = body->size;
83                 SetPageUptodate(page);
84         }
85         ptlrpc_req_finished(request);
86
87         unlock_page(page);
88         EXIT;
89         return rc;
90 }
91
92 struct address_space_operations ll_dir_aops = {
93         .readpage  = ll_dir_readpage,
94 };
95
96 /*
97  * ext2 uses block-sized chunks. Arguably, sector-sized ones would be
98  * more robust, but we have what we have
99  */
100 static inline unsigned ext2_chunk_size(struct inode *inode)
101 {
102         return inode->i_sb->s_blocksize;
103 }
104
/*
 * Release a directory page handed out by ll_get_dir_page(): drop the
 * kernel mapping first, then the page-cache reference.
 */
static inline void ext2_put_page(struct page *page)
{
        kunmap(page);
        page_cache_release(page);
}
110
111 static inline unsigned long dir_pages(struct inode *inode)
112 {
113         return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT;
114 }
115
116
117 static void ext2_check_page(struct page *page)
118 {
119         struct inode *dir = page->mapping->host;
120         unsigned chunk_size = ext2_chunk_size(dir);
121         char *kaddr = page_address(page);
122         //      u32 max_inumber = le32_to_cpu(sb->u.ext2_sb.s_es->s_inodes_count);
123         unsigned offs, rec_len;
124         unsigned limit = PAGE_CACHE_SIZE;
125         ext2_dirent *p;
126         char *error;
127
128         if ((dir->i_size >> PAGE_CACHE_SHIFT) == page->index) {
129                 limit = dir->i_size & ~PAGE_CACHE_MASK;
130                 if (limit & (chunk_size - 1)) {
131                         CERROR("limit %d dir size %lld index %ld\n",
132                                limit, dir->i_size, page->index);
133                         goto Ebadsize;
134                 }
135                 for (offs = limit; offs<PAGE_CACHE_SIZE; offs += chunk_size) {
136                         ext2_dirent *p = (ext2_dirent*)(kaddr + offs);
137                         p->rec_len = cpu_to_le16(chunk_size);
138                         p->name_len = 0;
139                         p->inode = 0;
140                 }
141                 if (!limit)
142                         goto out;
143         }
144         for (offs = 0; offs <= limit - EXT2_DIR_REC_LEN(1); offs += rec_len) {
145                 p = (ext2_dirent *)(kaddr + offs);
146                 rec_len = le16_to_cpu(p->rec_len);
147
148                 if (rec_len < EXT2_DIR_REC_LEN(1))
149                         goto Eshort;
150                 if (rec_len & 3)
151                         goto Ealign;
152                 if (rec_len < EXT2_DIR_REC_LEN(p->name_len))
153                         goto Enamelen;
154                 if (((offs + rec_len - 1) ^ offs) & ~(chunk_size-1))
155                         goto Espan;
156                 //              if (le32_to_cpu(p->inode) > max_inumber)
157                 //goto Einumber;
158         }
159         if (offs != limit)
160                 goto Eend;
161 out:
162         SetPageChecked(page);
163         return;
164
165         /* Too bad, we had an error */
166
167 Ebadsize:
168         CERROR("ext2_check_page"
169                 "size of directory #%lu is not a multiple of chunk size\n",
170                 dir->i_ino
171         );
172         goto fail;
173 Eshort:
174         error = "rec_len is smaller than minimal";
175         goto bad_entry;
176 Ealign:
177         error = "unaligned directory entry";
178         goto bad_entry;
179 Enamelen:
180         error = "rec_len is too small for name_len";
181         goto bad_entry;
182 Espan:
183         error = "directory entry across blocks";
184         goto bad_entry;
185         //Einumber:
186         // error = "inode out of bounds";
187 bad_entry:
188         CERROR("ext2_check_page: bad entry in directory #%lu: %s - "
189                 "offset=%lu+%u, inode=%lu, rec_len=%d, name_len=%d",
190                 dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT), offs,
191                 (unsigned long) le32_to_cpu(p->inode),
192                 rec_len, p->name_len);
193         goto fail;
194 Eend:
195         p = (ext2_dirent *)(kaddr + offs);
196         CERROR("ext2_check_page"
197                 "entry in directory #%lu spans the page boundary"
198                 "offset=%lu, inode=%lu",
199                 dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs,
200                 (unsigned long) le32_to_cpu(p->inode));
201 fail:
202         SetPageChecked(page);
203         SetPageError(page);
204 }
205
206 static struct page *ll_get_dir_page(struct inode *dir, unsigned long n)
207 {
208         struct ldlm_res_id res_id =
209                 { .name = { dir->i_ino, (__u64)dir->i_generation} };
210         struct lustre_handle lockh;
211         struct obd_device *obddev = class_exp2obd(ll_i2sbi(dir)->ll_mdc_exp);
212         struct address_space *mapping = dir->i_mapping;
213         struct page *page;
214         ldlm_policy_data_t policy = {.l_inodebits = {MDS_INODELOCK_UPDATE} };
215         int rc;
216
217         rc = ldlm_lock_match(obddev->obd_namespace, LDLM_FL_BLOCK_GRANTED,
218                              &res_id, LDLM_IBITS, &policy, LCK_CR, &lockh);
219         if (!rc) {
220                 struct lookup_intent it = { .it_op = IT_READDIR };
221                 struct ptlrpc_request *request;
222                 struct mdc_op_data data;
223
224                 ll_prepare_mdc_op_data(&data, dir, NULL, NULL, 0, 0);
225
226                 rc = mdc_enqueue(ll_i2sbi(dir)->ll_mdc_exp, LDLM_IBITS, &it,
227                                  LCK_CR, &data, &lockh, NULL, 0,
228                                  ldlm_completion_ast, ll_mdc_blocking_ast, dir,
229                                  0);
230
231                 request = (struct ptlrpc_request *)it.d.lustre.it_data;
232                 if (request)
233                         ptlrpc_req_finished(request);
234                 if (rc < 0) {
235                         CERROR("lock enqueue: rc: %d\n", rc);
236                         return ERR_PTR(rc);
237                 }
238         }
239         ldlm_lock_dump_handle(D_OTHER, &lockh);
240
241         page = read_cache_page(mapping, n,
242                                (filler_t*)mapping->a_ops->readpage, NULL);
243         if (!IS_ERR(page)) {
244                 wait_on_page(page);
245                 (void)kmap(page);
246                 if (!PageUptodate(page))
247                         goto fail;
248                 if (!PageChecked(page))
249                         ext2_check_page(page);
250                 if (PageError(page))
251                         goto fail;
252         }
253
254 out_unlock:
255         ldlm_lock_decref(&lockh, LCK_CR);
256         return page;
257
258 fail:
259         ext2_put_page(page);
260         page = ERR_PTR(-EIO);
261         goto out_unlock;
262 }
263
264 /*
265  * p is at least 6 bytes before the end of page
266  */
267 static inline ext2_dirent *ext2_next_entry(ext2_dirent *p)
268 {
269         return (ext2_dirent *)((char*)p + le16_to_cpu(p->rec_len));
270 }
271
272 static inline unsigned
273 ext2_validate_entry(char *base, unsigned offset, unsigned mask)
274 {
275         ext2_dirent *de = (ext2_dirent*)(base + offset);
276         ext2_dirent *p = (ext2_dirent*)(base + (offset&mask));
277         while ((char*)p < (char*)de)
278                 p = ext2_next_entry(p);
279         return (char *)p - base;
280 }
281
282 static unsigned char ext2_filetype_table[EXT2_FT_MAX] = {
283         [EXT2_FT_UNKNOWN]       DT_UNKNOWN,
284         [EXT2_FT_REG_FILE]      DT_REG,
285         [EXT2_FT_DIR]           DT_DIR,
286         [EXT2_FT_CHRDEV]        DT_CHR,
287         [EXT2_FT_BLKDEV]        DT_BLK,
288         [EXT2_FT_FIFO]          DT_FIFO,
289         [EXT2_FT_SOCK]          DT_SOCK,
290         [EXT2_FT_SYMLINK]       DT_LNK,
291 };
292
293
294 int ll_readdir(struct file * filp, void * dirent, filldir_t filldir)
295 {
296         struct inode *inode = filp->f_dentry->d_inode;
297         loff_t pos = filp->f_pos;
298         // XXX struct super_block *sb = inode->i_sb;
299         unsigned offset = pos & ~PAGE_CACHE_MASK;
300         unsigned long n = pos >> PAGE_CACHE_SHIFT;
301         unsigned long npages = dir_pages(inode);
302         unsigned chunk_mask = ~(ext2_chunk_size(inode)-1);
303         unsigned char *types = ext2_filetype_table;
304         int need_revalidate = (filp->f_version != inode->i_version);
305         int rc = 0;
306         ENTRY;
307
308         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) pos %llu/%llu\n",
309                inode->i_ino, inode->i_generation, inode, pos, inode->i_size);
310
311         if (pos > inode->i_size - EXT2_DIR_REC_LEN(1))
312                 RETURN(0);
313
314         for ( ; n < npages; n++, offset = 0) {
315                 char *kaddr, *limit;
316                 ext2_dirent *de;
317                 struct page *page;
318
319                 CDEBUG(D_EXT2,"read %lu of dir %lu/%u page %lu/%lu size %llu\n",
320                        PAGE_CACHE_SIZE, inode->i_ino, inode->i_generation,
321                        n, npages, inode->i_size);
322                 page = ll_get_dir_page(inode, n);
323
324                 /* size might have been updated by mdc_readpage */
325                 npages = dir_pages(inode);
326
327                 if (IS_ERR(page)) {
328                         rc = PTR_ERR(page);
329                         CERROR("error reading dir %lu/%u page %lu: rc %d\n",
330                                inode->i_ino, inode->i_generation, n, rc);
331                         continue;
332                 }
333
334                 kaddr = page_address(page);
335                 if (need_revalidate) {
336                         offset = ext2_validate_entry(kaddr, offset, chunk_mask);
337                         need_revalidate = 0;
338                 }
339                 de = (ext2_dirent *)(kaddr+offset);
340                 limit = kaddr + PAGE_CACHE_SIZE - EXT2_DIR_REC_LEN(1);
341                 for ( ;(char*)de <= limit; de = ext2_next_entry(de)) {
342                         if (de->inode) {
343                                 int over;
344
345                                 rc = 0; /* no error if we return something */
346
347                                 offset = (char *)de - kaddr;
348                                 over = filldir(dirent, de->name, de->name_len,
349                                                (n<<PAGE_CACHE_SHIFT) | offset,
350                                                le32_to_cpu(de->inode),
351                                                types[de->file_type &
352                                                      (EXT2_FT_MAX - 1)]);
353                                 if (over) {
354                                         ext2_put_page(page);
355                                         GOTO(done, rc);
356                                 }
357                         }
358                 }
359                 ext2_put_page(page);
360         }
361
362 done:
363         filp->f_pos = (n << PAGE_CACHE_SHIFT) | offset;
364         filp->f_version = inode->i_version;
365         update_atime(inode);
366         RETURN(rc);
367 }
368
/*
 * Copy the quotactl fields qc_cmd, qc_type, qc_id, qc_stat, qc_dqinfo and
 * qc_dqblk from @in to @out, one Q_COPY() per field.
 */
#define QCTL_COPY(out, in)                      \
do {                                            \
        Q_COPY(out, in, qc_cmd);                \
        Q_COPY(out, in, qc_type);               \
        Q_COPY(out, in, qc_id);                 \
        Q_COPY(out, in, qc_stat);               \
        Q_COPY(out, in, qc_dqinfo);             \
        Q_COPY(out, in, qc_dqblk);              \
} while (0)
378
379 static int ll_dir_ioctl(struct inode *inode, struct file *file,
380                         unsigned int cmd, unsigned long arg)
381 {
382         struct ll_sb_info *sbi = ll_i2sbi(inode);
383         struct obd_ioctl_data *data;
384         ENTRY;
385
386         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), cmd=%#x\n",
387                inode->i_ino, inode->i_generation, inode, cmd);
388
389         /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
390         if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
391                 return -ENOTTY;
392
393         lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_IOCTL);
394         switch(cmd) {
395         case EXT3_IOC_GETFLAGS:
396         case EXT3_IOC_SETFLAGS:
397                 RETURN(ll_iocontrol(inode, file, cmd, arg));
398         case EXT3_IOC_GETVERSION_OLD:
399         case EXT3_IOC_GETVERSION:
400                 RETURN(put_user(inode->i_generation, (int *)arg));
401         /* We need to special case any other ioctls we want to handle,
402          * to send them to the MDS/OST as appropriate and to properly
403          * network encode the arg field.
404         case EXT3_IOC_SETVERSION_OLD:
405         case EXT3_IOC_SETVERSION:
406         */
407         case IOC_MDC_LOOKUP: {
408                 struct ptlrpc_request *request = NULL;
409                 struct lu_fid fid;
410                 char *buf = NULL;
411                 char *filename;
412                 int namelen, rc, len = 0;
413
414                 rc = obd_ioctl_getdata(&buf, &len, (void *)arg);
415                 if (rc)
416                         RETURN(rc);
417                 data = (void *)buf;
418
419                 filename = data->ioc_inlbuf1;
420                 namelen = data->ioc_inllen1;
421
422                 if (namelen < 1) {
423                         CDEBUG(D_INFO, "IOC_MDC_LOOKUP missing filename\n");
424                         GOTO(out, rc = -EINVAL);
425                 }
426
427                 ll_inode2fid(&fid, inode);
428                 rc = mdc_getattr_name(sbi->ll_mdc_exp, &fid, filename, namelen,
429                                       OBD_MD_FLID, 0, &request);
430                 if (rc < 0) {
431                         CDEBUG(D_INFO, "mdc_getattr_name: %d\n", rc);
432                         GOTO(out, rc);
433                 }
434
435                 ptlrpc_req_finished(request);
436
437                 EXIT;
438         out:
439                 obd_ioctl_freedata(buf, len);
440                 return rc;
441         }
442         case LL_IOC_LOV_SETSTRIPE: {
443                 struct ptlrpc_request *request = NULL;
444                 struct mdc_op_data op_data;
445                 struct iattr attr = { 0 };
446                 struct lov_user_md lum, *lump = (struct lov_user_md *)arg;
447                 int rc = 0;
448
449                 ll_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0);
450
451                 LASSERT(sizeof(lum) == sizeof(*lump));
452                 LASSERT(sizeof(lum.lmm_objects[0]) ==
453                         sizeof(lump->lmm_objects[0]));
454                 rc = copy_from_user(&lum, lump, sizeof(lum));
455                 if (rc)
456                         return(-EFAULT);
457
458                 /*
459                  * This is coming from userspace, so should be in
460                  * local endian.  But the MDS would like it in little
461                  * endian, so we swab it before we send it.
462                  */
463                 if (lum.lmm_magic != LOV_USER_MAGIC)
464                         RETURN(-EINVAL);
465
466                 if (lum.lmm_magic != cpu_to_le32(LOV_USER_MAGIC))
467                         lustre_swab_lov_user_md(&lum);
468
469                 /* swabbing is done in lov_setstripe() on server side */
470                 rc = mdc_setattr(sbi->ll_mdc_exp, &op_data,
471                                  &attr, &lum, sizeof(lum), NULL, 0, &request);
472                 if (rc) {
473                         ptlrpc_req_finished(request);
474                         if (rc != -EPERM && rc != -EACCES)
475                                 CERROR("mdc_setattr fails: rc = %d\n", rc);
476                         return rc;
477                 }
478                 ptlrpc_req_finished(request);
479
480                 return rc;
481         }
482         case LL_IOC_LOV_GETSTRIPE: {
483                 struct ptlrpc_request *request = NULL;
484                 struct lov_user_md *lump = (struct lov_user_md *)arg;
485                 struct lov_mds_md *lmm;
486                 struct lu_fid fid;
487                 struct mdt_body *body;
488                 int rc, lmmsize;
489
490                 ll_inode2fid(&fid, inode);
491
492                 rc = ll_get_max_mdsize(sbi, &lmmsize);
493                 if (rc)
494                         RETURN(rc);
495
496                 rc = mdc_getattr(sbi->ll_mdc_exp, &fid, OBD_MD_FLDIREA,
497                                  lmmsize, &request);
498                 if (rc < 0) {
499                         CDEBUG(D_INFO, "mdc_getattr failed: rc = %d\n", rc);
500                         RETURN(rc);
501                 }
502
503                 body = lustre_msg_buf(request->rq_repmsg, 0, sizeof(*body));
504                 LASSERT(body != NULL);         /* checked by mdc_getattr_name */
505                 LASSERT_REPSWABBED(request, 0);/* swabbed by mdc_getattr_name */
506
507                 lmmsize = body->eadatasize;
508                 if (lmmsize == 0)
509                         GOTO(out_get, rc = -ENODATA);
510
511                 lmm = lustre_msg_buf(request->rq_repmsg, 1, lmmsize);
512                 LASSERT(lmm != NULL);
513                 LASSERT_REPSWABBED(request, 1);
514
515                 /*
516                  * This is coming from the MDS, so is probably in
517                  * little endian.  We convert it to host endian before
518                  * passing it to userspace.
519                  */
520                 if (lmm->lmm_magic == __swab32(LOV_MAGIC)) {
521                         lustre_swab_lov_user_md((struct lov_user_md *)lmm);
522                         lustre_swab_lov_user_md_objects((struct lov_user_md *)lmm);
523                 }
524
525                 rc = copy_to_user(lump, lmm, lmmsize);
526                 if (rc)
527                         GOTO(out_get, rc = -EFAULT);
528
529                 EXIT;
530         out_get:
531                 ptlrpc_req_finished(request);
532                 return rc;
533         }
534         case LL_IOC_OBD_STATFS:
535                 RETURN(ll_obd_statfs(inode, (void *)arg));
536         case IOC_MDC_GETFILEINFO:
537         case IOC_MDC_GETSTRIPE: {
538                 struct ptlrpc_request *request = NULL;
539                 struct lu_fid fid;
540                 struct mdt_body *body;
541                 struct lov_user_md *lump;
542                 struct lov_mds_md *lmm;
543                 char *filename;
544                 int rc, lmmsize;
545
546                 filename = getname((const char *)arg);
547                 if (IS_ERR(filename))
548                         RETURN(PTR_ERR(filename));
549
550                 ll_inode2fid(&fid, inode);
551                 
552                 rc = ll_get_max_mdsize(sbi, &lmmsize);
553                 if (rc) 
554                         RETURN(rc);
555
556                 rc = mdc_getattr_name(sbi->ll_mdc_exp, &fid, filename,
557                                       strlen(filename) + 1, OBD_MD_FLEASIZE,
558                                       lmmsize, &request);
559                 if (rc < 0) {
560                         CDEBUG(D_INFO, "mdc_getattr_name failed on %s: rc %d\n",
561                                filename, rc);
562                         GOTO(out_name, rc);
563                 }
564
565                 body = lustre_msg_buf(request->rq_repmsg, 0, sizeof (*body));
566                 LASSERT(body != NULL);         /* checked by mdc_getattr_name */
567                 LASSERT_REPSWABBED(request, 0);/* swabbed by mdc_getattr_name */
568
569                 lmmsize = body->eadatasize;
570
571                 if (!(body->valid & OBD_MD_FLEASIZE) || lmmsize == 0)
572                         GOTO(out_req, rc = -ENODATA);
573
574                 if (lmmsize > 4096)
575                         GOTO(out_req, rc = -EFBIG);
576
577                 lmm = lustre_msg_buf(request->rq_repmsg, 1, lmmsize);
578                 LASSERT(lmm != NULL);
579                 LASSERT_REPSWABBED(request, 1);
580
581                 /*
582                  * This is coming from the MDS, so is probably in
583                  * little endian.  We convert it to host endian before
584                  * passing it to userspace.
585                  */
586                 if (lmm->lmm_magic == __swab32(LOV_MAGIC)) {
587                         lustre_swab_lov_user_md((struct lov_user_md *)lmm);
588                         lustre_swab_lov_user_md_objects((struct lov_user_md *)lmm);
589                 } else if (lmm->lmm_magic == __swab32(LOV_MAGIC_JOIN)) {
590                         lustre_swab_lov_user_md_join((struct lov_user_md_join *)lmm);
591                 }
592                 if (lmm->lmm_magic == LOV_MAGIC_JOIN) {
593                         struct lov_stripe_md *lsm;
594                         struct lov_user_md_join *lmj;
595                         int lmj_size, i, aindex = 0, rc;
596  
597                         rc = obd_unpackmd(sbi->ll_osc_exp, &lsm, lmm, lmmsize);
598                         if (rc < 0) 
599                                 GOTO(out_req, rc = -ENOMEM);
600                         rc = obd_checkmd(sbi->ll_osc_exp, sbi->ll_mdc_exp, lsm);
601                         if (rc) 
602                                 GOTO(out_free_memmd, rc); 
603                         
604                         lmj_size = sizeof(struct lov_user_md_join) +
605                                    lsm->lsm_stripe_count *
606                                    sizeof(struct lov_user_ost_data_join);
607                         OBD_ALLOC(lmj, lmj_size);
608                         if (!lmj) 
609                                 GOTO(out_free_memmd, rc = -ENOMEM);
610                         
611                         memcpy(lmj, lmm, sizeof(struct lov_user_md_join));
612                         for(i = 0; i < lsm->lsm_stripe_count; i++) {
613                                 struct lov_array_info *lai = lsm->lsm_array;
614                                 if ((lai->lai_ext_array[aindex].le_loi_idx +
615                                      lai->lai_ext_array[aindex].le_stripe_count)<=i){
616                                         aindex ++;
617                                 }
618                                 CDEBUG(D_INFO, "aindex %d i %d l_extent_start"LPU64""
619                                                "len %d \n", aindex, i, 
620                                                lai->lai_ext_array[aindex].le_start,
621                                                (int)lai->lai_ext_array[aindex].le_len);
622                                 lmj->lmm_objects[i].l_extent_start =
623                                         lai->lai_ext_array[aindex].le_start;
624  
625                                 if ((int)lai->lai_ext_array[aindex].le_len == -1) {
626                                         lmj->lmm_objects[i].l_extent_end = -1;
627                                 } else {
628                                         lmj->lmm_objects[i].l_extent_end =  
629                                         lai->lai_ext_array[aindex].le_start + 
630                                         lai->lai_ext_array[aindex].le_len;
631                                 }
632                                 lmj->lmm_objects[i].l_object_id =
633                                         lsm->lsm_oinfo[i].loi_id;
634                                 lmj->lmm_objects[i].l_object_gr =
635                                         lsm->lsm_oinfo[i].loi_gr;
636                                 lmj->lmm_objects[i].l_ost_gen =
637                                         lsm->lsm_oinfo[i].loi_ost_gen;
638                                 lmj->lmm_objects[i].l_ost_idx =
639                                         lsm->lsm_oinfo[i].loi_ost_idx;
640                         }
641                         lmm = (struct lov_mds_md *)lmj;
642                         lmmsize = lmj_size;
643 out_free_memmd:
644                         obd_free_memmd(sbi->ll_osc_exp, &lsm);
645                         if (rc)
646                                 GOTO(out_req, rc);
647                 }
648                 if (cmd == IOC_MDC_GETFILEINFO) {
649                         struct lov_user_mds_data *lmdp;
650                         lstat_t st = { 0 };
651
652                         st.st_dev     = 0;
653                         st.st_mode    = body->mode;
654                         st.st_nlink   = body->nlink;
655                         st.st_uid     = body->uid;
656                         st.st_gid     = body->gid;
657                         st.st_rdev    = body->rdev;
658                         st.st_size    = body->size;
659                         st.st_blksize = PAGE_SIZE;
660                         st.st_blocks  = body->blocks;
661                         st.st_atime   = body->atime;
662                         st.st_mtime   = body->mtime;
663                         st.st_ctime   = body->ctime;
664                         st.st_ino     = body->ino;
665
666                         lmdp = (struct lov_user_mds_data *)arg;
667                         rc = copy_to_user(&lmdp->lmd_st, &st, sizeof(st));
668                         if (rc)
669                                 GOTO(out_req, rc = -EFAULT);
670                         lump = &lmdp->lmd_lmm;
671                 } else {
672                         lump = (struct lov_user_md *)arg;
673                 }
674
675                 rc = copy_to_user(lump, lmm, lmmsize);
676                 if (lmm->lmm_magic == LOV_MAGIC_JOIN)
677                         OBD_FREE(lmm, lmmsize); 
678                 if (rc)
679                         GOTO(out_req, rc = -EFAULT);
680
681                 EXIT;
682         out_req:
683                 ptlrpc_req_finished(request);
684         out_name:
685                 putname(filename);
686                 return rc;
687         }
688         case OBD_IOC_LLOG_CATINFO: {
689                 struct ptlrpc_request *req = NULL;
690                 char *buf = NULL;
691                 int rc, len = 0;
692                 char *bufs[2], *str;
693                 int lens[2], size;
694
695                 rc = obd_ioctl_getdata(&buf, &len, (void *)arg);
696                 if (rc)
697                         RETURN(rc);
698                 data = (void *)buf;
699
700                 if (!data->ioc_inlbuf1) {
701                         obd_ioctl_freedata(buf, len);
702                         RETURN(-EINVAL);
703                 }
704
705                 lens[0] = data->ioc_inllen1;
706                 bufs[0] = data->ioc_inlbuf1;
707                 if (data->ioc_inllen2) {
708                         lens[1] = data->ioc_inllen2;
709                         bufs[1] = data->ioc_inlbuf2;
710                 } else {
711                         lens[1] = 0;
712                         bufs[1] = NULL;
713                 }
714                 size = data->ioc_plen1;
715                 req = ptlrpc_prep_req(sbi2mdc(sbi)->cl_import,
716                                       LUSTRE_LOG_VERSION, LLOG_CATINFO,
717                                       2, lens, bufs);
718                 if (!req)
719                         GOTO(out_catinfo, rc = -ENOMEM);
720                 req->rq_replen = lustre_msg_size(1, &size);
721
722                 rc = ptlrpc_queue_wait(req);
723                 str = lustre_msg_string(req->rq_repmsg, 0, data->ioc_plen1);
724                 if (!rc)
725                         rc = copy_to_user(data->ioc_pbuf1, str,
726                                           data->ioc_plen1);
727                 ptlrpc_req_finished(req);
728         out_catinfo:
729                 obd_ioctl_freedata(buf, len);
730                 RETURN(rc);
731         }
732         case OBD_IOC_QUOTACHECK: {
733                 struct obd_quotactl *oqctl;
734                 int rc, error = 0;
735
736                 if (!capable(CAP_SYS_ADMIN))
737                         RETURN(-EPERM);
738
739                 OBD_ALLOC_PTR(oqctl);
740                 if (!oqctl)
741                         RETURN(-ENOMEM);
742                 oqctl->qc_type = arg;
743                 rc = obd_quotacheck(sbi->ll_mdc_exp, oqctl);
744                 if (rc < 0) {
745                         CDEBUG(D_INFO, "mdc_quotacheck failed: rc %d\n", rc);
746                         error = rc;
747                 }
748
749                 rc = obd_quotacheck(sbi->ll_osc_exp, oqctl);
750                 if (rc < 0)
751                         CDEBUG(D_INFO, "osc_quotacheck failed: rc %d\n", rc);
752
753                 OBD_FREE_PTR(oqctl);
754                 return error ?: rc;
755         }
756         case OBD_IOC_POLL_QUOTACHECK: {
757                 struct if_quotacheck *check;
758                 int rc;
759
760                 if (!capable(CAP_SYS_ADMIN))
761                         RETURN(-EPERM);
762
763                 OBD_ALLOC_PTR(check);
764                 if (!check)
765                         RETURN(-ENOMEM);
766
767                 rc = obd_iocontrol(cmd, sbi->ll_mdc_exp, 0, (void *)check,
768                                    NULL);
769                 if (rc) {
770                         CDEBUG(D_QUOTA, "mdc ioctl %d failed: %d\n", cmd, rc);
771                         if (copy_to_user((void *)arg, check, sizeof(*check)))
772                                 rc = -EFAULT;
773                         GOTO(out_poll, rc);
774                 }
775
776                 rc = obd_iocontrol(cmd, sbi->ll_osc_exp, 0, (void *)check,
777                                    NULL);
778                 if (rc) {
779                         CDEBUG(D_QUOTA, "osc ioctl %d failed: %d\n", cmd, rc);
780                         if (copy_to_user((void *)arg, check, sizeof(*check)))
781                                 rc = -EFAULT;
782                         GOTO(out_poll, rc);
783                 }
784         out_poll:                 
785                 OBD_FREE_PTR(check);
786                 RETURN(rc);
787         }
788 #if HAVE_QUOTA_SUPPORT
789         case OBD_IOC_QUOTACTL: {
790                 struct if_quotactl *qctl;
791                 struct obd_quotactl *oqctl;
792                 
793                 int cmd, type, id, rc = 0;
794
795                 OBD_ALLOC_PTR(qctl);
796                 if (!qctl)
797                         RETURN(-ENOMEM);
798
799                 OBD_ALLOC_PTR(oqctl);
800                 if (!oqctl) {
801                         OBD_FREE_PTR(qctl);
802                         RETURN(-ENOMEM);
803                 }
804                 if (copy_from_user(qctl, (void *)arg, sizeof(*qctl)))
805                         GOTO(out_quotactl, rc = -EFAULT);
806
807                 cmd = qctl->qc_cmd;
808                 type = qctl->qc_type;
809                 id = qctl->qc_id;
810                 switch (cmd) {
811                 case Q_QUOTAON:
812                 case Q_QUOTAOFF:
813                 case Q_SETQUOTA:
814                 case Q_SETINFO:
815                         if (!capable(CAP_SYS_ADMIN))
816                                 GOTO(out_quotactl, rc = -EPERM);
817                         break;
818                 case Q_GETQUOTA:
819                         if (((type == USRQUOTA && current->euid != id) ||
820                              (type == GRPQUOTA && !in_egroup_p(id))) &&
821                             !capable(CAP_SYS_ADMIN))
822                                 GOTO(out_quotactl, rc = -EPERM);
823
824                         /* XXX: dqb_valid is borrowed as a flag to mark that
825                          *      only mds quota is wanted */
826                         if (qctl->qc_dqblk.dqb_valid)
827                                 qctl->obd_uuid = 
828                                        sbi->ll_mdc_exp->exp_obd->u.cli.
829                                        cl_import->imp_target_uuid;
830                         break;
831                 case Q_GETINFO:
832                         break;
833                 default:
834                         CERROR("unsupported quotactl op: %#x\n", cmd);
835                         GOTO(out_quotactl, -ENOTTY);
836                 }
837
838                 QCTL_COPY(oqctl, qctl);
839
840                 if (qctl->obd_uuid.uuid[0]) {
841                         struct obd_device *obd;
842                         struct obd_uuid *uuid = &qctl->obd_uuid;
843
844                         obd = class_find_client_notype(uuid,
845                                          &sbi->ll_osc_exp->exp_obd->obd_uuid);
846                         if (!obd)
847                                 GOTO(out_quotactl, rc = -ENOENT);
848
849                         if (cmd == Q_GETINFO)
850                                 oqctl->qc_cmd = Q_GETOINFO;
851                         else if (cmd == Q_GETQUOTA)
852                                 oqctl->qc_cmd = Q_GETOQUOTA;
853                         else
854                                 GOTO(out_quotactl, rc = -EINVAL);
855
856                         if (sbi->ll_mdc_exp->exp_obd == obd) {
857                                 rc = obd_quotactl(sbi->ll_mdc_exp, oqctl);
858                         } else {
859                                 int i;
860                                 struct obd_export *exp;
861                                 struct lov_obd *lov = &sbi->ll_osc_exp->
862                                                             exp_obd->u.lov;
863
864                                 for (i = 0; i < lov->desc.ld_tgt_count; i++) {
865                                         exp = lov->tgts[i].ltd_exp;
866
867                                         if (!lov->tgts[i].active)
868                                                 continue;
869
870                                         if (exp->exp_obd == obd) {
871                                                 rc = obd_quotactl(exp, oqctl);
872                                                 break;
873                                         }
874                                 }
875                         }
876
877                         oqctl->qc_cmd = cmd;
878                         QCTL_COPY(qctl, oqctl);
879
880                         if (copy_to_user((void *)arg, qctl, sizeof(*qctl)))
881                                 rc = -EFAULT;
882
883                         GOTO(out_quotactl, rc);
884                 }
885
886                 rc = obd_quotactl(sbi->ll_mdc_exp, oqctl);
887                 if (rc && rc != -EBUSY && cmd == Q_QUOTAON) {
888                         oqctl->qc_cmd = Q_QUOTAOFF;
889                         obd_quotactl(sbi->ll_mdc_exp, oqctl);
890                 }
891
892                 QCTL_COPY(qctl, oqctl);
893
894                 if (copy_to_user((void *)arg, qctl, sizeof(*qctl)))
895                         rc = -EFAULT;
896         out_quotactl:
897                 OBD_FREE_PTR(qctl);
898                 OBD_FREE_PTR(oqctl);
899                 RETURN(rc);
900         }
901 #endif /* HAVE_QUOTA_SUPPORT */
902         case OBD_IOC_GETNAME: {  
903                 struct obd_device *obd = class_exp2obd(sbi->ll_osc_exp);
904                 if (!obd)
905                         RETURN(-EFAULT);
906                 if (copy_to_user((void *)arg, obd->obd_name, 
907                                 strlen(obd->obd_name) + 1))
908                         RETURN (-EFAULT);
909                 RETURN(0);
910         }
911         default:
912                 RETURN(obd_iocontrol(cmd, sbi->ll_osc_exp,0,NULL,(void *)arg));
913         }
914 }
915
916 int ll_dir_open(struct inode *inode, struct file *file)
917 {
918         ENTRY;
919         RETURN(ll_file_open(inode, file));
920 }
921
922 int ll_dir_release(struct inode *inode, struct file *file)
923 {
924         ENTRY;
925         RETURN(ll_file_release(inode, file));
926 }
927
928 struct file_operations ll_dir_operations = {
929         .open     = ll_dir_open,
930         .release  = ll_dir_release,
931         .read     = generic_read_dir,
932         .readdir  = ll_readdir,
933         .ioctl    = ll_dir_ioctl
934 };
935