Whamcloud - gitweb
Branch b1_8
[fs/lustre-release.git] / lustre / llite / dir.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * Directory code for lustre client.
5  *
6  *  Copyright (C) 2002--2007 Cluster File Systems, Inc.
7  *
8  *   This file is part of the Lustre file system, http://www.lustre.org
9  *   Lustre is a trademark of Cluster File Systems, Inc.
10  *
11  *   You may have signed or agreed to another license before downloading
12  *   this software.  If so, you are bound by the terms and conditions
13  *   of that agreement, and the following does not apply to you.  See the
14  *   LICENSE file included with this distribution for more information.
15  *
16  *   If you did not agree to a different license, then this copy of Lustre
17  *   is open source software; you can redistribute it and/or modify it
18  *   under the terms of version 2 of the GNU General Public License as
19  *   published by the Free Software Foundation.
20  *
21  *   In either case, Lustre is distributed in the hope that it will be
22  *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
23  *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24  *   license text for more details.
25  *
26  */
27
28 #include <linux/fs.h>
29 #include <linux/pagemap.h>
30 #include <linux/mm.h>
31 #include <linux/version.h>
32 #include <linux/smp_lock.h>
33 #include <asm/uaccess.h>
34 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
35 # include <linux/locks.h>   // for wait_on_buffer
36 #else
37 # include <linux/buffer_head.h>   // for wait_on_buffer
38 #endif
39
40 #define DEBUG_SUBSYSTEM S_LLITE
41
42 #include <obd_support.h>
43 #include <obd_class.h>
44 #include <lustre_lib.h>
45 #include <lustre/lustre_idl.h>
46 #include <lustre_lite.h>
47 #include <lustre_dlm.h>
48 #include "llite_internal.h"
49
50 #ifndef HAVE_PAGE_CHECKED
51 #ifdef HAVE_PG_FS_MISC
52 #define PageChecked(page)        test_bit(PG_fs_misc, &(page)->flags)
53 #define SetPageChecked(page)     set_bit(PG_fs_misc, &(page)->flags)
54 #else
55 #error PageChecked or PageFsMisc not defined in kernel
56 #endif
57 #endif
58
59 /* returns the page unlocked, but with a reference */
60 static int ll_dir_readpage(struct file *file, struct page *page)
61 {
62         struct inode *inode = page->mapping->host;
63         struct ll_fid mdc_fid;
64         __u64 offset;
65         struct ptlrpc_request *request;
66         struct mds_body *body;
67         int rc = 0;
68         ENTRY;
69
70         offset = (__u64)page->index << CFS_PAGE_SHIFT;
71         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) off "LPU64"\n",
72                inode->i_ino, inode->i_generation, inode, offset);
73
74         mdc_pack_fid(&mdc_fid, inode->i_ino, inode->i_generation, S_IFDIR);
75
76         rc = mdc_readpage(ll_i2sbi(inode)->ll_mdc_exp, &mdc_fid,
77                           offset, page, &request);
78         if (!rc) {
79                 body = lustre_msg_buf(request->rq_repmsg, REPLY_REC_OFF,
80                                       sizeof(*body));
81                 LASSERT(body != NULL); /* checked by mdc_readpage() */
82                 /* swabbed by mdc_readpage() */
83                 LASSERT(lustre_rep_swabbed(request, REPLY_REC_OFF));
84
85                 if (body->size != i_size_read(inode)) {
86                         ll_inode_size_lock(inode, 0);
87                         i_size_write(inode, body->size);
88                         ll_inode_size_unlock(inode, 0);
89                 }
90
91                 SetPageUptodate(page);
92         }
93         ptlrpc_req_finished(request);
94
95         unlock_page(page);
96         EXIT;
97         return rc;
98 }
99
100 struct address_space_operations ll_dir_aops = {
101         .readpage  = ll_dir_readpage,
102 };
103
104 static inline unsigned ll_dir_page_mask(struct inode *inode)
105 {
106         return ~(inode->i_sb->s_blocksize - 1);
107 }
108
109 /*
110  * Check consistency of a single entry.
111  */
112 static int ll_dir_check_entry(struct inode *dir, struct ll_dir_entry *ent,
113                               unsigned offset, unsigned rec_len, pgoff_t index)
114 {
115         const char *msg;
116
117         /*
118          * Consider adding more checks.
119          */
120
121         if (unlikely(rec_len < ll_dir_rec_len(1)))
122                 msg = "entry is too short";
123         else if (unlikely(rec_len & 3))
124                 msg = "wrong alignment";
125         else if (unlikely(rec_len < ll_dir_rec_len(ent->lde_name_len)))
126                 msg = "rec_len doesn't match name_len";
127         else if (unlikely(((offset + rec_len - 1) ^ offset) &
128                           ll_dir_page_mask(dir)))
129                 msg = "directory entry across blocks";
130         else
131                 return 0;
132         CERROR("%s: bad entry in directory %lu/%u: %s - "
133                "offset=%lu+%u, inode=%lu, rec_len=%d,"
134                " name_len=%d\n", ll_i2mdcexp(dir)->exp_obd->obd_name,
135                dir->i_ino, dir->i_generation, msg,
136                index << CFS_PAGE_SHIFT,
137                offset, (unsigned long)le32_to_cpu(ent->lde_inode),
138                rec_len, ent->lde_name_len);
139         return -EIO;
140 }
141
142 static void ll_dir_check_page(struct inode *dir, struct page *page)
143 {
144         int      err;
145         unsigned size = dir->i_sb->s_blocksize;
146         char    *addr = page_address(page);
147         unsigned off;
148         unsigned limit;
149         unsigned reclen;
150
151         struct ll_dir_entry *ent;
152
153         err = 0;
154         if ((i_size_read(dir) >> CFS_PAGE_SHIFT) == (__u64)page->index) {
155                 /*
156                  * Last page.
157                  */
158                 limit = i_size_read(dir) & ~CFS_PAGE_MASK;
159                 if (limit & (size - 1)) {
160                         CERROR("%s: dir %lu/%u size %llu doesn't match %u\n",
161                                ll_i2mdcexp(dir)->exp_obd->obd_name, dir->i_ino,
162                                dir->i_generation, i_size_read(dir), size);
163                         err++;
164                 } else {
165                         /*
166                          * Place dummy forwarding entries to streamline
167                          * ll_readdir().
168                          */
169                         for (off = limit; off < CFS_PAGE_SIZE; off += size) {
170                                 ent = ll_entry_at(addr, off);
171                                 ent->lde_rec_len = cpu_to_le16(size);
172                                 ent->lde_name_len = 0;
173                                 ent->lde_inode = 0;
174                         }
175                 }
176         } else
177                 limit = CFS_PAGE_SIZE;
178
179         for (off = 0;
180              !err && off <= limit - ll_dir_rec_len(1); off += reclen) {
181                 ent    = ll_entry_at(addr, off);
182                 reclen = le16_to_cpu(ent->lde_rec_len);
183                 err    = ll_dir_check_entry(dir, ent, off, reclen, page->index);
184         }
185
186         if (!err && off != limit) {
187                 ent = ll_entry_at(addr, off);
188                 CERROR("%s: entry in directory %lu/%u spans the page boundary "
189                        "offset="LPU64"+%u, inode=%lu\n",
190                        ll_i2mdcexp(dir)->exp_obd->obd_name,
191                        dir->i_ino, dir->i_generation,
192                        (__u64)page->index << CFS_PAGE_SHIFT,
193                        off, (unsigned long)le32_to_cpu(ent->lde_inode));
194                 err++;
195         }
196         if (err)
197                 SetPageError(page);
198         SetPageChecked(page);
199 }
200
201 struct page *ll_get_dir_page(struct inode *dir, unsigned long n)
202 {
203         struct ldlm_res_id res_id =
204                 { .name = { dir->i_ino, (__u64)dir->i_generation} };
205         struct lustre_handle lockh;
206         struct obd_device *obddev = class_exp2obd(ll_i2sbi(dir)->ll_mdc_exp);
207         struct address_space *mapping = dir->i_mapping;
208         struct page *page;
209         ldlm_policy_data_t policy = {.l_inodebits = {MDS_INODELOCK_UPDATE} };
210         int rc;
211
212         rc = ldlm_lock_match(obddev->obd_namespace, LDLM_FL_BLOCK_GRANTED,
213                              &res_id, LDLM_IBITS, &policy, LCK_CR, &lockh);
214         if (!rc) {
215                 struct lookup_intent it = { .it_op = IT_READDIR };
216                 struct ldlm_enqueue_info einfo = { LDLM_IBITS, LCK_CR,
217                        ll_mdc_blocking_ast, ldlm_completion_ast, NULL, dir };
218                 struct ptlrpc_request *request;
219                 struct mdc_op_data data;
220
221                 ll_prepare_mdc_op_data(&data, dir, NULL, NULL, 0, 0, NULL);
222
223                 rc = mdc_enqueue(ll_i2sbi(dir)->ll_mdc_exp, &einfo, &it,
224                                  &data, &lockh, NULL, 0, 0);
225
226                 request = (struct ptlrpc_request *)it.d.lustre.it_data;
227                 if (request)
228                         ptlrpc_req_finished(request);
229                 if (rc < 0) {
230                         CERROR("lock enqueue: rc: %d\n", rc);
231                         return ERR_PTR(rc);
232                 }
233         }
234         ldlm_lock_dump_handle(D_OTHER, &lockh);
235
236         page = read_cache_page(mapping, n,
237                                (filler_t*)mapping->a_ops->readpage, NULL);
238         if (IS_ERR(page))
239                 GOTO(out_unlock, page);
240
241         wait_on_page(page);
242         (void)kmap(page);
243         if (!PageUptodate(page))
244                 goto fail;
245         if (!PageChecked(page))
246                 ll_dir_check_page(dir, page);
247         if (PageError(page))
248                 goto fail;
249
250 out_unlock:
251         ldlm_lock_decref(&lockh, LCK_CR);
252         return page;
253
254 fail:
255         ll_put_page(page);
256         page = ERR_PTR(-EIO);
257         goto out_unlock;
258 }
259
260 static inline unsigned ll_dir_validate_entry(char *base, unsigned offset,
261                                              unsigned mask)
262 {
263         struct ll_dir_entry *de = ll_entry_at(base, offset);
264         struct ll_dir_entry *p  = ll_entry_at(base, offset & mask);
265         while (p < de && p->lde_rec_len > 0)
266                 p = ll_dir_next_entry(p);
267         return (char *)p - base;
268 }
269
/*
 * On-wire file type codes stored in a directory entry.  The values are the
 * same as in ext2 for compatibility; LL_DIR_FT_MAX is the number of codes.
 */
enum {
        LL_DIR_FT_UNKNOWN  = 0,
        LL_DIR_FT_REG_FILE = 1,
        LL_DIR_FT_DIR      = 2,
        LL_DIR_FT_CHRDEV   = 3,
        LL_DIR_FT_BLKDEV   = 4,
        LL_DIR_FT_FIFO     = 5,
        LL_DIR_FT_SOCK     = 6,
        LL_DIR_FT_SYMLINK  = 7,
        LL_DIR_FT_MAX      = 8
};
285
286 static unsigned char ll_dir_filetype_table[LL_DIR_FT_MAX] = {
287         [LL_DIR_FT_UNKNOWN]  = DT_UNKNOWN,
288         [LL_DIR_FT_REG_FILE] = DT_REG,
289         [LL_DIR_FT_DIR]      = DT_DIR,
290         [LL_DIR_FT_CHRDEV]   = DT_CHR,
291         [LL_DIR_FT_BLKDEV]   = DT_BLK,
292         [LL_DIR_FT_FIFO]     = DT_FIFO,
293         [LL_DIR_FT_SOCK]     = DT_SOCK,
294         [LL_DIR_FT_SYMLINK]  = DT_LNK,
295 };
296
297 /*
298  * Process one page. Returns:
299  *
300  *     -ve: filldir commands readdir to stop.
301  *     +ve: number of entries submitted to filldir.
302  *       0: no live entries on this page.
303  */
304
305 int ll_readdir_page(char *addr, __u64 base, unsigned *offset,
306                     filldir_t filldir, void *cookie)
307 {
308         struct ll_dir_entry *de;
309         char *end;
310         int nr;
311
312         de = ll_entry_at(addr, *offset);
313         end = addr + CFS_PAGE_SIZE - ll_dir_rec_len(1);
314         for (nr = 0 ;(char*)de <= end; de = ll_dir_next_entry(de)) {
315                 if (de->lde_inode != 0) {
316                         nr++;
317                         *offset = (char *)de - addr;
318                         if (filldir(cookie, de->lde_name, de->lde_name_len,
319                                     base | *offset, le32_to_cpu(de->lde_inode),
320                                     ll_dir_filetype_table[de->lde_file_type &
321                                                           (LL_DIR_FT_MAX - 1)]))
322                                 return -1;
323                 }
324         }
325         return nr;
326 }
327
328 int ll_readdir(struct file *filp, void *dirent, filldir_t filldir)
329 {
330         struct inode *inode = filp->f_dentry->d_inode;
331         loff_t pos          = filp->f_pos;
332         unsigned offset     = pos & ~CFS_PAGE_MASK;
333         pgoff_t idx         = pos >> CFS_PAGE_SHIFT;
334         pgoff_t npages      = dir_pages(inode);
335         unsigned chunk_mask = ll_dir_page_mask(inode);
336         int need_revalidate = (filp->f_version != inode->i_version);
337         int rc              = 0;
338         int done; /* when this becomes negative --- stop iterating */
339
340         ENTRY;
341
342         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) pos %llu/%llu\n",
343                inode->i_ino, inode->i_generation, inode,
344                pos, i_size_read(inode));
345
346         /*
347          * Checking ->i_size without the lock. Should be harmless, as server
348          * re-checks.
349          */
350         if (pos > i_size_read(inode) - ll_dir_rec_len(1))
351                 RETURN(0);
352
353         for (done = 0; idx < npages; idx++, offset = 0) {
354                 /*
355                  * We can assume that all blocks on this page are filled with
356                  * entries, because ll_dir_check_page() placed special dummy
357                  * entries for us.
358                  */
359
360                 char *kaddr;
361                 struct page *page;
362
363                 CDEBUG(D_EXT2,"read %lu of dir %lu/%u page %lu/%lu "
364                        "size %llu\n",
365                        CFS_PAGE_SIZE, inode->i_ino, inode->i_generation,
366                        idx, npages, i_size_read(inode));
367                 page = ll_get_dir_page(inode, idx);
368
369                 /* size might have been updated by mdc_readpage */
370                 npages = dir_pages(inode);
371
372                 if (IS_ERR(page)) {
373                         rc = PTR_ERR(page);
374                         CERROR("error reading dir %lu/%u page %lu: rc %d\n",
375                                inode->i_ino, inode->i_generation, idx, rc);
376                         continue;
377                 }
378
379                 kaddr = page_address(page);
380                 if (need_revalidate) {
381                         /*
382                          * File offset was changed by lseek() and possibly
383                          * points in the middle of an entry. Re-scan from the
384                          * beginning of the chunk.
385                          */
386                         offset = ll_dir_validate_entry(kaddr, offset,
387                                                        chunk_mask);
388                         need_revalidate = 0;
389                 }
390                 done = ll_readdir_page(kaddr, idx << CFS_PAGE_SHIFT,
391                                        &offset, filldir, dirent);
392                 ll_put_page(page);
393                 if (done > 0)
394                         /*
395                          * Some entries were sent to the user space, return
396                          * success.
397                          */
398                         rc = 0;
399                 else if (done < 0)
400                         /*
401                          * filldir is satisfied.
402                          */
403                         break;
404         }
405
406         filp->f_pos = (idx << CFS_PAGE_SHIFT) | offset;
407         filp->f_version = inode->i_version;
408         touch_atime(filp->f_vfsmnt, filp->f_dentry);
409
410         RETURN(rc);
411 }
412
/* Copy each quota-control field from @in to @out, one Q_COPY() per field. */
#define QCTL_COPY(out, in)                                      \
do {                                                            \
        Q_COPY(out, in, qc_cmd);                                \
        Q_COPY(out, in, qc_type);                               \
        Q_COPY(out, in, qc_id);                                 \
        Q_COPY(out, in, qc_stat);                               \
        Q_COPY(out, in, qc_dqinfo);                             \
        Q_COPY(out, in, qc_dqblk);                              \
} while (0)
422
423 int ll_send_mgc_param(struct obd_export *mgc, char *string)
424 {
425         struct mgs_send_param *msp;
426         int rc = 0;
427
428         OBD_ALLOC_PTR(msp);
429         if (!msp)
430                 return -ENOMEM;
431
432         strncpy(msp->mgs_param, string, MGS_PARAM_MAXLEN);
433         rc = obd_set_info_async(mgc, strlen(KEY_SET_INFO), KEY_SET_INFO,
434                                 sizeof(struct mgs_send_param), msp, NULL);
435         if (rc)
436                 CERROR("Failed to set parameter: %d\n", rc);
437
438         OBD_FREE_PTR(msp);
439         return rc;
440 }
441
442 char *ll_get_fsname(struct inode *inode)
443 {
444         struct lustre_sb_info *lsi = s2lsi(inode->i_sb);
445         char *ptr, *fsname;
446         int len;
447
448         OBD_ALLOC(fsname, MGS_PARAM_MAXLEN);
449         len = strlen(lsi->lsi_lmd->lmd_profile);
450         ptr = strrchr(lsi->lsi_lmd->lmd_profile, '-');
451         if (ptr && (strcmp(ptr, "-client") == 0))
452                 len -= 7;
453         strncpy(fsname, lsi->lsi_lmd->lmd_profile, len);
454         fsname[len] = '\0';
455
456         return fsname;
457 }
458
459 int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump,
460                      int set_default)
461 {
462         struct ll_sb_info *sbi = ll_i2sbi(inode);
463         struct mdc_op_data data;
464         struct ptlrpc_request *req = NULL;
465         struct lustre_sb_info *lsi = s2lsi(inode->i_sb);
466         struct obd_device *mgc = lsi->lsi_mgc;
467         char *fsname = NULL, *param = NULL;
468
469         struct iattr attr = { 0 };
470         int rc = 0;
471
472         /*
473          * This is coming from userspace, so should be in
474          * local endian.  But the MDS would like it in little
475          * endian, so we swab it before we send it.
476          */
477         if (lump->lmm_magic != LOV_USER_MAGIC)
478                 RETURN(-EINVAL);
479
480         if (lump->lmm_magic != cpu_to_le32(LOV_USER_MAGIC))
481                 lustre_swab_lov_user_md(lump);
482
483         ll_prepare_mdc_op_data(&data, inode, NULL, NULL, 0, 0, NULL);
484
485         /* swabbing is done in lov_setstripe() on server side */
486         rc = mdc_setattr(sbi->ll_mdc_exp, &data,
487                          &attr, lump, sizeof(*lump), NULL, 0, &req);
488         if (rc) {
489                 ptlrpc_req_finished(req);
490                 if (rc != -EPERM && rc != -EACCES)
491                         CERROR("mdc_setattr fails: rc = %d\n", rc);
492                 return rc;
493         }
494         ptlrpc_req_finished(req);
495
496         if (set_default && mgc->u.cli.cl_mgc_mgsexp) {
497                 OBD_ALLOC(param, MGS_PARAM_MAXLEN);
498
499                 /* Get fsname and assume devname to be -MDT0000. */
500                 fsname = ll_get_fsname(inode);
501                 /* Set root stripesize */
502                 sprintf(param, "%s-MDT0000.lov.stripesize=%u", fsname,
503                         lump->lmm_stripe_size);
504                 rc = ll_send_mgc_param(mgc->u.cli.cl_mgc_mgsexp, param);
505                 if (rc)
506                         goto end;
507
508                 /* Set root stripecount */
509                 sprintf(param, "%s-MDT0000.lov.stripecount=%u", fsname,
510                         lump->lmm_stripe_count);
511                 rc = ll_send_mgc_param(mgc->u.cli.cl_mgc_mgsexp, param);
512                 if (rc)
513                         goto end;
514
515                 /* Set root stripeoffset */
516                 sprintf(param, "%s-MDT0000.lov.stripeoffset=%u", fsname,
517                         lump->lmm_stripe_offset);
518                 rc = ll_send_mgc_param(mgc->u.cli.cl_mgc_mgsexp, param);
519                 if (rc)
520                         goto end;
521 end:
522                 if (fsname)
523                         OBD_FREE(fsname, MGS_PARAM_MAXLEN);
524                 if (param)
525                         OBD_FREE(param, MGS_PARAM_MAXLEN);
526         }
527         return rc;
528 }
529
530 int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmmp,
531                      int *lmm_size, struct ptlrpc_request **request)
532 {
533         struct ll_sb_info *sbi = ll_i2sbi(inode);
534         struct ll_fid     fid;
535         struct mds_body   *body;
536         struct lov_mds_md *lmm = NULL;
537         struct ptlrpc_request *req = NULL;
538         int rc, lmmsize;
539
540         ll_inode2fid(&fid, inode);
541
542         rc = ll_get_max_mdsize(sbi, &lmmsize);
543         if (rc)
544                 RETURN(rc);
545
546         rc = mdc_getattr(sbi->ll_mdc_exp, &fid,
547                         OBD_MD_FLEASIZE|OBD_MD_FLDIREA,
548                         lmmsize, &req);
549         if (rc < 0) {
550                 CDEBUG(D_INFO, "mdc_getattr failed on inode "
551                        "%lu/%u: rc %d\n", inode->i_ino,
552                        inode->i_generation, rc);
553                 GOTO(out, rc);
554         }
555         body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF,
556                         sizeof(*body));
557         LASSERT(body != NULL); /* checked by mdc_getattr_name */
558         /* swabbed by mdc_getattr_name */
559         LASSERT(lustre_rep_swabbed(req, REPLY_REC_OFF));
560
561         lmmsize = body->eadatasize;
562
563         if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
564             lmmsize == 0) {
565                 GOTO(out, rc = -ENODATA);
566         }
567
568         lmm = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF + 1, lmmsize);
569         LASSERT(lmm != NULL);
570         LASSERT(lustre_rep_swabbed(req, REPLY_REC_OFF + 1));
571
572         /*
573          * This is coming from the MDS, so is probably in
574          * little endian.  We convert it to host endian before
575          * passing it to userspace.
576          */
577         if ((LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) &&
578             (cpu_to_le32(LOV_MAGIC) == lmm->lmm_magic))
579                 lustre_swab_lov_user_md((struct lov_user_md *)lmm);
580 out:
581         *lmmp = lmm;
582         *lmm_size = lmmsize;
583         *request = req;
584         return rc;
585 }
586
587 static int ll_dir_ioctl(struct inode *inode, struct file *file,
588                         unsigned int cmd, unsigned long arg)
589 {
590         struct ll_sb_info *sbi = ll_i2sbi(inode);
591         struct obd_ioctl_data *data;
592         ENTRY;
593
594         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), cmd=%#x\n",
595                inode->i_ino, inode->i_generation, inode, cmd);
596
597         /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
598         if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
599                 return -ENOTTY;
600
601         ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
602         switch(cmd) {
603         case EXT3_IOC_GETFLAGS:
604         case EXT3_IOC_SETFLAGS:
605                 RETURN(ll_iocontrol(inode, file, cmd, arg));
606         case EXT3_IOC_GETVERSION_OLD:
607         case EXT3_IOC_GETVERSION:
608                 RETURN(put_user(inode->i_generation, (int *)arg));
609         /* We need to special case any other ioctls we want to handle,
610          * to send them to the MDS/OST as appropriate and to properly
611          * network encode the arg field.
612         case EXT3_IOC_SETVERSION_OLD:
613         case EXT3_IOC_SETVERSION:
614         */
615         case IOC_MDC_LOOKUP: {
616                 struct ptlrpc_request *request = NULL;
617                 struct ll_fid fid;
618                 char *buf = NULL;
619                 char *filename;
620                 int namelen, rc, len = 0;
621
622                 rc = obd_ioctl_getdata(&buf, &len, (void *)arg);
623                 if (rc)
624                         RETURN(rc);
625                 data = (void *)buf;
626
627                 filename = data->ioc_inlbuf1;
628                 namelen = data->ioc_inllen1;
629
630                 if (namelen < 1) {
631                         CDEBUG(D_INFO, "IOC_MDC_LOOKUP missing filename\n");
632                         GOTO(out, rc = -EINVAL);
633                 }
634
635                 ll_inode2fid(&fid, inode);
636                 rc = mdc_getattr_name(sbi->ll_mdc_exp, &fid, filename, namelen,
637                                       OBD_MD_FLID, 0, &request);
638                 if (rc < 0) {
639                         CDEBUG(D_INFO, "mdc_getattr_name: %d\n", rc);
640                         GOTO(out, rc);
641                 }
642
643                 ptlrpc_req_finished(request);
644
645                 EXIT;
646         out:
647                 obd_ioctl_freedata(buf, len);
648                 return rc;
649         }
650         case LL_IOC_LOV_SETSTRIPE: {
651                 struct lov_user_md lum, *lump = (struct lov_user_md *)arg;
652                 int rc = 0;
653                 int set_default = 0;
654
655                 LASSERT(sizeof(lum) == sizeof(*lump));
656                 LASSERT(sizeof(lum.lmm_objects[0]) ==
657                         sizeof(lump->lmm_objects[0]));
658                 rc = copy_from_user(&lum, lump, sizeof(lum));
659                 if (rc)
660                         return(-EFAULT);
661
662                 if (inode->i_sb->s_root == file->f_dentry)
663                         set_default = 1;
664
665                 rc = ll_dir_setstripe(inode, &lum, set_default);
666
667                 return rc;
668         }
669         case LL_IOC_OBD_STATFS:
670                 RETURN(ll_obd_statfs(inode, (void *)arg));
671         case LL_IOC_LOV_GETSTRIPE:
672         case LL_IOC_MDC_GETINFO:
673         case IOC_MDC_GETFILEINFO:
674         case IOC_MDC_GETFILESTRIPE: {
675                 struct ptlrpc_request *request = NULL;
676                 struct mds_body *body;
677                 struct lov_user_md *lump;
678                 struct lov_mds_md *lmm = NULL;
679                 char *filename = NULL;
680                 int rc, lmmsize;
681
682                 if (cmd == IOC_MDC_GETFILEINFO ||
683                     cmd == IOC_MDC_GETFILESTRIPE) {
684                         filename = getname((const char *)arg);
685                         if (IS_ERR(filename))
686                                 RETURN(PTR_ERR(filename));
687
688                         rc = ll_lov_getstripe_ea_info(inode, filename, &lmm,
689                                                       &lmmsize, &request);
690                 } else {
691                         rc = ll_dir_getstripe(inode, &lmm, &lmmsize, &request);
692                 }
693
694                 if (request) {
695                         body = lustre_msg_buf(request->rq_repmsg, REPLY_REC_OFF,
696                                               sizeof(*body));
697                         LASSERT(body != NULL); /* checked by mdc_getattr_name */
698                         /* swabbed by mdc_getattr_name */
699                         LASSERT(lustre_rep_swabbed(request, REPLY_REC_OFF));
700                 } else {
701                         GOTO(out_req, rc);
702                 }
703
704                 if (rc < 0) {
705                         if (rc == -ENODATA && (cmd == IOC_MDC_GETFILEINFO ||
706                                                cmd == LL_IOC_MDC_GETINFO))
707                                 GOTO(skip_lmm, rc = 0);
708                         else
709                                 GOTO(out_req, rc);
710                 }
711
712                 if (cmd == IOC_MDC_GETFILESTRIPE ||
713                     cmd == LL_IOC_LOV_GETSTRIPE) {
714                         lump = (struct lov_user_md *)arg;
715                 } else {
716                         struct lov_user_mds_data *lmdp;
717                         lmdp = (struct lov_user_mds_data *)arg;
718                         lump = &lmdp->lmd_lmm;
719                 }
720                 rc = copy_to_user(lump, lmm, lmmsize);
721                 if (rc)
722                         GOTO(out_lmm, rc = -EFAULT);
723         skip_lmm:
724                 if (cmd == IOC_MDC_GETFILEINFO || cmd == LL_IOC_MDC_GETINFO) {
725                         struct lov_user_mds_data *lmdp;
726                         lstat_t st = { 0 };
727
728                         st.st_dev     = inode->i_sb->s_dev;
729                         st.st_mode    = body->mode;
730                         st.st_nlink   = body->nlink;
731                         st.st_uid     = body->uid;
732                         st.st_gid     = body->gid;
733                         st.st_rdev    = body->rdev;
734                         st.st_size    = body->size;
735                         st.st_blksize = CFS_PAGE_SIZE;
736                         st.st_blocks  = body->blocks;
737                         st.st_atime   = body->atime;
738                         st.st_mtime   = body->mtime;
739                         st.st_ctime   = body->ctime;
740                         st.st_ino     = body->ino;
741
742                         lmdp = (struct lov_user_mds_data *)arg;
743                         rc = copy_to_user(&lmdp->lmd_st, &st, sizeof(st));
744                         if (rc)
745                                 GOTO(out_lmm, rc = -EFAULT);
746                 }
747
748                 EXIT;
749         out_lmm:
750                 if (lmm && lmm->lmm_magic == LOV_MAGIC_JOIN)
751                         OBD_FREE(lmm, lmmsize);
752         out_req:
753                 ptlrpc_req_finished(request);
754                 if (filename)
755                         putname(filename);
756                 return rc;
757         }
758         case IOC_LOV_GETINFO: {
759                 struct lov_user_mds_data *lumd;
760                 struct lov_stripe_md *lsm;
761                 struct lov_user_md *lum;
762                 struct lov_mds_md *lmm;
763                 int lmmsize;
764                 lstat_t st;
765                 int rc;
766
767                 lumd = (struct lov_user_mds_data *)arg;
768                 lum = &lumd->lmd_lmm;
769
770                 rc = ll_get_max_mdsize(sbi, &lmmsize);
771                 if (rc)
772                         RETURN(rc);
773
774                 OBD_ALLOC(lmm, lmmsize);
775                 rc = copy_from_user(lmm, lum, lmmsize);
776                 if (rc)
777                         GOTO(free_lmm, rc = -EFAULT);
778
779                 if (lmm->lmm_magic != LOV_USER_MAGIC)
780                         GOTO(free_lmm, rc = -EINVAL);
781
782                 if (LOV_USER_MAGIC != cpu_to_le32(LOV_USER_MAGIC) &&
783                     cpu_to_le32(LOV_USER_MAGIC) == cpu_to_le32(lmm->lmm_magic)) {
784                         lustre_swab_lov_user_md_objects((struct lov_user_md *)lmm);
785                         lustre_swab_lov_user_md((struct lov_user_md *)lmm);
786                 }
787
788                 rc = obd_unpackmd(sbi->ll_osc_exp, &lsm, lmm, lmmsize);
789                 if (rc < 0)
790                         GOTO(free_lmm, rc = -ENOMEM);
791
792                 rc = obd_checkmd(sbi->ll_osc_exp, sbi->ll_mdc_exp, lsm);
793                 if (rc)
794                         GOTO(free_lsm, rc);
795
796                 /* Perform glimpse_size operation. */
797                 memset(&st, 0, sizeof(st));
798
799                 rc = ll_glimpse_ioctl(sbi, lsm, &st);
800                 if (rc)
801                         GOTO(free_lsm, rc);
802
803                 rc = copy_to_user(&lumd->lmd_st, &st, sizeof(st));
804                 if (rc)
805                         GOTO(free_lsm, rc = -EFAULT);
806
807                 EXIT;
808         free_lsm:
809                 obd_free_memmd(sbi->ll_osc_exp, &lsm);
810         free_lmm:
811                 OBD_FREE(lmm, lmmsize);
812                 return rc;
813         }
814         case OBD_IOC_LLOG_CATINFO: {
815                 struct ptlrpc_request *req = NULL;
816                 char *buf = NULL;
817                 int rc, len = 0;
818                 char *bufs[3] = { NULL }, *str;
819                 int lens[3] = { sizeof(struct ptlrpc_body) };
820                 int size[2] = { sizeof(struct ptlrpc_body) };
821
822                 rc = obd_ioctl_getdata(&buf, &len, (void *)arg);
823                 if (rc)
824                         RETURN(rc);
825                 data = (void *)buf;
826
827                 if (!data->ioc_inlbuf1) {
828                         obd_ioctl_freedata(buf, len);
829                         RETURN(-EINVAL);
830                 }
831
832                 lens[REQ_REC_OFF] = data->ioc_inllen1;
833                 bufs[REQ_REC_OFF] = data->ioc_inlbuf1;
834                 if (data->ioc_inllen2) {
835                         lens[REQ_REC_OFF + 1] = data->ioc_inllen2;
836                         bufs[REQ_REC_OFF + 1] = data->ioc_inlbuf2;
837                 } else {
838                         lens[REQ_REC_OFF + 1] = 0;
839                         bufs[REQ_REC_OFF + 1] = NULL;
840                 }
841
842                 req = ptlrpc_prep_req(sbi2mdc(sbi)->cl_import,
843                                       LUSTRE_LOG_VERSION, LLOG_CATINFO, 3, lens,
844                                       bufs);
845                 if (!req)
846                         GOTO(out_catinfo, rc = -ENOMEM);
847
848                 size[REPLY_REC_OFF] = data->ioc_plen1;
849                 ptlrpc_req_set_repsize(req, 2, size);
850
851                 rc = ptlrpc_queue_wait(req);
852                 str = lustre_msg_string(req->rq_repmsg, REPLY_REC_OFF,
853                                         data->ioc_plen1);
854                 if (!rc)
855                         rc = copy_to_user(data->ioc_pbuf1, str,data->ioc_plen1);
856                 ptlrpc_req_finished(req);
857         out_catinfo:
858                 obd_ioctl_freedata(buf, len);
859                 RETURN(rc);
860         }
861         case OBD_IOC_QUOTACHECK: {
862                 struct obd_quotactl *oqctl;
863                 int rc, error = 0;
864
865                 if (!capable(CAP_SYS_ADMIN))
866                         RETURN(-EPERM);
867
868                 OBD_ALLOC_PTR(oqctl);
869                 if (!oqctl)
870                         RETURN(-ENOMEM);
871                 oqctl->qc_type = arg;
872                 rc = obd_quotacheck(sbi->ll_mdc_exp, oqctl);
873                 if (rc < 0) {
874                         CDEBUG(D_INFO, "mdc_quotacheck failed: rc %d\n", rc);
875                         error = rc;
876                 }
877
878                 rc = obd_quotacheck(sbi->ll_osc_exp, oqctl);
879                 if (rc < 0)
880                         CDEBUG(D_INFO, "osc_quotacheck failed: rc %d\n", rc);
881
882                 OBD_FREE_PTR(oqctl);
883                 return error ?: rc;
884         }
885         case OBD_IOC_POLL_QUOTACHECK: {
886                 struct if_quotacheck *check;
887                 int rc;
888
889                 if (!capable(CAP_SYS_ADMIN))
890                         RETURN(-EPERM);
891
892                 OBD_ALLOC_PTR(check);
893                 if (!check)
894                         RETURN(-ENOMEM);
895
896                 rc = obd_iocontrol(cmd, sbi->ll_mdc_exp, 0, (void *)check,
897                                    NULL);
898                 if (rc) {
899                         CDEBUG(D_QUOTA, "mdc ioctl %d failed: %d\n", cmd, rc);
900                         if (copy_to_user((void *)arg, check, sizeof(*check)))
901                                 rc = -EFAULT;
902                         GOTO(out_poll, rc);
903                 }
904
905                 rc = obd_iocontrol(cmd, sbi->ll_osc_exp, 0, (void *)check,
906                                    NULL);
907                 if (rc) {
908                         CDEBUG(D_QUOTA, "osc ioctl %d failed: %d\n", cmd, rc);
909                         if (copy_to_user((void *)arg, check, sizeof(*check)))
910                                 rc = -EFAULT;
911                         GOTO(out_poll, rc);
912                 }
913         out_poll:
914                 OBD_FREE_PTR(check);
915                 RETURN(rc);
916         }
917 #ifdef HAVE_QUOTA_SUPPORT
918         case OBD_IOC_QUOTACTL: {
919                 struct if_quotactl *qctl;
920                 struct obd_quotactl *oqctl;
921
922                 int cmd, type, id, rc = 0;
923
924                 OBD_ALLOC_PTR(qctl);
925                 if (!qctl)
926                         RETURN(-ENOMEM);
927
928                 OBD_ALLOC_PTR(oqctl);
929                 if (!oqctl) {
930                         OBD_FREE_PTR(qctl);
931                         RETURN(-ENOMEM);
932                 }
933                 if (copy_from_user(qctl, (void *)arg, sizeof(*qctl)))
934                         GOTO(out_quotactl, rc = -EFAULT);
935
936                 cmd = qctl->qc_cmd;
937                 type = qctl->qc_type;
938                 id = qctl->qc_id;
939                 switch (cmd) {
940                 case LUSTRE_Q_INVALIDATE:
941                 case Q_QUOTAON:
942                 case Q_QUOTAOFF:
943                 case Q_SETQUOTA:
944                 case Q_SETINFO:
945                         if (!capable(CAP_SYS_ADMIN))
946                                 GOTO(out_quotactl, rc = -EPERM);
947                         break;
948                 case Q_GETQUOTA:
949                         if (((type == USRQUOTA && current->euid != id) ||
950                              (type == GRPQUOTA && !in_egroup_p(id))) &&
951                             !capable(CAP_SYS_ADMIN))
952                                 GOTO(out_quotactl, rc = -EPERM);
953
954                         /* XXX: dqb_valid is borrowed as a flag to mark that
955                          *      only mds quota is wanted */
956                         if (qctl->qc_dqblk.dqb_valid) {
957                                 qctl->obd_uuid = sbi->ll_mdc_exp->exp_obd->
958                                                         u.cli.cl_target_uuid;
959                                 qctl->qc_dqblk.dqb_valid = 0;
960                         }
961
962                         break;
963                 case Q_GETINFO:
964                         break;
965                 default:
966                         CERROR("unsupported quotactl op: %#x\n", cmd);
967                         GOTO(out_quotactl, -ENOTTY);
968                 }
969
970                 QCTL_COPY(oqctl, qctl);
971
972                 if (qctl->obd_uuid.uuid[0]) {
973                         struct obd_device *obd;
974                         struct obd_uuid *uuid = &qctl->obd_uuid;
975
976                         obd = class_find_client_notype(uuid,
977                                          &sbi->ll_osc_exp->exp_obd->obd_uuid);
978                         if (!obd)
979                                 GOTO(out_quotactl, rc = -ENOENT);
980
981                         if (cmd == Q_GETINFO)
982                                 oqctl->qc_cmd = Q_GETOINFO;
983                         else if (cmd == Q_GETQUOTA)
984                                 oqctl->qc_cmd = Q_GETOQUOTA;
985                         else
986                                 GOTO(out_quotactl, rc = -EINVAL);
987
988                         if (sbi->ll_mdc_exp->exp_obd == obd) {
989                                 rc = obd_quotactl(sbi->ll_mdc_exp, oqctl);
990                         } else {
991                                 int i;
992                                 struct obd_export *exp;
993                                 struct lov_obd *lov = &sbi->ll_osc_exp->
994                                                             exp_obd->u.lov;
995
996                                 for (i = 0; i < lov->desc.ld_tgt_count; i++) {
997                                         if (!lov->lov_tgts[i] ||
998                                             !lov->lov_tgts[i]->ltd_active)
999                                                 continue;
1000                                         exp = lov->lov_tgts[i]->ltd_exp;
1001                                         if (exp->exp_obd == obd) {
1002                                                 rc = obd_quotactl(exp, oqctl);
1003                                                 break;
1004                                         }
1005                                 }
1006                         }
1007
1008                         oqctl->qc_cmd = cmd;
1009                         QCTL_COPY(qctl, oqctl);
1010
1011                         if (copy_to_user((void *)arg, qctl, sizeof(*qctl)))
1012                                 rc = -EFAULT;
1013
1014                         GOTO(out_quotactl, rc);
1015                 }
1016
1017                 rc = obd_quotactl(sbi->ll_mdc_exp, oqctl);
1018                 if (rc && rc != -EBUSY && cmd == Q_QUOTAON) {
1019                         oqctl->qc_cmd = Q_QUOTAOFF;
1020                         obd_quotactl(sbi->ll_mdc_exp, oqctl);
1021                 }
1022
1023                 QCTL_COPY(qctl, oqctl);
1024
1025                 if (copy_to_user((void *)arg, qctl, sizeof(*qctl)))
1026                         rc = -EFAULT;
1027         out_quotactl:
1028                 OBD_FREE_PTR(qctl);
1029                 OBD_FREE_PTR(oqctl);
1030                 RETURN(rc);
1031         }
1032 #endif /* HAVE_QUOTA_SUPPORT */
1033         case OBD_IOC_GETNAME_OLD:
1034         case OBD_IOC_GETNAME: {
1035                 struct obd_device *obd = class_exp2obd(sbi->ll_osc_exp);
1036                 if (!obd)
1037                         RETURN(-EFAULT);
1038                 if (copy_to_user((void *)arg, obd->obd_name,
1039                                 strlen(obd->obd_name) + 1))
1040                         RETURN (-EFAULT);
1041                 RETURN(0);
1042         }
1043         default:
1044                 RETURN(obd_iocontrol(cmd, sbi->ll_osc_exp,0,NULL,(void *)arg));
1045         }
1046 }
1047
/*
 * File-operations table for directories on the Lustre client.
 *
 * Hooks installed:
 *   .open    -> ll_file_open
 *   .release -> ll_file_release
 *   .read    -> generic_read_dir
 *   .readdir -> ll_readdir
 *   .ioctl   -> ll_dir_ioctl
 *
 * Every other member of struct file_operations is left out of the
 * initializer and is therefore zero-initialized (NULL).
 *
 * NOTE(review): ll_file_open/ll_file_release are not defined in this part
 * of the file; presumably they are the handlers shared with regular files
 * and are declared in llite_internal.h -- confirm.
 * NOTE(review): generic_read_dir is presumably the generic kernel helper
 * that rejects read() on a directory -- confirm against <linux/fs.h>.
 */
1048 struct file_operations ll_dir_operations = {
1049         .open     = ll_file_open,
1050         .release  = ll_file_release,
1051         .read     = generic_read_dir,
1052         .readdir  = ll_readdir,
1053         .ioctl    = ll_dir_ioctl
1054 };
1055