1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Directory code for lustre client.
42 #include <linux/pagemap.h>
44 #include <linux/version.h>
45 #include <linux/smp_lock.h>
46 #include <asm/uaccess.h>
47 #include <linux/buffer_head.h> // for wait_on_buffer
49 #define DEBUG_SUBSYSTEM S_LLITE
51 #include <obd_support.h>
52 #include <obd_class.h>
53 #include <lustre_lib.h>
54 #include <lustre/lustre_idl.h>
55 #include <lustre_lite.h>
56 #include <lustre_dlm.h>
57 #include "llite_internal.h"
59 #ifndef HAVE_PAGE_CHECKED
60 #ifdef HAVE_PG_FS_MISC
61 #define PageChecked(page) test_bit(PG_fs_misc, &(page)->flags)
62 #define SetPageChecked(page) set_bit(PG_fs_misc, &(page)->flags)
64 #error PageChecked or PageFsMisc not defined in kernel
/*
 * Read one directory page from the MDS (Lustre 1.8 wire protocol:
 * struct ll_fid / struct mds_body).  Used as the ->readpage method of
 * ll_dir_aops.  On a successful reply the cached directory i_size is
 * refreshed from the server-supplied body->size and the page is marked
 * up to date.
 *
 * NOTE(review): this view of the file is a lossy extraction -- braces,
 * the declarations of `offset'/`rc', and the error path are not visible
 * here.  Confirm against the complete source before modifying.
 */
68 /* returns the page unlocked, but with a reference */
69 static int ll_dir_readpage(struct file *file, struct page *page)
71 struct inode *inode = page->mapping->host;
72 struct ll_fid mdc_fid;
74 struct ptlrpc_request *request;
75 struct mds_body *body;
/* byte offset of this page within the directory */
79 offset = (__u64)page->index << CFS_PAGE_SHIFT;
80 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) off "LPU64"\n",
81 inode->i_ino, inode->i_generation, inode, offset);
/* build the 1.8-style fid (ino/generation) identifying this directory */
83 ll_pack_fid(&mdc_fid, inode->i_ino, inode->i_generation, S_IFDIR);
/* synchronous RPC: fetch the page contents from the MDS */
85 rc = mdc_readpage(ll_i2sbi(inode)->ll_mdc_exp, &mdc_fid,
86 offset, page, &request);
88 body = lustre_msg_buf(request->rq_repmsg, REPLY_REC_OFF,
90 LASSERT(body != NULL); /* checked by mdc_readpage() */
91 /* swabbed by mdc_readpage() */
92 LASSERT(lustre_rep_swabbed(request, REPLY_REC_OFF));
/* server size is authoritative; update the cached inode size if stale */
94 if (body->size != i_size_read(inode)) {
95 ll_inode_size_lock(inode, 0);
96 i_size_write(inode, body->size);
97 ll_inode_size_unlock(inode, 0);
100 SetPageUptodate(page);
102 ptlrpc_req_finished(request);
/*
 * Address-space operations for directory inodes: only ->readpage is
 * provided (ll_dir_readpage).  The MS_HAS_NEW_AOPS branch wraps the same
 * method in the extended aops structure used by newer kernels.
 */
109 #ifndef MS_HAS_NEW_AOPS
110 struct address_space_operations ll_dir_aops = {
111 .readpage = ll_dir_readpage,
114 struct address_space_operations_ext ll_dir_aops = {
115 .orig_aops.readpage = ll_dir_readpage,
/*
 * Mask selecting the filesystem-block-aligned part of an in-page offset:
 * ~(blocksize - 1).  Used to detect entries crossing a block boundary.
 */
119 static inline unsigned ll_dir_page_mask(struct inode *inode)
121 return ~(inode->i_sb->s_blocksize - 1);
/*
 * Validate one on-page directory entry (1.8 format).  Sets `msg' to a
 * description of the first problem found; the CERROR below is the
 * error-reporting path.  Checks, in order: minimal record length,
 * 4-byte alignment, rec_len large enough for the name, and that the
 * record does not straddle a filesystem block boundary.
 */
125 * Check consistency of a single entry.
127 static int ll_dir_check_entry(struct inode *dir, struct ll_dir_entry *ent,
128 unsigned offset, unsigned rec_len, pgoff_t index)
133 * Consider adding more checks.
136 if (unlikely(rec_len < ll_dir_rec_len(1)))
137 msg = "entry is too short";
138 else if (unlikely(rec_len & 3))
139 msg = "wrong alignment";
140 else if (unlikely(rec_len < ll_dir_rec_len(ent->lde_name_len)))
141 msg = "rec_len doesn't match name_len";
/* (offset ^ (offset + rec_len - 1)) differing in block-mask bits means
 * first and last byte of the record live in different blocks */
142 else if (unlikely(((offset + rec_len - 1) ^ offset) &
143 ll_dir_page_mask(dir)))
144 msg = "directory entry across blocks";
/* reached with msg set: report the corrupt entry with full context */
147 CERROR("%s: bad entry in directory %lu/%u: %s - "
148 "offset=%lu+%u, inode=%lu, rec_len=%d,"
149 " name_len=%d\n", ll_i2mdcexp(dir)->exp_obd->obd_name,
150 dir->i_ino, dir->i_generation, msg,
151 index << CFS_PAGE_SHIFT,
152 offset, (unsigned long)le32_to_cpu(ent->lde_inode),
153 rec_len, ent->lde_name_len);
/*
 * Validate all entries on a freshly read directory page and mark it
 * PageChecked on completion.  For the last (partial) page of the
 * directory, pads the tail with dummy forwarding entries (rec_len ==
 * blocksize, name_len == 0) so that iteration code can assume every
 * block is fully populated.
 */
157 static void ll_dir_check_page(struct inode *dir, struct page *page)
160 unsigned size = dir->i_sb->s_blocksize;
161 char *addr = page_address(page);
166 struct ll_dir_entry *ent;
/* is this the last page of the directory? */
169 if ((i_size_read(dir) >> CFS_PAGE_SHIFT) == (__u64)page->index) {
173 limit = i_size_read(dir) & ~CFS_PAGE_MASK;
/* directory size must be a whole number of blocks */
174 if (limit & (size - 1)) {
175 CERROR("%s: dir %lu/%u size %llu doesn't match %u\n",
176 ll_i2mdcexp(dir)->exp_obd->obd_name, dir->i_ino,
177 dir->i_generation, i_size_read(dir), size);
181 * Place dummy forwarding entries to streamline
/* pad each remaining block with an empty whole-block entry */
184 for (off = limit; off < CFS_PAGE_SIZE; off += size) {
185 ent = ll_entry_at(addr, off);
186 ent->lde_rec_len = cpu_to_le16(size);
187 ent->lde_name_len = 0;
192 limit = CFS_PAGE_SIZE;
/* walk every record up to `limit', stopping at the first bad one */
195 !err && off <= limit - ll_dir_rec_len(1); off += reclen) {
196 ent = ll_entry_at(addr, off);
197 reclen = le16_to_cpu(ent->lde_rec_len);
198 err = ll_dir_check_entry(dir, ent, off, reclen, page->index);
/* clean walk must land exactly on `limit'; otherwise the final
 * record spans the page boundary */
201 if (!err && off != limit) {
202 ent = ll_entry_at(addr, off);
203 CERROR("%s: entry in directory %lu/%u spans the page boundary "
204 "offset="LPU64"+%u, inode=%lu\n",
205 ll_i2mdcexp(dir)->exp_obd->obd_name,
206 dir->i_ino, dir->i_generation,
207 (__u64)page->index << CFS_PAGE_SHIFT,
208 off, (unsigned long)le32_to_cpu(ent->lde_inode));
213 SetPageChecked(page);
/*
 * Return directory page `n' of `dir' (1.8 path), read through the page
 * cache under DLM protection.  First tries to match an existing CR
 * UPDATE inodebits lock; on a miss, enqueues one via IT_READDIR intent.
 * The page is validated by ll_dir_check_page() before being returned.
 * On I/O failure the out-of-line path returns ERR_PTR(-EIO); the lock
 * reference is dropped at out_unlock.
 */
216 struct page *ll_get_dir_page(struct inode *dir, unsigned long n)
218 struct ldlm_res_id res_id;
219 struct lustre_handle lockh;
220 struct obd_device *obddev = class_exp2obd(ll_i2sbi(dir)->ll_mdc_exp);
221 struct address_space *mapping = dir->i_mapping;
223 ldlm_policy_data_t policy = {.l_inodebits = {MDS_INODELOCK_UPDATE} };
/* look for an already-granted CR lock on this directory's resource */
226 fid_build_reg_res_name(ll_inode_lu_fid(dir), &res_id);
227 rc = ldlm_lock_match(obddev->obd_namespace, LDLM_FL_BLOCK_GRANTED,
228 &res_id, LDLM_IBITS, &policy, LCK_CR, &lockh);
/* no cached lock: enqueue a fresh one with a readdir intent */
230 struct lookup_intent it = { .it_op = IT_READDIR };
231 struct ldlm_enqueue_info einfo = { LDLM_IBITS, LCK_CR,
232 ll_mdc_blocking_ast, ldlm_completion_ast, NULL, dir };
233 struct ptlrpc_request *request;
234 struct mdc_op_data data = { { 0 } };
236 ll_prepare_mdc_op_data(&data, dir, NULL, NULL, 0, 0, NULL);
238 rc = mdc_enqueue(ll_i2sbi(dir)->ll_mdc_exp, &einfo, &it,
239 &data, &lockh, NULL, 0, 0);
241 request = (struct ptlrpc_request *)it.d.lustre.it_data;
243 ptlrpc_req_finished(request);
245 CERROR("lock enqueue: rc: %d\n", rc);
249 ldlm_lock_dump_handle(D_OTHER, &lockh);
/* read (or find cached) page `n'; filler is ll_dir_readpage */
251 page = read_cache_page(mapping, n,
252 (filler_t*)mapping->a_ops->readpage, NULL);
254 GOTO(out_unlock, page);
258 if (!PageUptodate(page))
/* run consistency checks once per page life */
260 if (!PageChecked(page))
261 ll_dir_check_page(dir, page);
266 ldlm_lock_decref(&lockh, LCK_CR);
271 page = ERR_PTR(-EIO);
/*
 * Re-align a possibly mid-entry offset (e.g. after lseek) to an actual
 * entry boundary: walk entries from the start of the enclosing chunk
 * (offset & mask) until reaching or passing `offset', and return the
 * byte offset of that entry.
 */
275 static inline unsigned ll_dir_validate_entry(char *base, unsigned offset,
278 struct ll_dir_entry *de = ll_entry_at(base, offset);
279 struct ll_dir_entry *p = ll_entry_at(base, offset & mask);
280 while (p < de && p->lde_rec_len > 0)
281 p = ll_dir_next_entry(p);
282 return (char *)p - base;
/*
 * Map on-disk LL_DIR_FT_* file-type codes (ext2-compatible) to the
 * kernel DT_* constants expected by filldir callbacks.
 */
286 * File type constants. The same as in ext2 for compatibility.
301 static unsigned char ll_dir_filetype_table[LL_DIR_FT_MAX] = {
302 [LL_DIR_FT_UNKNOWN] = DT_UNKNOWN,
303 [LL_DIR_FT_REG_FILE] = DT_REG,
304 [LL_DIR_FT_DIR] = DT_DIR,
305 [LL_DIR_FT_CHRDEV] = DT_CHR,
306 [LL_DIR_FT_BLKDEV] = DT_BLK,
307 [LL_DIR_FT_FIFO] = DT_FIFO,
308 [LL_DIR_FT_SOCK] = DT_SOCK,
309 [LL_DIR_FT_SYMLINK] = DT_LNK,
/*
 * Feed the live entries of one directory page to `filldir', starting at
 * *offset.  `base' is the page's byte offset in the directory; the
 * f_pos-style cookie passed to filldir is (base | in-page offset).
 * Entries with lde_inode == 0 are skipped (deleted/padding).  *offset is
 * updated to the entry currently being submitted.
 */
313 * Process one page. Returns:
315 * -ve: filldir commands readdir to stop.
316 * +ve: number of entries submitted to filldir.
317 * 0: no live entries on this page.
320 static int ll_readdir_page(char *addr, __u64 base, unsigned *offset,
321 filldir_t filldir, void *cookie)
323 struct ll_dir_entry *de;
327 de = ll_entry_at(addr, *offset);
/* last position a minimal record can start at on this page */
328 end = addr + CFS_PAGE_SIZE - ll_dir_rec_len(1);
329 for (nr = 0 ;(char*)de <= end; de = ll_dir_next_entry(de)) {
330 if (de->lde_inode != 0) {
332 *offset = (char *)de - addr;
/* file type is masked into table range before lookup */
333 if (filldir(cookie, de->lde_name, de->lde_name_len,
334 base | *offset, le32_to_cpu(de->lde_inode),
335 ll_dir_filetype_table[de->lde_file_type &
336 (LL_DIR_FT_MAX - 1)]))
/*
 * readdir implementation for servers speaking the 1.8 protocol
 * (offset-based directory positions).  Iterates pages from f_pos to
 * i_size, re-validating the in-page offset after an lseek()
 * (f_version mismatch), and records the final position in f_pos /
 * f_version before touching atime.
 */
343 static int ll_readdir_18(struct file *filp, void *dirent, filldir_t filldir)
345 struct inode *inode = filp->f_dentry->d_inode;
346 loff_t pos = filp->f_pos;
347 unsigned offset = pos & ~CFS_PAGE_MASK;
348 pgoff_t idx = pos >> CFS_PAGE_SHIFT;
349 pgoff_t npages = dir_pages(inode);
350 unsigned chunk_mask = ll_dir_page_mask(inode);
/* f_pos may have been moved by lseek() since the last readdir */
351 int need_revalidate = (filp->f_version != inode->i_version);
353 int done; /* when this becomes negative --- stop iterating */
357 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) pos %llu/%llu\n",
358 inode->i_ino, inode->i_generation, inode,
359 pos, i_size_read(inode));
362 * Checking ->i_size without the lock. Should be harmless, as server
/* position already past the last possible entry: nothing to do */
365 if (pos > i_size_read(inode) - ll_dir_rec_len(1))
368 for (done = 0; idx < npages; idx++, offset = 0) {
370 * We can assume that all blocks on this page are filled with
371 * entries, because ll_dir_check_page() placed special dummy
378 CDEBUG(D_EXT2,"read %lu of dir %lu/%u page %lu/%lu "
380 CFS_PAGE_SIZE, inode->i_ino, inode->i_generation,
381 idx, npages, i_size_read(inode));
382 page = ll_get_dir_page(inode, idx);
384 /* size might have been updated by mdc_readpage */
385 npages = dir_pages(inode);
389 CERROR("error reading dir %lu/%u page %lu: rc %d\n",
390 inode->i_ino, inode->i_generation, idx, rc);
394 kaddr = page_address(page);
395 if (need_revalidate) {
397 * File offset was changed by lseek() and possibly
398 * points in the middle of an entry. Re-scan from the
399 * beginning of the chunk.
401 offset = ll_dir_validate_entry(kaddr, offset,
405 done = ll_readdir_page(kaddr, idx << CFS_PAGE_SHIFT,
406 &offset, filldir, dirent);
410 * Some entries were sent to the user space, return
416 * filldir is satisfied.
/* persist position and version for the next readdir call */
421 filp->f_pos = (idx << CFS_PAGE_SHIFT) | offset;
422 filp->f_version = inode->i_version;
423 touch_atime(filp->f_vfsmnt, filp->f_dentry);
/*
 * Placeholder for tracking a chain of hash-overflow pages (entries whose
 * hashes collide across a page boundary).  init/fini are currently
 * no-ops.  hash_x_index() converts a 64-bit directory hash into a page
 * cache index; on 32-bit kernels with 64-bit hashes the index is
 * derived differently (body partly outside this view).
 */
429 * Chain of hash overflow pages.
431 struct ll_dir_chain {
432 /* XXX something. Later */
435 static inline void ll_dir_chain_init(struct ll_dir_chain *chain)
439 static inline void ll_dir_chain_fini(struct ll_dir_chain *chain)
443 static inline unsigned long hash_x_index(__u64 hash, int hash64)
446 if (BITS_PER_LONG == 32 && hash64)
/*
 * Wire format of 2.0-protocol readdir pages: struct lu_dirent (one
 * entry, hash-keyed, with optional trailing attributes such as a fid or
 * a file type) and struct lu_dirpage (page header with hash range and
 * flags, followed by packed entries).  All fields are little-endian on
 * the wire.
 */
453 * Layout of readdir pages, as transmitted on wire.
456 /** valid if LUDA_FID is set. */
457 struct lu_fid lde_fid;
458 /** a unique entry identifier: a hash or an offset. */
460 /** total record length, including all attributes. */
464 /** optional variable size attributes following this entry.
465 * taken from enum lu_dirent_attrs.
468 /** name is followed by the attributes indicated in ->ldp_attrs, in
469 * their natural order. After the last attribute, padding bytes are
470 * added to make ->lde_reclen a multiple of 8.
476 __u64 ldp_hash_start;
481 struct lu_dirent ldp_entries[0];
485 * Definitions of optional directory entry attributes formats.
487 * Individual attributes do not have their length encoded in a generic way. It
488 * is assumed that consumer of an attribute knows its format. This means that
489 * it is impossible to skip over an unknown attribute, except by skipping over all
490 * remaining attributes (by using ->lde_reclen), which is not too
491 * constraining, because new server versions will append new attributes at
492 * the end of an entry.
496 * Fid directory attribute: a fid of an object referenced by the entry. This
497 * will be almost always requested by the client and supplied by the server.
499 * Aligned to 8 bytes.
501 /* To have compatibility with 1.8, lets have fid in lu_dirent struct. */
506 * Aligned to 2 bytes.
512 enum lu_dirpage_flags {
/*
 * Compute the on-wire size of a lu_dirent with an n-byte name and the
 * given attribute mask.  A LUDA_TYPE attribute is aligned to
 * sizeof(struct luda_type); the total record is rounded up to a
 * multiple of 8 bytes.
 */
516 static inline int lu_dirent_calc_size(int namelen, __u16 attr)
520 if (attr & LUDA_TYPE) {
521 const unsigned align = sizeof(struct luda_type) - 1;
522 size = (sizeof(struct lu_dirent) + namelen + align) & ~align;
523 size += sizeof(struct luda_type);
525 size = sizeof(struct lu_dirent) + namelen;
527 return (size + 7) & ~7;
/*
 * Extract the file type of a wire-format entry: when LUDA_TYPE is
 * present, the luda_type attribute sits after the (alignment-padded)
 * name; its lt_type is converted via CFS_IFTODT.
 */
531 * return IF_* type for given lu_dirent entry.
532 * IF_* flag should be converted to particular OS file type in
533 * platform llite module.
535 __u16 ll_dirent_type_get(struct lu_dirent *ent)
538 struct luda_type *lt;
541 if (le32_to_cpu(ent->lde_attrs) & LUDA_TYPE) {
542 const unsigned align = sizeof(struct luda_type) - 1;
/* round the name length up to luda_type alignment to locate the attr */
544 len = le16_to_cpu(ent->lde_namelen);
545 len = (len + align) & ~align;
546 lt = (void *) ent->lde_name + len;
547 type = CFS_IFTODT(le16_to_cpu(lt->lt_type));
/*
 * First entry of a dirpage, or NULL (via the non-visible branch) when
 * the page is flagged LDF_EMPTY.
 */
552 static inline struct lu_dirent *lu_dirent_start(struct lu_dirpage *dp)
554 if (le16_to_cpu(dp->ldp_flags) & LDF_EMPTY)
557 return dp->ldp_entries;
/*
 * Advance to the next wire-format entry; lde_reclen == 0 marks the last
 * entry on the page (the reclen-0 case is handled outside this view).
 */
560 static inline struct lu_dirent *lu_dirent_next(struct lu_dirent *ent)
562 struct lu_dirent *next;
564 if (le16_to_cpu(ent->lde_reclen) != 0)
565 next = ((void *)ent) + le16_to_cpu(ent->lde_reclen);
/*
 * Size of one wire-format entry.  The last entry of a page has
 * lde_reclen == 0, so its size must be recomputed from name length and
 * attribute mask.
 */
572 static inline int lu_dirent_size(struct lu_dirent *ent)
574 if (le16_to_cpu(ent->lde_reclen) == 0) {
575 return lu_dirent_calc_size(le16_to_cpu(ent->lde_namelen),
576 le32_to_cpu(ent->lde_attrs));
578 return le16_to_cpu(ent->lde_reclen);
/*
 * Kernel-version compatibility: older kernels protect the page-cache
 * radix tree with an rwlock (read_lock_irq), newer ones with a spinlock.
 */
581 #ifdef HAVE_RW_TREE_LOCK
582 #define TREE_READ_LOCK_IRQ(mapping) read_lock_irq(&(mapping)->tree_lock)
583 #define TREE_READ_UNLOCK_IRQ(mapping) read_unlock_irq(&(mapping)->tree_lock)
585 #define TREE_READ_LOCK_IRQ(mapping) spin_lock_irq(&(mapping)->tree_lock)
586 #define TREE_READ_UNLOCK_IRQ(mapping) spin_unlock_irq(&(mapping)->tree_lock)
/*
 * Read one directory page from the MDS in 2.0 interoperability mode
 * (hash-keyed pages, struct mdt_body).  The hash to read from is taken
 * from the per-file-descriptor cursor fd_dir.lfd_next, which is why
 * `file' must not be NULL here (statahead is forced off in interop
 * mode).  Refreshes cached i_size when the reply carries OBD_MD_FLSIZE.
 */
589 /* returns the page unlocked, but with a reference */
590 static int ll_dir_readpage_20(struct file *file, struct page *page)
592 struct inode *inode = page->mapping->host;
593 struct ptlrpc_request *request;
594 struct mdt_body *body;
600 /*XXX: statahead is disabled by force under interoperability mode.
601 * So file must not be NULL here. Fix me when enable statahead
602 * under interoperability mode. */
603 LASSERT(file != NULL);
/* starting hash for this page comes from the fd's readdir cursor */
604 hash = ((struct ll_file_data *)LUSTRE_FPRIVATE(file))->fd_dir.lfd_next;
605 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) off %lu\n",
606 inode->i_ino, inode->i_generation, inode, (unsigned long)hash);
608 ll_inode2fid(&fid, inode);
609 rc = mdc_readpage(ll_i2sbi(inode)->ll_mdc_exp, &fid,
610 hash, page, &request);
612 body = lustre_msg_buf(request->rq_repmsg, REPLY_REC_OFF,
614 /* Checked by mdc_readpage() */
615 LASSERT(body != NULL);
/* only trust the size when the server says it is valid */
617 if (body->valid & OBD_MD_FLSIZE) {
618 ll_inode_size_lock(inode, 0);
619 i_size_write(inode, body->size);
620 ll_inode_size_unlock(inode, 0);
622 SetPageUptodate(page);
624 ptlrpc_req_finished(request);
/*
 * 2.0-format counterpart of ll_dir_check_page(): currently performs no
 * validation and only marks the page as checked.
 */
632 static void ll_check_page(struct inode *dir, struct page *page)
634 /* XXX: check page format later */
635 SetPageChecked(page);
/*
 * Locate a cached directory page whose hash range [ldp_hash_start,
 * ldp_hash_end] covers *hash.  The radix tree is keyed by the
 * complement of the hash, so radix_tree_gang_lookup() finds the page
 * with the nearest *smaller* starting hash.  On success *start/*end are
 * filled from the page header (shifted down on 32-bit/64-bit-hash
 * kernels); a page whose range no longer covers the hash is truncated
 * and dropped, and a non-uptodate page yields ERR_PTR(-EIO).
 */
640 * Find, kmap and return page that contains given hash.
642 static struct page *ll_dir_page_locate(struct inode *dir, __u64 *hash,
643 __u64 *start, __u64 *end)
645 int hash64 = ll_i2sbi(dir)->ll_flags & LL_SBI_64BIT_HASH;
646 struct address_space *mapping = dir->i_mapping;
648 * Complement of hash is used as an index so that
649 * radix_tree_gang_lookup() can be used to find a page with starting
650 * hash _smaller_ than one we are looking for.
652 unsigned long offset = hash_x_index(*hash, hash64);
657 TREE_READ_LOCK_IRQ(mapping);
658 found = radix_tree_gang_lookup(&mapping->page_tree,
659 (void **)&page, offset, 1);
661 struct lu_dirpage *dp;
/* pin the page before dropping the tree lock */
663 page_cache_get(page);
664 TREE_READ_UNLOCK_IRQ(mapping);
666 * In contrast to find_lock_page() we are sure that directory
667 * page cannot be truncated (while DLM lock is held) and,
668 * hence, can avoid restart.
670 * In fact, page cannot be locked here at all, because
671 * ll_dir_readpage() does synchronous io.
674 if (PageUptodate(page)) {
/* 32-bit API over 64-bit hashes: ranges are stored shifted */
676 if (BITS_PER_LONG == 32 && hash64) {
677 *start = le64_to_cpu(dp->ldp_hash_start) >> 32;
678 *end = le64_to_cpu(dp->ldp_hash_end) >> 32;
681 *start = le64_to_cpu(dp->ldp_hash_start);
682 *end = le64_to_cpu(dp->ldp_hash_end);
684 LASSERTF(*start <= *hash, "start = "LPX64",end = "
685 LPX64",hash = "LPX64"\n", *start, *end, *hash);
/* stale page: hash outside range (or at an exclusive end) */
686 if (*hash > *end || (*end != *start && *hash == *end)) {
689 truncate_complete_page(page->mapping, page);
691 page_cache_release(page);
695 page_cache_release(page);
696 page = ERR_PTR(-EIO);
700 TREE_READ_UNLOCK_IRQ(mapping);
/*
 * 2.0-protocol page fetch: return the directory page covering `hash',
 * under a CR UPDATE inodebits DLM lock (matched or enqueued via
 * IT_READDIR intent).  First probes the page cache with
 * ll_dir_page_locate(); with `exact' set, a cached page whose start
 * hash differs from the requested one is considered stale, truncated,
 * and re-read.  Handles page-wide hash collisions by warning and (per
 * the comment) fetching the overflow chain.  The lock is released at
 * out_unlock; a bad page yields ERR_PTR(-EIO).
 */
706 static struct page *ll_get_dir_page_20(struct file *filp, struct inode *dir,
707 __u64 hash, int exact,
708 struct ll_dir_chain *chain)
710 struct ldlm_res_id res_id;
711 struct lustre_handle lockh;
712 struct obd_device *obddev = class_exp2obd(ll_i2sbi(dir)->ll_mdc_exp);
713 struct address_space *mapping = dir->i_mapping;
714 struct lu_dirpage *dp;
716 ldlm_policy_data_t policy = {.l_inodebits = {MDS_INODELOCK_UPDATE} };
722 int hash64 = ll_i2sbi(dir)->ll_flags & LL_SBI_64BIT_HASH;
725 fid_build_reg_res_name(ll_inode_lu_fid(dir), &res_id);
727 rc = ldlm_lock_match(obddev->obd_namespace, LDLM_FL_BLOCK_GRANTED,
728 &res_id, LDLM_IBITS, &policy, mode, &lockh);
/* no granted lock cached: enqueue with a readdir intent */
730 struct lookup_intent it = { .it_op = IT_READDIR };
731 struct ldlm_enqueue_info einfo = { LDLM_IBITS, mode,
732 ll_mdc_blocking_ast, ldlm_completion_ast, NULL, dir };
733 struct ptlrpc_request *request;
734 struct mdc_op_data op_data = { { 0 } };
736 ll_prepare_mdc_op_data(&op_data, dir, NULL, NULL, 0, 0, NULL);
738 rc = mdc_enqueue(ll_i2sbi(dir)->ll_mdc_exp, &einfo, &it,
739 &op_data, &lockh, NULL, 0, 0);
741 request = (struct ptlrpc_request *)it.d.lustre.it_data;
743 ptlrpc_req_finished(request);
745 CERROR("lock enqueue: rc: %d\n", rc);
749 ldlm_lock_dump_handle(D_OTHER, &lockh);
/* fast path: page already cached and covering the hash */
751 page = ll_dir_page_locate(dir, &lhash, &start, &end);
753 GOTO(out_unlock, page);
757 * XXX nikita: not entirely correct handling of a corner case:
758 * suppose hash chain of entries with hash value HASH crosses
759 * border between pages P0 and P1. First both P0 and P1 are
760 * cached, seekdir() is called for some entry from the P0 part
761 * of the chain. Later P0 goes out of cache. telldir(HASH)
762 * happens and finds P1, as it starts with matching hash
763 * value. Remaining entries from P0 part of the chain are
764 * skipped. (Is that really a bug?)
766 * Possible solutions: 0. don't cache P1 is such case, handle
767 * it as an "overflow" page. 1. invalidate all pages at
768 * once. 2. use HASH|1 as an index for P1.
770 if (exact && hash != start) {
772 * readdir asked for a page starting _exactly_ from
773 * given hash, but cache contains stale page, with
774 * entries with smaller hash values. Stale page should
775 * be invalidated, and new one fetched.
777 CDEBUG(D_INFO, "Stale readpage page %p: %#lx != %#lx\n",
778 page, (unsigned long)lhash, (unsigned long)start);
780 truncate_complete_page(page->mapping, page);
782 page_cache_release(page);
784 GOTO(hash_collision, page);
/* cache miss: read the page via ll_dir_readpage_20 */
788 page = read_cache_page(mapping, hash_x_index(hash, hash64),
789 (filler_t*)ll_dir_readpage_20, filp);
791 GOTO(out_unlock, page);
795 if (!PageUptodate(page))
797 if (!PageChecked(page))
798 ll_check_page(dir, page);
802 dp = page_address(page);
804 if (BITS_PER_LONG == 32 && hash64) {
805 start = le64_to_cpu(dp->ldp_hash_start) >> 32;
806 end = le64_to_cpu(dp->ldp_hash_end) >> 32;
809 start = le64_to_cpu(dp->ldp_hash_start);
810 end = le64_to_cpu(dp->ldp_hash_end);
/* entire page shares one hash value: a page-wide collision */
814 LASSERT(start == lhash);
815 CWARN("Page-wide hash collision: "LPU64"\n", end);
816 if (BITS_PER_LONG == 32 && hash64)
817 CWARN("Real page-wide hash collision at ["LPU64" "LPU64
818 "] with hash "LPU64"\n",
819 le64_to_cpu(dp->ldp_hash_start),
820 le64_to_cpu(dp->ldp_hash_end), hash);
822 * Fetch whole overflow chain...
829 ldlm_lock_decref(&lockh, mode);
834 page = ERR_PTR(-EIO);
/*
 * readdir implementation for 2.0-protocol servers: positions are
 * 64-bit hashes, kept in the per-fd cursor fd_dir.lfd_pos/lfd_next
 * rather than directly in f_pos.  Iterates hash-keyed pages until
 * MDS_DIR_END_OFF, calling filldir for each entry past the target
 * hash; inode numbers are built from fids, file types via
 * ll_dirent_type_get().  On completion the hash position is folded
 * into f_pos (truncated/shifted for the 32-bit API).
 */
838 static int ll_readdir_20(struct file *filp, void *cookie, filldir_t filldir)
840 struct inode *inode = filp->f_dentry->d_inode;
841 struct ll_sb_info *sbi = ll_i2sbi(inode);
842 struct ll_file_data *fd = LUSTRE_FPRIVATE(filp);
843 __u64 pos = fd->fd_dir.lfd_pos;
844 int api32 = ll_need_32bit_api(sbi);
845 int hash64= sbi->ll_flags & LL_SBI_64BIT_HASH;
847 struct ll_dir_chain chain;
853 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) pos %lu/%llu 32bit_api %d\n",
854 inode->i_ino, inode->i_generation, inode,
855 (unsigned long)pos, i_size_read(inode), api32);
/* cursor already at end-of-directory sentinel: nothing to emit */
857 if (pos == MDS_DIR_END_OFF)
866 ll_dir_chain_init(&chain);
868 fd->fd_dir.lfd_next = pos;
869 page = ll_get_dir_page_20(filp, inode, pos, 0, &chain);
872 while (rc == 0 && !done) {
873 struct lu_dirpage *dp;
874 struct lu_dirent *ent;
878 * If page is empty (end of directoryis reached),
881 __u64 hash = MDS_DIR_END_OFF;
884 dp = page_address(page);
885 for (ent = lu_dirent_start(dp); ent != NULL && !done;
886 ent = lu_dirent_next(ent)) {
893 hash = le64_to_cpu(ent->lde_hash);
896 * Skip until we find target hash
901 namelen = le16_to_cpu(ent->lde_namelen);
/* inode number is synthesized from the entry's fid */
908 fid_le_to_cpu(&fid, &ent->lde_fid);
909 ino = ll_fid_build_ino((struct ll_fid *)&fid,
915 type = ll_dirent_type_get(ent);
916 done = filldir(cookie, ent->lde_name, namelen,
/* next page starts where this page's hash range ends */
919 next = le64_to_cpu(dp->ldp_hash_end);
923 if (pos == MDS_DIR_END_OFF) {
925 * End of directory reached.
928 } else if (1 /* chain is exhausted*/) {
930 * Normal case: continue to the next
933 fd->fd_dir.lfd_next = pos;
934 page = ll_get_dir_page_20(filp, inode,
939 * go into overflow page.
947 CERROR("error reading dir "DFID" at %lu: rc %d\n",
948 PFID(ll_inode_lu_fid(inode)),
949 (unsigned long)pos, rc);
/* save the hash cursor; f_pos gets the API-appropriate encoding */
953 fd->fd_dir.lfd_pos = pos;
954 if (pos == MDS_DIR_END_OFF) {
956 filp->f_pos = LL_DIR_END_OFF_32BIT;
958 filp->f_pos = LL_DIR_END_OFF;
961 filp->f_pos = pos >> 32;
965 filp->f_version = inode->i_version;
966 touch_atime(filp->f_vfsmnt, filp->f_dentry);
968 ll_dir_chain_fini(&chain);
/*
 * Top-level readdir dispatcher: route to the 2.0 (fid/hash) path when
 * the MDC export negotiated OBD_CONNECT_FID, otherwise to the legacy
 * 1.8 (offset) path.
 */
973 static int ll_readdir(struct file *filp, void *cookie, filldir_t filldir)
975 struct inode *inode = filp->f_dentry->d_inode;
976 struct ll_sb_info *sbi = ll_i2sbi(inode);
978 if (sbi->ll_mdc_exp->exp_connect_flags & OBD_CONNECT_FID) {
979 return ll_readdir_20(filp, cookie, filldir);
981 return ll_readdir_18(filp, cookie, filldir);
/* Field-by-field copy of a quota-control structure (obd_quotactl). */
985 #define QCTL_COPY(out, in) \
987 Q_COPY(out, in, qc_cmd); \
988 Q_COPY(out, in, qc_type); \
989 Q_COPY(out, in, qc_id); \
990 Q_COPY(out, in, qc_stat); \
991 Q_COPY(out, in, qc_dqinfo); \
992 Q_COPY(out, in, qc_dqblk); \
/*
 * Send one "key=value" configuration string to the MGS through the MGC
 * export via obd_set_info_async(KEY_SET_INFO).
 *
 * NOTE(review): strncpy() does not guarantee NUL termination when
 * strlen(string) >= MGS_PARAM_MAXLEN -- confirm callers bound the
 * parameter length (or that msp is zero-filled on allocation, which is
 * outside this view).
 */
995 static int ll_send_mgc_param(struct obd_export *mgc, char *string)
997 struct mgs_send_param *msp;
1004 strncpy(msp->mgs_param, string, MGS_PARAM_MAXLEN);
1005 rc = obd_set_info_async(mgc, sizeof(KEY_SET_INFO), KEY_SET_INFO,
1006 sizeof(struct mgs_send_param), msp, NULL);
1008 CERROR("Failed to set parameter: %d\n", rc);
/*
 * Return a freshly OBD_ALLOC'd copy of the filesystem name, derived
 * from the mount profile with any trailing "-client" suffix stripped
 * (the len adjustment for that case is outside this view).  Caller
 * frees with OBD_FREE(fsname, MGS_PARAM_MAXLEN).
 *
 * NOTE(review): NUL termination of fsname relies on the allocation
 * being zero-filled (OBD_ALLOC convention) since strncpy copies exactly
 * `len' bytes -- confirm against the full source.
 */
1014 static char *ll_get_fsname(struct inode *inode)
1016 struct lustre_sb_info *lsi = s2lsi(inode->i_sb);
1020 OBD_ALLOC(fsname, MGS_PARAM_MAXLEN);
1021 len = strlen(lsi->lsi_lmd->lmd_profile);
1022 ptr = strrchr(lsi->lsi_lmd->lmd_profile, '-');
1023 if (ptr && (strcmp(ptr, "-client") == 0))
1025 strncpy(fsname, lsi->lsi_lmd->lmd_profile, len);
/*
 * Set the default LOV striping of a directory via mdc_setattr().
 * `lump' may be a v1 or v3 lov_user_md (NULL removes the LOV EA); it is
 * swabbed to little-endian before sending if needed.  When
 * `set_default' is set on the filesystem root, the stripe size, count
 * and offset are also pushed to the MGS as persistent
 * "<fsname>-MDT0000.lov.*" parameters.
 */
1031 int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump,
1034 struct ll_sb_info *sbi = ll_i2sbi(inode);
1035 struct mdc_op_data data = { { 0 } };
1036 struct ptlrpc_request *req = NULL;
1037 struct lustre_sb_info *lsi = s2lsi(inode->i_sb);
1038 struct obd_device *mgc = lsi->lsi_mgc;
1039 char *fsname = NULL, *param = NULL;
1040 struct iattr attr = { 0 };
1041 int lum_size = 0, rc = 0;
1044 if (lump->lmm_magic == LOV_USER_MAGIC_V3)
1045 lum_size = sizeof(struct lov_user_md_v3);
1047 lum_size = sizeof(struct lov_user_md_v1);
1049 * This is coming from userspace, so should be in
1050 * local endian. But the MDS would like it in little
1051 * endian, so we swab it before we send it.
/* magic not already little-endian: swab the whole structure */
1053 if ((lump->lmm_magic != cpu_to_le32(LOV_USER_MAGIC_V1)) &&
1054 (lump->lmm_magic != cpu_to_le32(LOV_USER_MAGIC_V3))) {
1055 rc = lustre_swab_lov_user_md(lump);
1059 } else { /* NULL value means remove LOV EA */
1060 lum_size = sizeof(struct lov_user_md_v1);
1063 ll_prepare_mdc_op_data(&data, inode, NULL, NULL, 0, 0, NULL);
1065 /* swabbing is done in lov_setstripe() on server side */
1066 rc = mdc_setattr(sbi->ll_mdc_exp, &data,
1067 &attr, lump, lum_size, NULL, 0, &req);
1069 ptlrpc_req_finished(req);
/* EPERM/EACCES are expected (permission) failures; stay quiet */
1070 if (rc != -EPERM && rc != -EACCES)
1071 CERROR("mdc_setattr fails: rc = %d\n", rc);
1074 ptlrpc_req_finished(req);
1076 /* In the following we use the fact that LOV_USER_MAGIC_V1 and
1077 LOV_USER_MAGIC_V3 have the same initial fields so we do not
1078 need to make the distinction between the 2 versions */
1079 if (set_default && mgc->u.cli.cl_mgc_mgsexp) {
1080 OBD_ALLOC(param, MGS_PARAM_MAXLEN);
1082 /* Get fsname and assume devname to be -MDT0000. */
1083 fsname = ll_get_fsname(inode);
1084 /* Set root stripesize */
1085 sprintf(param, "%s-MDT0000.lov.stripesize=%u", fsname,
1086 lump ? le32_to_cpu(lump->lmm_stripe_size) : 0);
1087 rc = ll_send_mgc_param(mgc->u.cli.cl_mgc_mgsexp, param);
1091 /* Set root stripecount */
1092 sprintf(param, "%s-MDT0000.lov.stripecount=%u", fsname,
1093 lump ? le16_to_cpu(lump->lmm_stripe_count) : 0);
1094 rc = ll_send_mgc_param(mgc->u.cli.cl_mgc_mgsexp, param);
1098 /* Set root stripeoffset */
1099 sprintf(param, "%s-MDT0000.lov.stripeoffset=%u", fsname,
1100 lump ? le16_to_cpu(lump->lmm_stripe_offset) :
1101 (typeof(lump->lmm_stripe_offset))(-1));
1102 rc = ll_send_mgc_param(mgc->u.cli.cl_mgc_mgsexp, param);
1107 OBD_FREE(fsname, MGS_PARAM_MAXLEN);
1109 OBD_FREE(param, MGS_PARAM_MAXLEN);
/*
 * Fetch the directory's default LOV EA from the MDS via mdc_getattr().
 * On success *lmmp points into the reply buffer (the request is handed
 * back through *request so the caller controls its lifetime) and
 * *lmm_size is the EA size.  Returns -ENODATA when the directory has
 * no striping EA.  The EA is converted to host endianness before being
 * exposed (object array is not swabbed for directories).
 */
1114 int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmmp,
1115 int *lmm_size, struct ptlrpc_request **request)
1117 struct ll_sb_info *sbi = ll_i2sbi(inode);
1119 struct mds_body *body;
1120 struct lov_mds_md *lmm = NULL;
1121 struct ptlrpc_request *req = NULL;
1124 ll_inode2fid(&fid, inode);
/* size the getattr buffer for the largest possible EA */
1126 rc = ll_get_max_mdsize(sbi, &lmmsize);
1130 rc = mdc_getattr(sbi->ll_mdc_exp, &fid,
1131 OBD_MD_FLEASIZE|OBD_MD_FLDIREA,
1134 CDEBUG(D_INFO, "mdc_getattr failed on inode "
1135 "%lu/%u: rc %d\n", inode->i_ino,
1136 inode->i_generation, rc);
1139 body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF,
1141 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1142 /* swabbed by mdc_getattr_name */
1143 LASSERT(lustre_rep_swabbed(req, REPLY_REC_OFF));
1145 lmmsize = body->eadatasize;
/* no EA bits valid or zero-sized EA: directory has no striping */
1147 if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1149 GOTO(out, rc = -ENODATA);
1152 lmm = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF + 1, lmmsize);
1153 LASSERT(lmm != NULL);
1154 LASSERT(lustre_rep_swabbed(req, REPLY_REC_OFF + 1));
1157 * This is coming from the MDS, so is probably in
1158 * little endian. We convert it to host endian before
1159 * passing it to userspace.
1161 /* We don't swab objects for directories */
1162 if (((le32_to_cpu(lmm->lmm_magic) == LOV_MAGIC_V1) ||
1163 (le32_to_cpu(lmm->lmm_magic) == LOV_MAGIC_V3)) &&
1164 (LOV_MAGIC != cpu_to_le32(LOV_MAGIC))) {
1165 rc = lustre_swab_lov_user_md((struct lov_user_md*)lmm);
1172 *lmm_size = lmmsize;
1177 static int ll_dir_ioctl(struct inode *inode, struct file *file,
1178 unsigned int cmd, unsigned long arg)
1180 struct ll_sb_info *sbi = ll_i2sbi(inode);
1181 struct obd_ioctl_data *data;
1184 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), cmd=%#x\n",
1185 inode->i_ino, inode->i_generation, inode, cmd);
1187 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
1188 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
1191 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
1193 case FSFILT_IOC_GETFLAGS:
1194 case FSFILT_IOC_SETFLAGS:
1195 RETURN(ll_iocontrol(inode, file, cmd, arg));
1196 case FSFILT_IOC_GETVERSION_OLD:
1197 case FSFILT_IOC_GETVERSION:
1198 RETURN(put_user(inode->i_generation, (int *)arg));
1199 /* We need to special case any other ioctls we want to handle,
1200 * to send them to the MDS/OST as appropriate and to properly
1201 * network encode the arg field.
1202 case EXT3_IOC_SETVERSION_OLD:
1203 case EXT3_IOC_SETVERSION:
1205 case IOC_MDC_LOOKUP: {
1206 struct ptlrpc_request *request = NULL;
1210 int namelen, rc, len = 0;
1212 rc = obd_ioctl_getdata(&buf, &len, (void *)arg);
1217 filename = data->ioc_inlbuf1;
1218 namelen = data->ioc_inllen1;
1221 CDEBUG(D_INFO, "IOC_MDC_LOOKUP missing filename\n");
1222 GOTO(out, rc = -EINVAL);
1225 ll_inode2fid(&fid, inode);
1226 rc = mdc_getattr_name(sbi->ll_mdc_exp, &fid, filename, namelen,
1227 OBD_MD_FLID, 0, &request);
1229 CDEBUG(D_INFO, "mdc_getattr_name: %d\n", rc);
1233 ptlrpc_req_finished(request);
1237 obd_ioctl_freedata(buf, len);
1240 case LL_IOC_LOV_SETSTRIPE: {
1241 struct lov_user_md_v3 lumv3;
1242 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
1243 struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
1244 struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
1247 int set_default = 0;
1249 LASSERT(sizeof(lumv3) == sizeof(*lumv3p));
1250 LASSERT(sizeof(lumv3.lmm_objects[0]) ==
1251 sizeof(lumv3p->lmm_objects[0]));
1253 /* first try with v1 which is smaller than v3 */
1254 if (copy_from_user(lumv1, lumv1p, sizeof(*lumv1)))
1257 if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
1258 if (copy_from_user(&lumv3, lumv3p, sizeof(lumv3)))
1262 if (inode->i_sb->s_root == file->f_dentry)
1265 /* in v1 and v3 cases lumv1 points to data */
1266 rc = ll_dir_setstripe(inode, lumv1, set_default);
1270 case LL_IOC_OBD_STATFS:
1271 RETURN(ll_obd_statfs(inode, (void *)arg));
1272 case LL_IOC_LOV_GETSTRIPE:
1273 case LL_IOC_MDC_GETINFO:
1274 case IOC_MDC_GETFILEINFO:
1275 case IOC_MDC_GETFILESTRIPE: {
1276 struct ptlrpc_request *request = NULL;
1277 struct mds_body *body;
1278 struct lov_user_md *lump;
1279 struct lov_mds_md *lmm = NULL;
1280 char *filename = NULL;
1283 if (cmd == IOC_MDC_GETFILEINFO ||
1284 cmd == IOC_MDC_GETFILESTRIPE) {
1285 filename = getname((const char *)arg);
1286 if (IS_ERR(filename))
1287 RETURN(PTR_ERR(filename));
1289 rc = ll_lov_getstripe_ea_info(inode, filename, &lmm,
1290 &lmmsize, &request);
1292 rc = ll_dir_getstripe(inode, &lmm, &lmmsize, &request);
1296 body = lustre_msg_buf(request->rq_repmsg, REPLY_REC_OFF,
1298 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1299 /* swabbed by mdc_getattr_name */
1300 LASSERT(lustre_rep_swabbed(request, REPLY_REC_OFF));
1306 if (rc == -ENODATA && (cmd == IOC_MDC_GETFILEINFO ||
1307 cmd == LL_IOC_MDC_GETINFO))
1308 GOTO(skip_lmm, rc = 0);
1313 if (cmd == IOC_MDC_GETFILESTRIPE ||
1314 cmd == LL_IOC_LOV_GETSTRIPE) {
1315 lump = (struct lov_user_md *)arg;
1317 struct lov_user_mds_data *lmdp;
1318 lmdp = (struct lov_user_mds_data *)arg;
1319 lump = &lmdp->lmd_lmm;
1321 if (copy_to_user(lump, lmm, lmmsize) != 0) {
1322 if (copy_to_user(lump, lmm, sizeof(*lump)) != 0)
1323 GOTO(out_lmm, rc = -EFAULT);
1327 if (cmd == IOC_MDC_GETFILEINFO || cmd == LL_IOC_MDC_GETINFO) {
1328 struct lov_user_mds_data *lmdp;
1331 st.st_dev = inode->i_sb->s_dev;
1332 st.st_mode = body->mode;
1333 st.st_nlink = body->nlink;
1334 st.st_uid = body->uid;
1335 st.st_gid = body->gid;
1336 st.st_rdev = body->rdev;
1337 st.st_size = body->size;
1338 st.st_blksize = CFS_PAGE_SIZE;
1339 st.st_blocks = body->blocks;
1340 st.st_atime = body->atime;
1341 st.st_mtime = body->mtime;
1342 st.st_ctime = body->ctime;
1343 st.st_ino = body->ino;
1345 lmdp = (struct lov_user_mds_data *)arg;
1346 if (copy_to_user(&lmdp->lmd_st, &st, sizeof(st)))
1347 GOTO(out_lmm, rc = -EFAULT);
1352 if (lmm && lmm->lmm_magic == LOV_MAGIC_JOIN)
1353 OBD_FREE(lmm, lmmsize);
1355 ptlrpc_req_finished(request);
1360 case IOC_LOV_GETINFO: {
1361 struct lov_user_mds_data *lumd;
1362 struct lov_stripe_md *lsm;
1363 struct lov_user_md *lum;
1364 struct lov_mds_md *lmm;
1369 lumd = (struct lov_user_mds_data *)arg;
1370 lum = &lumd->lmd_lmm;
1372 rc = ll_get_max_mdsize(sbi, &lmmsize);
1376 OBD_ALLOC(lmm, lmmsize);
1377 if (copy_from_user(lmm, lum, lmmsize))
1378 GOTO(free_lmm, rc = -EFAULT);
1380 if (LOV_USER_MAGIC != cpu_to_le32(LOV_USER_MAGIC)) {
1381 rc = lustre_swab_lov_user_md(
1382 (struct lov_user_md_v1 *)lmm);
1385 rc = lustre_swab_lov_user_md_objects(
1386 (struct lov_user_md*)lmm);
1391 rc = obd_unpackmd(sbi->ll_osc_exp, &lsm, lmm, lmmsize);
1393 GOTO(free_lmm, rc = -ENOMEM);
1395 rc = obd_checkmd(sbi->ll_osc_exp, sbi->ll_mdc_exp, lsm);
1399 /* Perform glimpse_size operation. */
1400 memset(&st, 0, sizeof(st));
1402 rc = ll_glimpse_ioctl(sbi, lsm, &st);
1406 if (copy_to_user(&lumd->lmd_st, &st, sizeof(st)))
1407 GOTO(free_lsm, rc = -EFAULT);
1411 obd_free_memmd(sbi->ll_osc_exp, &lsm);
1413 OBD_FREE(lmm, lmmsize);
1416 case OBD_IOC_LLOG_CATINFO: {
1417 struct ptlrpc_request *req = NULL;
1420 char *bufs[3] = { NULL }, *str;
1421 int lens[3] = { sizeof(struct ptlrpc_body) };
1422 int size[2] = { sizeof(struct ptlrpc_body) };
1424 rc = obd_ioctl_getdata(&buf, &len, (void *)arg);
1429 if (!data->ioc_inlbuf1) {
1430 obd_ioctl_freedata(buf, len);
1434 lens[REQ_REC_OFF] = data->ioc_inllen1;
1435 bufs[REQ_REC_OFF] = data->ioc_inlbuf1;
1436 if (data->ioc_inllen2) {
1437 lens[REQ_REC_OFF + 1] = data->ioc_inllen2;
1438 bufs[REQ_REC_OFF + 1] = data->ioc_inlbuf2;
1440 lens[REQ_REC_OFF + 1] = 0;
1441 bufs[REQ_REC_OFF + 1] = NULL;
1444 req = ptlrpc_prep_req(sbi2mdc(sbi)->cl_import,
1445 LUSTRE_LOG_VERSION, LLOG_CATINFO, 3, lens,
1448 GOTO(out_catinfo, rc = -ENOMEM);
1450 size[REPLY_REC_OFF] = data->ioc_plen1;
1451 ptlrpc_req_set_repsize(req, 2, size);
1453 rc = ptlrpc_queue_wait(req);
1454 str = lustre_msg_string(req->rq_repmsg, REPLY_REC_OFF,
1457 if (copy_to_user(data->ioc_pbuf1, str,data->ioc_plen1))
1459 ptlrpc_req_finished(req);
1461 obd_ioctl_freedata(buf, len);
1464 case OBD_IOC_QUOTACHECK: {
1465 struct obd_quotactl *oqctl;
1468 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1471 OBD_ALLOC_PTR(oqctl);
1474 oqctl->qc_type = arg;
1475 rc = obd_quotacheck(sbi->ll_mdc_exp, oqctl);
1477 CDEBUG(D_INFO, "mdc_quotacheck failed: rc %d\n", rc);
1481 rc = obd_quotacheck(sbi->ll_osc_exp, oqctl);
1483 CDEBUG(D_INFO, "osc_quotacheck failed: rc %d\n", rc);
1485 OBD_FREE_PTR(oqctl);
1488 case OBD_IOC_POLL_QUOTACHECK: {
1489 struct if_quotacheck *check;
1492 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1495 OBD_ALLOC_PTR(check);
1499 rc = obd_iocontrol(cmd, sbi->ll_mdc_exp, 0, (void *)check,
1502 CDEBUG(D_QUOTA, "mdc ioctl %d failed: %d\n", cmd, rc);
1503 if (copy_to_user((void *)arg, check, sizeof(*check)))
1504 CDEBUG(D_QUOTA, "copy_to_user failed\n");
1508 rc = obd_iocontrol(cmd, sbi->ll_osc_exp, 0, (void *)check,
1511 CDEBUG(D_QUOTA, "osc ioctl %d failed: %d\n", cmd, rc);
1512 if (copy_to_user((void *)arg, check, sizeof(*check)))
1513 CDEBUG(D_QUOTA, "copy_to_user failed\n");
1517 OBD_FREE_PTR(check);
1520 case OBD_IOC_QUOTACTL: {
1521 struct if_quotactl *qctl;
1522 struct obd_quotactl *oqctl;
1524 int cmd, type, id, rc = 0;
1526 OBD_ALLOC_PTR(qctl);
1530 OBD_ALLOC_PTR(oqctl);
1535 if (copy_from_user(qctl, (void *)arg, sizeof(*qctl)))
1536 GOTO(out_quotactl, rc = -EFAULT);
1539 type = qctl->qc_type;
1542 case LUSTRE_Q_INVALIDATE:
1543 case LUSTRE_Q_FINVALIDATE:
1548 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1549 GOTO(out_quotactl, rc = -EPERM);
1552 if (((type == USRQUOTA && cfs_curproc_euid() != id) ||
1553 (type == GRPQUOTA && !in_egroup_p(id))) &&
1554 !cfs_capable(CFS_CAP_SYS_ADMIN))
1555 GOTO(out_quotactl, rc = -EPERM);
1557 /* XXX: dqb_valid is borrowed as a flag to mark that
1558 * only mds quota is wanted */
1559 if (qctl->qc_dqblk.dqb_valid) {
1560 qctl->obd_uuid = sbi->ll_mdc_exp->exp_obd->
1561 u.cli.cl_target_uuid;
1562 qctl->qc_dqblk.dqb_valid = 0;
1569 CERROR("unsupported quotactl op: %#x\n", cmd);
1570 GOTO(out_quotactl, -ENOTTY);
1573 QCTL_COPY(oqctl, qctl);
1575 if (qctl->obd_uuid.uuid[0]) {
1576 struct obd_device *obd;
1577 struct obd_uuid *uuid = &qctl->obd_uuid;
1579 obd = class_find_client_notype(uuid,
1580 &sbi->ll_osc_exp->exp_obd->obd_uuid);
1582 GOTO(out_quotactl, rc = -ENOENT);
1584 if (cmd == Q_GETINFO)
1585 oqctl->qc_cmd = Q_GETOINFO;
1586 else if (cmd == Q_GETQUOTA)
1587 oqctl->qc_cmd = Q_GETOQUOTA;
1589 GOTO(out_quotactl, rc = -EINVAL);
1591 if (sbi->ll_mdc_exp->exp_obd == obd) {
1592 rc = obd_quotactl(sbi->ll_mdc_exp, oqctl);
1595 struct obd_export *exp;
1596 struct lov_obd *lov = &sbi->ll_osc_exp->
1599 for (i = 0; i < lov->desc.ld_tgt_count; i++) {
1600 if (!lov->lov_tgts[i] ||
1601 !lov->lov_tgts[i]->ltd_active)
1603 exp = lov->lov_tgts[i]->ltd_exp;
1604 if (exp->exp_obd == obd) {
1605 rc = obd_quotactl(exp, oqctl);
1611 oqctl->qc_cmd = cmd;
1612 QCTL_COPY(qctl, oqctl);
1614 if (copy_to_user((void *)arg, qctl, sizeof(*qctl)))
1617 GOTO(out_quotactl, rc);
1620 rc = obd_quotactl(sbi->ll_mdc_exp, oqctl);
1621 if (rc && rc != -EBUSY && cmd == Q_QUOTAON) {
1622 oqctl->qc_cmd = Q_QUOTAOFF;
1623 obd_quotactl(sbi->ll_mdc_exp, oqctl);
1626 /* If QIF_SPACE is not set, client should collect the
1627 * space usage from OSSs by itself */
1628 if (cmd == Q_GETQUOTA &&
1629 !(oqctl->qc_dqblk.dqb_valid & QIF_SPACE) &&
1630 !oqctl->qc_dqblk.dqb_curspace) {
1631 struct obd_quotactl *oqctl_tmp;
1633 OBD_ALLOC_PTR(oqctl_tmp);
1634 if (oqctl_tmp == NULL)
1635 GOTO(out_quotactl, rc = -ENOMEM);
1637 oqctl_tmp->qc_cmd = Q_GETOQUOTA;
1638 oqctl_tmp->qc_id = oqctl->qc_id;
1639 oqctl_tmp->qc_type = oqctl->qc_type;
1641 /* collect space usage from OSTs */
1642 oqctl_tmp->qc_dqblk.dqb_curspace = 0;
1643 rc = obd_quotactl(sbi->ll_osc_exp, oqctl_tmp);
1644 if (!rc || rc == -EREMOTEIO) {
1645 oqctl->qc_dqblk.dqb_curspace =
1646 oqctl_tmp->qc_dqblk.dqb_curspace;
1647 oqctl->qc_dqblk.dqb_valid |= QIF_SPACE;
1650 /* collect space & inode usage from MDTs */
1651 oqctl_tmp->qc_dqblk.dqb_curspace = 0;
1652 oqctl_tmp->qc_dqblk.dqb_curinodes = 0;
1653 rc = obd_quotactl(sbi->ll_mdc_exp, oqctl_tmp);
1654 if (!rc || rc == -EREMOTEIO) {
1655 oqctl->qc_dqblk.dqb_curspace +=
1656 oqctl_tmp->qc_dqblk.dqb_curspace;
1657 oqctl->qc_dqblk.dqb_curinodes =
1658 oqctl_tmp->qc_dqblk.dqb_curinodes;
1659 oqctl->qc_dqblk.dqb_valid |= QIF_INODES;
1661 oqctl->qc_dqblk.dqb_valid &= ~QIF_SPACE;
1664 OBD_FREE_PTR(oqctl_tmp);
1667 QCTL_COPY(qctl, oqctl);
1669 if (copy_to_user((void *)arg, qctl, sizeof(*qctl)))
1673 OBD_FREE_PTR(oqctl);
1676 case OBD_IOC_GETNAME_OLD:
1677 case OBD_IOC_GETNAME: {
1678 struct obd_device *obd = class_exp2obd(sbi->ll_osc_exp);
1681 if (copy_to_user((void *)arg, obd->obd_name,
1682 strlen(obd->obd_name) + 1))
1686 case LL_IOC_PATH2FID: {
1687 if (copy_to_user((void *)arg, ll_inode_lu_fid(inode),
1688 sizeof(struct lu_fid)))
1693 case LL_IOC_GET_CONNECT_FLAGS: {
1694 if (copy_to_user((void *)arg,
1695 &sbi->ll_mdc_exp->exp_connect_flags,
1701 RETURN(obd_iocontrol(cmd, sbi->ll_osc_exp,0,NULL,(void *)arg));
/*
 * ll_dir_seek(): llseek implementation for Lustre directory files.
 *
 * NOTE(review): this view of the file is a sampled extract -- the
 * function's opening/closing braces and several body lines (including
 * the 'origin' switch labels and the final return) are missing here,
 * so the comments below describe only what the visible lines show.
 */
1705 static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin)
1707 struct inode *inode = file->f_mapping->host;
1708 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1709 struct ll_sb_info *sbi = ll_i2sbi(inode);
/* non-zero when the caller needs 32-bit directory offsets (compat API) */
1710 int api32 = ll_need_32bit_api(sbi);
1711 loff_t ret = -EINVAL;
/* Without OBD_CONNECT_FID from the MDC, fall back to the generic
 * VFS llseek -- presumably dir hashes aren't usable as stable
 * offsets in that case (TODO confirm against server docs). */
1714 if (!(sbi->ll_mdc_exp->exp_connect_flags & OBD_CONNECT_FID))
1715 return default_llseek(file, offset, origin);
/* i_mutex serializes the f_pos / lfd_pos updates below with readdir */
1717 mutex_lock(&inode->i_mutex);
/* (inside the origin switch, not fully visible) SEEK_CUR case:
 * make the offset relative to the current position */
1722 offset += file->f_pos;
/* SEEK_END-style cases: bias by the directory end-of-file sentinel,
 * 32-bit or 64-bit form depending on the caller's API width */
1728 offset += LL_DIR_END_OFF_32BIT;
1730 offset += LL_DIR_END_OFF;
/* Only accept offsets within the valid range for this API width
 * (the condition's first operand is on a line missing from this view) */
1737 ((api32 && offset <= LL_DIR_END_OFF_32BIT) ||
1738 (!api32 && offset <= LL_DIR_END_OFF))) {
1739 if (offset != file->f_pos) {
/* Map the user-visible end offset to the MDS end-of-dir cookie */
1740 if ((api32 && offset == LL_DIR_END_OFF_32BIT) ||
1741 (!api32 && offset == LL_DIR_END_OFF))
1742 fd->fd_dir.lfd_pos = MDS_DIR_END_OFF;
/* 32-bit API against a 64-bit-hash server: widen the 32-bit
 * offset into the high half of the 64-bit hash position */
1743 else if (api32 && sbi->ll_flags & LL_SBI_64BIT_HASH)
1744 fd->fd_dir.lfd_pos = offset << 32;
1746 fd->fd_dir.lfd_pos = offset;
1747 file->f_pos = offset;
/* invalidate any cached readdir state tied to the old position */
1748 file->f_version = 0;
1755 mutex_unlock(&inode->i_mutex);
1759 struct file_operations ll_dir_operations = {
1760 .open = ll_file_open,
1761 .llseek = ll_dir_seek,
1762 .release = ll_file_release,
1763 .read = generic_read_dir,
1764 .readdir = ll_readdir,
1765 .ioctl = ll_dir_ioctl,