1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI)
11 * linux/fs/minix/dir.c
14 * Copyright (C) 1991, 1992 Linus Torvalds
16 * ext2 directory handling functions
18 * Big-endian to little-endian byte-swapping/bitmaps by
19 * David S. Miller (davem@caip.rutgers.edu), 1995
21 * All code that works with directory layout had been switched to pagecache
24 * Adapted for Lustre Light
25 * Copyright (C) 2002-2003, Cluster File Systems, Inc.
30 #include <linux/ext2_fs.h>
31 #include <linux/pagemap.h>
33 #include <linux/version.h>
34 #include <linux/smp_lock.h>
35 #include <asm/uaccess.h>
36 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
37 #include <linux/locks.h> // for wait_on_buffer
39 #include <linux/buffer_head.h> // for wait_on_buffer
42 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <linux/obd_support.h>
45 #include <linux/obd_class.h>
46 #include <linux/lustre_lib.h>
47 #include <linux/lustre_idl.h>
48 #include <linux/lustre_mds.h>
49 #include <linux/lustre_lite.h>
50 #include <linux/lustre_dlm.h>
/* Shorthand for the ext2 directory entry structure used throughout this file. */
typedef struct ext2_dir_entry_2 ext2_dirent;

/*
 * Test / set the PG_checked bit in a page's flags.  In this file the bit is
 * set by ext2_check_page() and tested by ll_get_dir_page().
 */
#define PageChecked(page)       test_bit(PG_checked, &(page)->flags)
#define SetPageChecked(page)    set_bit(PG_checked, &(page)->flags)
57 /* returns the page unlocked, but with a reference */
/*
 * ll_dir_readpage - readpage handler for directory pages (referenced from
 * ll_dir_aops).
 *
 * NOTE(review): several short lines of this function (braces, a few local
 * declarations, labels and the final return) are not visible in this copy
 * of the file, so the notes here describe only the statements that are
 * shown.
 *
 * Visible flow:
 *   - a page whose index is at or beyond the page count derived from i_size
 *     is zero-filled and control jumps to readpage_out;
 *   - a PR lock on the {i_ino, i_generation} resource is looked up with
 *     ldlm_lock_match(); mdc_enqueue() with an IT_READDIR intent is also
 *     called (presumably only when no lock matched; the controlling
 *     condition is not visible, confirm);
 *   - the page is filled by mdc_readpage() and i_size is refreshed from the
 *     reply body;
 *   - the page is marked up to date and the PR lock reference is dropped.
 */
58 static int ll_dir_readpage(struct file *file, struct page *page)
60 struct inode *inode = page->mapping->host;
61 struct ll_sb_info *sbi = ll_i2sbi(inode);
64 struct ptlrpc_request *request;
65 struct lustre_handle lockh;
66 struct mds_body *body;
67 struct lookup_intent it = { .it_op = IT_READDIR };
68 struct mdc_op_data data;
69 struct obd_device *obddev = class_conn2obd(&sbi->ll_mdc_conn);
/* The lock resource is named by the inode number and generation. */
70 struct ldlm_res_id res_id =
71 { .name = {inode->i_ino, (__u64)inode->i_generation} };
72 int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_MATCH_DATA;
75 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
76 inode->i_generation, inode);
/*
 * Requested page lies at or past the number of pages implied by i_size.
 * NOTE(review): this rounds up with PAGE_CACHE_SIZE but shifts by
 * PAGE_SHIFT, whereas dir_pages() shifts by PAGE_CACHE_SHIFT; confirm the
 * two constants are always equal here.
 */
77 if ((inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_SHIFT <= page->index){
78 /* XXX why do we need this exactly, and why do we think that
79 * an all-zero directory page is useful?
81 CERROR("memsetting dir page %lu to zero (size %lld)\n",
82 page->index, inode->i_size);
83 memset(kmap(page), 0, PAGE_CACHE_SIZE);
85 GOTO(readpage_out, rc);
// Look for an existing PR lock on this directory resource.
88 rc = ldlm_lock_match(obddev->obd_namespace, flags, &res_id,
89 LDLM_PLAIN, NULL, 0, LCK_PR, inode,
92 ll_prepare_mdc_op_data(&data, inode, NULL, NULL, 0, 0);
// Enqueue a PR lock carrying the IT_READDIR intent.
94 rc = mdc_enqueue(&sbi->ll_mdc_conn, LDLM_PLAIN, &it, LCK_PR,
95 &data, &lockh, NULL, 0,
96 ldlm_completion_ast, ll_mdc_blocking_ast,
// The intent hands back a request in it_data; it is released right away.
98 request = (struct ptlrpc_request *)it.it_data;
100 ptlrpc_req_finished(request);
102 CERROR("lock enqueue: err: %d\n", rc);
107 ldlm_lock_dump_handle(D_OTHER, &lockh);
// A page that is already up to date is reported and not fetched again.
109 if (PageUptodate(page)) {
110 CERROR("Explain this please?\n");
111 GOTO(readpage_out, rc);
// Byte offset of this page within the directory.
// NOTE(review): the declaration of offset is not visible; if it is wider
// than page->index, the shift is evaluated in the narrower type - confirm.
114 offset = page->index << PAGE_SHIFT;
115 rc = mdc_readpage(&sbi->ll_mdc_conn, inode->i_ino,
116 S_IFDIR, offset, page, &request);
118 body = lustre_msg_buf(request->rq_repmsg, 0, sizeof (*body));
119 LASSERT (body != NULL); /* checked by mdc_readpage() */
120 LASSERT_REPSWABBED (request, 0); /* swabbed by mdc_readpage() */
/* Refresh the cached directory size from the reply body. */
122 inode->i_size = body->size;
124 ptlrpc_req_finished(request);
129 SetPageUptodate(page);
/* Drop the PR reference held through lockh. */
132 ldlm_lock_decref(&lockh, LCK_PR);
136 struct address_space_operations ll_dir_aops = {
137 readpage: ll_dir_readpage,
141 * ext2 uses block-sized chunks. Arguably, sector-sized ones would be
142 * more robust, but we have what we have
144 static inline unsigned ext2_chunk_size(struct inode *inode)
146 return inode->i_sb->s_blocksize;
/*
 * ext2_put_page - release a directory page; the visible statement drops a
 * page-cache reference with page_cache_release().
 *
 * NOTE(review): the body shown here is incomplete (short lines are missing
 * from this copy of the file), so there may be additional teardown that is
 * not visible - confirm against the full source.
 */
149 static inline void ext2_put_page(struct page *page)
152 page_cache_release(page);
155 static inline unsigned long dir_pages(struct inode *inode)
157 return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT;
/*
 * ext2_check_page - sanity-check the directory entries stored in @page.
 *
 * Visible checks on each entry: rec_len not below EXT2_DIR_REC_LEN(1),
 * rec_len large enough for name_len, and the entry not crossing a chunk
 * boundary.  The error strings also mention an alignment check and a
 * page-boundary check whose conditions are not visible in this copy.
 * SetPageChecked() is called both before and after the error-reporting
 * section.
 *
 * NOTE(review): the labels, gotos and some declarations of this function
 * are missing from this copy of the file, so the exact control flow between
 * the checks and the error reports cannot be confirmed from here.
 */
161 static void ext2_check_page(struct page *page)
163 struct inode *dir = page->mapping->host;
164 unsigned chunk_size = ext2_chunk_size(dir);
165 char *kaddr = page_address(page);
166 // u32 max_inumber = le32_to_cpu(sb->u.ext2_sb.s_es->s_inodes_count);
167 unsigned offs, rec_len;
168 unsigned limit = PAGE_CACHE_SIZE;
/*
 * For the page that contains offset i_size, only the first
 * (i_size & ~PAGE_CACHE_MASK) bytes are walked; that in-page offset must be
 * a multiple of the chunk size.
 */
172 if ((dir->i_size >> PAGE_CACHE_SHIFT) == page->index) {
173 limit = dir->i_size & ~PAGE_CACHE_MASK;
174 if (limit & (chunk_size - 1)) {
175 CERROR("limit %d dir size %lld index %ld\n",
176 limit, dir->i_size, page->index);
/* Stamp every chunk from limit to the end of the page with rec_len = chunk_size. */
179 for (offs = limit; offs<PAGE_CACHE_SIZE; offs += chunk_size) {
180 ext2_dirent *p = (ext2_dirent*)(kaddr + offs);
181 p->rec_len = cpu_to_le16(chunk_size);
/*
 * Walk the entries below limit, stepping by each entry's rec_len.
 * NOTE(review): limit - EXT2_DIR_REC_LEN(1) is unsigned arithmetic and would
 * wrap for a limit smaller than the minimal record; whether an earlier guard
 * rules that out is not visible here.
 */
188 for (offs = 0; offs <= limit - EXT2_DIR_REC_LEN(1); offs += rec_len) {
189 p = (ext2_dirent *)(kaddr + offs);
190 rec_len = le16_to_cpu(p->rec_len);
192 if (rec_len < EXT2_DIR_REC_LEN(1))
196 if (rec_len < EXT2_DIR_REC_LEN(p->name_len))
/* First and last byte of the entry must fall in the same chunk. */
198 if (((offs + rec_len - 1) ^ offs) & ~(chunk_size-1))
200 // if (le32_to_cpu(p->inode) > max_inumber)
206 SetPageChecked(page);
209 /* Too bad, we had an error */
/*
 * NOTE(review): the two adjacent string literals are concatenated with no
 * separator, so the message starts "ext2_check_pagesize of directory".
 */
212 CERROR("ext2_check_page"
213 "size of directory #%lu is not a multiple of chunk size\n",
218 error = "rec_len is smaller than minimal";
221 error = "unaligned directory entry";
224 error = "rec_len is too small for name_len";
227 error = "directory entry across blocks";
230 // error = "inode out of bounds";
/* NOTE(review): this format string has no trailing newline. */
232 CERROR("ext2_check_page: bad entry in directory #%lu: %s - "
233 "offset=%lu+%u, inode=%lu, rec_len=%d, name_len=%d",
234 dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT), offs,
235 (unsigned long) le32_to_cpu(p->inode),
236 rec_len, p->name_len);
239 p = (ext2_dirent *)(kaddr + offs);
/*
 * NOTE(review): the three literals here are joined with no separators and no
 * trailing newline ("ext2_check_pageentry ... boundaryoffset=...").
 */
240 CERROR("ext2_check_page"
241 "entry in directory #%lu spans the page boundary"
242 "offset=%lu, inode=%lu",
243 dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs,
244 (unsigned long) le32_to_cpu(p->inode));
/* The page is marked checked on this path as well. */
246 SetPageChecked(page);
/*
 * ll_get_dir_page - fetch page @n of directory @dir through the page cache.
 *
 * The page is obtained with read_cache_page(), using the mapping's own
 * readpage operation as the filler.  A page that has not yet been marked
 * checked is passed to ext2_check_page().  One visible path returns
 * ERR_PTR(-EIO).
 *
 * NOTE(review): several short lines (braces, the statements guarded by the
 * visible ifs, the success return) are missing from this copy, so the exact
 * conditions that lead to the -EIO return cannot be confirmed from here.
 */
251 static struct page *ll_get_dir_page(struct inode *dir, unsigned long n)
253 struct address_space *mapping = dir->i_mapping;
254 struct page *page = read_cache_page(mapping, n,
255 (filler_t*)mapping->a_ops->readpage, NULL);
259 if (!PageUptodate(page))
/* Validate the directory entries once per page. */
261 if (!PageChecked(page))
262 ext2_check_page(page);
270 return ERR_PTR(-EIO);
275 * p is at least 6 bytes before the end of page
277 static inline ext2_dirent *ext2_next_entry(ext2_dirent *p)
279 return (ext2_dirent *)((char*)p + le16_to_cpu(p->rec_len));
282 static inline unsigned
283 ext2_validate_entry(char *base, unsigned offset, unsigned mask)
285 ext2_dirent *de = (ext2_dirent*)(base + offset);
286 ext2_dirent *p = (ext2_dirent*)(base + (offset&mask));
287 while ((char*)p < (char*)de)
288 p = ext2_next_entry(p);
289 return (char *)p - base;
292 static unsigned char ext2_filetype_table[EXT2_FT_MAX] = {
293 [EXT2_FT_UNKNOWN] DT_UNKNOWN,
294 [EXT2_FT_REG_FILE] DT_REG,
295 [EXT2_FT_DIR] DT_DIR,
296 [EXT2_FT_CHRDEV] DT_CHR,
297 [EXT2_FT_BLKDEV] DT_BLK,
298 [EXT2_FT_FIFO] DT_FIFO,
299 [EXT2_FT_SOCK] DT_SOCK,
300 [EXT2_FT_SYMLINK] DT_LNK,
/*
 * ll_readdir - feed directory entries to @filldir, starting at filp->f_pos.
 *
 * f_pos is split into a page number (pos >> PAGE_CACHE_SHIFT) and an in-page
 * offset.  Each page is fetched with ll_get_dir_page() and every entry on it
 * is passed to @filldir with its name, a position value of
 * (page number << PAGE_CACHE_SHIFT) | offset, its inode number and a d_type
 * looked up in ext2_filetype_table.  The resume position is stored back in
 * filp->f_pos and inode->i_version is recorded in filp->f_version.
 *
 * NOTE(review): several short lines (local declarations, error handling for
 * ll_get_dir_page(), the handling of the filldir result, page release and
 * the return) are missing from this copy of the file; the comments here
 * cover only what is visible.
 */
304 int ll_readdir(struct file * filp, void * dirent, filldir_t filldir)
306 loff_t pos = filp->f_pos;
307 struct inode *inode = filp->f_dentry->d_inode;
308 // XXX struct super_block *sb = inode->i_sb;
309 unsigned offset = pos & ~PAGE_CACHE_MASK;
310 unsigned long n = pos >> PAGE_CACHE_SHIFT;
311 unsigned long npages = dir_pages(inode);
312 unsigned chunk_mask = ~(ext2_chunk_size(inode)-1);
313 unsigned char *types = NULL;
/* f_version differing from i_version means the saved offset must be re-validated. */
314 int need_revalidate = (filp->f_version != inode->i_version);
317 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
318 inode->i_generation, inode);
/*
 * Position is within one minimal record of the end of the directory; the
 * action taken for this case is not visible in this copy.
 */
319 if (pos > inode->i_size - EXT2_DIR_REC_LEN(1))
322 types = ext2_filetype_table;
/* After the first page, every following page is read from offset 0. */
324 for ( ; n < npages; n++, offset = 0) {
329 CDEBUG(D_EXT2, "reading %lu of dir %lu page %lu, size %llu\n",
330 PAGE_CACHE_SIZE, inode->i_ino, n, inode->i_size);
331 page = ll_get_dir_page(inode, n);
333 /* size might have been updated by mdc_readpage */
334 npages = dir_pages(inode);
338 kaddr = page_address(page);
/* Snap a possibly stale offset to the next entry boundary in its chunk. */
339 if (need_revalidate) {
340 offset = ext2_validate_entry(kaddr, offset, chunk_mask);
343 de = (ext2_dirent *)(kaddr+offset);
/* Last address at which a minimal entry can still start within the page. */
344 limit = kaddr + PAGE_CACHE_SIZE - EXT2_DIR_REC_LEN(1);
/*
 * NOTE(review): the walk advances by de->rec_len; whether a zero rec_len is
 * rejected inside the loop is not visible in this copy - confirm.
 */
345 for ( ;(char*)de <= limit; de = ext2_next_entry(de)) {
348 unsigned char d_type = DT_UNKNOWN;
/* file_type values outside the table keep DT_UNKNOWN. */
350 if (types && de->file_type < EXT2_FT_MAX)
351 d_type = types[de->file_type];
353 offset = (char *)de - kaddr;
354 over = filldir(dirent, de->name, de->name_len,
355 (n<<PAGE_CACHE_SHIFT) | offset,
356 le32_to_cpu(de->inode), d_type);
/* Save where the next call should resume and which version was seen. */
367 filp->f_pos = (n << PAGE_CACHE_SHIFT) | offset;
368 filp->f_version = inode->i_version;
/*
 * ll_dir_ioctl - ioctl handler for directories.
 *
 * Visible commands:
 *   IOC_MDC_LOOKUP     - reads an obd_ioctl_data request from user space,
 *                        looks the supplied name up with mdc_getattr_name()
 *                        and copies mode/uid/gid from the reply into
 *                        ioc_obdo1 before copying the buffer back to @arg.
 *   LL_IOC_LOV_SETSTRIPE, LL_IOC_LOV_GETSTRIPE
 *                      - case labels are present; their handling is not
 *                        visible in this copy.
 *   IOC_MDC_GETSTRIPE  - takes a file name from @arg, fetches its striping
 *                        EA with mdc_getattr_name() and copies the
 *                        lov_mds_md back to @arg.
 *   A visible final statement forwards the command to obd_iocontrol() on
 *   the OSC connection.
 *
 * NOTE(review): the switch statement itself, the labels (out, out_req),
 * several declarations and error checks are missing from this copy of the
 * file; the comments here describe only the visible statements.
 */
373 static int ll_dir_ioctl(struct inode *inode, struct file *file,
374 unsigned int cmd, unsigned long arg)
376 struct ll_sb_info *sbi = ll_i2sbi(inode);
377 struct obd_ioctl_data *data;
380 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%u\n", inode->i_ino,
381 inode->i_generation, inode, cmd);
383 if (_IOC_TYPE(cmd) == 'T') /* tty ioctls */
386 lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_IOCTL);
388 case IOC_MDC_LOOKUP: {
389 struct ptlrpc_request *request = NULL;
392 struct mds_body *body;
394 int namelen, rc, err, len = 0;
/* Copy the ioctl request in from user space; buf and len describe the copy. */
397 rc = obd_ioctl_getdata(&buf, &len, (void *)arg);
/* The name to look up travels in the first inline buffer. */
402 filename = data->ioc_inlbuf1;
403 namelen = data->ioc_inllen1;
406 CERROR("IOC_MDC_LOOKUP missing filename\n");
407 GOTO(out, rc = -EINVAL);
410 valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLSIZE;
411 ll_inode2fid(&fid, inode);
412 rc = mdc_getattr_name(&sbi->ll_mdc_conn, &fid,
413 filename, namelen, valid, 0, &request);
415 CERROR("mdc_getattr_name: %d\n", rc);
419 body = lustre_msg_buf(request->rq_repmsg, 0, sizeof (*body));
420 LASSERT(body != NULL); /* checked by mdc_getattr_name */
421 LASSERT_REPSWABBED(request, 0);/* swabbed by mdc_getattr_name */
423 /* surely there's a better way -phik */
424 data->ioc_obdo1.o_mode = body->mode;
425 data->ioc_obdo1.o_uid = body->uid;
426 data->ioc_obdo1.o_gid = body->gid;
428 ptlrpc_req_finished(request);
/* Hand the updated request buffer back to the caller. */
430 err = copy_to_user((void *)arg, buf, len);
432 GOTO(out, rc = -EFAULT);
436 obd_ioctl_freedata(buf, len);
439 case LL_IOC_LOV_SETSTRIPE:
440 case LL_IOC_LOV_GETSTRIPE:
442 case IOC_MDC_GETSTRIPE: {
443 struct ptlrpc_request *request = NULL;
445 struct mds_body *body;
446 struct lov_mds_md *lmm;
/*
 * NOTE(review): filename comes from getname(); the matching release is not
 * visible in this copy - confirm every exit path frees it.
 */
450 filename = getname((const char *)arg);
451 if (IS_ERR(filename))
452 RETURN(PTR_ERR(filename));
454 ll_inode2fid(&fid, inode);
455 rc = mdc_getattr_name(&sbi->ll_mdc_conn, &fid, filename,
456 strlen(filename)+1, OBD_MD_FLEASIZE,
457 obd_size_diskmd(&sbi->ll_osc_conn, NULL),
460 CERROR("mdc_getattr_name: failed on %s: rc %d\n",
465 body = lustre_msg_buf(request->rq_repmsg, 0, sizeof (*body));
466 LASSERT(body != NULL); /* checked by mdc_getattr_name */
467 LASSERT_REPSWABBED(request, 0);/* swabbed by mdc_getattr_name */
469 lmmsize = body->eadatasize;
/* No striping EA in the reply. */
471 if (!(body->valid & OBD_MD_FLEASIZE) || lmmsize == 0)
472 GOTO(out_req, rc = -ENODATA);
/* The condition guarding this -EFBIG exit is not visible in this copy. */
475 GOTO(out_req, rc = -EFBIG);
/* The striping EA is carried in reply buffer 1. */
477 lmm = lustre_msg_buf(request->rq_repmsg, 1, lmmsize);
478 LASSERT(lmm != NULL);
479 LASSERT_REPSWABBED(request, 1);
481 rc = copy_to_user((struct lov_mds_md *)arg, lmm, lmmsize);
483 GOTO(out_req, rc = -EFAULT);
487 ptlrpc_req_finished(request);
/* Forward the command to the OSC connection. */
493 return obd_iocontrol(cmd,&sbi->ll_osc_conn,0,NULL,(void *)arg);
/*
 * ll_dir_open - open handler for directories; delegates to ll_file_open()
 * and returns its result unchanged.
 */
int ll_dir_open(struct inode *inode, struct file *file)
{
        return ll_file_open(inode, file);
}
/*
 * ll_dir_release - release handler for directories; delegates to
 * ll_file_release() and returns its result unchanged.
 */
int ll_dir_release(struct inode *inode, struct file *file)
{
        return ll_file_release(inode, file);
}
/*
 * File operations table for directories.
 *
 * NOTE(review): this initializer is cut off in this copy of the file; only
 * the release and read entries are visible, and any further entries or the
 * closing brace cannot be confirmed from here.
 */
507 struct file_operations ll_dir_operations = {
509 release: ll_dir_release,
510 read: generic_read_dir,