Whamcloud - gitweb
c924aa407fee2de19fa65b75f3622c1ba6ebe3e8
[fs/lustre-release.git] / lustre / liblustre / dir.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see [sun.com URL with a
20  * copy of GPLv2].
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright  2008 Sun Microsystems, Inc. All rights reserved
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/liblustre/dir.c
37  *
38  * Lustre Light directory handling
39  */
40
41 #define DEBUG_SUBSYSTEM S_LLITE
42
43 #include <unistd.h>
44 #include <stdlib.h>
45 #include <string.h>
46 #include <assert.h>
47 #include <time.h>
48 #include <sys/types.h>
49 #include <sys/stat.h>
50 #include <fcntl.h>
51 #include <sys/queue.h>
52
53 #include <sysio.h>
54 #ifdef HAVE_XTIO_H
55 #include <xtio.h>
56 #endif
57 #include <fs.h>
58 #include <mount.h>
59 #include <inode.h>
60 #ifdef HAVE_FILE_H
61 #include <file.h>
62 #endif
63
64 #undef LIST_HEAD
65
66 #ifdef HAVE_ASM_TYPES_H
67 #include <asm/types.h>
68 #elif defined(HAVE_SYS_TYPES_H)
69 #include <sys/types.h>
70 #endif
71
72 #ifdef HAVE_LINUX_UNISTD_H
73 #include <linux/unistd.h>
74 #elif defined(HAVE_UNISTD_H)
75 #include <unistd.h>
76 #endif
77
78 #include <dirent.h>
79
80 #include "llite_lib.h"
81
82 /* (new) readdir implementation overview can be found in lustre/llite/dir.c */
83
84 static int llu_dir_do_readpage(struct inode *inode, struct page *page)
85 {
86         struct llu_inode_info *lli = llu_i2info(inode);
87         struct intnl_stat     *st = llu_i2stat(inode);
88         struct llu_sb_info    *sbi = llu_i2sbi(inode);
89         struct ptlrpc_request *request;
90         struct lustre_handle   lockh;
91         struct mdt_body       *body;
92         struct lookup_intent   it = { .it_op = IT_READDIR };
93         struct md_op_data      op_data = {{ 0 }};
94         ldlm_policy_data_t policy = { .l_inodebits = { MDS_INODELOCK_UPDATE } };
95         __u64 offset;
96         int rc = 0;
97         ENTRY;
98
99         rc = md_lock_match(sbi->ll_md_exp, LDLM_FL_BLOCK_GRANTED,
100                            &lli->lli_fid, LDLM_IBITS, &policy, LCK_CR, &lockh);
101         if (!rc) {
102                 struct ldlm_enqueue_info einfo = {LDLM_IBITS, LCK_CR,
103                         llu_md_blocking_ast, ldlm_completion_ast, NULL, inode};
104
105                 llu_prep_md_op_data(&op_data, inode, NULL, NULL, 0, 0,
106                                     LUSTRE_OPC_ANY);
107
108                 rc = md_enqueue(sbi->ll_md_exp, &einfo, &it,
109                                 &op_data, &lockh, NULL, 0,
110                                 LDLM_FL_CANCEL_ON_BLOCK);
111                 request = (struct ptlrpc_request *)it.d.lustre.it_data;
112                 if (request)
113                         ptlrpc_req_finished(request);
114                 if (rc < 0) {
115                         CERROR("lock enqueue: err: %d\n", rc);
116                         RETURN(rc);
117                 }
118         }
119         ldlm_lock_dump_handle(D_OTHER, &lockh);
120
121         offset = (__u64)hash_x_index(page->index);
122         rc = md_readpage(sbi->ll_md_exp, &lli->lli_fid, NULL,
123                          offset, page, &request);
124         if (!rc) {
125                 body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY);
126                 LASSERT(body != NULL);         /* checked by md_readpage() */
127
128                 if (body->valid & OBD_MD_FLSIZE)
129                         st->st_size = body->size;
130         } else {
131                 CERROR("read_dir_page(%ld) error %d\n", page->index, rc);
132         }
133         ptlrpc_req_finished(request);
134         EXIT;
135
136         ldlm_lock_decref(&lockh, LCK_CR);
137         return rc;
138 }
139
140 static struct page *llu_dir_read_page(struct inode *ino, __u32 hash,
141                                       int exact, struct ll_dir_chain *chain)
142 {
143         struct page *page;
144         int rc;
145         ENTRY;
146
147         OBD_PAGE_ALLOC(page, 0);
148         if (!page)
149                 RETURN(ERR_PTR(-ENOMEM));
150         page->index = hash_x_index(hash);
151
152         rc = llu_dir_do_readpage(ino, page);
153         if (rc) {
154                 OBD_PAGE_FREE(page);
155                 RETURN(ERR_PTR(rc));
156         }
157
158         return page;
159 }
160
161 void *(*memmover)(void *, const void *, size_t) = memmove;
162
163 #define NAME_OFFSET(de) ((int) ((de)->d_name - (char *) (de)))
164 #define ROUND_UP64(x)   (((x)+sizeof(__u64)-1) & ~(sizeof(__u64)-1))
165 static int filldir(char *buf, int buflen,
166                    const char *name, int namelen, loff_t offset,
167                    ino_t ino, unsigned int d_type, int *filled)
168 {
169         cfs_dirent_t *dirent = (cfs_dirent_t *) (buf + *filled);
170         cfs_dirent_t  holder;
171         int reclen = ROUND_UP64(NAME_OFFSET(dirent) + namelen + 1);
172
173         /*
174          * @buf is not guaranteed to be properly aligned. To work around,
175          * first fill stack-allocated @holder, then copy @holder into @buf by
176          * memmove().
177          */
178
179         /* check overflow */
180         if ((*filled + reclen) > buflen)
181                 return 1;
182
183         holder.d_ino = ino;
184 #ifdef _DIRENT_HAVE_D_OFF
185         holder.d_off = offset;
186 #endif
187         holder.d_reclen = reclen;
188 #ifdef _DIRENT_HAVE_D_TYPE
189         holder.d_type = (unsigned short) d_type;
190 #endif
191         /* gcc unrolls memcpy() of structs into field-wise assignments,
192          * assuming proper alignment. Humor it. */
193         (*memmover)(dirent, &holder, NAME_OFFSET(dirent));
194         memcpy(dirent->d_name, name, namelen);
195         dirent->d_name[namelen] = 0;
196
197         *filled += reclen;
198
199         return 0;
200 }
201
202 /* 
203  * TODO: much of the code here is similar/identical to llite ll_readdir().
204  * These code can be factored out and shared in a common module.
205  */
206
207 ssize_t llu_iop_filldirentries(struct inode *dir, _SYSIO_OFF_T *basep, 
208                                char *buf, size_t nbytes)
209 {
210         struct llu_inode_info *lli = llu_i2info(dir);
211         struct intnl_stat     *st = llu_i2stat(dir);
212         loff_t                 pos = *basep;
213         struct ll_dir_chain    chain;
214         struct page           *page;
215         int filled = 0;
216         int rc;
217         int done;
218         int shift;
219         ENTRY;
220
221         liblustre_wait_event(0);
222
223         if (st->st_size == 0) {
224                 CWARN("dir size is 0?\n");
225                 RETURN(0);
226         }
227
228         if (pos == DIR_END_OFF)
229                 /*
230                  * end-of-file.
231                  */
232                 RETURN(0);
233
234         rc    = 0;
235         done  = 0;
236         shift = 0;
237         ll_dir_chain_init(&chain);
238
239         page = llu_dir_read_page(dir, pos, 0, &chain);
240         while (rc == 0 && !done) {
241                 struct lu_dirpage *dp;
242                 struct lu_dirent  *ent;
243
244                 if (!IS_ERR(page)) {
245                         /* 
246                          * If page is empty (end of directoryis reached),
247                          * use this value. 
248                          */
249                         __u64 hash = DIR_END_OFF;
250                         __u64 next;
251
252                         dp = page->addr;
253                         for (ent = lu_dirent_start(dp); ent != NULL && !done;
254                              ent = lu_dirent_next(ent)) {
255                                 char          *name;
256                                 int            namelen;
257                                 struct lu_fid  fid;
258                                 ino_t          ino;
259
260                                 hash    = le64_to_cpu(ent->lde_hash);
261                                 namelen = le16_to_cpu(ent->lde_namelen);
262
263                                 if (hash < pos)
264                                         /*
265                                          * Skip until we find target hash
266                                          * value.
267                                          */
268                                         continue;
269
270                                 if (namelen == 0)
271                                         /*
272                                          * Skip dummy record.
273                                          */
274                                         continue;
275
276                                 fid  = ent->lde_fid;
277                                 name = ent->lde_name;
278                                 fid_le_to_cpu(&fid, &fid);
279                                 ino  = llu_fid_build_ino(llu_i2sbi(dir), &fid);
280
281                                 done = filldir(buf, nbytes, name, namelen,
282                                                (loff_t)hash, ino, DT_UNKNOWN,
283                                                &filled);
284                         }
285                         next = le64_to_cpu(dp->ldp_hash_end);
286                         OBD_PAGE_FREE(page);
287                         if (!done) {
288                                 pos = next;
289                                 if (pos == DIR_END_OFF)
290                                         /*
291                                          * End of directory reached.
292                                          */
293                                         done = 1;
294                                 else if (1 /* chain is exhausted*/)
295                                         /*
296                                          * Normal case: continue to the next
297                                          * page.
298                                          */
299                                         page = llu_dir_read_page(dir, pos, 1,
300                                                                &chain);
301                                 else {
302                                         /*
303                                          * go into overflow page.
304                                          */
305                                 }
306                         } else {
307                                 pos = hash;
308                                 if (filled == 0)
309                                         GOTO(out, filled = -EINVAL);
310                         }
311                 } else {
312                         rc = PTR_ERR(page);
313                         CERROR("error reading dir "DFID" at %lu: rc %d\n",
314                                PFID(&lli->lli_fid), (unsigned long)pos, rc);
315                 }
316         }
317         lli->lli_dir_pos = (loff_t)(__s32)pos;
318         *basep = lli->lli_dir_pos;
319 out:
320         ll_dir_chain_fini(&chain);
321         liblustre_wait_event(0);
322         RETURN(filled);
323 }