Whamcloud - gitweb
branch: HEAD
[fs/lustre-release.git] / lustre / liblustre / dir.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see
20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright  2008 Sun Microsystems, Inc. All rights reserved
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/liblustre/dir.c
37  *
38  * Lustre Light directory handling
39  */
40
41 #define DEBUG_SUBSYSTEM S_LLITE
42
43 #include <unistd.h>
44 #include <stdlib.h>
45 #include <string.h>
46 #include <assert.h>
47 #include <time.h>
48 #include <sys/types.h>
49 #include <sys/stat.h>
50 #include <fcntl.h>
51 #include <sys/queue.h>
52
53 #include <sysio.h>
54 #ifdef HAVE_XTIO_H
55 #include <xtio.h>
56 #endif
57 #include <fs.h>
58 #include <mount.h>
59 #include <inode.h>
60 #ifdef HAVE_FILE_H
61 #include <file.h>
62 #endif
63
64 #undef LIST_HEAD
65
66 #ifdef HAVE_LINUX_UNISTD_H
67 #include <linux/unistd.h>
68 #elif defined(HAVE_UNISTD_H)
69 #include <unistd.h>
70 #endif
71
72 #include <dirent.h>
73
74 #include "llite_lib.h"
75
76 /* (new) readdir implementation overview can be found in lustre/llite/dir.c */
77
78 static int llu_dir_do_readpage(struct inode *inode, struct page *page)
79 {
80         struct llu_inode_info *lli = llu_i2info(inode);
81         struct intnl_stat     *st = llu_i2stat(inode);
82         struct llu_sb_info    *sbi = llu_i2sbi(inode);
83         struct ptlrpc_request *request;
84         struct lustre_handle   lockh;
85         struct mdt_body       *body;
86         struct lookup_intent   it = { .it_op = IT_READDIR };
87         struct md_op_data      op_data = {{ 0 }};
88         ldlm_policy_data_t policy = { .l_inodebits = { MDS_INODELOCK_UPDATE } };
89         __u64 offset;
90         int rc = 0;
91         ENTRY;
92
93         rc = md_lock_match(sbi->ll_md_exp, LDLM_FL_BLOCK_GRANTED,
94                            &lli->lli_fid, LDLM_IBITS, &policy, LCK_CR, &lockh);
95         if (!rc) {
96                 struct ldlm_enqueue_info einfo = {LDLM_IBITS, LCK_CR,
97                         llu_md_blocking_ast, ldlm_completion_ast, NULL, NULL,
98                         inode};
99
100                 llu_prep_md_op_data(&op_data, inode, NULL, NULL, 0, 0,
101                                     LUSTRE_OPC_ANY);
102
103                 rc = md_enqueue(sbi->ll_md_exp, &einfo, &it,
104                                 &op_data, &lockh, NULL, 0, NULL,
105                                 LDLM_FL_CANCEL_ON_BLOCK);
106                 request = (struct ptlrpc_request *)it.d.lustre.it_data;
107                 if (request)
108                         ptlrpc_req_finished(request);
109                 if (rc < 0) {
110                         CERROR("lock enqueue: err: %d\n", rc);
111                         RETURN(rc);
112                 }
113         }
114         ldlm_lock_dump_handle(D_OTHER, &lockh);
115
116         offset = (__u64)hash_x_index(page->index);
117         rc = md_readpage(sbi->ll_md_exp, &lli->lli_fid, NULL,
118                          offset, page, &request);
119         if (!rc) {
120                 body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY);
121                 LASSERT(body != NULL);         /* checked by md_readpage() */
122
123                 if (body->valid & OBD_MD_FLSIZE)
124                         st->st_size = body->size;
125         } else {
126                 CERROR("read_dir_page(%ld) error %d\n", page->index, rc);
127         }
128         ptlrpc_req_finished(request);
129         EXIT;
130
131         ldlm_lock_decref(&lockh, LCK_CR);
132         return rc;
133 }
134
135 static cfs_page_t *llu_dir_read_page(struct inode *ino, __u64 hash,
136                                      int exact, struct ll_dir_chain *chain)
137 {
138         cfs_page_t *page;
139         int rc;
140         ENTRY;
141
142         OBD_PAGE_ALLOC(page, 0);
143         if (!page)
144                 RETURN(ERR_PTR(-ENOMEM));
145         page->index = hash_x_index(hash);
146
147         rc = llu_dir_do_readpage(ino, page);
148         if (rc) {
149                 OBD_PAGE_FREE(page);
150                 RETURN(ERR_PTR(rc));
151         }
152
153         return page;
154 }
155
156 void *(*memmover)(void *, const void *, size_t) = memmove;
157
158 #define NAME_OFFSET(de) ((int) ((de)->d_name - (char *) (de)))
159 #define ROUND_UP64(x)   (((x)+sizeof(__u64)-1) & ~(sizeof(__u64)-1))
160 static int filldir(char *buf, int buflen,
161                    const char *name, int namelen, loff_t offset,
162                    ino_t ino, unsigned int d_type, int *filled)
163 {
164         cfs_dirent_t *dirent = (cfs_dirent_t *) (buf + *filled);
165         cfs_dirent_t  holder;
166         int reclen = ROUND_UP64(NAME_OFFSET(dirent) + namelen + 1);
167
168         /*
169          * @buf is not guaranteed to be properly aligned. To work around,
170          * first fill stack-allocated @holder, then copy @holder into @buf by
171          * memmove().
172          */
173
174         /* check overflow */
175         if ((*filled + reclen) > buflen)
176                 return 1;
177
178         holder.d_ino = ino;
179 #ifdef _DIRENT_HAVE_D_OFF
180         holder.d_off = offset;
181 #endif
182         holder.d_reclen = reclen;
183 #ifdef _DIRENT_HAVE_D_TYPE
184         holder.d_type = (unsigned short) d_type;
185 #endif
186         /* gcc unrolls memcpy() of structs into field-wise assignments,
187          * assuming proper alignment. Humor it. */
188         (*memmover)(dirent, &holder, NAME_OFFSET(dirent));
189         memcpy(dirent->d_name, name, namelen);
190         dirent->d_name[namelen] = 0;
191
192         *filled += reclen;
193
194         return 0;
195 }
196
/*
 * TODO: much of the code here is similar/identical to llite ll_readdir().
 * This code could be factored out and shared in a common module.
 */
201
202 ssize_t llu_iop_filldirentries(struct inode *dir, _SYSIO_OFF_T *basep,
203                                char *buf, size_t nbytes)
204 {
205         struct llu_inode_info *lli = llu_i2info(dir);
206         struct intnl_stat     *st = llu_i2stat(dir);
207         loff_t                 pos = *basep;
208         struct ll_dir_chain    chain;
209         cfs_page_t            *page;
210         int filled = 0;
211         int rc;
212         int done;
213         int shift;
214         __u16 type;
215         ENTRY;
216
217         liblustre_wait_event(0);
218
219         if (st->st_size == 0) {
220                 CWARN("dir size is 0?\n");
221                 RETURN(0);
222         }
223
224         if (pos == DIR_END_OFF)
225                 /*
226                  * end-of-file.
227                  */
228                 RETURN(0);
229
230         rc    = 0;
231         done  = 0;
232         shift = 0;
233         ll_dir_chain_init(&chain);
234
235         page = llu_dir_read_page(dir, pos, 0, &chain);
236         while (rc == 0 && !done) {
237                 struct lu_dirpage *dp;
238                 struct lu_dirent  *ent;
239
240                 if (!IS_ERR(page)) {
241                         /*
242                          * If page is empty (end of directoryis reached),
243                          * use this value.
244                          */
245                         __u64 hash = DIR_END_OFF;
246                         __u64 next;
247
248                         dp = page->addr;
249                         for (ent = lu_dirent_start(dp); ent != NULL && !done;
250                              ent = lu_dirent_next(ent)) {
251                                 char          *name;
252                                 int            namelen;
253                                 struct lu_fid  fid;
254                                 ino_t          ino;
255
256                                 hash    = le64_to_cpu(ent->lde_hash);
257                                 namelen = le16_to_cpu(ent->lde_namelen);
258
259                                 if (hash < pos)
260                                         /*
261                                          * Skip until we find target hash
262                                          * value.
263                                          */
264                                         continue;
265
266                                 if (namelen == 0)
267                                         /*
268                                          * Skip dummy record.
269                                          */
270                                         continue;
271
272                                 fid  = ent->lde_fid;
273                                 name = ent->lde_name;
274                                 fid_le_to_cpu(&fid, &fid);
275                                 ino  = cl_fid_build_ino(&fid);
276                                 type = ll_dirent_type_get(ent);
277                                 done = filldir(buf, nbytes, name, namelen,
278                                                (loff_t)hash, ino, type,
279                                                &filled);
280                         }
281                         next = le64_to_cpu(dp->ldp_hash_end);
282                         OBD_PAGE_FREE(page);
283                         if (!done) {
284                                 pos = next;
285                                 if (pos == DIR_END_OFF)
286                                         /*
287                                          * End of directory reached.
288                                          */
289                                         done = 1;
290                                 else if (1 /* chain is exhausted*/)
291                                         /*
292                                          * Normal case: continue to the next
293                                          * page.
294                                          */
295                                         page = llu_dir_read_page(dir, pos, 1,
296                                                                &chain);
297                                 else {
298                                         /*
299                                          * go into overflow page.
300                                          */
301                                 }
302                         } else {
303                                 pos = hash;
304                                 if (filled == 0)
305                                         GOTO(out, filled = -EINVAL);
306                         }
307                 } else {
308                         rc = PTR_ERR(page);
309                         CERROR("error reading dir "DFID" at %lu: rc %d\n",
310                                PFID(&lli->lli_fid), (unsigned long)pos, rc);
311                 }
312         }
313         lli->lli_dir_pos = (loff_t)pos;
314         *basep = lli->lli_dir_pos;
315 out:
316         ll_dir_chain_fini(&chain);
317         liblustre_wait_event(0);
318         RETURN(filled);
319 }