Whamcloud - gitweb
lu: make site hash table dynamically adjustable.
[fs/lustre-release.git] / lustre / obdclass / llog_lvfs.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  *  Copyright (C) 2001-2003 Cluster File Systems, Inc.
5  *   Author: Andreas Dilger <adilger@clusterfs.com>
6  *
7  *   This file is part of the Lustre file system, http://www.lustre.org
8  *   Lustre is a trademark of Cluster File Systems, Inc.
9  *
10  *   You may have signed or agreed to another license before downloading
11  *   this software.  If so, you are bound by the terms and conditions
12  *   of that agreement, and the following does not apply to you.  See the
13  *   LICENSE file included with this distribution for more information.
14  *
15  *   If you did not agree to a different license, then this copy of Lustre
16  *   is open source software; you can redistribute it and/or modify it
17  *   under the terms of version 2 of the GNU General Public License as
18  *   published by the Free Software Foundation.
19  *
20  *   In either case, Lustre is distributed in the hope that it will be
21  *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
22  *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23  *   license text for more details.
24  *
25  * OST<->MDS recovery logging infrastructure.
26  *
27  * Invariants in implementation:
28  * - we do not share logs among different OST<->MDS connections, so that
29  *   if an OST or MDS fails it need only look at log(s) relevant to itself
30  */
31
32 #define DEBUG_SUBSYSTEM S_LOG
33
34 #ifndef EXPORT_SYMTAB
35 #define EXPORT_SYMTAB
36 #endif
37
38 #ifndef __KERNEL__
39 #include <liblustre.h>
40 #endif
41
42 #include <obd.h>
43 #include <obd_class.h>
44 #include <lustre_log.h>
45 #include <obd_ost.h>
46 #include <lustre_mds.h>
47 #include <libcfs/list.h>
48 #include <lvfs.h>
49 #include <lustre_fsfilt.h>
50 #include <lustre_disk.h>
51 #include "llog_internal.h"
52
53 #if defined(__KERNEL__) && defined(LLOG_LVFS)
54
55 static int llog_lvfs_pad(struct obd_device *obd, struct l_file *file,
56                                 int len, int index)
57 {
58         struct llog_rec_hdr rec = { 0 };
59         struct llog_rec_tail tail;
60         int rc;
61         ENTRY;
62
63         LASSERT(len >= LLOG_MIN_REC_SIZE && (len & 0x7) == 0);
64
65         tail.lrt_len = rec.lrh_len = len;
66         tail.lrt_index = rec.lrh_index = index;
67         rec.lrh_type = LLOG_PAD_MAGIC;
68
69         rc = fsfilt_write_record(obd, file, &rec, sizeof(rec), &file->f_pos, 0);
70         if (rc) {
71                 CERROR("error writing padding record: rc %d\n", rc);
72                 goto out;
73         }
74
75         file->f_pos += len - sizeof(rec) - sizeof(tail);
76         rc = fsfilt_write_record(obd, file, &tail, sizeof(tail),&file->f_pos,0);
77         if (rc) {
78                 CERROR("error writing padding record: rc %d\n", rc);
79                 goto out;
80         }
81
82  out:
83         RETURN(rc);
84 }
85
86 static int llog_lvfs_write_blob(struct obd_device *obd, struct l_file *file,
87                                 struct llog_rec_hdr *rec, void *buf, loff_t off)
88 {
89         int rc;
90         struct llog_rec_tail end;
91         loff_t saved_off = file->f_pos;
92         int buflen = rec->lrh_len;
93
94         ENTRY;
95         file->f_pos = off;
96
97         if (!buf) {
98                 rc = fsfilt_write_record(obd, file, rec, buflen,&file->f_pos,0);
99                 if (rc) {
100                         CERROR("error writing log record: rc %d\n", rc);
101                         goto out;
102                 }
103                 GOTO(out, rc = 0);
104         }
105
106         /* the buf case */
107         rec->lrh_len = sizeof(*rec) + buflen + sizeof(end);
108         rc = fsfilt_write_record(obd, file, rec, sizeof(*rec), &file->f_pos, 0);
109         if (rc) {
110                 CERROR("error writing log hdr: rc %d\n", rc);
111                 goto out;
112         }
113
114         rc = fsfilt_write_record(obd, file, buf, buflen, &file->f_pos, 0);
115         if (rc) {
116                 CERROR("error writing log buffer: rc %d\n", rc);
117                 goto out;
118         }
119
120         end.lrt_len = rec->lrh_len;
121         end.lrt_index = rec->lrh_index;
122         rc = fsfilt_write_record(obd, file, &end, sizeof(end), &file->f_pos, 0);
123         if (rc) {
124                 CERROR("error writing log tail: rc %d\n", rc);
125                 goto out;
126         }
127
128         rc = 0;
129  out:
130         if (saved_off > file->f_pos)
131                 file->f_pos = saved_off;
132         LASSERT(rc <= 0);
133         RETURN(rc);
134 }
135
136 static int llog_lvfs_read_blob(struct obd_device *obd, struct l_file *file,
137                                 void *buf, int size, loff_t off)
138 {
139         loff_t offset = off;
140         int rc;
141         ENTRY;
142
143         rc = fsfilt_read_record(obd, file, buf, size, &offset);
144         if (rc) {
145                 CERROR("error reading log record: rc %d\n", rc);
146                 RETURN(rc);
147         }
148         RETURN(0);
149 }
150
151 static int llog_lvfs_read_header(struct llog_handle *handle)
152 {
153         struct obd_device *obd;
154         int rc;
155         ENTRY;
156
157         LASSERT(sizeof(*handle->lgh_hdr) == LLOG_CHUNK_SIZE);
158
159         obd = handle->lgh_ctxt->loc_exp->exp_obd;
160
161         if (handle->lgh_file->f_dentry->d_inode->i_size == 0) {
162                 CDEBUG(D_HA, "not reading header from 0-byte log\n");
163                 RETURN(LLOG_EEMPTY);
164         }
165
166         rc = llog_lvfs_read_blob(obd, handle->lgh_file, handle->lgh_hdr,
167                                  LLOG_CHUNK_SIZE, 0);
168         if (rc) {
169                 CERROR("error reading log header from %.*s\n",
170                        handle->lgh_file->f_dentry->d_name.len,
171                        handle->lgh_file->f_dentry->d_name.name);
172         } else {
173                 struct llog_rec_hdr *llh_hdr = &handle->lgh_hdr->llh_hdr;
174
175                 if (LLOG_REC_HDR_NEEDS_SWABBING(llh_hdr))
176                         lustre_swab_llog_hdr(handle->lgh_hdr);
177
178                 if (llh_hdr->lrh_type != LLOG_HDR_MAGIC) {
179                         CERROR("bad log %.*s header magic: %#x (expected %#x)\n",
180                                handle->lgh_file->f_dentry->d_name.len,
181                                handle->lgh_file->f_dentry->d_name.name,
182                                llh_hdr->lrh_type, LLOG_HDR_MAGIC);
183                         rc = -EIO;
184                 } else if (llh_hdr->lrh_len != LLOG_CHUNK_SIZE) {
185                         CERROR("incorrectly sized log %.*s header: %#x "
186                                "(expected %#x)\n",
187                                handle->lgh_file->f_dentry->d_name.len,
188                                handle->lgh_file->f_dentry->d_name.name,
189                                llh_hdr->lrh_len, LLOG_CHUNK_SIZE);
190                         CERROR("you may need to re-run lconf --write_conf.\n");
191                         rc = -EIO;
192                 }
193         }
194
195         handle->lgh_last_idx = handle->lgh_hdr->llh_tail.lrt_index;
196         handle->lgh_file->f_pos = handle->lgh_file->f_dentry->d_inode->i_size;
197
198         RETURN(rc);
199 }
200
201 /* returns negative in on error; 0 if success && reccookie == 0; 1 otherwise */
202 /* appends if idx == -1, otherwise overwrites record idx. */
203 static int llog_lvfs_write_rec(struct llog_handle *loghandle,
204                                struct llog_rec_hdr *rec,
205                                struct llog_cookie *reccookie, int cookiecount,
206                                void *buf, int idx)
207 {
208         struct llog_log_hdr *llh;
209         int reclen = rec->lrh_len, index, rc;
210         struct llog_rec_tail *lrt;
211         struct obd_device *obd;
212         struct file *file;
213         size_t left;
214         ENTRY;
215
216         llh = loghandle->lgh_hdr;
217         file = loghandle->lgh_file;
218         obd = loghandle->lgh_ctxt->loc_exp->exp_obd;
219
220         /* record length should not bigger than LLOG_CHUNK_SIZE */
221         if (buf)
222                 rc = (reclen > LLOG_CHUNK_SIZE - sizeof(struct llog_rec_hdr) -
223                       sizeof(struct llog_rec_tail)) ? -E2BIG : 0;
224         else
225                 rc = (reclen > LLOG_CHUNK_SIZE) ? -E2BIG : 0;
226         if (rc)
227                 RETURN(rc);
228
229         if (idx != -1) {
230                 loff_t saved_offset;
231
232                 /* no header: only allowed to insert record 1 */
233                 if (idx != 1 && !file->f_dentry->d_inode->i_size) {
234                         CERROR("idx != -1 in empty log\n");
235                         LBUG();
236                 }
237
238                 if (idx && llh->llh_size && llh->llh_size != reclen)
239                         RETURN(-EINVAL);
240
241                 rc = llog_lvfs_write_blob(obd, file, &llh->llh_hdr, NULL, 0);
242                 /* we are done if we only write the header or on error */
243                 if (rc || idx == 0)
244                         RETURN(rc);
245
246                 saved_offset = sizeof(*llh) + (idx-1)*rec->lrh_len;
247                 rc = llog_lvfs_write_blob(obd, file, rec, buf, saved_offset);
248                 if (rc == 0 && reccookie) {
249                         reccookie->lgc_lgl = loghandle->lgh_id;
250                         reccookie->lgc_index = idx;
251                         rc = 1;
252                 }
253                 RETURN(rc);
254         }
255
256         /* Make sure that records don't cross a chunk boundary, so we can
257          * process them page-at-a-time if needed.  If it will cross a chunk
258          * boundary, write in a fake (but referenced) entry to pad the chunk.
259          *
260          * We know that llog_current_log() will return a loghandle that is
261          * big enough to hold reclen, so all we care about is padding here.
262          */
263         left = LLOG_CHUNK_SIZE - (file->f_pos & (LLOG_CHUNK_SIZE - 1));
264         if (buf)
265                 reclen = sizeof(*rec) + rec->lrh_len + 
266                         sizeof(struct llog_rec_tail);
267
268         /* NOTE: padding is a record, but no bit is set */
269         if (left != 0 && left != reclen &&
270             left < (reclen + LLOG_MIN_REC_SIZE)) {
271                 loghandle->lgh_last_idx++;
272                 rc = llog_lvfs_pad(obd, file, left, loghandle->lgh_last_idx);
273                 if (rc)
274                         RETURN(rc);
275                 /* if it's the last idx in log file, then return -ENOSPC */
276                 if (loghandle->lgh_last_idx == LLOG_BITMAP_SIZE(llh) - 1)
277                         RETURN(-ENOSPC);
278         }
279
280         loghandle->lgh_last_idx++;
281         index = loghandle->lgh_last_idx;
282         LASSERT(index < LLOG_BITMAP_SIZE(llh));
283         rec->lrh_index = index;
284         if (buf == NULL) {
285                 lrt = (struct llog_rec_tail *)
286                         ((char *)rec + rec->lrh_len - sizeof(*lrt));
287                 lrt->lrt_len = rec->lrh_len;
288                 lrt->lrt_index = rec->lrh_index;
289         }
290         if (ext2_set_bit(index, llh->llh_bitmap)) {
291                 CERROR("argh, index %u already set in log bitmap?\n", index);
292                 LBUG(); /* should never happen */
293         }
294         llh->llh_count++;
295         llh->llh_tail.lrt_index = index;
296
297         rc = llog_lvfs_write_blob(obd, file, &llh->llh_hdr, NULL, 0);
298         if (rc)
299                 RETURN(rc);
300
301         rc = llog_lvfs_write_blob(obd, file, rec, buf, file->f_pos);
302         if (rc)
303                 RETURN(rc);
304
305         CDEBUG(D_HA, "added record "LPX64": idx: %u, %u bytes\n",
306                loghandle->lgh_id.lgl_oid, index, rec->lrh_len);
307         if (rc == 0 && reccookie) {
308                 reccookie->lgc_lgl = loghandle->lgh_id;
309                 reccookie->lgc_index = index;
310                 if ((rec->lrh_type == MDS_UNLINK_REC) || 
311                                 (rec->lrh_type == MDS_SETATTR_REC))
312                         reccookie->lgc_subsys = LLOG_MDS_OST_ORIG_CTXT;
313                 else if (rec->lrh_type == OST_SZ_REC)
314                         reccookie->lgc_subsys = LLOG_SIZE_ORIG_CTXT;
315                 else if (rec->lrh_type == OST_RAID1_REC)
316                         reccookie->lgc_subsys = LLOG_RD1_ORIG_CTXT;
317                 else
318                         reccookie->lgc_subsys = -1;
319                 rc = 1;
320         }
321         if (rc == 0 && rec->lrh_type == LLOG_GEN_REC)
322                 rc = 1;
323
324         RETURN(rc);
325 }
326
327 /* We can skip reading at least as many log blocks as the number of
328 * minimum sized log records we are skipping.  If it turns out
329 * that we are not far enough along the log (because the
330 * actual records are larger than minimum size) we just skip
331 * some more records. */
332
333 static void llog_skip_over(__u64 *off, int curr, int goal)
334 {
335         if (goal <= curr)
336                 return;
337         *off = (*off + (goal-curr-1) * LLOG_MIN_REC_SIZE) &
338                 ~(LLOG_CHUNK_SIZE - 1);
339 }
340
341
342 /* sets:
343  *  - cur_offset to the furthest point read in the log file
344  *  - cur_idx to the log index preceeding cur_offset
345  * returns -EIO/-EINVAL on error
346  */
347 static int llog_lvfs_next_block(struct llog_handle *loghandle, int *cur_idx,
348                                 int next_idx, __u64 *cur_offset, void *buf,
349                                 int len)
350 {
351         int rc;
352         ENTRY;
353
354         if (len == 0 || len & (LLOG_CHUNK_SIZE - 1))
355                 RETURN(-EINVAL);
356
357         CDEBUG(D_OTHER, "looking for log index %u (cur idx %u off "LPU64")\n",
358                next_idx, *cur_idx, *cur_offset);
359
360         while (*cur_offset < loghandle->lgh_file->f_dentry->d_inode->i_size) {
361                 struct llog_rec_hdr *rec;
362                 struct llog_rec_tail *tail;
363                 loff_t ppos;
364
365                 llog_skip_over(cur_offset, *cur_idx, next_idx);
366
367                 ppos = *cur_offset;
368                 rc = fsfilt_read_record(loghandle->lgh_ctxt->loc_exp->exp_obd,
369                                         loghandle->lgh_file, buf, len,
370                                         &ppos);
371                 if (rc) {
372                         CERROR("Cant read llog block at log id "LPU64
373                                "/%u offset "LPU64"\n",
374                                loghandle->lgh_id.lgl_oid,
375                                loghandle->lgh_id.lgl_ogen,
376                                *cur_offset);
377                         RETURN(rc);
378                 }
379
380                 /* put number of bytes read into rc to make code simpler */
381                 rc = ppos - *cur_offset;
382                 *cur_offset = ppos;
383
384                 if (rc == 0) /* end of file, nothing to do */
385                         RETURN(0);
386
387                 if (rc < sizeof(*tail)) {
388                         CERROR("Invalid llog block at log id "LPU64"/%u offset "
389                                LPU64"\n", loghandle->lgh_id.lgl_oid,
390                                loghandle->lgh_id.lgl_ogen, *cur_offset);
391                         RETURN(-EINVAL);
392                 }
393
394                 rec = buf;
395                 tail = (struct llog_rec_tail *)((char *)buf + rc -
396                                                 sizeof(struct llog_rec_tail));
397
398                 if (LLOG_REC_HDR_NEEDS_SWABBING(rec)) {
399                         lustre_swab_llog_rec(rec, tail);
400                 }
401
402                 *cur_idx = tail->lrt_index;
403
404                 /* this shouldn't happen */
405                 if (tail->lrt_index == 0) {
406                         CERROR("Invalid llog tail at log id "LPU64"/%u offset "
407                                LPU64"\n", loghandle->lgh_id.lgl_oid,
408                                loghandle->lgh_id.lgl_ogen, *cur_offset);
409                         RETURN(-EINVAL);
410                 }
411                 if (tail->lrt_index < next_idx)
412                         continue;
413
414                 /* sanity check that the start of the new buffer is no farther
415                  * than the record that we wanted.  This shouldn't happen. */
416                 if (rec->lrh_index > next_idx) {
417                         CERROR("missed desired record? %u > %u\n",
418                                rec->lrh_index, next_idx);
419                         RETURN(-ENOENT);
420                 }
421                 RETURN(0);
422         }
423         RETURN(-EIO);
424 }
425
426 static int llog_lvfs_prev_block(struct llog_handle *loghandle,
427                                 int prev_idx, void *buf, int len)
428 {
429         __u64 cur_offset;
430         int rc;
431         ENTRY;
432
433         if (len == 0 || len & (LLOG_CHUNK_SIZE - 1))
434                 RETURN(-EINVAL);
435
436         CDEBUG(D_OTHER, "looking for log index %u n", prev_idx);
437
438         cur_offset = LLOG_CHUNK_SIZE;
439         llog_skip_over(&cur_offset, 0, prev_idx);
440
441         while (cur_offset < loghandle->lgh_file->f_dentry->d_inode->i_size) {
442                 struct llog_rec_hdr *rec;
443                 struct llog_rec_tail *tail;
444                 loff_t ppos;
445
446                 ppos = cur_offset;
447
448                 rc = fsfilt_read_record(loghandle->lgh_ctxt->loc_exp->exp_obd,
449                                         loghandle->lgh_file, buf, len,
450                                         &ppos);
451                 if (rc) {
452                         CERROR("Cant read llog block at log id "LPU64
453                                "/%u offset "LPU64"\n",
454                                loghandle->lgh_id.lgl_oid,
455                                loghandle->lgh_id.lgl_ogen,
456                                cur_offset);
457                         RETURN(rc);
458                 }
459
460                 /* put number of bytes read into rc to make code simpler */
461                 rc = ppos - cur_offset;
462                 cur_offset = ppos;
463
464                 if (rc == 0) /* end of file, nothing to do */
465                         RETURN(0);
466
467                 if (rc < sizeof(*tail)) {
468                         CERROR("Invalid llog block at log id "LPU64"/%u offset "
469                                LPU64"\n", loghandle->lgh_id.lgl_oid,
470                                loghandle->lgh_id.lgl_ogen, cur_offset);
471                         RETURN(-EINVAL);
472                 }
473
474                 tail = buf + rc - sizeof(struct llog_rec_tail);
475
476                 /* this shouldn't happen */
477                 if (tail->lrt_index == 0) {
478                         CERROR("Invalid llog tail at log id "LPU64"/%u offset "
479                                LPU64"\n", loghandle->lgh_id.lgl_oid,
480                                loghandle->lgh_id.lgl_ogen, cur_offset);
481                         RETURN(-EINVAL);
482                 }
483                 if (le32_to_cpu(tail->lrt_index) < prev_idx)
484                         continue;
485
486                 /* sanity check that the start of the new buffer is no farther
487                  * than the record that we wanted.  This shouldn't happen. */
488                 rec = buf;
489                 if (le32_to_cpu(rec->lrh_index) > prev_idx) {
490                         CERROR("missed desired record? %u > %u\n",
491                                le32_to_cpu(rec->lrh_index), prev_idx);
492                         RETURN(-ENOENT);
493                 }
494                 RETURN(0);
495         }
496         RETURN(-EIO);
497 }
498
499 static struct file *llog_filp_open(char *dir, char *name, int flags, int mode)
500 {
501         char *logname;
502         struct file *filp;
503         int len;
504
505         OBD_ALLOC(logname, PATH_MAX);
506         if (logname == NULL)
507                 return ERR_PTR(-ENOMEM);
508
509         len = snprintf(logname, PATH_MAX, "%s/%s", dir, name);
510         if (len >= PATH_MAX - 1) {
511                 filp = ERR_PTR(-ENAMETOOLONG);
512         } else {
513                 filp = l_filp_open(logname, flags, mode);
514                 if (IS_ERR(filp))
515                         CERROR("logfile creation %s: %ld\n", logname,
516                                PTR_ERR(filp));
517         }
518         OBD_FREE(logname, PATH_MAX);
519         return filp;
520 }
521
522 /* This is a callback from the llog_* functions.
523  * Assumes caller has already pushed us into the kernel context. */
524 static int llog_lvfs_create(struct llog_ctxt *ctxt, struct llog_handle **res,
525                             struct llog_logid *logid, char *name)
526 {
527         struct llog_handle *handle;
528         struct obd_device *obd;
529         struct l_dentry *dchild = NULL;
530         struct obdo *oa = NULL;
531         int rc = 0, cleanup_phase = 1;
532         int open_flags = O_RDWR | O_CREAT | O_LARGEFILE;
533         ENTRY;
534
535         handle = llog_alloc_handle();
536         if (handle == NULL)
537                 RETURN(-ENOMEM);
538         *res = handle;
539
540         LASSERT(ctxt);
541         LASSERT(ctxt->loc_exp);
542         obd = ctxt->loc_exp->exp_obd;
543
544         if (logid != NULL) {
545                 dchild = obd_lvfs_fid2dentry(ctxt->loc_exp, logid->lgl_oid,
546                                              logid->lgl_ogen, logid->lgl_ogr);
547
548                 if (IS_ERR(dchild)) {
549                         rc = PTR_ERR(dchild);
550                         CERROR("error looking up logfile "LPX64":0x%x: rc %d\n",
551                                logid->lgl_oid, logid->lgl_ogen, rc);
552                         GOTO(cleanup, rc);
553                 }
554
555                 cleanup_phase = 2;
556                 if (dchild->d_inode == NULL) {
557                         rc = -ENOENT;
558                         CERROR("nonexistent log file "LPX64":"LPX64": rc %d\n",
559                                logid->lgl_oid, logid->lgl_ogr, rc);
560                         GOTO(cleanup, rc);
561                 }
562
563                 handle->lgh_file = l_dentry_open(&obd->obd_lvfs_ctxt, dchild,
564                                                     O_RDWR | O_LARGEFILE);
565                 if (IS_ERR(handle->lgh_file)) {
566                         rc = PTR_ERR(handle->lgh_file);
567                         CERROR("error opening logfile "LPX64"0x%x: rc %d\n",
568                                logid->lgl_oid, logid->lgl_ogen, rc);
569                         GOTO(cleanup, rc);
570                 }
571
572                 /* assign the value of lgh_id for handle directly */
573                 handle->lgh_id = *logid;
574
575         } else if (name) {
576                 /* COMPAT_146 */
577                 if (strcmp(obd->obd_type->typ_name, LUSTRE_MDS_NAME) == 0) {
578                         handle->lgh_file = llog_filp_open(MDT_LOGS_DIR, name, 
579                                                           open_flags, 0644);
580                 } else {
581                         /* end COMPAT_146 */
582                         handle->lgh_file = llog_filp_open(MOUNT_CONFIGS_DIR,
583                                                           name, open_flags, 
584                                                           0644);
585                 }
586                 if (IS_ERR(handle->lgh_file))
587                         GOTO(cleanup, rc = PTR_ERR(handle->lgh_file));
588
589                 handle->lgh_id.lgl_ogr = 1;
590                 handle->lgh_id.lgl_oid =
591                         handle->lgh_file->f_dentry->d_inode->i_ino;
592                 handle->lgh_id.lgl_ogen =
593                         handle->lgh_file->f_dentry->d_inode->i_generation;
594         } else {
595                 oa = obdo_alloc();
596                 if (oa == NULL)
597                         GOTO(cleanup, rc = -ENOMEM);
598
599                 oa->o_gr = FILTER_GROUP_LLOG;
600                 oa->o_valid = OBD_MD_FLGENER | OBD_MD_FLGROUP;
601
602                 rc = obd_create(ctxt->loc_exp, oa, NULL, NULL);
603                 if (rc)
604                         GOTO(cleanup, rc);
605
606                 dchild = obd_lvfs_fid2dentry(ctxt->loc_exp, oa->o_id,
607                                              oa->o_generation, oa->o_gr);
608
609                 if (IS_ERR(dchild))
610                         GOTO(cleanup, rc = PTR_ERR(dchild));
611                 cleanup_phase = 2;
612                 handle->lgh_file = l_dentry_open(&obd->obd_lvfs_ctxt, dchild,
613                                                  open_flags);
614                 if (IS_ERR(handle->lgh_file))
615                         GOTO(cleanup, rc = PTR_ERR(handle->lgh_file));
616
617                 handle->lgh_id.lgl_ogr = oa->o_gr;
618                 handle->lgh_id.lgl_oid = oa->o_id;
619                 handle->lgh_id.lgl_ogen = oa->o_generation;
620         }
621
622         handle->lgh_ctxt = ctxt;
623  finish:
624         if (oa)
625                 obdo_free(oa);
626         RETURN(rc);
627 cleanup:
628         switch (cleanup_phase) {
629         case 2:
630                 l_dput(dchild);
631         case 1:
632                 llog_free_handle(handle);
633         }
634         goto finish;
635 }
636
637 static int llog_lvfs_close(struct llog_handle *handle)
638 {
639         int rc;
640         ENTRY;
641
642         rc = filp_close(handle->lgh_file, 0);
643         if (rc)
644                 CERROR("error closing log: rc %d\n", rc);
645         RETURN(rc);
646 }
647
648 static int llog_lvfs_destroy(struct llog_handle *handle)
649 {
650         struct dentry *fdentry;
651         struct obdo *oa;
652         struct obd_device *obd = handle->lgh_ctxt->loc_exp->exp_obd;
653         char *dir;
654         int rc;
655         ENTRY;
656
657         /* COMPAT_146 */
658         if (strcmp(obd->obd_type->typ_name, LUSTRE_MDS_NAME) == 0)
659                 dir = MDT_LOGS_DIR;
660         else
661                 /* end COMPAT_146 */
662                 dir = MOUNT_CONFIGS_DIR;
663
664         fdentry = handle->lgh_file->f_dentry;
665         if (strcmp(fdentry->d_parent->d_name.name, dir) == 0) {
666                 struct inode *inode = fdentry->d_parent->d_inode;
667                 struct lvfs_run_ctxt saved;
668
669                 push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
670                 dget(fdentry);
671                 rc = llog_lvfs_close(handle);
672
673                 if (rc == 0) {
674                         LOCK_INODE_MUTEX(inode);
675                         rc = vfs_unlink(inode, fdentry);
676                         UNLOCK_INODE_MUTEX(inode);
677                 }
678
679                 dput(fdentry);
680                 pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
681                 RETURN(rc);
682         }
683
684         oa = obdo_alloc();
685         if (oa == NULL)
686                 RETURN(-ENOMEM);
687
688         oa->o_id = handle->lgh_id.lgl_oid;
689         oa->o_gr = handle->lgh_id.lgl_ogr;
690         oa->o_generation = handle->lgh_id.lgl_ogen;
691         oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP | OBD_MD_FLGENER;
692
693         rc = llog_lvfs_close(handle);
694         if (rc)
695                 GOTO(out, rc);
696
697         rc = obd_destroy(handle->lgh_ctxt->loc_exp, oa, NULL, NULL, NULL);
698  out:
699         obdo_free(oa);
700         RETURN(rc);
701 }
702
703 /* reads the catalog list */
704 int llog_get_cat_list(struct obd_device *obd, struct obd_device *disk_obd,
705                       char *name, int count, struct llog_catid *idarray)
706 {
707         struct lvfs_run_ctxt saved;
708         struct l_file *file;
709         int rc;
710         int size = sizeof(*idarray) * count;
711         loff_t off = 0;
712         ENTRY;
713
714         if (!count) 
715                 RETURN(0);
716
717         push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
718         file = filp_open(name, O_RDWR | O_CREAT | O_LARGEFILE, 0700);
719         if (!file || IS_ERR(file)) {
720                 rc = PTR_ERR(file);
721                 CERROR("OBD filter: cannot open/create %s: rc = %d\n",
722                        name, rc);
723                 GOTO(out, rc);
724         }
725         
726         if (!S_ISREG(file->f_dentry->d_inode->i_mode)) {
727                 CERROR("%s is not a regular file!: mode = %o\n", name,
728                        file->f_dentry->d_inode->i_mode);
729                 GOTO(out, rc = -ENOENT);
730         }
731
732         CDEBUG(D_CONFIG, "cat list: disk size=%d, read=%d\n", 
733                (int)file->f_dentry->d_inode->i_size, size);
734
735         rc = fsfilt_read_record(disk_obd, file, idarray, size, &off);
736         if (rc) {
737                 CERROR("OBD filter: error reading %s: rc %d\n", name, rc);
738                 GOTO(out, rc);
739         }
740
741         EXIT;
742  out:
743         pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
744         if (file && !IS_ERR(file))
745                 rc = filp_close(file, 0);
746         return rc;
747 }
748 EXPORT_SYMBOL(llog_get_cat_list);
749
750 /* writes the cat list */
751 int llog_put_cat_list(struct obd_device *obd, struct obd_device *disk_obd,
752                       char *name, int count, struct llog_catid *idarray)
753 {
754         struct lvfs_run_ctxt saved;
755         struct l_file *file;
756         int rc;
757         int size = sizeof(*idarray) * count;
758         loff_t off = 0;
759
760         if (!count) 
761                 return (0);
762
763         push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
764         file = filp_open(name, O_RDWR | O_CREAT | O_LARGEFILE, 0700);
765         if (!file || IS_ERR(file)) {
766                 rc = PTR_ERR(file);
767                 CERROR("OBD filter: cannot open/create %s: rc = %d\n",
768                        name, rc);
769                 GOTO(out, rc);
770         }
771
772         if (!S_ISREG(file->f_dentry->d_inode->i_mode)) {
773                 CERROR("%s is not a regular file!: mode = %o\n", name,
774                        file->f_dentry->d_inode->i_mode);
775                 GOTO(out, rc = -ENOENT);
776         }
777
778         rc = fsfilt_write_record(disk_obd, file, idarray, size, &off, 1);
779         if (rc) {
780                 CDEBUG(D_INODE,"OBD filter: error reading %s: rc %d\n",
781                        name, rc);
782                 GOTO(out, rc);
783         }
784
785  out:
786         pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
787         if (file && !IS_ERR(file))
788                 rc = filp_close(file, 0);
789         RETURN(rc);
790 }
791
792 struct llog_operations llog_lvfs_ops = {
793         lop_write_rec:   llog_lvfs_write_rec,
794         lop_next_block:  llog_lvfs_next_block,
795         lop_prev_block:  llog_lvfs_prev_block,
796         lop_read_header: llog_lvfs_read_header,
797         lop_create:      llog_lvfs_create,
798         lop_destroy:     llog_lvfs_destroy,
799         lop_close:       llog_lvfs_close,
800         //        lop_cancel: llog_lvfs_cancel,
801 };
802
803 EXPORT_SYMBOL(llog_lvfs_ops);
804
805 #else /* !__KERNEL__ */
806
/*
 * Placeholder used when the kernel LLOG_LVFS implementation is not
 * compiled: reading a header is unsupported, so it calls LBUG().
 * The return statement only satisfies the signature.
 */
static int llog_lvfs_read_header(struct llog_handle *handle)
{
        LBUG();

        return 0;
}
812
/*
 * Placeholder used when the kernel LLOG_LVFS implementation is not
 * compiled: writing a record is unsupported, so it calls LBUG().
 * The return statement only satisfies the signature.
 */
static int llog_lvfs_write_rec(struct llog_handle *loghandle,
                               struct llog_rec_hdr *rec,
                               struct llog_cookie *reccookie,
                               int cookiecount, void *buf, int idx)
{
        LBUG();

        return 0;
}
821
822 static int llog_lvfs_next_block(struct llog_handle *loghandle, int *cur_idx,
823                                 int next_idx, __u64 *cur_offset, void *buf,
824                                 int len)
825 {
826         LBUG();
827         return 0;
828 }
829
/*
 * Placeholder used when the kernel LLOG_LVFS implementation is not
 * compiled: reading a previous block is unsupported, so it calls LBUG().
 * The return statement only satisfies the signature.
 */
static int llog_lvfs_prev_block(struct llog_handle *loghandle, int prev_idx,
                                void *buf, int len)
{
        LBUG();

        return 0;
}
836
/*
 * Placeholder used when the kernel LLOG_LVFS implementation is not
 * compiled: creating a log is unsupported, so it calls LBUG().
 * The return statement only satisfies the signature; *res is not set.
 */
static int llog_lvfs_create(struct llog_ctxt *ctxt,
                            struct llog_handle **res,
                            struct llog_logid *logid, char *name)
{
        LBUG();

        return 0;
}
843
/*
 * Placeholder used when the kernel LLOG_LVFS implementation is not
 * compiled: closing a log is unsupported, so it calls LBUG().
 * The return statement only satisfies the signature.
 */
static int llog_lvfs_close(struct llog_handle *handle)
{
        LBUG();

        return 0;
}
849
/*
 * Placeholder used when the kernel LLOG_LVFS implementation is not
 * compiled: destroying a log is unsupported, so it calls LBUG().
 * The return statement only satisfies the signature.
 */
static int llog_lvfs_destroy(struct llog_handle *handle)
{
        LBUG();

        return 0;
}
855
/*
 * Placeholder used when the kernel LLOG_LVFS implementation is not
 * compiled: reading the catalog list is unsupported, so it calls LBUG().
 * The return statement only satisfies the signature; @idarray is not
 * filled in.
 */
int llog_get_cat_list(struct obd_device *obd, struct obd_device *disk_obd,
                      char *name, int count,
                      struct llog_catid *idarray)
{
        LBUG();

        return 0;
}
862
/*
 * Placeholder used when the kernel LLOG_LVFS implementation is not
 * compiled: writing the catalog list is unsupported, so it calls LBUG().
 * The return statement only satisfies the signature; nothing is written.
 */
int llog_put_cat_list(struct obd_device *obd, struct obd_device *disk_obd,
                      char *name, int count,
                      struct llog_catid *idarray)
{
        LBUG();

        return 0;
}
869
870 struct llog_operations llog_lvfs_ops = {
871         lop_write_rec:   llog_lvfs_write_rec,
872         lop_next_block:  llog_lvfs_next_block,
873         lop_prev_block:  llog_lvfs_prev_block,
874         lop_read_header: llog_lvfs_read_header,
875         lop_create:      llog_lvfs_create,
876         lop_destroy:     llog_lvfs_destroy,
877         lop_close:       llog_lvfs_close,
878 //        lop_cancel:      llog_lvfs_cancel,
879 };
880 #endif