Whamcloud - gitweb
b=21259 "lfs check" is only allowed for root.
[fs/lustre-release.git] / lustre / obdclass / llog_lvfs.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see
20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright  2008 Sun Microsystems, Inc. All rights reserved
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/obdclass/llog_lvfs.c
37  *
38  * OST<->MDS recovery logging infrastructure.
39  * Invariants in implementation:
40  * - we do not share logs among different OST<->MDS connections, so that
41  *   if an OST or MDS fails it need only look at log(s) relevant to itself
42  *
43  * Author: Andreas Dilger <adilger@clusterfs.com>
44  */
45
46 #define DEBUG_SUBSYSTEM S_LOG
47
48 #ifndef EXPORT_SYMTAB
49 #define EXPORT_SYMTAB
50 #endif
51
52 #ifndef __KERNEL__
53 #include <liblustre.h>
54 #endif
55
56 #include <obd.h>
57 #include <obd_class.h>
58 #include <lustre_log.h>
59 #include <obd_ost.h>
60 #include <libcfs/list.h>
61 #include <lvfs.h>
62 #include <lustre_fsfilt.h>
63 #include <lustre_disk.h>
64 #include "llog_internal.h"
65
66 #if defined(__KERNEL__) && defined(LLOG_LVFS)
67
68 static int llog_lvfs_pad(struct obd_device *obd, struct l_file *file,
69                                 int len, int index)
70 {
71         struct llog_rec_hdr rec = { 0 };
72         struct llog_rec_tail tail;
73         int rc;
74         ENTRY;
75
76         LASSERT(len >= LLOG_MIN_REC_SIZE && (len & 0x7) == 0);
77
78         tail.lrt_len = rec.lrh_len = len;
79         tail.lrt_index = rec.lrh_index = index;
80         rec.lrh_type = LLOG_PAD_MAGIC;
81
82         rc = fsfilt_write_record(obd, file, &rec, sizeof(rec), &file->f_pos, 0);
83         if (rc) {
84                 CERROR("error writing padding record: rc %d\n", rc);
85                 goto out;
86         }
87
88         file->f_pos += len - sizeof(rec) - sizeof(tail);
89         rc = fsfilt_write_record(obd, file, &tail, sizeof(tail),&file->f_pos,0);
90         if (rc) {
91                 CERROR("error writing padding record: rc %d\n", rc);
92                 goto out;
93         }
94
95  out:
96         RETURN(rc);
97 }
98
99 static int llog_lvfs_write_blob(struct obd_device *obd, struct l_file *file,
100                                 struct llog_rec_hdr *rec, void *buf, loff_t off)
101 {
102         int rc;
103         struct llog_rec_tail end;
104         loff_t saved_off = file->f_pos;
105         int buflen = rec->lrh_len;
106
107         ENTRY;
108
109         file->f_pos = off;
110
111         if (buflen == 0)
112                 CWARN("0-length record\n");
113
114         if (!buf) {
115                 rc = fsfilt_write_record(obd, file, rec, buflen,&file->f_pos,0);
116                 if (rc) {
117                         CERROR("error writing log record: rc %d\n", rc);
118                         goto out;
119                 }
120                 GOTO(out, rc = 0);
121         }
122
123         /* the buf case */
124         rec->lrh_len = sizeof(*rec) + buflen + sizeof(end);
125         rc = fsfilt_write_record(obd, file, rec, sizeof(*rec), &file->f_pos, 0);
126         if (rc) {
127                 CERROR("error writing log hdr: rc %d\n", rc);
128                 goto out;
129         }
130
131         rc = fsfilt_write_record(obd, file, buf, buflen, &file->f_pos, 0);
132         if (rc) {
133                 CERROR("error writing log buffer: rc %d\n", rc);
134                 goto out;
135         }
136
137         end.lrt_len = rec->lrh_len;
138         end.lrt_index = rec->lrh_index;
139         rc = fsfilt_write_record(obd, file, &end, sizeof(end), &file->f_pos, 0);
140         if (rc) {
141                 CERROR("error writing log tail: rc %d\n", rc);
142                 goto out;
143         }
144
145         rc = 0;
146  out:
147         if (saved_off > file->f_pos)
148                 file->f_pos = saved_off;
149         LASSERT(rc <= 0);
150         RETURN(rc);
151 }
152
153 static int llog_lvfs_read_blob(struct obd_device *obd, struct l_file *file,
154                                 void *buf, int size, loff_t off)
155 {
156         loff_t offset = off;
157         int rc;
158         ENTRY;
159
160         rc = fsfilt_read_record(obd, file, buf, size, &offset);
161         if (rc) {
162                 CERROR("error reading log record: rc %d\n", rc);
163                 RETURN(rc);
164         }
165         RETURN(0);
166 }
167
168 static int llog_lvfs_read_header(struct llog_handle *handle)
169 {
170         struct obd_device *obd;
171         int rc;
172         ENTRY;
173
174         LASSERT(sizeof(*handle->lgh_hdr) == LLOG_CHUNK_SIZE);
175
176         obd = handle->lgh_ctxt->loc_exp->exp_obd;
177
178         if (i_size_read(handle->lgh_file->f_dentry->d_inode) == 0) {
179                 CDEBUG(D_HA, "not reading header from 0-byte log\n");
180                 RETURN(LLOG_EEMPTY);
181         }
182
183         rc = llog_lvfs_read_blob(obd, handle->lgh_file, handle->lgh_hdr,
184                                  LLOG_CHUNK_SIZE, 0);
185         if (rc) {
186                 CERROR("error reading log header from %.*s\n",
187                        handle->lgh_file->f_dentry->d_name.len,
188                        handle->lgh_file->f_dentry->d_name.name);
189         } else {
190                 struct llog_rec_hdr *llh_hdr = &handle->lgh_hdr->llh_hdr;
191
192                 if (LLOG_REC_HDR_NEEDS_SWABBING(llh_hdr))
193                         lustre_swab_llog_hdr(handle->lgh_hdr);
194
195                 if (llh_hdr->lrh_type != LLOG_HDR_MAGIC) {
196                         CERROR("bad log %.*s header magic: %#x (expected %#x)\n",
197                                handle->lgh_file->f_dentry->d_name.len,
198                                handle->lgh_file->f_dentry->d_name.name,
199                                llh_hdr->lrh_type, LLOG_HDR_MAGIC);
200                         rc = -EIO;
201                 } else if (llh_hdr->lrh_len != LLOG_CHUNK_SIZE) {
202                         CERROR("incorrectly sized log %.*s header: %#x "
203                                "(expected %#x)\n",
204                                handle->lgh_file->f_dentry->d_name.len,
205                                handle->lgh_file->f_dentry->d_name.name,
206                                llh_hdr->lrh_len, LLOG_CHUNK_SIZE);
207                         CERROR("you may need to re-run lconf --write_conf.\n");
208                         rc = -EIO;
209                 }
210         }
211
212         handle->lgh_last_idx = handle->lgh_hdr->llh_tail.lrt_index;
213         handle->lgh_file->f_pos = i_size_read(handle->lgh_file->f_dentry->d_inode);
214
215         RETURN(rc);
216 }
217
218 /* returns negative in on error; 0 if success && reccookie == 0; 1 otherwise */
219 /* appends if idx == -1, otherwise overwrites record idx. */
220 static int llog_lvfs_write_rec(struct llog_handle *loghandle,
221                                struct llog_rec_hdr *rec,
222                                struct llog_cookie *reccookie, int cookiecount,
223                                void *buf, int idx)
224 {
225         struct llog_log_hdr *llh;
226         int reclen = rec->lrh_len, index, rc;
227         struct llog_rec_tail *lrt;
228         struct obd_device *obd;
229         struct file *file;
230         size_t left;
231         ENTRY;
232
233         llh = loghandle->lgh_hdr;
234         file = loghandle->lgh_file;
235         obd = loghandle->lgh_ctxt->loc_exp->exp_obd;
236
237         /* record length should not bigger than LLOG_CHUNK_SIZE */
238         if (buf)
239                 rc = (reclen > LLOG_CHUNK_SIZE - sizeof(struct llog_rec_hdr) -
240                       sizeof(struct llog_rec_tail)) ? -E2BIG : 0;
241         else
242                 rc = (reclen > LLOG_CHUNK_SIZE) ? -E2BIG : 0;
243         if (rc)
244                 RETURN(rc);
245
246         if (buf)
247                 /* write_blob adds header and tail to lrh_len. */
248                 reclen = sizeof(*rec) + rec->lrh_len +
249                          sizeof(struct llog_rec_tail);
250
251         if (idx != -1) {
252                 loff_t saved_offset;
253
254                 /* no header: only allowed to insert record 1 */
255                 if (idx != 1 && !i_size_read(file->f_dentry->d_inode)) {
256                         CERROR("idx != -1 in empty log\n");
257                         LBUG();
258                 }
259
260                 if (idx && llh->llh_size && llh->llh_size != rec->lrh_len)
261                         RETURN(-EINVAL);
262
263                 if (!ext2_test_bit(idx, llh->llh_bitmap))
264                         CERROR("Modify unset record %u\n", idx);
265                 if (idx != rec->lrh_index)
266                         CERROR("Index mismatch %d %u\n", idx, rec->lrh_index);
267
268                 rc = llog_lvfs_write_blob(obd, file, &llh->llh_hdr, NULL, 0);
269                 /* we are done if we only write the header or on error */
270                 if (rc || idx == 0)
271                         RETURN(rc);
272
273                 /* Assumes constant lrh_len */
274                 saved_offset = sizeof(*llh) + (idx - 1) * reclen;
275
276                 if (buf) {
277                         struct llog_rec_hdr check;
278
279                         /* We assume that caller has set lgh_cur_* */
280                         saved_offset = loghandle->lgh_cur_offset;
281                         CDEBUG(D_OTHER,
282                                "modify record "LPX64": idx:%d/%u/%d, len:%u "
283                                "offset %llu\n",
284                                loghandle->lgh_id.lgl_oid, idx, rec->lrh_index,
285                                loghandle->lgh_cur_idx, rec->lrh_len,
286                                (long long)(saved_offset - sizeof(*llh)));
287                         if (rec->lrh_index != loghandle->lgh_cur_idx) {
288                                 CERROR("modify idx mismatch %u/%d\n",
289                                        idx, loghandle->lgh_cur_idx);
290                                 RETURN(-EFAULT);
291                         }
292 #if 1  /* FIXME remove this safety check at some point */
293                         /* Verify that the record we're modifying is the
294                            right one. */
295                         rc = llog_lvfs_read_blob(obd, file, &check,
296                                                  sizeof(check), saved_offset);
297                         if (check.lrh_index != idx || check.lrh_len != reclen) {
298                                 CERROR("Bad modify idx %u/%u size %u/%u (%d)\n",
299                                        idx, check.lrh_index, reclen,
300                                        check.lrh_len, rc);
301                                 RETURN(-EFAULT);
302                         }
303 #endif
304                 }
305
306                 rc = llog_lvfs_write_blob(obd, file, rec, buf, saved_offset);
307                 if (rc == 0 && reccookie) {
308                         reccookie->lgc_lgl = loghandle->lgh_id;
309                         reccookie->lgc_index = idx;
310                         rc = 1;
311                 }
312                 RETURN(rc);
313         }
314
315         /* Make sure that records don't cross a chunk boundary, so we can
316          * process them page-at-a-time if needed.  If it will cross a chunk
317          * boundary, write in a fake (but referenced) entry to pad the chunk.
318          *
319          * We know that llog_current_log() will return a loghandle that is
320          * big enough to hold reclen, so all we care about is padding here.
321          */
322         left = LLOG_CHUNK_SIZE - (file->f_pos & (LLOG_CHUNK_SIZE - 1));
323
324         /* NOTE: padding is a record, but no bit is set */
325         if (left != 0 && left != reclen &&
326             left < (reclen + LLOG_MIN_REC_SIZE)) {
327                  index = loghandle->lgh_last_idx + 1;
328                  rc = llog_lvfs_pad(obd, file, left, index);
329                  if (rc)
330                          RETURN(rc);
331                  loghandle->lgh_last_idx++; /*for pad rec*/
332          }
333          /* if it's the last idx in log file, then return -ENOSPC */
334          if (loghandle->lgh_last_idx >= LLOG_BITMAP_SIZE(llh) - 1)
335                  RETURN(-ENOSPC);
336         loghandle->lgh_last_idx++;
337         index = loghandle->lgh_last_idx;
338         LASSERT(index < LLOG_BITMAP_SIZE(llh));
339         rec->lrh_index = index;
340         if (buf == NULL) {
341                 lrt = (struct llog_rec_tail *)
342                         ((char *)rec + rec->lrh_len - sizeof(*lrt));
343                 lrt->lrt_len = rec->lrh_len;
344                 lrt->lrt_index = rec->lrh_index;
345         }
346         /*The caller should make sure only 1 process access the lgh_last_idx,
347          *Otherwise it might hit the assert.*/
348         LASSERT(index < LLOG_BITMAP_SIZE(llh));
349         if (ext2_set_bit(index, llh->llh_bitmap)) {
350                 CERROR("argh, index %u already set in log bitmap?\n", index);
351                 LBUG(); /* should never happen */
352         }
353         llh->llh_count++;
354         llh->llh_tail.lrt_index = index;
355
356         rc = llog_lvfs_write_blob(obd, file, &llh->llh_hdr, NULL, 0);
357         if (rc)
358                 RETURN(rc);
359
360         rc = llog_lvfs_write_blob(obd, file, rec, buf, file->f_pos);
361         if (rc)
362                 RETURN(rc);
363
364         CDEBUG(D_RPCTRACE, "added record "LPX64": idx: %u, %u \n",
365                loghandle->lgh_id.lgl_oid, index, rec->lrh_len);
366         if (rc == 0 && reccookie) {
367                 reccookie->lgc_lgl = loghandle->lgh_id;
368                 reccookie->lgc_index = index;
369                 if ((rec->lrh_type == MDS_UNLINK_REC) ||
370                     (rec->lrh_type == MDS_SETATTR_REC) ||
371                     (rec->lrh_type == MDS_SETATTR64_REC))
372                         reccookie->lgc_subsys = LLOG_MDS_OST_ORIG_CTXT;
373                 else if (rec->lrh_type == OST_SZ_REC)
374                         reccookie->lgc_subsys = LLOG_SIZE_ORIG_CTXT;
375                 else if (rec->lrh_type == OST_RAID1_REC)
376                         reccookie->lgc_subsys = LLOG_RD1_ORIG_CTXT;
377                 else
378                         reccookie->lgc_subsys = -1;
379                 rc = 1;
380         }
381         if (rc == 0 && rec->lrh_type == LLOG_GEN_REC)
382                 rc = 1;
383
384         RETURN(rc);
385 }
386
387 /* We can skip reading at least as many log blocks as the number of
388 * minimum sized log records we are skipping.  If it turns out
389 * that we are not far enough along the log (because the
390 * actual records are larger than minimum size) we just skip
391 * some more records. */
392
393 static void llog_skip_over(__u64 *off, int curr, int goal)
394 {
395         if (goal <= curr)
396                 return;
397         *off = (*off + (goal-curr-1) * LLOG_MIN_REC_SIZE) &
398                 ~(LLOG_CHUNK_SIZE - 1);
399 }
400
401
402 /* sets:
403  *  - cur_offset to the furthest point read in the log file
404  *  - cur_idx to the log index preceeding cur_offset
405  * returns -EIO/-EINVAL on error
406  */
407 static int llog_lvfs_next_block(struct llog_handle *loghandle, int *cur_idx,
408                                 int next_idx, __u64 *cur_offset, void *buf,
409                                 int len)
410 {
411         int rc;
412         ENTRY;
413
414         if (len == 0 || len & (LLOG_CHUNK_SIZE - 1))
415                 RETURN(-EINVAL);
416
417         CDEBUG(D_OTHER, "looking for log index %u (cur idx %u off "LPU64")\n",
418                next_idx, *cur_idx, *cur_offset);
419
420         while (*cur_offset < i_size_read(loghandle->lgh_file->f_dentry->d_inode)) {
421                 struct llog_rec_hdr *rec;
422                 struct llog_rec_tail *tail;
423                 loff_t ppos;
424
425                 llog_skip_over(cur_offset, *cur_idx, next_idx);
426
427                 ppos = *cur_offset;
428                 rc = fsfilt_read_record(loghandle->lgh_ctxt->loc_exp->exp_obd,
429                                         loghandle->lgh_file, buf, len,
430                                         &ppos);
431                 if (rc) {
432                         CERROR("Cant read llog block at log id "LPU64
433                                "/%u offset "LPU64"\n",
434                                loghandle->lgh_id.lgl_oid,
435                                loghandle->lgh_id.lgl_ogen,
436                                *cur_offset);
437                         RETURN(rc);
438                 }
439
440                 /* put number of bytes read into rc to make code simpler */
441                 rc = ppos - *cur_offset;
442                 *cur_offset = ppos;
443
444                 if (rc < len) {
445                         /* signal the end of the valid buffer to llog_process */
446                         memset(buf + rc, 0, len - rc);
447                 }
448
449                 if (rc == 0) /* end of file, nothing to do */
450                         RETURN(0);
451
452                 if (rc < sizeof(*tail)) {
453                         CERROR("Invalid llog block at log id "LPU64"/%u offset "
454                                LPU64"\n", loghandle->lgh_id.lgl_oid,
455                                loghandle->lgh_id.lgl_ogen, *cur_offset);
456                         RETURN(-EINVAL);
457                 }
458
459                 rec = buf;
460                 tail = (struct llog_rec_tail *)((char *)buf + rc -
461                                                 sizeof(struct llog_rec_tail));
462
463                 if (LLOG_REC_HDR_NEEDS_SWABBING(rec)) {
464                         lustre_swab_llog_rec(rec, tail);
465                 }
466
467                 *cur_idx = tail->lrt_index;
468
469                 /* this shouldn't happen */
470                 if (tail->lrt_index == 0) {
471                         CERROR("Invalid llog tail at log id "LPU64"/%u offset "
472                                LPU64"\n", loghandle->lgh_id.lgl_oid,
473                                loghandle->lgh_id.lgl_ogen, *cur_offset);
474                         RETURN(-EINVAL);
475                 }
476                 if (tail->lrt_index < next_idx)
477                         continue;
478
479                 /* sanity check that the start of the new buffer is no farther
480                  * than the record that we wanted.  This shouldn't happen. */
481                 if (rec->lrh_index > next_idx) {
482                         CERROR("missed desired record? %u > %u\n",
483                                rec->lrh_index, next_idx);
484                         RETURN(-ENOENT);
485                 }
486                 RETURN(0);
487         }
488         RETURN(-EIO);
489 }
490
491 static int llog_lvfs_prev_block(struct llog_handle *loghandle,
492                                 int prev_idx, void *buf, int len)
493 {
494         __u64 cur_offset;
495         int rc;
496         ENTRY;
497
498         if (len == 0 || len & (LLOG_CHUNK_SIZE - 1))
499                 RETURN(-EINVAL);
500
501         CDEBUG(D_OTHER, "looking for log index %u\n", prev_idx);
502
503         cur_offset = LLOG_CHUNK_SIZE;
504         llog_skip_over(&cur_offset, 0, prev_idx);
505
506         while (cur_offset < i_size_read(loghandle->lgh_file->f_dentry->d_inode)) {
507                 struct llog_rec_hdr *rec;
508                 struct llog_rec_tail *tail;
509                 loff_t ppos;
510
511                 ppos = cur_offset;
512
513                 rc = fsfilt_read_record(loghandle->lgh_ctxt->loc_exp->exp_obd,
514                                         loghandle->lgh_file, buf, len,
515                                         &ppos);
516                 if (rc) {
517                         CERROR("Cant read llog block at log id "LPU64
518                                "/%u offset "LPU64"\n",
519                                loghandle->lgh_id.lgl_oid,
520                                loghandle->lgh_id.lgl_ogen,
521                                cur_offset);
522                         RETURN(rc);
523                 }
524
525                 /* put number of bytes read into rc to make code simpler */
526                 rc = ppos - cur_offset;
527                 cur_offset = ppos;
528
529                 if (rc == 0) /* end of file, nothing to do */
530                         RETURN(0);
531
532                 if (rc < sizeof(*tail)) {
533                         CERROR("Invalid llog block at log id "LPU64"/%u offset "
534                                LPU64"\n", loghandle->lgh_id.lgl_oid,
535                                loghandle->lgh_id.lgl_ogen, cur_offset);
536                         RETURN(-EINVAL);
537                 }
538
539                 tail = buf + rc - sizeof(struct llog_rec_tail);
540
541                 /* this shouldn't happen */
542                 if (tail->lrt_index == 0) {
543                         CERROR("Invalid llog tail at log id "LPU64"/%u offset "
544                                LPU64"\n", loghandle->lgh_id.lgl_oid,
545                                loghandle->lgh_id.lgl_ogen, cur_offset);
546                         RETURN(-EINVAL);
547                 }
548                 if (le32_to_cpu(tail->lrt_index) < prev_idx)
549                         continue;
550
551                 /* sanity check that the start of the new buffer is no farther
552                  * than the record that we wanted.  This shouldn't happen. */
553                 rec = buf;
554                 if (le32_to_cpu(rec->lrh_index) > prev_idx) {
555                         CERROR("missed desired record? %u > %u\n",
556                                le32_to_cpu(rec->lrh_index), prev_idx);
557                         RETURN(-ENOENT);
558                 }
559                 RETURN(0);
560         }
561         RETURN(-EIO);
562 }
563
564 static struct file *llog_filp_open(char *dir, char *name, int flags, int mode)
565 {
566         char *logname;
567         struct file *filp;
568         int len;
569
570         OBD_ALLOC(logname, PATH_MAX);
571         if (logname == NULL)
572                 return ERR_PTR(-ENOMEM);
573
574         len = snprintf(logname, PATH_MAX, "%s/%s", dir, name);
575         if (len >= PATH_MAX - 1) {
576                 filp = ERR_PTR(-ENAMETOOLONG);
577         } else {
578                 filp = l_filp_open(logname, flags, mode);
579                 if (IS_ERR(filp))
580                         CERROR("logfile creation %s: %ld\n", logname,
581                                PTR_ERR(filp));
582         }
583         OBD_FREE(logname, PATH_MAX);
584         return filp;
585 }
586
587 /* This is a callback from the llog_* functions.
588  * Assumes caller has already pushed us into the kernel context. */
589 static int llog_lvfs_create(struct llog_ctxt *ctxt, struct llog_handle **res,
590                             struct llog_logid *logid, char *name)
591 {
592         struct llog_handle *handle;
593         struct obd_device *obd;
594         struct l_dentry *dchild = NULL;
595         struct obdo *oa = NULL;
596         int rc = 0;
597         int open_flags = O_RDWR | O_CREAT | O_LARGEFILE;
598         ENTRY;
599
600         handle = llog_alloc_handle();
601         if (handle == NULL)
602                 RETURN(-ENOMEM);
603         *res = handle;
604
605         LASSERT(ctxt);
606         LASSERT(ctxt->loc_exp);
607         obd = ctxt->loc_exp->exp_obd;
608
609         if (logid != NULL) {
610                 dchild = obd_lvfs_fid2dentry(ctxt->loc_exp, logid->lgl_oid,
611                                              logid->lgl_ogen, logid->lgl_ogr);
612
613                 if (IS_ERR(dchild)) {
614                         rc = PTR_ERR(dchild);
615                         CERROR("error looking up logfile "LPX64":0x%x: rc %d\n",
616                                logid->lgl_oid, logid->lgl_ogen, rc);
617                         GOTO(out, rc);
618                 }
619
620                 if (dchild->d_inode == NULL) {
621                         l_dput(dchild);
622                         rc = -ENOENT;
623                         CERROR("nonexistent log file "LPX64":"LPX64": rc %d\n",
624                                logid->lgl_oid, logid->lgl_ogr, rc);
625                         GOTO(out, rc);
626                 }
627
628                 /* l_dentry_open will call dput(dchild) if there is an error */
629                 handle->lgh_file = l_dentry_open(&obd->obd_lvfs_ctxt, dchild,
630                                                     O_RDWR | O_LARGEFILE);
631                 if (IS_ERR(handle->lgh_file)) {
632                         rc = PTR_ERR(handle->lgh_file);
633                         CERROR("error opening logfile "LPX64"0x%x: rc %d\n",
634                                logid->lgl_oid, logid->lgl_ogen, rc);
635                         GOTO(out, rc);
636                 }
637
638                 /* assign the value of lgh_id for handle directly */
639                 handle->lgh_id = *logid;
640
641         } else if (name) {
642                 handle->lgh_file = llog_filp_open(MOUNT_CONFIGS_DIR,
643                                                   name, open_flags, 0644);
644                 if (IS_ERR(handle->lgh_file))
645                         GOTO(out, rc = PTR_ERR(handle->lgh_file));
646
647                 handle->lgh_id.lgl_ogr = 1;
648                 handle->lgh_id.lgl_oid =
649                         handle->lgh_file->f_dentry->d_inode->i_ino;
650                 handle->lgh_id.lgl_ogen =
651                         handle->lgh_file->f_dentry->d_inode->i_generation;
652         } else {
653                 OBDO_ALLOC(oa);
654                 if (oa == NULL)
655                         GOTO(out, rc = -ENOMEM);
656
657                 oa->o_gr = FILTER_GROUP_LLOG;
658                 oa->o_valid = OBD_MD_FLGENER | OBD_MD_FLGROUP;
659
660                 rc = obd_create(ctxt->loc_exp, oa, NULL, NULL);
661                 if (rc)
662                         GOTO(out, rc);
663
664                 dchild = obd_lvfs_fid2dentry(ctxt->loc_exp, oa->o_id,
665                                              oa->o_generation, oa->o_gr);
666
667                 if (IS_ERR(dchild))
668                         GOTO(out, rc = PTR_ERR(dchild));
669
670                 handle->lgh_file = l_dentry_open(&obd->obd_lvfs_ctxt, dchild,
671                                                  open_flags);
672                 if (IS_ERR(handle->lgh_file))
673                         GOTO(out, rc = PTR_ERR(handle->lgh_file));
674
675                 handle->lgh_id.lgl_ogr = oa->o_gr;
676                 handle->lgh_id.lgl_oid = oa->o_id;
677                 handle->lgh_id.lgl_ogen = oa->o_generation;
678         }
679
680         handle->lgh_ctxt = ctxt;
681 out:
682         if (rc)
683                 llog_free_handle(handle);
684
685         if (oa)
686                 OBDO_FREE(oa);
687         RETURN(rc);
688 }
689
690 static int llog_lvfs_close(struct llog_handle *handle)
691 {
692         int rc;
693         ENTRY;
694
695         rc = filp_close(handle->lgh_file, 0);
696         if (rc)
697                 CERROR("error closing log: rc %d\n", rc);
698         RETURN(rc);
699 }
700
701 static int llog_lvfs_destroy(struct llog_handle *handle)
702 {
703         struct dentry *fdentry;
704         struct obdo *oa;
705         struct obd_device *obd = handle->lgh_ctxt->loc_exp->exp_obd;
706         char *dir;
707         int rc;
708         ENTRY;
709
710         dir = MOUNT_CONFIGS_DIR;
711
712         fdentry = handle->lgh_file->f_dentry;
713         if (strcmp(fdentry->d_parent->d_name.name, dir) == 0) {
714                 struct inode *inode = fdentry->d_parent->d_inode;
715                 struct lvfs_run_ctxt saved;
716                 struct vfsmount *mnt = mntget(handle->lgh_file->f_vfsmnt);
717
718                 push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
719                 dget(fdentry);
720                 rc = llog_lvfs_close(handle);
721
722                 if (rc == 0) {
723                         LOCK_INODE_MUTEX_PARENT(inode);
724                         rc = ll_vfs_unlink(inode, fdentry, mnt);
725                         UNLOCK_INODE_MUTEX(inode);
726                 }
727                 mntput(mnt);
728
729                 dput(fdentry);
730                 pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
731                 RETURN(rc);
732         }
733
734         OBDO_ALLOC(oa);
735         if (oa == NULL)
736                 RETURN(-ENOMEM);
737
738         oa->o_id = handle->lgh_id.lgl_oid;
739         oa->o_gr = handle->lgh_id.lgl_ogr;
740         oa->o_generation = handle->lgh_id.lgl_ogen;
741         oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP | OBD_MD_FLGENER;
742
743         rc = llog_lvfs_close(handle);
744         if (rc)
745                 GOTO(out, rc);
746
747         rc = obd_destroy(handle->lgh_ctxt->loc_exp, oa, NULL, NULL, NULL, NULL);
748  out:
749         OBDO_FREE(oa);
750         RETURN(rc);
751 }
752
753 /* reads the catalog list */
754 int llog_get_cat_list(struct obd_device *disk_obd,
755                       char *name, int idx, int count, struct llog_catid *idarray)
756 {
757         struct lvfs_run_ctxt saved;
758         struct l_file *file;
759         int rc, rc1 = 0;
760         int size = sizeof(*idarray) * count;
761         loff_t off = idx *  sizeof(*idarray);
762         ENTRY;
763
764         if (!count)
765                 RETURN(0);
766
767         push_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL);
768         file = filp_open(name, O_RDWR | O_CREAT | O_LARGEFILE, 0700);
769         if (!file || IS_ERR(file)) {
770                 rc = PTR_ERR(file);
771                 CERROR("OBD filter: cannot open/create %s: rc = %d\n",
772                        name, rc);
773                 GOTO(out, rc);
774         }
775
776         if (!S_ISREG(file->f_dentry->d_inode->i_mode)) {
777                 CERROR("%s is not a regular file!: mode = %o\n", name,
778                        file->f_dentry->d_inode->i_mode);
779                 GOTO(out, rc = -ENOENT);
780         }
781
782         CDEBUG(D_CONFIG, "cat list: disk size=%d, read=%d\n",
783                (int)i_size_read(file->f_dentry->d_inode), size);
784
785         /* read for new ost index or for empty file */
786         memset(idarray, 0, size);
787         if (i_size_read(file->f_dentry->d_inode) < off)
788                 GOTO(out, rc = 0);
789
790         rc = fsfilt_read_record(disk_obd, file, idarray, size, &off);
791         if (rc) {
792                 CERROR("OBD filter: error reading %s: rc %d\n", name, rc);
793                 GOTO(out, rc);
794         }
795
796         EXIT;
797  out:
798         pop_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL);
799         if (file && !IS_ERR(file))
800                 rc1 = filp_close(file, 0);
801         if (rc == 0)
802                 rc = rc1;
803         return rc;
804 }
805 EXPORT_SYMBOL(llog_get_cat_list);
806
807 /* writes the cat list */
808 int llog_put_cat_list(struct obd_device *disk_obd,
809                       char *name, int idx, int count, struct llog_catid *idarray)
810 {
811         struct lvfs_run_ctxt saved;
812         struct l_file *file;
813         int rc, rc1 = 0;
814         int size = sizeof(*idarray) * count;
815         loff_t off = idx * sizeof(*idarray);
816
817         if (!count)
818                 GOTO(out1, rc = 0);
819
820         push_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL);
821         file = filp_open(name, O_RDWR | O_CREAT | O_LARGEFILE, 0700);
822         if (!file || IS_ERR(file)) {
823                 rc = PTR_ERR(file);
824                 CERROR("OBD filter: cannot open/create %s: rc = %d\n",
825                        name, rc);
826                 GOTO(out, rc);
827         }
828
829         if (!S_ISREG(file->f_dentry->d_inode->i_mode)) {
830                 CERROR("%s is not a regular file!: mode = %o\n", name,
831                        file->f_dentry->d_inode->i_mode);
832                 GOTO(out, rc = -ENOENT);
833         }
834
835         rc = fsfilt_write_record(disk_obd, file, idarray, size, &off, 1);
836         if (rc) {
837                 CDEBUG(D_INODE,"OBD filter: error writeing %s: rc %d\n",
838                        name, rc);
839                 GOTO(out, rc);
840         }
841
842 out:
843         pop_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL);
844         if (file && !IS_ERR(file))
845                 rc1 = filp_close(file, 0);
846
847         if (rc == 0)
848                 rc = rc1;
849 out1:
850         RETURN(rc);
851 }
852 EXPORT_SYMBOL(llog_put_cat_list);
853
854 struct llog_operations llog_lvfs_ops = {
855         lop_write_rec:   llog_lvfs_write_rec,
856         lop_next_block:  llog_lvfs_next_block,
857         lop_prev_block:  llog_lvfs_prev_block,
858         lop_read_header: llog_lvfs_read_header,
859         lop_create:      llog_lvfs_create,
860         lop_destroy:     llog_lvfs_destroy,
861         lop_close:       llog_lvfs_close,
862         //        lop_cancel: llog_lvfs_cancel,
863 };
864
865 EXPORT_SYMBOL(llog_lvfs_ops);
866
867 #else /* !__KERNEL__ */
868
/*
 * Placeholder for the "#else" (!__KERNEL__) build: no header can be read
 * here, so any call trips LBUG().
 */
static int llog_lvfs_read_header(struct llog_handle *handle)
{
        LBUG();
        /* NOTE(review): presumably unreachable if LBUG() is fatal - confirm */
        return 0;
}
874
/*
 * Placeholder for the "#else" (!__KERNEL__) build: record writing is not
 * implemented here, so any call trips LBUG().  All arguments are ignored.
 */
static int llog_lvfs_write_rec(struct llog_handle *loghandle,
                               struct llog_rec_hdr *rec,
                               struct llog_cookie *reccookie,
                               int cookiecount,
                               void *buf,
                               int idx)
{
        LBUG();
        /* NOTE(review): presumably unreachable if LBUG() is fatal - confirm */
        return 0;
}
883
884 static int llog_lvfs_next_block(struct llog_handle *loghandle, int *cur_idx,
885                                 int next_idx, __u64 *cur_offset, void *buf,
886                                 int len)
887 {
888         LBUG();
889         return 0;
890 }
891
/*
 * Placeholder for the "#else" (!__KERNEL__) build: reading the previous
 * log block is not implemented here, so any call trips LBUG().  All
 * arguments are ignored.
 */
static int llog_lvfs_prev_block(struct llog_handle *loghandle,
                                int prev_idx,
                                void *buf,
                                int len)
{
        LBUG();
        /* NOTE(review): presumably unreachable if LBUG() is fatal - confirm */
        return 0;
}
898
/*
 * Placeholder for the "#else" (!__KERNEL__) build: log creation is not
 * implemented here, so any call trips LBUG().  *res is never written.
 */
static int llog_lvfs_create(struct llog_ctxt *ctxt,
                            struct llog_handle **res,
                            struct llog_logid *logid,
                            char *name)
{
        LBUG();
        /* NOTE(review): presumably unreachable if LBUG() is fatal - confirm */
        return 0;
}
905
/*
 * Placeholder for the "#else" (!__KERNEL__) build: there is nothing to
 * close here, so any call trips LBUG().
 */
static int llog_lvfs_close(struct llog_handle *handle)
{
        LBUG();
        /* NOTE(review): presumably unreachable if LBUG() is fatal - confirm */
        return 0;
}
911
/*
 * Placeholder for the "#else" (!__KERNEL__) build: log destruction is not
 * implemented here, so any call trips LBUG().
 */
static int llog_lvfs_destroy(struct llog_handle *handle)
{
        LBUG();
        /* NOTE(review): presumably unreachable if LBUG() is fatal - confirm */
        return 0;
}
917
/*
 * Placeholder for the "#else" (!__KERNEL__) build: the catalog list cannot
 * be read here, so any call trips LBUG().  idarray is never written.
 */
int llog_get_cat_list(struct obd_device *disk_obd,
                      char *name,
                      int idx,
                      int count,
                      struct llog_catid *idarray)
{
        LBUG();
        /* NOTE(review): presumably unreachable if LBUG() is fatal - confirm */
        return 0;
}
924
/*
 * Placeholder for the "#else" (!__KERNEL__) build: the catalog list cannot
 * be written here, so any call trips LBUG().  Nothing is stored.
 */
int llog_put_cat_list(struct obd_device *disk_obd,
                      char *name,
                      int idx,
                      int count,
                      struct llog_catid *idarray)
{
        LBUG();
        /* NOTE(review): presumably unreachable if LBUG() is fatal - confirm */
        return 0;
}
931
932 struct llog_operations llog_lvfs_ops = {
933         lop_write_rec:   llog_lvfs_write_rec,
934         lop_next_block:  llog_lvfs_next_block,
935         lop_prev_block:  llog_lvfs_prev_block,
936         lop_read_header: llog_lvfs_read_header,
937         lop_create:      llog_lvfs_create,
938         lop_destroy:     llog_lvfs_destroy,
939         lop_close:       llog_lvfs_close,
940 //        lop_cancel:      llog_lvfs_cancel,
941 };
942 #endif