Whamcloud - gitweb
LU-1866 lfsck: enhance otable-based iteration
[fs/lustre-release.git] / lustre / obdclass / llog_lvfs.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  *
30  * Copyright (c) 2011, 2012, Intel Corporation.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/obdclass/llog_lvfs.c
37  *
38  * OST<->MDS recovery logging infrastructure.
39  * Invariants in implementation:
40  * - we do not share logs among different OST<->MDS connections, so that
41  *   if an OST or MDS fails it need only look at log(s) relevant to itself
42  *
43  * Author: Andreas Dilger <adilger@clusterfs.com>
44  */
45
46 #define DEBUG_SUBSYSTEM S_LOG
47
48 #ifndef __KERNEL__
49 #include <liblustre.h>
50 #endif
51
52 #include <obd.h>
53 #include <obd_class.h>
54 #include <lustre_log.h>
55 #include <obd_ost.h>
56 #include <libcfs/list.h>
57 #include <lvfs.h>
58 #include <lustre_fsfilt.h>
59 #include <lustre_disk.h>
60 #include "llog_internal.h"
61
62 #if defined(__KERNEL__) && defined(LLOG_LVFS)
63
64 static int llog_lvfs_pad(struct obd_device *obd, struct l_file *file,
65                                 int len, int index)
66 {
67         struct llog_rec_hdr rec = { 0 };
68         struct llog_rec_tail tail;
69         int rc;
70         ENTRY;
71
72         LASSERT(len >= LLOG_MIN_REC_SIZE && (len & 0x7) == 0);
73
74         tail.lrt_len = rec.lrh_len = len;
75         tail.lrt_index = rec.lrh_index = index;
76         rec.lrh_type = LLOG_PAD_MAGIC;
77
78         rc = fsfilt_write_record(obd, file, &rec, sizeof(rec), &file->f_pos, 0);
79         if (rc) {
80                 CERROR("error writing padding record: rc %d\n", rc);
81                 goto out;
82         }
83
84         file->f_pos += len - sizeof(rec) - sizeof(tail);
85         rc = fsfilt_write_record(obd, file, &tail, sizeof(tail),&file->f_pos,0);
86         if (rc) {
87                 CERROR("error writing padding record: rc %d\n", rc);
88                 goto out;
89         }
90
91  out:
92         RETURN(rc);
93 }
94
95 static int llog_lvfs_write_blob(struct obd_device *obd, struct l_file *file,
96                                 struct llog_rec_hdr *rec, void *buf, loff_t off)
97 {
98         int rc;
99         struct llog_rec_tail end;
100         loff_t saved_off = file->f_pos;
101         int buflen = rec->lrh_len;
102
103         ENTRY;
104
105         file->f_pos = off;
106
107         if (buflen == 0)
108                 CWARN("0-length record\n");
109
110         if (!buf) {
111                 rc = fsfilt_write_record(obd, file, rec, buflen,&file->f_pos,0);
112                 if (rc) {
113                         CERROR("error writing log record: rc %d\n", rc);
114                         goto out;
115                 }
116                 GOTO(out, rc = 0);
117         }
118
119         /* the buf case */
120         rec->lrh_len = sizeof(*rec) + buflen + sizeof(end);
121         rc = fsfilt_write_record(obd, file, rec, sizeof(*rec), &file->f_pos, 0);
122         if (rc) {
123                 CERROR("error writing log hdr: rc %d\n", rc);
124                 goto out;
125         }
126
127         rc = fsfilt_write_record(obd, file, buf, buflen, &file->f_pos, 0);
128         if (rc) {
129                 CERROR("error writing log buffer: rc %d\n", rc);
130                 goto out;
131         }
132
133         end.lrt_len = rec->lrh_len;
134         end.lrt_index = rec->lrh_index;
135         rc = fsfilt_write_record(obd, file, &end, sizeof(end), &file->f_pos, 0);
136         if (rc) {
137                 CERROR("error writing log tail: rc %d\n", rc);
138                 goto out;
139         }
140
141         rc = 0;
142  out:
143         if (saved_off > file->f_pos)
144                 file->f_pos = saved_off;
145         LASSERT(rc <= 0);
146         RETURN(rc);
147 }
148
149 static int llog_lvfs_read_blob(struct obd_device *obd, struct l_file *file,
150                                 void *buf, int size, loff_t off)
151 {
152         loff_t offset = off;
153         int rc;
154         ENTRY;
155
156         rc = fsfilt_read_record(obd, file, buf, size, &offset);
157         if (rc) {
158                 CERROR("error reading log record: rc %d\n", rc);
159                 RETURN(rc);
160         }
161         RETURN(0);
162 }
163
164 static int llog_lvfs_read_header(const struct lu_env *env,
165                                  struct llog_handle *handle)
166 {
167         struct obd_device *obd;
168         int rc;
169         ENTRY;
170
171         LASSERT(sizeof(*handle->lgh_hdr) == LLOG_CHUNK_SIZE);
172
173         obd = handle->lgh_ctxt->loc_exp->exp_obd;
174
175         if (i_size_read(handle->lgh_file->f_dentry->d_inode) == 0) {
176                 CDEBUG(D_HA, "not reading header from 0-byte log\n");
177                 RETURN(LLOG_EEMPTY);
178         }
179
180         rc = llog_lvfs_read_blob(obd, handle->lgh_file, handle->lgh_hdr,
181                                  LLOG_CHUNK_SIZE, 0);
182         if (rc) {
183                 CERROR("error reading log header from %.*s\n",
184                        handle->lgh_file->f_dentry->d_name.len,
185                        handle->lgh_file->f_dentry->d_name.name);
186         } else {
187                 struct llog_rec_hdr *llh_hdr = &handle->lgh_hdr->llh_hdr;
188
189                 if (LLOG_REC_HDR_NEEDS_SWABBING(llh_hdr))
190                         lustre_swab_llog_hdr(handle->lgh_hdr);
191
192                 if (llh_hdr->lrh_type != LLOG_HDR_MAGIC) {
193                         CERROR("bad log %.*s header magic: %#x (expected %#x)\n",
194                                handle->lgh_file->f_dentry->d_name.len,
195                                handle->lgh_file->f_dentry->d_name.name,
196                                llh_hdr->lrh_type, LLOG_HDR_MAGIC);
197                         rc = -EIO;
198                 } else if (llh_hdr->lrh_len != LLOG_CHUNK_SIZE) {
199                         CERROR("incorrectly sized log %.*s header: %#x "
200                                "(expected %#x)\n",
201                                handle->lgh_file->f_dentry->d_name.len,
202                                handle->lgh_file->f_dentry->d_name.name,
203                                llh_hdr->lrh_len, LLOG_CHUNK_SIZE);
204                         CERROR("you may need to re-run lconf --write_conf.\n");
205                         rc = -EIO;
206                 }
207         }
208
209         handle->lgh_last_idx = handle->lgh_hdr->llh_tail.lrt_index;
210         handle->lgh_file->f_pos = i_size_read(handle->lgh_file->f_dentry->d_inode);
211
212         RETURN(rc);
213 }
214
215 /* returns negative in on error; 0 if success && reccookie == 0; 1 otherwise */
216 /* appends if idx == -1, otherwise overwrites record idx. */
217 static int llog_lvfs_write_rec(const struct lu_env *env,
218                                struct llog_handle *loghandle,
219                                struct llog_rec_hdr *rec,
220                                struct llog_cookie *reccookie, int cookiecount,
221                                void *buf, int idx, struct thandle *th)
222 {
223         struct llog_log_hdr *llh;
224         int reclen = rec->lrh_len, index, rc;
225         struct llog_rec_tail *lrt;
226         struct obd_device *obd;
227         struct file *file;
228         size_t left;
229         ENTRY;
230
231         llh = loghandle->lgh_hdr;
232         file = loghandle->lgh_file;
233         obd = loghandle->lgh_ctxt->loc_exp->exp_obd;
234
235         /* record length should not bigger than LLOG_CHUNK_SIZE */
236         if (buf)
237                 rc = (reclen > LLOG_CHUNK_SIZE - sizeof(struct llog_rec_hdr) -
238                       sizeof(struct llog_rec_tail)) ? -E2BIG : 0;
239         else
240                 rc = (reclen > LLOG_CHUNK_SIZE) ? -E2BIG : 0;
241         if (rc)
242                 RETURN(rc);
243
244         if (buf)
245                 /* write_blob adds header and tail to lrh_len. */
246                 reclen = sizeof(*rec) + rec->lrh_len +
247                          sizeof(struct llog_rec_tail);
248
249         if (idx != -1) {
250                 loff_t saved_offset;
251
252                 /* no header: only allowed to insert record 1 */
253                 if (idx != 1 && !i_size_read(file->f_dentry->d_inode)) {
254                         CERROR("idx != -1 in empty log\n");
255                         LBUG();
256                 }
257
258                 if (idx && llh->llh_size && llh->llh_size != rec->lrh_len)
259                         RETURN(-EINVAL);
260
261                 if (!ext2_test_bit(idx, llh->llh_bitmap))
262                         CERROR("Modify unset record %u\n", idx);
263                 if (idx != rec->lrh_index)
264                         CERROR("Index mismatch %d %u\n", idx, rec->lrh_index);
265
266                 rc = llog_lvfs_write_blob(obd, file, &llh->llh_hdr, NULL, 0);
267                 /* we are done if we only write the header or on error */
268                 if (rc || idx == 0)
269                         RETURN(rc);
270
271                 if (buf) {
272                         /* We assume that caller has set lgh_cur_* */
273                         saved_offset = loghandle->lgh_cur_offset;
274                         CDEBUG(D_OTHER,
275                                "modify record "LPX64": idx:%d/%u/%d, len:%u "
276                                "offset %llu\n",
277                                loghandle->lgh_id.lgl_oid, idx, rec->lrh_index,
278                                loghandle->lgh_cur_idx, rec->lrh_len,
279                                (long long)(saved_offset - sizeof(*llh)));
280                         if (rec->lrh_index != loghandle->lgh_cur_idx) {
281                                 CERROR("modify idx mismatch %u/%d\n",
282                                        idx, loghandle->lgh_cur_idx);
283                                 RETURN(-EFAULT);
284                         }
285                 } else {
286                         /* Assumes constant lrh_len */
287                         saved_offset = sizeof(*llh) + (idx - 1) * reclen;
288                 }
289
290                 rc = llog_lvfs_write_blob(obd, file, rec, buf, saved_offset);
291                 if (rc == 0 && reccookie) {
292                         reccookie->lgc_lgl = loghandle->lgh_id;
293                         reccookie->lgc_index = idx;
294                         rc = 1;
295                 }
296                 RETURN(rc);
297         }
298
299         /* Make sure that records don't cross a chunk boundary, so we can
300          * process them page-at-a-time if needed.  If it will cross a chunk
301          * boundary, write in a fake (but referenced) entry to pad the chunk.
302          *
303          * We know that llog_current_log() will return a loghandle that is
304          * big enough to hold reclen, so all we care about is padding here.
305          */
306         left = LLOG_CHUNK_SIZE - (file->f_pos & (LLOG_CHUNK_SIZE - 1));
307
308         /* NOTE: padding is a record, but no bit is set */
309         if (left != 0 && left != reclen &&
310             left < (reclen + LLOG_MIN_REC_SIZE)) {
311                  index = loghandle->lgh_last_idx + 1;
312                  rc = llog_lvfs_pad(obd, file, left, index);
313                  if (rc)
314                          RETURN(rc);
315                  loghandle->lgh_last_idx++; /*for pad rec*/
316          }
317          /* if it's the last idx in log file, then return -ENOSPC */
318          if (loghandle->lgh_last_idx >= LLOG_BITMAP_SIZE(llh) - 1)
319                  RETURN(-ENOSPC);
320         loghandle->lgh_last_idx++;
321         index = loghandle->lgh_last_idx;
322         LASSERT(index < LLOG_BITMAP_SIZE(llh));
323         rec->lrh_index = index;
324         if (buf == NULL) {
325                 lrt = (struct llog_rec_tail *)
326                         ((char *)rec + rec->lrh_len - sizeof(*lrt));
327                 lrt->lrt_len = rec->lrh_len;
328                 lrt->lrt_index = rec->lrh_index;
329         }
330         /*The caller should make sure only 1 process access the lgh_last_idx,
331          *Otherwise it might hit the assert.*/
332         LASSERT(index < LLOG_BITMAP_SIZE(llh));
333         spin_lock(&loghandle->lgh_hdr_lock);
334         if (ext2_set_bit(index, llh->llh_bitmap)) {
335                 CERROR("argh, index %u already set in log bitmap?\n", index);
336                 spin_unlock(&loghandle->lgh_hdr_lock);
337                 LBUG(); /* should never happen */
338         }
339         llh->llh_count++;
340         spin_unlock(&loghandle->lgh_hdr_lock);
341         llh->llh_tail.lrt_index = index;
342
343         rc = llog_lvfs_write_blob(obd, file, &llh->llh_hdr, NULL, 0);
344         if (rc)
345                 RETURN(rc);
346
347         rc = llog_lvfs_write_blob(obd, file, rec, buf, file->f_pos);
348         if (rc)
349                 RETURN(rc);
350
351         CDEBUG(D_RPCTRACE, "added record "LPX64": idx: %u, %u \n",
352                loghandle->lgh_id.lgl_oid, index, rec->lrh_len);
353         if (rc == 0 && reccookie) {
354                 reccookie->lgc_lgl = loghandle->lgh_id;
355                 reccookie->lgc_index = index;
356                 if ((rec->lrh_type == MDS_UNLINK_REC) ||
357                     (rec->lrh_type == MDS_SETATTR64_REC))
358                         reccookie->lgc_subsys = LLOG_MDS_OST_ORIG_CTXT;
359                 else if (rec->lrh_type == OST_SZ_REC)
360                         reccookie->lgc_subsys = LLOG_SIZE_ORIG_CTXT;
361                 else
362                         reccookie->lgc_subsys = -1;
363                 rc = 1;
364         }
365         if (rc == 0 && rec->lrh_type == LLOG_GEN_REC)
366                 rc = 1;
367
368         RETURN(rc);
369 }
370
371 /* We can skip reading at least as many log blocks as the number of
372 * minimum sized log records we are skipping.  If it turns out
373 * that we are not far enough along the log (because the
374 * actual records are larger than minimum size) we just skip
375 * some more records. */
376
377 static void llog_skip_over(__u64 *off, int curr, int goal)
378 {
379         if (goal <= curr)
380                 return;
381         *off = (*off + (goal-curr-1) * LLOG_MIN_REC_SIZE) &
382                 ~(LLOG_CHUNK_SIZE - 1);
383 }
384
385
386 /* sets:
387  *  - cur_offset to the furthest point read in the log file
388  *  - cur_idx to the log index preceeding cur_offset
389  * returns -EIO/-EINVAL on error
390  */
391 static int llog_lvfs_next_block(const struct lu_env *env,
392                                 struct llog_handle *loghandle, int *cur_idx,
393                                 int next_idx, __u64 *cur_offset, void *buf,
394                                 int len)
395 {
396         int rc;
397         ENTRY;
398
399         if (len == 0 || len & (LLOG_CHUNK_SIZE - 1))
400                 RETURN(-EINVAL);
401
402         CDEBUG(D_OTHER, "looking for log index %u (cur idx %u off "LPU64")\n",
403                next_idx, *cur_idx, *cur_offset);
404
405         while (*cur_offset < i_size_read(loghandle->lgh_file->f_dentry->d_inode)) {
406                 struct llog_rec_hdr *rec, *last_rec;
407                 struct llog_rec_tail *tail;
408                 loff_t ppos;
409                 int llen;
410
411                 llog_skip_over(cur_offset, *cur_idx, next_idx);
412
413                 /* read up to next LLOG_CHUNK_SIZE block */
414                 ppos = *cur_offset;
415                 llen = LLOG_CHUNK_SIZE - (*cur_offset & (LLOG_CHUNK_SIZE - 1));
416                 rc = fsfilt_read_record(loghandle->lgh_ctxt->loc_exp->exp_obd,
417                                         loghandle->lgh_file, buf, llen,
418                                         cur_offset);
419                 if (rc < 0) {
420                         CERROR("Cant read llog block at log id "LPU64
421                                "/%u offset "LPU64"\n",
422                                loghandle->lgh_id.lgl_oid,
423                                loghandle->lgh_id.lgl_ogen,
424                                *cur_offset);
425                         RETURN(rc);
426                 }
427
428                 /* put number of bytes read into rc to make code simpler */
429                 rc = *cur_offset - ppos;
430                 if (rc < len) {
431                         /* signal the end of the valid buffer to llog_process */
432                         memset(buf + rc, 0, len - rc);
433                 }
434
435                 if (rc == 0) /* end of file, nothing to do */
436                         RETURN(0);
437
438                 if (rc < sizeof(*tail)) {
439                         CERROR("Invalid llog block at log id "LPU64"/%u offset "
440                                LPU64"\n", loghandle->lgh_id.lgl_oid,
441                                loghandle->lgh_id.lgl_ogen, *cur_offset);
442                         RETURN(-EINVAL);
443                 }
444
445                 rec = buf;
446                 if (LLOG_REC_HDR_NEEDS_SWABBING(rec))
447                         lustre_swab_llog_rec(rec);
448
449                 tail = (struct llog_rec_tail *)(buf + rc -
450                                                 sizeof(struct llog_rec_tail));
451
452                 /* get the last record in block */
453                 last_rec = (struct llog_rec_hdr *)(buf + rc -
454                                                    le32_to_cpu(tail->lrt_len));
455
456                 if (LLOG_REC_HDR_NEEDS_SWABBING(last_rec))
457                         lustre_swab_llog_rec(last_rec);
458                 LASSERT(last_rec->lrh_index == tail->lrt_index);
459
460                 *cur_idx = tail->lrt_index;
461
462                 /* this shouldn't happen */
463                 if (tail->lrt_index == 0) {
464                         CERROR("Invalid llog tail at log id "LPU64"/%u offset "
465                                LPU64"\n", loghandle->lgh_id.lgl_oid,
466                                loghandle->lgh_id.lgl_ogen, *cur_offset);
467                         RETURN(-EINVAL);
468                 }
469                 if (tail->lrt_index < next_idx)
470                         continue;
471
472                 /* sanity check that the start of the new buffer is no farther
473                  * than the record that we wanted.  This shouldn't happen. */
474                 if (rec->lrh_index > next_idx) {
475                         CERROR("missed desired record? %u > %u\n",
476                                rec->lrh_index, next_idx);
477                         RETURN(-ENOENT);
478                 }
479                 RETURN(0);
480         }
481         RETURN(-EIO);
482 }
483
484 static int llog_lvfs_prev_block(const struct lu_env *env,
485                                 struct llog_handle *loghandle,
486                                 int prev_idx, void *buf, int len)
487 {
488         __u64 cur_offset;
489         int rc;
490         ENTRY;
491
492         if (len == 0 || len & (LLOG_CHUNK_SIZE - 1))
493                 RETURN(-EINVAL);
494
495         CDEBUG(D_OTHER, "looking for log index %u\n", prev_idx);
496
497         cur_offset = LLOG_CHUNK_SIZE;
498         llog_skip_over(&cur_offset, 0, prev_idx);
499
500         while (cur_offset < i_size_read(loghandle->lgh_file->f_dentry->d_inode)) {
501                 struct llog_rec_hdr *rec, *last_rec;
502                 struct llog_rec_tail *tail;
503                 loff_t ppos = cur_offset;
504
505                 rc = fsfilt_read_record(loghandle->lgh_ctxt->loc_exp->exp_obd,
506                                         loghandle->lgh_file, buf, len,
507                                         &cur_offset);
508                 if (rc < 0) {
509                         CERROR("Cant read llog block at log id "LPU64
510                                "/%u offset "LPU64"\n",
511                                loghandle->lgh_id.lgl_oid,
512                                loghandle->lgh_id.lgl_ogen,
513                                cur_offset);
514                         RETURN(rc);
515                 }
516
517                 /* put number of bytes read into rc to make code simpler */
518                 rc = cur_offset - ppos;
519
520                 if (rc == 0) /* end of file, nothing to do */
521                         RETURN(0);
522
523                 if (rc < sizeof(*tail)) {
524                         CERROR("Invalid llog block at log id "LPU64"/%u offset "
525                                LPU64"\n", loghandle->lgh_id.lgl_oid,
526                                loghandle->lgh_id.lgl_ogen, cur_offset);
527                         RETURN(-EINVAL);
528                 }
529
530                 rec = buf;
531                 if (LLOG_REC_HDR_NEEDS_SWABBING(rec))
532                         lustre_swab_llog_rec(rec);
533
534                 tail = (struct llog_rec_tail *)(buf + rc -
535                                                 sizeof(struct llog_rec_tail));
536
537                 /* get the last record in block */
538                 last_rec = (struct llog_rec_hdr *)(buf + rc -
539                                                    le32_to_cpu(tail->lrt_len));
540
541                 if (LLOG_REC_HDR_NEEDS_SWABBING(last_rec))
542                         lustre_swab_llog_rec(last_rec);
543                 LASSERT(last_rec->lrh_index == tail->lrt_index);
544
545                 /* this shouldn't happen */
546                 if (tail->lrt_index == 0) {
547                         CERROR("Invalid llog tail at log id "LPU64"/%u offset "
548                                LPU64"\n", loghandle->lgh_id.lgl_oid,
549                                loghandle->lgh_id.lgl_ogen, cur_offset);
550                         RETURN(-EINVAL);
551                 }
552                 if (tail->lrt_index < prev_idx)
553                         continue;
554
555                 /* sanity check that the start of the new buffer is no farther
556                  * than the record that we wanted.  This shouldn't happen. */
557                 if (rec->lrh_index > prev_idx) {
558                         CERROR("missed desired record? %u > %u\n",
559                                rec->lrh_index, prev_idx);
560                         RETURN(-ENOENT);
561                 }
562                 RETURN(0);
563         }
564         RETURN(-EIO);
565 }
566
567 static struct file *llog_filp_open(char *dir, char *name, int flags, int mode)
568 {
569         char *logname;
570         struct file *filp;
571         int len;
572
573         OBD_ALLOC(logname, PATH_MAX);
574         if (logname == NULL)
575                 return ERR_PTR(-ENOMEM);
576
577         len = snprintf(logname, PATH_MAX, "%s/%s", dir, name);
578         if (len >= PATH_MAX - 1) {
579                 filp = ERR_PTR(-ENAMETOOLONG);
580         } else {
581                 filp = l_filp_open(logname, flags, mode);
582                 if (IS_ERR(filp) && PTR_ERR(filp) != -ENOENT)
583                         CERROR("logfile creation %s: %ld\n", logname,
584                                PTR_ERR(filp));
585         }
586         OBD_FREE(logname, PATH_MAX);
587         return filp;
588 }
589
590 static int llog_lvfs_open(const struct lu_env *env,  struct llog_handle *handle,
591                           struct llog_logid *logid, char *name,
592                           enum llog_open_param open_param)
593 {
594         struct llog_ctxt        *ctxt = handle->lgh_ctxt;
595         struct l_dentry         *dchild = NULL;
596         struct obd_device       *obd;
597         int                      rc = 0;
598
599         ENTRY;
600
601         LASSERT(ctxt);
602         LASSERT(ctxt->loc_exp);
603         LASSERT(ctxt->loc_exp->exp_obd);
604         obd = ctxt->loc_exp->exp_obd;
605
606         LASSERT(handle);
607         if (logid != NULL) {
608                 dchild = obd_lvfs_fid2dentry(ctxt->loc_exp, logid->lgl_oid,
609                                              logid->lgl_ogen, logid->lgl_oseq);
610                 if (IS_ERR(dchild)) {
611                         rc = PTR_ERR(dchild);
612                         CERROR("%s: error looking up logfile #"LPX64"#"
613                                LPX64"#%08x: rc = %d\n",
614                                ctxt->loc_obd->obd_name, logid->lgl_oid,
615                                logid->lgl_oseq, logid->lgl_ogen, rc);
616                         GOTO(out, rc);
617                 }
618                 if (dchild->d_inode == NULL) {
619                         l_dput(dchild);
620                         rc = -ENOENT;
621                         CERROR("%s: nonexistent llog #"LPX64"#"LPX64"#%08x: "
622                                "rc = %d\n", ctxt->loc_obd->obd_name,
623                                logid->lgl_oid, logid->lgl_oseq,
624                                logid->lgl_ogen, rc);
625                         GOTO(out, rc);
626                 }
627                 /* l_dentry_open will call dput(dchild) if there is an error */
628                 handle->lgh_file = l_dentry_open(&obd->obd_lvfs_ctxt, dchild,
629                                                  O_RDWR | O_LARGEFILE);
630                 if (IS_ERR(handle->lgh_file)) {
631                         rc = PTR_ERR(handle->lgh_file);
632                         handle->lgh_file = NULL;
633                         CERROR("%s: error opening llog #"LPX64"#"LPX64"#%08x: "
634                                "rc = %d\n", ctxt->loc_obd->obd_name,
635                                logid->lgl_oid, logid->lgl_oseq,
636                                logid->lgl_ogen, rc);
637                         GOTO(out, rc);
638                 }
639                 handle->lgh_id = *logid;
640         } else if (name) {
641                 handle->lgh_file = llog_filp_open(MOUNT_CONFIGS_DIR, name,
642                                                   O_RDWR | O_LARGEFILE, 0644);
643                 if (IS_ERR(handle->lgh_file)) {
644                         rc = PTR_ERR(handle->lgh_file);
645                         handle->lgh_file = NULL;
646                         if (rc == -ENOENT && open_param == LLOG_OPEN_NEW) {
647                                 OBD_ALLOC(handle->lgh_name, strlen(name) + 1);
648                                 if (handle->lgh_name)
649                                         strcpy(handle->lgh_name, name);
650                                 else
651                                         GOTO(out, rc = -ENOMEM);
652                                 rc = 0;
653                         } else {
654                                 GOTO(out, rc);
655                         }
656                 } else {
657                         handle->lgh_id.lgl_oseq = FID_SEQ_LLOG;
658                         handle->lgh_id.lgl_oid =
659                                 handle->lgh_file->f_dentry->d_inode->i_ino;
660                         handle->lgh_id.lgl_ogen =
661                                 handle->lgh_file->f_dentry->d_inode->i_generation;
662                 }
663         } else {
664                 LASSERTF(open_param == LLOG_OPEN_NEW, "%#x\n", open_param);
665                 handle->lgh_file = NULL;
666         }
667
668         /* No new llog is expected but doesn't exist */
669         if (open_param != LLOG_OPEN_NEW && handle->lgh_file == NULL)
670                 GOTO(out_name, rc = -ENOENT);
671
672         RETURN(0);
673 out_name:
674         if (handle->lgh_name != NULL)
675                 OBD_FREE(handle->lgh_name, strlen(name) + 1);
676 out:
677         RETURN(rc);
678 }
679
680 static int llog_lvfs_exist(struct llog_handle *handle)
681 {
682         return (handle->lgh_file != NULL);
683 }
684
685 /* This is a callback from the llog_* functions.
686  * Assumes caller has already pushed us into the kernel context. */
687 static int llog_lvfs_create(const struct lu_env *env,
688                             struct llog_handle *handle,
689                             struct thandle *th)
690 {
691         struct llog_ctxt        *ctxt = handle->lgh_ctxt;
692         struct obd_device       *obd;
693         struct l_dentry         *dchild = NULL;
694         struct file             *file;
695         struct obdo             *oa = NULL;
696         int                      rc = 0;
697         int                      open_flags = O_RDWR | O_CREAT | O_LARGEFILE;
698
699         ENTRY;
700
701         LASSERT(ctxt);
702         LASSERT(ctxt->loc_exp);
703         obd = ctxt->loc_exp->exp_obd;
704         LASSERT(handle->lgh_file == NULL);
705
706         if (handle->lgh_name) {
707                 file = llog_filp_open(MOUNT_CONFIGS_DIR, handle->lgh_name,
708                                       open_flags, 0644);
709                 if (IS_ERR(file))
710                         RETURN(PTR_ERR(file));
711
712                 handle->lgh_id.lgl_oseq = FID_SEQ_LLOG;
713                 handle->lgh_id.lgl_oid = file->f_dentry->d_inode->i_ino;
714                 handle->lgh_id.lgl_ogen =
715                                 file->f_dentry->d_inode->i_generation;
716                 handle->lgh_file = file;
717         } else {
718                 OBDO_ALLOC(oa);
719                 if (oa == NULL)
720                         RETURN(-ENOMEM);
721
722                 oa->o_seq = FID_SEQ_LLOG;
723                 oa->o_valid = OBD_MD_FLGENER | OBD_MD_FLGROUP;
724
725                 rc = obd_create(NULL, ctxt->loc_exp, oa, NULL, NULL);
726                 if (rc)
727                         GOTO(out, rc);
728
729                 /* FIXME: rationalize the misuse of o_generation in
730                  *        this API along with mds_obd_{create,destroy}.
731                  *        Hopefully it is only an internal API issue. */
732 #define o_generation o_parent_oid
733                 dchild = obd_lvfs_fid2dentry(ctxt->loc_exp, oa->o_id,
734                                              oa->o_generation, oa->o_seq);
735                 if (IS_ERR(dchild))
736                         GOTO(out, rc = PTR_ERR(dchild));
737
738                 file = l_dentry_open(&obd->obd_lvfs_ctxt, dchild, open_flags);
739                 if (IS_ERR(file))
740                         GOTO(out, rc = PTR_ERR(file));
741                 handle->lgh_id.lgl_oseq = oa->o_seq;
742                 handle->lgh_id.lgl_oid = oa->o_id;
743                 handle->lgh_id.lgl_ogen = oa->o_generation;
744                 handle->lgh_file = file;
745 out:
746                 OBDO_FREE(oa);
747         }
748         RETURN(rc);
749 }
750
751 static int llog_lvfs_close(const struct lu_env *env,
752                            struct llog_handle *handle)
753 {
754         int rc;
755
756         ENTRY;
757
758         if (handle->lgh_file == NULL)
759                 RETURN(0);
760         rc = filp_close(handle->lgh_file, 0);
761         if (rc)
762                 CERROR("%s: error closing llog #"LPX64"#"LPX64"#%08x: "
763                        "rc = %d\n", handle->lgh_ctxt->loc_obd->obd_name,
764                        handle->lgh_id.lgl_oid, handle->lgh_id.lgl_oseq,
765                        handle->lgh_id.lgl_ogen, rc);
766         handle->lgh_file = NULL;
767         if (handle->lgh_name) {
768                 OBD_FREE(handle->lgh_name, strlen(handle->lgh_name) + 1);
769                 handle->lgh_name = NULL;
770         }
771         RETURN(rc);
772 }
773
774 static int llog_lvfs_destroy(const struct lu_env *env,
775                              struct llog_handle *handle)
776 {
777         struct dentry *fdentry;
778         struct obdo *oa;
779         struct obd_device *obd = handle->lgh_ctxt->loc_exp->exp_obd;
780         char *dir;
781         void *th;
782         struct inode *inode;
783         int rc, rc1;
784         ENTRY;
785
786         dir = MOUNT_CONFIGS_DIR;
787
788         LASSERT(handle->lgh_file);
789         fdentry = handle->lgh_file->f_dentry;
790         inode = fdentry->d_parent->d_inode;
791         if (strcmp(fdentry->d_parent->d_name.name, dir) == 0) {
792                 struct lvfs_run_ctxt saved;
793                 struct vfsmount *mnt = mntget(handle->lgh_file->f_vfsmnt);
794
795                 push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
796                 dget(fdentry);
797                 rc = llog_lvfs_close(env, handle);
798                 if (rc == 0) {
799                         mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
800                         rc = ll_vfs_unlink(inode, fdentry, mnt);
801                         mutex_unlock(&inode->i_mutex);
802                 }
803                 mntput(mnt);
804
805                 dput(fdentry);
806                 pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
807                 RETURN(rc);
808         }
809
810         OBDO_ALLOC(oa);
811         if (oa == NULL)
812                 RETURN(-ENOMEM);
813
814         oa->o_id = handle->lgh_id.lgl_oid;
815         oa->o_seq = handle->lgh_id.lgl_oseq;
816         oa->o_generation = handle->lgh_id.lgl_ogen;
817 #undef o_generation
818         oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP | OBD_MD_FLGENER;
819
820         rc = llog_lvfs_close(env, handle);
821         if (rc)
822                 GOTO(out, rc);
823
824         th = fsfilt_start_log(obd, inode, FSFILT_OP_UNLINK, NULL, 1);
825         if (IS_ERR(th)) {
826                 CERROR("fsfilt_start failed: %ld\n", PTR_ERR(th));
827                 GOTO(out, rc = PTR_ERR(th));
828         }
829
830         rc = obd_destroy(NULL, handle->lgh_ctxt->loc_exp, oa,
831                          NULL, NULL, NULL, NULL);
832
833         rc1 = fsfilt_commit(obd, inode, th, 0);
834         if (rc == 0 && rc1 != 0)
835                 rc = rc1;
836  out:
837         OBDO_FREE(oa);
838         RETURN(rc);
839 }
840
841 /* reads the catalog list */
842 int llog_get_cat_list(struct obd_device *disk_obd,
843                       char *name, int idx, int count, struct llog_catid *idarray)
844 {
845         struct lvfs_run_ctxt saved;
846         struct l_file *file;
847         int rc, rc1 = 0;
848         int size = sizeof(*idarray) * count;
849         loff_t off = idx *  sizeof(*idarray);
850         ENTRY;
851
852         if (!count)
853                 RETURN(0);
854
855         push_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL);
856         file = filp_open(name, O_RDWR | O_CREAT | O_LARGEFILE, 0700);
857         if (!file || IS_ERR(file)) {
858                 rc = PTR_ERR(file);
859                 CERROR("OBD filter: cannot open/create %s: rc = %d\n",
860                        name, rc);
861                 GOTO(out, rc);
862         }
863
864         if (!S_ISREG(file->f_dentry->d_inode->i_mode)) {
865                 CERROR("%s is not a regular file!: mode = %o\n", name,
866                        file->f_dentry->d_inode->i_mode);
867                 GOTO(out, rc = -ENOENT);
868         }
869
870         CDEBUG(D_CONFIG, "cat list: disk size=%d, read=%d\n",
871                (int)i_size_read(file->f_dentry->d_inode), size);
872
873         /* read for new ost index or for empty file */
874         memset(idarray, 0, size);
875         if (i_size_read(file->f_dentry->d_inode) < off)
876                 GOTO(out, rc = 0);
877
878         rc = fsfilt_read_record(disk_obd, file, idarray, size, &off);
879         if (rc) {
880                 CERROR("OBD filter: error reading %s: rc %d\n", name, rc);
881                 GOTO(out, rc);
882         }
883
884         EXIT;
885  out:
886         pop_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL);
887         if (file && !IS_ERR(file))
888                 rc1 = filp_close(file, 0);
889         if (rc == 0)
890                 rc = rc1;
891         return rc;
892 }
893 EXPORT_SYMBOL(llog_get_cat_list);
894
895 /* writes the cat list */
896 int llog_put_cat_list(struct obd_device *disk_obd,
897                       char *name, int idx, int count, struct llog_catid *idarray)
898 {
899         struct lvfs_run_ctxt saved;
900         struct l_file *file;
901         int rc, rc1 = 0;
902         int size = sizeof(*idarray) * count;
903         loff_t off = idx * sizeof(*idarray);
904
905         if (!count)
906                 GOTO(out1, rc = 0);
907
908         push_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL);
909         file = filp_open(name, O_RDWR | O_CREAT | O_LARGEFILE, 0700);
910         if (!file || IS_ERR(file)) {
911                 rc = PTR_ERR(file);
912                 CERROR("OBD filter: cannot open/create %s: rc = %d\n",
913                        name, rc);
914                 GOTO(out, rc);
915         }
916
917         if (!S_ISREG(file->f_dentry->d_inode->i_mode)) {
918                 CERROR("%s is not a regular file!: mode = %o\n", name,
919                        file->f_dentry->d_inode->i_mode);
920                 GOTO(out, rc = -ENOENT);
921         }
922
923         rc = fsfilt_write_record(disk_obd, file, idarray, size, &off, 1);
924         if (rc) {
925                 CDEBUG(D_INODE,"OBD filter: error writeing %s: rc %d\n",
926                        name, rc);
927                 GOTO(out, rc);
928         }
929
930 out:
931         pop_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL);
932         if (file && !IS_ERR(file))
933                 rc1 = filp_close(file, 0);
934
935         if (rc == 0)
936                 rc = rc1;
937 out1:
938         RETURN(rc);
939 }
940 EXPORT_SYMBOL(llog_put_cat_list);
941
/*
 * Declare-create method of the lvfs llog operations.
 *
 * This implementation declares nothing: env, res and th are all ignored
 * and the call always succeeds.
 */
static int llog_lvfs_declare_create(const struct lu_env *env,
                                    struct llog_handle *res,
                                    struct thandle *th)
{
        /* nothing to declare */
        return 0;
}
948
/*
 * Declare-write-record method of the lvfs llog operations.
 *
 * This implementation declares nothing: every argument is ignored and the
 * call always succeeds.
 */
static int llog_lvfs_declare_write_rec(const struct lu_env *env,
                                       struct llog_handle *loghandle,
                                       struct llog_rec_hdr *rec,
                                       int idx, struct thandle *th)
{
        /* nothing to declare */
        return 0;
}
956
957 struct llog_operations llog_lvfs_ops = {
958         .lop_write_rec          = llog_lvfs_write_rec,
959         .lop_next_block         = llog_lvfs_next_block,
960         .lop_prev_block         = llog_lvfs_prev_block,
961         .lop_read_header        = llog_lvfs_read_header,
962         .lop_create             = llog_lvfs_create,
963         .lop_destroy            = llog_lvfs_destroy,
964         .lop_close              = llog_lvfs_close,
965         .lop_open               = llog_lvfs_open,
966         .lop_exist              = llog_lvfs_exist,
967         .lop_declare_create     = llog_lvfs_declare_create,
968         .lop_declare_write_rec  = llog_lvfs_declare_write_rec,
969 };
970 EXPORT_SYMBOL(llog_lvfs_ops);
971 #else /* !__KERNEL__ */
/*
 * Stand-in for llog_get_cat_list() in the build that does not compile the
 * kernel implementation.  Reading the catalog list is not supported
 * there: every call hits LBUG(), and 0 is returned should LBUG() come
 * back.  All arguments are ignored.
 */
int llog_get_cat_list(struct obd_device *disk_obd,
                      char *name, int idx, int count,
                      struct llog_catid *idarray)
{
        LBUG();

        return 0;
}
979
/*
 * Stand-in for llog_put_cat_list() in the build that does not compile the
 * kernel implementation.  Writing the catalog list is not supported
 * there: every call hits LBUG(), and 0 is returned should LBUG() come
 * back.  All arguments are ignored.
 */
int llog_put_cat_list(struct obd_device *disk_obd,
                      char *name, int idx, int count,
                      struct llog_catid *idarray)
{
        LBUG();

        return 0;
}
987
988 struct llog_operations llog_lvfs_ops = {};
989 #endif