Whamcloud - gitweb
Land b1_8_gate onto b1_8 (20081218_1708)
[fs/lustre-release.git] / lustre / obdclass / llog_lvfs.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see
20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright  2008 Sun Microsystems, Inc. All rights reserved
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/obdclass/llog_lvfs.c
37  *
38  * OST<->MDS recovery logging infrastructure.
39  * Invariants in implementation:
40  * - we do not share logs among different OST<->MDS connections, so that
41  *   if an OST or MDS fails it need only look at log(s) relevant to itself
42  *
43  * Author: Andreas Dilger <adilger@clusterfs.com>
44  */
45
46 #define DEBUG_SUBSYSTEM S_LOG
47
48 #ifndef EXPORT_SYMTAB
49 #define EXPORT_SYMTAB
50 #endif
51
52 #ifndef __KERNEL__
53 #include <liblustre.h>
54 #endif
55
56 #include <obd.h>
57 #include <obd_class.h>
58 #include <lustre_log.h>
59 #include <obd_ost.h>
60 #include <libcfs/list.h>
61 #include <lvfs.h>
62 #include <lustre_fsfilt.h>
63 #include <lustre_disk.h>
64 #include "llog_internal.h"
65
66 #if defined(__KERNEL__) && defined(LLOG_LVFS)
67
68 static int llog_lvfs_pad(struct obd_device *obd, struct l_file *file,
69                                 int len, int index)
70 {
71         struct llog_rec_hdr rec = { 0 };
72         struct llog_rec_tail tail;
73         int rc;
74         ENTRY;
75
76         LASSERT(len >= LLOG_MIN_REC_SIZE && (len & 0x7) == 0);
77
78         tail.lrt_len = rec.lrh_len = len;
79         tail.lrt_index = rec.lrh_index = index;
80         rec.lrh_type = LLOG_PAD_MAGIC;
81
82         rc = fsfilt_write_record(obd, file, &rec, sizeof(rec), &file->f_pos, 0);
83         if (rc) {
84                 CERROR("error writing padding record: rc %d\n", rc);
85                 goto out;
86         }
87
88         file->f_pos += len - sizeof(rec) - sizeof(tail);
89         rc = fsfilt_write_record(obd, file, &tail, sizeof(tail),&file->f_pos,0);
90         if (rc) {
91                 CERROR("error writing padding record: rc %d\n", rc);
92                 goto out;
93         }
94
95  out:
96         RETURN(rc);
97 }
98
99 static int llog_lvfs_write_blob(struct obd_device *obd, struct l_file *file,
100                                 struct llog_rec_hdr *rec, void *buf, loff_t off)
101 {
102         int rc;
103         struct llog_rec_tail end;
104         loff_t saved_off = file->f_pos;
105         int buflen = rec->lrh_len;
106         ENTRY;
107
108         file->f_pos = off;
109
110         if (buflen == 0) 
111                 CWARN("0-length record\n");
112
113         if (!buf) {
114                 rc = fsfilt_write_record(obd, file, rec, buflen,&file->f_pos,0);
115                 if (rc) {
116                         CERROR("error writing log record: rc %d\n", rc);
117                         goto out;
118                 }
119                 GOTO(out, rc = 0);
120         }
121
122         /* the buf case */
123         rec->lrh_len = sizeof(*rec) + buflen + sizeof(end);
124         rc = fsfilt_write_record(obd, file, rec, sizeof(*rec), &file->f_pos, 0);
125         if (rc) {
126                 CERROR("error writing log hdr: rc %d\n", rc);
127                 goto out;
128         }
129
130         rc = fsfilt_write_record(obd, file, buf, buflen, &file->f_pos, 0);
131         if (rc) {
132                 CERROR("error writing log buffer: rc %d\n", rc);
133                 goto out;
134         }
135
136         end.lrt_len = rec->lrh_len;
137         end.lrt_index = rec->lrh_index;
138         rc = fsfilt_write_record(obd, file, &end, sizeof(end), &file->f_pos, 0);
139         if (rc) {
140                 CERROR("error writing log tail: rc %d\n", rc);
141                 goto out;
142         }
143
144         rc = 0;
145  out:
146         if (saved_off > file->f_pos)
147                 file->f_pos = saved_off;
148         LASSERT(rc <= 0);
149         RETURN(rc);
150 }
151
152 static int llog_lvfs_read_blob(struct obd_device *obd, struct l_file *file,
153                                 void *buf, int size, loff_t off)
154 {
155         loff_t offset = off;
156         int rc;
157         ENTRY;
158
159         rc = fsfilt_read_record(obd, file, buf, size, &offset);
160         if (rc) {
161                 CERROR("error reading log record: rc %d\n", rc);
162                 RETURN(rc);
163         }
164         RETURN(0);
165 }
166
167 static int llog_lvfs_read_header(struct llog_handle *handle)
168 {
169         struct obd_device *obd;
170         int rc;
171         ENTRY;
172
173         LASSERT(sizeof(*handle->lgh_hdr) == LLOG_CHUNK_SIZE);
174
175         obd = handle->lgh_ctxt->loc_exp->exp_obd;
176
177         if (i_size_read(handle->lgh_file->f_dentry->d_inode) == 0) {
178                 CDEBUG(D_RPCTRACE, "not reading header from 0-byte log\n");
179                 RETURN(LLOG_EEMPTY);
180         }
181
182         rc = llog_lvfs_read_blob(obd, handle->lgh_file, handle->lgh_hdr,
183                                  LLOG_CHUNK_SIZE, 0);
184         if (rc) {
185                 CERROR("error reading log header from %.*s\n",
186                        handle->lgh_file->f_dentry->d_name.len,
187                        handle->lgh_file->f_dentry->d_name.name);
188         } else {
189                 struct llog_rec_hdr *llh_hdr = &handle->lgh_hdr->llh_hdr;
190
191                 if (LLOG_REC_HDR_NEEDS_SWABBING(llh_hdr))
192                         lustre_swab_llog_hdr(handle->lgh_hdr);
193
194                 if (llh_hdr->lrh_type != LLOG_HDR_MAGIC) {
195                         CERROR("bad log %.*s header magic: %#x (expected %#x)\n",
196                                handle->lgh_file->f_dentry->d_name.len,
197                                handle->lgh_file->f_dentry->d_name.name,
198                                llh_hdr->lrh_type, LLOG_HDR_MAGIC);
199                         rc = -EIO;
200                 } else if (llh_hdr->lrh_len != LLOG_CHUNK_SIZE) {
201                         CERROR("incorrectly sized log %.*s header: %#x "
202                                "(expected %#x)\n",
203                                handle->lgh_file->f_dentry->d_name.len,
204                                handle->lgh_file->f_dentry->d_name.name,
205                                llh_hdr->lrh_len, LLOG_CHUNK_SIZE);
206                         CERROR("you may need to re-run lconf --write_conf.\n");
207                         rc = -EIO;
208                 }
209         }
210
211         handle->lgh_last_idx = handle->lgh_hdr->llh_tail.lrt_index;
212         handle->lgh_file->f_pos = i_size_read(handle->lgh_file->f_dentry->d_inode);
213
214         RETURN(rc);
215 }
216
217 /* returns negative in on error; 0 if success && reccookie == 0; 1 otherwise */
218 /* appends if idx == -1, otherwise overwrites record idx. */
219 static int llog_lvfs_write_rec(struct llog_handle *loghandle,
220                                struct llog_rec_hdr *rec,
221                                struct llog_cookie *reccookie, int cookiecount,
222                                void *buf, int idx)
223 {
224         struct llog_log_hdr *llh;
225         int reclen = rec->lrh_len, index, rc;
226         struct llog_rec_tail *lrt;
227         struct obd_device *obd;
228         struct file *file;
229         size_t left;
230         ENTRY;
231
232         llh = loghandle->lgh_hdr;
233         file = loghandle->lgh_file;
234         obd = loghandle->lgh_ctxt->loc_exp->exp_obd;
235
236         /* record length should not bigger than LLOG_CHUNK_SIZE */
237         if (buf)
238                 rc = (reclen > LLOG_CHUNK_SIZE - sizeof(struct llog_rec_hdr) -
239                       sizeof(struct llog_rec_tail)) ? -E2BIG : 0;
240         else
241                 rc = (reclen > LLOG_CHUNK_SIZE) ? -E2BIG : 0;
242         if (rc)
243                 RETURN(rc);
244
245         if (buf)
246                 /* write_blob adds header and tail to lrh_len. */ 
247                 reclen = sizeof(*rec) + rec->lrh_len + 
248                         sizeof(struct llog_rec_tail);
249
250         if (idx != -1) {
251                 loff_t saved_offset;
252
253                 /* no header: only allowed to insert record 1 */
254                 if (idx != 1 && !i_size_read(file->f_dentry->d_inode)) {
255                         CERROR("idx != -1 in empty log\n");
256                         LBUG();
257                 }
258
259                 if (idx && llh->llh_size && llh->llh_size != rec->lrh_len)
260                         RETURN(-EINVAL);
261
262                 if (!ext2_test_bit(idx, llh->llh_bitmap)) 
263                         CERROR("Modify unset record %u\n", idx);
264                 if (idx != rec->lrh_index)
265                         CERROR("Index mismatch %d %u\n", idx, rec->lrh_index);
266
267                 rc = llog_lvfs_write_blob(obd, file, &llh->llh_hdr, NULL, 0);
268                 /* we are done if we only write the header or on error */
269                 if (rc || idx == 0)
270                         RETURN(rc);
271
272                 /* Assumes constant lrh_len */
273                 saved_offset = sizeof(*llh) + (idx - 1) * reclen;
274
275                 if (buf) {
276                         struct llog_rec_hdr check;
277
278                         /* We assume that caller has set lgh_cur_* */
279                         saved_offset = loghandle->lgh_cur_offset;
280                         CDEBUG(D_OTHER,
281                                "modify record "LPX64": idx:%d/%u/%d, len:%u "
282                                "offset %llu\n",
283                                loghandle->lgh_id.lgl_oid, idx, rec->lrh_index,
284                                loghandle->lgh_cur_idx, rec->lrh_len,
285                                (long long)(saved_offset - sizeof(*llh)));
286                         if (rec->lrh_index != loghandle->lgh_cur_idx) {
287                                 CERROR("modify idx mismatch %u/%d\n",
288                                        idx, loghandle->lgh_cur_idx);
289                                 RETURN(-EFAULT);
290                         }
291 #if 1  /* FIXME remove this safety check at some point */
292                         /* Verify that the record we're modifying is the 
293                            right one. */
294                         rc = llog_lvfs_read_blob(obd, file, &check,
295                                                  sizeof(check), saved_offset);
296                         if (check.lrh_index != idx || check.lrh_len != reclen) {
297                                 CERROR("Bad modify idx %u/%u size %u/%u (%d)\n",
298                                        idx, check.lrh_index, reclen, 
299                                        check.lrh_len, rc);
300                                 RETURN(-EFAULT);
301                         }
302 #endif
303                 }
304
305                 rc = llog_lvfs_write_blob(obd, file, rec, buf, saved_offset);
306                 if (rc == 0 && reccookie) {
307                         reccookie->lgc_lgl = loghandle->lgh_id;
308                         reccookie->lgc_index = idx;
309                         rc = 1;
310                 }
311                 RETURN(rc);
312         }
313
314         /* Make sure that records don't cross a chunk boundary, so we can
315          * process them page-at-a-time if needed.  If it will cross a chunk
316          * boundary, write in a fake (but referenced) entry to pad the chunk.
317          *
318          * We know that llog_current_log() will return a loghandle that is
319          * big enough to hold reclen, so all we care about is padding here.
320          */
321         left = LLOG_CHUNK_SIZE - (file->f_pos & (LLOG_CHUNK_SIZE - 1));
322
323         /* NOTE: padding is a record, but no bit is set */
324         if (left != 0 && left != reclen &&
325             left < (reclen + LLOG_MIN_REC_SIZE)) {
326                 index = loghandle->lgh_last_idx + 1;
327                 rc = llog_lvfs_pad(obd, file, left, index);
328                 if (rc)
329                         RETURN(rc);
330                 loghandle->lgh_last_idx++; /*for pad rec*/
331         }
332         /* if it's the last idx in log file, then return -ENOSPC */
333         if (loghandle->lgh_last_idx >= LLOG_BITMAP_SIZE(llh) - 1)
334                 RETURN(-ENOSPC);
335         index = ++loghandle->lgh_last_idx;
336         rec->lrh_index = index;
337         if (buf == NULL) {
338                 lrt = (struct llog_rec_tail *)
339                         ((char *)rec + rec->lrh_len - sizeof(*lrt));
340                 lrt->lrt_len = rec->lrh_len;
341                 lrt->lrt_index = rec->lrh_index;
342         }
343         /*The caller should make sure only 1 process access the lgh_last_idx,
344          *Otherwise it might hit the assert.*/
345         LASSERT(index < LLOG_BITMAP_SIZE(llh));
346         if (ext2_set_bit(index, llh->llh_bitmap)) {
347                 CERROR("argh, index %u already set in log bitmap?\n", index);
348                 LBUG(); /* should never happen */
349         }
350         llh->llh_count++;
351         llh->llh_tail.lrt_index = index;
352
353         rc = llog_lvfs_write_blob(obd, file, &llh->llh_hdr, NULL, 0);
354         if (rc)
355                 RETURN(rc);
356
357         rc = llog_lvfs_write_blob(obd, file, rec, buf, file->f_pos);
358         if (rc)
359                 RETURN(rc);
360
361         CDEBUG(D_RPCTRACE, "added record "LPX64": idx: %u, %u bytes\n",
362                loghandle->lgh_id.lgl_oid, index, rec->lrh_len);
363         if (rc == 0 && reccookie) {
364                 reccookie->lgc_lgl = loghandle->lgh_id;
365                 reccookie->lgc_index = index;
366                 if ((rec->lrh_type == MDS_UNLINK_REC) || 
367                                 (rec->lrh_type == MDS_SETATTR_REC))
368                         reccookie->lgc_subsys = LLOG_MDS_OST_ORIG_CTXT;
369                 else if (rec->lrh_type == OST_SZ_REC)
370                         reccookie->lgc_subsys = LLOG_SIZE_ORIG_CTXT;
371                 else if (rec->lrh_type == OST_RAID1_REC)
372                         reccookie->lgc_subsys = LLOG_RD1_ORIG_CTXT;
373                 else
374                         reccookie->lgc_subsys = -1;
375                 rc = 1;
376         }
377         if (rc == 0 && rec->lrh_type == LLOG_GEN_REC)
378                 rc = 1;
379
380         RETURN(rc);
381 }
382
383 /* We can skip reading at least as many log blocks as the number of
384 * minimum sized log records we are skipping.  If it turns out
385 * that we are not far enough along the log (because the
386 * actual records are larger than minimum size) we just skip
387 * some more records. */
388
389 static void llog_skip_over(__u64 *off, int curr, int goal)
390 {
391         if (goal <= curr)
392                 return;
393         *off = (*off + (goal-curr-1) * LLOG_MIN_REC_SIZE) &
394                 ~(LLOG_CHUNK_SIZE - 1);
395 }
396
397
398 /* sets:
399  *  - cur_offset to the furthest point read in the log file
400  *  - cur_idx to the log index preceeding cur_offset
401  * returns -EIO/-EINVAL on error
402  */
403 static int llog_lvfs_next_block(struct llog_handle *loghandle, int *cur_idx,
404                                 int next_idx, __u64 *cur_offset, void *buf,
405                                 int len)
406 {
407         int rc;
408         ENTRY;
409
410         if (len == 0 || len & (LLOG_CHUNK_SIZE - 1))
411                 RETURN(-EINVAL);
412
413         CDEBUG(D_OTHER, "looking for log index %u (cur idx %u off "LPU64")\n",
414                next_idx, *cur_idx, *cur_offset);
415
416         while (*cur_offset < i_size_read(loghandle->lgh_file->f_dentry->d_inode)) {
417                 struct llog_rec_hdr *rec;
418                 struct llog_rec_tail *tail;
419                 loff_t ppos;
420
421                 llog_skip_over(cur_offset, *cur_idx, next_idx);
422
423                 ppos = *cur_offset;
424                 rc = fsfilt_read_record(loghandle->lgh_ctxt->loc_exp->exp_obd,
425                                         loghandle->lgh_file, buf, len,
426                                         &ppos);
427                 if (rc) {
428                         CERROR("Cant read llog block at log id "LPU64
429                                "/%u offset "LPU64"\n",
430                                loghandle->lgh_id.lgl_oid,
431                                loghandle->lgh_id.lgl_ogen,
432                                *cur_offset);
433                         RETURN(rc);
434                 }
435
436                 /* put number of bytes read into rc to make code simpler */
437                 rc = ppos - *cur_offset;
438                 *cur_offset = ppos;
439                 
440                 if (rc < len) {
441                         /* signal the end of the valid buffer to llog_process */
442                         memset(buf + rc, 0, len - rc);
443                 }
444
445                 if (rc == 0) /* end of file, nothing to do */
446                         RETURN(0);
447
448                 if (rc < sizeof(*tail)) {
449                         CERROR("Invalid llog block at log id "LPU64"/%u offset "
450                                LPU64"\n", loghandle->lgh_id.lgl_oid,
451                                loghandle->lgh_id.lgl_ogen, *cur_offset);
452                         RETURN(-EINVAL);
453                 }
454
455                 rec = buf;
456                 tail = (struct llog_rec_tail *)((char *)buf + rc -
457                                                 sizeof(struct llog_rec_tail));
458
459                 if (LLOG_REC_HDR_NEEDS_SWABBING(rec)) {
460                         lustre_swab_llog_rec(rec, tail);
461                 }
462
463                 *cur_idx = tail->lrt_index;
464
465                 /* this shouldn't happen */
466                 if (tail->lrt_index == 0) {
467                         CERROR("Invalid llog tail at log id "LPU64"/%u offset "
468                                LPU64"\n", loghandle->lgh_id.lgl_oid,
469                                loghandle->lgh_id.lgl_ogen, *cur_offset);
470                         RETURN(-EINVAL);
471                 }
472                 if (tail->lrt_index < next_idx)
473                         continue;
474
475                 /* sanity check that the start of the new buffer is no farther
476                  * than the record that we wanted.  This shouldn't happen. */
477                 if (rec->lrh_index > next_idx) {
478                         CERROR("missed desired record? %u > %u\n",
479                                rec->lrh_index, next_idx);
480                         RETURN(-ENOENT);
481                 }
482                 RETURN(0);
483         }
484         RETURN(-EIO);
485 }
486
487 static int llog_lvfs_prev_block(struct llog_handle *loghandle,
488                                 int prev_idx, void *buf, int len)
489 {
490         __u64 cur_offset;
491         int rc;
492         ENTRY;
493
494         if (len == 0 || len & (LLOG_CHUNK_SIZE - 1))
495                 RETURN(-EINVAL);
496
497         CDEBUG(D_OTHER, "looking for log index %u\n", prev_idx);
498
499         cur_offset = LLOG_CHUNK_SIZE;
500         llog_skip_over(&cur_offset, 0, prev_idx);
501
502         while (cur_offset < i_size_read(loghandle->lgh_file->f_dentry->d_inode)) {
503                 struct llog_rec_hdr *rec;
504                 struct llog_rec_tail *tail;
505                 loff_t ppos;
506
507                 ppos = cur_offset;
508
509                 rc = fsfilt_read_record(loghandle->lgh_ctxt->loc_exp->exp_obd,
510                                         loghandle->lgh_file, buf, len,
511                                         &ppos);
512                 if (rc) {
513                         CERROR("Cant read llog block at log id "LPU64
514                                "/%u offset "LPU64"\n",
515                                loghandle->lgh_id.lgl_oid,
516                                loghandle->lgh_id.lgl_ogen,
517                                cur_offset);
518                         RETURN(rc);
519                 }
520
521                 /* put number of bytes read into rc to make code simpler */
522                 rc = ppos - cur_offset;
523                 cur_offset = ppos;
524
525                 if (rc == 0) /* end of file, nothing to do */
526                         RETURN(0);
527
528                 if (rc < sizeof(*tail)) {
529                         CERROR("Invalid llog block at log id "LPU64"/%u offset "
530                                LPU64"\n", loghandle->lgh_id.lgl_oid,
531                                loghandle->lgh_id.lgl_ogen, cur_offset);
532                         RETURN(-EINVAL);
533                 }
534
535                 tail = buf + rc - sizeof(struct llog_rec_tail);
536
537                 /* this shouldn't happen */
538                 if (tail->lrt_index == 0) {
539                         CERROR("Invalid llog tail at log id "LPU64"/%u offset "
540                                LPU64"\n", loghandle->lgh_id.lgl_oid,
541                                loghandle->lgh_id.lgl_ogen, cur_offset);
542                         RETURN(-EINVAL);
543                 }
544                 if (le32_to_cpu(tail->lrt_index) < prev_idx)
545                         continue;
546
547                 /* sanity check that the start of the new buffer is no farther
548                  * than the record that we wanted.  This shouldn't happen. */
549                 rec = buf;
550                 if (le32_to_cpu(rec->lrh_index) > prev_idx) {
551                         CERROR("missed desired record? %u > %u\n",
552                                le32_to_cpu(rec->lrh_index), prev_idx);
553                         RETURN(-ENOENT);
554                 }
555                 RETURN(0);
556         }
557         RETURN(-EIO);
558 }
559
560 static struct file *llog_filp_open(char *dir, char *name, int flags, int mode)
561 {
562         char *logname;
563         struct file *filp;
564         int len;
565
566         OBD_ALLOC(logname, PATH_MAX);
567         if (logname == NULL)
568                 return ERR_PTR(-ENOMEM);
569
570         len = snprintf(logname, PATH_MAX, "%s/%s", dir, name);
571         if (len >= PATH_MAX - 1) {
572                 filp = ERR_PTR(-ENAMETOOLONG);
573         } else {
574                 filp = l_filp_open(logname, flags, mode);
575                 if (IS_ERR(filp))
576                         CERROR("logfile creation %s: %ld\n", logname,
577                                PTR_ERR(filp));
578         }
579         OBD_FREE(logname, PATH_MAX);
580         return filp;
581 }
582
583 /* This is a callback from the llog_* functions.
584  * Assumes caller has already pushed us into the kernel context. */
585 static int llog_lvfs_create(struct llog_ctxt *ctxt, struct llog_handle **res,
586                             struct llog_logid *logid, char *name)
587 {
588         struct llog_handle *handle;
589         struct obd_device *obd;
590         struct l_dentry *dchild = NULL;
591         struct obdo *oa = NULL;
592         int rc = 0, cleanup_phase = 1;
593         int open_flags = O_RDWR | O_CREAT | O_LARGEFILE;
594         ENTRY;
595
596         handle = llog_alloc_handle();
597         if (handle == NULL)
598                 RETURN(-ENOMEM);
599         *res = handle;
600
601         LASSERT(ctxt);
602         LASSERT(ctxt->loc_exp);
603         obd = ctxt->loc_exp->exp_obd;
604
605         if (logid != NULL) {
606                 dchild = obd_lvfs_fid2dentry(ctxt->loc_exp, logid->lgl_oid,
607                                              logid->lgl_ogen, logid->lgl_ogr);
608
609                 if (IS_ERR(dchild)) {
610                         rc = PTR_ERR(dchild);
611                         CERROR("error looking up logfile "LPX64":0x%x: rc %d\n",
612                                logid->lgl_oid, logid->lgl_ogen, rc);
613                         GOTO(cleanup, rc);
614                 }
615
616                 cleanup_phase = 2;
617                 if (dchild->d_inode == NULL) {
618                         rc = -ENOENT;
619                         CERROR("nonexistent log file "LPX64":"LPX64": rc %d\n",
620                                logid->lgl_oid, logid->lgl_ogr, rc);
621                         GOTO(cleanup, rc);
622                 }
623
624                 handle->lgh_file = l_dentry_open(&obd->obd_lvfs_ctxt, dchild,
625                                                     O_RDWR | O_LARGEFILE);
626                 if (IS_ERR(handle->lgh_file)) {
627                         rc = PTR_ERR(handle->lgh_file);
628                         CERROR("error opening logfile "LPX64"0x%x: rc %d\n",
629                                logid->lgl_oid, logid->lgl_ogen, rc);
630                         GOTO(cleanup, rc);
631                 }
632
633                 /* assign the value of lgh_id for handle directly */
634                 handle->lgh_id = *logid;
635
636         } else if (name) {
637                 /* COMPAT_146 */
638                 if (strcmp(obd->obd_type->typ_name, LUSTRE_MDS_NAME) == 0) {
639                         handle->lgh_file = llog_filp_open(MDT_LOGS_DIR, name, 
640                                                           open_flags, 0644);
641                 } else {
642                         /* end COMPAT_146 */
643                         handle->lgh_file = llog_filp_open(MOUNT_CONFIGS_DIR,
644                                                           name, open_flags, 
645                                                           0644);
646                 }
647                 if (IS_ERR(handle->lgh_file))
648                         GOTO(cleanup, rc = PTR_ERR(handle->lgh_file));
649
650                 handle->lgh_id.lgl_ogr = 1;
651                 handle->lgh_id.lgl_oid =
652                         handle->lgh_file->f_dentry->d_inode->i_ino;
653                 handle->lgh_id.lgl_ogen =
654                         handle->lgh_file->f_dentry->d_inode->i_generation;
655         } else {
656                 OBDO_ALLOC(oa);
657                 if (oa == NULL)
658                         GOTO(cleanup, rc = -ENOMEM);
659
660                 oa->o_gr = FILTER_GROUP_LLOG;
661                 oa->o_valid = OBD_MD_FLGENER | OBD_MD_FLGROUP;
662
663                 rc = obd_create(ctxt->loc_exp, oa, NULL, NULL);
664                 if (rc)
665                         GOTO(cleanup, rc);
666
667                 dchild = obd_lvfs_fid2dentry(ctxt->loc_exp, oa->o_id,
668                                              oa->o_generation, oa->o_gr);
669
670                 if (IS_ERR(dchild))
671                         GOTO(cleanup, rc = PTR_ERR(dchild));
672                 cleanup_phase = 2;
673                 handle->lgh_file = l_dentry_open(&obd->obd_lvfs_ctxt, dchild,
674                                                  open_flags);
675                 if (IS_ERR(handle->lgh_file))
676                         GOTO(cleanup, rc = PTR_ERR(handle->lgh_file));
677
678                 handle->lgh_id.lgl_ogr = oa->o_gr;
679                 handle->lgh_id.lgl_oid = oa->o_id;
680                 handle->lgh_id.lgl_ogen = oa->o_generation;
681         }
682
683         handle->lgh_ctxt = ctxt;
684  finish:
685         if (oa)
686                 OBDO_FREE(oa);
687         RETURN(rc);
688 cleanup:
689         switch (cleanup_phase) {
690         case 2:
691                 l_dput(dchild);
692         case 1:
693                 llog_free_handle(handle);
694         }
695         goto finish;
696 }
697
698 static int llog_lvfs_close(struct llog_handle *handle)
699 {
700         int rc;
701         ENTRY;
702
703         rc = filp_close(handle->lgh_file, 0);
704         if (rc)
705                 CERROR("error closing log: rc %d\n", rc);
706         RETURN(rc);
707 }
708
709 static int llog_lvfs_destroy(struct llog_handle *handle)
710 {
711         struct dentry *fdentry;
712         struct obdo *oa;
713         struct obd_device *obd = handle->lgh_ctxt->loc_exp->exp_obd;
714         char *dir;
715         int rc;
716         ENTRY;
717
718         /* COMPAT_146 */
719         if (strcmp(obd->obd_type->typ_name, LUSTRE_MDS_NAME) == 0)
720                 dir = MDT_LOGS_DIR;
721         else
722                 /* end COMPAT_146 */
723                 dir = MOUNT_CONFIGS_DIR;
724
725         fdentry = handle->lgh_file->f_dentry;
726         if (strcmp(fdentry->d_parent->d_name.name, dir) == 0) {
727                 struct inode *inode = fdentry->d_parent->d_inode;
728                 struct lvfs_run_ctxt saved;
729                 struct vfsmount *mnt = mntget(handle->lgh_file->f_vfsmnt);
730
731                 push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
732                 dget(fdentry);
733                 rc = llog_lvfs_close(handle);
734
735                 if (rc == 0) {
736                         LOCK_INODE_MUTEX(inode);
737                         rc = ll_vfs_unlink(inode, fdentry, mnt);
738                         UNLOCK_INODE_MUTEX(inode);
739                 }
740                 mntput(mnt);
741
742                 dput(fdentry);
743                 pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
744                 RETURN(rc);
745         }
746
747         OBDO_ALLOC(oa);
748         if (oa == NULL)
749                 RETURN(-ENOMEM);
750
751         oa->o_id = handle->lgh_id.lgl_oid;
752         oa->o_gr = handle->lgh_id.lgl_ogr;
753         oa->o_generation = handle->lgh_id.lgl_ogen;
754         oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP | OBD_MD_FLGENER;
755
756         rc = llog_lvfs_close(handle);
757         if (rc)
758                 GOTO(out, rc);
759
760         rc = obd_destroy(handle->lgh_ctxt->loc_exp, oa, NULL, NULL, NULL);
761  out:
762         OBDO_FREE(oa);
763         RETURN(rc);
764 }
765
766 /* reads the catalog list */
767 int llog_get_cat_list(struct obd_device *obd, struct obd_device *disk_obd,
768                       char *name, int idx, int count, struct llog_catid *idarray)
769 {
770         struct lvfs_run_ctxt saved;
771         struct l_file *file;
772         int rc, rc1 = 0;
773         int size = sizeof(*idarray) * count;
774         loff_t off = idx *  sizeof(*idarray);
775         ENTRY;
776
777         if (!count) 
778                 RETURN(0);
779
780         LASSERT_SEM_LOCKED(&obd->obd_llog_cat_process);
781
782         push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
783         file = filp_open(name, O_RDWR | O_CREAT | O_LARGEFILE, 0700);
784         if (!file || IS_ERR(file)) {
785                 rc = PTR_ERR(file);
786                 CERROR("OBD filter: cannot open/create %s: rc = %d\n",
787                        name, rc);
788                 GOTO(out, rc);
789         }
790
791         if (!S_ISREG(file->f_dentry->d_inode->i_mode)) {
792                 CERROR("%s is not a regular file!: mode = %o\n", name,
793                        file->f_dentry->d_inode->i_mode);
794                 GOTO(out, rc = -ENOENT);
795         }
796
797         CDEBUG(D_CONFIG, "cat list: disk size=%d, read=%d\n",
798                (int)i_size_read(file->f_dentry->d_inode), size);
799
800         memset(idarray, 0, size);
801         /* read for new ost index or for empty file */
802         if (i_size_read(file->f_dentry->d_inode) < off)
803                 GOTO(out, rc = 0);
804
805         rc = fsfilt_read_record(disk_obd, file, idarray, size, &off);
806         if (rc) {
807                 CERROR("OBD filter: error reading %s: rc %d\n", name, rc);
808                 GOTO(out, rc);
809         }
810
811         EXIT;
812  out:
813         pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
814         if (file && !IS_ERR(file))
815                 rc1 = filp_close(file, 0);
816         if (rc == 0)
817                 rc = rc1;
818         return rc;
819 }
820 EXPORT_SYMBOL(llog_get_cat_list);
821
822 /* writes the cat list */
823 int llog_put_cat_list(struct obd_device *obd, struct obd_device *disk_obd,
824                       char *name, int idx, int count, struct llog_catid *idarray)
825 {
826         struct lvfs_run_ctxt saved;
827         struct l_file *file;
828         int rc, rc1 = 0;
829         int size = sizeof(*idarray) * count;
830         loff_t off = idx * sizeof(*idarray);
831
832         if (!count)
833                 GOTO(out1, rc = 0);
834
835         LASSERT_SEM_LOCKED(&obd->obd_llog_cat_process);
836         push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
837         file = filp_open(name, O_RDWR | O_CREAT | O_LARGEFILE, 0700);
838         if (!file || IS_ERR(file)) {
839                 rc = PTR_ERR(file);
840                 CERROR("OBD filter: cannot open/create %s: rc = %d\n",
841                        name, rc);
842                 GOTO(out, rc);
843         }
844
845         if (!S_ISREG(file->f_dentry->d_inode->i_mode)) {
846                 CERROR("%s is not a regular file!: mode = %o\n", name,
847                        file->f_dentry->d_inode->i_mode);
848                 GOTO(out, rc = -ENOENT);
849         }
850
851         rc = fsfilt_write_record(disk_obd, file, idarray, size, &off, 1);
852         if (rc) {
853                 CDEBUG(D_INODE,"OBD filter: error writeing %s: rc %d\n",
854                        name, rc);
855                 GOTO(out, rc);
856         }
857
858 out:
859         pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
860         if (file && !IS_ERR(file))
861                 rc1 = filp_close(file, 0);
862
863         if (rc == 0)
864                 rc = rc1;
865 out1:
866         RETURN(rc);
867 }
868 EXPORT_SYMBOL(llog_put_cat_list);
869
870 struct llog_operations llog_lvfs_ops = {
871         lop_write_rec:   llog_lvfs_write_rec,
872         lop_next_block:  llog_lvfs_next_block,
873         lop_prev_block:  llog_lvfs_prev_block,
874         lop_read_header: llog_lvfs_read_header,
875         lop_create:      llog_lvfs_create,
876         lop_destroy:     llog_lvfs_destroy,
877         lop_close:       llog_lvfs_close,
878         //        lop_cancel: llog_lvfs_cancel,
879 };
880
881 EXPORT_SYMBOL(llog_lvfs_ops);
882
883 #else /* !__KERNEL__ */
884
/*
 * !__KERNEL__ build: placeholder for lop_read_header.  There is no
 * implementation on this side of the #ifdef, so any call hits LBUG().
 */
static int
llog_lvfs_read_header(struct llog_handle *handle)
{
        LBUG();
        /* only reached if LBUG() returns */
        return 0;
}
890
/*
 * !__KERNEL__ build: placeholder for lop_write_rec.  There is no
 * implementation on this side of the #ifdef, so any call hits LBUG().
 */
static int
llog_lvfs_write_rec(struct llog_handle *loghandle, struct llog_rec_hdr *rec,
                    struct llog_cookie *reccookie, int cookiecount,
                    void *buf, int idx)
{
        LBUG();
        /* only reached if LBUG() returns */
        return 0;
}
899
900 static int llog_lvfs_next_block(struct llog_handle *loghandle, int *cur_idx,
901                                 int next_idx, __u64 *cur_offset, void *buf,
902                                 int len)
903 {
904         LBUG();
905         return 0;
906 }
907
/*
 * !__KERNEL__ build: placeholder for lop_prev_block.  There is no
 * implementation on this side of the #ifdef, so any call hits LBUG().
 */
static int
llog_lvfs_prev_block(struct llog_handle *loghandle, int prev_idx,
                     void *buf, int len)
{
        LBUG();
        /* only reached if LBUG() returns */
        return 0;
}
914
/*
 * !__KERNEL__ build: placeholder for lop_create.  There is no
 * implementation on this side of the #ifdef, so any call hits LBUG().
 */
static int
llog_lvfs_create(struct llog_ctxt *ctxt, struct llog_handle **res,
                 struct llog_logid *logid, char *name)
{
        LBUG();
        /* only reached if LBUG() returns */
        return 0;
}
921
/*
 * !__KERNEL__ build: placeholder for lop_close.  There is no
 * implementation on this side of the #ifdef, so any call hits LBUG().
 */
static int
llog_lvfs_close(struct llog_handle *handle)
{
        LBUG();
        /* only reached if LBUG() returns */
        return 0;
}
927
/*
 * !__KERNEL__ build: placeholder for lop_destroy.  There is no
 * implementation on this side of the #ifdef, so any call hits LBUG().
 */
static int
llog_lvfs_destroy(struct llog_handle *handle)
{
        LBUG();
        /* only reached if LBUG() returns */
        return 0;
}
933
/*
 * !__KERNEL__ build: placeholder for the catalog list reader.  There is no
 * implementation on this side of the #ifdef, so any call hits LBUG().
 */
int
llog_get_cat_list(struct obd_device *obd, struct obd_device *disk_obd,
                  char *name, int idx, int count, struct llog_catid *idarray)
{
        LBUG();
        /* only reached if LBUG() returns */
        return 0;
}
940
/*
 * !__KERNEL__ build: placeholder for the catalog list writer.  There is no
 * implementation on this side of the #ifdef, so any call hits LBUG().
 */
int
llog_put_cat_list(struct obd_device *obd, struct obd_device *disk_obd,
                  char *name, int idx, int count, struct llog_catid *idarray)
{
        LBUG();
        /* only reached if LBUG() returns */
        return 0;
}
947
948 struct llog_operations llog_lvfs_ops = {
949         lop_write_rec:   llog_lvfs_write_rec,
950         lop_next_block:  llog_lvfs_next_block,
951         lop_prev_block:  llog_lvfs_prev_block,
952         lop_read_header: llog_lvfs_read_header,
953         lop_create:      llog_lvfs_create,
954         lop_destroy:     llog_lvfs_destroy,
955         lop_close:       llog_lvfs_close,
956 //        lop_cancel:      llog_lvfs_cancel,
957 };
958 #endif