1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2001-2003 Cluster File Systems, Inc.
5 * Author: Andreas Dilger <adilger@clusterfs.com>
7 * This file is part of Lustre, http://www.lustre.org.
9 * Lustre is free software; you can redistribute it and/or
10 * modify it under the terms of version 2 of the GNU General Public
11 * License as published by the Free Software Foundation.
13 * Lustre is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with Lustre; if not, write to the Free Software
20 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22 * OST<->MDS recovery logging infrastructure.
24 * Invariants in implementation:
25 * - we do not share logs among different OST<->MDS connections, so that
26 * if an OST or MDS fails it need only look at log(s) relevant to itself
29 #define DEBUG_SUBSYSTEM S_LOG
38 #include <liblustre.h>
41 #include <linux/lvfs.h>
42 #include <linux/lustre_fsfilt.h>
43 #include <linux/lustre_log.h>
/*
 * llog_lvfs_pad - write a padding record into a log file.
 *
 * From the visible code: the padding length "len" must be at least
 * LLOG_MIN_REC_SIZE and a multiple of 8.  The same length and index are
 * stored (little-endian) in both the record header and the record tail.
 * The header is written, file->f_pos is advanced over the unwritten middle
 * of the record, and then the tail is written.
 *
 * NOTE(review): part of the parameter list, the local "rc" declaration, the
 * error checks around each CERROR and the return statement are not visible
 * in this listing; presumably rc is returned to the caller - confirm.
 */
47 static int llog_lvfs_pad(struct llog_ctxt *ctxt, struct l_file *file,
50 struct llog_rec_hdr rec;
51 struct llog_rec_tail tail;
/* Padding must hold at least one minimum record and stay 8-byte aligned. */
55 LASSERT(len >= LLOG_MIN_REC_SIZE && (len & 0x7) == 0);
/* Header and tail mirror each other so the record can be parsed from either end. */
57 tail.lrt_len = rec.lrh_len = cpu_to_le32(len);
58 tail.lrt_index = rec.lrh_index = cpu_to_le32(index);
/* Step 1: write the record header. */
61 rc = llog_fsfilt_write_record(ctxt, file, &rec, sizeof(rec),
64 CERROR("error writing padding record: rc %d\n", rc);
/* Step 2: skip the record body so that the tail ends exactly len bytes after the header start. */
68 file->f_pos += len - sizeof(rec) - sizeof(tail);
69 rc = llog_fsfilt_write_record(ctxt, file, &tail, sizeof(tail),
72 CERROR("error writing padding record: rc %d\n", rc);
/*
 * llog_lvfs_write_blob - write one log record (header, payload, tail) to a file.
 *
 * Visible behaviour:
 *  - the current file position is remembered in saved_off and buflen is
 *    taken from rec->lrh_len;
 *  - one path writes "rec" itself as a blob of buflen bytes;
 *  - the other path rewrites rec->lrh_len to cover header + buflen + tail,
 *    then writes the header, the caller buffer "buf" and a tail that copies
 *    the length and index of the header;
 *  - at the end file->f_pos is never left behind its value on entry.
 *
 * NOTE(review): the condition selecting between the two paths (presumably
 * buf == NULL), the offset argument of each write, the error checks and the
 * return statement are not visible in this listing.
 */
80 static int llog_lvfs_write_blob(struct llog_ctxt *ctxt, struct l_file *file,
81 struct llog_rec_hdr *rec, void *buf, loff_t off)
84 struct llog_rec_tail end;
85 loff_t saved_off = file->f_pos;
86 int buflen = le32_to_cpu(rec->lrh_len);
/* Path A: the record is already fully assembled in "rec"; write it in one go. */
92 rc = llog_fsfilt_write_record(ctxt, file, rec, buflen,
95 CERROR("error writing log record: rc %d\n", rc);
/* Path B: header, payload and tail are written as three separate pieces. */
102 rec->lrh_len = cpu_to_le32(sizeof(*rec) + buflen + sizeof(end));
103 rc = llog_fsfilt_write_record(ctxt, file, rec, sizeof(*rec),
106 CERROR("error writing log hdr: rc %d\n", rc);
110 rc = llog_fsfilt_write_record(ctxt, file, buf, buflen,
113 CERROR("error writing log buffer: rc %d\n", rc);
/* The tail repeats the (already little-endian) length and index of the header. */
117 end.lrt_len = rec->lrh_len;
118 end.lrt_index = rec->lrh_index;
119 rc = llog_fsfilt_write_record(ctxt, file, &end, sizeof(end),
122 CERROR("error writing log tail: rc %d\n", rc);
/* An overwrite at an earlier offset must not move the append position backwards. */
128 if (saved_off > file->f_pos)
129 file->f_pos = saved_off;
/*
 * llog_lvfs_read_blob - read "size" bytes from a log file into "buf".
 *
 * The read goes through llog_fsfilt_read_record() using a local "offset"
 * variable, and a failure is reported with CERROR.
 *
 * NOTE(review): the declaration of "offset" (presumably initialised from
 * "off"), the error check and the return statement are not visible here.
 */
134 static int llog_lvfs_read_blob(struct llog_ctxt *ctxt, struct l_file *file,
135 void *buf, int size, loff_t off)
141 rc = llog_fsfilt_read_record(ctxt, file, buf, size, &offset);
143 CERROR("error reading log record: rc %d\n", rc);
/*
 * llog_lvfs_read_header - load the on-disk log header into handle->lgh_hdr.
 *
 * Visible behaviour: asserts that the in-memory header is exactly
 * LLOG_CHUNK_SIZE bytes and that the handle has a context; treats a
 * zero-length log file as having no header to read; otherwise reads the
 * header via llog_lvfs_read_blob(), takes lgh_last_idx from the index in
 * the header tail, and positions the file at its end (i_size).
 *
 * NOTE(review): the early return for the empty-file case, the remaining
 * arguments of the read call, the error check and the return statement are
 * not visible in this listing.
 */
149 static int llog_lvfs_read_header(struct llog_handle *handle)
151 struct llog_ctxt *ctxt = handle->lgh_ctxt;
/* The header is read as one whole chunk, so the two sizes must agree. */
155 LASSERT(sizeof(*handle->lgh_hdr) == LLOG_CHUNK_SIZE);
156 LASSERT(ctxt != NULL);
/* A freshly created log has no header on disk yet. */
158 if (handle->lgh_file->f_dentry->d_inode->i_size == 0) {
159 CDEBUG(D_HA, "not reading header from 0-byte log\n");
163 rc = llog_lvfs_read_blob(ctxt, handle->lgh_file, handle->lgh_hdr,
166 CERROR("error reading log header\n");
/* The header tail records the last index handed out in this log. */
168 handle->lgh_last_idx = le32_to_cpu(handle->lgh_hdr->llh_tail.lrt_index);
/* Subsequent appends start at the current end of the file. */
169 handle->lgh_file->f_pos = handle->lgh_file->f_dentry->d_inode->i_size;
174 /* returns negative in on error; 0 if success && reccookie == 0; 1 otherwise */
175 /* appends if idx == -1, otherwise overwrites record idx. */
176 static int llog_lvfs_write_rec(struct llog_handle *loghandle,
177 struct llog_rec_hdr *rec,
178 struct llog_cookie *reccookie,
182 struct llog_log_hdr *llh;
183 int reclen = le32_to_cpu(rec->lrh_len), index, rc;
184 struct llog_rec_tail *lrt;
185 struct llog_ctxt *ctxt = loghandle->lgh_ctxt;
191 llh = loghandle->lgh_hdr;
192 file = loghandle->lgh_file;
194 /* record length should not bigger than LLOG_CHUNK_SIZE */
196 rc = (reclen > LLOG_CHUNK_SIZE - sizeof(struct llog_rec_hdr)
197 - sizeof(struct llog_rec_tail)) ? -E2BIG : 0;
199 rc = (reclen > LLOG_CHUNK_SIZE) ? -E2BIG : 0;
206 /* no header: only allowed to insert record 1 */
207 if (idx > 1 && !file->f_dentry->d_inode->i_size) {
208 CERROR("idx != -1 in empty log\n");
212 if (idx && llh->llh_size && llh->llh_size != reclen)
215 rc = llog_lvfs_write_blob(ctxt, file, &llh->llh_hdr, NULL, 0);
216 /* we are done if we only write the header or on error */
220 saved_offset = sizeof(*llh) + (idx-1)*le32_to_cpu(rec->lrh_len);
221 rc = llog_lvfs_write_blob(ctxt, file, rec, buf, saved_offset);
222 if (rc == 0 && reccookie) {
223 reccookie->lgc_lgl = loghandle->lgh_id;
224 reccookie->lgc_index = idx;
230 /* Make sure that records don't cross a chunk boundary, so we can
231 * process them page-at-a-time if needed. If it will cross a chunk
232 * boundary, write in a fake (but referenced) entry to pad the chunk.
234 * We know that llog_current_log() will return a loghandle that is
235 * big enough to hold reclen, so all we care about is padding here.
237 left = LLOG_CHUNK_SIZE - (file->f_pos & (LLOG_CHUNK_SIZE - 1));
239 reclen = sizeof(*rec) + le32_to_cpu(rec->lrh_len) +
240 sizeof(struct llog_rec_tail);
242 /* NOTE: padding is a record, but no bit is set */
243 if (left != 0 && left != reclen &&
244 left < (reclen + LLOG_MIN_REC_SIZE)) {
245 loghandle->lgh_last_idx++;
246 rc = llog_lvfs_pad(ctxt, file, left, loghandle->lgh_last_idx);
249 /* if it's the last idx in log file, then return -ENOSPC */
250 if (loghandle->lgh_last_idx == LLOG_BITMAP_SIZE(llh) - 1)
254 loghandle->lgh_last_idx++;
255 index = loghandle->lgh_last_idx;
256 LASSERT(index < LLOG_BITMAP_SIZE(llh));
257 rec->lrh_index = cpu_to_le32(index);
259 lrt = (void *)rec + le32_to_cpu(rec->lrh_len) - sizeof(*lrt);
260 lrt->lrt_len = rec->lrh_len;
261 lrt->lrt_index = rec->lrh_index;
263 if (ext2_set_bit(index, llh->llh_bitmap)) {
264 CERROR("argh, index %u already set in log bitmap?\n", index);
265 LBUG(); /* should never happen */
267 llh->llh_count = cpu_to_le32(le32_to_cpu(llh->llh_count) + 1);
268 llh->llh_tail.lrt_index = cpu_to_le32(index);
271 rc = llog_lvfs_write_blob(ctxt, file, &llh->llh_hdr, NULL, 0);
275 CDEBUG(D_HA, "adding record "LPX64": idx: %u, %u bytes off: %lld\n",
276 loghandle->lgh_id.lgl_oid, index, le32_to_cpu(rec->lrh_len),
279 rc = llog_lvfs_write_blob(ctxt, file, rec, buf, file->f_pos);
283 if (rc == 0 && reccookie) {
284 if (llog_cookie_get_flags(reccookie) & LLOG_COOKIE_REPLAY) {
285 LASSERTF(EQ_LOGID(reccookie->lgc_lgl,loghandle->lgh_id),
286 "lgc_lgl.oid/gr "LPU64"/"LPU64" lgh_id.oid/gr"
288 reccookie->lgc_lgl.lgl_oid,
289 reccookie->lgc_lgl.lgl_ogr,
290 loghandle->lgh_id.lgl_oid,
291 loghandle->lgh_id.lgl_oid);
292 LASSERTF(reccookie->lgc_index == index,
293 "lgc_index %u != index %u\n",
294 reccookie->lgc_index, index);
296 reccookie->lgc_lgl = loghandle->lgh_id;
297 reccookie->lgc_index = index;
298 llog_cookie_add_flags(reccookie, LLOG_COOKIE_REPLAY);
301 if (le32_to_cpu(rec->lrh_type) == MDS_UNLINK_REC)
302 reccookie->lgc_subsys = LLOG_UNLINK_ORIG_CTXT;
303 else if (le32_to_cpu(rec->lrh_type) == OST_SZ_REC)
304 reccookie->lgc_subsys = LLOG_SIZE_ORIG_CTXT;
305 else if (le32_to_cpu(rec->lrh_type) == OST_RAID1_REC)
306 reccookie->lgc_subsys = LLOG_RD1_ORIG_CTXT;
308 reccookie->lgc_subsys = -1;
311 if (rc == 0 && (le32_to_cpu(rec->lrh_type) == LLOG_GEN_REC ||
312 le32_to_cpu(rec->lrh_type) == SMFS_UPDATE_REC))
318 /* We can skip reading at least as many log blocks as the number of
319 * minimum sized log records we are skipping. If it turns out
320 * that we are not far enough along the log (because the
321 * actual records are larger than minimum size) we just skip
322 * some more records. */
/*
 * llog_skip_over - advance *off towards the chunk that may hold index "goal".
 *
 * *off is moved forward by (goal - curr - 1) minimum-sized records and then
 * masked with ~(LLOG_CHUNK_SIZE - 1), i.e. rounded down to the start of a
 * chunk (assuming LLOG_CHUNK_SIZE is a power of two - confirm).
 *
 * NOTE(review): any guard before the assignment (for example for
 * goal <= curr) is not visible in this listing.
 */
324 static void llog_skip_over(__u64 *off, int curr, int goal)
328 *off = (*off + (goal-curr-1) * LLOG_MIN_REC_SIZE) &
329 ~(LLOG_CHUNK_SIZE - 1);
333 * - curr_offset to the furthest point read in the log file
334 * - curr_idx to the log index preceding curr_offset
335 * returns -EIO/-EINVAL on error
/*
 * llog_lvfs_next_block - read forward until a block containing next_idx is found.
 *
 * Visible behaviour:
 *  - "len" must be non-zero and a multiple of LLOG_CHUNK_SIZE;
 *  - while *curr_offset is inside the file: skip ahead with
 *    llog_skip_over(), read a block into "buf", and compute the number of
 *    bytes read from the difference between the new position and
 *    *curr_offset;
 *  - zero bytes means end of file; fewer bytes than a record tail, or a tail
 *    index of zero, is reported as an invalid block;
 *  - *curr_idx is set to the index stored in the tail at the end of the
 *    data read; if that index is still below next_idx the search goes on;
 *  - finally the first record in the buffer must not already be past
 *    next_idx.
 *
 * NOTE(review): the last parameter, local declarations, the read arguments,
 * the assignment of "rec" and all return statements are not visible here.
 */
337 static int llog_lvfs_next_block(struct llog_handle *loghandle, int *curr_idx,
338 int next_idx, __u64 *curr_offset, void *buf,
341 struct llog_ctxt *ctxt = loghandle->lgh_ctxt;
/* The caller buffer must hold a whole number of chunks. */
344 if (len == 0 || len & (LLOG_CHUNK_SIZE - 1))
347 CDEBUG(D_OTHER, "looking for log index %u (cur idx %u off "LPU64")\n",
348 next_idx, *curr_idx, *curr_offset);
350 while (*curr_offset < loghandle->lgh_file->f_dentry->d_inode->i_size) {
351 struct llog_rec_hdr *rec;
352 struct llog_rec_tail *tail;
/* Jump over chunks that cannot contain next_idx yet. */
356 llog_skip_over(curr_offset, *curr_idx, next_idx);
359 rc = llog_fsfilt_read_record(ctxt, loghandle->lgh_file,
363 CERROR("Cant read llog block at log id "LPU64
364 "/%u offset "LPU64"\n",
365 loghandle->lgh_id.lgl_oid,
366 loghandle->lgh_id.lgl_ogen,
/* The read advances ppos; the distance travelled is the byte count. */
371 nbytes = ppos - *curr_offset;
374 if (nbytes == 0) /* end of file, nothing to do */
377 if (nbytes < sizeof(*tail)) {
378 CERROR("Invalid llog block at log id "LPU64"/%u offset "
379 LPU64"\n", loghandle->lgh_id.lgl_oid,
380 loghandle->lgh_id.lgl_ogen, *curr_offset);
/* The tail of the last record sits at the very end of the data read. */
384 tail = buf + nbytes - sizeof(struct llog_rec_tail);
385 *curr_idx = le32_to_cpu(tail->lrt_index);
387 /* this shouldn't happen */
388 if (tail->lrt_index == 0) {
389 CERROR("Invalid llog tail at log id "LPU64"/%u offset "
390 LPU64"\n", loghandle->lgh_id.lgl_oid,
391 loghandle->lgh_id.lgl_ogen, *curr_offset);
/* This block ends before the wanted index: keep searching. */
394 if (le32_to_cpu(tail->lrt_index) < next_idx) {
399 /* sanity check that the start of the new buffer is no farther
400 * than the record that we wanted. This shouldn't happen. */
402 if (le32_to_cpu(rec->lrh_index) > next_idx) {
403 CERROR("missed desired record? %u > %u\n",
404 le32_to_cpu(rec->lrh_index), next_idx);
/*
 * llog_lvfs_prev_block - locate and read the block that contains prev_idx.
 *
 * Visible behaviour:
 *  - "len" must be non-zero and a multiple of LLOG_CHUNK_SIZE;
 *  - the search starts at offset LLOG_CHUNK_SIZE (just after the first
 *    chunk) and is moved forward once with llog_skip_over();
 *  - each iteration reads a block, derives the byte count from the position
 *    advance (stored in rc), and validates the trailing record tail;
 *  - a block whose last index is below prev_idx is skipped;
 *  - finally the first record in the buffer must not already be past
 *    prev_idx.
 *
 * NOTE(review): local declarations, the read arguments, the assignment of
 * "rec" and all return statements are not visible in this listing.
 */
412 static int llog_lvfs_prev_block(struct llog_handle *loghandle,
413 int prev_idx, void *buf, int len)
415 struct llog_ctxt *ctxt = loghandle->lgh_ctxt;
/* The caller buffer must hold a whole number of chunks. */
420 if (len == 0 || len & (LLOG_CHUNK_SIZE - 1))
423 CDEBUG(D_OTHER, "looking for log index %u \n", prev_idx);
/* Begin after the first chunk and skip what cannot contain prev_idx. */
425 curr_offset = LLOG_CHUNK_SIZE;
426 llog_skip_over(&curr_offset, 0, prev_idx);
428 while (curr_offset < loghandle->lgh_file->f_dentry->d_inode->i_size) {
429 struct llog_rec_hdr *rec;
430 struct llog_rec_tail *tail;
434 rc = llog_fsfilt_read_record(ctxt, loghandle->lgh_file,
438 CERROR("Cant read llog block at log id "LPU64
439 "/%u offset "LPU64"\n",
440 loghandle->lgh_id.lgl_oid,
441 loghandle->lgh_id.lgl_ogen,
446 /* put number of bytes read into rc to make code simpler */
447 rc = ppos - curr_offset;
450 if (rc == 0) /* end of file, nothing to do */
453 if (rc < sizeof(*tail)) {
454 CERROR("Invalid llog block at log id "LPU64"/%u offset "
455 LPU64"\n", loghandle->lgh_id.lgl_oid,
456 loghandle->lgh_id.lgl_ogen, curr_offset);
/* The tail of the last record sits at the very end of the data read. */
460 tail = buf + rc - sizeof(struct llog_rec_tail);
462 /* this shouldn't happen */
463 if (tail->lrt_index == 0) {
464 CERROR("Invalid llog tail at log id "LPU64"/%u offset "
465 LPU64"\n", loghandle->lgh_id.lgl_oid,
466 loghandle->lgh_id.lgl_ogen, curr_offset);
/* This block ends before the wanted index: keep searching. */
469 if (le32_to_cpu(tail->lrt_index) < prev_idx)
472 /* sanity check that the start of the new buffer is no farther
473 * than the record that we wanted. This shouldn't happen. */
475 if (le32_to_cpu(rec->lrh_index) > prev_idx) {
476 CERROR("missed desired record? %u > %u\n",
477 le32_to_cpu(rec->lrh_index), prev_idx);
/*
 * llog_filp_open - open (or create) the named log file under "LOGS/".
 *
 * Visible behaviour: a PATH_MAX scratch buffer is allocated, the path
 * "LOGS/<name>" is formatted into it, a result that does not fit yields
 * ERR_PTR(-ENAMETOOLONG), otherwise the file is opened with l_filp_open().
 * A failed open is logged, and the scratch buffer is released before
 * returning.  Allocation failure yields ERR_PTR(-ENOMEM).
 *
 * NOTE(review): local declarations, the checks guarding the ENOMEM return
 * and the CERROR, and the final return of "filp" are not visible here.
 */
485 static struct file *llog_filp_open(char *name, int flags, int mode)
491 OBD_ALLOC(logname, PATH_MAX);
493 return ERR_PTR(-ENOMEM);
/* snprintf reports the length it wanted to write, so truncation is detectable. */
495 len = snprintf(logname, PATH_MAX, "LOGS/%s", name);
496 if (len >= PATH_MAX - 1) {
497 filp = ERR_PTR(-ENAMETOOLONG);
499 filp = l_filp_open(logname, flags, mode);
501 CERROR("logfile %s(%s): %ld\n",
502 flags & O_CREAT ? "create" : "open", logname,
507 OBD_FREE(logname, PATH_MAX);
511 /* creates object for the case when we have no obd (smfs). */
/*
 * llog_object_create_alone - open or create a log object directly in the
 * objects directory of the context, without going through an OBD.
 *
 * Visible behaviour:
 *  - with a non-zero lgh_id->lgl_oid: the object name is derived with
 *    ll_id2str() and looked up in ctxt->loc_objects_dir under its i_sem;
 *    a negative dentry is (re)created with ll_vfs_create() using the
 *    requested inode number and the new inode is given the requested
 *    generation; the dentry is then opened and must be a regular file;
 *  - with a zero oid: a file with a random temporary name is created under
 *    "OBJECTS/", then renamed (inside an fsfilt transaction) to the name
 *    derived from its inode number and generation, and lgh_id is filled in
 *    from the resulting inode.
 *
 * NOTE(review): many short lines (declarations, labels, error checks,
 * cleanup and returns) are not visible in this listing.
 *
 * Fix in this revision: when the lookup of the rename target fails, the
 * error is now stored in rc before it is logged; previously the message
 * printed whatever rc held from an earlier step.
 */
513 llog_object_create_alone(struct llog_ctxt *ctxt, struct llog_logid *lgh_id)
519 LASSERT(lgh_id != NULL);
520 if (lgh_id->lgl_oid) {
521 struct dentry *dchild;
522 char id_name[LL_ID_NAMELEN];
/* The directory semaphore serialises lookup and create of the object name. */
525 down(&ctxt->loc_objects_dir->d_inode->i_sem);
526 id_len = ll_id2str(id_name, lgh_id->lgl_oid,
529 dchild = lookup_one_len(id_name, ctxt->loc_objects_dir,
531 if (IS_ERR(dchild)) {
532 up(&ctxt->loc_objects_dir->d_inode->i_sem);
533 RETURN((struct file *)dchild);
535 if (dchild->d_inode == NULL) {
536 struct dentry_params dp;
/* The wanted inode number is handed to the create path through d_fsdata. */
539 dchild->d_fsdata = (void *) &dp;
541 dp.p_inum = lgh_id->lgl_oid;
542 rc = ll_vfs_create(ctxt->loc_objects_dir->d_inode,
543 dchild, S_IFREG, NULL);
544 if (dchild->d_fsdata == (void *)(unsigned long)lgh_id->lgl_oid)
545 dchild->d_fsdata = NULL;
547 CDEBUG(D_INODE, "err during create: %d\n", rc);
549 up(&ctxt->loc_objects_dir->d_inode->i_sem);
552 inode = dchild->d_inode;
553 LASSERT(inode->i_ino == lgh_id->lgl_oid);
554 inode->i_generation = lgh_id->lgl_ogen;
555 CDEBUG(D_HA, "recreated ino %lu with gen %u\n",
556 inode->i_ino, inode->i_generation);
557 mark_inode_dirty(inode);
560 mntget(ctxt->loc_lvfs_ctxt->pwdmnt);
561 filp = dentry_open(dchild, ctxt->loc_lvfs_ctxt->pwdmnt,
562 O_RDWR | O_LARGEFILE);
565 up(&ctxt->loc_objects_dir->d_inode->i_sem);
568 if (!S_ISREG(filp->f_dentry->d_inode->i_mode)) {
569 CERROR("%s is not a regular file!: mode = %o\n",
570 id_name, filp->f_dentry->d_inode->i_mode);
572 up(&ctxt->loc_objects_dir->d_inode->i_sem);
573 RETURN(ERR_PTR(-ENOENT));
576 up(&ctxt->loc_objects_dir->d_inode->i_sem);
580 unsigned int tmpname = ll_insecure_random_int();
581 char id_name[LL_ID_NAMELEN];
582 struct dentry *new_child, *parent;
/* Create under a random temporary name first; the final name depends on the new inode. */
586 sprintf(id_name, "OBJECTS/%u", tmpname);
587 filp = filp_open(id_name, O_CREAT | O_EXCL, 0644);
591 CERROR("impossible object name collision %u\n",
595 CERROR("error creating tmp object %u: rc %d\n", tmpname, rc);
599 id_len = ll_id2str(id_name, filp->f_dentry->d_inode->i_ino,
600 filp->f_dentry->d_inode->i_generation);
601 parent = filp->f_dentry->d_parent;
602 down(&parent->d_inode->i_sem);
603 new_child = lookup_one_len(id_name, parent, id_len);
604 if (IS_ERR(new_child)) {
/* Capture the lookup error before logging it. */
rc = PTR_ERR(new_child);
CERROR("getting neg dentry for obj rename: %d\n", rc);
GOTO(out_close, rc);
608 if (new_child->d_inode != NULL) {
609 CERROR("impossible non-negative obj dentry %lu:%u!\n",
610 filp->f_dentry->d_inode->i_ino,
611 filp->f_dentry->d_inode->i_generation);
/* The rename is done inside an fsfilt transaction. */
615 handle = llog_fsfilt_start(ctxt, parent->d_inode, FSFILT_OP_RENAME, NULL);
617 GOTO(out_dput, rc = PTR_ERR(handle));
620 rc = vfs_rename(parent->d_inode, filp->f_dentry,
621 parent->d_inode, new_child);
624 CERROR("error renaming new object %lu:%u: rc %d\n",
625 filp->f_dentry->d_inode->i_ino,
626 filp->f_dentry->d_inode->i_generation, rc);
628 err = llog_fsfilt_commit(ctxt, parent->d_inode, handle, 0);
635 up(&parent->d_inode->i_sem);
640 /* FIXME: is this group 1 is correct? */
642 lgh_id->lgl_oid = filp->f_dentry->d_inode->i_ino;
643 lgh_id->lgl_ogen = filp->f_dentry->d_inode->i_generation;
649 /* creates object for generic case (obd exists) */
/*
 * llog_object_create_generic - open, re-create or create a log object via
 * the OBD export of the context.
 *
 * Visible behaviour:
 *  - with a non-zero lgh_id->lgl_oid: the object is looked up by id,
 *    generation and group; if the lookup reports -ENOENT the object is
 *    re-created with obd_create() using the same identifiers and looked up
 *    again; any other lookup error is returned; the dentry is then opened;
 *  - with a zero oid: a new object is created in group FILTER_GROUP_LLOG via
 *    obd_create(), looked up, opened, and lgh_id is filled in from the obdo;
 *  - the temporary obdo is released on the way out.
 *
 * NOTE(review): several short lines (rc declaration, allocation checks,
 * labels, dput/cleanup and returns) are not visible in this listing.
 *
 * Fixes in this revision:
 *  - the -ENOENT test used IS_ERR(), which yields a boolean and can never
 *    equal -ENOENT, so the re-create branch was unreachable; it now uses
 *    PTR_ERR();
 *  - the result of the second lookup is checked before it is opened;
 *  - the lookup-failure message printed rc, which is not set on that path;
 *    it now prints the error carried by the dentry pointer.
 */
651 llog_object_create_generic(struct llog_ctxt *ctxt, struct llog_logid *lgh_id)
653 struct file *filp = NULL;
654 struct dentry *dchild;
655 struct obd_device *obd;
656 struct obdo *oa = NULL;
657 int open_flags = O_RDWR | O_LARGEFILE;
661 obd = ctxt->loc_exp->exp_obd;
662 LASSERT(obd != NULL);
664 if (lgh_id->lgl_oid) {
665 dchild = obd_lvfs_id2dentry(ctxt->loc_exp, lgh_id->lgl_oid,
666 lgh_id->lgl_ogen, lgh_id->lgl_ogr);
/* Only a missing object is re-created; other errors are reported below. */
if (IS_ERR(dchild) && PTR_ERR(dchild) == -ENOENT) {
668 OBD_ALLOC(oa, sizeof(*oa));
670 RETURN(ERR_PTR(-ENOMEM));
672 oa->o_id = lgh_id->lgl_oid;
673 oa->o_generation = lgh_id->lgl_ogen;
674 oa->o_gr = lgh_id->lgl_ogr;
675 oa->o_valid = OBD_MD_FLGENER | OBD_MD_FLGROUP;
676 rc = obd_create(ctxt->loc_exp, oa, NULL, 0, NULL, NULL);
678 CDEBUG(D_INODE, "err during create: %d\n", rc);
679 GOTO(out_free_oa, rc);
681 CDEBUG(D_HA, "re-create log object "LPX64":0x%x:"LPX64"\n",
682 lgh_id->lgl_oid, lgh_id->lgl_ogen, lgh_id->lgl_ogr);
684 dchild = obd_lvfs_id2dentry(ctxt->loc_exp, lgh_id->lgl_oid,
685 lgh_id->lgl_ogen, lgh_id->lgl_ogr);
/* Do not hand an error pointer to l_dentry_open(). */
if (IS_ERR(dchild))
        GOTO(out_free_oa, rc = PTR_ERR(dchild));
686 } else if (IS_ERR(dchild)) {
687 CERROR("error looking up logfile "LPX64":0x%x: rc %d\n",
lgh_id->lgl_oid, lgh_id->lgl_ogen, (int)PTR_ERR(dchild));
689 RETURN((struct file *)dchild);
692 filp = l_dentry_open(&obd->obd_lvfs_ctxt, dchild, open_flags);
696 CERROR("error opening logfile "LPX64"0x%x: rc %d\n",
697 lgh_id->lgl_oid, lgh_id->lgl_ogen, rc);
699 GOTO(out_free_oa, rc);
701 /* this is important to work here over obd_create() as it manages
702 groups and we need it. Yet another reason is that mds_obd_create()
703 is fully the same as old version of this function and this helps
704 us to avoid code duplicating and layering violating. */
705 OBD_ALLOC(oa, sizeof(*oa));
707 RETURN(ERR_PTR(-ENOMEM));
709 oa->o_gr = FILTER_GROUP_LLOG;
710 oa->o_valid = OBD_MD_FLGENER | OBD_MD_FLGROUP;
711 rc = obd_create(ctxt->loc_exp, oa, NULL, 0, NULL, NULL);
713 GOTO(out_free_oa, rc);
715 dchild = obd_lvfs_id2dentry(ctxt->loc_exp, oa->o_id,
716 oa->o_generation, oa->o_gr);
718 GOTO(out_free_oa, rc = PTR_ERR(dchild));
720 filp = l_dentry_open(&obd->obd_lvfs_ctxt, dchild,
724 GOTO(out_free_oa, rc = PTR_ERR(filp));
727 /* group 1 is not longer valid, we use the group which is set
728 by obd_create()->mds_obd_create(). */
729 lgh_id->lgl_ogr = oa->o_gr;
730 lgh_id->lgl_oid = oa->o_id;
731 lgh_id->lgl_ogen = oa->o_generation;
738 OBD_FREE(oa, sizeof(*oa));
/*
 * llog_object_create - dispatch to the stand-alone or the OBD-based creator.
 *
 * NOTE(review): the selecting condition is not visible in this listing;
 * presumably it tests ctxt->loc_alone - confirm.
 */
743 llog_object_create(struct llog_ctxt *ctxt, struct llog_logid *lgh_id)
746 return llog_object_create_alone(ctxt, lgh_id);
748 return llog_object_create_generic(ctxt, lgh_id);
/*
 * llog_add_link_object - hard-link an opened log into the objects directory
 * under the name derived from its log id.
 *
 * Visible behaviour: the id-based name is looked up in
 * ctxt->loc_objects_dir under the directory i_sem; if it already refers to
 * the same inode nothing more is needed; an unrelated existing inode is
 * reported as an error; otherwise vfs_link() is performed inside an fsfilt
 * transaction and the transaction is committed.
 *
 * NOTE(review): the "handle" declaration, labels, the branch targets of the
 * checks and the return statement are not visible in this listing.
 *
 * Fix in this revision: on lookup failure the error is stored in rc before
 * it is logged; previously the message always printed the initial rc of 0.
 */
751 static int llog_add_link_object(struct llog_ctxt *ctxt, struct llog_logid logid,
752 struct dentry *dentry)
754 struct dentry *new_child;
755 char id_name[LL_ID_NAMELEN];
757 int id_len, rc = 0, err;
760 id_len = ll_id2str(id_name, logid.lgl_oid, logid.lgl_ogen);
761 down(&ctxt->loc_objects_dir->d_inode->i_sem);
762 new_child = lookup_one_len(id_name, ctxt->loc_objects_dir, id_len);
763 if (IS_ERR(new_child)) {
/* Capture the lookup error before logging it. */
rc = PTR_ERR(new_child);
CERROR("getting neg dentry for obj rename: %d\n", rc);
GOTO(out, rc);
/* Already linked to this very inode: nothing to do. */
767 if (new_child->d_inode == dentry->d_inode)
769 if (new_child->d_inode != NULL) {
770 CERROR("impossible non-negative obj dentry "LPX64":%u!\n",
771 logid.lgl_oid, logid.lgl_ogen);
774 handle = llog_fsfilt_start(ctxt, ctxt->loc_objects_dir->d_inode,
775 FSFILT_OP_LINK, NULL);
777 GOTO(out_dput, rc = PTR_ERR(handle));
780 rc = vfs_link(dentry, ctxt->loc_objects_dir->d_inode, new_child);
783 CERROR("error link new object "LPX64":%08x: rc %d\n",
784 logid.lgl_oid, logid.lgl_ogen, rc);
785 /* it doesn't make much sense to get -EEXIST here */
786 LASSERTF(rc != -EEXIST, "bug 3490: dentry: %p "
787 "dir->d_ionode %p new_child: %p \n",
788 dentry, ctxt->loc_objects_dir->d_inode, new_child);
790 err = llog_fsfilt_commit(ctxt, ctxt->loc_objects_dir->d_inode, handle, 0);
794 up(&ctxt->loc_objects_dir->d_inode->i_sem);
/*
 * llog_lvfs_open - open a log by id, by name, or create an anonymous one.
 *
 * Visible behaviour:
 *  - O_CREAT is added to the open flags when OBD_LLOG_FL_CREATE is set;
 *  - a handle is allocated and the lvfs context (if any) is pushed;
 *  - one path opens/creates the object named by "logid" and copies the id
 *    into the handle;
 *  - one path opens the file "name" below LOGS/, fills the handle id from
 *    the inode (group 1, inode number, generation) and links the object
 *    into the objects directory;
 *  - one path creates a fresh object and lets the creator fill the id;
 *  - the context is stored in the handle and the lvfs context is popped;
 *  - on failure the handle is freed.
 *
 * NOTE(review): the conditions selecting the three paths, the allocation
 * check, the assignment to *res, labels and the return statement are not
 * visible in this listing.
 *
 * Fix in this revision: the "cannot create/open llog object" message was
 * missing its trailing newline, unlike every other message in this file.
 */
798 static int llog_lvfs_open(struct llog_ctxt *ctxt, struct llog_handle **res,
799 struct llog_logid *logid, char *name, int flags)
801 struct llog_handle *handle;
802 struct lvfs_run_ctxt saved;
804 int open_flags = O_RDWR | O_LARGEFILE;
807 if (flags & OBD_LLOG_FL_CREATE)
808 open_flags |= O_CREAT;
810 handle = llog_alloc_handle();
816 if (ctxt->loc_lvfs_ctxt)
817 push_ctxt(&saved, ctxt->loc_lvfs_ctxt, NULL);
/* Path 1: open (or re-create) the object identified by logid. */
820 handle->lgh_file = llog_object_create(ctxt, logid);
821 if (IS_ERR(handle->lgh_file)) {
822 CERROR("cannot create/open llog object "LPX64":%x "
"error = %ld\n", logid->lgl_oid, logid->lgl_ogen,
824 PTR_ERR(handle->lgh_file));
825 GOTO(cleanup, rc = PTR_ERR(handle->lgh_file));
827 handle->lgh_id = *logid;
/* Path 2: open a named log below LOGS/ and derive its id from the inode. */
830 handle->lgh_file = llog_filp_open(name, open_flags, 0644);
831 if (IS_ERR(handle->lgh_file)) {
832 CERROR("cannot open %s file, error = %ld\n",
833 name, PTR_ERR(handle->lgh_file));
834 GOTO(cleanup, rc = PTR_ERR(handle->lgh_file));
836 LASSERT(handle->lgh_file->f_dentry->d_parent == ctxt->loc_logs_dir);
838 handle->lgh_id.lgl_ogr = 1;
839 handle->lgh_id.lgl_oid = handle->lgh_file->f_dentry->d_inode->i_ino;
840 handle->lgh_id.lgl_ogen = handle->lgh_file->f_dentry->d_inode->i_generation;
841 rc = llog_add_link_object(ctxt, handle->lgh_id, handle->lgh_file->f_dentry);
/* Path 3: create a brand new object; the creator fills in handle->lgh_id. */
846 handle->lgh_file = llog_object_create(ctxt, &handle->lgh_id);
847 if (IS_ERR(handle->lgh_file)) {
848 CERROR("cannot create llog object, error = %ld\n",
849 PTR_ERR(handle->lgh_file));
850 GOTO(cleanup, rc = PTR_ERR(handle->lgh_file));
854 handle->lgh_ctxt = ctxt;
856 if (ctxt->loc_lvfs_ctxt)
857 pop_ctxt(&saved, ctxt->loc_lvfs_ctxt, NULL);
860 llog_free_handle(handle);
/*
 * llog_lvfs_close - close the file behind a log handle.
 *
 * The file is closed with filp_close() and a failure is logged.
 *
 * NOTE(review): the error check and the return statement are not visible in
 * this listing; presumably rc is returned.
 */
864 static int llog_lvfs_close(struct llog_handle *handle)
869 rc = filp_close(handle->lgh_file, 0);
871 CERROR("error closing log: rc %d\n", rc);
/*
 * llog_lvfs_destroy - close a log and remove its on-disk object(s).
 *
 * Visible behaviour, after pushing the lvfs context (if any):
 *  - named log (parent directory is "LOGS"): the id-based name is computed
 *    from the inode, the log is closed, and inside one fsfilt transaction
 *    the file is unlinked from LOGS and its id-named link is looked up in
 *    and unlinked from the objects directory;
 *  - stand-alone context (ctxt->loc_alone) with parent "OBJECTS": the log is
 *    closed and the file is unlinked from its parent;
 *  - otherwise: an obdo is filled with the log id, group and generation, the
 *    log is closed and the object is destroyed through obd_destroy();
 *  - finally the lvfs context is popped.
 *
 * NOTE(review): the "handle" declaration, labels, several error checks and
 * the return statement are not visible in this listing.
 * NOTE(review): in the "destroy non_existent object" branch a successful
 * lookup that yields a negative dentry has no visible dput - confirm whether
 * the out_err path releases it.
 */
875 static int llog_lvfs_destroy(struct llog_handle *loghandle)
877 struct llog_ctxt *ctxt = loghandle->lgh_ctxt;
878 struct lvfs_run_ctxt saved;
879 struct dentry *fdentry;
880 struct inode *parent_inode;
881 char id_name[LL_ID_NAMELEN];
883 int rc = -EINVAL, err, id_len;
886 if (ctxt->loc_lvfs_ctxt)
887 push_ctxt(&saved, ctxt->loc_lvfs_ctxt, NULL);
889 fdentry = loghandle->lgh_file->f_dentry;
890 parent_inode = fdentry->d_parent->d_inode;
/* Case 1: a named log living in the LOGS directory. */
892 if (!strcmp((char *)fdentry->d_parent->d_name.name, "LOGS")) {
893 LASSERT(parent_inode == ctxt->loc_logs_dir->d_inode);
/* Remember the id-based name before the file is closed. */
895 id_len = ll_id2str(id_name, fdentry->d_inode->i_ino,
896 fdentry->d_inode->i_generation);
898 rc = llog_lvfs_close(loghandle);
904 handle = llog_fsfilt_start(ctxt, parent_inode,
905 FSFILT_OP_UNLINK, NULL);
906 if (IS_ERR(handle)) {
908 GOTO(out, rc = PTR_ERR(handle));
911 down(&parent_inode->i_sem);
912 rc = vfs_unlink(parent_inode, fdentry);
913 up(&parent_inode->i_sem);
/* Also remove the id-named link from the objects directory. */
917 down(&ctxt->loc_objects_dir->d_inode->i_sem);
918 fdentry = lookup_one_len(id_name, ctxt->loc_objects_dir,
920 if (IS_ERR(fdentry) || fdentry->d_inode == NULL) {
921 CERROR("destroy non_existent object %s\n",
923 GOTO(out_err, rc = IS_ERR(fdentry) ?
924 PTR_ERR(fdentry) : -ENOENT);
926 rc = vfs_unlink(ctxt->loc_objects_dir->d_inode, fdentry);
929 up(&ctxt->loc_objects_dir->d_inode->i_sem);
931 err = llog_fsfilt_commit(ctxt, parent_inode, handle, 0);
/* Case 2: stand-alone context, object lives directly in OBJECTS. */
937 if (ctxt->loc_alone) {
938 if (!strcmp((char *)fdentry->d_parent->d_name.name, "OBJECTS")) {
939 LASSERT(parent_inode == ctxt->loc_objects_dir->d_inode);
942 rc = llog_lvfs_close(loghandle);
944 down(&parent_inode->i_sem);
945 rc = vfs_unlink(parent_inode, fdentry);
946 up(&parent_inode->i_sem);
/* Case 3: destroy the object through the OBD export. */
951 struct obdo *oa = NULL;
953 OBD_ALLOC(oa, sizeof(*oa));
955 GOTO(out, rc = -ENOMEM);
957 oa->o_id = loghandle->lgh_id.lgl_oid;
958 oa->o_gr = loghandle->lgh_id.lgl_ogr;
959 oa->o_generation = loghandle->lgh_id.lgl_ogen;
960 oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP | OBD_MD_FLGENER;
962 rc = llog_lvfs_close(loghandle);
964 GOTO(out_free_oa, rc);
966 rc = obd_destroy(loghandle->lgh_ctxt->loc_exp, oa, NULL, NULL);
968 OBD_FREE(oa, sizeof(*oa));
971 if (ctxt->loc_lvfs_ctxt)
972 pop_ctxt(&saved, ctxt->loc_lvfs_ctxt, NULL);
976 /* reads the catalog list */
/*
 * llog_get_cat_list - read "count" catalog ids from the file "name" into
 * "idarray".
 *
 * Visible behaviour: the run context is pushed, the file is opened (and
 * created if missing), it must be a regular file (otherwise -ENOENT), the
 * whole array is read with fsops->fs_read_record(), the file is closed and
 * the run context is popped.
 *
 * NOTE(review): the "file"/"off"/"rc" declarations, the "out" label, the
 * error checks and the return statement are not visible in this listing.
 *
 * Fix in this revision: the result of filp_close() used to overwrite rc
 * unconditionally, so an earlier failure (for example the -ENOENT set for a
 * non-regular file, or a read error) was reported as success.  The close
 * status is now used only when no earlier error is pending.
 */
977 int llog_get_cat_list(struct lvfs_run_ctxt *ctxt,
978 struct fsfilt_operations *fsops, const char *name,
979 int count, struct llog_catid *idarray)
981 struct lvfs_run_ctxt saved;
983 int size = sizeof(*idarray) * count;
990 push_ctxt(&saved, ctxt, NULL);
991 file = l_filp_open(name, O_RDWR | O_CREAT | O_LARGEFILE, 0700);
992 if (!file || IS_ERR(file)) {
994 CERROR("OBD filter: cannot open/create %s: rc = %d\n",
999 if (!S_ISREG(file->f_dentry->d_inode->i_mode)) {
1000 CERROR("%s is not a regular file!: mode = %o\n", name,
1001 file->f_dentry->d_inode->i_mode);
1002 GOTO(out, rc = -ENOENT);
1005 rc = fsops->fs_read_record(file, idarray, size, &off);
1007 CDEBUG(D_INODE,"OBD filter: error reading %s: rc %d\n",
/* Close the file, but keep an earlier error if there is one. */
if (file && !IS_ERR(file)) {
        int close_rc = filp_close(file, 0);

        if (rc >= 0)
                rc = close_rc;
}
1016 pop_ctxt(&saved, ctxt, NULL);
1019 EXPORT_SYMBOL(llog_get_cat_list);
1021 /* writes the cat list */
/*
 * llog_put_cat_list - write "count" catalog ids from "idarray" to the file
 * "name".
 *
 * Visible behaviour: the run context is pushed, the file is opened (and
 * created if missing), it must be a regular file (otherwise -ENOENT), the
 * whole array is written with fsops->fs_write_record() (last argument 1),
 * the file is closed and the run context is popped.
 *
 * NOTE(review): the "off"/"rc" declarations, the "out" label, the error
 * checks and the return statement are not visible in this listing.
 *
 * Fixes in this revision:
 *  - the failure message of the write said "error reading"; it now says
 *    "error writing";
 *  - the result of filp_close() used to overwrite rc unconditionally, hiding
 *    an earlier failure; the close status is now used only when no earlier
 *    error is pending.
 */
1022 int llog_put_cat_list(struct lvfs_run_ctxt *ctxt,
1023 struct fsfilt_operations *fsops, const char *name,
1024 int count, struct llog_catid *idarray)
1026 struct lvfs_run_ctxt saved;
1027 struct l_file *file;
1028 int size = sizeof(*idarray) * count;
1035 push_ctxt(&saved, ctxt, NULL);
1036 file = filp_open(name, O_RDWR | O_CREAT | O_LARGEFILE, 0700);
1037 if (!file || IS_ERR(file)) {
1039 CERROR("OBD filter: cannot open/create %s: rc = %d\n",
1044 if (!S_ISREG(file->f_dentry->d_inode->i_mode)) {
1045 CERROR("%s is not a regular file!: mode = %o\n", name,
1046 file->f_dentry->d_inode->i_mode);
1047 GOTO(out, rc = -ENOENT);
1050 rc = fsops->fs_write_record(file, idarray, size, &off, 1);
CDEBUG(D_INODE,"OBD filter: error writing %s: rc %d\n",
/* Close the file, but keep an earlier error if there is one. */
if (file && !IS_ERR(file)) {
        int close_rc = filp_close(file, 0);

        if (rc >= 0)
                rc = close_rc;
}
1061 pop_ctxt(&saved, ctxt, NULL);
1064 EXPORT_SYMBOL(llog_put_cat_list);
/*
 * Operation table that plugs the lvfs-backed implementations above into the
 * llog layer.  Each member is bound to the function of the same purpose
 * defined in this file; the table is exported for use by other modules.
 *
 * NOTE(review): the "label:" initialiser form is the old GNU designated
 * initialiser syntax; the closing brace of the table is not visible in this
 * listing.
 */
1066 struct llog_operations llog_lvfs_ops = {
1067 lop_open: llog_lvfs_open,
1068 lop_destroy: llog_lvfs_destroy,
1069 lop_close: llog_lvfs_close,
1070 lop_read_header: llog_lvfs_read_header,
1071 lop_write_rec: llog_lvfs_write_rec,
1072 lop_next_block: llog_lvfs_next_block,
1073 lop_prev_block: llog_lvfs_prev_block,
1075 EXPORT_SYMBOL(llog_lvfs_ops);
1077 #else /* !__KERNEL__ */
/*
 * Non-kernel (!__KERNEL__) counterparts of the functions above.  They keep
 * the same names and parameter lists so that the operation table can be
 * built in user space as well.
 *
 * NOTE(review): only the signatures are visible in this listing; the bodies
 * are presumably placeholders - confirm before relying on any of them.
 */
/* Counterpart of the header reader. */
1079 static int llog_lvfs_read_header(struct llog_handle *handle)
/* Counterpart of the record writer. */
1085 static int llog_lvfs_write_rec(struct llog_handle *loghandle,
1086 struct llog_rec_hdr *rec,
1087 struct llog_cookie *reccookie, int cookiecount,
/* Counterpart of the log opener. */
1094 static int llog_lvfs_open(struct llog_ctxt *ctxt, struct llog_handle **res,
1095 struct llog_logid *logid, char *name, int flags)
/* Counterpart of the log closer. */
1101 static int llog_lvfs_close(struct llog_handle *handle)
/* Counterpart of the log destructor. */
1107 static int llog_lvfs_destroy(struct llog_handle *handle)
/* Counterpart of the catalog list reader. */
1113 int llog_get_cat_list(struct lvfs_run_ctxt *ctxt,
1114 struct fsfilt_operations *fsops, const char *name,
1115 int count, struct llog_catid *idarray)
/* Counterpart of the catalog list writer. */
1121 int llog_put_cat_list(struct lvfs_run_ctxt *ctxt,
1122 struct fsfilt_operations *fsops, const char *name,
1123 int count, struct llog_catid *idarray)
/* Counterpart of the backward block reader (not declared static here). */
1129 int llog_lvfs_prev_block(struct llog_handle *loghandle,
1130 int prev_idx, void *buf, int len)
/* Counterpart of the forward block reader (not declared static here). */
1136 int llog_lvfs_next_block(struct llog_handle *loghandle, int *curr_idx,
1137 int next_idx, __u64 *offset, void *buf, int len)
/*
 * Operation table for the non-kernel build; it binds the same members as the
 * kernel table to the non-kernel functions declared above.
 *
 * NOTE(review): the closing brace of the table and anything after it are not
 * visible in this listing.
 */
1143 struct llog_operations llog_lvfs_ops = {
1144 lop_open: llog_lvfs_open,
1145 lop_destroy: llog_lvfs_destroy,
1146 lop_close: llog_lvfs_close,
1147 lop_read_header: llog_lvfs_read_header,
1148 lop_write_rec: llog_lvfs_write_rec,
1149 lop_next_block: llog_lvfs_next_block,
1150 lop_prev_block: llog_lvfs_prev_block,