1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2001-2003 Cluster File Systems, Inc.
5 * Author: Andreas Dilger <adilger@clusterfs.com>
7 * This file is part of the Lustre file system, http://www.lustre.org
8 * Lustre is a trademark of Cluster File Systems, Inc.
10 * You may have signed or agreed to another license before downloading
11 * this software. If so, you are bound by the terms and conditions
12 * of that agreement, and the following does not apply to you. See the
13 * LICENSE file included with this distribution for more information.
15 * If you did not agree to a different license, then this copy of Lustre
16 * is open source software; you can redistribute it and/or modify it
17 * under the terms of version 2 of the GNU General Public License as
18 * published by the Free Software Foundation.
20 * In either case, Lustre is distributed in the hope that it will be
21 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
22 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 * license text for more details.
25 * OST<->MDS recovery logging infrastructure.
27 * Invariants in implementation:
28 * - we do not share logs among different OST<->MDS connections, so that
29 * if an OST or MDS fails it need only look at log(s) relevant to itself
32 #define DEBUG_SUBSYSTEM S_LOG
39 #include <liblustre.h>
42 #include <obd_class.h>
43 #include <lustre_log.h>
44 #include <libcfs/list.h>
45 #include "llog_internal.h"
47 /* Allocate a new log or catalog handle */
/*
 * llog_alloc_handle() takes no arguments and yields a struct llog_handle
 * pointer.  The handle is obtained with OBD_ALLOC() sized for one
 * struct llog_handle.  If the pointer is NULL afterwards, the failure is
 * reported as ERR_PTR(-ENOMEM), i.e. as an error pointer rather than NULL.
 * Otherwise the handle's lgh_lock read/write semaphore is initialized.
 *
 * NOTE(review): the embedded listing numbers jump (48 -> 50, 57 -> 61), so
 * some lines of this function, including the success return, are not part
 * of this excerpt.  Presumably the new handle is returned; confirm against
 * the complete file.
 */
48 struct llog_handle *llog_alloc_handle(void)
50 struct llog_handle *loghandle;
53 OBD_ALLOC(loghandle, sizeof(*loghandle));
/* Out of memory: hand back an error pointer carrying -ENOMEM. */
54 if (loghandle == NULL)
55 RETURN(ERR_PTR(-ENOMEM));
/* Initialize the per-handle read/write semaphore. */
57 init_rwsem(&loghandle->lgh_lock);
61 EXPORT_SYMBOL(llog_alloc_handle);
/*
 * llog_free_handle() tears down a log handle: it detaches or checks the
 * list linkage that matches the header flags, frees the header buffer and
 * then frees the handle itself.
 *
 * loghandle: handle to release; its lgh_hdr may be NULL.
 *
 * NOTE(review): listing lines 65-68, 70 and 76-77 are missing from this
 * excerpt, so the statement controlled by the lgh_hdr test and any earlier
 * argument check are not visible.  Presumably a NULL header skips straight
 * to freeing the handle; confirm against the complete file.
 */
64 void llog_free_handle(struct llog_handle *loghandle)
/* No header attached: the header teardown below must not run. */
69 if (!loghandle->lgh_hdr)
/* A plain log unlinks itself from whatever list phd_entry is on. */
71 if (loghandle->lgh_hdr->llh_flags & LLOG_F_IS_PLAIN)
72 list_del_init(&loghandle->u.phd.phd_entry);
/* A catalog must no longer have entries on its chd_head list. */
73 if (loghandle->lgh_hdr->llh_flags & LLOG_F_IS_CAT)
74 LASSERT(list_empty(&loghandle->u.chd.chd_head));
/*
 * NOTE(review): the header is freed with LLOG_CHUNK_SIZE, whereas
 * llog_init_handle() allocates it with sizeof(*llh).  This is only
 * consistent if sizeof(struct llog_log_hdr) == LLOG_CHUNK_SIZE; verify.
 */
75 OBD_FREE(loghandle->lgh_hdr, LLOG_CHUNK_SIZE);
/* Finally release the handle structure itself. */
78 OBD_FREE(loghandle, sizeof(*loghandle));
80 EXPORT_SYMBOL(llog_free_handle);
82 /* returns negative on error; 0 if success; 1 if success & log destroyed */
/*
 * llog_cancel_rec() clears the bitmap bit of record 'index' in the log
 * header and then either destroys the whole log or rewrites the header.
 *
 * loghandle: open log whose header (lgh_hdr) holds the bitmap.
 * index:     record number to cancel; index 0 is the header record and
 *            is rejected with an error message.
 *
 * If persisting the change fails (llog_destroy() or llog_write_rec()),
 * the bit is set again so the in-memory bitmap matches what is on disk.
 *
 * NOTE(review): several listing lines are missing from this excerpt
 * (e.g. 91-96, 99-103, 108, 110, 112-118, 120, 123-126): the index == 0
 * guard, the record-count update, the rc checks and the return statements
 * are not visible here.  The comments below describe only what is shown.
 */
83 int llog_cancel_rec(struct llog_handle *loghandle, int index)
85 struct llog_log_hdr *llh = loghandle->lgh_hdr;
89 CDEBUG(D_RPCTRACE, "canceling %d in log "LPX64"\n",
90 index, loghandle->lgh_id.lgl_oid);
/* Record 0 is the log header and cannot be cancelled. */
93 CERROR("cannot cancel index 0 (which is header)\n");
/* ext2_clear_bit() reports the previous bit value; zero means it was clear. */
97 if (!ext2_clear_bit(index, llh->llh_bitmap)) {
98 CDEBUG(D_RPCTRACE, "catalog index %u already clear?\n", index);
/*
 * Destroy the log when it is flagged zap-when-empty, its count is 1
 * (llog_init_handle() uses a count of 1 for the header record alone) and
 * the last used index is the final bitmap slot, i.e. the log is full.
 */
104 if ((llh->llh_flags & LLOG_F_ZAP_WHEN_EMPTY) &&
105 (llh->llh_count == 1) &&
106 (loghandle->lgh_last_idx == (LLOG_BITMAP_BYTES * 8) - 1)) {
107 rc = llog_destroy(loghandle);
109 CERROR("failure destroying log after last cancel: %d\n",
/* Destroy failed: restore the bit that was cleared above. */
111 ext2_set_bit(index, llh->llh_bitmap);
/* Otherwise write the updated header record back. */
119 rc = llog_write_rec(loghandle, &llh->llh_hdr, NULL, 0, NULL, 0);
121 CERROR("failure re-writing header %d\n", rc);
/* Header rewrite failed: restore the bit that was cleared above. */
122 ext2_set_bit(index, llh->llh_bitmap);
127 EXPORT_SYMBOL(llog_cancel_rec);
/*
 * llog_init_handle() attaches a newly allocated log header to 'handle'.
 * It first tries to load an existing header with llog_read_header(); the
 * remaining code fills in the fields of a fresh header and initializes
 * the list head that matches the log type.
 *
 * handle: log handle; must not already have a header (asserted).
 * flags:  expected to contain LLOG_F_IS_CAT or LLOG_F_IS_PLAIN; stored in
 *         the header before the read so the right operations are used.
 * uuid:   optional target UUID.  When a header was read, it is compared
 *         with llh_tgtuuid and a mismatch is logged; for a fresh header it
 *         is copied into llh_tgtuuid.
 *
 * NOTE(review): many listing lines are missing from this excerpt (131-132,
 * 134, 136, 138-139, 144, 149-151, 155-158, 165, 169-170, 176, 179-182,
 * 185-187), including rc checks, GOTO/return statements and the guard in
 * front of the memcpy() from 'uuid'.  The memcpy() presumably only runs
 * when uuid is non-NULL; confirm against the complete file.
 */
129 int llog_init_handle(struct llog_handle *handle, int flags,
130 struct obd_uuid *uuid)
133 struct llog_log_hdr *llh;
/* A handle may only be initialized once. */
135 LASSERT(handle->lgh_hdr == NULL);
137 OBD_ALLOC(llh, sizeof(*llh));
140 handle->lgh_hdr = llh;
141 /* first assign flags to use llog_client_ops */
142 llh->llh_flags = flags;
143 rc = llog_read_header(handle);
/* Header was read: from here on use the flags stored in the log. */
145 flags = llh->llh_flags;
146 if (uuid && !obd_uuid_equals(uuid, &llh->llh_tgtuuid)) {
147 CERROR("uuid mismatch: %s/%s\n", (char *)uuid->uuid,
148 (char *)llh->llh_tgtuuid.uuid);
/* Any read result other than "empty log", or no flags supplied at all. */
152 } else if (rc != LLOG_EEMPTY || !flags) {
153 /* set a pseudo flag for initialization */
154 flags = LLOG_F_IS_CAT;
/* Fill in a fresh header for a log that has no records yet. */
159 handle->lgh_last_idx = 0; /* header is record with index 0 */
160 llh->llh_count = 1; /* for the header record */
161 llh->llh_hdr.lrh_type = LLOG_HDR_MAGIC;
/* The header record occupies one full chunk. */
162 llh->llh_hdr.lrh_len = llh->llh_tail.lrt_len = LLOG_CHUNK_SIZE;
163 llh->llh_hdr.lrh_index = llh->llh_tail.lrt_index = 0;
164 llh->llh_timestamp = CURRENT_SECONDS;
166 memcpy(&llh->llh_tgtuuid, uuid, sizeof(llh->llh_tgtuuid));
167 llh->llh_bitmap_offset = offsetof(typeof(*llh),llh_bitmap);
/* Bit 0 of the bitmap marks the header record itself as in use. */
168 ext2_set_bit(0, llh->llh_bitmap);
/* Catalogs keep a list of logs and store fixed-size logid records. */
171 if (flags & LLOG_F_IS_CAT) {
172 CFS_INIT_LIST_HEAD(&handle->u.chd.chd_head);
173 llh->llh_size = sizeof(struct llog_logid_rec);
174 } else if (flags & LLOG_F_IS_PLAIN) {
175 CFS_INIT_LIST_HEAD(&handle->u.phd.phd_entry);
/* NOTE(review): the message below lacks a closing ')' after the last %#x. */
177 CERROR("Unknown flags: %#x (Expected %#x or %#x\n",
178 flags, LLOG_F_IS_CAT, LLOG_F_IS_PLAIN);
/* Undo the header attachment; the controlling condition is not visible here. */
183 OBD_FREE(llh, sizeof(*llh));
184 handle->lgh_hdr = NULL;
188 EXPORT_SYMBOL(llog_init_handle);
/*
 * llog_close() closes a log through the backend's lop_close method and
 * releases the handle with llog_free_handle().
 *
 * loghandle: handle to close; it is freed by this function, so the caller
 *            must not use it afterwards.
 *
 * NOTE(review): listing lines 191, 193-195, 197-198, 202 and 204-205 are
 * missing from this excerpt, so the rc check after llog_handle2ops(), the
 * 'out' label and the return statement are not visible.
 *
 * NOTE(review): GOTO(out, -EOPNOTSUPP) passes the error as a bare value,
 * whereas llog_process_thread() writes GOTO(out, rc = -EINVAL).  If GOTO()
 * does not itself store its second argument in rc, a missing lop_close
 * would leave rc at the llog_handle2ops() result and the error would be
 * lost.  Verify against the GOTO() definition.
 */
190 int llog_close(struct llog_handle *loghandle)
192 struct llog_operations *lop;
/* Look up the operations table that belongs to this handle. */
196 rc = llog_handle2ops(loghandle, &lop);
/* The backend does not provide a close method. */
199 if (lop->lop_close == NULL)
200 GOTO(out, -EOPNOTSUPP);
201 rc = lop->lop_close(loghandle);
/* Release the in-memory handle and its header. */
203 llog_free_handle(loghandle);
206 EXPORT_SYMBOL(llog_close);
/*
 * llog_process_thread() walks the records of one log in ascending index
 * order and calls lpi->lpi_cb for every record whose bit is set in the
 * header bitmap.  llog_process() either starts it through
 * cfs_kernel_thread() or calls it directly.
 *
 * arg: a struct llog_process_info carrying the log handle, the callback
 *      and the optional struct llog_process_cat_data range (first_idx and
 *      last_idx).
 *
 * The log is read one LLOG_CHUNK_SIZE block at a time with
 * llog_next_block().  Reading starts at offset LLOG_CHUNK_SIZE and index 1,
 * i.e. after the header record, which llog_init_handle() sets up with
 * index 0 and length LLOG_CHUNK_SIZE.  Before finishing, the index of the
 * last record given to the callback is stored in cd->last_idx, and
 * lpi_completion is signalled.
 *
 * NOTE(review): this excerpt is missing many listing lines (209, 214, 216,
 * 219-221, 223, 225, 227-230, 232-233, 236, 239, 241-242, 244, 248-249,
 * 252-253, 255-256, 261-263, and others further down).  Loop headers, rc
 * checks, index increments, the 'out' label and the final lpi_rc
 * assignment are therefore not visible; the comments below describe only
 * what is shown.
 */
208 static int llog_process_thread(void *arg)
210 struct llog_process_info *lpi = (struct llog_process_info *)arg;
211 struct llog_handle *loghandle = lpi->lpi_loghandle;
212 struct llog_log_hdr *llh = loghandle->lgh_hdr;
213 struct llog_process_cat_data *cd = lpi->lpi_catdata;
/* First data chunk follows the header chunk. */
215 __u64 cur_offset = LLOG_CHUNK_SIZE;
217 int rc = 0, index = 1, last_index;
218 int saved_index = 0, last_called_index = 0;
/* One chunk-sized buffer is reused for every block that is read. */
222 OBD_ALLOC(buf, LLOG_CHUNK_SIZE);
/* Allocation failure: report -ENOMEM and wake the waiter. */
224 lpi->lpi_rc = -ENOMEM;
226 complete(&lpi->lpi_completion);
231 cfs_daemonize_ctxt("llog_process_thread");
/*
 * Resume after cd->first_idx.
 * NOTE(review): cd is dereferenced here, yet the test a few lines below
 * allows cd == NULL; presumably a NULL guard sits on the dropped listing
 * line 233.  Confirm.
 */
234 last_called_index = cd->first_idx;
235 index = cd->first_idx + 1;
/* Stop at the caller's last index if one was given, else at the bitmap end. */
237 if (cd != NULL && cd->last_idx)
238 last_index = cd->last_idx;
240 last_index = LLOG_BITMAP_BYTES * 8 - 1;
243 struct llog_rec_hdr *rec;
245 /* skip records not set in bitmap */
246 while (index <= last_index &&
247 !ext2_test_bit(index, llh->llh_bitmap))
/* index == last_index + 1 means no further record is set in the range. */
250 LASSERT(index <= last_index + 1);
251 if (index == last_index + 1)
254 CDEBUG(D_OTHER, "index: %d last_index %d\n",
257 /* get the buf with our target record; avoid old garbage */
/* Remember the offset of the block about to be read. */
258 last_offset = cur_offset;
259 rc = llog_next_block(loghandle, &saved_index, index,
260 &cur_offset, buf, LLOG_CHUNK_SIZE);
264 /* NB: when rec->lrh_len is accessed it is already swabbed
265 * since it is used at the "end" of the loop and the rec
266 * swabbing is done at the beginning of the loop. */
/* Step through the records in the buffer, advancing by each record's length. */
267 for (rec = (struct llog_rec_hdr *)buf;
268 (char *)rec < buf + LLOG_CHUNK_SIZE;
269 rec = (struct llog_rec_hdr *)((char *)rec + rec->lrh_len)){
271 CDEBUG(D_OTHER, "processing rec 0x%p type %#x\n",
/* Convert the record to host byte order if it needs it. */
274 if (LLOG_REC_HDR_NEEDS_SWABBING(rec))
275 lustre_swab_llog_rec(rec, NULL);
277 CDEBUG(D_OTHER, "after swabbing, type=%#x idx=%d\n",
278 rec->lrh_type, rec->lrh_index);
280 if (rec->lrh_index == 0)
281 GOTO(out, 0); /* no more records */
/* A zero or oversized length would break the pointer stepping above. */
283 if (rec->lrh_len == 0 || rec->lrh_len >LLOG_CHUNK_SIZE){
284 CWARN("invalid length %d in llog record for "
285 "index %d/%d\n", rec->lrh_len,
286 rec->lrh_index, index);
287 GOTO(out, rc = -EINVAL);
/* Records in this block that lie before the wanted index are skipped. */
290 if (rec->lrh_index < index) {
291 CDEBUG(D_OTHER, "skipping lrh_index %d\n",
297 "lrh_index: %d lrh_len: %d (%d remains)\n",
298 rec->lrh_index, rec->lrh_len,
299 (int)(buf + LLOG_CHUNK_SIZE - (char *)rec));
/* Publish the current position in the handle. */
301 loghandle->lgh_cur_idx = rec->lrh_index;
302 loghandle->lgh_cur_offset = (char *)rec - (char *)buf +
305 /* if set, process the callback on this record */
306 if (ext2_test_bit(index, llh->llh_bitmap)) {
307 rc = lpi->lpi_cb(loghandle, rec,
309 last_called_index = index;
/* The callback asked to stop processing. */
310 if (rc == LLOG_PROC_BREAK) {
311 CDEBUG(D_HA, "recovery from log: "LPX64
313 loghandle->lgh_id.lgl_oid,
314 loghandle->lgh_id.lgl_ogen);
/* The callback asked for this record to be cancelled. */
316 } else if (rc == LLOG_DEL_RECORD) {
317 llog_cancel_rec(loghandle,
324 CDEBUG(D_OTHER, "Skipped index %d\n", index);
327 /* next record, still in buffer? */
329 if (index > last_index)
/* Report back the last index that was passed to the callback. */
336 cd->last_idx = last_called_index;
338 OBD_FREE(buf, LLOG_CHUNK_SIZE);
/* Wake up whoever waits on lpi_completion. */
341 complete(&lpi->lpi_completion);
/*
 * llog_process() runs llog_process_thread() over a log, handing it the
 * callback and its arguments bundled in a struct llog_process_info.
 *
 * loghandle: log to process.
 * cb:        callback invoked per record (see llog_process_thread()).
 * data:      opaque pointer stored as lpi_cbdata.
 * catdata:   optional range description stored as lpi_catdata.
 *
 * Two ways of running the worker are visible: starting it with
 * cfs_kernel_thread() and waiting on lpi_completion, or calling
 * llog_process_thread() directly.
 *
 * NOTE(review): listing lines 348, 350-354, 356-357, 359, 362-363, 366,
 * 368-370, 372 and 374-378 are missing from this excerpt.  The allocation
 * of lpi, the assignment of the callback (presumably lpi_cb = cb), the rc
 * checks, the preprocessor conditionals that presumably select between the
 * two paths, the cleanup and the return are not visible; confirm against
 * the complete file.
 */
346 int llog_process(struct llog_handle *loghandle, llog_cb_t cb,
347 void *data, void *catdata)
349 struct llog_process_info *lpi;
355 CERROR("cannot alloc pointer\n");
/* Bundle the arguments for the worker. */
358 lpi->lpi_loghandle = loghandle;
360 lpi->lpi_cbdata = data;
361 lpi->lpi_catdata = catdata;
/* Threaded path: start the worker, then block until it signals completion. */
364 init_completion(&lpi->lpi_completion);
365 rc = cfs_kernel_thread(llog_process_thread, lpi, CLONE_VM | CLONE_FILES);
367 CERROR("cannot start thread: %d\n", rc);
371 wait_for_completion(&lpi->lpi_completion);
/* Direct path: run the worker in the caller's context. */
373 llog_process_thread(lpi);
379 EXPORT_SYMBOL(llog_process);
/*
 * llog_get_size() reports the record count stored in the log header
 * (llh_count).  llog_init_handle() starts that count at 1 for the header
 * record, so the value includes the header.
 *
 * loghandle: log handle; may be NULL or lack a header, in which case the
 *            count is not read.
 *
 * NOTE(review): listing lines 382 and 385-386 are missing from this
 * excerpt, so the value returned when the handle or header is NULL is not
 * visible here.
 */
381 inline int llog_get_size(struct llog_handle *loghandle)
383 if (loghandle && loghandle->lgh_hdr)
384 return loghandle->lgh_hdr->llh_count;
387 EXPORT_SYMBOL(llog_get_size);
/*
 * llog_reverse_process() walks the records of one log in descending index
 * order and calls 'cb' for every record whose bit is set in the header
 * bitmap.
 *
 * loghandle: log to process.
 * cb:        callback, invoked as cb(loghandle, rec, data).
 * data:      opaque pointer passed through to the callback.
 * catdata:   optional struct llog_process_cat_data; first_idx + 1 is the
 *            lowest index visited and a non-zero last_idx is the starting
 *            index (otherwise the last bitmap slot is used).
 *
 * Blocks are fetched with llog_prev_block() into a zeroed chunk-sized
 * buffer.  Within a block the code first steps forward to the wanted
 * record, then steps backwards from record tail to record tail.
 *
 * NOTE(review): many listing lines are missing from this excerpt (391,
 * 394, 396-397, 399-402, 406, 408-409, 412, 416-417, 420-421, 425-428,
 * 430, 434-435, 437, 441-442, 445, 451, 454-459, 461, 463, 465-469,
 * 471-472).  Loop headers, rc checks, index updates, the 'out' label and
 * the return are therefore not visible; the comments below describe only
 * what is shown.
 */
389 int llog_reverse_process(struct llog_handle *loghandle, llog_cb_t cb,
390 void *data, void *catdata)
392 struct llog_log_hdr *llh = loghandle->lgh_hdr;
393 struct llog_process_cat_data *cd = catdata;
395 int rc = 0, first_index = 1, index, idx;
398 OBD_ALLOC(buf, LLOG_CHUNK_SIZE);
/*
 * NOTE(review): cd is dereferenced here, yet the next test allows
 * cd == NULL; presumably a NULL guard sits on the dropped listing
 * line 402.  Confirm.
 */
403 first_index = cd->first_idx + 1;
404 if (cd != NULL && cd->last_idx)
405 index = cd->last_idx;
407 index = LLOG_BITMAP_BYTES * 8 - 1;
410 struct llog_rec_hdr *rec;
411 struct llog_rec_tail *tail;
413 /* skip records not set in bitmap */
414 while (index >= first_index &&
415 !ext2_test_bit(index, llh->llh_bitmap))
/* index == first_index - 1 means no further record is set in the range. */
418 LASSERT(index >= first_index - 1);
419 if (index == first_index - 1)
422 /* get the buf with our target record; avoid old garbage */
423 memset(buf, 0, LLOG_CHUNK_SIZE);
424 rc = llog_prev_block(loghandle, index, buf, LLOG_CHUNK_SIZE);
/* Index of the record being looked at, converted from little-endian. */
429 idx = le32_to_cpu(rec->lrh_index);
431 CDEBUG(D_RPCTRACE, "index %u : idx %u\n", index, idx);
/* Step forward record by record until the wanted index is reached. */
432 while (idx < index) {
433 rec = ((void *)rec + le32_to_cpu(rec->lrh_len));
/* The tail structure occupies the last bytes of the record. */
436 tail = (void *)rec + le32_to_cpu(rec->lrh_len) - sizeof(*tail);
438 /* process records in buffer, starting where we found one */
439 while ((void *)tail > buf) {
/* Locate the start of the record from its tail and recorded length. */
440 rec = (void *)tail - le32_to_cpu(tail->lrt_len) +
/*
 * NOTE(review): lrh_index is read without le32_to_cpu() here, unlike the
 * conversion applied above; the comparison with 0 is byte-order neutral,
 * but the inconsistency is worth confirming.
 */
443 if (rec->lrh_index == 0)
444 GOTO(out, 0); /* no more records */
446 /* if set, process the callback on this record */
447 if (ext2_test_bit(index, llh->llh_bitmap)) {
448 rc = cb(loghandle, rec, data);
/* The callback asked to stop processing. */
449 if (rc == LLOG_PROC_BREAK) {
450 CWARN("recovery from log: "LPX64":%x"
452 loghandle->lgh_id.lgl_oid,
453 loghandle->lgh_id.lgl_ogen);
460 /* previous record, still in buffer? */
462 if (index < first_index)
/* The previous record's tail sits directly in front of this record. */
464 tail = (void *)rec - sizeof(*tail);
470 OBD_FREE(buf, LLOG_CHUNK_SIZE);
473 EXPORT_SYMBOL(llog_reverse_process);