Whamcloud - gitweb
b=8007
[fs/lustre-release.git] / lustre / obdclass / llog.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  *  Copyright (C) 2001-2003 Cluster File Systems, Inc.
5  *   Author: Andreas Dilger <adilger@clusterfs.com>
6  *
7  *   This file is part of the Lustre file system, http://www.lustre.org
8  *   Lustre is a trademark of Cluster File Systems, Inc.
9  *
10  *   You may have signed or agreed to another license before downloading
11  *   this software.  If so, you are bound by the terms and conditions
12  *   of that agreement, and the following does not apply to you.  See the
13  *   LICENSE file included with this distribution for more information.
14  *
15  *   If you did not agree to a different license, then this copy of Lustre
16  *   is open source software; you can redistribute it and/or modify it
17  *   under the terms of version 2 of the GNU General Public License as
18  *   published by the Free Software Foundation.
19  *
20  *   In either case, Lustre is distributed in the hope that it will be
21  *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
22  *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23  *   license text for more details.
24  *
25  * OST<->MDS recovery logging infrastructure.
26  *
27  * Invariants in implementation:
28  * - we do not share logs among different OST<->MDS connections, so that
29  *   if an OST or MDS fails it need only look at log(s) relevant to itself
30  */
31
32 #define DEBUG_SUBSYSTEM S_LOG
33
34 #ifndef EXPORT_SYMTAB
35 #define EXPORT_SYMTAB
36 #endif
37
38 #ifdef __KERNEL__
39 #include <linux/fs.h>
40 #else
41 #include <liblustre.h>
42 #endif
43
44 #include <linux/obd_class.h>
45 #include <linux/lustre_log.h>
46 #include <libcfs/list.h>
47
48 /* Allocate a new log or catalog handle */
49 struct llog_handle *llog_alloc_handle(void)
50 {
51         struct llog_handle *loghandle;
52         ENTRY;
53
54         OBD_ALLOC(loghandle, sizeof(*loghandle));
55         if (loghandle == NULL)
56                 RETURN(ERR_PTR(-ENOMEM));
57
58         init_rwsem(&loghandle->lgh_lock);
59
60         RETURN(loghandle);
61 }
62 EXPORT_SYMBOL(llog_alloc_handle);
63
64
65 void llog_free_handle(struct llog_handle *loghandle)
66 {
67         if (!loghandle)
68                 return;
69
70         if (!loghandle->lgh_hdr)
71                 goto out;
72         if (loghandle->lgh_hdr->llh_flags & LLOG_F_IS_PLAIN)
73                 list_del_init(&loghandle->u.phd.phd_entry);
74         if (loghandle->lgh_hdr->llh_flags & LLOG_F_IS_CAT)
75                 LASSERT(list_empty(&loghandle->u.chd.chd_head));
76         OBD_FREE(loghandle->lgh_hdr, LLOG_CHUNK_SIZE);
77
78  out:
79         OBD_FREE(loghandle, sizeof(*loghandle));
80 }
81 EXPORT_SYMBOL(llog_free_handle);
82
83 /* returns negative on error; 0 if success; 1 if success & log destroyed */
84 int llog_cancel_rec(struct llog_handle *loghandle, int index)
85 {
86         struct llog_log_hdr *llh = loghandle->lgh_hdr;
87         int rc = 0;
88         ENTRY;
89
90         CDEBUG(D_HA, "canceling %d in log "LPX64"\n",
91                index, loghandle->lgh_id.lgl_oid);
92
93         if (index == 0) {
94                 CERROR("cannot cancel index 0 (which is header)\n");
95                 RETURN(-EINVAL);
96         }
97
98         if (!ext2_clear_bit(index, llh->llh_bitmap)) {
99                 CDEBUG(D_HA, "catalog index %u already clear?\n", index);
100                 RETURN(-EINVAL);
101         }
102
103         llh->llh_count--;
104
105         if ((llh->llh_flags & LLOG_F_ZAP_WHEN_EMPTY) &&
106             (llh->llh_count == 1) &&
107             (loghandle->lgh_last_idx == (LLOG_BITMAP_BYTES * 8) - 1)) {
108                 rc = llog_destroy(loghandle);
109                 if (rc) {
110                         CERROR("failure destroying log after last cancel: %d\n",
111                                rc);
112                         ext2_set_bit(index, llh->llh_bitmap);
113                         llh->llh_count++;
114                 } else {
115                         rc = 1;
116                 }
117                 RETURN(rc);
118         }
119
120         rc = llog_write_rec(loghandle, &llh->llh_hdr, NULL, 0, NULL, 0);
121         if (rc) {
122                 CERROR("failure re-writing header %d\n", rc);
123                 ext2_set_bit(index, llh->llh_bitmap);
124                 llh->llh_count++;
125         }
126         RETURN(rc);
127 }
128 EXPORT_SYMBOL(llog_cancel_rec);
129
130 int llog_init_handle(struct llog_handle *handle, int flags,
131                      struct obd_uuid *uuid)
132 {
133         int rc;
134         struct llog_log_hdr *llh;
135         ENTRY;
136         LASSERT(handle->lgh_hdr == NULL);
137
138         OBD_ALLOC(llh, sizeof(*llh));
139         if (llh == NULL)
140                 RETURN(-ENOMEM);
141         handle->lgh_hdr = llh;
142         /* first assign flags to use llog_client_ops */
143         llh->llh_flags = flags;
144         rc = llog_read_header(handle);
145         if (rc == 0) {
146                 flags = llh->llh_flags;
147                 if (uuid && !obd_uuid_equals(uuid, &llh->llh_tgtuuid)) {
148                         CERROR("uuid mismatch: %s/%s\n", (char *)uuid->uuid,
149                                (char *)llh->llh_tgtuuid.uuid);
150                         rc = -EEXIST;
151                 }
152                 GOTO(out, rc);
153         } else if (rc != LLOG_EEMPTY || !flags) {
154                 /* set a pesudo flag for initialization */
155                 flags = LLOG_F_IS_CAT;
156                 GOTO(out, rc);
157         }
158         rc = 0;
159
160         handle->lgh_last_idx = 0; /* header is record with index 0 */
161         llh->llh_count = 1;         /* for the header record */
162         llh->llh_hdr.lrh_type = LLOG_HDR_MAGIC;
163         llh->llh_hdr.lrh_len = llh->llh_tail.lrt_len = LLOG_CHUNK_SIZE;
164         llh->llh_hdr.lrh_index = llh->llh_tail.lrt_index = 0;
165         llh->llh_timestamp = CURRENT_SECONDS;
166         if (uuid)
167                 memcpy(&llh->llh_tgtuuid, uuid, sizeof(llh->llh_tgtuuid));
168         llh->llh_bitmap_offset = offsetof(typeof(*llh),llh_bitmap);
169         ext2_set_bit(0, llh->llh_bitmap);
170
171 out:
172         if (flags & LLOG_F_IS_CAT) {
173                 INIT_LIST_HEAD(&handle->u.chd.chd_head);
174                 llh->llh_size = sizeof(struct llog_logid_rec);
175         } else if (flags & LLOG_F_IS_PLAIN) {
176                 INIT_LIST_HEAD(&handle->u.phd.phd_entry);
177         } else {
178                 CERROR("Unknown flags: %#x (Expected %#x or %#x\n",
179                        flags, LLOG_F_IS_CAT, LLOG_F_IS_PLAIN);
180                 LBUG();
181         }
182
183         if (rc) {
184                 OBD_FREE(llh, sizeof(*llh));
185                 handle->lgh_hdr = NULL;
186         }
187         RETURN(rc);
188 }
189 EXPORT_SYMBOL(llog_init_handle);
190
191 int llog_close(struct llog_handle *loghandle)
192 {
193         struct llog_operations *lop;
194         int rc;
195         ENTRY;
196
197         rc = llog_handle2ops(loghandle, &lop);
198         if (rc)
199                 GOTO(out, rc);
200         if (lop->lop_close == NULL)
201                 GOTO(out, -EOPNOTSUPP);
202         rc = lop->lop_close(loghandle);
203  out:
204         llog_free_handle(loghandle);
205         RETURN(rc);
206 }
207 EXPORT_SYMBOL(llog_close);
208
209 int llog_process(struct llog_handle *loghandle, llog_cb_t cb,
210                  void *data, void *catdata)
211 {
212         struct llog_log_hdr *llh = loghandle->lgh_hdr;
213         struct llog_process_cat_data *cd = catdata;
214         char *buf;
215         __u64 cur_offset = LLOG_CHUNK_SIZE;
216         int rc = 0, index = 1, last_index;
217         int saved_index = 0, last_called_index = 0;
218         ENTRY;
219
220         LASSERT(llh);
221
222         OBD_ALLOC(buf, LLOG_CHUNK_SIZE);
223         if (!buf)
224                 RETURN(-ENOMEM);
225
226         if (cd != NULL) {
227                 last_called_index = cd->first_idx;
228                 index = cd->first_idx + 1;
229         }
230         if (cd != NULL && cd->last_idx)
231                 last_index = cd->last_idx;
232         else
233                 last_index = LLOG_BITMAP_BYTES * 8 - 1;
234
235         while (rc == 0) {
236                 struct llog_rec_hdr *rec;
237
238                 /* skip records not set in bitmap */
239                 while (index <= last_index &&
240                        !ext2_test_bit(index, llh->llh_bitmap))
241                         ++index;
242
243                 LASSERT(index <= last_index + 1);
244                 if (index == last_index + 1)
245                         break;
246
247                 CDEBUG(D_OTHER, "index: %d last_index %d\n",
248                        index, last_index);
249
250                 /* get the buf with our target record; avoid old garbage */
251                 memset(buf, 0, LLOG_CHUNK_SIZE);
252                 rc = llog_next_block(loghandle, &saved_index, index,
253                                      &cur_offset, buf, LLOG_CHUNK_SIZE);
254                 if (rc)
255                         GOTO(out, rc);
256
257                 /* NB: when rec->lrh_len is accessed it is already swabbed
258                  * since it is used at the "end" of the loop and the rec
259                  * swabbing is done at the beginning of the loop. */
260                 for (rec = (struct llog_rec_hdr *)buf;
261                      (char *)rec < buf + LLOG_CHUNK_SIZE;
262                      rec = (struct llog_rec_hdr *)((char *)rec + rec->lrh_len)){
263
264                         CDEBUG(D_OTHER, "processing rec 0x%p type %#x\n",
265                                rec, rec->lrh_type);
266
267                         if (LLOG_REC_HDR_NEEDS_SWABBING(rec))
268                                 lustre_swab_llog_rec(rec, NULL);
269
270                         CDEBUG(D_OTHER, "after swabbing, type: %#x\n",
271                                rec->lrh_type);
272
273                         if (rec->lrh_index == 0)
274                                 GOTO(out, 0); /* no more records */
275
276                         if (rec->lrh_len == 0 || rec->lrh_len >LLOG_CHUNK_SIZE){
277                                 CWARN("invalid length %d in llog record for "
278                                       "index %d\n", rec->lrh_len,
279                                 rec->lrh_index);
280                                 GOTO(out, 0);
281                         }
282
283                         if (rec->lrh_index < index) {
284                                 CDEBUG(D_OTHER, "skipping lrh_index %d\n",
285                                        rec->lrh_index);
286                                 continue;
287                         }
288
289                         CDEBUG(D_OTHER,
290                                "lrh_index: %d lrh_len: %d (%d remains)\n",
291                                rec->lrh_index, rec->lrh_len,
292                                (int)(buf + LLOG_CHUNK_SIZE - (char *)rec));
293
294                         /* if set, process the callback on this record */
295                         if (ext2_test_bit(index, llh->llh_bitmap)) {
296                                 rc = cb(loghandle, rec, data);
297                                 last_called_index = index;
298                                 if (rc == LLOG_PROC_BREAK) {
299                                         CWARN("recovery from log: "LPX64":%x"
300                                               " stopped\n",
301                                               loghandle->lgh_id.lgl_oid,
302                                               loghandle->lgh_id.lgl_ogen);
303                                         GOTO(out, rc);
304                                 } else if (rc == LLOG_DEL_RECORD) {
305                                         llog_cancel_rec(loghandle, rec->lrh_index);
306                                         rc = 0;
307                                 }
308                                 if (rc)
309                                         GOTO(out, rc);
310                         } else {
311                                 CDEBUG(D_OTHER, "Skipped index %d\n", index);
312                         }
313
314                         /* next record, still in buffer? */
315                         ++index;
316                         if (index > last_index)
317                                 GOTO(out, rc = 0);
318                 }
319         }
320
321  out:
322         if (cd != NULL)
323                 cd->last_idx = last_called_index;
324         if (buf)
325                 OBD_FREE(buf, LLOG_CHUNK_SIZE);
326         RETURN(rc);
327 }
328 EXPORT_SYMBOL(llog_process);
329
330 inline int llog_get_size(struct llog_handle *loghandle)
331 {
332         if (loghandle && loghandle->lgh_hdr)
333                 return loghandle->lgh_hdr->llh_count;
334         return 0;
335 }
336 EXPORT_SYMBOL(llog_get_size);
337
338 int llog_reverse_process(struct llog_handle *loghandle, llog_cb_t cb,
339                          void *data, void *catdata)
340 {
341         struct llog_log_hdr *llh = loghandle->lgh_hdr;
342         struct llog_process_cat_data *cd = catdata;
343         void *buf;
344         int rc = 0, first_index = 1, index, idx;
345         ENTRY;
346
347         OBD_ALLOC(buf, LLOG_CHUNK_SIZE);
348         if (!buf)
349                 RETURN(-ENOMEM);
350
351         if (cd != NULL)
352                 first_index = cd->first_idx + 1;
353         if (cd != NULL && cd->last_idx)
354                 index = cd->last_idx;
355         else
356                 index = LLOG_BITMAP_BYTES * 8 - 1;
357
358         while (rc == 0) {
359                 struct llog_rec_hdr *rec;
360                 struct llog_rec_tail *tail;
361
362                 /* skip records not set in bitmap */
363                 while (index >= first_index &&
364                        !ext2_test_bit(index, llh->llh_bitmap))
365                         --index;
366
367                 LASSERT(index >= first_index - 1);
368                 if (index == first_index - 1)
369                         break;
370
371                 /* get the buf with our target record; avoid old garbage */
372                 memset(buf, 0, LLOG_CHUNK_SIZE);
373                 rc = llog_prev_block(loghandle, index, buf, LLOG_CHUNK_SIZE);
374                 if (rc)
375                         GOTO(out, rc);
376
377                 rec = buf;
378                 idx = le32_to_cpu(rec->lrh_index);
379                 if (idx < index)
380                         CDEBUG(D_HA, "index %u : idx %u\n", index, idx);
381                 while (idx < index) {
382                         rec = ((void *)rec + le32_to_cpu(rec->lrh_len));
383                         idx ++;
384                 }
385                 tail = (void *)rec + le32_to_cpu(rec->lrh_len) - sizeof(*tail);
386
387                 /* process records in buffer, starting where we found one */
388                 while ((void *)tail > buf) {
389                         rec = (void *)tail - le32_to_cpu(tail->lrt_len) +
390                                 sizeof(*tail);
391
392                         if (rec->lrh_index == 0)
393                                 GOTO(out, 0); /* no more records */
394
395                         /* if set, process the callback on this record */
396                         if (ext2_test_bit(index, llh->llh_bitmap)) {
397                                 rc = cb(loghandle, rec, data);
398                                 if (rc == LLOG_PROC_BREAK) {
399                                         CWARN("recovery from log: "LPX64":%x"
400                                               " stopped\n",
401                                               loghandle->lgh_id.lgl_oid,
402                                               loghandle->lgh_id.lgl_ogen);
403                                         GOTO(out, rc);
404                                 }
405                                 if (rc)
406                                         GOTO(out, rc);
407                         }
408
409                         /* previous record, still in buffer? */
410                         --index;
411                         if (index < first_index)
412                                 GOTO(out, rc = 0);
413                         tail = (void *)rec - sizeof(*tail);
414                 }
415         }
416
417 out:
418         if (buf)
419                 OBD_FREE(buf, LLOG_CHUNK_SIZE);
420         RETURN(rc);
421 }
422 EXPORT_SYMBOL(llog_reverse_process);