Whamcloud - gitweb
current branches now use lnet from HEAD
[fs/lustre-release.git] / lustre / lvfs / llog.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  *  Copyright (C) 2001-2003 Cluster File Systems, Inc.
5  *   Author: Andreas Dilger <adilger@clusterfs.com>
6  *
7  *   This file is part of Lustre, http://www.lustre.org.
8  *
9  *   Lustre is free software; you can redistribute it and/or
10  *   modify it under the terms of version 2 of the GNU General Public
11  *   License as published by the Free Software Foundation.
12  *
13  *   Lustre is distributed in the hope that it will be useful,
14  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
15  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  *   GNU General Public License for more details.
17  *
18  *   You should have received a copy of the GNU General Public License
19  *   along with Lustre; if not, write to the Free Software
20  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21  *
22  * OST<->MDS recovery logging infrastructure.
23  *
24  * Invariants in implementation:
25  * - we do not share logs among different OST<->MDS connections, so that
26  *   if an OST or MDS fails it need only look at log(s) relevant to itself
27  */
28
29 #define DEBUG_SUBSYSTEM S_LOG
30
31 #ifndef EXPORT_SYMTAB
32 #define EXPORT_SYMTAB
33 #endif
34
35 #ifdef __KERNEL__
36 #include <linux/fs.h>
37 #else
38 #include <liblustre.h>
39 #endif
40
41 #include <linux/lustre_log.h>
42
43 /* Allocate a new log or catalog handle */
44 struct llog_handle *llog_alloc_handle(void)
45 {
46         struct llog_handle *loghandle;
47         ENTRY;
48
49         OBD_ALLOC(loghandle, sizeof(*loghandle));
50         if (loghandle == NULL)
51                 RETURN(ERR_PTR(-ENOMEM));
52
53         init_rwsem(&loghandle->lgh_lock);
54
55         RETURN(loghandle);
56 }
57 EXPORT_SYMBOL(llog_alloc_handle);
58
59 void llog_free_handle(struct llog_handle *loghandle)
60 {
61         if (!loghandle)
62                 return;
63
64         if (!loghandle->lgh_hdr)
65                 goto out;
66         if (le32_to_cpu(loghandle->lgh_hdr->llh_flags) & LLOG_F_IS_PLAIN)
67                 list_del_init(&loghandle->u.phd.phd_entry);
68         if (le32_to_cpu(loghandle->lgh_hdr->llh_flags) & LLOG_F_IS_CAT)
69                 LASSERT(list_empty(&loghandle->u.chd.chd_head));
70         OBD_FREE(loghandle->lgh_hdr, LLOG_CHUNK_SIZE);
71
72  out:
73         OBD_FREE(loghandle, sizeof(*loghandle));
74 }
75 EXPORT_SYMBOL(llog_free_handle);
76
77 /* returns negative on error; 0 if success; 1 if success & log destroyed */
78 int llog_cancel_rec(struct llog_handle *loghandle, int index)
79 {
80         struct llog_log_hdr *llh = loghandle->lgh_hdr;
81         int rc = 0;
82         ENTRY;
83
84         CDEBUG(D_HA, "canceling %d in log "LPX64"\n",
85                index, loghandle->lgh_id.lgl_oid);
86
87         if (index == 0) {
88                 CERROR("cannot cancel index 0 (which is header)\n");
89                 RETURN(-EINVAL);
90         }
91
92         if (!ext2_clear_bit(index, llh->llh_bitmap)) {
93                 CERROR("catalog index %u already clear?\n", index);
94                 RETURN(-EINVAL);
95         }
96
97         llh->llh_count = cpu_to_le32(le32_to_cpu(llh->llh_count) - 1);
98
99         if ((le32_to_cpu(llh->llh_flags) & LLOG_F_ZAP_WHEN_EMPTY) &&
100             (le32_to_cpu(llh->llh_count) == 1) &&
101             (loghandle->lgh_last_idx == (LLOG_BITMAP_BYTES * 8) - 1)) {
102                 rc = llog_destroy(loghandle);
103                 if (rc) {
104                         CERROR("failure destroying log after last cancel: %d\n",
105                                rc);
106                         ext2_set_bit(index, llh->llh_bitmap);
107                         llh->llh_count++;
108                 } else {
109                         rc = 1;
110                 }
111                 RETURN(rc);
112         }
113
114         rc = llog_write_rec(loghandle, &llh->llh_hdr, NULL, 0, NULL, 0);
115         if (rc) {
116                 CERROR("failure re-writing header %d\n", rc);
117                 ext2_set_bit(index, llh->llh_bitmap);
118                 llh->llh_count++;
119         }
120         RETURN(rc);
121 }
122 EXPORT_SYMBOL(llog_cancel_rec);
123
124 int llog_init_handle(struct llog_handle *handle, int flags,
125                      struct obd_uuid *uuid)
126 {
127         int rc;
128         struct llog_log_hdr *llh;
129         ENTRY;
130         LASSERT(handle->lgh_hdr == NULL);
131
132         OBD_ALLOC(llh, sizeof(*llh));
133         if (llh == NULL)
134                 RETURN(-ENOMEM);
135         handle->lgh_hdr = llh;
136         /* first assign flags to use llog_client_ops */
137         llh->llh_flags = cpu_to_le32(flags);
138         rc = llog_read_header(handle);
139         if (rc == 0) {
140                 flags = le32_to_cpu(llh->llh_flags);
141                 if (uuid)
142                         LASSERT(obd_uuid_equals(uuid, &llh->llh_tgtuuid));
143                 GOTO(out, rc);
144         } else if (rc != LLOG_EEMPTY || !flags) {
145                 /* set a pesudo flag for initialization */
146                 flags = LLOG_F_IS_CAT;
147                 GOTO(out, rc);
148         }
149         rc = 0;
150
151         handle->lgh_last_idx = 0; /* header is record with index 0 */
152         llh->llh_count = cpu_to_le32(1);         /* for the header record */
153         llh->llh_hdr.lrh_type = cpu_to_le32(LLOG_HDR_MAGIC);
154         llh->llh_hdr.lrh_len = llh->llh_tail.lrt_len =
155                 cpu_to_le32(LLOG_CHUNK_SIZE);
156         llh->llh_hdr.lrh_index = llh->llh_tail.lrt_index = 0;
157         llh->llh_timestamp = cpu_to_le64(LTIME_S(CURRENT_TIME));
158         if (uuid)
159                 memcpy(&llh->llh_tgtuuid, uuid, sizeof(llh->llh_tgtuuid));
160         llh->llh_bitmap_offset = cpu_to_le32(offsetof(typeof(*llh),llh_bitmap));
161         ext2_set_bit(0, llh->llh_bitmap);
162
163 out:
164         if (flags & LLOG_F_IS_CAT) {
165                 INIT_LIST_HEAD(&handle->u.chd.chd_head);
166                 llh->llh_size = cpu_to_le32(sizeof(struct llog_logid_rec));
167         }
168         else if (flags & LLOG_F_IS_PLAIN)
169                 INIT_LIST_HEAD(&handle->u.phd.phd_entry);
170         else
171                 LBUG();
172
173         if (rc) {
174                 OBD_FREE(llh, sizeof(*llh));
175                 handle->lgh_hdr = NULL;
176         }
177         RETURN(rc);
178 }
179 EXPORT_SYMBOL(llog_init_handle);
180
181 int llog_close(struct llog_handle *loghandle)
182 {
183         struct llog_operations *lop;
184         int rc;
185         ENTRY;
186
187         rc = llog_handle2ops(loghandle, &lop);
188         if (rc)
189                 GOTO(out, rc);
190         if (lop->lop_close == NULL)
191                 GOTO(out, rc = -EOPNOTSUPP);
192         rc = lop->lop_close(loghandle);
193  out:
194         llog_free_handle(loghandle);
195         RETURN(rc);
196 }
197 EXPORT_SYMBOL(llog_close);
198
199 int llog_process(struct llog_handle *loghandle, llog_cb_t cb,
200                  void *data, void *catdata)
201 {
202         struct llog_log_hdr *llh = loghandle->lgh_hdr;
203         struct llog_process_cat_data *cd = catdata;
204         void *buf;
205         __u64 cur_offset = LLOG_CHUNK_SIZE;
206         int rc = 0, index = 1, last_index, idx;
207         int saved_index = 0;
208         ENTRY;
209
210         OBD_ALLOC(buf, LLOG_CHUNK_SIZE);
211         if (!buf)
212                 RETURN(-ENOMEM);
213
214         if (cd != NULL)
215                 index = cd->first_idx + 1;
216         if (cd != NULL && cd->last_idx)
217                 last_index = cd->last_idx;
218         else
219                 last_index = LLOG_BITMAP_BYTES * 8 - 1;
220
221         while (rc == 0) {
222                 struct llog_rec_hdr *rec;
223
224                 /* skip records not set in bitmap */
225                 while (index <= last_index &&
226                        !ext2_test_bit(index, llh->llh_bitmap))
227                         ++index;
228
229                 LASSERT(index <= last_index + 1);
230                 if (index == last_index + 1)
231                         break;
232
233                 /* get the buf with our target record; avoid old garbage */
234                 memset(buf, 0, LLOG_CHUNK_SIZE);
235                 rc = llog_next_block(loghandle, &saved_index, index,
236                                      &cur_offset, buf, LLOG_CHUNK_SIZE);
237                 if (rc)
238                         GOTO(out, rc);
239
240                 rec = buf;
241                 idx = le32_to_cpu(rec->lrh_index);
242                 if (idx < index)
243                         CDEBUG(D_HA, "index %u : idx %u\n", index, idx);
244                 while (idx < index) {
245                         rec = ((void *)rec + le32_to_cpu(rec->lrh_len));
246                         idx ++;
247                 }
248
249                 /* process records in buffer, starting where we found one */
250                 while ((void *)rec < buf + LLOG_CHUNK_SIZE) {
251                         if (rec->lrh_index == 0)
252                                 GOTO(out, rc = 0); /* no more records */
253
254                         /* if set, process the callback on this record */
255                         if (ext2_test_bit(index, llh->llh_bitmap)) {
256                                 rc = cb(loghandle, rec, data);
257                                 if (rc == LLOG_PROC_BREAK) {
258                                         CDEBUG(D_HA, "recovery from log: "LPX64":%x"
259                                               " stopped\n",
260                                               loghandle->lgh_id.lgl_oid,
261                                               loghandle->lgh_id.lgl_ogen);
262                                         GOTO(out, rc);
263                                 } else if (rc == LLOG_DEL_RECORD) {
264                                         llog_cancel_rec(loghandle, rec->lrh_index);
265                                         rc = 0;
266                                 } 
267                                 if (rc)
268                                         GOTO(out, rc);
269                         }
270                         /* next record, still in buffer? */
271                         ++index;
272                         if (index > last_index)
273                                 GOTO(out, rc = 0);
274                         rec = ((void *)rec + le32_to_cpu(rec->lrh_len));
275                 }
276         }
277
278  out:
279         if (buf)
280                 OBD_FREE(buf, LLOG_CHUNK_SIZE);
281         RETURN(rc);
282 }
283 EXPORT_SYMBOL(llog_process);
284
285 int llog_reverse_process(struct llog_handle *loghandle, llog_cb_t cb,
286                          void *data, void *catdata)
287 {
288         struct llog_log_hdr *llh = loghandle->lgh_hdr;
289         struct llog_process_cat_data *cd = catdata;
290         void *buf;
291         int rc = 0, first_index = 1, index, idx;
292         struct llog_rec_tail *tail;
293         ENTRY;
294
295         OBD_ALLOC(buf, LLOG_CHUNK_SIZE);
296         if (!buf)
297                 RETURN(-ENOMEM);
298
299         if (cd != NULL)
300                 first_index = cd->first_idx + 1;
301         if (cd != NULL && cd->last_idx)
302                 index = cd->last_idx;
303         else
304                 index = LLOG_BITMAP_BYTES * 8 - 1;
305
306         while (rc == 0) {
307                 struct llog_rec_hdr *rec;
308
309                 /* skip records not set in bitmap */
310                 while (index >= first_index &&
311                        !ext2_test_bit(index, llh->llh_bitmap))
312                         --index;
313
314                 LASSERT(index >= first_index - 1);
315                 if (index == first_index - 1)
316                         break;
317
318                 /* get the buf with our target record; avoid old garbage */
319                 memset(buf, 0, LLOG_CHUNK_SIZE);
320                 rc = llog_prev_block(loghandle, index, buf, LLOG_CHUNK_SIZE);
321                 if (rc)
322                         GOTO(out, rc);
323
324                 rec = buf;
325                 idx = le32_to_cpu(rec->lrh_index);
326                 if (idx < index)
327                         CDEBUG(D_HA, "index %u : idx %u\n", index, idx);
328                 while (idx < index) {
329                         rec = ((void *)rec + le32_to_cpu(rec->lrh_len));
330                         idx ++;
331                 }
332
333                 /* process records in buffer, starting where we found one */
334                 while ((void *)rec >= buf) {
335                         if (rec->lrh_index == 0)
336                                 GOTO(out, 0); /* no more records */
337
338                         /* if set, process the callback on this record */
339                         if (ext2_test_bit(index, llh->llh_bitmap)) {
340                                 rc = cb(loghandle, rec, data);
341                                 if (rc == LLOG_PROC_BREAK) {
342                                         CWARN("recovery from log: "LPX64":%x"
343                                               " stopped\n",
344                                               loghandle->lgh_id.lgl_oid,
345                                               loghandle->lgh_id.lgl_ogen);
346                                         GOTO(out, rc);
347                                 }
348                                 if (rc)
349                                         GOTO(out, rc);
350                         }
351
352                         /* previous record, still in buffer? */
353                         --index;
354                         if (index < first_index)
355                                 GOTO(out, rc = 0);
356
357                         if ((void *)rec == buf)
358                                 break;
359
360                         tail = (void *)rec - sizeof(struct llog_rec_tail);
361                         rec = ((void *)rec - le32_to_cpu(tail->lrt_len));
362                 }
363         }
364
365  out:
366         if (buf)
367                 OBD_FREE(buf, LLOG_CHUNK_SIZE);
368         RETURN(rc);
369 }
370 EXPORT_SYMBOL(llog_reverse_process);