Whamcloud - gitweb
- lost #define changes from 1_6
[fs/lustre-release.git] / lustre / obdclass / llog.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  *  Copyright (C) 2001-2003 Cluster File Systems, Inc.
5  *   Author: Andreas Dilger <adilger@clusterfs.com>
6  *
7  *   This file is part of the Lustre file system, http://www.lustre.org
8  *   Lustre is a trademark of Cluster File Systems, Inc.
9  *
10  *   You may have signed or agreed to another license before downloading
11  *   this software.  If so, you are bound by the terms and conditions
12  *   of that agreement, and the following does not apply to you.  See the
13  *   LICENSE file included with this distribution for more information.
14  *
15  *   If you did not agree to a different license, then this copy of Lustre
16  *   is open source software; you can redistribute it and/or modify it
17  *   under the terms of version 2 of the GNU General Public License as
18  *   published by the Free Software Foundation.
19  *
20  *   In either case, Lustre is distributed in the hope that it will be
21  *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
22  *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23  *   license text for more details.
24  *
25  * OST<->MDS recovery logging infrastructure.
26  *
27  * Invariants in implementation:
28  * - we do not share logs among different OST<->MDS connections, so that
29  *   if an OST or MDS fails it need only look at log(s) relevant to itself
30  */
31
32 #define DEBUG_SUBSYSTEM S_LOG
33
34 #ifndef EXPORT_SYMTAB
35 #define EXPORT_SYMTAB
36 #endif
37
38 #ifndef __KERNEL__
39 #include <liblustre.h>
40 #endif
41
42 #include <obd_class.h>
43 #include <lustre_log.h>
44 #include <libcfs/list.h>
45
46 /* Allocate a new log or catalog handle */
47 struct llog_handle *llog_alloc_handle(void)
48 {
49         struct llog_handle *loghandle;
50         ENTRY;
51
52         OBD_ALLOC(loghandle, sizeof(*loghandle));
53         if (loghandle == NULL)
54                 RETURN(ERR_PTR(-ENOMEM));
55
56         init_rwsem(&loghandle->lgh_lock);
57
58         RETURN(loghandle);
59 }
60 EXPORT_SYMBOL(llog_alloc_handle);
61
62
63 void llog_free_handle(struct llog_handle *loghandle)
64 {
65         if (!loghandle)
66                 return;
67
68         if (!loghandle->lgh_hdr)
69                 goto out;
70         if (loghandle->lgh_hdr->llh_flags & LLOG_F_IS_PLAIN)
71                 list_del_init(&loghandle->u.phd.phd_entry);
72         if (loghandle->lgh_hdr->llh_flags & LLOG_F_IS_CAT)
73                 LASSERT(list_empty(&loghandle->u.chd.chd_head));
74         OBD_FREE(loghandle->lgh_hdr, LLOG_CHUNK_SIZE);
75
76  out:
77         OBD_FREE(loghandle, sizeof(*loghandle));
78 }
79 EXPORT_SYMBOL(llog_free_handle);
80
81 /* returns negative on error; 0 if success; 1 if success & log destroyed */
82 int llog_cancel_rec(struct llog_handle *loghandle, int index)
83 {
84         struct llog_log_hdr *llh = loghandle->lgh_hdr;
85         int rc = 0;
86         ENTRY;
87
88         CDEBUG(D_RPCTRACE, "canceling %d in log "LPX64"\n",
89                index, loghandle->lgh_id.lgl_oid);
90
91         if (index == 0) {
92                 CERROR("cannot cancel index 0 (which is header)\n");
93                 RETURN(-EINVAL);
94         }
95
96         if (!ext2_clear_bit(index, llh->llh_bitmap)) {
97                 CDEBUG(D_RPCTRACE, "catalog index %u already clear?\n", index);
98                 RETURN(-EINVAL);
99         }
100
101         llh->llh_count--;
102
103         if ((llh->llh_flags & LLOG_F_ZAP_WHEN_EMPTY) &&
104             (llh->llh_count == 1) &&
105             (loghandle->lgh_last_idx == (LLOG_BITMAP_BYTES * 8) - 1)) {
106                 rc = llog_destroy(loghandle);
107                 if (rc) {
108                         CERROR("failure destroying log after last cancel: %d\n",
109                                rc);
110                         ext2_set_bit(index, llh->llh_bitmap);
111                         llh->llh_count++;
112                 } else {
113                         rc = 1;
114                 }
115                 RETURN(rc);
116         }
117
118         rc = llog_write_rec(loghandle, &llh->llh_hdr, NULL, 0, NULL, 0);
119         if (rc) {
120                 CERROR("failure re-writing header %d\n", rc);
121                 ext2_set_bit(index, llh->llh_bitmap);
122                 llh->llh_count++;
123         }
124         RETURN(rc);
125 }
126 EXPORT_SYMBOL(llog_cancel_rec);
127
128 int llog_init_handle(struct llog_handle *handle, int flags,
129                      struct obd_uuid *uuid)
130 {
131         int rc;
132         struct llog_log_hdr *llh;
133         ENTRY;
134         LASSERT(handle->lgh_hdr == NULL);
135
136         OBD_ALLOC(llh, sizeof(*llh));
137         if (llh == NULL)
138                 RETURN(-ENOMEM);
139         handle->lgh_hdr = llh;
140         /* first assign flags to use llog_client_ops */
141         llh->llh_flags = flags;
142         rc = llog_read_header(handle);
143         if (rc == 0) {
144                 flags = llh->llh_flags;
145                 if (uuid && !obd_uuid_equals(uuid, &llh->llh_tgtuuid)) {
146                         CERROR("uuid mismatch: %s/%s\n", (char *)uuid->uuid,
147                                (char *)llh->llh_tgtuuid.uuid);
148                         rc = -EEXIST;
149                 }
150                 GOTO(out, rc);
151         } else if (rc != LLOG_EEMPTY || !flags) {
152                 /* set a pesudo flag for initialization */
153                 flags = LLOG_F_IS_CAT;
154                 GOTO(out, rc);
155         }
156         rc = 0;
157
158         handle->lgh_last_idx = 0; /* header is record with index 0 */
159         llh->llh_count = 1;         /* for the header record */
160         llh->llh_hdr.lrh_type = LLOG_HDR_MAGIC;
161         llh->llh_hdr.lrh_len = llh->llh_tail.lrt_len = LLOG_CHUNK_SIZE;
162         llh->llh_hdr.lrh_index = llh->llh_tail.lrt_index = 0;
163         llh->llh_timestamp = CURRENT_SECONDS;
164         if (uuid)
165                 memcpy(&llh->llh_tgtuuid, uuid, sizeof(llh->llh_tgtuuid));
166         llh->llh_bitmap_offset = offsetof(typeof(*llh),llh_bitmap);
167         ext2_set_bit(0, llh->llh_bitmap);
168
169 out:
170         if (flags & LLOG_F_IS_CAT) {
171                 CFS_INIT_LIST_HEAD(&handle->u.chd.chd_head);
172                 llh->llh_size = sizeof(struct llog_logid_rec);
173         } else if (flags & LLOG_F_IS_PLAIN) {
174                 CFS_INIT_LIST_HEAD(&handle->u.phd.phd_entry);
175         } else {
176                 CERROR("Unknown flags: %#x (Expected %#x or %#x\n",
177                        flags, LLOG_F_IS_CAT, LLOG_F_IS_PLAIN);
178                 LBUG();
179         }
180
181         if (rc) {
182                 OBD_FREE(llh, sizeof(*llh));
183                 handle->lgh_hdr = NULL;
184         }
185         RETURN(rc);
186 }
187 EXPORT_SYMBOL(llog_init_handle);
188
189 int llog_close(struct llog_handle *loghandle)
190 {
191         struct llog_operations *lop;
192         int rc;
193         ENTRY;
194
195         rc = llog_handle2ops(loghandle, &lop);
196         if (rc)
197                 GOTO(out, rc);
198         if (lop->lop_close == NULL)
199                 GOTO(out, -EOPNOTSUPP);
200         rc = lop->lop_close(loghandle);
201  out:
202         llog_free_handle(loghandle);
203         RETURN(rc);
204 }
205 EXPORT_SYMBOL(llog_close);
206
207 int llog_process(struct llog_handle *loghandle, llog_cb_t cb,
208                  void *data, void *catdata)
209 {
210         struct llog_log_hdr *llh = loghandle->lgh_hdr;
211         struct llog_process_cat_data *cd = catdata;
212         char *buf;
213         __u64 cur_offset = LLOG_CHUNK_SIZE, last_offset;
214         int rc = 0, index = 1, last_index;
215         int saved_index = 0, last_called_index = 0;
216         ENTRY;
217
218         LASSERT(llh);
219
220         OBD_ALLOC(buf, LLOG_CHUNK_SIZE);
221         if (!buf)
222                 RETURN(-ENOMEM);
223
224         if (cd != NULL) {
225                 last_called_index = cd->first_idx;
226                 index = cd->first_idx + 1;
227         }
228         if (cd != NULL && cd->last_idx)
229                 last_index = cd->last_idx;
230         else
231                 last_index = LLOG_BITMAP_BYTES * 8 - 1;
232
233         while (rc == 0) {
234                 struct llog_rec_hdr *rec;
235
236                 /* skip records not set in bitmap */
237                 while (index <= last_index &&
238                        !ext2_test_bit(index, llh->llh_bitmap))
239                         ++index;
240
241                 LASSERT(index <= last_index + 1);
242                 if (index == last_index + 1)
243                         break;
244
245                 CDEBUG(D_OTHER, "index: %d last_index %d\n",
246                        index, last_index);
247
248                 /* get the buf with our target record; avoid old garbage */
249                 memset(buf, 0, LLOG_CHUNK_SIZE);
250                 last_offset = cur_offset;
251                 rc = llog_next_block(loghandle, &saved_index, index,
252                                      &cur_offset, buf, LLOG_CHUNK_SIZE);
253                 if (rc)
254                         GOTO(out, rc);
255
256                 /* NB: when rec->lrh_len is accessed it is already swabbed
257                  * since it is used at the "end" of the loop and the rec
258                  * swabbing is done at the beginning of the loop. */
259                 for (rec = (struct llog_rec_hdr *)buf;
260                      (char *)rec < buf + LLOG_CHUNK_SIZE;
261                      rec = (struct llog_rec_hdr *)((char *)rec + rec->lrh_len)){
262
263                         CDEBUG(D_OTHER, "processing rec 0x%p type %#x\n",
264                                rec, rec->lrh_type);
265
266                         if (LLOG_REC_HDR_NEEDS_SWABBING(rec))
267                                 lustre_swab_llog_rec(rec, NULL);
268
269                         CDEBUG(D_OTHER, "after swabbing, type=%#x idx=%d\n",
270                                rec->lrh_type, rec->lrh_index);
271  
272                         if (rec->lrh_index == 0)
273                                 GOTO(out, 0); /* no more records */
274
275                         if (rec->lrh_len == 0 || rec->lrh_len >LLOG_CHUNK_SIZE){
276                                 CWARN("invalid length %d in llog record for "
277                                       "index %d/%d\n", rec->lrh_len,
278                                       rec->lrh_index, index);
279                                 GOTO(out, rc = -EINVAL);
280                         }
281
282                         if (rec->lrh_index < index) {
283                                 CDEBUG(D_OTHER, "skipping lrh_index %d\n",
284                                        rec->lrh_index);
285                                 continue;
286                         }
287
288                         CDEBUG(D_OTHER,
289                                "lrh_index: %d lrh_len: %d (%d remains)\n",
290                                rec->lrh_index, rec->lrh_len,
291                                (int)(buf + LLOG_CHUNK_SIZE - (char *)rec));
292
293                         loghandle->lgh_cur_idx = rec->lrh_index;
294                         loghandle->lgh_cur_offset = (char *)rec - (char *)buf +
295                                 last_offset;
296
297                         /* if set, process the callback on this record */
298                         if (ext2_test_bit(index, llh->llh_bitmap)) {
299                                 rc = cb(loghandle, rec, data);
300                                 last_called_index = index;
301                                 if (rc == LLOG_PROC_BREAK) {
302                                         CDEBUG(D_HA, "recovery from log: "LPX64
303                                                ":%x stopped\n",
304                                                loghandle->lgh_id.lgl_oid,
305                                                loghandle->lgh_id.lgl_ogen);
306                                         GOTO(out, rc);
307                                 } else if (rc == LLOG_DEL_RECORD) {
308                                         llog_cancel_rec(loghandle, rec->lrh_index);
309                                         rc = 0;
310                                 }
311                                 if (rc)
312                                         GOTO(out, rc);
313                         } else {
314                                 CDEBUG(D_OTHER, "Skipped index %d\n", index);
315                         }
316
317                         /* next record, still in buffer? */
318                         ++index;
319                         if (index > last_index)
320                                 GOTO(out, rc = 0);
321                 }
322         }
323
324  out:
325         if (cd != NULL)
326                 cd->last_idx = last_called_index;
327         if (buf)
328                 OBD_FREE(buf, LLOG_CHUNK_SIZE);
329         RETURN(rc);
330 }
331 EXPORT_SYMBOL(llog_process);
332
333 inline int llog_get_size(struct llog_handle *loghandle)
334 {
335         if (loghandle && loghandle->lgh_hdr)
336                 return loghandle->lgh_hdr->llh_count;
337         return 0;
338 }
339 EXPORT_SYMBOL(llog_get_size);
340
341 int llog_reverse_process(struct llog_handle *loghandle, llog_cb_t cb,
342                          void *data, void *catdata)
343 {
344         struct llog_log_hdr *llh = loghandle->lgh_hdr;
345         struct llog_process_cat_data *cd = catdata;
346         void *buf;
347         int rc = 0, first_index = 1, index, idx;
348         ENTRY;
349
350         OBD_ALLOC(buf, LLOG_CHUNK_SIZE);
351         if (!buf)
352                 RETURN(-ENOMEM);
353
354         if (cd != NULL)
355                 first_index = cd->first_idx + 1;
356         if (cd != NULL && cd->last_idx)
357                 index = cd->last_idx;
358         else
359                 index = LLOG_BITMAP_BYTES * 8 - 1;
360
361         while (rc == 0) {
362                 struct llog_rec_hdr *rec;
363                 struct llog_rec_tail *tail;
364
365                 /* skip records not set in bitmap */
366                 while (index >= first_index &&
367                        !ext2_test_bit(index, llh->llh_bitmap))
368                         --index;
369
370                 LASSERT(index >= first_index - 1);
371                 if (index == first_index - 1)
372                         break;
373
374                 /* get the buf with our target record; avoid old garbage */
375                 memset(buf, 0, LLOG_CHUNK_SIZE);
376                 rc = llog_prev_block(loghandle, index, buf, LLOG_CHUNK_SIZE);
377                 if (rc)
378                         GOTO(out, rc);
379
380                 rec = buf;
381                 idx = le32_to_cpu(rec->lrh_index);
382                 if (idx < index)
383                         CDEBUG(D_RPCTRACE, "index %u : idx %u\n", index, idx);
384                 while (idx < index) {
385                         rec = ((void *)rec + le32_to_cpu(rec->lrh_len));
386                         idx ++;
387                 }
388                 tail = (void *)rec + le32_to_cpu(rec->lrh_len) - sizeof(*tail);
389
390                 /* process records in buffer, starting where we found one */
391                 while ((void *)tail > buf) {
392                         rec = (void *)tail - le32_to_cpu(tail->lrt_len) +
393                                 sizeof(*tail);
394
395                         if (rec->lrh_index == 0)
396                                 GOTO(out, 0); /* no more records */
397
398                         /* if set, process the callback on this record */
399                         if (ext2_test_bit(index, llh->llh_bitmap)) {
400                                 rc = cb(loghandle, rec, data);
401                                 if (rc == LLOG_PROC_BREAK) {
402                                         CWARN("recovery from log: "LPX64":%x"
403                                               " stopped\n",
404                                               loghandle->lgh_id.lgl_oid,
405                                               loghandle->lgh_id.lgl_ogen);
406                                         GOTO(out, rc);
407                                 }
408                                 if (rc)
409                                         GOTO(out, rc);
410                         }
411
412                         /* previous record, still in buffer? */
413                         --index;
414                         if (index < first_index)
415                                 GOTO(out, rc = 0);
416                         tail = (void *)rec - sizeof(*tail);
417                 }
418         }
419
420 out:
421         if (buf)
422                 OBD_FREE(buf, LLOG_CHUNK_SIZE);
423         RETURN(rc);
424 }
425 EXPORT_SYMBOL(llog_reverse_process);