Whamcloud - gitweb
Obsolete CURRENT_SECONDS and use cfs_time_current_sec() instead.
[fs/lustre-release.git] / lustre / obdclass / llog.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  *  Copyright (C) 2001-2003 Cluster File Systems, Inc.
5  *   Author: Andreas Dilger <adilger@clusterfs.com>
6  *
7  *   This file is part of the Lustre file system, http://www.lustre.org
8  *   Lustre is a trademark of Cluster File Systems, Inc.
9  *
10  *   You may have signed or agreed to another license before downloading
11  *   this software.  If so, you are bound by the terms and conditions
12  *   of that agreement, and the following does not apply to you.  See the
13  *   LICENSE file included with this distribution for more information.
14  *
15  *   If you did not agree to a different license, then this copy of Lustre
16  *   is open source software; you can redistribute it and/or modify it
17  *   under the terms of version 2 of the GNU General Public License as
18  *   published by the Free Software Foundation.
19  *
20  *   In either case, Lustre is distributed in the hope that it will be
21  *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
22  *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23  *   license text for more details.
24  *
25  * OST<->MDS recovery logging infrastructure.
26  *
27  * Invariants in implementation:
28  * - we do not share logs among different OST<->MDS connections, so that
29  *   if an OST or MDS fails it need only look at log(s) relevant to itself
30  */
31
32 #define DEBUG_SUBSYSTEM S_LOG
33
34 #ifndef EXPORT_SYMTAB
35 #define EXPORT_SYMTAB
36 #endif
37
38 #ifndef __KERNEL__
39 #include <liblustre.h>
40 #endif
41
42 #include <obd_class.h>
43 #include <lustre_log.h>
44 #include <libcfs/list.h>
45 #include "llog_internal.h"
46
47 /* Allocate a new log or catalog handle */
48 struct llog_handle *llog_alloc_handle(void)
49 {
50         struct llog_handle *loghandle;
51         ENTRY;
52
53         OBD_ALLOC(loghandle, sizeof(*loghandle));
54         if (loghandle == NULL)
55                 RETURN(ERR_PTR(-ENOMEM));
56
57         init_rwsem(&loghandle->lgh_lock);
58
59         RETURN(loghandle);
60 }
61 EXPORT_SYMBOL(llog_alloc_handle);
62
63
64 void llog_free_handle(struct llog_handle *loghandle)
65 {
66         if (!loghandle)
67                 return;
68
69         if (!loghandle->lgh_hdr)
70                 goto out;
71         if (loghandle->lgh_hdr->llh_flags & LLOG_F_IS_PLAIN)
72                 list_del_init(&loghandle->u.phd.phd_entry);
73         if (loghandle->lgh_hdr->llh_flags & LLOG_F_IS_CAT)
74                 LASSERT(list_empty(&loghandle->u.chd.chd_head));
75         OBD_FREE(loghandle->lgh_hdr, LLOG_CHUNK_SIZE);
76
77  out:
78         OBD_FREE(loghandle, sizeof(*loghandle));
79 }
80 EXPORT_SYMBOL(llog_free_handle);
81
82 /* returns negative on error; 0 if success; 1 if success & log destroyed */
83 int llog_cancel_rec(struct llog_handle *loghandle, int index)
84 {
85         struct llog_log_hdr *llh = loghandle->lgh_hdr;
86         int rc = 0;
87         ENTRY;
88
89         CDEBUG(D_RPCTRACE, "canceling %d in log "LPX64"\n",
90                index, loghandle->lgh_id.lgl_oid);
91
92         if (index == 0) {
93                 CERROR("cannot cancel index 0 (which is header)\n");
94                 RETURN(-EINVAL);
95         }
96
97         if (!ext2_clear_bit(index, llh->llh_bitmap)) {
98                 CDEBUG(D_RPCTRACE, "catalog index %u already clear?\n", index);
99                 RETURN(-EINVAL);
100         }
101
102         llh->llh_count--;
103
104         if ((llh->llh_flags & LLOG_F_ZAP_WHEN_EMPTY) &&
105             (llh->llh_count == 1) &&
106             (loghandle->lgh_last_idx == (LLOG_BITMAP_BYTES * 8) - 1)) {
107                 rc = llog_destroy(loghandle);
108                 if (rc) {
109                         CERROR("failure destroying log after last cancel: %d\n",
110                                rc);
111                         ext2_set_bit(index, llh->llh_bitmap);
112                         llh->llh_count++;
113                 } else {
114                         rc = 1;
115                 }
116                 RETURN(rc);
117         }
118
119         rc = llog_write_rec(loghandle, &llh->llh_hdr, NULL, 0, NULL, 0);
120         if (rc) {
121                 CERROR("failure re-writing header %d\n", rc);
122                 ext2_set_bit(index, llh->llh_bitmap);
123                 llh->llh_count++;
124         }
125         RETURN(rc);
126 }
127 EXPORT_SYMBOL(llog_cancel_rec);
128
129 int llog_init_handle(struct llog_handle *handle, int flags,
130                      struct obd_uuid *uuid)
131 {
132         int rc;
133         struct llog_log_hdr *llh;
134         ENTRY;
135         LASSERT(handle->lgh_hdr == NULL);
136
137         OBD_ALLOC(llh, sizeof(*llh));
138         if (llh == NULL)
139                 RETURN(-ENOMEM);
140         handle->lgh_hdr = llh;
141         /* first assign flags to use llog_client_ops */
142         llh->llh_flags = flags;
143         rc = llog_read_header(handle);
144         if (rc == 0) {
145                 flags = llh->llh_flags;
146                 if (uuid && !obd_uuid_equals(uuid, &llh->llh_tgtuuid)) {
147                         CERROR("uuid mismatch: %s/%s\n", (char *)uuid->uuid,
148                                (char *)llh->llh_tgtuuid.uuid);
149                         rc = -EEXIST;
150                 }
151                 GOTO(out, rc);
152         } else if (rc != LLOG_EEMPTY || !flags) {
153                 /* set a pesudo flag for initialization */
154                 flags = LLOG_F_IS_CAT;
155                 GOTO(out, rc);
156         }
157         rc = 0;
158
159         handle->lgh_last_idx = 0; /* header is record with index 0 */
160         llh->llh_count = 1;         /* for the header record */
161         llh->llh_hdr.lrh_type = LLOG_HDR_MAGIC;
162         llh->llh_hdr.lrh_len = llh->llh_tail.lrt_len = LLOG_CHUNK_SIZE;
163         llh->llh_hdr.lrh_index = llh->llh_tail.lrt_index = 0;
164         llh->llh_timestamp = cfs_time_current_sec();
165         if (uuid)
166                 memcpy(&llh->llh_tgtuuid, uuid, sizeof(llh->llh_tgtuuid));
167         llh->llh_bitmap_offset = offsetof(typeof(*llh),llh_bitmap);
168         ext2_set_bit(0, llh->llh_bitmap);
169
170 out:
171         if (flags & LLOG_F_IS_CAT) {
172                 CFS_INIT_LIST_HEAD(&handle->u.chd.chd_head);
173                 llh->llh_size = sizeof(struct llog_logid_rec);
174         } else if (flags & LLOG_F_IS_PLAIN) {
175                 CFS_INIT_LIST_HEAD(&handle->u.phd.phd_entry);
176         } else {
177                 CERROR("Unknown flags: %#x (Expected %#x or %#x\n",
178                        flags, LLOG_F_IS_CAT, LLOG_F_IS_PLAIN);
179                 LBUG();
180         }
181
182         if (rc) {
183                 OBD_FREE(llh, sizeof(*llh));
184                 handle->lgh_hdr = NULL;
185         }
186         RETURN(rc);
187 }
188 EXPORT_SYMBOL(llog_init_handle);
189
190 int llog_close(struct llog_handle *loghandle)
191 {
192         struct llog_operations *lop;
193         int rc;
194         ENTRY;
195
196         rc = llog_handle2ops(loghandle, &lop);
197         if (rc)
198                 GOTO(out, rc);
199         if (lop->lop_close == NULL)
200                 GOTO(out, -EOPNOTSUPP);
201         rc = lop->lop_close(loghandle);
202  out:
203         llog_free_handle(loghandle);
204         RETURN(rc);
205 }
206 EXPORT_SYMBOL(llog_close);
207
208 static int llog_process_thread(void *arg)
209 {
210         struct llog_process_info     *lpi = (struct llog_process_info *)arg;
211         struct llog_handle           *loghandle = lpi->lpi_loghandle;
212         struct llog_log_hdr          *llh = loghandle->lgh_hdr;
213         struct llog_process_cat_data *cd  = lpi->lpi_catdata;
214         char                         *buf;
215         __u64                         cur_offset = LLOG_CHUNK_SIZE;
216         __u64                         last_offset;
217         int                           rc = 0, index = 1, last_index;
218         int                           saved_index = 0, last_called_index = 0;
219
220         LASSERT(llh);
221
222         OBD_ALLOC(buf, LLOG_CHUNK_SIZE);
223         if (!buf) {
224                 lpi->lpi_rc = -ENOMEM;
225 #ifdef __KERNEL__
226                 complete(&lpi->lpi_completion);
227 #endif
228                 return 0;
229         }
230
231         cfs_daemonize("llog_process");
232
233         if (cd != NULL) {
234                 last_called_index = cd->first_idx;
235                 index = cd->first_idx + 1;
236         }
237         if (cd != NULL && cd->last_idx)
238                 last_index = cd->last_idx;
239         else
240                 last_index = LLOG_BITMAP_BYTES * 8 - 1;
241
242         while (rc == 0) {
243                 struct llog_rec_hdr *rec;
244
245                 /* skip records not set in bitmap */
246                 while (index <= last_index &&
247                        !ext2_test_bit(index, llh->llh_bitmap))
248                         ++index;
249
250                 LASSERT(index <= last_index + 1);
251                 if (index == last_index + 1)
252                         break;
253
254                 CDEBUG(D_OTHER, "index: %d last_index %d\n",
255                        index, last_index);
256
257                 /* get the buf with our target record; avoid old garbage */
258                 memset(buf, 0, LLOG_CHUNK_SIZE);
259                 last_offset = cur_offset;
260                 rc = llog_next_block(loghandle, &saved_index, index,
261                                      &cur_offset, buf, LLOG_CHUNK_SIZE);
262                 if (rc)
263                         GOTO(out, rc);
264
265                 /* NB: when rec->lrh_len is accessed it is already swabbed
266                  * since it is used at the "end" of the loop and the rec
267                  * swabbing is done at the beginning of the loop. */
268                 for (rec = (struct llog_rec_hdr *)buf;
269                      (char *)rec < buf + LLOG_CHUNK_SIZE;
270                      rec = (struct llog_rec_hdr *)((char *)rec + rec->lrh_len)){
271
272                         CDEBUG(D_OTHER, "processing rec 0x%p type %#x\n",
273                                rec, rec->lrh_type);
274
275                         if (LLOG_REC_HDR_NEEDS_SWABBING(rec))
276                                 lustre_swab_llog_rec(rec, NULL);
277
278                         CDEBUG(D_OTHER, "after swabbing, type=%#x idx=%d\n",
279                                rec->lrh_type, rec->lrh_index);
280
281                         if (rec->lrh_index == 0)
282                                 GOTO(out, 0); /* no more records */
283
284                         if (rec->lrh_len == 0 || rec->lrh_len >LLOG_CHUNK_SIZE){
285                                 CWARN("invalid length %d in llog record for "
286                                       "index %d/%d\n", rec->lrh_len,
287                                       rec->lrh_index, index);
288                                 GOTO(out, rc = -EINVAL);
289                         }
290
291                         if (rec->lrh_index < index) {
292                                 CDEBUG(D_OTHER, "skipping lrh_index %d\n",
293                                        rec->lrh_index);
294                                 continue;
295                         }
296
297                         CDEBUG(D_OTHER,
298                                "lrh_index: %d lrh_len: %d (%d remains)\n",
299                                rec->lrh_index, rec->lrh_len,
300                                (int)(buf + LLOG_CHUNK_SIZE - (char *)rec));
301
302                         loghandle->lgh_cur_idx = rec->lrh_index;
303                         loghandle->lgh_cur_offset = (char *)rec - (char *)buf +
304                                                     last_offset;
305
306                         /* if set, process the callback on this record */
307                         if (ext2_test_bit(index, llh->llh_bitmap)) {
308                                 rc = lpi->lpi_cb(loghandle, rec, lpi->lpi_cbdata);
309                                 last_called_index = index;
310                                 if (rc == LLOG_PROC_BREAK) {
311                                         CDEBUG(D_HA, "recovery from log: "LPX64
312                                                ":%x stopped\n",
313                                                loghandle->lgh_id.lgl_oid,
314                                                loghandle->lgh_id.lgl_ogen);
315                                         GOTO(out, rc);
316                                 } else if (rc == LLOG_DEL_RECORD) {
317                                         llog_cancel_rec(loghandle,
318                                                         rec->lrh_index);
319                                         rc = 0;
320                                 }
321                                 if (rc)
322                                         GOTO(out, rc);
323                         } else {
324                                 CDEBUG(D_OTHER, "Skipped index %d\n", index);
325                         }
326
327                         /* next record, still in buffer? */
328                         ++index;
329                         if (index > last_index)
330                                 GOTO(out, rc = 0);
331                 }
332         }
333
334  out:
335         if (cd != NULL)
336                 cd->last_idx = last_called_index;
337         if (buf)
338                 OBD_FREE(buf, LLOG_CHUNK_SIZE);
339         lpi->lpi_rc = rc;
340 #ifdef __KERNEL__
341         complete(&lpi->lpi_completion);
342 #endif
343         return 0;
344 }
345
346 int llog_process(struct llog_handle *loghandle, llog_cb_t cb,
347                  void *data, void *catdata)
348 {
349         struct llog_process_info *lpi;
350         int                      rc;
351         ENTRY;
352
353         OBD_ALLOC_PTR(lpi);
354         if (lpi == NULL) {
355                 CERROR("cannot alloc pointer\n");
356                 RETURN(-ENOMEM);
357         }
358         lpi->lpi_loghandle = loghandle;
359         lpi->lpi_cb        = cb;
360         lpi->lpi_cbdata    = data;
361         lpi->lpi_catdata   = catdata;
362
363 #ifdef __KERNEL__
364         init_completion(&lpi->lpi_completion);
365         rc = cfs_kernel_thread(llog_process_thread, lpi, CLONE_VM | CLONE_FILES);
366         if (rc < 0) {
367                 CERROR("cannot start thread: %d\n", rc);
368                 OBD_FREE_PTR(lpi);
369                 RETURN(rc);
370         }
371         wait_for_completion(&lpi->lpi_completion);
372 #else
373         llog_process_thread(lpi);
374 #endif
375         rc = lpi->lpi_rc;
376         OBD_FREE_PTR(lpi);
377         RETURN(rc);
378 }
379 EXPORT_SYMBOL(llog_process);
380
381 inline int llog_get_size(struct llog_handle *loghandle)
382 {
383         if (loghandle && loghandle->lgh_hdr)
384                 return loghandle->lgh_hdr->llh_count;
385         return 0;
386 }
387 EXPORT_SYMBOL(llog_get_size);
388
389 int llog_reverse_process(struct llog_handle *loghandle, llog_cb_t cb,
390                          void *data, void *catdata)
391 {
392         struct llog_log_hdr *llh = loghandle->lgh_hdr;
393         struct llog_process_cat_data *cd = catdata;
394         void *buf;
395         int rc = 0, first_index = 1, index, idx;
396         ENTRY;
397
398         OBD_ALLOC(buf, LLOG_CHUNK_SIZE);
399         if (!buf)
400                 RETURN(-ENOMEM);
401
402         if (cd != NULL)
403                 first_index = cd->first_idx + 1;
404         if (cd != NULL && cd->last_idx)
405                 index = cd->last_idx;
406         else
407                 index = LLOG_BITMAP_BYTES * 8 - 1;
408
409         while (rc == 0) {
410                 struct llog_rec_hdr *rec;
411                 struct llog_rec_tail *tail;
412
413                 /* skip records not set in bitmap */
414                 while (index >= first_index &&
415                        !ext2_test_bit(index, llh->llh_bitmap))
416                         --index;
417
418                 LASSERT(index >= first_index - 1);
419                 if (index == first_index - 1)
420                         break;
421
422                 /* get the buf with our target record; avoid old garbage */
423                 memset(buf, 0, LLOG_CHUNK_SIZE);
424                 rc = llog_prev_block(loghandle, index, buf, LLOG_CHUNK_SIZE);
425                 if (rc)
426                         GOTO(out, rc);
427
428                 rec = buf;
429                 idx = le32_to_cpu(rec->lrh_index);
430                 if (idx < index)
431                         CDEBUG(D_RPCTRACE, "index %u : idx %u\n", index, idx);
432                 while (idx < index) {
433                         rec = ((void *)rec + le32_to_cpu(rec->lrh_len));
434                         idx ++;
435                 }
436                 tail = (void *)rec + le32_to_cpu(rec->lrh_len) - sizeof(*tail);
437
438                 /* process records in buffer, starting where we found one */
439                 while ((void *)tail > buf) {
440                         rec = (void *)tail - le32_to_cpu(tail->lrt_len) +
441                                 sizeof(*tail);
442
443                         if (rec->lrh_index == 0)
444                                 GOTO(out, 0); /* no more records */
445
446                         /* if set, process the callback on this record */
447                         if (ext2_test_bit(index, llh->llh_bitmap)) {
448                                 rc = cb(loghandle, rec, data);
449                                 if (rc == LLOG_PROC_BREAK) {
450                                         CWARN("recovery from log: "LPX64":%x"
451                                               " stopped\n",
452                                               loghandle->lgh_id.lgl_oid,
453                                               loghandle->lgh_id.lgl_ogen);
454                                         GOTO(out, rc);
455                                 }
456                                 if (rc)
457                                         GOTO(out, rc);
458                         }
459
460                         /* previous record, still in buffer? */
461                         --index;
462                         if (index < first_index)
463                                 GOTO(out, rc = 0);
464                         tail = (void *)rec - sizeof(*tail);
465                 }
466         }
467
468 out:
469         if (buf)
470                 OBD_FREE(buf, LLOG_CHUNK_SIZE);
471         RETURN(rc);
472 }
473 EXPORT_SYMBOL(llog_reverse_process);