Whamcloud - gitweb
1e0c1fd64cc6c0d0aae84f985a3b4b0f4d421d7e
[fs/lustre-release.git] / lustre / obdclass / llog_cat.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  *
30  * Copyright (c) 2012, 2015, Intel Corporation.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/obdclass/llog_cat.c
37  *
38  * OST<->MDS recovery logging infrastructure.
39  *
40  * Invariants in implementation:
41  * - we do not share logs among different OST<->MDS connections, so that
42  *   if an OST or MDS fails it need only look at log(s) relevant to itself
43  *
44  * Author: Andreas Dilger <adilger@clusterfs.com>
45  * Author: Alexey Zhuravlev <alexey.zhuravlev@intel.com>
46  * Author: Mikhail Pershin <mike.pershin@intel.com>
47  */
48
49 #define DEBUG_SUBSYSTEM S_LOG
50
51
52 #include <obd_class.h>
53
54 #include "llog_internal.h"
55
56 /* Create a new log handle and add it to the open list.
57  * This log handle will be closed when all of the records in it are removed.
58  *
59  * Assumes caller has already pushed us into the kernel context and is locking.
60  */
61 static int llog_cat_new_log(const struct lu_env *env,
62                             struct llog_handle *cathandle,
63                             struct llog_handle *loghandle,
64                             struct thandle *th)
65 {
66         struct llog_thread_info *lgi = llog_info(env);
67         struct llog_logid_rec   *rec = &lgi->lgi_logid;
68         struct thandle *handle = NULL;
69         struct dt_device *dt = NULL;
70         struct llog_log_hdr     *llh = cathandle->lgh_hdr;
71         int                      rc, index;
72
73         ENTRY;
74
75         index = (cathandle->lgh_last_idx + 1) %
76                 (OBD_FAIL_PRECHECK(OBD_FAIL_CAT_RECORDS) ? (cfs_fail_val + 1) :
77                                                 LLOG_HDR_BITMAP_SIZE(llh));
78
79         /* check that new llog index will not overlap with the first one.
80          * - llh_cat_idx is the index just before the first/oldest still in-use
81          *      index in catalog
82          * - lgh_last_idx is the last/newest used index in catalog
83          *
84          * When catalog is not wrapped yet then lgh_last_idx is always larger
85          * than llh_cat_idx. After the wrap around lgh_last_idx re-starts
86          * from 0 and llh_cat_idx becomes the upper limit for it
87          *
88          * Check if catalog has already wrapped around or not by comparing
89          * last_idx and cat_idx */
90         if ((index == llh->llh_cat_idx + 1 && llh->llh_count > 1) ||
91             (index == 0 && llh->llh_cat_idx == 0)) {
92                 CWARN("%s: there are no more free slots in catalog\n",
93                       loghandle->lgh_ctxt->loc_obd->obd_name);
94                 RETURN(-ENOSPC);
95         }
96
97         if (OBD_FAIL_CHECK(OBD_FAIL_MDS_LLOG_CREATE_FAILED))
98                 RETURN(-ENOSPC);
99
100         if (loghandle->lgh_hdr != NULL) {
101                 /* If llog object is remote and creation is failed, lgh_hdr
102                  * might be left over here, free it first */
103                 LASSERT(!llog_exist(loghandle));
104                 OBD_FREE_LARGE(loghandle->lgh_hdr, loghandle->lgh_hdr_size);
105                 loghandle->lgh_hdr = NULL;
106         }
107
108         if (th == NULL) {
109                 dt = lu2dt_dev(cathandle->lgh_obj->do_lu.lo_dev);
110
111                 handle = dt_trans_create(env, dt);
112                 if (IS_ERR(handle))
113                         RETURN(PTR_ERR(handle));
114
115                 /* Create update llog object synchronously, which
116                  * happens during inialization process see
117                  * lod_sub_prep_llog(), to make sure the update
118                  * llog object is created before corss-MDT writing
119                  * updates into the llog object */
120                 if (cathandle->lgh_ctxt->loc_flags & LLOG_CTXT_FLAG_NORMAL_FID)
121                         handle->th_sync = 1;
122
123                 handle->th_wait_submit = 1;
124
125                 rc = llog_declare_create(env, loghandle, handle);
126                 if (rc != 0)
127                         GOTO(out, rc);
128
129                 rec->lid_hdr.lrh_len = sizeof(*rec);
130                 rec->lid_hdr.lrh_type = LLOG_LOGID_MAGIC;
131                 rec->lid_id = loghandle->lgh_id;
132                 rc = llog_declare_write_rec(env, cathandle, &rec->lid_hdr, -1,
133                                             handle);
134                 if (rc != 0)
135                         GOTO(out, rc);
136
137                 rc = dt_trans_start_local(env, dt, handle);
138                 if (rc != 0)
139                         GOTO(out, rc);
140
141                 th = handle;
142         }
143
144         rc = llog_create(env, loghandle, th);
145         /* if llog is already created, no need to initialize it */
146         if (rc == -EEXIST) {
147                 GOTO(out, rc = 0);
148         } else if (rc != 0) {
149                 CERROR("%s: can't create new plain llog in catalog: rc = %d\n",
150                        loghandle->lgh_ctxt->loc_obd->obd_name, rc);
151                 GOTO(out, rc);
152         }
153
154         rc = llog_init_handle(env, loghandle,
155                               LLOG_F_IS_PLAIN | LLOG_F_ZAP_WHEN_EMPTY,
156                               &cathandle->lgh_hdr->llh_tgtuuid);
157         if (rc < 0)
158                 GOTO(out, rc);
159
160         /* build the record for this log in the catalog */
161         rec->lid_hdr.lrh_len = sizeof(*rec);
162         rec->lid_hdr.lrh_type = LLOG_LOGID_MAGIC;
163         rec->lid_id = loghandle->lgh_id;
164
165         /* append the new record into catalog. The new index will be
166          * assigned to the record and updated in rec header */
167         rc = llog_write_rec(env, cathandle, &rec->lid_hdr,
168                             &loghandle->u.phd.phd_cookie, LLOG_NEXT_IDX, th);
169         if (rc < 0)
170                 GOTO(out_destroy, rc);
171
172         CDEBUG(D_OTHER, "new plain log "DOSTID":%x for index %u of catalog"
173                DOSTID"\n", POSTID(&loghandle->lgh_id.lgl_oi),
174                loghandle->lgh_id.lgl_ogen, rec->lid_hdr.lrh_index,
175                POSTID(&cathandle->lgh_id.lgl_oi));
176
177         loghandle->lgh_hdr->llh_cat_idx = rec->lid_hdr.lrh_index;
178 out:
179         if (handle != NULL) {
180                 handle->th_result = rc >= 0 ? 0 : rc;
181                 dt_trans_stop(env, dt, handle);
182         }
183         RETURN(rc);
184
185 out_destroy:
186         /* to signal llog_cat_close() it shouldn't try to destroy the llog,
187          * we want to destroy it in this transaction, otherwise the object
188          * becomes an orphan */
189         loghandle->lgh_hdr->llh_flags &= ~LLOG_F_ZAP_WHEN_EMPTY;
190         /* this is to mimic full log, so another llog_cat_current_log()
191          * can skip it and ask for another onet */
192         loghandle->lgh_last_idx = LLOG_HDR_BITMAP_SIZE(llh) + 1;
193         llog_trans_destroy(env, loghandle, th);
194         RETURN(rc);
195 }
196
197 /* Open an existent log handle and add it to the open list.
198  * This log handle will be closed when all of the records in it are removed.
199  *
200  * Assumes caller has already pushed us into the kernel context and is locking.
201  * We return a lock on the handle to ensure nobody yanks it from us.
202  *
203  * This takes extra reference on llog_handle via llog_handle_get() and require
204  * this reference to be put by caller using llog_handle_put()
205  */
206 int llog_cat_id2handle(const struct lu_env *env, struct llog_handle *cathandle,
207                        struct llog_handle **res, struct llog_logid *logid)
208 {
209         struct llog_handle      *loghandle;
210         enum llog_flag           fmt;
211         int                      rc = 0;
212
213         ENTRY;
214
215         if (cathandle == NULL)
216                 RETURN(-EBADF);
217
218         fmt = cathandle->lgh_hdr->llh_flags & LLOG_F_EXT_MASK;
219         down_write(&cathandle->lgh_lock);
220         list_for_each_entry(loghandle, &cathandle->u.chd.chd_head,
221                             u.phd.phd_entry) {
222                 struct llog_logid *cgl = &loghandle->lgh_id;
223
224                 if (ostid_id(&cgl->lgl_oi) == ostid_id(&logid->lgl_oi) &&
225                     ostid_seq(&cgl->lgl_oi) == ostid_seq(&logid->lgl_oi)) {
226                         if (cgl->lgl_ogen != logid->lgl_ogen) {
227                                 CERROR("%s: log "DOSTID" generation %x != %x\n",
228                                        loghandle->lgh_ctxt->loc_obd->obd_name,
229                                        POSTID(&logid->lgl_oi), cgl->lgl_ogen,
230                                        logid->lgl_ogen);
231                                 continue;
232                         }
233                         loghandle->u.phd.phd_cat_handle = cathandle;
234                         up_write(&cathandle->lgh_lock);
235                         GOTO(out, rc = 0);
236                 }
237         }
238         up_write(&cathandle->lgh_lock);
239
240         rc = llog_open(env, cathandle->lgh_ctxt, &loghandle, logid, NULL,
241                        LLOG_OPEN_EXISTS);
242         if (rc < 0) {
243                 CERROR("%s: error opening log id "DOSTID":%x: rc = %d\n",
244                        cathandle->lgh_ctxt->loc_obd->obd_name,
245                        POSTID(&logid->lgl_oi), logid->lgl_ogen, rc);
246                 RETURN(rc);
247         }
248
249         rc = llog_init_handle(env, loghandle, LLOG_F_IS_PLAIN | fmt, NULL);
250         if (rc < 0) {
251                 llog_close(env, loghandle);
252                 loghandle = NULL;
253                 RETURN(rc);
254         }
255
256         down_write(&cathandle->lgh_lock);
257         list_add_tail(&loghandle->u.phd.phd_entry, &cathandle->u.chd.chd_head);
258         up_write(&cathandle->lgh_lock);
259
260         loghandle->u.phd.phd_cat_handle = cathandle;
261         loghandle->u.phd.phd_cookie.lgc_lgl = cathandle->lgh_id;
262         loghandle->u.phd.phd_cookie.lgc_index =
263                                 loghandle->lgh_hdr->llh_cat_idx;
264         EXIT;
265 out:
266         llog_handle_get(loghandle);
267         *res = loghandle;
268         return 0;
269 }
270
271 int llog_cat_close(const struct lu_env *env, struct llog_handle *cathandle)
272 {
273         struct llog_handle      *loghandle, *n;
274         int                      rc;
275
276         ENTRY;
277
278         list_for_each_entry_safe(loghandle, n, &cathandle->u.chd.chd_head,
279                                  u.phd.phd_entry) {
280                 struct llog_log_hdr     *llh = loghandle->lgh_hdr;
281                 int                      index;
282
283                 /* unlink open-not-created llogs */
284                 list_del_init(&loghandle->u.phd.phd_entry);
285                 llh = loghandle->lgh_hdr;
286                 if (loghandle->lgh_obj != NULL && llh != NULL &&
287                     (llh->llh_flags & LLOG_F_ZAP_WHEN_EMPTY) &&
288                     (llh->llh_count == 1)) {
289                         rc = llog_destroy(env, loghandle);
290                         if (rc)
291                                 CERROR("%s: failure destroying log during "
292                                        "cleanup: rc = %d\n",
293                                        loghandle->lgh_ctxt->loc_obd->obd_name,
294                                        rc);
295
296                         index = loghandle->u.phd.phd_cookie.lgc_index;
297                         llog_cat_cleanup(env, cathandle, NULL, index);
298                 }
299                 llog_close(env, loghandle);
300         }
301         /* if handle was stored in ctxt, remove it too */
302         if (cathandle->lgh_ctxt->loc_handle == cathandle)
303                 cathandle->lgh_ctxt->loc_handle = NULL;
304         rc = llog_close(env, cathandle);
305         RETURN(rc);
306 }
307 EXPORT_SYMBOL(llog_cat_close);
308
/**
 * Lockdep subclasses used when struct llog_handle::lgh_lock is taken in a
 * nested fashion: LLOGH_CAT is passed for a catalog's lock, LLOGH_LOG for
 * the lock of a plain llog.
 */
enum {
	LLOGH_CAT = 0,
	LLOGH_LOG = 1
};
316
317 /** Return the currently active log handle.  If the current log handle doesn't
318  * have enough space left for the current record, start a new one.
319  *
320  * If reclen is 0, we only want to know what the currently active log is,
321  * otherwise we get a lock on this log so nobody can steal our space.
322  *
323  * Assumes caller has already pushed us into the kernel context and is locking.
324  *
325  * NOTE: loghandle is write-locked upon successful return
326  */
327 static struct llog_handle *llog_cat_current_log(struct llog_handle *cathandle,
328                                                 struct thandle *th)
329 {
330         struct llog_handle *loghandle = NULL;
331         ENTRY;
332
333
334         if (OBD_FAIL_CHECK(OBD_FAIL_MDS_LLOG_CREATE_FAILED2)) {
335                 down_write_nested(&cathandle->lgh_lock, LLOGH_CAT);
336                 GOTO(next, loghandle);
337         }
338
339         down_read_nested(&cathandle->lgh_lock, LLOGH_CAT);
340         loghandle = cathandle->u.chd.chd_current_log;
341         if (loghandle) {
342                 struct llog_log_hdr *llh;
343
344                 down_write_nested(&loghandle->lgh_lock, LLOGH_LOG);
345                 llh = loghandle->lgh_hdr;
346                 if (llh == NULL || !llog_is_full(loghandle)) {
347                         up_read(&cathandle->lgh_lock);
348                         RETURN(loghandle);
349                 } else {
350                         up_write(&loghandle->lgh_lock);
351                 }
352         }
353         up_read(&cathandle->lgh_lock);
354
355         /* time to use next log */
356
357         /* first, we have to make sure the state hasn't changed */
358         down_write_nested(&cathandle->lgh_lock, LLOGH_CAT);
359         loghandle = cathandle->u.chd.chd_current_log;
360         if (loghandle) {
361                 struct llog_log_hdr *llh;
362
363                 down_write_nested(&loghandle->lgh_lock, LLOGH_LOG);
364                 llh = loghandle->lgh_hdr;
365                 LASSERT(llh);
366                 if (!llog_is_full(loghandle))
367                         GOTO(out_unlock, loghandle);
368                 else
369                         up_write(&loghandle->lgh_lock);
370         }
371
372 next:
373         /* Sigh, the chd_next_log and chd_current_log is initialized
374          * in declare phase, and we do not serialize the catlog
375          * accessing, so it might be possible the llog creation
376          * thread (see llog_cat_declare_add_rec()) did not create
377          * llog successfully, then the following thread might
378          * meet this situation. */
379         if (IS_ERR_OR_NULL(cathandle->u.chd.chd_next_log)) {
380                 CERROR("%s: next log does not exist!\n",
381                        cathandle->lgh_ctxt->loc_obd->obd_name);
382                 loghandle = ERR_PTR(-EIO);
383                 if (cathandle->u.chd.chd_next_log == NULL) {
384                         /* Store the error in chd_next_log, so
385                          * the following process can get correct
386                          * failure value */
387                         cathandle->u.chd.chd_next_log = loghandle;
388                 }
389                 GOTO(out_unlock, loghandle);
390         }
391
392         CDEBUG(D_INODE, "use next log\n");
393
394         loghandle = cathandle->u.chd.chd_next_log;
395         cathandle->u.chd.chd_current_log = loghandle;
396         cathandle->u.chd.chd_next_log = NULL;
397         down_write_nested(&loghandle->lgh_lock, LLOGH_LOG);
398
399 out_unlock:
400         up_write(&cathandle->lgh_lock);
401         LASSERT(loghandle);
402         RETURN(loghandle);
403 }
404
405 static int llog_cat_update_header(const struct lu_env *env,
406                            struct llog_handle *cathandle)
407 {
408         struct llog_handle *loghandle;
409         int rc;
410         ENTRY;
411
412         /* refresh llog */
413         down_write(&cathandle->lgh_lock);
414         if (!cathandle->lgh_stale) {
415                 up_write(&cathandle->lgh_lock);
416                 RETURN(0);
417         }
418         list_for_each_entry(loghandle, &cathandle->u.chd.chd_head,
419                             u.phd.phd_entry) {
420                 if (!llog_exist(loghandle))
421                         continue;
422
423                 rc = llog_read_header(env, loghandle, NULL);
424                 if (rc != 0) {
425                         up_write(&cathandle->lgh_lock);
426                         GOTO(out, rc);
427                 }
428         }
429         rc = llog_read_header(env, cathandle, NULL);
430         if (rc == 0)
431                 cathandle->lgh_stale = 0;
432         up_write(&cathandle->lgh_lock);
433         if (rc != 0)
434                 GOTO(out, rc);
435 out:
436         RETURN(rc);
437 }
438
439 /* Add a single record to the recovery log(s) using a catalog
440  * Returns as llog_write_record
441  *
442  * Assumes caller has already pushed us into the kernel context.
443  */
444 int llog_cat_add_rec(const struct lu_env *env, struct llog_handle *cathandle,
445                      struct llog_rec_hdr *rec, struct llog_cookie *reccookie,
446                      struct thandle *th)
447 {
448         struct llog_handle *loghandle;
449         int rc, retried = 0;
450         ENTRY;
451
452         LASSERT(rec->lrh_len <= cathandle->lgh_ctxt->loc_chunk_size);
453
454 retry:
455         loghandle = llog_cat_current_log(cathandle, th);
456         if (IS_ERR(loghandle))
457                 RETURN(PTR_ERR(loghandle));
458
459         /* loghandle is already locked by llog_cat_current_log() for us */
460         if (!llog_exist(loghandle)) {
461                 rc = llog_cat_new_log(env, cathandle, loghandle, th);
462                 if (rc < 0) {
463                         up_write(&loghandle->lgh_lock);
464                         /* nobody should be trying to use this llog */
465                         down_write(&cathandle->lgh_lock);
466                         if (cathandle->u.chd.chd_current_log == loghandle)
467                                 cathandle->u.chd.chd_current_log = NULL;
468                         up_write(&cathandle->lgh_lock);
469                         RETURN(rc);
470                 }
471         }
472         /* now let's try to add the record */
473         rc = llog_write_rec(env, loghandle, rec, reccookie, LLOG_NEXT_IDX, th);
474         if (rc < 0) {
475                 CDEBUG_LIMIT(rc == -ENOSPC ? D_HA : D_ERROR,
476                              "llog_write_rec %d: lh=%p\n", rc, loghandle);
477                 /* -ENOSPC is returned if no empty records left
478                  * and when it's lack of space on the stogage.
479                  * there is no point to try again if it's the second
480                  * case. many callers (like llog test) expect ENOSPC,
481                  * so we preserve this error code, but look for the
482                  * actual cause here */
483                 if (rc == -ENOSPC && llog_is_full(loghandle))
484                         rc = -ENOBUFS;
485         }
486         up_write(&loghandle->lgh_lock);
487
488         if (rc == -ENOBUFS) {
489                 if (retried++ == 0)
490                         GOTO(retry, rc);
491                 CERROR("%s: error on 2nd llog: rc = %d\n",
492                        cathandle->lgh_ctxt->loc_obd->obd_name, rc);
493         }
494
495         RETURN(rc);
496 }
497 EXPORT_SYMBOL(llog_cat_add_rec);
498
499 int llog_cat_declare_add_rec(const struct lu_env *env,
500                              struct llog_handle *cathandle,
501                              struct llog_rec_hdr *rec, struct thandle *th)
502 {
503         struct llog_thread_info *lgi = llog_info(env);
504         struct llog_logid_rec   *lirec = &lgi->lgi_logid;
505         struct llog_handle      *loghandle, *next;
506         int                      rc = 0;
507
508         ENTRY;
509
510         if (cathandle->u.chd.chd_current_log == NULL) {
511                 /* declare new plain llog */
512                 down_write(&cathandle->lgh_lock);
513                 if (cathandle->u.chd.chd_current_log == NULL) {
514                         rc = llog_open(env, cathandle->lgh_ctxt, &loghandle,
515                                        NULL, NULL, LLOG_OPEN_NEW);
516                         if (rc == 0) {
517                                 cathandle->u.chd.chd_current_log = loghandle;
518                                 list_add_tail(&loghandle->u.phd.phd_entry,
519                                               &cathandle->u.chd.chd_head);
520                         }
521                 }
522                 up_write(&cathandle->lgh_lock);
523         } else if (cathandle->u.chd.chd_next_log == NULL ||
524                    IS_ERR(cathandle->u.chd.chd_next_log)) {
525                 /* declare next plain llog */
526                 down_write(&cathandle->lgh_lock);
527                 if (cathandle->u.chd.chd_next_log == NULL ||
528                     IS_ERR(cathandle->u.chd.chd_next_log)) {
529                         rc = llog_open(env, cathandle->lgh_ctxt, &loghandle,
530                                        NULL, NULL, LLOG_OPEN_NEW);
531                         if (rc == 0) {
532                                 cathandle->u.chd.chd_next_log = loghandle;
533                                 list_add_tail(&loghandle->u.phd.phd_entry,
534                                               &cathandle->u.chd.chd_head);
535                         }
536                 }
537                 up_write(&cathandle->lgh_lock);
538         }
539         if (rc)
540                 GOTO(out, rc);
541
542         lirec->lid_hdr.lrh_len = sizeof(*lirec);
543
544         if (!llog_exist(cathandle->u.chd.chd_current_log)) {
545                 if (dt_object_remote(cathandle->lgh_obj)) {
546                         /* For remote operation, if we put the llog object
547                          * creation in the current transaction, then the
548                          * llog object will not be created on the remote
549                          * target until the transaction stop, if other
550                          * operations start before the transaction stop,
551                          * and use the same llog object, will be dependent
552                          * on the success of this transaction. So let's
553                          * create the llog object synchronously here to
554                          * remove the dependency. */
555 create_again:
556                         down_read_nested(&cathandle->lgh_lock, LLOGH_CAT);
557                         loghandle = cathandle->u.chd.chd_current_log;
558                         down_write_nested(&loghandle->lgh_lock, LLOGH_LOG);
559                         if (cathandle->lgh_stale) {
560                                 up_write(&loghandle->lgh_lock);
561                                 up_read(&cathandle->lgh_lock);
562                                 GOTO(out, rc = -EIO);
563                         }
564                         if (!llog_exist(loghandle)) {
565                                 rc = llog_cat_new_log(env, cathandle, loghandle,
566                                                       NULL);
567                                 if (rc == -ESTALE)
568                                         cathandle->lgh_stale = 1;
569                         }
570                         up_write(&loghandle->lgh_lock);
571                         up_read(&cathandle->lgh_lock);
572                         if (rc == -ESTALE) {
573                                 rc = llog_cat_update_header(env, cathandle);
574                                 if (rc != 0)
575                                         GOTO(out, rc);
576                                 goto create_again;
577                         } else if (rc < 0) {
578                                 GOTO(out, rc);
579                         }
580                 } else {
581                         rc = llog_declare_create(env,
582                                         cathandle->u.chd.chd_current_log, th);
583                         if (rc)
584                                 GOTO(out, rc);
585                         llog_declare_write_rec(env, cathandle,
586                                                &lirec->lid_hdr, -1, th);
587                 }
588         }
589
590 write_again:
591         /* declare records in the llogs */
592         rc = llog_declare_write_rec(env, cathandle->u.chd.chd_current_log,
593                                     rec, -1, th);
594         if (rc == -ESTALE) {
595                 down_write(&cathandle->lgh_lock);
596                 if (cathandle->lgh_stale) {
597                         up_write(&cathandle->lgh_lock);
598                         GOTO(out, rc = -EIO);
599                 }
600
601                 cathandle->lgh_stale = 1;
602                 up_write(&cathandle->lgh_lock);
603                 rc = llog_cat_update_header(env, cathandle);
604                 if (rc != 0)
605                         GOTO(out, rc);
606                 goto write_again;
607         } else if (rc < 0) {
608                 GOTO(out, rc);
609         }
610
611         next = cathandle->u.chd.chd_next_log;
612         if (!IS_ERR_OR_NULL(next)) {
613                 if (!llog_exist(next)) {
614                         if (dt_object_remote(cathandle->lgh_obj)) {
615                                 /* For remote operation, if we put the llog
616                                  * object creation in the current transaction,
617                                  * then the llog object will not be created on
618                                  * the remote target until the transaction stop,
619                                  * if other operations start before the
620                                  * transaction stop, and use the same llog
621                                  * object, will be dependent on the success of
622                                  * this transaction. So let's create the llog
623                                  * object synchronously here to remove the
624                                  * dependency. */
625                                 down_write_nested(&cathandle->lgh_lock,
626                                                  LLOGH_CAT);
627                                 next = cathandle->u.chd.chd_next_log;
628                                 if (IS_ERR_OR_NULL(next)) {
629                                         /* Sigh, another thread just tried,
630                                          * let's fail as well */
631                                         up_write(&cathandle->lgh_lock);
632                                         if (next == NULL)
633                                                 rc = -EIO;
634                                         else
635                                                 rc = PTR_ERR(next);
636                                         GOTO(out, rc);
637                                 }
638
639                                 down_write_nested(&next->lgh_lock, LLOGH_LOG);
640                                 if (!llog_exist(next)) {
641                                         rc = llog_cat_new_log(env, cathandle,
642                                                               next, NULL);
643                                         if (rc < 0)
644                                                 cathandle->u.chd.chd_next_log =
645                                                                 ERR_PTR(rc);
646                                 }
647                                 up_write(&next->lgh_lock);
648                                 up_write(&cathandle->lgh_lock);
649                                 if (rc < 0)
650                                         GOTO(out, rc);
651                         } else {
652                                 rc = llog_declare_create(env, next, th);
653                                 llog_declare_write_rec(env, cathandle,
654                                                 &lirec->lid_hdr, -1, th);
655                         }
656                 }
657                 /* XXX: we hope for declarations made for existing llog
658                  *      this might be not correct with some backends
659                  *      where declarations are expected against specific
660                  *      object like ZFS with full debugging enabled */
661                 /*llog_declare_write_rec(env, next, rec, -1, th);*/
662         }
663 out:
664         RETURN(rc);
665 }
666 EXPORT_SYMBOL(llog_cat_declare_add_rec);
667
668 int llog_cat_add(const struct lu_env *env, struct llog_handle *cathandle,
669                  struct llog_rec_hdr *rec, struct llog_cookie *reccookie)
670 {
671         struct llog_ctxt        *ctxt;
672         struct dt_device        *dt;
673         struct thandle          *th = NULL;
674         int                      rc;
675
676         ctxt = cathandle->lgh_ctxt;
677         LASSERT(ctxt);
678         LASSERT(ctxt->loc_exp);
679
680         LASSERT(cathandle->lgh_obj != NULL);
681         dt = lu2dt_dev(cathandle->lgh_obj->do_lu.lo_dev);
682
683         th = dt_trans_create(env, dt);
684         if (IS_ERR(th))
685                 RETURN(PTR_ERR(th));
686
687         rc = llog_cat_declare_add_rec(env, cathandle, rec, th);
688         if (rc)
689                 GOTO(out_trans, rc);
690
691         rc = dt_trans_start_local(env, dt, th);
692         if (rc)
693                 GOTO(out_trans, rc);
694         rc = llog_cat_add_rec(env, cathandle, rec, reccookie, th);
695 out_trans:
696         dt_trans_stop(env, dt, th);
697         RETURN(rc);
698 }
699 EXPORT_SYMBOL(llog_cat_add);
700
701 /* For each cookie in the cookie array, we clear the log in-use bit and either:
702  * - the log is empty, so mark it free in the catalog header and delete it
703  * - the log is not empty, just write out the log header
704  *
705  * The cookies may be in different log files, so we need to get new logs
706  * each time.
707  *
708  * Assumes caller has already pushed us into the kernel context.
709  */
710 int llog_cat_cancel_records(const struct lu_env *env,
711                             struct llog_handle *cathandle, int count,
712                             struct llog_cookie *cookies)
713 {
714         int i, index, rc = 0, failed = 0;
715
716         ENTRY;
717
718         for (i = 0; i < count; i++, cookies++) {
719                 struct llog_handle      *loghandle;
720                 struct llog_logid       *lgl = &cookies->lgc_lgl;
721                 int                      lrc;
722
723                 rc = llog_cat_id2handle(env, cathandle, &loghandle, lgl);
724                 if (rc) {
725                         CERROR("%s: cannot find handle for llog "DOSTID": %d\n",
726                                cathandle->lgh_ctxt->loc_obd->obd_name,
727                                POSTID(&lgl->lgl_oi), rc);
728                         failed++;
729                         continue;
730                 }
731
732                 lrc = llog_cancel_rec(env, loghandle, cookies->lgc_index);
733                 if (lrc == LLOG_DEL_PLAIN) { /* log has been destroyed */
734                         index = loghandle->u.phd.phd_cookie.lgc_index;
735                         rc = llog_cat_cleanup(env, cathandle, loghandle,
736                                               index);
737                 } else if (lrc == -ENOENT) {
738                         if (rc == 0) /* ENOENT shouldn't rewrite any error */
739                                 rc = lrc;
740                 } else if (lrc < 0) {
741                         failed++;
742                         rc = lrc;
743                 }
744                 llog_handle_put(loghandle);
745         }
746         if (rc)
747                 CERROR("%s: fail to cancel %d of %d llog-records: rc = %d\n",
748                        cathandle->lgh_ctxt->loc_obd->obd_name, failed, count,
749                        rc);
750
751         RETURN(rc);
752 }
753 EXPORT_SYMBOL(llog_cat_cancel_records);
754
755 static int llog_cat_process_cb(const struct lu_env *env,
756                                struct llog_handle *cat_llh,
757                                struct llog_rec_hdr *rec, void *data)
758 {
759         struct llog_process_data *d = data;
760         struct llog_logid_rec *lir = (struct llog_logid_rec *)rec;
761         struct llog_handle *llh;
762         struct llog_log_hdr *hdr;
763         int rc;
764
765         ENTRY;
766         if (rec->lrh_type != LLOG_LOGID_MAGIC) {
767                 CERROR("invalid record in catalog\n");
768                 RETURN(-EINVAL);
769         }
770         CDEBUG(D_HA, "processing log "DOSTID":%x at index %u of catalog "
771                DOSTID"\n", POSTID(&lir->lid_id.lgl_oi), lir->lid_id.lgl_ogen,
772                rec->lrh_index, POSTID(&cat_llh->lgh_id.lgl_oi));
773
774         rc = llog_cat_id2handle(env, cat_llh, &llh, &lir->lid_id);
775         if (rc) {
776                 CERROR("%s: cannot find handle for llog "DOSTID": %d\n",
777                        cat_llh->lgh_ctxt->loc_obd->obd_name,
778                        POSTID(&lir->lid_id.lgl_oi), rc);
779                 if (rc == -ENOENT || rc == -ESTALE) {
780                         /* After a server crash, a stub of index
781                          * record in catlog could be kept, because
782                          * plain log destroy + catlog index record
783                          * deletion are not atomic. So we end up with
784                          * an index but no actual record. Destroy the
785                          * index and move on. */
786                         rc = llog_cat_cleanup(env, cat_llh, NULL,
787                                               rec->lrh_index);
788                 }
789
790                 RETURN(rc);
791         }
792
793         /* clean old empty llogs, do not consider current llog in use */
794         /* ignore remote (lgh_obj=NULL) llogs */
795         hdr = llh->lgh_hdr;
796         if ((hdr->llh_flags & LLOG_F_ZAP_WHEN_EMPTY) &&
797             hdr->llh_count == 1 && cat_llh->lgh_obj != NULL &&
798             llh != cat_llh->u.chd.chd_current_log) {
799                 rc = llog_destroy(env, llh);
800                 if (rc)
801                         CERROR("%s: fail to destroy empty log: rc = %d\n",
802                                llh->lgh_ctxt->loc_obd->obd_name, rc);
803                 GOTO(out, rc = LLOG_DEL_PLAIN);
804         }
805
806         if (rec->lrh_index < d->lpd_startcat) {
807                 /* Skip processing of the logs until startcat */
808                 rc = 0;
809         } else if (d->lpd_startidx > 0) {
810                 struct llog_process_cat_data cd;
811
812                 cd.lpcd_first_idx = d->lpd_startidx;
813                 cd.lpcd_last_idx = 0;
814                 rc = llog_process_or_fork(env, llh, d->lpd_cb, d->lpd_data,
815                                           &cd, false);
816                 /* Continue processing the next log from idx 0 */
817                 d->lpd_startidx = 0;
818         } else {
819                 rc = llog_process_or_fork(env, llh, d->lpd_cb, d->lpd_data,
820                                           NULL, false);
821         }
822
823 out:
824         /* The empty plain log was destroyed while processing */
825         if (rc == LLOG_DEL_PLAIN)
826                 rc = llog_cat_cleanup(env, cat_llh, llh,
827                                       llh->u.phd.phd_cookie.lgc_index);
828         llog_handle_put(llh);
829
830         RETURN(rc);
831 }
832
833 int llog_cat_process_or_fork(const struct lu_env *env,
834                              struct llog_handle *cat_llh, llog_cb_t cat_cb,
835                              llog_cb_t cb, void *data, int startcat,
836                              int startidx, bool fork)
837 {
838         struct llog_process_data d;
839         struct llog_log_hdr *llh = cat_llh->lgh_hdr;
840         int rc;
841         ENTRY;
842
843         LASSERT(llh->llh_flags & LLOG_F_IS_CAT);
844         d.lpd_data = data;
845         d.lpd_cb = cb;
846         d.lpd_startcat = startcat;
847         d.lpd_startidx = startidx;
848
849         if (llh->llh_cat_idx >= cat_llh->lgh_last_idx &&
850             llh->llh_count > 1) {
851                 struct llog_process_cat_data cd;
852
853                 CWARN("catlog "DOSTID" crosses index zero\n",
854                       POSTID(&cat_llh->lgh_id.lgl_oi));
855
856                 cd.lpcd_first_idx = llh->llh_cat_idx;
857                 cd.lpcd_last_idx = 0;
858                 rc = llog_process_or_fork(env, cat_llh, cat_cb,
859                                           &d, &cd, fork);
860                 if (rc != 0)
861                         RETURN(rc);
862
863                 cd.lpcd_first_idx = 0;
864                 cd.lpcd_last_idx = cat_llh->lgh_last_idx;
865                 rc = llog_process_or_fork(env, cat_llh, cat_cb,
866                                           &d, &cd, fork);
867         } else {
868                 rc = llog_process_or_fork(env, cat_llh, cat_cb,
869                                           &d, NULL, fork);
870         }
871
872         RETURN(rc);
873 }
874
875 int llog_cat_process(const struct lu_env *env, struct llog_handle *cat_llh,
876                      llog_cb_t cb, void *data, int startcat, int startidx)
877 {
878         return llog_cat_process_or_fork(env, cat_llh, llog_cat_process_cb,
879                                         cb, data, startcat, startidx, false);
880 }
881 EXPORT_SYMBOL(llog_cat_process);
882
883 static int llog_cat_size_cb(const struct lu_env *env,
884                              struct llog_handle *cat_llh,
885                              struct llog_rec_hdr *rec, void *data)
886 {
887         struct llog_process_data *d = data;
888         struct llog_logid_rec *lir = (struct llog_logid_rec *)rec;
889         struct llog_handle *llh;
890         int rc;
891         __u64 *cum_size = d->lpd_data;
892         __u64 size;
893
894         ENTRY;
895         if (rec->lrh_type != LLOG_LOGID_MAGIC) {
896                 CERROR("%s: invalid record in catalog, rc = %d\n",
897                        cat_llh->lgh_ctxt->loc_obd->obd_name, -EINVAL);
898                 RETURN(-EINVAL);
899         }
900         CDEBUG(D_HA, "processing log "DOSTID":%x at index %u of catalog "
901                DOSTID"\n", POSTID(&lir->lid_id.lgl_oi), lir->lid_id.lgl_ogen,
902                rec->lrh_index, POSTID(&cat_llh->lgh_id.lgl_oi));
903
904         rc = llog_cat_id2handle(env, cat_llh, &llh, &lir->lid_id);
905         if (rc) {
906                 CWARN("%s: cannot find handle for llog "DOSTID": rc = %d\n",
907                       cat_llh->lgh_ctxt->loc_obd->obd_name,
908                       POSTID(&lir->lid_id.lgl_oi), rc);
909                 RETURN(0);
910         }
911         size = llog_size(env, llh);
912         *cum_size += size;
913
914         CDEBUG(D_INFO, "Add llog entry "DOSTID" size "LPU64"\n",
915                POSTID(&llh->lgh_id.lgl_oi), size);
916
917         llog_handle_put(llh);
918
919         RETURN(0);
920
921 }
922
923 __u64 llog_cat_size(const struct lu_env *env, struct llog_handle *cat_llh)
924 {
925         __u64 size = llog_size(env, cat_llh);
926
927         llog_cat_process_or_fork(env, cat_llh, llog_cat_size_cb,
928                                  NULL, &size, 0, 0, false);
929
930         return size;
931 }
932 EXPORT_SYMBOL(llog_cat_size);
933
934 static int llog_cat_reverse_process_cb(const struct lu_env *env,
935                                        struct llog_handle *cat_llh,
936                                        struct llog_rec_hdr *rec, void *data)
937 {
938         struct llog_process_data *d = data;
939         struct llog_logid_rec *lir = (struct llog_logid_rec *)rec;
940         struct llog_handle *llh;
941         struct llog_log_hdr *hdr;
942         int rc;
943
944         if (le32_to_cpu(rec->lrh_type) != LLOG_LOGID_MAGIC) {
945                 CERROR("invalid record in catalog\n");
946                 RETURN(-EINVAL);
947         }
948         CDEBUG(D_HA, "processing log "DOSTID":%x at index %u of catalog "
949                DOSTID"\n", POSTID(&lir->lid_id.lgl_oi), lir->lid_id.lgl_ogen,
950                le32_to_cpu(rec->lrh_index), POSTID(&cat_llh->lgh_id.lgl_oi));
951
952         rc = llog_cat_id2handle(env, cat_llh, &llh, &lir->lid_id);
953         if (rc) {
954                 CERROR("%s: cannot find handle for llog "DOSTID": %d\n",
955                        cat_llh->lgh_ctxt->loc_obd->obd_name,
956                        POSTID(&lir->lid_id.lgl_oi), rc);
957                 if (rc == -ENOENT || rc == -ESTALE) {
958                         /* After a server crash, a stub of index
959                          * record in catlog could be kept, because
960                          * plain log destroy + catlog index record
961                          * deletion are not atomic. So we end up with
962                          * an index but no actual record. Destroy the
963                          * index and move on. */
964                         rc = llog_cat_cleanup(env, cat_llh, NULL,
965                                               rec->lrh_index);
966                 }
967
968                 RETURN(rc);
969         }
970
971         /* clean old empty llogs, do not consider current llog in use */
972         hdr = llh->lgh_hdr;
973         if ((hdr->llh_flags & LLOG_F_ZAP_WHEN_EMPTY) &&
974             hdr->llh_count == 1 &&
975             llh != cat_llh->u.chd.chd_current_log) {
976                 rc = llog_destroy(env, llh);
977                 if (rc)
978                         CERROR("%s: fail to destroy empty log: rc = %d\n",
979                                llh->lgh_ctxt->loc_obd->obd_name, rc);
980                 GOTO(out, rc = LLOG_DEL_PLAIN);
981         }
982
983         rc = llog_reverse_process(env, llh, d->lpd_cb, d->lpd_data, NULL);
984
985 out:
986         /* The empty plain was destroyed while processing */
987         if (rc == LLOG_DEL_PLAIN)
988                 rc = llog_cat_cleanup(env, cat_llh, llh,
989                                       llh->u.phd.phd_cookie.lgc_index);
990
991         llog_handle_put(llh);
992         RETURN(rc);
993 }
994
995 int llog_cat_reverse_process(const struct lu_env *env,
996                              struct llog_handle *cat_llh,
997                              llog_cb_t cb, void *data)
998 {
999         struct llog_process_data d;
1000         struct llog_process_cat_data cd;
1001         struct llog_log_hdr *llh = cat_llh->lgh_hdr;
1002         int rc;
1003         ENTRY;
1004
1005         LASSERT(llh->llh_flags & LLOG_F_IS_CAT);
1006         d.lpd_data = data;
1007         d.lpd_cb = cb;
1008
1009         if (llh->llh_cat_idx >= cat_llh->lgh_last_idx &&
1010             llh->llh_count > 1) {
1011                 CWARN("catalog "DOSTID" crosses index zero\n",
1012                       POSTID(&cat_llh->lgh_id.lgl_oi));
1013
1014                 cd.lpcd_first_idx = 0;
1015                 cd.lpcd_last_idx = cat_llh->lgh_last_idx;
1016                 rc = llog_reverse_process(env, cat_llh,
1017                                           llog_cat_reverse_process_cb,
1018                                           &d, &cd);
1019                 if (rc != 0)
1020                         RETURN(rc);
1021
1022                 cd.lpcd_first_idx = le32_to_cpu(llh->llh_cat_idx);
1023                 cd.lpcd_last_idx = 0;
1024                 rc = llog_reverse_process(env, cat_llh,
1025                                           llog_cat_reverse_process_cb,
1026                                           &d, &cd);
1027         } else {
1028                 rc = llog_reverse_process(env, cat_llh,
1029                                           llog_cat_reverse_process_cb,
1030                                           &d, NULL);
1031         }
1032
1033         RETURN(rc);
1034 }
1035 EXPORT_SYMBOL(llog_cat_reverse_process);
1036
1037 static int llog_cat_set_first_idx(struct llog_handle *cathandle, int idx)
1038 {
1039         struct llog_log_hdr *llh = cathandle->lgh_hdr;
1040         int bitmap_size;
1041
1042         ENTRY;
1043
1044         bitmap_size = LLOG_HDR_BITMAP_SIZE(llh);
1045         /*
1046          * The llh_cat_idx equals to the first used index minus 1
1047          * so if we canceled the first index then llh_cat_idx
1048          * must be renewed.
1049          */
1050         if (llh->llh_cat_idx == (idx - 1)) {
1051                 llh->llh_cat_idx = idx;
1052
1053                 while (idx != cathandle->lgh_last_idx) {
1054                         idx = (idx + 1) % bitmap_size;
1055                         if (!ext2_test_bit(idx, LLOG_HDR_BITMAP(llh))) {
1056                                 /* update llh_cat_idx for each unset bit,
1057                                  * expecting the next one is set */
1058                                 llh->llh_cat_idx = idx;
1059                         } else if (idx == 0) {
1060                                 /* skip header bit */
1061                                 llh->llh_cat_idx = 0;
1062                                 continue;
1063                         } else {
1064                                 /* the first index is found */
1065                                 break;
1066                         }
1067                 }
1068
1069                 CDEBUG(D_RPCTRACE, "Set catlog "DOSTID" first idx %u,"
1070                        " (last_idx %u)\n", POSTID(&cathandle->lgh_id.lgl_oi),
1071                        llh->llh_cat_idx, cathandle->lgh_last_idx);
1072         }
1073
1074         RETURN(0);
1075 }
1076
1077 /* Cleanup deleted plain llog traces from catalog */
1078 int llog_cat_cleanup(const struct lu_env *env, struct llog_handle *cathandle,
1079                      struct llog_handle *loghandle, int index)
1080 {
1081         int rc;
1082
1083         LASSERT(index);
1084         if (loghandle != NULL) {
1085                 /* remove destroyed llog from catalog list and
1086                  * chd_current_log variable */
1087                 down_write(&cathandle->lgh_lock);
1088                 if (cathandle->u.chd.chd_current_log == loghandle)
1089                         cathandle->u.chd.chd_current_log = NULL;
1090                 list_del_init(&loghandle->u.phd.phd_entry);
1091                 up_write(&cathandle->lgh_lock);
1092                 LASSERT(index == loghandle->u.phd.phd_cookie.lgc_index);
1093                 /* llog was opened and keep in a list, close it now */
1094                 llog_close(env, loghandle);
1095         }
1096
1097         /* do not attempt to cleanup on-disk llog if on client side */
1098         if (cathandle->lgh_obj == NULL)
1099                 return 0;
1100
1101         /* remove plain llog entry from catalog by index */
1102         llog_cat_set_first_idx(cathandle, index);
1103         rc = llog_cancel_rec(env, cathandle, index);
1104         if (rc == 0)
1105                 CDEBUG(D_HA, "cancel plain log at index"
1106                        " %u of catalog "DOSTID"\n",
1107                        index, POSTID(&cathandle->lgh_id.lgl_oi));
1108         return rc;
1109 }