Whamcloud - gitweb
LU-957 lfsck: misc fixes for lfsck
[fs/lustre-release.git] / lustre / mdd / mdd_lfsck.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License version 2 for more details.  A copy is
14  * included in the COPYING file that accompanied this code.
15
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2012 Whamcloud, Inc.
24  */
25 /*
26  * lustre/mdd/mdd_lfsck.c
27  *
28  * Top-level entry points into mdd module
29  *
30  * LFSCK controller, which scans the whole device through low layer
31  * iteration APIs, drives all lfsck components, controls the speed.
32  *
33  * Author: Fan Yong <yong.fan@whamcloud.com>
34  */
35
36 #ifndef EXPORT_SYMTAB
37 # define EXPORT_SYMTAB
38 #endif
39 #define DEBUG_SUBSYSTEM S_MDS
40
41 #include <lustre/lustre_idl.h>
42 #include <lustre_fid.h>
43
44 #include "mdd_internal.h"
45
46 static inline char *mdd_lfsck2name(struct md_lfsck *lfsck)
47 {
48         struct mdd_device *mdd;
49
50         mdd = container_of0(lfsck, struct mdd_device, mdd_lfsck);
51         return mdd->mdd_obd_dev->obd_name;
52 }
53
54 void mdd_lfsck_set_speed(struct md_lfsck *lfsck, __u32 limit)
55 {
56         cfs_spin_lock(&lfsck->ml_lock);
57         lfsck->ml_speed_limit = limit;
58         if (limit != LFSCK_SPEED_NO_LIMIT) {
59                 if (limit > CFS_HZ) {
60                         lfsck->ml_sleep_rate = limit / CFS_HZ;
61                         lfsck->ml_sleep_jif = 1;
62                 } else {
63                         lfsck->ml_sleep_rate = 1;
64                         lfsck->ml_sleep_jif = CFS_HZ / limit;
65                 }
66         } else {
67                 lfsck->ml_sleep_jif = 0;
68                 lfsck->ml_sleep_rate = 0;
69         }
70         cfs_spin_unlock(&lfsck->ml_lock);
71 }
72
73 static void mdd_lfsck_control_speed(struct md_lfsck *lfsck)
74 {
75         struct ptlrpc_thread *thread = &lfsck->ml_thread;
76         struct l_wait_info    lwi;
77
78         if (lfsck->ml_sleep_jif > 0 &&
79             lfsck->ml_new_scanned >= lfsck->ml_sleep_rate) {
80                 cfs_spin_lock(&lfsck->ml_lock);
81                 if (likely(lfsck->ml_sleep_jif > 0 &&
82                            lfsck->ml_new_scanned >= lfsck->ml_sleep_rate)) {
83                         lwi = LWI_TIMEOUT_INTR(lfsck->ml_sleep_jif, NULL,
84                                                LWI_ON_SIGNAL_NOOP, NULL);
85                         cfs_spin_unlock(&lfsck->ml_lock);
86
87                         l_wait_event(thread->t_ctl_waitq,
88                                      !thread_is_running(thread),
89                                      &lwi);
90                         lfsck->ml_new_scanned = 0;
91                 } else {
92                         cfs_spin_unlock(&lfsck->ml_lock);
93                 }
94         }
95 }
96
97 static int mdd_lfsck_main(void *args)
98 {
99         struct lu_env            env;
100         struct md_lfsck         *lfsck  = (struct md_lfsck *)args;
101         struct ptlrpc_thread    *thread = &lfsck->ml_thread;
102         struct dt_object        *obj    = lfsck->ml_it_obj;
103         const struct dt_it_ops  *iops   = &obj->do_index_ops->dio_it;
104         struct dt_it            *di;
105         struct lu_fid           *fid;
106         int                      rc;
107         ENTRY;
108
109         cfs_daemonize("lfsck");
110         rc = lu_env_init(&env, LCT_MD_THREAD | LCT_DT_THREAD);
111         if (rc != 0) {
112                 CERROR("%s: LFSCK, fail to init env, rc = %d\n",
113                        mdd_lfsck2name(lfsck), rc);
114                 GOTO(noenv, rc);
115         }
116
117         di = iops->init(&env, obj, lfsck->ml_args, BYPASS_CAPA);
118         if (IS_ERR(di)) {
119                 rc = PTR_ERR(di);
120                 CERROR("%s: LFSCK, fail to init iteration, rc = %d\n",
121                        mdd_lfsck2name(lfsck), rc);
122                 GOTO(fini_env, rc);
123         }
124
125         CDEBUG(D_LFSCK, "LFSCK: flags = 0x%x, pid = %d\n",
126                lfsck->ml_args, cfs_curproc_pid());
127
128         /* XXX: Prepare before wakeup the sponsor.
129          *      Each lfsck component should call iops->get() API with
130          *      every bookmark, then low layer module can decide the
131          *      start point for current iteration. */
132
133         cfs_spin_lock(&lfsck->ml_lock);
134         thread_set_flags(thread, SVC_RUNNING);
135         cfs_spin_unlock(&lfsck->ml_lock);
136         cfs_waitq_broadcast(&thread->t_ctl_waitq);
137
138         /* Call iops->load() to finish the choosing start point. */
139         rc = iops->load(&env, di, 0);
140         if (rc != 0)
141                 GOTO(out, rc);
142
143         CDEBUG(D_LFSCK, "LFSCK: iteration start: pos = %s\n",
144                (char *)iops->key(&env, di));
145
146         lfsck->ml_new_scanned = 0;
147         fid = &mdd_env_info(&env)->mti_fid;
148         while (rc == 0) {
149                 iops->rec(&env, di, (struct dt_rec *)fid, 0);
150
151                 /* XXX: here, perform LFSCK when some LFSCK component(s)
152                  *      introduced in the future. */
153                 lfsck->ml_new_scanned++;
154
155                 /* XXX: here, make checkpoint when some LFSCK component(s)
156                  *      introduced in the future. */
157
158                 /* Rate control. */
159                 mdd_lfsck_control_speed(lfsck);
160                 if (unlikely(!thread_is_running(thread)))
161                         GOTO(out, rc = 0);
162
163                 rc = iops->next(&env, di);
164         }
165
166         GOTO(out, rc);
167
168 out:
169         if (lfsck->ml_paused) {
170                 /* XXX: It is hack here: if the lfsck is still running when MDS
171                  *      umounts, it should be restarted automatically after MDS
172                  *      remounts up.
173                  *
174                  *      To support that, we need to record the lfsck status in
175                  *      the lfsck on-disk bookmark file. But now, there is not
176                  *      lfsck component under the lfsck framework. To avoid to
177                  *      introduce unnecessary bookmark incompatibility issues,
178                  *      we write nothing to the lfsck bookmark file now.
179                  *
180                  *      Instead, we will reuse dt_it_ops::put() method to notify
181                  *      low layer iterator to process such case.
182                  *
183                  *      It is just temporary solution, and will be replaced when
184                  *      some lfsck component is introduced in the future. */
185                 iops->put(&env, di);
186                 CDEBUG(D_LFSCK, "LFSCK: iteration pasued: pos = %s, rc = %d\n",
187                        (char *)iops->key(&env, di), rc);
188         } else {
189                 CDEBUG(D_LFSCK, "LFSCK: iteration stop: pos = %s, rc = %d\n",
190                        (char *)iops->key(&env, di), rc);
191         }
192         iops->fini(&env, di);
193
194 fini_env:
195         lu_env_fini(&env);
196
197 noenv:
198         cfs_spin_lock(&lfsck->ml_lock);
199         thread_set_flags(thread, SVC_STOPPED);
200         cfs_waitq_broadcast(&thread->t_ctl_waitq);
201         cfs_spin_unlock(&lfsck->ml_lock);
202         return rc;
203 }
204
205 int mdd_lfsck_start(const struct lu_env *env, struct md_lfsck *lfsck,
206                     struct lfsck_start *start)
207 {
208         struct ptlrpc_thread *thread  = &lfsck->ml_thread;
209         struct l_wait_info    lwi     = { 0 };
210         int                   rc      = 0;
211         __u16                 valid   = 0;
212         __u16                 flags   = 0;
213         ENTRY;
214
215         cfs_mutex_lock(&lfsck->ml_mutex);
216         cfs_spin_lock(&lfsck->ml_lock);
217         if (thread_is_running(thread)) {
218                 cfs_spin_unlock(&lfsck->ml_lock);
219                 cfs_mutex_unlock(&lfsck->ml_mutex);
220                 RETURN(-EALREADY);
221         }
222
223         cfs_spin_unlock(&lfsck->ml_lock);
224         if (start->ls_valid & LSV_SPEED_LIMIT)
225                 mdd_lfsck_set_speed(lfsck, start->ls_speed_limit);
226
227         if (start->ls_valid & LSV_ERROR_HANDLE) {
228                 valid |= DOIV_ERROR_HANDLE;
229                 if (start->ls_flags & LPF_FAILOUT)
230                         flags |= DOIF_FAILOUT;
231         }
232
233         /* XXX: 1. low layer does not care 'dryrun'.
234          *      2. will process 'ls_active' when introduces LFSCK for layout
235          *         consistency, DNE consistency, and so on in the future. */
236         start->ls_active = 0;
237
238         if (start->ls_flags & LPF_RESET)
239                 flags |= DOIF_RESET;
240
241         if (start->ls_active != 0)
242                 flags |= DOIF_OUTUSED;
243
244         lfsck->ml_args = (flags << DT_OTABLE_IT_FLAGS_SHIFT) | valid;
245         thread_set_flags(thread, 0);
246         rc = cfs_create_thread(mdd_lfsck_main, lfsck, 0);
247         if (rc < 0)
248                 CERROR("%s: cannot start LFSCK thread, rc = %d\n",
249                        mdd_lfsck2name(lfsck), rc);
250         else
251                 l_wait_event(thread->t_ctl_waitq,
252                              thread_is_running(thread) ||
253                              thread_is_stopped(thread),
254                              &lwi);
255         cfs_mutex_unlock(&lfsck->ml_mutex);
256
257         RETURN(rc < 0 ? rc : 0);
258 }
259
260 int mdd_lfsck_stop(const struct lu_env *env, struct md_lfsck *lfsck)
261 {
262         struct ptlrpc_thread *thread = &lfsck->ml_thread;
263         struct l_wait_info    lwi    = { 0 };
264         ENTRY;
265
266         cfs_mutex_lock(&lfsck->ml_mutex);
267         cfs_spin_lock(&lfsck->ml_lock);
268         if (thread_is_init(thread) || thread_is_stopped(thread)) {
269                 cfs_spin_unlock(&lfsck->ml_lock);
270                 cfs_mutex_unlock(&lfsck->ml_mutex);
271                 RETURN(-EALREADY);
272         }
273
274         thread_set_flags(thread, SVC_STOPPING);
275         cfs_spin_unlock(&lfsck->ml_lock);
276
277         cfs_waitq_broadcast(&thread->t_ctl_waitq);
278         l_wait_event(thread->t_ctl_waitq,
279                      thread_is_stopped(thread),
280                      &lwi);
281         cfs_mutex_unlock(&lfsck->ml_mutex);
282
283         RETURN(0);
284 }
285
286 const char lfsck_bookmark_name[] = "lfsck_bookmark";
287
288 static const struct lu_fid lfsck_it_fid = { .f_seq = FID_SEQ_LOCAL_FILE,
289                                             .f_oid = OTABLE_IT_OID,
290                                             .f_ver = 0 };
291
292 int mdd_lfsck_setup(const struct lu_env *env, struct mdd_device *mdd)
293 {
294         struct md_lfsck  *lfsck = &mdd->mdd_lfsck;
295         struct dt_object *obj;
296         int               rc;
297
298         memset(lfsck, 0, sizeof(*lfsck));
299         lfsck->ml_version = LFSCK_VERSION_V1;
300         cfs_waitq_init(&lfsck->ml_thread.t_ctl_waitq);
301         cfs_mutex_init(&lfsck->ml_mutex);
302         cfs_spin_lock_init(&lfsck->ml_lock);
303
304         obj = dt_store_open(env, mdd->mdd_child, "", lfsck_bookmark_name,
305                             &mdd_env_info(env)->mti_fid);
306         if (IS_ERR(obj))
307                 return PTR_ERR(obj);
308
309         lfsck->ml_bookmark_obj = obj;
310
311         obj = dt_locate(env, mdd->mdd_child, &lfsck_it_fid);
312         if (IS_ERR(obj))
313                 return PTR_ERR(obj);
314
315         rc = obj->do_ops->do_index_try(env, obj, &dt_otable_features);
316         if (rc != 0) {
317                 lu_object_put(env, &obj->do_lu);
318                 return rc;
319         }
320
321         lfsck->ml_it_obj = obj;
322
323         return 0;
324 }
325
326 void mdd_lfsck_cleanup(const struct lu_env *env, struct mdd_device *mdd)
327 {
328         struct md_lfsck *lfsck = &mdd->mdd_lfsck;
329
330         if (lfsck->ml_it_obj != NULL) {
331                 lfsck->ml_paused = 1;
332                 mdd_lfsck_stop(env, lfsck);
333                 lu_object_put(env, &lfsck->ml_it_obj->do_lu);
334                 lfsck->ml_it_obj = NULL;
335         }
336
337         if (lfsck->ml_bookmark_obj != NULL) {
338                 lu_object_put(env, &lfsck->ml_bookmark_obj->do_lu);
339                 lfsck->ml_bookmark_obj = NULL;
340         }
341 }