Whamcloud - gitweb
LU-1866 osd: ancillary work for initial OI scrub
[fs/lustre-release.git] / lustre / mdt / mdt_capa.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  *
30  * Copyright (c) 2011, 2012, Intel Corporation.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/mdt/mdt_capa.c
37  *
38  * Lustre Metadata Target (mdt) capability key read/write/update.
39  *
40  * Author: Lai Siyao <lsy@clusterfs.com>
41  */
42
43 #define DEBUG_SUBSYSTEM S_MDS
44
45 #include "mdt_internal.h"
46
47 static inline void set_capa_key_expiry(struct mdt_device *mdt)
48 {
49         mdt->mdt_ck_expiry = jiffies + mdt->mdt_ck_timeout * CFS_HZ;
50 }
51
52 static void make_capa_key(struct lustre_capa_key *key,
53                           mdsno_t mdsnum, int keyid)
54 {
55         key->lk_seq = mdsnum;
56         key->lk_keyid = keyid + 1;
57         cfs_get_random_bytes(key->lk_key, sizeof(key->lk_key));
58 }
59
60 static inline void lck_cpu_to_le(struct lustre_capa_key *tgt,
61                                  struct lustre_capa_key *src)
62 {
63         tgt->lk_seq   = cpu_to_le64(src->lk_seq);
64         tgt->lk_keyid   = cpu_to_le32(src->lk_keyid);
65         tgt->lk_padding = cpu_to_le32(src->lk_padding);
66         memcpy(tgt->lk_key, src->lk_key, sizeof(src->lk_key));
67 }
68
69 static inline void lck_le_to_cpu(struct lustre_capa_key *tgt,
70                                  struct lustre_capa_key *src)
71 {
72         tgt->lk_seq   = le64_to_cpu(src->lk_seq);
73         tgt->lk_keyid   = le32_to_cpu(src->lk_keyid);
74         tgt->lk_padding = le32_to_cpu(src->lk_padding);
75         memcpy(tgt->lk_key, src->lk_key, sizeof(src->lk_key));
76 }
77
78 static int write_capa_keys(const struct lu_env *env,
79                            struct mdt_device *mdt,
80                            struct lustre_capa_key *keys)
81 {
82         struct mdt_thread_info *mti;
83         struct lustre_capa_key *tmp;
84         struct thandle *th;
85         loff_t off = 0;
86         int i, rc;
87
88         mti = lu_context_key_get(&env->le_ctx, &mdt_thread_key);
89         th = dt_trans_create(env, mdt->mdt_bottom);
90         if (IS_ERR(th))
91                 RETURN(PTR_ERR(th));
92
93         rc = dt_declare_record_write(env, mdt->mdt_ck_obj,
94                                      sizeof(*tmp) * 3, 0, th);
95         if (rc)
96                 goto stop;
97
98         rc = dt_trans_start_local(env, mdt->mdt_bottom, th);
99         if (rc)
100                 goto stop;
101
102         tmp = &mti->mti_capa_key;
103
104         for (i = 0; i < 2; i++) {
105                 lck_cpu_to_le(tmp, &keys[i]);
106
107                 rc = dt_record_write(env, mdt->mdt_ck_obj,
108                                      mdt_buf_const(env, tmp, sizeof(*tmp)),
109                                      &off, th);
110                 if (rc)
111                         break;
112         }
113
114 stop:
115         dt_trans_stop(env, mdt->mdt_bottom, th);
116
117         CDEBUG(D_INFO, "write capability keys rc = %d:\n", rc);
118         return rc;
119 }
120
121 static int read_capa_keys(const struct lu_env *env,
122                           struct mdt_device *mdt,
123                           struct lustre_capa_key *keys)
124 {
125         struct mdt_thread_info *mti;
126         struct lustre_capa_key *tmp;
127         loff_t off = 0;
128         int i, rc;
129
130         mti = lu_context_key_get(&env->le_ctx, &mdt_thread_key);
131         tmp = &mti->mti_capa_key;
132
133         for (i = 0; i < 2; i++) {
134                 rc = dt_record_read(env, mdt->mdt_ck_obj,
135                                     mdt_buf(env, tmp, sizeof(*tmp)), &off);
136                 if (rc)
137                         return rc;
138
139                 lck_le_to_cpu(&keys[i], tmp);
140                 DEBUG_CAPA_KEY(D_SEC, &keys[i], "read");
141         }
142
143         return 0;
144 }
145
146 int mdt_capa_keys_init(const struct lu_env *env, struct mdt_device *mdt)
147 {
148         struct lustre_capa_key  *keys = mdt->mdt_capa_keys;
149         struct mdt_thread_info  *mti;
150         struct dt_object        *obj;
151         struct lu_attr          *la;
152         mdsno_t                  mdsnum;
153         unsigned long            size;
154         int                      rc;
155         ENTRY;
156
157         mdsnum = mdt_seq_site(mdt)->ss_node_id;
158
159         mti = lu_context_key_get(&env->le_ctx, &mdt_thread_key);
160         LASSERT(mti != NULL);
161         la = &mti->mti_attr.ma_attr;
162
163         obj = mdt->mdt_ck_obj;
164         rc = obj->do_ops->do_attr_get(env, mdt->mdt_ck_obj, la, BYPASS_CAPA);
165         if (rc)
166                 RETURN(rc);
167
168         size = (unsigned long)la->la_size;
169         if (size == 0) {
170                 int i;
171
172                 for (i = 0; i < 2; i++) {
173                         make_capa_key(&keys[i], mdsnum, i);
174                         DEBUG_CAPA_KEY(D_SEC, &keys[i], "initializing");
175                 }
176
177                 rc = write_capa_keys(env, mdt, keys);
178                 if (rc) {
179                         CERROR("error writing MDS %s: rc %d\n", CAPA_KEYS, rc);
180                         RETURN(rc);
181                 }
182         } else {
183                 rc = read_capa_keys(env, mdt, keys);
184                 if (rc) {
185                         CERROR("error reading MDS %s: rc %d\n", CAPA_KEYS, rc);
186                         RETURN(rc);
187                 }
188         }
189         set_capa_key_expiry(mdt);
190         cfs_timer_arm(&mdt->mdt_ck_timer, mdt->mdt_ck_expiry);
191         CDEBUG(D_SEC, "mds_ck_timer %lu\n", mdt->mdt_ck_expiry);
192         RETURN(0);
193 }
194
195 void mdt_ck_timer_callback(unsigned long castmeharder)
196 {
197         struct mdt_device *mdt = (struct mdt_device *)castmeharder;
198         struct ptlrpc_thread *thread = &mdt->mdt_ck_thread;
199
200         ENTRY;
201         thread_add_flags(thread, SVC_EVENT);
202         cfs_waitq_signal(&thread->t_ctl_waitq);
203         EXIT;
204 }
205
206 static int mdt_ck_thread_main(void *args)
207 {
208         struct mdt_device      *mdt = args;
209         struct ptlrpc_thread   *thread = &mdt->mdt_ck_thread;
210         struct lustre_capa_key *bkey = &mdt->mdt_capa_keys[0],
211                                *rkey = &mdt->mdt_capa_keys[1];
212         struct lustre_capa_key *tmp;
213         struct lu_env           env;
214         struct mdt_thread_info *info;
215         struct md_device       *next;
216         struct l_wait_info      lwi = { 0 };
217         mdsno_t                 mdsnum;
218         int                     rc;
219         ENTRY;
220
221         cfs_daemonize_ctxt("mdt_ck");
222         cfs_block_allsigs();
223
224         thread_set_flags(thread, SVC_RUNNING);
225         cfs_waitq_signal(&thread->t_ctl_waitq);
226
227         rc = lu_env_init(&env, LCT_MD_THREAD|LCT_REMEMBER|LCT_NOREF);
228         if (rc)
229                 RETURN(rc);
230
231         thread->t_env = &env;
232         env.le_ctx.lc_thread = thread;
233         env.le_ctx.lc_cookie = 0x1;
234
235         info = lu_context_key_get(&env.le_ctx, &mdt_thread_key);
236         LASSERT(info != NULL);
237
238         tmp = &info->mti_capa_key;
239         mdsnum = mdt_seq_site(mdt)->ss_node_id;
240         while (1) {
241                 l_wait_event(thread->t_ctl_waitq,
242                              thread_is_stopping(thread) ||
243                              thread_is_event(thread),
244                              &lwi);
245
246                 if (thread_is_stopping(thread))
247                         break;
248                 thread_clear_flags(thread, SVC_EVENT);
249
250                 if (cfs_time_before(cfs_time_current(), mdt->mdt_ck_expiry))
251                         break;
252
253                 *tmp = *rkey;
254                 make_capa_key(tmp, mdsnum, rkey->lk_keyid);
255
256                 next = mdt->mdt_child;
257                 rc = next->md_ops->mdo_update_capa_key(&env, next, tmp);
258                 if (!rc) {
259                         spin_lock(&capa_lock);
260                         *bkey = *rkey;
261                         *rkey = *tmp;
262                         spin_unlock(&capa_lock);
263
264                         rc = write_capa_keys(&env, mdt, mdt->mdt_capa_keys);
265                         if (rc) {
266                                 spin_lock(&capa_lock);
267                                 *rkey = *bkey;
268                                 memset(bkey, 0, sizeof(*bkey));
269                                 spin_unlock(&capa_lock);
270                         } else {
271                                 set_capa_key_expiry(mdt);
272                                 DEBUG_CAPA_KEY(D_SEC, rkey, "new");
273                         }
274                 }
275                 if (rc) {
276                         DEBUG_CAPA_KEY(D_ERROR, rkey, "update failed for");
277                         /* next retry is in 300 sec */
278                         mdt->mdt_ck_expiry = jiffies + 300 * CFS_HZ;
279                 }
280
281                 cfs_timer_arm(&mdt->mdt_ck_timer, mdt->mdt_ck_expiry);
282                 CDEBUG(D_SEC, "mdt_ck_timer %lu\n", mdt->mdt_ck_expiry);
283         }
284         lu_env_fini(&env);
285
286         thread_set_flags(thread, SVC_STOPPED);
287         cfs_waitq_signal(&thread->t_ctl_waitq);
288         RETURN(0);
289 }
290
291 int mdt_ck_thread_start(struct mdt_device *mdt)
292 {
293         struct ptlrpc_thread *thread = &mdt->mdt_ck_thread;
294         int rc;
295
296         cfs_waitq_init(&thread->t_ctl_waitq);
297         rc = cfs_create_thread(mdt_ck_thread_main, mdt, CFS_DAEMON_FLAGS);
298         if (rc < 0) {
299                 CERROR("cannot start mdt_ck thread, rc = %d\n", rc);
300                 return rc;
301         }
302
303         l_wait_condition(thread->t_ctl_waitq, thread_is_running(thread));
304         return 0;
305 }
306
307 void mdt_ck_thread_stop(struct mdt_device *mdt)
308 {
309         struct ptlrpc_thread *thread = &mdt->mdt_ck_thread;
310
311         if (!thread_is_running(thread))
312                 return;
313
314         thread_set_flags(thread, SVC_STOPPING);
315         cfs_waitq_signal(&thread->t_ctl_waitq);
316         l_wait_condition(thread->t_ctl_waitq, thread_is_stopped(thread));
317 }