Whamcloud - gitweb
968121d0226ea492e865a6850245919b6734b480
[fs/lustre-release.git] / lustre / mdt / mdt_capa.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see
20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
30  * Use is subject to license terms.
31  * Copyright (c) 2011 Whamcloud, Inc.
32  */
33 /*
34  * Copyright (c) 2011 Whamcloud, Inc.
35  */
36 /*
37  * This file is part of Lustre, http://www.lustre.org/
38  * Lustre is a trademark of Sun Microsystems, Inc.
39  *
40  * lustre/mdt/mdt_capa.c
41  *
42  * Lustre Metadata Target (mdt) capability key read/write/update.
43  *
44  * Author: Lai Siyao <lsy@clusterfs.com>
45  */
46
47 #ifndef EXPORT_SYMTAB
48 # define EXPORT_SYMTAB
49 #endif
50 #define DEBUG_SUBSYSTEM S_MDS
51
52 #include "mdt_internal.h"
53
54 static inline void set_capa_key_expiry(struct mdt_device *mdt)
55 {
56         mdt->mdt_ck_expiry = jiffies + mdt->mdt_ck_timeout * CFS_HZ;
57 }
58
59 static void make_capa_key(struct lustre_capa_key *key,
60                           mdsno_t mdsnum, int keyid)
61 {
62         key->lk_seq = mdsnum;
63         key->lk_keyid = keyid + 1;
64         cfs_get_random_bytes(key->lk_key, sizeof(key->lk_key));
65 }
66
67 static inline void lck_cpu_to_le(struct lustre_capa_key *tgt,
68                                  struct lustre_capa_key *src)
69 {
70         tgt->lk_seq   = cpu_to_le64(src->lk_seq);
71         tgt->lk_keyid   = cpu_to_le32(src->lk_keyid);
72         tgt->lk_padding = cpu_to_le32(src->lk_padding);
73         memcpy(tgt->lk_key, src->lk_key, sizeof(src->lk_key));
74 }
75
76 static inline void lck_le_to_cpu(struct lustre_capa_key *tgt,
77                                  struct lustre_capa_key *src)
78 {
79         tgt->lk_seq   = le64_to_cpu(src->lk_seq);
80         tgt->lk_keyid   = le32_to_cpu(src->lk_keyid);
81         tgt->lk_padding = le32_to_cpu(src->lk_padding);
82         memcpy(tgt->lk_key, src->lk_key, sizeof(src->lk_key));
83 }
84
85 static int write_capa_keys(const struct lu_env *env,
86                            struct mdt_device *mdt,
87                            struct lustre_capa_key *keys)
88 {
89         struct mdt_thread_info *mti;
90         struct lustre_capa_key *tmp;
91         struct thandle *th;
92         loff_t off = 0;
93         int i, rc;
94
95         mti = lu_context_key_get(&env->le_ctx, &mdt_thread_key);
96         th = mdt_trans_create(env, mdt);
97         if (IS_ERR(th))
98                 RETURN(PTR_ERR(th));
99
100         rc = dt_declare_record_write(env, mdt->mdt_ck_obj,
101                                      sizeof(*tmp) * 3, 0, th);
102         if (rc)
103                 goto stop;
104
105         rc = mdt_trans_start(env, mdt, th);
106         if (rc)
107                 goto stop;
108
109         tmp = &mti->mti_capa_key;
110
111         for (i = 0; i < 2; i++) {
112                 lck_cpu_to_le(tmp, &keys[i]);
113
114                 rc = dt_record_write(env, mdt->mdt_ck_obj,
115                                      mdt_buf_const(env, tmp, sizeof(*tmp)),
116                                      &off, th);
117                 if (rc)
118                         break;
119         }
120
121 stop:
122         mdt_trans_stop(env, mdt, th);
123
124         CDEBUG(D_INFO, "write capability keys rc = %d:\n", rc);
125         return rc;
126 }
127
128 static int read_capa_keys(const struct lu_env *env,
129                           struct mdt_device *mdt,
130                           struct lustre_capa_key *keys)
131 {
132         struct mdt_thread_info *mti;
133         struct lustre_capa_key *tmp;
134         loff_t off = 0;
135         int i, rc;
136
137         mti = lu_context_key_get(&env->le_ctx, &mdt_thread_key);
138         tmp = &mti->mti_capa_key;
139
140         for (i = 0; i < 2; i++) {
141                 rc = dt_record_read(env, mdt->mdt_ck_obj,
142                                     mdt_buf(env, tmp, sizeof(*tmp)), &off);
143                 if (rc)
144                         return rc;
145
146                 lck_le_to_cpu(&keys[i], tmp);
147                 DEBUG_CAPA_KEY(D_SEC, &keys[i], "read");
148         }
149
150         return 0;
151 }
152
153 int mdt_capa_keys_init(const struct lu_env *env, struct mdt_device *mdt)
154 {
155         struct lustre_capa_key  *keys = mdt->mdt_capa_keys;
156         struct mdt_thread_info  *mti;
157         struct dt_object        *obj;
158         struct lu_attr          *la;
159         mdsno_t                  mdsnum;
160         unsigned long            size;
161         int                      rc;
162         ENTRY;
163
164         mdsnum = mdt_md_site(mdt)->ms_node_id;
165
166         mti = lu_context_key_get(&env->le_ctx, &mdt_thread_key);
167         LASSERT(mti != NULL);
168         la = &mti->mti_attr.ma_attr;
169
170         obj = mdt->mdt_ck_obj;
171         rc = obj->do_ops->do_attr_get(env, mdt->mdt_ck_obj, la, BYPASS_CAPA);
172         if (rc)
173                 RETURN(rc);
174
175         size = (unsigned long)la->la_size;
176         if (size == 0) {
177                 int i;
178
179                 for (i = 0; i < 2; i++) {
180                         make_capa_key(&keys[i], mdsnum, i);
181                         DEBUG_CAPA_KEY(D_SEC, &keys[i], "initializing");
182                 }
183
184                 rc = write_capa_keys(env, mdt, keys);
185                 if (rc) {
186                         CERROR("error writing MDS %s: rc %d\n", CAPA_KEYS, rc);
187                         RETURN(rc);
188                 }
189         } else {
190                 rc = read_capa_keys(env, mdt, keys);
191                 if (rc) {
192                         CERROR("error reading MDS %s: rc %d\n", CAPA_KEYS, rc);
193                         RETURN(rc);
194                 }
195         }
196         set_capa_key_expiry(mdt);
197         cfs_timer_arm(&mdt->mdt_ck_timer, mdt->mdt_ck_expiry);
198         CDEBUG(D_SEC, "mds_ck_timer %lu\n", mdt->mdt_ck_expiry);
199         RETURN(0);
200 }
201
202 void mdt_ck_timer_callback(unsigned long castmeharder)
203 {
204         struct mdt_device *mdt = (struct mdt_device *)castmeharder;
205         struct ptlrpc_thread *thread = &mdt->mdt_ck_thread;
206
207         ENTRY;
208         thread_add_flags(thread, SVC_EVENT);
209         cfs_waitq_signal(&thread->t_ctl_waitq);
210         EXIT;
211 }
212
213 static int mdt_ck_thread_main(void *args)
214 {
215         struct mdt_device      *mdt = args;
216         struct ptlrpc_thread   *thread = &mdt->mdt_ck_thread;
217         struct lustre_capa_key *bkey = &mdt->mdt_capa_keys[0],
218                                *rkey = &mdt->mdt_capa_keys[1];
219         struct lustre_capa_key *tmp;
220         struct lu_env           env;
221         struct mdt_thread_info *info;
222         struct md_device       *next;
223         struct l_wait_info      lwi = { 0 };
224         mdsno_t                 mdsnum;
225         int                     rc;
226         ENTRY;
227
228         cfs_daemonize_ctxt("mdt_ck");
229         cfs_block_allsigs();
230
231         thread_set_flags(thread, SVC_RUNNING);
232         cfs_waitq_signal(&thread->t_ctl_waitq);
233
234         rc = lu_env_init(&env, LCT_MD_THREAD|LCT_REMEMBER|LCT_NOREF);
235         if (rc)
236                 RETURN(rc);
237
238         thread->t_env = &env;
239         env.le_ctx.lc_thread = thread;
240         env.le_ctx.lc_cookie = 0x1;
241
242         info = lu_context_key_get(&env.le_ctx, &mdt_thread_key);
243         LASSERT(info != NULL);
244
245         tmp = &info->mti_capa_key;
246         mdsnum = mdt_md_site(mdt)->ms_node_id;
247         while (1) {
248                 l_wait_event(thread->t_ctl_waitq,
249                              thread_is_stopping(thread) ||
250                              thread_is_event(thread),
251                              &lwi);
252
253                 if (thread_is_stopping(thread))
254                         break;
255                 thread_clear_flags(thread, SVC_EVENT);
256
257                 if (cfs_time_before(cfs_time_current(), mdt->mdt_ck_expiry))
258                         break;
259
260                 *tmp = *rkey;
261                 make_capa_key(tmp, mdsnum, rkey->lk_keyid);
262
263                 next = mdt->mdt_child;
264                 rc = next->md_ops->mdo_update_capa_key(&env, next, tmp);
265                 if (!rc) {
266                         cfs_spin_lock(&capa_lock);
267                         *bkey = *rkey;
268                         *rkey = *tmp;
269                         cfs_spin_unlock(&capa_lock);
270
271                         rc = write_capa_keys(&env, mdt, mdt->mdt_capa_keys);
272                         if (rc) {
273                                 cfs_spin_lock(&capa_lock);
274                                 *rkey = *bkey;
275                                 memset(bkey, 0, sizeof(*bkey));
276                                 cfs_spin_unlock(&capa_lock);
277                         } else {
278                                 set_capa_key_expiry(mdt);
279                                 DEBUG_CAPA_KEY(D_SEC, rkey, "new");
280                         }
281                 }
282                 if (rc) {
283                         DEBUG_CAPA_KEY(D_ERROR, rkey, "update failed for");
284                         /* next retry is in 300 sec */
285                         mdt->mdt_ck_expiry = jiffies + 300 * CFS_HZ;
286                 }
287
288                 cfs_timer_arm(&mdt->mdt_ck_timer, mdt->mdt_ck_expiry);
289                 CDEBUG(D_SEC, "mdt_ck_timer %lu\n", mdt->mdt_ck_expiry);
290         }
291         lu_env_fini(&env);
292
293         thread_set_flags(thread, SVC_STOPPED);
294         cfs_waitq_signal(&thread->t_ctl_waitq);
295         RETURN(0);
296 }
297
298 int mdt_ck_thread_start(struct mdt_device *mdt)
299 {
300         struct ptlrpc_thread *thread = &mdt->mdt_ck_thread;
301         int rc;
302
303         cfs_waitq_init(&thread->t_ctl_waitq);
304         rc = cfs_create_thread(mdt_ck_thread_main, mdt, CFS_DAEMON_FLAGS);
305         if (rc < 0) {
306                 CERROR("cannot start mdt_ck thread, rc = %d\n", rc);
307                 return rc;
308         }
309
310         l_wait_condition(thread->t_ctl_waitq, thread_is_running(thread));
311         return 0;
312 }
313
314 void mdt_ck_thread_stop(struct mdt_device *mdt)
315 {
316         struct ptlrpc_thread *thread = &mdt->mdt_ck_thread;
317
318         if (!thread_is_running(thread))
319                 return;
320
321         thread_set_flags(thread, SVC_STOPPING);
322         cfs_waitq_signal(&thread->t_ctl_waitq);
323         l_wait_condition(thread->t_ctl_waitq, thread_is_stopped(thread));
324 }