Whamcloud - gitweb
land clio.
[fs/lustre-release.git] / lustre / mdt / mdt_capa.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see
20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright  2008 Sun Microsystems, Inc. All rights reserved
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/mdt/mdt_capa.c
37  *
38  * Lustre Metadata Target (mdt) capability key read/write/update.
39  *
40  * Author: Lai Siyao <lsy@clusterfs.com>
41  */
42
43 #ifndef EXPORT_SYMTAB
44 # define EXPORT_SYMTAB
45 #endif
46 #define DEBUG_SUBSYSTEM S_MDS
47
48 #include "mdt_internal.h"
49
50 static inline void set_capa_key_expiry(struct mdt_device *mdt)
51 {
52         mdt->mdt_ck_expiry = jiffies + mdt->mdt_ck_timeout * HZ;
53 }
54
55 static void make_capa_key(struct lustre_capa_key *key,
56                           mdsno_t mdsnum, int keyid)
57 {
58         key->lk_mdsid = mdsnum;
59         key->lk_keyid = keyid + 1;
60         ll_get_random_bytes(key->lk_key, sizeof(key->lk_key));
61 }
62
/* Value passed to mdt_trans_start() when the capability keys are written. */
enum { MDT_TXN_CAPA_KEYS_WRITE_CREDITS = 1 };
66
67 static inline void lck_cpu_to_le(struct lustre_capa_key *tgt,
68                                  struct lustre_capa_key *src)
69 {
70         tgt->lk_mdsid   = cpu_to_le64(src->lk_mdsid);
71         tgt->lk_keyid   = cpu_to_le32(src->lk_keyid);
72         tgt->lk_padding = cpu_to_le32(src->lk_padding);
73         memcpy(tgt->lk_key, src->lk_key, sizeof(src->lk_key));
74 }
75
76 static inline void lck_le_to_cpu(struct lustre_capa_key *tgt,
77                                  struct lustre_capa_key *src)
78 {
79         tgt->lk_mdsid   = le64_to_cpu(src->lk_mdsid);
80         tgt->lk_keyid   = le32_to_cpu(src->lk_keyid);
81         tgt->lk_padding = le32_to_cpu(src->lk_padding);
82         memcpy(tgt->lk_key, src->lk_key, sizeof(src->lk_key));
83 }
84
85 static int write_capa_keys(const struct lu_env *env,
86                            struct mdt_device *mdt,
87                            struct lustre_capa_key *keys)
88 {
89         struct mdt_thread_info *mti;
90         struct lustre_capa_key *tmp;
91         struct thandle *th;
92         loff_t off = 0;
93         int i, rc;
94
95         mti = lu_context_key_get(&env->le_ctx, &mdt_thread_key);
96
97         th = mdt_trans_start(env, mdt, MDT_TXN_CAPA_KEYS_WRITE_CREDITS);
98         if (IS_ERR(th))
99                 RETURN(PTR_ERR(th));
100
101         tmp = &mti->mti_capa_key;
102
103         for (i = 0; i < 2; i++) {
104                 lck_cpu_to_le(tmp, &keys[i]);
105
106                 rc = mdt_record_write(env, mdt->mdt_ck_obj,
107                                       mdt_buf_const(env, tmp, sizeof(*tmp)),
108                                       &off, th);
109                 if (rc)
110                         break;
111         }
112
113         mdt_trans_stop(env, mdt, th);
114
115         CDEBUG(D_INFO, "write capability keys rc = %d:\n", rc);
116         return rc;
117 }
118
119 static int read_capa_keys(const struct lu_env *env,
120                           struct mdt_device *mdt,
121                           struct lustre_capa_key *keys)
122 {
123         struct mdt_thread_info *mti;
124         struct lustre_capa_key *tmp;
125         loff_t off = 0;
126         int i, rc;
127
128         mti = lu_context_key_get(&env->le_ctx, &mdt_thread_key);
129         tmp = &mti->mti_capa_key;
130
131         for (i = 0; i < 2; i++) {
132                 rc = mdt_record_read(env, mdt->mdt_ck_obj,
133                                      mdt_buf(env, tmp, sizeof(*tmp)), &off);
134                 if (rc)
135                         return rc;
136
137                 lck_le_to_cpu(&keys[i], tmp);
138                 DEBUG_CAPA_KEY(D_SEC, &keys[i], "read");
139         }
140
141         return 0;
142 }
143
144 int mdt_capa_keys_init(const struct lu_env *env, struct mdt_device *mdt)
145 {
146         struct lustre_capa_key  *keys = mdt->mdt_capa_keys;
147         struct mdt_thread_info  *mti;
148         struct dt_object        *obj;
149         struct lu_attr          *la;
150         mdsno_t                  mdsnum;
151         unsigned long            size;
152         int                      rc;
153         ENTRY;
154
155         mdsnum = mdt_md_site(mdt)->ms_node_id;
156
157         mti = lu_context_key_get(&env->le_ctx, &mdt_thread_key);
158         LASSERT(mti != NULL);
159         la = &mti->mti_attr.ma_attr;
160
161         obj = mdt->mdt_ck_obj;
162         rc = obj->do_ops->do_attr_get(env, mdt->mdt_ck_obj, la, BYPASS_CAPA);
163         if (rc)
164                 RETURN(rc);
165
166         size = (unsigned long)la->la_size;
167         if (size == 0) {
168                 int i;
169
170                 for (i = 0; i < 2; i++) {
171                         make_capa_key(&keys[i], mdsnum, i);
172                         DEBUG_CAPA_KEY(D_SEC, &keys[i], "initializing");
173                 }
174
175                 rc = write_capa_keys(env, mdt, keys);
176                 if (rc) {
177                         CERROR("error writing MDS %s: rc %d\n", CAPA_KEYS, rc);
178                         RETURN(rc);
179                 }
180         } else {
181                 rc = read_capa_keys(env, mdt, keys);
182                 if (rc) {
183                         CERROR("error reading MDS %s: rc %d\n", CAPA_KEYS, rc);
184                         RETURN(rc);
185                 }
186         }
187         set_capa_key_expiry(mdt);
188         cfs_timer_arm(&mdt->mdt_ck_timer, mdt->mdt_ck_expiry);
189         CDEBUG(D_SEC, "mds_ck_timer %lu\n", mdt->mdt_ck_expiry);
190         RETURN(0);
191 }
192
193 void mdt_ck_timer_callback(unsigned long castmeharder)
194 {
195         struct mdt_device *mdt = (struct mdt_device *)castmeharder;
196         struct ptlrpc_thread *thread = &mdt->mdt_ck_thread;
197
198         ENTRY;
199         thread->t_flags |= SVC_EVENT;
200         cfs_waitq_signal(&thread->t_ctl_waitq);
201         EXIT;
202 }
203
204 static int mdt_ck_thread_main(void *args)
205 {
206         struct mdt_device      *mdt = args;
207         struct ptlrpc_thread   *thread = &mdt->mdt_ck_thread;
208         struct lustre_capa_key *bkey = &mdt->mdt_capa_keys[0],
209                                *rkey = &mdt->mdt_capa_keys[1];
210         struct lustre_capa_key *tmp;
211         struct lu_env           env;
212         struct mdt_thread_info *info;
213         struct md_device       *next;
214         struct l_wait_info      lwi = { 0 };
215         mdsno_t                 mdsnum;
216         int                     rc;
217         ENTRY;
218
219         ptlrpc_daemonize("mdt_ck");
220         cfs_block_allsigs();
221
222         thread->t_flags = SVC_RUNNING;
223         cfs_waitq_signal(&thread->t_ctl_waitq);
224
225         rc = lu_env_init(&env, LCT_MD_THREAD|LCT_REMEMBER|LCT_NOREF);
226         if (rc)
227                 RETURN(rc);
228
229         thread->t_env = &env;
230         env.le_ctx.lc_thread = thread;
231         env.le_ctx.lc_cookie = 0x1;
232
233         info = lu_context_key_get(&env.le_ctx, &mdt_thread_key);
234         LASSERT(info != NULL);
235
236         tmp = &info->mti_capa_key;
237         mdsnum = mdt_md_site(mdt)->ms_node_id;
238         while (1) {
239                 l_wait_event(thread->t_ctl_waitq,
240                              thread->t_flags & (SVC_STOPPING | SVC_EVENT),
241                              &lwi);
242
243                 if (thread->t_flags & SVC_STOPPING)
244                         break;
245                 thread->t_flags &= ~SVC_EVENT;
246
247                 if (cfs_time_before(cfs_time_current(), mdt->mdt_ck_expiry))
248                         break;
249
250                 *tmp = *rkey;
251                 make_capa_key(tmp, mdsnum, rkey->lk_keyid);
252
253                 next = mdt->mdt_child;
254                 rc = next->md_ops->mdo_update_capa_key(&env, next, tmp);
255                 if (!rc) {
256                         spin_lock(&capa_lock);
257                         *bkey = *rkey;
258                         *rkey = *tmp;
259                         spin_unlock(&capa_lock);
260
261                         rc = write_capa_keys(&env, mdt, mdt->mdt_capa_keys);
262                         if (rc) {
263                                 spin_lock(&capa_lock);
264                                 *rkey = *bkey;
265                                 memset(bkey, 0, sizeof(*bkey));
266                                 spin_unlock(&capa_lock);
267                         } else {
268                                 set_capa_key_expiry(mdt);
269                                 DEBUG_CAPA_KEY(D_SEC, rkey, "new");
270                         }
271                 }
272                 if (rc) {
273                         DEBUG_CAPA_KEY(D_ERROR, rkey, "update failed for");
274                         /* next retry is in 300 sec */
275                         mdt->mdt_ck_expiry = jiffies + 300 * HZ;
276                 }
277
278                 cfs_timer_arm(&mdt->mdt_ck_timer, mdt->mdt_ck_expiry);
279                 CDEBUG(D_SEC, "mdt_ck_timer %lu\n", mdt->mdt_ck_expiry);
280         }
281         lu_env_fini(&env);
282
283         thread->t_flags = SVC_STOPPED;
284         cfs_waitq_signal(&thread->t_ctl_waitq);
285         RETURN(0);
286 }
287
288 int mdt_ck_thread_start(struct mdt_device *mdt)
289 {
290         struct ptlrpc_thread *thread = &mdt->mdt_ck_thread;
291         int rc;
292
293         cfs_waitq_init(&thread->t_ctl_waitq);
294         rc = cfs_kernel_thread(mdt_ck_thread_main, mdt,
295                            (CLONE_VM | CLONE_FILES));
296         if (rc < 0) {
297                 CERROR("cannot start mdt_ck thread, rc = %d\n", rc);
298                 return rc;
299         }
300
301         cfs_wait_event(thread->t_ctl_waitq, thread->t_flags & SVC_RUNNING);
302         return 0;
303 }
304
305 void mdt_ck_thread_stop(struct mdt_device *mdt)
306 {
307         struct ptlrpc_thread *thread = &mdt->mdt_ck_thread;
308
309         if (!(thread->t_flags & SVC_RUNNING))
310                 return;
311
312         thread->t_flags = SVC_STOPPING;
313         cfs_waitq_signal(&thread->t_ctl_waitq);
314         cfs_wait_event(thread->t_ctl_waitq, thread->t_flags & SVC_STOPPED);
315 }