Whamcloud - gitweb
Move md-specific fields out of struct lu_site into special struct md_site, so
[fs/lustre-release.git] / lustre / mdt / mdt_capa.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see
20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright  2008 Sun Microsystems, Inc. All rights reserved
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/mdt/mdt_capa.c
37  *
38  * Lustre Metadata Target (mdt) capability key read/write/update.
39  *
40  * Author: Lai Siyao <lsy@clusterfs.com>
41  */
42
43 #ifndef EXPORT_SYMTAB
44 # define EXPORT_SYMTAB
45 #endif
46 #define DEBUG_SUBSYSTEM S_MDS
47
48 #include "mdt_internal.h"
49
50 static inline void set_capa_key_expiry(struct mdt_device *mdt)
51 {
52         mdt->mdt_ck_expiry = jiffies + mdt->mdt_ck_timeout * HZ;
53 }
54
55 static void make_capa_key(struct lustre_capa_key *key,
56                           mdsno_t mdsnum, int keyid)
57 {
58         key->lk_mdsid = mdsnum;
59         key->lk_keyid = keyid + 1;
60         ll_get_random_bytes(key->lk_key, sizeof(key->lk_key));
61 }
62
/* Value handed to mdt_trans_start() when the capability keys are written. */
enum { MDT_TXN_CAPA_KEYS_WRITE_CREDITS = 1 };
66
67 static inline void lck_cpu_to_le(struct lustre_capa_key *tgt,
68                                  struct lustre_capa_key *src)
69 {
70         tgt->lk_mdsid   = cpu_to_le64(src->lk_mdsid);
71         tgt->lk_keyid   = cpu_to_le32(src->lk_keyid);
72         tgt->lk_padding = cpu_to_le32(src->lk_padding);
73         memcpy(tgt->lk_key, src->lk_key, sizeof(src->lk_key));
74 }
75
76 static inline void lck_le_to_cpu(struct lustre_capa_key *tgt,
77                                  struct lustre_capa_key *src)
78 {
79         tgt->lk_mdsid   = le64_to_cpu(src->lk_mdsid);
80         tgt->lk_keyid   = le32_to_cpu(src->lk_keyid);
81         tgt->lk_padding = le32_to_cpu(src->lk_padding);
82         memcpy(tgt->lk_key, src->lk_key, sizeof(src->lk_key));
83 }
84
85 static int write_capa_keys(const struct lu_env *env,
86                            struct mdt_device *mdt,
87                            struct lustre_capa_key *keys)
88 {
89         struct mdt_thread_info *mti;
90         struct lustre_capa_key *tmp;
91         struct thandle *th;
92         loff_t off = 0;
93         int i, rc;
94
95         mti = lu_context_key_get(&env->le_ctx, &mdt_thread_key);
96
97         th = mdt_trans_start(env, mdt, MDT_TXN_CAPA_KEYS_WRITE_CREDITS);
98         if (IS_ERR(th))
99                 RETURN(PTR_ERR(th));
100
101         tmp = &mti->mti_capa_key;
102
103         for (i = 0; i < 2; i++) {
104                 lck_cpu_to_le(tmp, &keys[i]);
105
106                 rc = mdt_record_write(env, mdt->mdt_ck_obj,
107                                       mdt_buf_const(env, tmp, sizeof(*tmp)),
108                                       &off, th);
109                 if (rc)
110                         break;
111         }
112
113         mdt_trans_stop(env, mdt, th);
114
115         CDEBUG(D_INFO, "write capability keys rc = %d:\n", rc);
116         return rc;
117 }
118
119 static int read_capa_keys(const struct lu_env *env,
120                           struct mdt_device *mdt,
121                           struct lustre_capa_key *keys)
122 {
123         struct mdt_thread_info *mti;
124         struct lustre_capa_key *tmp;
125         loff_t off = 0;
126         int i, rc;
127
128         mti = lu_context_key_get(&env->le_ctx, &mdt_thread_key);
129         tmp = &mti->mti_capa_key;
130
131         for (i = 0; i < 2; i++) {
132                 rc = mdt_record_read(env, mdt->mdt_ck_obj,
133                                      mdt_buf(env, tmp, sizeof(*tmp)), &off);
134                 if (rc)
135                         return rc;
136
137                 lck_le_to_cpu(&keys[i], tmp);
138                 DEBUG_CAPA_KEY(D_SEC, &keys[i], "read");
139         }
140
141         return 0;
142 }
143
144 int mdt_capa_keys_init(const struct lu_env *env, struct mdt_device *mdt)
145 {
146         struct lustre_capa_key  *keys = mdt->mdt_capa_keys;
147         struct mdt_thread_info  *mti;
148         struct dt_object        *obj;
149         struct lu_attr          *la;
150         mdsno_t                  mdsnum;
151         unsigned long            size;
152         int                      rc;
153         ENTRY;
154
155         mdsnum = mdt_md_site(mdt)->ms_node_id;
156
157         mti = lu_context_key_get(&env->le_ctx, &mdt_thread_key);
158         LASSERT(mti != NULL);
159         la = &mti->mti_attr.ma_attr;
160
161         obj = mdt->mdt_ck_obj;
162         rc = obj->do_ops->do_attr_get(env, mdt->mdt_ck_obj, la, BYPASS_CAPA);
163         if (rc)
164                 RETURN(rc);
165
166         size = (unsigned long)la->la_size;
167         if (size == 0) {
168                 int i;
169
170                 for (i = 0; i < 2; i++) {
171                         make_capa_key(&keys[i], mdsnum, i);
172                         DEBUG_CAPA_KEY(D_SEC, &keys[i], "initializing");
173                 }
174
175                 rc = write_capa_keys(env, mdt, keys);
176                 if (rc) {
177                         CERROR("error writing MDS %s: rc %d\n", CAPA_KEYS, rc);
178                         RETURN(rc);
179                 }
180         } else {
181                 rc = read_capa_keys(env, mdt, keys);
182                 if (rc) {
183                         CERROR("error reading MDS %s: rc %d\n", CAPA_KEYS, rc);
184                         RETURN(rc);
185                 }
186         }
187         set_capa_key_expiry(mdt);
188         cfs_timer_arm(&mdt->mdt_ck_timer, mdt->mdt_ck_expiry);
189         CDEBUG(D_SEC, "mds_ck_timer %lu\n", mdt->mdt_ck_expiry);
190         RETURN(0);
191 }
192
193 void mdt_ck_timer_callback(unsigned long castmeharder)
194 {
195         struct mdt_device *mdt = (struct mdt_device *)castmeharder;
196         struct ptlrpc_thread *thread = &mdt->mdt_ck_thread;
197
198         ENTRY;
199         thread->t_flags |= SVC_EVENT;
200         cfs_waitq_signal(&thread->t_ctl_waitq);
201         EXIT;
202 }
203
204 static int mdt_ck_thread_main(void *args)
205 {
206         struct mdt_device      *mdt = args;
207         struct ptlrpc_thread   *thread = &mdt->mdt_ck_thread;
208         struct lustre_capa_key *bkey = &mdt->mdt_capa_keys[0],
209                                *rkey = &mdt->mdt_capa_keys[1];
210         struct lustre_capa_key *tmp;
211         struct lu_env           env;
212         struct mdt_thread_info *info;
213         struct md_device       *next;
214         struct l_wait_info      lwi = { 0 };
215         mdsno_t                 mdsnum;
216         int                     rc;
217         ENTRY;
218
219         ptlrpc_daemonize("mdt_ck");
220         cfs_block_allsigs();
221
222         thread->t_flags = SVC_RUNNING;
223         cfs_waitq_signal(&thread->t_ctl_waitq);
224
225         rc = lu_env_init(&env, NULL, LCT_MD_THREAD);
226         if (rc)
227                 RETURN(rc);
228
229         thread->t_env = &env;
230         env.le_ctx.lc_thread = thread;
231
232         info = lu_context_key_get(&env.le_ctx, &mdt_thread_key);
233         LASSERT(info != NULL);
234
235         tmp = &info->mti_capa_key;
236         mdsnum = mdt_md_site(mdt)->ms_node_id;
237         while (1) {
238                 l_wait_event(thread->t_ctl_waitq,
239                              thread->t_flags & (SVC_STOPPING | SVC_EVENT),
240                              &lwi);
241
242                 if (thread->t_flags & SVC_STOPPING)
243                         break;
244                 thread->t_flags &= ~SVC_EVENT;
245
246                 if (cfs_time_before(cfs_time_current(), mdt->mdt_ck_expiry))
247                         break;
248
249                 *tmp = *rkey;
250                 make_capa_key(tmp, mdsnum, rkey->lk_keyid);
251
252                 next = mdt->mdt_child;
253                 rc = next->md_ops->mdo_update_capa_key(&env, next, tmp);
254                 if (!rc) {
255                         spin_lock(&capa_lock);
256                         *bkey = *rkey;
257                         *rkey = *tmp;
258                         spin_unlock(&capa_lock);
259
260                         rc = write_capa_keys(&env, mdt, mdt->mdt_capa_keys);
261                         if (rc) {
262                                 spin_lock(&capa_lock);
263                                 *rkey = *bkey;
264                                 memset(bkey, 0, sizeof(*bkey));
265                                 spin_unlock(&capa_lock);
266                         } else {
267                                 set_capa_key_expiry(mdt);
268                                 DEBUG_CAPA_KEY(D_SEC, rkey, "new");
269                         }
270                 }
271                 if (rc) {
272                         DEBUG_CAPA_KEY(D_ERROR, rkey, "update failed for");
273                         /* next retry is in 300 sec */
274                         mdt->mdt_ck_expiry = jiffies + 300 * HZ;
275                 }
276
277                 cfs_timer_arm(&mdt->mdt_ck_timer, mdt->mdt_ck_expiry);
278                 CDEBUG(D_SEC, "mdt_ck_timer %lu\n", mdt->mdt_ck_expiry);
279         }
280         lu_env_fini(&env);
281
282         thread->t_flags = SVC_STOPPED;
283         cfs_waitq_signal(&thread->t_ctl_waitq);
284         RETURN(0);
285 }
286
287 int mdt_ck_thread_start(struct mdt_device *mdt)
288 {
289         struct ptlrpc_thread *thread = &mdt->mdt_ck_thread;
290         int rc;
291
292         cfs_waitq_init(&thread->t_ctl_waitq);
293         rc = cfs_kernel_thread(mdt_ck_thread_main, mdt,
294                            (CLONE_VM | CLONE_FILES));
295         if (rc < 0) {
296                 CERROR("cannot start mdt_ck thread, rc = %d\n", rc);
297                 return rc;
298         }
299
300         cfs_wait_event(thread->t_ctl_waitq, thread->t_flags & SVC_RUNNING);
301         return 0;
302 }
303
304 void mdt_ck_thread_stop(struct mdt_device *mdt)
305 {
306         struct ptlrpc_thread *thread = &mdt->mdt_ck_thread;
307
308         if (!(thread->t_flags & SVC_RUNNING))
309                 return;
310
311         thread->t_flags = SVC_STOPPING;
312         cfs_waitq_signal(&thread->t_ctl_waitq);
313         cfs_wait_event(thread->t_ctl_waitq, thread->t_flags & SVC_STOPPED);
314 }