Whamcloud - gitweb
Revert "b=19427 correct lmm_object_id and reserve fids for fid-on-OST."
[fs/lustre-release.git] / lustre / mdt / mdt_capa.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see
20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright  2008 Sun Microsystems, Inc. All rights reserved
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/mdt/mdt_capa.c
37  *
38  * Lustre Metadata Target (mdt) capability key read/write/update.
39  *
40  * Author: Lai Siyao <lsy@clusterfs.com>
41  */
42
43 #ifndef EXPORT_SYMTAB
44 # define EXPORT_SYMTAB
45 #endif
46 #define DEBUG_SUBSYSTEM S_MDS
47
48 #include "mdt_internal.h"
49
50 static inline void set_capa_key_expiry(struct mdt_device *mdt)
51 {
52         mdt->mdt_ck_expiry = jiffies + mdt->mdt_ck_timeout * CFS_HZ;
53 }
54
55 static void make_capa_key(struct lustre_capa_key *key,
56                           mdsno_t mdsnum, int keyid)
57 {
58         key->lk_mdsid = mdsnum;
59         key->lk_keyid = keyid + 1;
60         ll_get_random_bytes(key->lk_key, sizeof(key->lk_key));
61 }
62
63 static inline void lck_cpu_to_le(struct lustre_capa_key *tgt,
64                                  struct lustre_capa_key *src)
65 {
66         tgt->lk_mdsid   = cpu_to_le64(src->lk_mdsid);
67         tgt->lk_keyid   = cpu_to_le32(src->lk_keyid);
68         tgt->lk_padding = cpu_to_le32(src->lk_padding);
69         memcpy(tgt->lk_key, src->lk_key, sizeof(src->lk_key));
70 }
71
72 static inline void lck_le_to_cpu(struct lustre_capa_key *tgt,
73                                  struct lustre_capa_key *src)
74 {
75         tgt->lk_mdsid   = le64_to_cpu(src->lk_mdsid);
76         tgt->lk_keyid   = le32_to_cpu(src->lk_keyid);
77         tgt->lk_padding = le32_to_cpu(src->lk_padding);
78         memcpy(tgt->lk_key, src->lk_key, sizeof(src->lk_key));
79 }
80
81 static int write_capa_keys(const struct lu_env *env,
82                            struct mdt_device *mdt,
83                            struct lustre_capa_key *keys)
84 {
85         struct mdt_thread_info *mti;
86         struct lustre_capa_key *tmp;
87         struct thandle *th;
88         loff_t off = 0;
89         int i, rc;
90
91         mti = lu_context_key_get(&env->le_ctx, &mdt_thread_key);
92         mdt_trans_credit_init(env, mdt, MDT_TXN_CAPA_KEYS_WRITE_OP);
93         th = mdt_trans_start(env, mdt);
94         if (IS_ERR(th))
95                 RETURN(PTR_ERR(th));
96
97         tmp = &mti->mti_capa_key;
98
99         for (i = 0; i < 2; i++) {
100                 lck_cpu_to_le(tmp, &keys[i]);
101
102                 rc = dt_record_write(env, mdt->mdt_ck_obj,
103                                      mdt_buf_const(env, tmp, sizeof(*tmp)),
104                                      &off, th);
105                 if (rc)
106                         break;
107         }
108
109         mdt_trans_stop(env, mdt, th);
110
111         CDEBUG(D_INFO, "write capability keys rc = %d:\n", rc);
112         return rc;
113 }
114
115 static int read_capa_keys(const struct lu_env *env,
116                           struct mdt_device *mdt,
117                           struct lustre_capa_key *keys)
118 {
119         struct mdt_thread_info *mti;
120         struct lustre_capa_key *tmp;
121         loff_t off = 0;
122         int i, rc;
123
124         mti = lu_context_key_get(&env->le_ctx, &mdt_thread_key);
125         tmp = &mti->mti_capa_key;
126
127         for (i = 0; i < 2; i++) {
128                 rc = dt_record_read(env, mdt->mdt_ck_obj,
129                                     mdt_buf(env, tmp, sizeof(*tmp)), &off);
130                 if (rc)
131                         return rc;
132
133                 lck_le_to_cpu(&keys[i], tmp);
134                 DEBUG_CAPA_KEY(D_SEC, &keys[i], "read");
135         }
136
137         return 0;
138 }
139
140 int mdt_capa_keys_init(const struct lu_env *env, struct mdt_device *mdt)
141 {
142         struct lustre_capa_key  *keys = mdt->mdt_capa_keys;
143         struct mdt_thread_info  *mti;
144         struct dt_object        *obj;
145         struct lu_attr          *la;
146         mdsno_t                  mdsnum;
147         unsigned long            size;
148         int                      rc;
149         ENTRY;
150
151         mdsnum = mdt_md_site(mdt)->ms_node_id;
152
153         mti = lu_context_key_get(&env->le_ctx, &mdt_thread_key);
154         LASSERT(mti != NULL);
155         la = &mti->mti_attr.ma_attr;
156
157         obj = mdt->mdt_ck_obj;
158         rc = obj->do_ops->do_attr_get(env, mdt->mdt_ck_obj, la, BYPASS_CAPA);
159         if (rc)
160                 RETURN(rc);
161
162         size = (unsigned long)la->la_size;
163         if (size == 0) {
164                 int i;
165
166                 for (i = 0; i < 2; i++) {
167                         make_capa_key(&keys[i], mdsnum, i);
168                         DEBUG_CAPA_KEY(D_SEC, &keys[i], "initializing");
169                 }
170
171                 rc = write_capa_keys(env, mdt, keys);
172                 if (rc) {
173                         CERROR("error writing MDS %s: rc %d\n", CAPA_KEYS, rc);
174                         RETURN(rc);
175                 }
176         } else {
177                 rc = read_capa_keys(env, mdt, keys);
178                 if (rc) {
179                         CERROR("error reading MDS %s: rc %d\n", CAPA_KEYS, rc);
180                         RETURN(rc);
181                 }
182         }
183         set_capa_key_expiry(mdt);
184         cfs_timer_arm(&mdt->mdt_ck_timer, mdt->mdt_ck_expiry);
185         CDEBUG(D_SEC, "mds_ck_timer %lu\n", mdt->mdt_ck_expiry);
186         RETURN(0);
187 }
188
189 void mdt_ck_timer_callback(unsigned long castmeharder)
190 {
191         struct mdt_device *mdt = (struct mdt_device *)castmeharder;
192         struct ptlrpc_thread *thread = &mdt->mdt_ck_thread;
193
194         ENTRY;
195         thread->t_flags |= SVC_EVENT;
196         cfs_waitq_signal(&thread->t_ctl_waitq);
197         EXIT;
198 }
199
200 static int mdt_ck_thread_main(void *args)
201 {
202         struct mdt_device      *mdt = args;
203         struct ptlrpc_thread   *thread = &mdt->mdt_ck_thread;
204         struct lustre_capa_key *bkey = &mdt->mdt_capa_keys[0],
205                                *rkey = &mdt->mdt_capa_keys[1];
206         struct lustre_capa_key *tmp;
207         struct lu_env           env;
208         struct mdt_thread_info *info;
209         struct md_device       *next;
210         struct l_wait_info      lwi = { 0 };
211         mdsno_t                 mdsnum;
212         int                     rc;
213         ENTRY;
214
215         cfs_daemonize_ctxt("mdt_ck");
216         cfs_block_allsigs();
217
218         thread->t_flags = SVC_RUNNING;
219         cfs_waitq_signal(&thread->t_ctl_waitq);
220
221         rc = lu_env_init(&env, LCT_MD_THREAD|LCT_REMEMBER|LCT_NOREF);
222         if (rc)
223                 RETURN(rc);
224
225         thread->t_env = &env;
226         env.le_ctx.lc_thread = thread;
227         env.le_ctx.lc_cookie = 0x1;
228
229         info = lu_context_key_get(&env.le_ctx, &mdt_thread_key);
230         LASSERT(info != NULL);
231
232         tmp = &info->mti_capa_key;
233         mdsnum = mdt_md_site(mdt)->ms_node_id;
234         while (1) {
235                 l_wait_event(thread->t_ctl_waitq,
236                              thread->t_flags & (SVC_STOPPING | SVC_EVENT),
237                              &lwi);
238
239                 if (thread->t_flags & SVC_STOPPING)
240                         break;
241                 thread->t_flags &= ~SVC_EVENT;
242
243                 if (cfs_time_before(cfs_time_current(), mdt->mdt_ck_expiry))
244                         break;
245
246                 *tmp = *rkey;
247                 make_capa_key(tmp, mdsnum, rkey->lk_keyid);
248
249                 next = mdt->mdt_child;
250                 rc = next->md_ops->mdo_update_capa_key(&env, next, tmp);
251                 if (!rc) {
252                         cfs_spin_lock(&capa_lock);
253                         *bkey = *rkey;
254                         *rkey = *tmp;
255                         cfs_spin_unlock(&capa_lock);
256
257                         rc = write_capa_keys(&env, mdt, mdt->mdt_capa_keys);
258                         if (rc) {
259                                 cfs_spin_lock(&capa_lock);
260                                 *rkey = *bkey;
261                                 memset(bkey, 0, sizeof(*bkey));
262                                 cfs_spin_unlock(&capa_lock);
263                         } else {
264                                 set_capa_key_expiry(mdt);
265                                 DEBUG_CAPA_KEY(D_SEC, rkey, "new");
266                         }
267                 }
268                 if (rc) {
269                         DEBUG_CAPA_KEY(D_ERROR, rkey, "update failed for");
270                         /* next retry is in 300 sec */
271                         mdt->mdt_ck_expiry = jiffies + 300 * CFS_HZ;
272                 }
273
274                 cfs_timer_arm(&mdt->mdt_ck_timer, mdt->mdt_ck_expiry);
275                 CDEBUG(D_SEC, "mdt_ck_timer %lu\n", mdt->mdt_ck_expiry);
276         }
277         lu_env_fini(&env);
278
279         thread->t_flags = SVC_STOPPED;
280         cfs_waitq_signal(&thread->t_ctl_waitq);
281         RETURN(0);
282 }
283
284 int mdt_ck_thread_start(struct mdt_device *mdt)
285 {
286         struct ptlrpc_thread *thread = &mdt->mdt_ck_thread;
287         int rc;
288
289         cfs_waitq_init(&thread->t_ctl_waitq);
290         rc = cfs_kernel_thread(mdt_ck_thread_main, mdt,
291                                (CLONE_VM | CLONE_FILES));
292         if (rc < 0) {
293                 CERROR("cannot start mdt_ck thread, rc = %d\n", rc);
294                 return rc;
295         }
296
297         l_cfs_wait_event(thread->t_ctl_waitq, thread->t_flags & SVC_RUNNING);
298         return 0;
299 }
300
301 void mdt_ck_thread_stop(struct mdt_device *mdt)
302 {
303         struct ptlrpc_thread *thread = &mdt->mdt_ck_thread;
304
305         if (!(thread->t_flags & SVC_RUNNING))
306                 return;
307
308         thread->t_flags = SVC_STOPPING;
309         cfs_waitq_signal(&thread->t_ctl_waitq);
310         l_cfs_wait_event(thread->t_ctl_waitq, thread->t_flags & SVC_STOPPED);
311 }