1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2004, 2005 Cluster File Systems, Inc.
6 * Author: Lai Siyao <lsy@clusterfs.com>
8 * This file is part of Lustre, http://www.lustre.org.
10 * Lustre is free software; you can redistribute it and/or
11 * modify it under the terms of version 2 of the GNU General Public
12 * License as published by the Free Software Foundation.
14 * Lustre is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with Lustre; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 #define DEBUG_SUBSYSTEM S_MDS
27 #include <linux/version.h>
28 #include <asm/uaccess.h>
29 #include <linux/file.h>
30 #include <linux/kmod.h>
31 #include <linux/random.h>
33 #include <linux/obd.h>
34 #include <linux/lustre_mds.h>
35 #include <linux/lustre_fsfilt.h>
36 #include <linux/lustre_sec.h>
38 #include "mds_internal.h"
/* Descriptor of the capa-key thread; the code in this file uses its t_flags
 * field for the run state and its t_ctl_waitq field to wake the thread. */
40 static struct ptlrpc_thread mds_eck_thread;
/* Start/stop handshake between the capa-key thread and its controller. */
42 static struct thread_ctl {
/* completed by the thread right after it sets SVC_RUNNING */
43 struct completion ctl_starting;
/* completed by the thread when it exits */
44 struct completion ctl_finishing;
/* List of capa keys; entries are linked in through list_add_capa_key(). */
47 static LIST_HEAD(mds_capa_key_list);
48 static spinlock_t mds_capa_lock; /* protect capa and capa key */
/* Timer that this file re-arms with key expiry times (in jiffies). */
49 struct timer_list mds_eck_timer;
/* Expiry of a struct mds_capa_key: the little-endian lk_expiry field is
 * converted to CPU byte order and then passed through expiry_to_jiffies(). */
51 #define CAPA_KEY_JIFFIES(key) \
52 expiry_to_jiffies(le64_to_cpu((key)->k_key->lk_expiry))
/* Accessors for the key slot selected by mds->mds_capa_key_idx: the slot
 * itself, its key pointer, its key id (still little-endian), its list node,
 * its expiry in CPU byte order, and its expiry in jiffies. */
54 #define CUR_MDS_CAPA_KEY(mds) (mds)->mds_capa_keys[(mds)->mds_capa_key_idx]
55 #define CUR_CAPA_KEY(mds) CUR_MDS_CAPA_KEY(mds).k_key
56 #define CUR_CAPA_KEY_ID(mds) CUR_MDS_CAPA_KEY(mds).k_key->lk_keyid
57 #define CUR_CAPA_KEY_LIST(mds) CUR_MDS_CAPA_KEY(mds).k_list
58 #define CUR_CAPA_KEY_EXPIRY(mds) le64_to_cpu(CUR_CAPA_KEY(mds)->lk_expiry)
59 #define CUR_CAPA_KEY_JIFFIES(mds) CAPA_KEY_JIFFIES(&CUR_MDS_CAPA_KEY(mds))
/*
 * Write the capability keys held in mds->mds_capa_keys to the key file
 * mds->mds_capa_keys_filp.
 *
 * At most two slots are written, and the loop stops at the first slot that
 * has no key.  The writes are bracketed by push_ctxt()/pop_ctxt() on the
 * obd's lvfs context.
 * NOTE(review): force_sync is presumably forwarded to fsfilt_write_record();
 * the tail of that call is not visible here -- confirm.
 */
61 static int mds_write_capa_key(struct obd_device *obd, int force_sync)
63 struct mds_obd *mds = &obd->u.mds;
64 struct mds_capa_key *keys = mds->mds_capa_keys;
65 struct file *filp = mds->mds_capa_keys_filp;
66 struct lvfs_run_ctxt saved;
71 push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
/* one record per populated slot */
72 for (i = 0; i < 2 && keys[i].k_key; i++) {
73 rc = fsfilt_write_record(obd, filp, keys[i].k_key,
74 sizeof(*keys[i].k_key),
77 CERROR("error writing MDS capa key: rc = %d\n", rc);
81 pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
/*
 * Compare the key ids of slot 0 and slot 1 by subtraction (after conversion
 * from little-endian): the result is negative when slot 1 holds the larger
 * id.  Both slots must be populated, since both k_key pointers are
 * dereferenced.
 */
87 mds_capa_key_cmp(struct mds_obd *mds)
89 return le32_to_cpu(mds->mds_capa_keys[0].k_key->lk_keyid) -
90 le32_to_cpu(mds->mds_capa_keys[1].k_key->lk_keyid);
/*
 * Fill @key with fresh contents for @mds: MDS number, key id, expiry and
 * random key material.  The three integer fields are stored little-endian.
 * NOTE(review): the initial value of keyid (used when there is no current
 * key) is declared on a line not visible here.
 */
94 do_update_capa_key(struct mds_obd *mds, struct lustre_capa_key *key)
/* continue the id sequence from the key currently in use, if any */
99 if (CUR_CAPA_KEY(mds))
100 keyid = le32_to_cpu(CUR_CAPA_KEY_ID(mds)) + 1;
/* mds_capa_key_timeout is read under mds_capa_lock */
101 spin_lock(&mds_capa_lock);
102 expiry_rounded = round_expiry(mds->mds_capa_key_timeout);
103 spin_unlock(&mds_capa_lock);
105 key->lk_mdsid = cpu_to_le32(mds->mds_num);
106 key->lk_keyid = cpu_to_le32(keyid);
107 key->lk_expiry = cpu_to_le64(expiry_rounded);
108 get_random_bytes(key->lk_key, sizeof(key->lk_key));
/*
 * Link @key into the list @head according to its expiry.
 *
 * The list is scanned from the tail; when an entry with a later expiry than
 * @key is found, @key is linked immediately before that entry.  The final
 * list_add_tail() appends @key at the tail of @head.
 * NOTE(review): the statements between the in-loop insertion and the final
 * append are not visible here; presumably the function returns after the
 * in-loop insertion -- confirm.
 */
111 static void list_add_capa_key(struct mds_capa_key *key, struct list_head *head)
113 struct mds_capa_key *tmp;
115 list_for_each_entry_reverse(tmp, head, k_list) {
116 if (le64_to_cpu(key->k_key->lk_expiry) <
117 le64_to_cpu(tmp->k_key->lk_expiry)) {
118 /* put key before tmp */
119 list_add_tail(&key->k_list, &tmp->k_list);
124 list_add_tail(&key->k_list, head);
/*
 * Set up the in-memory capa keys of @obd from the key file @file.
 *
 * An empty file gets a newly generated key in slot 0, which is then written
 * out with mds_write_capa_key().  A non-empty file is asserted to hold
 * exactly one or two key records, which are read into consecutive slots.
 * The current slot is then chosen, the current key is linked into
 * mds_capa_key_list, and mds_eck_timer is moved to the key's expiry when
 * that is earlier than the timer's current expiry or the timer is idle.
 * NOTE(review): error handling and the control flow between the two file
 * cases sit on lines not visible here.
 */
127 int mds_read_capa_key(struct obd_device *obd, struct file *file)
130 struct mds_obd *mds = &obd->u.mds;
131 struct lustre_capa_key *key;
132 unsigned long capa_keys_size = file->f_dentry->d_inode->i_size;
133 unsigned long expiry;
/* empty key file: create the first key and persist it */
137 if (capa_keys_size == 0) {
138 CWARN("%s: initializing new %s\n", obd->obd_name,
139 file->f_dentry->d_name.name);
141 OBD_ALLOC(key, sizeof(*key));
145 do_update_capa_key(mds, key);
147 mds->mds_capa_keys[0].k_key = key;
148 mds->mds_capa_keys[0].k_obd = obd;
149 INIT_LIST_HEAD(&mds->mds_capa_keys[0].k_list);
150 mds->mds_capa_key_idx = 0;
152 rc = mds_write_capa_key(obd, 1);
/* the file may only hold one or two whole key records */
156 LASSERT(capa_keys_size == sizeof(*key) ||
157 capa_keys_size == 2 * sizeof(*key));
/* read one record per iteration into slot i */
159 while (capa_keys_size > i * sizeof(*key)) {
160 OBD_ALLOC(key, sizeof(*key));
164 rc = fsfilt_read_record(obd, file, key, sizeof(*key),
167 CERROR("error reading MDS %s capa key: %d\n",
168 file->f_dentry->d_name.name, rc);
169 OBD_FREE(key, sizeof(*key));
173 mds->mds_capa_keys[i].k_key = key;
174 mds->mds_capa_keys[i].k_obd = obd;
175 INIT_LIST_HEAD(&mds->mds_capa_keys[i].k_list);
/* slot 1 becomes current only when it exists and has the larger key id */
179 mds->mds_capa_key_idx = 0;
180 if (mds->mds_capa_keys[1].k_key && mds_capa_key_cmp(mds) < 0)
181 mds->mds_capa_key_idx = 1;
184 expiry = CUR_CAPA_KEY_JIFFIES(mds);
185 spin_lock(&mds_capa_lock);
/* pull the timer in if this key expires sooner, or arm it if idle */
186 if (time_before(expiry, mds_eck_timer.expires) ||
187 !timer_pending(&mds_eck_timer)) {
188 mod_timer(&mds_eck_timer, expiry);
189 CDEBUG(D_INFO, "mds_eck_timer %lu", expiry);
191 list_add_capa_key(&CUR_MDS_CAPA_KEY(mds), &mds_capa_key_list);
192 spin_unlock(&mds_capa_lock);
/*
 * Release the capa keys of @obd: delete mds_eck_timer, unlink the current
 * key from its list under mds_capa_lock, and free the key buffers of both
 * slots.
 */
197 void mds_capa_keys_cleanup(struct obd_device *obd)
199 struct mds_obd *mds = &obd->u.mds;
202 del_timer(&mds_eck_timer);
203 spin_lock(&mds_capa_lock);
/* only the current slot's list node is unlinked here */
204 if (CUR_CAPA_KEY(mds))
205 list_del_init(&CUR_CAPA_KEY_LIST(mds));
206 spin_unlock(&mds_capa_lock);
208 for (i = 0; i < 2; i++)
209 if (mds->mds_capa_keys[i].k_key)
210 OBD_FREE(mds->mds_capa_keys[i].k_key,
211 sizeof(struct lustre_capa_key));
/*
 * Issue an obd_set_info() call on the export mds->mds_dt_exp using the info
 * name "capa_key".
 * NOTE(review): the remaining arguments of the call are not visible here;
 * presumably @key is the value being set -- confirm.
 */
214 static int mds_set_capa_key(struct obd_device *obd, struct lustre_capa_key *key)
216 struct mds_obd *mds = &obd->u.mds;
220 rc = obd_set_info(mds->mds_dt_exp, strlen("capa_key"), "capa_key",
/*
 * Generate a new capa key for @obd in the slot that is not current, and make
 * that slot the current one.
 *
 * The new key is filled by do_update_capa_key(), passed to
 * mds_set_capa_key(), and written to the key file with force_sync = 1.
 * Then, under mds_capa_lock, the old current key is unlinked from its list,
 * the current index is switched, the new key is linked into
 * mds_capa_key_list, and mds_eck_timer is moved to its expiry when that is
 * earlier than the timer's current expiry or the timer is idle.
 * @mkey must not be the slot being rewritten (asserted).
 * NOTE(review): the last parameter, the allocation guard and the handling of
 * rc/rc2 are on lines not visible here.
 */
226 mds_update_capa_key(struct obd_device *obd, struct mds_capa_key *mkey,
229 struct mds_obd *mds = &obd->u.mds;
/* with two slots, the one to rewrite is the one that is not current */
230 int to_update = !mds->mds_capa_key_idx;
231 struct lustre_capa_key *key = mds->mds_capa_keys[to_update].k_key;
233 unsigned long expiry;
237 LASSERT(mkey != &mds->mds_capa_keys[to_update]);
241 OBD_ALLOC(key, sizeof(*key));
244 mds->mds_capa_keys[to_update].k_key = key;
245 mds->mds_capa_keys[to_update].k_obd = obd;
248 do_update_capa_key(mds, key);
250 keyid = le32_to_cpu(key->lk_keyid);
252 rc = mds_set_capa_key(obd, key);
254 /* XXX: anyway, it will be replayed */
255 CERROR("error set capa key(id %u), err = %d\n", keyid, rc);
257 rc2 = mds_write_capa_key(obd, 1);
261 CDEBUG(D_INFO, "wrote capa keyid %u\n", keyid);
263 spin_lock(&mds_capa_lock);
/* swap the listed key: unlink the old current one, link the new one */
264 list_del_init(&CUR_CAPA_KEY_LIST(mds));
265 mds->mds_capa_key_idx = to_update;
266 expiry = CUR_CAPA_KEY_JIFFIES(mds);
267 list_add_capa_key(&CUR_MDS_CAPA_KEY(mds), &mds_capa_key_list);
269 if (time_before(expiry, mds_eck_timer.expires) ||
270 !timer_pending(&mds_eck_timer)) {
271 mod_timer(&mds_eck_timer, expiry);
272 CDEBUG(D_INFO, "mds_eck_timer %lu", expiry);
274 spin_unlock(&mds_capa_lock);
276 DEBUG_MDS_CAPA_KEY(D_INFO, &CUR_MDS_CAPA_KEY(mds),
277 "mds_update_capa_key");
/*
 * Check, under mds_capa_lock, whether the first key on mds_capa_key_list has
 * reached its expiry (with a 5 second margin), and re-arm mds_eck_timer with
 * that key's expiry when it is earlier than the timer's current expiry or
 * the timer is idle.
 * NOTE(review): the declaration and return of `expired`, and any condition
 * guarding the timer update, are on lines not visible here; presumably
 * `expired` is the return value -- confirm.
 */
282 static inline int have_expired_capa_key(void)
284 struct mds_capa_key *key;
285 unsigned long expiry;
289 spin_lock(&mds_capa_lock);
290 if (!list_empty(&mds_capa_key_list)) {
291 key = list_entry(mds_capa_key_list.next, struct mds_capa_key,
293 /* expiry has one-second granularity, so 5 seconds are subtracted
294 * before it is compared with jiffies, in case the clock is
295 * inaccurate */
296 expiry = CAPA_KEY_JIFFIES(key);
297 expired = time_before(expiry - 5 * HZ, jiffies);
299 if (time_before(expiry, mds_eck_timer.expires) ||
300 !timer_pending(&mds_eck_timer)) {
301 mod_timer(&mds_eck_timer, expiry);
302 CDEBUG(D_INFO, "mds_eck_timer %lu", expiry);
306 spin_unlock(&mds_capa_lock);
/* Return 1 when SVC_STOPPING is set in mds_eck_thread.t_flags, 0 otherwise. */
311 static int inline mds_capa_key_check_stop(void)
313 return (mds_eck_thread.t_flags & SVC_STOPPING) ? 1: 0;
/*
 * Body of the capa-key thread.
 *
 * @arg is the struct thread_ctl used for the start/stop handshake.  The
 * thread blocks all signals, names itself "mds_ck", marks itself
 * SVC_RUNNING and completes ctl_starting.  It then loops until
 * SVC_STOPPING is set: each round it sleeps on t_ctl_waitq until a key has
 * expired or a stop was requested, then walks mds_capa_key_list under
 * mds_capa_lock.  For a key being replaced the lock is dropped around
 * mds_update_capa_key() and retaken afterwards.  Finally mds_eck_timer is
 * set to the expiry of `next`.  On exit the thread marks itself SVC_STOPPED
 * and completes ctl_finishing.
 * NOTE(review): the body of the time_after() branch (presumably it records
 * `next` and leaves the walk) and the guard around the final mod_timer()
 * are on lines not visible here -- confirm.
 */
316 static int mds_capa_key_thread_main(void *arg)
318 struct thread_ctl *ctl = arg;
/* block every signal for this thread */
326 SIGNAL_MASK_LOCK(current, flags);
327 sigfillset(&current->blocked);
329 SIGNAL_MASK_UNLOCK(current, flags);
330 THREAD_NAME(current->comm, sizeof(current->comm), "mds_ck");
334 * letting starting function know, that we are ready and control may be
337 mds_eck_thread.t_flags = SVC_RUNNING;
338 complete(&ctl->ctl_starting);
340 while (!mds_capa_key_check_stop()) {
341 struct l_wait_info lwi = { 0 };
342 unsigned long expiry;
343 struct mds_capa_key *key, *tmp, *next = NULL;
/* sleep until a key is due or a stop is requested */
345 l_wait_event(mds_eck_thread.t_ctl_waitq,
346 (have_expired_capa_key() ||
347 mds_capa_key_check_stop()),
350 spin_lock(&mds_capa_lock);
351 list_for_each_entry_safe(key, tmp, &mds_capa_key_list, k_list) {
/* this key's expiry is still in the future */
352 if (time_after(CAPA_KEY_JIFFIES(key), jiffies)) {
/* the lock is not held across the key update */
357 spin_unlock(&mds_capa_lock);
359 CDEBUG(D_INFO, "mds capa key expired: "
360 "mds #%u, key #%u\n",
361 le32_to_cpu(key->k_key->lk_mdsid),
362 le32_to_cpu(key->k_key->lk_keyid));
364 rc = mds_update_capa_key(key->k_obd, key, 1);
365 spin_lock(&mds_capa_lock);
369 expiry = CAPA_KEY_JIFFIES(next);
370 mod_timer(&mds_eck_timer, expiry);
371 CDEBUG(D_INFO, "mds_eck_timer %lu", expiry);
373 spin_unlock(&mds_capa_lock);
376 mds_eck_thread.t_flags = SVC_STOPPED;
378 /* this is SMP-safe way to finish thread. */
379 complete_and_exit(&ctl->ctl_finishing, 0);
/*
 * Timer callback: wake whoever sleeps on mds_eck_thread.t_ctl_waitq.  The
 * argument is not used.
 */
383 void mds_capa_key_timer_callback(unsigned long unused)
386 wake_up(&mds_eck_thread.t_ctl_waitq);
/*
 * Start the capa-key thread.
 *
 * Asserts that no thread state is set, initialises the two completions, the
 * thread's wait queue and mds_capa_lock, spawns mds_capa_key_thread_main()
 * with kernel_thread(), and waits until the thread has completed
 * ctl_starting (it must then be SVC_RUNNING).
 * NOTE(review): the failure branch after kernel_thread() and the return
 * value are on lines not visible here.
 */
390 int mds_capa_key_start_thread(void)
395 LASSERT(mds_eck_thread.t_flags == 0);
396 init_completion(&mds_eck_ctl.ctl_starting);
397 init_completion(&mds_eck_ctl.ctl_finishing);
398 init_waitqueue_head(&mds_eck_thread.t_ctl_waitq);
399 spin_lock_init(&mds_capa_lock);
401 rc = kernel_thread(mds_capa_key_thread_main, &mds_eck_ctl,
402 (CLONE_VM | CLONE_FILES));
404 CERROR("cannot start capa key thread, "
/* block until the new thread has announced itself */
409 wait_for_completion(&mds_eck_ctl.ctl_starting);
410 LASSERT(mds_eck_thread.t_flags == SVC_RUNNING);
/*
 * Stop the capa-key thread: set SVC_STOPPING, wake the thread, wait for it
 * to complete ctl_finishing, assert that it reported SVC_STOPPED, and reset
 * t_flags to 0 (the value mds_capa_key_start_thread() asserts on entry).
 */
414 void mds_capa_key_stop_thread(void)
417 mds_eck_thread.t_flags = SVC_STOPPING;
418 wake_up(&mds_eck_thread.t_ctl_waitq);
419 wait_for_completion(&mds_eck_ctl.ctl_finishing);
420 LASSERT(mds_eck_thread.t_flags == SVC_STOPPED);
421 mds_eck_thread.t_flags = 0;
/* Store @stat in mds->mds_capa_stat under mds_capa_lock. */
425 void mds_update_capa_stat(struct obd_device *obd, int stat)
427 struct mds_obd *mds = &obd->u.mds;
429 spin_lock(&mds_capa_lock);
430 mds->mds_capa_stat = stat;
431 spin_unlock(&mds_capa_lock);
/*
 * Store @timeout in mds->mds_capa_timeout under mds_capa_lock.  Capabilities
 * that already exist are not touched (see the XXX note below).
 */
434 void mds_update_capa_timeout(struct obd_device *obd, unsigned long timeout)
436 struct mds_obd *mds = &obd->u.mds;
438 spin_lock(&mds_capa_lock);
439 mds->mds_capa_timeout = timeout;
440 /* XXX: update all capabilities in cache if their expiry too long */
441 spin_unlock(&mds_capa_lock);
/*
 * Store @timeout in mds->mds_capa_key_timeout under mds_capa_lock and, when
 * needed, generate a new key with mds_update_capa_key().
 *
 * The current key's expiry is compared with "now + @timeout"; the branch
 * taken when the expiry is smaller only drops the lock.
 * NOTE(review): presumably that branch then returns without regenerating
 * the key (the statement is on a line not visible here) -- confirm.
 */
444 int mds_update_capa_key_timeout(struct obd_device *obd, unsigned long timeout)
446 struct mds_obd *mds = &obd->u.mds;
451 do_gettimeofday(&tv);
453 spin_lock(&mds_capa_lock);
454 mds->mds_capa_key_timeout = timeout;
455 if (CUR_CAPA_KEY_EXPIRY(mds) < tv.tv_sec + timeout) {
456 spin_unlock(&mds_capa_lock);
459 spin_unlock(&mds_capa_lock);
461 rc = mds_update_capa_key(obd, &CUR_MDS_CAPA_KEY(mds), 1);
/*
 * Handle the remote-uid side of @capa for the export @med.
 *
 * For an export that is not remote, lc_ruid is set to lc_uid.  For a remote
 * export, lc_uid is looked up in med->med_idmap and CAPA_FL_REMUID is set in
 * lc_flags.
 * NOTE(review): what is done with the looked-up uid, and what happens on
 * MDS_IDMAP_NOTFOUND, are on lines not visible here.
 */
466 static void mds_capa_reverse_map(struct mds_export_data *med,
467 struct lustre_capa *capa)
471 if (!med->med_remote) {
472 /* when not remote uid, ruid == uid */
473 capa->lc_ruid = capa->lc_uid;
478 uid = mds_idmap_lookup_uid(med->med_idmap, 1, capa->lc_uid);
479 if (uid == MDS_IDMAP_NOTFOUND)
482 capa->lc_flags |= CAPA_FL_REMUID;
/*
 * Pack a capability into the reply message of @req.
 *
 * Visible steps: read mds->mds_capa_stat under mds_capa_lock (one path
 * returns 0 with capabilities disabled); on the renewal path, look up the
 * open file data by req_body->handle and check req_capa->lc_op against its
 * access mode; take the reply buffer at index *offset (asserted to be
 * sizeof(*capa)) and advance *offset; look for an existing capability with
 * capa_get() and copy it with capa_dup(); otherwise build a new one from
 * @req_capa, stamp it with the current key id and a rounded expiry, compute
 * its HMAC with a copy of the current key, and hand it to capa_renew().
 * OBD_MD_CAPA is set in body->valid.
 * NOTE(review): the conditions selecting these paths, the error returns and
 * the end of the function are on lines not visible here.
 */
487 int mds_pack_capa(struct obd_device *obd, struct mds_export_data *med,
488 struct mds_body *req_body, struct lustre_capa *req_capa,
489 struct ptlrpc_request *req, int *offset, struct mds_body *body)
491 struct mds_obd *mds = &obd->u.mds;
492 struct lustre_capa *capa;
493 struct lustre_msg *repmsg = req->rq_repmsg;
494 struct obd_capa *ocapa;
495 __u8 key[CAPA_KEY_LEN]; /* key */
496 int stat, expired, rc = 0;
499 spin_lock(&mds_capa_lock);
500 stat = mds->mds_capa_stat;
501 spin_unlock(&mds_capa_lock);
505 RETURN(0); /* capability is disabled */
508 if (OBD_FAIL_CHECK(OBD_FAIL_MDS_PACK_CAPA))
512 /* capa renewal, check capa op against open mode */
513 struct mds_file_data *mfd;
516 mfd = mds_handle2mfd(&req_body->handle);
518 DEBUG_CAPA(D_INFO, req_capa, "no handle "LPX64" for",
519 req_body->handle.cookie);
523 mode = accmode(mfd->mfd_mode);
524 if (!(req_capa->lc_op & mode)) {
525 DEBUG_CAPA(D_ERROR, req_capa, "accmode %d mismatch",
/* the reply buffer at *offset receives the capability */
533 LASSERT(repmsg->buflens[*offset] == sizeof(*capa));
534 capa = lustre_msg_buf(repmsg, (*offset)++, sizeof(*capa));
535 LASSERT(capa != NULL);
537 ocapa = capa_get(req_capa->lc_uid, req_capa->lc_op, req_capa->lc_mdsid,
538 req_capa->lc_ino, MDS_CAPA);
540 expired = capa_is_to_expire(ocapa);
542 capa_dup(capa, ocapa);
549 memcpy(capa, req_capa, sizeof(*capa));
550 mds_capa_reverse_map(med, capa);
/* key id, expiry and key bytes are sampled together under the lock */
552 spin_lock(&mds_capa_lock);
553 capa->lc_keyid = le32_to_cpu(CUR_CAPA_KEY_ID(mds));
554 capa->lc_expiry = round_expiry(mds->mds_capa_timeout);
555 if (mds->mds_capa_timeout < CAPA_EXPIRY)
556 capa->lc_flags |= CAPA_FL_SHORT;
557 memcpy(key, CUR_CAPA_KEY(mds)->lk_key, sizeof(key));
558 spin_unlock(&mds_capa_lock);
/* the HMAC is computed on the local key copy, outside the lock */
560 capa_hmac(mds->mds_capa_hmac, key, capa);
562 ocapa = capa_renew(capa, MDS_CAPA);
567 body->valid |= OBD_MD_CAPA;