1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
6 * This file is part of Lustre, http://www.lustre.org.
8 * Lustre is free software; you can redistribute it and/or
9 * modify it under the terms of version 2 of the GNU General Public
10 * License as published by the Free Software Foundation.
12 * Lustre is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Lustre; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 * Basic Lustre library routines. */
32 # include <sys/types.h>
34 # include <asm/semaphore.h>
35 # include <linux/sched.h>
36 # include <linux/signal.h>
37 # include <linux/types.h>
39 #include <linux/portals_lib.h>
40 #include <linux/kp30.h> /* XXX just for LASSERT! */
41 #include <linux/lustre_idl.h>
44 #if BITS_PER_LONG > 32
56 struct ptlrpc_request;
60 #include <linux/lustre_ha.h>
61 #include <linux/lustre_net.h>
62 #include <linux/lustre_compat25.h>
/*
 * Declarations of the target_* connection and recovery helpers.  Only the
 * prototypes appear in this header; see the implementations for semantics.
 * The int-returning functions presumably follow the 0 / negative-errno
 * convention -- confirm against the definitions.
 */
64 int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler);
65 int target_handle_disconnect(struct ptlrpc_request *req);
66 int target_handle_reconnect(struct lustre_handle *conn, struct obd_export *exp,
67 struct obd_uuid *cluuid);
68 int target_handle_ping(struct ptlrpc_request *req);
69 void target_cancel_recovery_timer(struct obd_device *obd);
/*
 * Evaluates to obd_timeout * 2.5 * HZ using integer arithmetic (multiply by
 * 5 first, divide by 2 last).  Presumably a jiffies count -- confirm.
 */
71 #define OBD_RECOVERY_TIMEOUT (obd_timeout * 5 * HZ / 2) /* *waves hands* */
72 void target_start_recovery_timer(struct obd_device *obd, svc_handler_t handler);
73 void target_abort_recovery(void *data);
74 int target_queue_recovery_request(struct ptlrpc_request *req,
75 struct obd_device *obd);
76 int target_queue_final_reply(struct ptlrpc_request *req, int rc);
77 void target_send_reply(struct ptlrpc_request *req, int rc, int fail_id);
/*
 * Declarations of the client_* setup, cleanup and lookup helpers.
 * client_obd_setup() and client_sanobd_setup() share one signature: the
 * device plus a (len, buf) pair.  The two lookup helpers return a pointer
 * derived from a connection handle and a target UUID respectively; whether
 * they can return NULL is not visible here -- check the definitions.
 */
81 int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf);
82 int client_sanobd_setup(struct obd_device *obddev, obd_count len, void *buf);
83 int client_obd_cleanup(struct obd_device * obddev, int flags);
84 struct client_obd *client_conn2cli(struct lustre_handle *conn);
85 struct obd_device *client_tgtuuid2obd(struct obd_uuid *tgtuuid);
87 /* It is important that och_fh remain the first item in this structure: that
88 * way, we don't have to re-pack the obdo's inline data before we send it to
89 * the server, we can just send the whole struct unaltered. */
/*
 * Client handle bundling a lustre_handle, an llog cookie and a pointer to a
 * ptlrpc request.
 * NOTE(review): the end of the structure is not visible in this view;
 * OBD_CLIENT_HANDLE_MAGIC is presumably the expected value of a magic member
 * declared there -- confirm.
 */
90 struct obd_client_handle {
91 struct lustre_handle och_fh;
92 struct llog_cookie och_cookie;
93 struct ptlrpc_request *och_req;
96 #define OBD_CLIENT_HANDLE_MAGIC 0xd15ea5ed
/*
 * Conversions between struct kstatfs and struct obd_statfs.  In both
 * prototypes the first parameter is the non-const one of the pair, i.e.
 * presumably the destination (pack: kstatfs -> obd_statfs, unpack: the
 * reverse) -- confirm against the definitions.
 */
100 void statfs_pack(struct obd_statfs *osfs, struct kstatfs *sfs);
101 void statfs_unpack(struct kstatfs *sfs, struct obd_statfs *osfs);
106 struct task_struct *l_owner;
107 struct semaphore l_sem;
/*
 * Operations on struct lustre_lock (declarations only): initialise, acquire,
 * release, and a query returning int.  l_has_lock() presumably reports
 * whether the caller currently holds the lock -- confirm.
 */
111 void l_lock_init(struct lustre_lock *);
112 void l_lock(struct lustre_lock *);
113 void l_unlock(struct lustre_lock *);
114 int l_has_lock(struct lustre_lock *);
/*
 * Run-context structure and its debug magic.
 *
 * OBD_SET_CTXT_MAGIC(ctxt) has two definitions below: one stores
 * OBD_RUN_CTXT_MAGIC into (ctxt)->magic, the other is an empty
 * do {} while(0).  The first follows an #ifdef OBD_CTXT_DEBUG; the second is
 * presumably the non-debug branch (the separating directive is not visible
 * in this view).  OBD_CTXT_DEBUG is unconditionally defined here.
 *
 * NOTE(review): only some members of struct obd_run_ctxt are visible.
 */
125 #define OBD_RUN_CTXT_MAGIC 0xC0FFEEAA
126 #define OBD_CTXT_DEBUG /* development-only debugging */
127 struct obd_run_ctxt {
128 struct vfsmount *pwdmnt;
131 struct obd_ucred ouc;
133 #ifdef OBD_CTXT_DEBUG
139 #ifdef OBD_CTXT_DEBUG
140 #define OBD_SET_CTXT_MAGIC(ctxt) (ctxt)->magic = OBD_RUN_CTXT_MAGIC
142 #define OBD_SET_CTXT_MAGIC(ctxt) do {} while(0)
/*
 * Declarations of the run-context switch helpers and small file helpers.
 *
 * push_ctxt()/pop_ctxt() take the same three arguments (a context to save
 * into / restore from, the new context, and credentials) and are presumably
 * meant to be called as a matched pair -- confirm.
 * simple_mkdir()/simple_mknod() return a dentry pointer; callers elsewhere in
 * this header (l_dput) treat dentry pointers as possibly IS_ERR(), so the
 * result should presumably be checked the same way.
 * lustre_fread()/lustre_fwrite() take a file, buffer, length and a pointer to
 * the file offset.
 */
147 void push_ctxt(struct obd_run_ctxt *save, struct obd_run_ctxt *new_ctx,
148 struct obd_ucred *cred);
149 void pop_ctxt(struct obd_run_ctxt *saved, struct obd_run_ctxt *new_ctx,
150 struct obd_ucred *cred);
151 struct dentry *simple_mkdir(struct dentry *dir, char *name, int mode);
152 struct dentry *simple_mknod(struct dentry *dir, char *name, int mode);
153 int lustre_fread(struct file *file, void *buf, int len, loff_t *off);
154 int lustre_fwrite(struct file *file, const void *buf, int len, loff_t *off);
155 int lustre_fsync(struct file *file);
/*
 * Dentry release helper that tolerates NULL and error-encoded pointers:
 * both are detected up front by the !de || IS_ERR(de) test.  For a real
 * dentry it asserts that d_count is still positive.
 * NOTE(review): the statements between the visible lines (the early exit and
 * the actual release call) are not part of this view.
 */
157 static inline void l_dput(struct dentry *de)
159 if (!de || IS_ERR(de))
161 //shrink_dcache_parent(de);
162 LASSERT(atomic_read(&de->d_count) > 0);
166 /* We need to hold the inode semaphore over the dcache lookup itself, or we
167 * run the risk of entering the filesystem lookup path concurrently on SMP
168 * systems, and instantiating two inodes for the same entry. We still
169 * protect against concurrent addition/removal races with the DLM locking. */
/*
 * Look up the child named fid_name under dparent with lookup_one_len(),
 * holding dparent->d_inode->i_sem for the duration of the call (down()
 * before, up() after).  The lookup result is kept in dchild.
 * NOTE(review): the remaining parameter line (the name length passed as
 * fid_namelen) and the return statement are not visible in this view.
 */
171 static inline struct dentry *ll_lookup_one_len(char *fid_name,
172 struct dentry *dparent,
175 struct dentry *dchild;
177 down(&dparent->d_inode->i_sem);
178 dchild = lookup_one_len(fid_name, dparent, fid_namelen);
179 up(&dparent->d_inode->i_sem);
/*
 * Sleep for up to t seconds: t is multiplied by HZ before being handed to
 * schedule_timeout().  The task state is TASK_INTERRUPTIBLE while sleeping
 * and is set back to TASK_RUNNING afterwards.  The return value of
 * schedule_timeout() is discarded, so the caller cannot tell whether the
 * full interval elapsed.
 */
184 static inline void ll_sleep(int t)
186 set_current_state(TASK_INTERRUPTIBLE);
187 schedule_timeout(t * HZ);
188 set_current_state(TASK_RUNNING);
/*
 * LL_FID_NAMELEN matches the worst-case output of ll_fid2str(): up to 16 hex
 * digits for the 64-bit id, one ':', 8 hex digits for the generation, and
 * the terminating NUL.
 */
192 #define LL_FID_NAMELEN (16 + 1 + 8 + 1)
/*
 * Format id and generation into str as "<id hex>:<generation, 8 hex digits,
 * zero padded>" and return sprintf()'s result (characters written, excluding
 * the NUL).  sprintf() is unbounded, so str must provide at least
 * LL_FID_NAMELEN bytes.
 */
193 static inline int ll_fid2str(char *str, __u64 id, __u32 generation)
195 return sprintf(str, "%llx:%08x", (unsigned long long)id, generation);
198 #include <linux/portals_lib.h>
/*
 * ioctl argument layout.
 *
 * OBD_IOCTL_VERSION is stamped into ioc_version by obd_ioctl_pack() and
 * compared against the header's ioc_version by obd_ioctl_getdata().
 *
 * struct obd_ioctl_data carries two obdo structures, a command word and up
 * to four inline buffers whose lengths are ioc_inllen1..4; the packed size
 * is computed by obd_ioctl_packlen().
 * struct obd_ioctl_hdr is what obd_ioctl_getdata() copies first; it starts
 * with ioc_version.
 *
 * NOTE(review): several members referenced by the helpers in this file
 * (ioc_len, ioc_inlbuf1..4, ioc_pbuf1..2, ioc_plen1..2, ioc_bulk) and the
 * closing braces of both structures are not visible in this view.
 */
203 #define OBD_IOCTL_VERSION 0x00010003
205 struct obd_ioctl_data {
207 uint32_t ioc_version;
213 struct obdo ioc_obdo1;
214 struct obdo ioc_obdo2;
219 uint32_t ioc_command;
224 /* buffers the kernel will treat as user pointers */
230 /* inline buffers for various arguments */
231 uint32_t ioc_inllen1;
233 uint32_t ioc_inllen2;
235 uint32_t ioc_inllen3;
237 uint32_t ioc_inllen4;
243 struct obd_ioctl_hdr {
245 uint32_t ioc_version;
/*
 * Compute the packed size of an ioctl request: size_round() of the fixed
 * struct obd_ioctl_data plus size_round() of each of the four inline buffer
 * lengths.  The total is accumulated in a signed int; the inline lengths are
 * uint32_t, and obd_ioctl_is_invalid() bounds each of them to 1<<30.
 * NOTE(review): the return statement is not visible in this view.
 */
248 static inline int obd_ioctl_packlen(struct obd_ioctl_data *data)
250 int len = size_round(sizeof(struct obd_ioctl_data));
251 len += size_round(data->ioc_inllen1);
252 len += size_round(data->ioc_inllen2);
253 len += size_round(data->ioc_inllen3);
254 len += size_round(data->ioc_inllen4);
/*
 * Sanity-check a packed ioctl request.  Each failed check prints a
 * diagnostic with printk().  The checks are, in order:
 *   - ioc_len and each ioc_inllenN must not exceed 1<<30;
 *   - an inline buffer pointer (ioc_inlbufN) must not be set with a zero
 *     length;
 *   - a user pointer (ioc_pbufN) and its length (ioc_plenN) must be either
 *     both set or both clear;
 *   - obd_ioctl_packlen(data) must equal ioc_len exactly.
 * The last diagnostic used to say "exceeds" although the test is an
 * inequality; it now says what is actually checked.
 * NOTE(review): the return statements are not visible in this view; callers
 * in this file treat a non-zero result as "invalid".
 */
259 static inline int obd_ioctl_is_invalid(struct obd_ioctl_data *data)
261 if (data->ioc_len > (1<<30)) {
262 printk("OBD ioctl: ioc_len larger than 1<<30\n");
265 if (data->ioc_inllen1 > (1<<30)) {
266 printk("OBD ioctl: ioc_inllen1 larger than 1<<30\n");
269 if (data->ioc_inllen2 > (1<<30)) {
270 printk("OBD ioctl: ioc_inllen2 larger than 1<<30\n");
273 if (data->ioc_inllen3 > (1<<30)) {
274 printk("OBD ioctl: ioc_inllen3 larger than 1<<30\n");
277 if (data->ioc_inllen4 > (1<<30)) {
278 printk("OBD ioctl: ioc_inllen4 larger than 1<<30\n");
281 if (data->ioc_inlbuf1 && !data->ioc_inllen1) {
282 printk("OBD ioctl: inlbuf1 pointer but 0 length\n");
285 if (data->ioc_inlbuf2 && !data->ioc_inllen2) {
286 printk("OBD ioctl: inlbuf2 pointer but 0 length\n");
289 if (data->ioc_inlbuf3 && !data->ioc_inllen3) {
290 printk("OBD ioctl: inlbuf3 pointer but 0 length\n");
293 if (data->ioc_inlbuf4 && !data->ioc_inllen4) {
294 printk("OBD ioctl: inlbuf4 pointer but 0 length\n");
297 if (data->ioc_pbuf1 && !data->ioc_plen1) {
298 printk("OBD ioctl: pbuf1 pointer but 0 length\n");
301 if (data->ioc_pbuf2 && !data->ioc_plen2) {
302 printk("OBD ioctl: pbuf2 pointer but 0 length\n");
305 if (data->ioc_plen1 && !data->ioc_pbuf1) {
306 printk("OBD ioctl: plen1 set but NULL pointer\n");
309 if (data->ioc_plen2 && !data->ioc_pbuf2) {
310 printk("OBD ioctl: plen2 set but NULL pointer\n");
313 if (obd_ioctl_packlen(data) != data->ioc_len) {
314 printk("OBD ioctl: packlen does not match ioc_len (%d != %d)\n",
315 obd_ioctl_packlen(data), data->ioc_len);
/*
 * Serialise *data into a flat buffer.
 *
 * Sets data->ioc_len to obd_ioctl_packlen(data) and data->ioc_version to
 * OBD_IOCTL_VERSION.  A caller-supplied buffer (*pbuf non-NULL) is rejected
 * when the packed length exceeds max; *pbuf may also be obtained from
 * malloc() here, in which case the caller presumably owns and frees it --
 * confirm.  The fixed structure is copied to the start of the buffer, then
 * each non-NULL ioc_inlbufN is appended at overlay->ioc_bulk via LOGL()
 * (defined outside this file; presumably copies and advances ptr).  Finally
 * the packed image is validated with obd_ioctl_is_invalid().
 *
 * NOTE(review): the remaining parameter line, the declaration of ptr, the
 * guards around malloc() and all return statements are not visible in this
 * view.
 */
322 static inline int obd_ioctl_pack(struct obd_ioctl_data *data, char **pbuf,
326 struct obd_ioctl_data *overlay;
327 data->ioc_len = obd_ioctl_packlen(data);
328 data->ioc_version = OBD_IOCTL_VERSION;
330 if (*pbuf && data->ioc_len > max)
333 *pbuf = malloc(data->ioc_len);
337 overlay = (struct obd_ioctl_data *)*pbuf;
338 memcpy(*pbuf, data, sizeof(*data));
340 ptr = overlay->ioc_bulk;
341 if (data->ioc_inlbuf1)
342 LOGL(data->ioc_inlbuf1, data->ioc_inllen1, ptr);
343 if (data->ioc_inlbuf2)
344 LOGL(data->ioc_inlbuf2, data->ioc_inllen2, ptr);
345 if (data->ioc_inlbuf3)
346 LOGL(data->ioc_inlbuf3, data->ioc_inllen3, ptr);
347 if (data->ioc_inlbuf4)
348 LOGL(data->ioc_inlbuf4, data->ioc_inllen4, ptr);
349 if (obd_ioctl_is_invalid(overlay))
/*
 * Inverse of obd_ioctl_pack(): refresh *data from the flat buffer pbuf.
 *
 * The caller's four ioc_inlbufN pointers are first written into the overlay
 * so that the following memcpy() of the fixed structure back into *data does
 * not clobber them.  Each non-NULL ioc_inlbufN is then filled from the bulk
 * area via LOGU() (defined outside this file; presumably copies out and
 * advances ptr), using the lengths just copied from pbuf.
 *
 * Note that pbuf is modified (the overlay pointer fields are overwritten).
 * NOTE(review): the remaining parameter line, the declaration of ptr, any
 * checks between the visible lines and the return statement are not visible
 * in this view.
 */
355 static inline int obd_ioctl_unpack(struct obd_ioctl_data *data, char *pbuf,
359 struct obd_ioctl_data *overlay;
363 overlay = (struct obd_ioctl_data *)pbuf;
365 /* Preserve the caller's buffer pointers */
366 overlay->ioc_inlbuf1 = data->ioc_inlbuf1;
367 overlay->ioc_inlbuf2 = data->ioc_inlbuf2;
368 overlay->ioc_inlbuf3 = data->ioc_inlbuf3;
369 overlay->ioc_inlbuf4 = data->ioc_inlbuf4;
371 memcpy(data, pbuf, sizeof(*data));
373 ptr = overlay->ioc_bulk;
374 if (data->ioc_inlbuf1)
375 LOGU(data->ioc_inlbuf1, data->ioc_inllen1, ptr);
376 if (data->ioc_inlbuf2)
377 LOGU(data->ioc_inlbuf2, data->ioc_inllen2, ptr);
378 if (data->ioc_inlbuf3)
379 LOGU(data->ioc_inlbuf3, data->ioc_inllen3, ptr);
380 if (data->ioc_inlbuf4)
381 LOGU(data->ioc_inlbuf4, data->ioc_inllen4, ptr);
387 #include <linux/obd_support.h>
/*
 * Kernel-side intake of an ioctl request from the user pointer arg.
 *
 * Steps visible here:
 *   1. copy a struct obd_ioctl_hdr from user space;
 *   2. reject a version other than OBD_IOCTL_VERSION, a length above
 *      OBD_MAX_IOCTL_BUFFER, or a length below sizeof(struct obd_ioctl_data);
 *   3. allocate *buf with OBD_VMALLOC(hdr.ioc_len) and copy hdr.ioc_len bytes
 *      from user space into it;
 *   4. validate the copied image with obd_ioctl_is_invalid();
 *   5. for each non-zero ioc_inllenN, point ioc_inlbufN into ioc_bulk at the
 *      sum of the size_round()ed lengths of the preceding inline buffers.
 *
 * NOTE(review): the request is fetched from user space twice.  The buffer is
 * sized from hdr.ioc_len (first fetch), but step 4 and step 5 use the
 * lengths found in the second fetch; the visible code does not compare
 * data->ioc_len with hdr.ioc_len.  If user space changes ioc_len and the
 * inline lengths between the two copies, the inline pointers could end up
 * beyond the allocation.  Consider rejecting data->ioc_len != hdr.ioc_len
 * after the second copy.
 * NOTE(review): the error-return and cleanup paths, and any assignment to
 * *len, are not visible in this view.
 */
389 /* buffer MUST be at least the size of obd_ioctl_hdr */
390 static inline int obd_ioctl_getdata(char **buf, int *len, void *arg)
392 struct obd_ioctl_hdr hdr;
393 struct obd_ioctl_data *data;
397 err = copy_from_user(&hdr, (void *)arg, sizeof(hdr));
403 if (hdr.ioc_version != OBD_IOCTL_VERSION) {
404 CERROR("Version mismatch kernel vs application\n");
408 if (hdr.ioc_len > OBD_MAX_IOCTL_BUFFER) {
409 CERROR("User buffer len %d exceeds %d max buffer\n",
410 hdr.ioc_len, OBD_MAX_IOCTL_BUFFER);
414 if (hdr.ioc_len < sizeof(struct obd_ioctl_data)) {
415 printk("OBD: user buffer too small for ioctl\n");
419 /* XXX allocate this more intelligently, using kmalloc when
421 OBD_VMALLOC(*buf, hdr.ioc_len);
423 CERROR("Cannot allocate control buffer of len %d\n",
428 data = (struct obd_ioctl_data *)*buf;
430 err = copy_from_user(*buf, (void *)arg, hdr.ioc_len);
436 if (obd_ioctl_is_invalid(data)) {
437 CERROR("ioctl not correctly formatted\n");
441 if (data->ioc_inllen1) {
442 data->ioc_inlbuf1 = &data->ioc_bulk[0];
445 if (data->ioc_inllen2) {
446 data->ioc_inlbuf2 = &data->ioc_bulk[0] +
447 size_round(data->ioc_inllen1);
450 if (data->ioc_inllen3) {
451 data->ioc_inlbuf3 = &data->ioc_bulk[0] +
452 size_round(data->ioc_inllen1) +
453 size_round(data->ioc_inllen2);
456 if (data->ioc_inllen4) {
457 data->ioc_inlbuf4 = &data->ioc_bulk[0] +
458 size_round(data->ioc_inllen1) +
459 size_round(data->ioc_inllen2) +
460 size_round(data->ioc_inllen3) ;
467 static inline void obd_ioctl_freedata(char *buf, int len)
/*
 * ioctl command numbers.  Every command uses the 'f' type character and is
 * built with _IO/_IOR/_IOW/_IOWR; where an argument type is given it is
 * always long.  The sequence numbers are part of the user/kernel interface:
 * do not renumber existing entries, and pick an unused number for new ones.
 */
476 #define OBD_IOC_CREATE _IOR ('f', 101, long)
477 #define OBD_IOC_SETUP _IOW ('f', 102, long)
478 #define OBD_IOC_CLEANUP _IO ('f', 103 )
479 #define OBD_IOC_DESTROY _IOW ('f', 104, long)
480 #define OBD_IOC_PREALLOCATE _IOWR('f', 105, long)
482 #define OBD_IOC_SETATTR _IOW ('f', 107, long)
483 #define OBD_IOC_GETATTR _IOR ('f', 108, long)
484 #define OBD_IOC_READ _IOWR('f', 109, long)
485 #define OBD_IOC_WRITE _IOWR('f', 110, long)
486 #define OBD_IOC_CONNECT _IOR ('f', 111, long)
487 #define OBD_IOC_DISCONNECT _IOW ('f', 112, long)
488 #define OBD_IOC_STATFS _IOWR('f', 113, long)
489 #define OBD_IOC_SYNC _IOR ('f', 114, long)
490 #define OBD_IOC_READ2 _IOWR('f', 115, long)
491 #define OBD_IOC_FORMAT _IOWR('f', 116, long)
492 #define OBD_IOC_PARTITION _IOWR('f', 117, long)
493 #define OBD_IOC_ATTACH _IOWR('f', 118, long)
494 #define OBD_IOC_DETACH _IOWR('f', 119, long)
495 #define OBD_IOC_COPY _IOWR('f', 120, long)
496 #define OBD_IOC_MIGR _IOWR('f', 121, long)
497 #define OBD_IOC_PUNCH _IOWR('f', 122, long)
498 #define OBD_IOC_DEVICE _IOWR('f', 123, long)
499 #define OBD_IOC_MODULE_DEBUG _IOWR('f', 124, long)
500 #define OBD_IOC_BRW_READ _IOWR('f', 125, long)
501 #define OBD_IOC_BRW_WRITE _IOWR('f', 126, long)
502 #define OBD_IOC_NAME2DEV _IOWR('f', 127, long)
503 #define OBD_IOC_NEWDEV _IOWR('f', 128, long)
504 #define OBD_IOC_LIST _IOWR('f', 129, long)
505 #define OBD_IOC_UUID2DEV _IOWR('f', 130, long)
507 #define OBD_IOC_LOV_SET_CONFIG _IOWR('f', 131, long)
508 #define OBD_IOC_LOV_GET_CONFIG _IOWR('f', 132, long)
/* Alias: OBD_IOC_LOV_CONFIG is the same command as OBD_IOC_LOV_SET_CONFIG. */
509 #define OBD_IOC_LOV_CONFIG OBD_IOC_LOV_SET_CONFIG
510 #define OBD_IOC_CLIENT_RECOVER _IOW ('f', 133, long)
512 #define OBD_IOC_OPEN _IOWR('f', 134, long)
513 #define OBD_IOC_CLOSE _IOWR('f', 135, long)
515 #define OBD_IOC_DEC_FS_USE_COUNT _IO ('f', 139 )
516 #define OBD_IOC_NO_TRANSNO _IOW ('f', 140, long)
517 #define OBD_IOC_SET_READONLY _IOW ('f', 141, long)
518 #define OBD_IOC_ABORT_RECOVERY _IOR ('f', 142, long)
520 #define OBD_GET_VERSION _IOWR ('f', 144, long)
522 #define OBD_IOC_ADD_UUID _IOWR ('f', 145, long)
523 #define OBD_IOC_DEL_UUID _IOWR ('f', 146, long)
524 #define OBD_IOC_CLOSE_UUID _IOWR ('f', 147, long)
526 #define OBD_IOC_MOUNTOPT _IOWR('f', 170, long)
/* Commands in the 200 range carry the ECHO_IOC_ prefix. */
528 #define ECHO_IOC_GET_STRIPE _IOWR('f', 200, long)
529 #define ECHO_IOC_SET_STRIPE _IOWR('f', 201, long)
530 #define ECHO_IOC_ENQUEUE _IOWR('f', 202, long)
531 #define ECHO_IOC_CANCEL _IOWR('f', 203, long)
533 /* XXX _IOWR('f', 250, long) has been defined in
534 * portals/include/linux/kp30.h for debug, don't use it. */
537 /* Until such time as we get_info the per-stripe maximum from the OST,
538 * we define this to be 2T - 4k, which is the ext3 maxbytes. */
539 #define LUSTRE_STRIPE_MAXBYTES 0x1fffffff000ULL
541 #define CHECKSUM_BULK 0
/*
 * Fold a data buffer into the running checksum *cksum, in place: the
 * previous value is shifted left by two bits and `sum` is added.  The buffer
 * is accessed as unsigned bytes through ptr, which keeps the result
 * independent of byte order.
 * NOTE(review): the declaration of `sum` and the loop that accumulates it
 * are not visible in this view; presumably it is the byte-wise sum of the
 * len bytes at addr -- confirm.
 */
544 static inline void ost_checksum(obd_count *cksum, void *addr, int len)
546 unsigned char *ptr = (unsigned char *)addr;
549 /* very stupid, but means I don't have to think about byte order */
553 *cksum = (*cksum << 2) + sum;
558 /* l_wait_event is a flexible sleeping function, permitting simple caller
559 * configuration of interrupt and timeout sensitivity along with actions to
560 * be performed in the event of either exception.
562 * Common usage looks like this:
564 * struct l_wait_info lwi = LWI_TIMEOUT_INTR(timeout, timeout_handler,
565 * intr_handler, callback_data);
566 * rc = l_wait_event(waitq, condition, &lwi);
568 * (LWI_TIMEOUT and LWI_INTR macros are available for timeout- and
569 * interrupt-only variants, respectively.)
571 * If a timeout is specified, the timeout_handler will be invoked in the event
572 * that the timeout expires before the process is awakened. (Note that any
573 * waking of the process will restart the timeout, even if the condition is
574 * not satisfied and the process immediately returns to sleep. This might be
575 * considered a bug.) If the timeout_handler returns non-zero (or no
576 * timeout_handler was supplied), l_wait_event will return -ETIMEDOUT and the
577 * caller will continue. If the handler returns zero instead, the process will
578 * go back to sleep until it is awakened by the waitq or some similar
579 * mechanism, or an interrupt occurs (if the caller has asked for interrupts to
580 * be detected). The timeout will only fire once, so callers should take care
581 * that a timeout_handler which returns zero will take future steps to awaken
582 * the process. N.B. that these steps must include making the provided
583 * condition become true.
584 * If the interrupt flag (lwi_signals) is non-zero, then the process will be
585 * interruptible, and will be awakened by any "killable" signal (SIGTERM,
586 * SIGKILL, SIGINT or SIGQUIT; see LUSTRE_FATAL_SIGS). If a timeout is also
587 * specified, then the process will only become interruptible _after_ the
588 * timeout has expired, though it can be awakened by a signal that was
589 * delivered before the timeout and is still pending when the timeout expires.
590 * If a timeout is not specified, the process will be interruptible at all
591 * times during l_wait_event. */
/*
 * Callback members of struct l_wait_info and the LWI_* constructors.
 *
 * lwi_on_timeout returns int: __l_wait_event treats a non-zero result (or a
 * NULL callback) as "give up waiting".  lwi_on_signal returns nothing.  Both
 * callbacks receive a single void * argument; __l_wait_event passes
 * lwi_cb_data.
 *
 * LWI_TIMEOUT, LWI_INTR and LWI_TIMEOUT_INTR each expand to a
 * struct l_wait_info compound literal using the GNU "field: value"
 * initialiser form, so they can be used directly as an initialiser or have
 * their address taken.
 * NOTE(review): the structure's opening line, its other members, and the
 * remaining initialiser lines of each macro are not visible in this view.
 */
595 int (*lwi_on_timeout)(void *);
597 void (*lwi_on_signal)(void *);
601 #define LWI_TIMEOUT(time, cb, data) \
602 ((struct l_wait_info) { \
604 lwi_on_timeout: cb, \
608 #define LWI_INTR(cb, data) \
609 ((struct l_wait_info) { \
615 #define LWI_TIMEOUT_INTR(time, time_cb, sig_cb, data) \
616 ((struct l_wait_info) { \
618 lwi_on_timeout: time_cb, \
620 lwi_on_signal: sig_cb, \
/*
 * Bit mask of the signals that may interrupt an l_wait_event() sleep:
 * SIGKILL, SIGINT, SIGTERM and SIGQUIT.  Passed to l_w_e_set_sigs() to leave
 * exactly these signals unblocked.
 */
624 #define LUSTRE_FATAL_SIGS (sigmask(SIGKILL) | sigmask(SIGINT) | \
625 sigmask(SIGTERM) | sigmask(SIGQUIT))
/*
 * Install a new blocked-signal mask on the current task and hand back the
 * previous one.
 *
 * sigs is a bit mask of the signals that should remain deliverable:
 * siginitsetinv() initialises current->blocked to the inverse of it, so
 * passing 0 blocks everything.  The old mask is captured in `old` before the
 * change; both steps run under SIGNAL_MASK_LOCK.  Callers keep the returned
 * sigset_t and restore it when they are done (see __l_wait_event).
 *
 * The argument to siginitsetinv() had been corrupted to a currency sign
 * followed by "t->blocked" (an "&curren" HTML-entity mis-decode); it is
 * restored to the address of current->blocked.
 * NOTE(review): the declaration of `old`, the statement between the mask
 * update and the unlock, and the return statement are not visible in this
 * view.
 */
628 static inline sigset_t l_w_e_set_sigs(int sigs)
631 unsigned long irqflags;
633 SIGNAL_MASK_LOCK(current, irqflags);
634 old = current->blocked;
635 siginitsetinv(&current->blocked, sigs);
637 SIGNAL_MASK_UNLOCK(current, irqflags);
/*
 * Core of l_wait_event(); use the l_wait_event() wrapper rather than this
 * macro.
 *
 *   wq        wait queue head (used as &wq, so pass an lvalue)
 *   condition expression the sleeper is waiting for
 *   info      pointer to struct l_wait_info (used unparenthesised as
 *             info->..., so pass a plain identifier)
 *   ret       variable receiving the result
 *
 * Visible behaviour:
 *   - the current task is added to wq;
 *   - the signal mask is replaced via l_w_e_set_sigs(): only
 *     LUSTRE_FATAL_SIGS stay unblocked when lwi_signals is set and there is
 *     no timeout, otherwise every signal is blocked; the previous mask is
 *     kept in `blocked`;
 *   - each pass sets TASK_INTERRUPTIBLE; a pending signal invokes
 *     lwi_on_signal(lwi_cb_data) if that callback is set;
 *   - while a timeout is configured and has not fired yet, the sleep is
 *     schedule_timeout(lwi_timeout); when it returns 0 the timeout has
 *     expired and lwi_on_timeout(lwi_cb_data) is consulted (a NULL callback
 *     counts the same as a non-zero return); if waiting continues and
 *     lwi_signals is set, the fatal signals are unblocked from then on;
 *   - on the way out the saved mask is restored under SIGNAL_MASK_LOCK, the
 *     task state is set to TASK_RUNNING and the task is removed from wq.
 *
 * NOTE(review): several lines of the macro (loop header, condition test,
 * assignments to ret, the un-timed sleep) are not visible in this view.
 */
642 #define __l_wait_event(wq, condition, info, ret) \
644 wait_queue_t __wait; \
645 int __timed_out = 0; \
646 unsigned long irqflags; \
649 init_waitqueue_entry(&__wait, current); \
650 add_wait_queue(&wq, &__wait); \
652 /* Block all signals (just the non-fatal ones if no timeout). */ \
653 if (info->lwi_signals && !info->lwi_timeout) \
654 blocked = l_w_e_set_sigs(LUSTRE_FATAL_SIGS); \
656 blocked = l_w_e_set_sigs(0); \
659 set_current_state(TASK_INTERRUPTIBLE); \
662 if (signal_pending(current)) { \
663 if (info->lwi_on_signal) \
664 info->lwi_on_signal(info->lwi_cb_data); \
668 if (info->lwi_timeout && !__timed_out) { \
669 if (schedule_timeout(info->lwi_timeout) == 0) { \
671 if (!info->lwi_on_timeout || \
672 info->lwi_on_timeout(info->lwi_cb_data)) { \
676 /* We'll take signals after a timeout. */ \
677 if (info->lwi_signals) \
678 (void)l_w_e_set_sigs(LUSTRE_FATAL_SIGS); \
685 SIGNAL_MASK_LOCK(current, irqflags); \
686 current->blocked = blocked; \
688 SIGNAL_MASK_UNLOCK(current, irqflags); \
690 current->state = TASK_RUNNING; \
691 remove_wait_queue(&wq, &__wait); \
/*
 * Public entry point for sleeping on wq until condition holds.
 *
 * The first definition evaluates info exactly once into the local pointer
 * __info and forwards to __l_wait_event(), which stores its result in
 * __ret.  A second definition of the same macro starts on the last line
 * below; it is presumably selected by a preprocessor conditional for a
 * different build configuration.
 * NOTE(review): that conditional, the rest of the first definition and the
 * body of the second are not visible in this view.
 */
694 #define l_wait_event(wq, condition, info) \
697 struct l_wait_info *__info = (info); \
699 __l_wait_event(wq, condition, __info, __ret); \
703 #define l_wait_event(wq, condition, info) \
707 #endif /* __KERNEL__ */
709 #endif /* _LUSTRE_LIB_H */