1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
6 * This file is part of Lustre, http://www.lustre.org.
8 * Lustre is free software; you can redistribute it and/or
9 * modify it under the terms of version 2 of the GNU General Public
10 * License as published by the Free Software Foundation.
12 * Lustre is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Lustre; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 * Basic Lustre library routines.
30 # include <sys/types.h>
32 # include <asm/semaphore.h>
33 # include <linux/rwsem.h>
34 # include <linux/sched.h>
35 # include <linux/signal.h>
36 # include <linux/types.h>
38 #include <libcfs/kp30.h>
39 #include <linux/lustre_idl.h>
40 #include <linux/lustre_cfg.h>
43 #if BITS_PER_LONG > 32
44 # define LI_POISON ((int)0x5a5a5a5a5a5a5a5a)
45 # define LL_POISON ((long)0x5a5a5a5a5a5a5a5a)
46 # define LP_POISON ((void *)(long)0x5a5a5a5a5a5a5a5a)
48 # define LI_POISON ((int)0x5a5a5a5a)
49 # define LL_POISON ((long)0x5a5a5a5a)
50 # define LP_POISON ((void *)(long)0x5a5a5a5a)
55 /* x86_64 has 64bit longs and defines u64 as long long */
56 #if BITS_PER_LONG > 32 && !defined(__x86_64__)
67 /* lustre_id output helper macros */
68 #define DLID4 "%lu/%lu/%lu/%lu"
71 (unsigned long)(id)->li_fid.lf_id, \
72 (unsigned long)(id)->li_fid.lf_group, \
73 (unsigned long)(id)->li_stc.u.e3s.l3s_ino, \
74 (unsigned long)(id)->li_stc.u.e3s.l3s_gen
77 struct ptlrpc_request;
82 #include <linux/lustre_ha.h>
83 #include <linux/lustre_net.h>
84 #include <linux/lustre_compat25.h>
85 #include <linux/lvfs.h>
87 int target_handle_connect(struct ptlrpc_request *req);
88 int target_handle_disconnect(struct ptlrpc_request *req);
89 void target_destroy_export(struct obd_export *exp);
90 int target_handle_reconnect(struct lustre_handle *conn, struct obd_export *exp,
91 struct obd_uuid *cluuid, int);
92 int target_handle_ping(struct ptlrpc_request *req);
93 void target_cancel_recovery_timer(struct obd_device *obd);
95 #define OBD_RECOVERY_TIMEOUT (obd_timeout * 5 * HZ / 2) /* *waves hands* */
96 void target_start_recovery_timer(struct obd_device *obd);
97 int target_start_recovery_thread(struct obd_device *obd,
98 svc_handler_t handler);
99 void target_stop_recovery_thread(struct obd_device *obd);
100 void target_cleanup_recovery(struct obd_device *obd);
101 int target_queue_recovery_request(struct ptlrpc_request *req,
102 struct obd_device *obd);
103 int target_queue_final_reply(struct ptlrpc_request *req, int rc);
104 void target_send_reply(struct ptlrpc_request *req, int rc, int fail_id);
108 int client_sanobd_setup(struct obd_device *obddev, obd_count len, void *buf);
109 struct client_obd *client_conn2cli(struct lustre_handle *conn);
111 struct mdc_open_data;
112 struct obd_client_handle {
113 struct lustre_handle och_fh;
114 struct llog_cookie och_cookie;
115 struct mdc_open_data *och_mod;
116 struct obd_capa *och_capa;
119 #define OBD_CLIENT_HANDLE_MAGIC 0xd15ea5ed
122 void statfs_pack(struct obd_statfs *osfs, struct kstatfs *sfs);
123 void statfs_unpack(struct kstatfs *sfs, struct obd_statfs *osfs);
128 #define OBD_IOCTL_VERSION 0x00010004
130 struct obd_ioctl_data {
132 uint32_t ioc_version;
138 struct obdo ioc_obdo1;
139 struct obdo ioc_obdo2;
144 uint32_t ioc_command;
150 /* buffers the kernel will treat as user pointers */
156 /* inline buffers for various arguments */
157 uint32_t ioc_inllen1;
159 uint32_t ioc_inllen2;
161 uint32_t ioc_inllen3;
163 uint32_t ioc_inllen4;
169 struct obd_ioctl_hdr {
171 uint32_t ioc_version;
/* Compute the packed size of an ioctl request: the size_round()ed size of
 * struct obd_ioctl_data plus the size_round()ed length of each of the four
 * inline buffers (ioc_inllen1 .. ioc_inllen4).
 *
 * NOTE(review): the sum is accumulated in an int, while
 * obd_ioctl_is_invalid() only bounds each inline length by 1<<30, so four
 * maximal lengths could overflow len -- confirm callers bound the total.
 * NOTE(review): the braces and the final return are not visible in this
 * listing; presumably len is returned. */
174 static inline int obd_ioctl_packlen(struct obd_ioctl_data *data)
176 int len = size_round(sizeof(struct obd_ioctl_data));
177 len += size_round(data->ioc_inllen1);
178 len += size_round(data->ioc_inllen2);
179 len += size_round(data->ioc_inllen3);
180 len += size_round(data->ioc_inllen4);
/* Sanity-check an ioctl request and log the first problem found via CERROR.
 *
 * Visible checks, in order:
 *  - ioc_len and each of ioc_inllen1..4 must not be larger than 1<<30;
 *  - an inline buffer pointer (ioc_inlbuf1..4) must not be set while its
 *    length is 0;
 *  - a pbuf pointer (ioc_pbuf1/2) must not be set while its plen is 0, and
 *    a plen must not be set while its pbuf pointer is NULL;
 *  - obd_ioctl_packlen(data) must be equal to ioc_len.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * going by the name and the callers' "if (obd_ioctl_is_invalid(...))" tests,
 * presumably non-zero means invalid -- confirm. */
185 static inline int obd_ioctl_is_invalid(struct obd_ioctl_data *data)
187 if (data->ioc_len > (1<<30)) {
188 CERROR("OBD ioctl: ioc_len larger than 1<<30\n");
191 if (data->ioc_inllen1 > (1<<30)) {
192 CERROR("OBD ioctl: ioc_inllen1 larger than 1<<30\n");
195 if (data->ioc_inllen2 > (1<<30)) {
196 CERROR("OBD ioctl: ioc_inllen2 larger than 1<<30\n");
199 if (data->ioc_inllen3 > (1<<30)) {
200 CERROR("OBD ioctl: ioc_inllen3 larger than 1<<30\n");
203 if (data->ioc_inllen4 > (1<<30)) {
204 CERROR("OBD ioctl: ioc_inllen4 larger than 1<<30\n");
207 if (data->ioc_inlbuf1 && !data->ioc_inllen1) {
208 CERROR("OBD ioctl: inlbuf1 pointer but 0 length\n");
211 if (data->ioc_inlbuf2 && !data->ioc_inllen2) {
212 CERROR("OBD ioctl: inlbuf2 pointer but 0 length\n");
215 if (data->ioc_inlbuf3 && !data->ioc_inllen3) {
216 CERROR("OBD ioctl: inlbuf3 pointer but 0 length\n");
219 if (data->ioc_inlbuf4 && !data->ioc_inllen4) {
220 CERROR("OBD ioctl: inlbuf4 pointer but 0 length\n");
223 if (data->ioc_pbuf1 && !data->ioc_plen1) {
224 CERROR("OBD ioctl: pbuf1 pointer but 0 length\n");
227 if (data->ioc_pbuf2 && !data->ioc_plen2) {
228 CERROR("OBD ioctl: pbuf2 pointer but 0 length\n");
231 if (data->ioc_plen1 && !data->ioc_pbuf1) {
232 CERROR("OBD ioctl: plen1 set but NULL pointer\n");
235 if (data->ioc_plen2 && !data->ioc_pbuf2) {
236 CERROR("OBD ioctl: plen2 set but NULL pointer\n");
/* NOTE(review): the test below is an inequality, so the message is also
 * printed when packlen is smaller than ioc_len, not only when it exceeds
 * it. */
239 if (obd_ioctl_packlen(data) != data->ioc_len) {
240 CERROR("OBD ioctl: packlen exceeds ioc_len (%d != %d)\n",
241 obd_ioctl_packlen(data), data->ioc_len);
/* Serialise an ioctl request into a single flat buffer at *pbuf.
 *
 * Visible steps: ioc_len is set from obd_ioctl_packlen() and ioc_version to
 * OBD_IOCTL_VERSION; a caller-supplied *pbuf is checked against max; a
 * buffer of ioc_len bytes is obtained with malloc(); the header is copied
 * with memcpy(); every non-NULL inline buffer is appended at
 * overlay->ioc_bulk through LOGL(); finally the packed image is checked
 * with obd_ioctl_is_invalid().
 *
 * NOTE(review): the rest of the signature (the declaration of max), the
 * condition guarding the malloc() (presumably "*pbuf == NULL"), the error
 * paths and the return values are not visible in this listing. */
248 static inline int obd_ioctl_pack(struct obd_ioctl_data *data, char **pbuf,
252 struct obd_ioctl_data *overlay;
253 data->ioc_len = obd_ioctl_packlen(data);
254 data->ioc_version = OBD_IOCTL_VERSION;
256 if (*pbuf && data->ioc_len > max)
259 *pbuf = malloc(data->ioc_len);
263 overlay = (struct obd_ioctl_data *)*pbuf;
264 memcpy(*pbuf, data, sizeof(*data));
266 ptr = overlay->ioc_bulk;
267 if (data->ioc_inlbuf1)
268 LOGL(data->ioc_inlbuf1, data->ioc_inllen1, ptr);
269 if (data->ioc_inlbuf2)
270 LOGL(data->ioc_inlbuf2, data->ioc_inllen2, ptr);
271 if (data->ioc_inlbuf3)
272 LOGL(data->ioc_inlbuf3, data->ioc_inllen3, ptr);
273 if (data->ioc_inlbuf4)
274 LOGL(data->ioc_inlbuf4, data->ioc_inllen4, ptr);
275 if (obd_ioctl_is_invalid(overlay))
/* Counterpart of obd_ioctl_pack(): refresh *data from the flat buffer pbuf.
 *
 * The caller's ioc_inlbuf1..4 pointers are first written into the packed
 * header (overlay) so that the memcpy() of the header back into *data does
 * not lose them; each non-NULL inline buffer is then processed with LOGU()
 * starting at overlay->ioc_bulk.
 *
 * NOTE(review): LOGU() is defined elsewhere; presumably it copies the
 * packed bytes out into the caller's buffer and advances ptr -- confirm.
 * The rest of the signature and the return value are not visible in this
 * listing. */
281 static inline int obd_ioctl_unpack(struct obd_ioctl_data *data, char *pbuf,
285 struct obd_ioctl_data *overlay;
289 overlay = (struct obd_ioctl_data *)pbuf;
291 /* Preserve the caller's buffer pointers */
292 overlay->ioc_inlbuf1 = data->ioc_inlbuf1;
293 overlay->ioc_inlbuf2 = data->ioc_inlbuf2;
294 overlay->ioc_inlbuf3 = data->ioc_inlbuf3;
295 overlay->ioc_inlbuf4 = data->ioc_inlbuf4;
297 memcpy(data, pbuf, sizeof(*data));
299 ptr = overlay->ioc_bulk;
300 if (data->ioc_inlbuf1)
301 LOGU(data->ioc_inlbuf1, data->ioc_inllen1, ptr);
302 if (data->ioc_inlbuf2)
303 LOGU(data->ioc_inlbuf2, data->ioc_inllen2, ptr);
304 if (data->ioc_inlbuf3)
305 LOGU(data->ioc_inlbuf3, data->ioc_inllen3, ptr);
306 if (data->ioc_inlbuf4)
307 LOGU(data->ioc_inlbuf4, data->ioc_inllen4, ptr);
313 #include <linux/obd_support.h>
315 /* buffer MUST be at least the size of obd_ioctl_hdr */
/* Copy an ioctl request in from user space (arg) into a newly allocated
 * kernel buffer returned through *buf.
 *
 * Visible steps: the obd_ioctl_hdr is copied in first and its ioc_version
 * must equal OBD_IOCTL_VERSION; hdr.ioc_len must not exceed
 * OBD_MAX_IOCTL_BUFFER nor be smaller than sizeof(struct obd_ioctl_data);
 * a buffer of hdr.ioc_len bytes is allocated with OBD_VMALLOC and the whole
 * request is copied in; the copy is checked with obd_ioctl_is_invalid();
 * finally ioc_inlbuf1..4 are pointed into ioc_bulk at size_round()ed
 * offsets for every non-zero inline length.
 *
 * NOTE(review): the request is fetched from user space twice (header, then
 * full body), so the ioc_len/ioc_version in the second copy are not
 * guaranteed to match the header that was validated -- consider
 * re-checking data->ioc_len against hdr.ioc_len after the second copy.
 * NOTE(review): error codes, the cleanup on failure, how *len is set and
 * the declarations of err/offset are not visible in this listing. */
316 static inline int obd_ioctl_getdata(char **buf, int *len, void *arg)
318 struct obd_ioctl_hdr hdr;
319 struct obd_ioctl_data *data;
324 err = copy_from_user(&hdr, (void *)arg, sizeof(hdr));
330 if (hdr.ioc_version != OBD_IOCTL_VERSION) {
331 CERROR("Version mismatch kernel vs application\n");
335 if (hdr.ioc_len > OBD_MAX_IOCTL_BUFFER) {
336 CERROR("User buffer len %d exceeds %d max buffer\n",
337 hdr.ioc_len, OBD_MAX_IOCTL_BUFFER);
341 if (hdr.ioc_len < sizeof(struct obd_ioctl_data)) {
342 CERROR("OBD: user buffer too small for ioctl\n");
346 /* XXX allocate this more intelligently, using kmalloc when
348 OBD_VMALLOC(*buf, hdr.ioc_len);
350 CERROR("Cannot allocate control buffer of len %d\n",
355 data = (struct obd_ioctl_data *)*buf;
357 err = copy_from_user(*buf, (void *)arg, hdr.ioc_len);
363 if (obd_ioctl_is_invalid(data)) {
364 CERROR("ioctl not correctly formatted\n");
368 if (data->ioc_inllen1) {
369 data->ioc_inlbuf1 = &data->ioc_bulk[0];
370 offset += size_round(data->ioc_inllen1);
373 if (data->ioc_inllen2) {
374 data->ioc_inlbuf2 = &data->ioc_bulk[0] + offset;
375 offset += size_round(data->ioc_inllen2);
378 if (data->ioc_inllen3) {
379 data->ioc_inlbuf3 = &data->ioc_bulk[0] + offset;
380 offset += size_round(data->ioc_inllen3);
383 if (data->ioc_inllen4) {
384 data->ioc_inlbuf4 = &data->ioc_bulk[0] + offset;
391 static inline void obd_ioctl_freedata(char *buf, int len)
400 #define OBD_IOC_CREATE _IOR ('f', 101, long)
401 #define OBD_IOC_DESTROY _IOW ('f', 104, long)
402 #define OBD_IOC_PREALLOCATE _IOWR('f', 105, long)
404 #define OBD_IOC_SETATTR _IOW ('f', 107, long)
405 #define OBD_IOC_GETATTR _IOR ('f', 108, long)
406 #define OBD_IOC_READ _IOWR('f', 109, long)
407 #define OBD_IOC_WRITE _IOWR('f', 110, long)
410 #define OBD_IOC_STATFS _IOWR('f', 113, long)
411 #define OBD_IOC_SYNC _IOR ('f', 114, long)
412 #define OBD_IOC_READ2 _IOWR('f', 115, long)
413 #define OBD_IOC_FORMAT _IOWR('f', 116, long)
414 #define OBD_IOC_PARTITION _IOWR('f', 117, long)
415 #define OBD_IOC_COPY _IOWR('f', 120, long)
416 #define OBD_IOC_MIGR _IOWR('f', 121, long)
417 #define OBD_IOC_PUNCH _IOWR('f', 122, long)
419 #define OBD_IOC_MODULE_DEBUG _IOWR('f', 124, long)
420 #define OBD_IOC_BRW_READ _IOWR('f', 125, long)
421 #define OBD_IOC_BRW_WRITE _IOWR('f', 126, long)
422 #define OBD_IOC_NAME2DEV _IOWR('f', 127, long)
423 #define OBD_IOC_UUID2DEV _IOWR('f', 130, long)
425 #define OBD_IOC_LOV_GET_CONFIG _IOWR('f', 132, long)
426 #define OBD_IOC_CLIENT_RECOVER _IOW ('f', 133, long)
428 #define OBD_IOC_PING _IOWR('f', 135, long)
430 #define OBD_IOC_DEC_FS_USE_COUNT _IO ('f', 139 )
431 #define OBD_IOC_NO_TRANSNO _IOW ('f', 140, long)
432 #define OBD_IOC_SET_READONLY _IOW ('f', 141, long)
433 #define OBD_IOC_ABORT_RECOVERY _IOR ('f', 142, long)
434 #define OBD_IOC_ROOT_SQUASH _IOWR('f', 143, long)
436 #define OBD_GET_VERSION _IOWR ('f', 144, long)
438 #define OBD_IOC_CLOSE_UUID _IOWR ('f', 147, long)
440 #define OBD_IOC_LOV_SETSTRIPE _IOW ('f', 154, long)
441 #define OBD_IOC_LOV_GETSTRIPE _IOW ('f', 155, long)
442 #define OBD_IOC_LOV_SETEA _IOW ('f', 156, long)
444 #define OBD_IOC_MOUNTOPT _IOWR('f', 170, long)
446 #define OBD_IOC_RECORD _IOWR('f', 180, long)
447 #define OBD_IOC_ENDRECORD _IOWR('f', 181, long)
448 #define OBD_IOC_PARSE _IOWR('f', 182, long)
449 #define OBD_IOC_DORECORD _IOWR('f', 183, long)
450 #define OBD_IOC_PROCESS_CFG _IOWR('f', 184, long)
451 #define OBD_IOC_DUMP_LOG _IOWR('f', 185, long)
452 #define OBD_IOC_CLEAR_LOG _IOWR('f', 186, long)
453 #define OBD_IOC_START _IOWR('f', 187, long)
455 #define OBD_IOC_CATLOGLIST _IOWR('f', 190, long)
456 #define OBD_IOC_LLOG_INFO _IOWR('f', 191, long)
457 #define OBD_IOC_LLOG_PRINT _IOWR('f', 192, long)
458 #define OBD_IOC_LLOG_CANCEL _IOWR('f', 193, long)
459 #define OBD_IOC_LLOG_REMOVE _IOWR('f', 194, long)
460 #define OBD_IOC_LLOG_CHECK _IOWR('f', 195, long)
461 #define OBD_IOC_LLOG_CATINFO _IOWR('f', 196, long)
463 #define ECHO_IOC_GET_STRIPE _IOWR('f', 200, long)
464 #define ECHO_IOC_SET_STRIPE _IOWR('f', 201, long)
465 #define ECHO_IOC_ENQUEUE _IOWR('f', 202, long)
466 #define ECHO_IOC_CANCEL _IOWR('f', 203, long)
468 #define OBD_IOC_CMOBD_SYNC _IOWR('f', 210, long)
470 #define OBD_IOC_COBD_CON _IOWR('f', 220, long)
471 #define OBD_IOC_COBD_COFF _IOWR('f', 221, long)
473 #define OBD_IOC_SMFS_SNAP_ADD _IOWR('f', 230, long)
475 /* XXX _IOWR('f', 250, long) has been defined in
476 * portals/include/libcfs/kp30.h for debug, don't use it
479 /* Until such time as we get_info the per-stripe maximum from the OST,
480 * we define this to be 2T - 4k, which is the ext3 maxbytes. */
481 #define LUSTRE_STRIPE_MAXBYTES 0x1fffffff000ULL
483 #define CHECKSUM_BULK 0
484 #define POISON_BULK 0
/* Fold the len bytes at addr into the running checksum *cksum.
 * The buffer is walked as unsigned char, and the visible update step is
 * "*cksum = (*cksum << 2) + sum".
 * NOTE(review): the loop that produces sum is not visible in this listing. */
487 static inline void ost_checksum(obd_count *cksum, void *addr, int len)
489 unsigned char *ptr = (unsigned char *)addr;
492 /* very stupid, but means I don't have to think about byte order */
496 *cksum = (*cksum << 2) + sum;
/* Return the tv_usec field of a time value t, cast to int, as a cheap
 * pseudo-random number. As the name says, it is not suitable where
 * unpredictability matters.
 * NOTE(review): how t is obtained is not visible in this listing
 * (presumably the current time of day). */
500 static inline int ll_insecure_random_int(void)
504 return (int)(t.tv_usec);
508 * l_wait_event is a flexible sleeping function, permitting simple caller
509 * configuration of interrupt and timeout sensitivity along with actions to
510 * be performed in the event of either exception.
512 * Common usage looks like this:
514 * struct l_wait_info lwi = LWI_TIMEOUT_INTR(timeout, timeout_handler,
515 * intr_handler, callback_data);
516 * rc = l_wait_event(waitq, condition, &lwi);
518 * (LWI_TIMEOUT and LWI_INTR macros are available for timeout- and
519 * interrupt-only variants, respectively.)
521 * If a timeout is specified, the timeout_handler will be invoked in the event
522 * that the timeout expires before the process is awakened. (Note that in the
523 * kernel implementation a wakeup whose condition is not satisfied does not
524 * restart the timeout: the process goes back to sleep for only the time
525 * that remains.) If the timeout_handler returns non-zero, l_wait_event
526 * will return -ETIMEDOUT and the caller will continue. If the handler returns
527 * zero instead, the process will go back to sleep until it is awakened by the
528 * waitq or some similar mechanism, or an interrupt occurs (if the caller has
529 * asked for interrupts to be detected). The timeout will only fire once, so
530 * callers should take care that a timeout_handler which returns zero will take
531 * future steps to awaken the process. N.B. that these steps must include
532 * making the provided condition become true.
534 * If the interrupt flag (lwi_signals) is non-zero, then the process will be
535 * interruptible, and will be awakened by any "killable" signal (the ones in
536 * LUSTRE_FATAL_SIGS: SIGKILL, SIGINT, SIGTERM, SIGQUIT, ...). If a timeout is also specified, then the process will
537 * only become interruptible _after_ the timeout has expired, though it can be
538 * awakened by a signal that was delivered before the timeout and is still
539 * pending when the timeout expires. If a timeout is not specified, the process
540 * will be interruptible at all times during l_wait_event.
545 int (*lwi_on_timeout)(void *);
547 void (*lwi_on_signal)(void *);
/* Convenience constructors for struct l_wait_info. Each expands to a
 * compound literal filled in with GNU-style "field: value" initializers:
 *   LWI_TIMEOUT(time, cb, data)       - sets lwi_on_timeout to cb;
 *   LWI_INTR(cb, data)                - interrupt-only variant;
 *   LWI_TIMEOUT_INTR(time, time_cb, sig_cb, data)
 *                                     - sets lwi_on_timeout to time_cb and
 *                                       lwi_on_signal to sig_cb.
 * NOTE(review): the initializers for the timeout value, the signal flag and
 * the callback data are not visible in this listing. */
551 #define LWI_TIMEOUT(time, cb, data) \
552 ((struct l_wait_info) { \
554 lwi_on_timeout: cb, \
558 #define LWI_INTR(cb, data) \
559 ((struct l_wait_info) { \
565 #define LWI_TIMEOUT_INTR(time, time_cb, sig_cb, data) \
566 ((struct l_wait_info) { \
568 lwi_on_timeout: time_cb, \
570 lwi_on_signal: sig_cb, \
574 #define LUSTRE_FATAL_SIGS (sigmask(SIGKILL) | sigmask(SIGINT) | \
575 sigmask(SIGTERM) | sigmask(SIGQUIT) | \
/* Install a new blocked-signal mask on the current task and remember the
 * previous one.
 *
 * siginitsetinv() initialises current->blocked to the inverse of sigs, i.e.
 * every signal is blocked except those whose bits are set in sigs (so
 * passing 0 blocks everything). The update is made under SIGNAL_MASK_LOCK.
 * The previous mask is saved in old.
 *
 * NOTE(review): the declaration of old and the return statement are not
 * visible in this listing; given the sigset_t return type and the callers'
 * "blocked = l_w_e_set_sigs(...)", presumably old is returned. */
579 static inline sigset_t l_w_e_set_sigs(int sigs)
582 unsigned long irqflags;
584 SIGNAL_MASK_LOCK(current, irqflags);
585 old = current->blocked;
586 siginitsetinv(&current->blocked, sigs);
588 SIGNAL_MASK_UNLOCK(current, irqflags);
/* Kernel implementation of the wait loop behind l_wait_event() and
 * l_wait_event_exclusive().
 *
 * Visible steps:
 *  - __wait is initialised for the current task and queued on wq with
 *    add_wait_queue_exclusive() or add_wait_queue() (presumably selected by
 *    excl -- the test is not visible in this listing);
 *  - signals are blocked through l_w_e_set_sigs(): everything except
 *    LUSTRE_FATAL_SIGS when lwi_signals is set and no timeout is given,
 *    everything otherwise; the previous mask is kept in blocked;
 *  - the task sleeps in TASK_INTERRUPTIBLE; while a timeout is configured
 *    and has not fired, it sleeps with schedule_timeout(timeout_remaining),
 *    so the time left carries over from one wakeup to the next;
 *  - when the remaining time reaches 0, lwi_on_timeout (if set) is
 *    consulted, and afterwards LUSTRE_FATAL_SIGS are unblocked if
 *    lwi_signals is set;
 *  - on the way out the saved signal mask is restored, lwi_on_signal (if
 *    set) is called when the wait had timed out and a signal is pending,
 *    the task state is set back to TASK_RUNNING and __wait is removed
 *    from wq.
 *
 * NOTE(review): the loop header, the condition test, the assignments to ret
 * and __timed_out and the declaration of blocked are not visible here. */
593 #define __l_wait_event(wq, condition, info, ret, excl) \
595 wait_queue_t __wait; \
596 int __timed_out = 0; \
597 unsigned long irqflags; \
599 signed long timeout_remaining; \
601 init_waitqueue_entry(&__wait, current); \
603 add_wait_queue_exclusive(&wq, &__wait); \
605 add_wait_queue(&wq, &__wait); \
607 /* Block all signals (just the non-fatal ones if no timeout). */ \
608 if (info->lwi_signals && !info->lwi_timeout) \
609 blocked = l_w_e_set_sigs(LUSTRE_FATAL_SIGS); \
611 blocked = l_w_e_set_sigs(0); \
613 timeout_remaining = info->lwi_timeout; \
616 set_current_state(TASK_INTERRUPTIBLE); \
619 if (info->lwi_timeout && !__timed_out) { \
620 timeout_remaining = schedule_timeout(timeout_remaining); \
621 if (timeout_remaining == 0) { \
623 if (!info->lwi_on_timeout || \
624 info->lwi_on_timeout(info->lwi_cb_data)) { \
628 /* We'll take signals after a timeout. */ \
629 if (info->lwi_signals) \
630 (void)l_w_e_set_sigs(LUSTRE_FATAL_SIGS); \
637 if (signal_pending(current)) { \
641 /* We have to do this here because some signals */ \
642 /* are not blockable - ie from strace(1). */ \
643 /* In these cases we want to schedule_timeout() */ \
644 /* again, because we don't want that to return */ \
645 /* -EINTR when the RPC actually succeeded. */ \
646 /* the RECALC_SIGPENDING below will deliver the */ \
647 /* signal properly. */ \
648 SIGNAL_MASK_LOCK(current, irqflags); \
650 SIGNAL_MASK_UNLOCK(current, irqflags); \
655 SIGNAL_MASK_LOCK(current, irqflags); \
656 current->blocked = blocked; \
658 SIGNAL_MASK_UNLOCK(current, irqflags); \
660 if (__timed_out && signal_pending(current)) { \
661 if (info->lwi_on_signal) \
662 info->lwi_on_signal(info->lwi_cb_data); \
666 current->state = TASK_RUNNING; \
667 remove_wait_queue(&wq, &__wait); \
670 #else /* !__KERNEL__ */
/* Userspace (!__KERNEL__) implementation of the wait loop behind
 * l_wait_event().
 *
 * Visible steps: the working timeout starts from lwi_timeout, with 0
 * replaced by 1000000000; the wait itself goes through
 * liblustre_wait_event(timeout); elapsed time is measured with time(NULL)
 * against last; and lwi_on_timeout (if set) is consulted when a timeout is
 * configured and has not yet been marked as fired.
 *
 * NOTE(review): presumably the timeout is in seconds, given the use of
 * time(NULL) -- confirm. The loop structure, the condition test and the
 * assignments to ret, last and __timed_out are not visible in this
 * listing. */
671 #define __l_wait_event(wq, condition, info, ret, excl) \
673 long timeout = info->lwi_timeout, elapse, last = 0; \
674 int __timed_out = 0; \
676 if (info->lwi_timeout == 0) \
677 timeout = 1000000000; \
684 if (liblustre_wait_event(timeout)) { \
685 if (timeout == 0 || info->lwi_timeout == 0) \
687 elapse = time(NULL) - last; \
696 if (info->lwi_timeout && !__timed_out) { \
698 if (info->lwi_on_timeout == NULL || \
699 info->lwi_on_timeout(info->lwi_cb_data)) { \
707 #endif /* __KERNEL__ */
/* Sleep on wq until condition holds, as configured by the struct
 * l_wait_info that info points to. info is evaluated once into __info, and
 * __l_wait_event() is invoked with excl = 0.
 * NOTE(review): the declaration of __ret and the expression that yields the
 * result are not visible in this listing. */
709 #define l_wait_event(wq, condition, info) \
712 struct l_wait_info *__info = (info); \
714 __l_wait_event(wq, condition, __info, __ret, 0); \
/* Same as l_wait_event(), but __l_wait_event() is invoked with excl = 1.
 * info is evaluated once into __info.
 * NOTE(review): the declaration of __ret and the expression that yields the
 * result are not visible in this listing. */
718 #define l_wait_event_exclusive(wq, condition, info) \
721 struct l_wait_info *__info = (info); \
723 __l_wait_event(wq, condition, __info, __ret, 1); \
727 #endif /* _LUSTRE_LIB_H */
729 #define LMD_MAGIC 0xbdacbdac
731 #define lmd_bad_magic(LMDP) \
733 struct lustre_mount_data *_lmd__ = (LMDP); \
736 CERROR("Missing mount data: " \
737 "check that /sbin/mount.lustre is installed.\n");\
739 } else if (_lmd__->lmd_magic != LMD_MAGIC) { \
740 CERROR("Invalid mount data (%#x != %#x): " \
741 "check that /sbin/mount.lustre is installed\n", \
742 _lmd__->lmd_magic, LMD_MAGIC); \