* along with Lustre; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
- * Basic Lustre library routines.
+ * Basic Lustre library routines.
*
*/
#ifndef __KERNEL__
# include <string.h>
+# include <sys/types.h>
#else
# include <asm/semaphore.h>
+# include <linux/sched.h>
+# include <linux/signal.h>
+# include <linux/types.h>
#endif
-
#include <linux/portals_lib.h>
+#include <linux/kp30.h> /* XXX just for LASSERT! */
#include <linux/lustre_idl.h>
-#ifdef __KERNEL__
-/* l_net.c */
+#ifndef LPU64
+#if BITS_PER_LONG > 32
+#define LPU64 "%lu"
+#define LPD64 "%ld"
+#define LPX64 "%#lx"
+#else
+#define LPU64 "%Lu"
+#define LPD64 "%Ld"
+#define LPX64 "%#Lx"
+#endif
+#endif
+
+/* target.c */
struct ptlrpc_request;
-int target_handle_connect(struct ptlrpc_request *req);
+struct recovd_data;
+struct recovd_obd;
+struct obd_export;
+#include <linux/lustre_ha.h>
+#include <linux/lustre_net.h>
+#include <linux/lustre_compat25.h>
+
+int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler);
int target_handle_disconnect(struct ptlrpc_request *req);
+int target_handle_reconnect(struct lustre_handle *conn, struct obd_export *exp,
+ struct obd_uuid *cluuid);
+int target_handle_ping(struct ptlrpc_request *req);
+void target_cancel_recovery_timer(struct obd_device *obd);
+
+#define OBD_RECOVERY_TIMEOUT (obd_timeout * 5 * HZ / 2) /* *waves hands* */
+void target_start_recovery_timer(struct obd_device *obd, svc_handler_t handler);
+void target_abort_recovery(void *data);
+int target_queue_recovery_request(struct ptlrpc_request *req,
+ struct obd_device *obd);
+int target_queue_final_reply(struct ptlrpc_request *req, int rc);
+void target_send_reply(struct ptlrpc_request *req, int rc, int fail_id);
+
+/* client.c */
+
+int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf);
+int client_sanobd_setup(struct obd_device *obddev, obd_count len, void *buf);
+int client_obd_cleanup(struct obd_device * obddev, int force, int failover);
+struct client_obd *client_conn2cli(struct lustre_handle *conn);
+struct obd_device *client_tgtuuid2obd(struct obd_uuid *tgtuuid);
+
+/* It is important that och_fh remain the first item in this structure: that
+ * way, we don't have to re-pack the obdo's inline data before we send it to
+ * the server, we can just send the whole struct unaltered. */
/* Client-side handle for a file opened on the server.
 * och_fh MUST remain the first member: the struct is shipped unaltered
 * inside the obdo's inline data, and the server expects the lustre_handle
 * at offset 0 (see the note preceding this struct). */
struct obd_client_handle {
        struct lustre_handle och_fh;    /* server-assigned open handle; keep first */
        struct ptlrpc_request *och_req; /* open request — presumably kept for replay; confirm */
        __u32 och_magic;                /* OBD_CLIENT_HANDLE_MAGIC when valid */
};
+#define OBD_CLIENT_HANDLE_MAGIC 0xd15ea5ed
+
+/* statfs_pack.c */
+int obd_self_statfs(struct obd_device *dev, struct statfs *sfs);
/* l_lock.c */
-struct lustre_lock {
+struct lustre_lock {
int l_depth;
struct task_struct *l_owner;
struct semaphore l_sem;
void l_lock_init(struct lustre_lock *);
void l_lock(struct lustre_lock *);
void l_unlock(struct lustre_lock *);
+int l_has_lock(struct lustre_lock *);
+
+/* simple.c */
/* User credentials carried into an obd_run_ctxt (passed to push_ctxt/pop_ctxt
 * below).  Presumably mirrors the task's fs identity for server-side file
 * operations — confirm against push_ctxt() in simple.c. */
struct obd_ucred {
        __u32 ouc_fsuid;        /* filesystem uid */
        __u32 ouc_fsgid;        /* filesystem gid */
        __u32 ouc_cap;          /* capability mask */
        __u32 ouc_suppgid1;     /* first supplementary gid */
        __u32 ouc_suppgid2;     /* second supplementary gid */
};
+#define OBD_RUN_CTXT_MAGIC 0xC0FFEEAA
+#define OBD_CTXT_DEBUG /* development-only debugging */
/* Saved execution context for temporarily entering a server filesystem
 * context; filled/restored by push_ctxt()/pop_ctxt() (declared below). */
struct obd_run_ctxt {
        struct vfsmount *pwdmnt;        /* working mount to save/restore */
        struct dentry *pwd;             /* working directory to save/restore */
        mm_segment_t fs;                /* address-limit segment (set_fs state) */
        struct obd_ucred ouc;           /* credentials to apply while pushed */
        int ngroups;                    /* saved supplementary group count */
#ifdef OBD_CTXT_DEBUG
        __u32 magic;                    /* OBD_RUN_CTXT_MAGIC when initialized
                                         * (see OBD_SET_CTXT_MAGIC below) */
#endif
};
-/* page.c */
-inline void lustre_put_page(struct page *page);
-struct page *lustre_get_page_read(struct inode *dir, unsigned long index);
-struct page *lustre_get_page_write(struct inode *dir, unsigned long index);
-int lustre_commit_write(struct page *page, unsigned from, unsigned to);
-void set_page_clean(struct page *page);
-void set_page_dirty(struct page *page);
-/* simple.c */
-struct obd_run_ctxt;
-void push_ctxt(struct obd_run_ctxt *save, struct obd_run_ctxt *new);
-void pop_ctxt(struct obd_run_ctxt *saved);
#ifdef OBD_CTXT_DEBUG
#define OBD_SET_CTXT_MAGIC(ctxt) (ctxt)->magic = OBD_RUN_CTXT_MAGIC
#else
#define OBD_SET_CTXT_MAGIC(ctxt) do {} while(0)
#endif
+
+#ifdef __KERNEL__
+
+void push_ctxt(struct obd_run_ctxt *save, struct obd_run_ctxt *new_ctx,
+ struct obd_ucred *cred);
+void pop_ctxt(struct obd_run_ctxt *saved, struct obd_run_ctxt *new_ctx,
+ struct obd_ucred *cred);
struct dentry *simple_mkdir(struct dentry *dir, char *name, int mode);
-int lustre_fread(struct file *file, char *str, int len, loff_t *off);
-int lustre_fwrite(struct file *file, const char *str, int len, loff_t *off);
+struct dentry *simple_mknod(struct dentry *dir, char *name, int mode);
+int lustre_fread(struct file *file, void *buf, int len, loff_t *off);
+int lustre_fwrite(struct file *file, const void *buf, int len, loff_t *off);
int lustre_fsync(struct file *file);
static inline void l_dput(struct dentry *de)
{
if (!de || IS_ERR(de))
return;
- shrink_dcache_parent(de);
+ //shrink_dcache_parent(de);
+ LASSERT(atomic_read(&de->d_count) > 0);
dput(de);
}
+/* We need to hold the inode semaphore over the dcache lookup itself, or we
+ * run the risk of entering the filesystem lookup path concurrently on SMP
+ * systems, and instantiating two inodes for the same entry. We still
+ * protect against concurrent addition/removal races with the DLM locking.
+ */
+static inline struct dentry *ll_lookup_one_len(char *fid_name,
+ struct dentry *dparent,
+ int fid_namelen)
+{
+ struct dentry *dchild;
+
+ down(&dparent->d_inode->i_sem);
+ dchild = lookup_one_len(fid_name, dparent, fid_namelen);
+ up(&dparent->d_inode->i_sem);
+
+ return dchild;
+}
+
static inline void ll_sleep(int t)
{
set_current_state(TASK_INTERRUPTIBLE);
}
#endif
-/* FIXME: This needs to validate pointers and cookies */
-static inline void *lustre_handle2object(struct lustre_handle *handle)
-{
- if (handle)
- return (void *)(unsigned long)(handle->addr);
- return NULL;
-}
-
-static inline void ldlm_object2handle(void *object, struct lustre_handle *handle)
+#define LL_FID_NAMELEN (16 + 1 + 8 + 1)
+static inline int ll_fid2str(char *str, __u64 id, __u32 generation)
{
- handle->addr = (__u64)(unsigned long)object;
+ return sprintf(str, "%llx:%08x", (unsigned long long)id, generation);
}
-struct obd_statfs;
-struct statfs;
-void obd_statfs_pack(struct obd_statfs *osfs, struct statfs *sfs);
-void obd_statfs_unpack(struct obd_statfs *osfs, struct statfs *sfs);
-
#include <linux/portals_lib.h>
/*
* OBD IOCTLS
*/
-#define OBD_IOCTL_VERSION 0x00010001
+#define OBD_IOCTL_VERSION 0x00010002
struct obd_ioctl_data {
uint32_t ioc_len;
uint32_t ioc_version;
- uint64_t ioc_addr;
uint64_t ioc_cookie;
uint32_t ioc_conn1;
uint32_t ioc_conn2;
obd_size ioc_count;
obd_off ioc_offset;
uint32_t ioc_dev;
- uint32_t ____padding;
+ uint32_t ioc_command;
+
+ uint64_t ioc_nid;
+ uint32_t ioc_nal;
/* buffers the kernel will treat as user pointers */
uint32_t ioc_plen1;
printk("OBD ioctl: plen2 set but NULL pointer\n");
return 1;
}
- if (obd_ioctl_packlen(data) != data->ioc_len ) {
- printk("OBD ioctl: packlen exceeds ioc_len\n");
+ if (obd_ioctl_packlen(data) != data->ioc_len) {
+ printk("OBD ioctl: packlen exceeds ioc_len (%d != %d)\n",
+ obd_ioctl_packlen(data), data->ioc_len);
return 1;
}
#if 0
printk("OBD ioctl: inlbuf3 not 0 terminated\n");
return 1;
}
-#endif
+#endif
return 0;
}
data->ioc_len = obd_ioctl_packlen(data);
data->ioc_version = OBD_IOCTL_VERSION;
- if (*pbuf && obd_ioctl_packlen(data) > max)
+ if (*pbuf && data->ioc_len > max)
return 1;
if (*pbuf == NULL) {
*pbuf = malloc(data->ioc_len);
return 0;
}
-#else
/* Unpack a wire-format ioctl buffer (pbuf) into the caller's obd_ioctl_data,
 * copying the inline payloads into the caller's pre-existing inlbuf pointers.
 *
 * Returns 0 on success, 1 if pbuf is NULL.
 * NOTE(review): the 'max' parameter is never used — presumably intended for
 * bounds checking; confirm before relying on it. */
static inline int obd_ioctl_unpack(struct obd_ioctl_data *data, char *pbuf,
                                   int max)
{
        char *ptr;
        struct obd_ioctl_data *overlay;

        if (!pbuf)
                return 1;
        overlay = (struct obd_ioctl_data *)pbuf;

        /* Preserve the caller's buffer pointers: write them into the overlay
         * FIRST, so the memcpy below (which copies the whole header from pbuf
         * back into *data, clobbering every field) carries the caller's
         * inlbuf pointers rather than whatever the wire buffer held.  Order
         * matters here. */
        overlay->ioc_inlbuf1 = data->ioc_inlbuf1;
        overlay->ioc_inlbuf2 = data->ioc_inlbuf2;
        overlay->ioc_inlbuf3 = data->ioc_inlbuf3;

        memcpy(data, pbuf, sizeof(*data));

        /* Inline payloads follow the header in ioc_bulk; copy each present
         * one out to the caller's buffer.  (LOGU presumably copies inllenN
         * bytes and advances ptr by the rounded length — confirm against
         * its definition in kp30.h.) */
        ptr = overlay->ioc_bulk;
        if (data->ioc_inlbuf1)
                LOGU(data->ioc_inlbuf1, data->ioc_inllen1, ptr);
        if (data->ioc_inlbuf2)
                LOGU(data->ioc_inlbuf2, data->ioc_inllen2, ptr);
        if (data->ioc_inlbuf3)
                LOGU(data->ioc_inlbuf3, data->ioc_inllen3, ptr);

        return 0;
}
+#endif
+
+#include <linux/obd_support.h>
+
+#define OBD_MAX_IOCTL_BUFFER 8192
/* buffer MUST be at least the size of obd_ioctl_hdr */
static inline int obd_ioctl_getdata(char **buf, int *len, void *arg)
int err;
ENTRY;
-
err = copy_from_user(&hdr, (void *)arg, sizeof(hdr));
if ( err ) {
EXIT;
}
if (hdr.ioc_version != OBD_IOCTL_VERSION) {
- printk("OBD: version mismatch kernel vs application\n");
+ CERROR("Version mismatch kernel vs application\n");
return -EINVAL;
}
- if (hdr.ioc_len > 8192) {
- printk("OBD: user buffer exceeds 8192 max buffer\n");
+ if (hdr.ioc_len > OBD_MAX_IOCTL_BUFFER) {
+ CERROR("User buffer len %d exceeds %d max buffer\n",
+ hdr.ioc_len, OBD_MAX_IOCTL_BUFFER);
return -EINVAL;
}
return -EINVAL;
}
- OBD_ALLOC(*buf, hdr.ioc_len);
- if (!*buf) {
+ /* XXX allocate this more intelligently, using kmalloc when
+ * appropriate */
+ OBD_VMALLOC(*buf, hdr.ioc_len);
+ if (*buf == NULL) {
CERROR("Cannot allocate control buffer of len %d\n",
hdr.ioc_len);
RETURN(-EINVAL);
}
if (obd_ioctl_is_invalid(data)) {
- printk("OBD: ioctl not correctly formatted\n");
+ CERROR("ioctl not correctly formatted\n");
return -EINVAL;
}
}
if (data->ioc_inllen2) {
- data->ioc_inlbuf2 = &data->ioc_bulk[0] + size_round(data->ioc_inllen1);
+ data->ioc_inlbuf2 = &data->ioc_bulk[0] +
+ size_round(data->ioc_inllen1);
}
if (data->ioc_inllen3) {
- data->ioc_inlbuf3 = &data->ioc_bulk[0] + size_round(data->ioc_inllen1) +
+ data->ioc_inlbuf3 = &data->ioc_bulk[0] +
+ size_round(data->ioc_inllen1) +
size_round(data->ioc_inllen2);
}
EXIT;
return 0;
}
-#endif
+
+static inline void obd_ioctl_freedata(char *buf, int len)
+{
+ ENTRY;
+
+ OBD_VFREE(buf, len);
+ EXIT;
+ return;
+}
#define OBD_IOC_CREATE _IOR ('f', 101, long)
#define OBD_IOC_SETUP _IOW ('f', 102, long)
#define OBD_IOC_CLEANUP _IO ('f', 103 )
#define OBD_IOC_DESTROY _IOW ('f', 104, long)
#define OBD_IOC_PREALLOCATE _IOWR('f', 105, long)
-#define OBD_IOC_DEC_USE_COUNT _IO ('f', 106 )
+
#define OBD_IOC_SETATTR _IOW ('f', 107, long)
#define OBD_IOC_GETATTR _IOR ('f', 108, long)
#define OBD_IOC_READ _IOWR('f', 109, long)
#define OBD_IOC_LIST _IOWR('f', 129, long)
#define OBD_IOC_UUID2DEV _IOWR('f', 130, long)
-#define OBD_IOC_RECOVD_NEWCONN _IOWR('f', 131, long)
-#define OBD_IOC_LOV_CONFIG _IOWR('f', 132, long)
+#define OBD_IOC_LOV_SET_CONFIG _IOWR('f', 131, long)
+#define OBD_IOC_LOV_GET_CONFIG _IOWR('f', 132, long)
+#define OBD_IOC_LOV_CONFIG OBD_IOC_LOV_SET_CONFIG
+#define OBD_IOC_CLIENT_RECOVER _IOW ('f', 133, long)
+
+#define OBD_IOC_OPEN _IOWR('f', 134, long)
+#define OBD_IOC_CLOSE _IOWR('f', 135, long)
+
+#define OBD_IOC_DEC_FS_USE_COUNT _IO ('f', 139 )
+#define OBD_IOC_NO_TRANSNO _IOW ('f', 140, long)
+#define OBD_IOC_SET_READONLY _IOW ('f', 141, long)
+#define OBD_IOC_ABORT_RECOVERY _IOR ('f', 142, long)
+
+#define OBD_GET_VERSION _IOWR ('f', 144, long)
-#define OBD_IOC_DEC_FS_USE_COUNT _IO ('f', 133 )
+#define OBD_IOC_ADD_UUID _IOWR ('f', 145, long)
+#define OBD_IOC_DEL_UUID _IOWR ('f', 146, long)
+#define OBD_IOC_CLOSE_UUID _IOWR ('f', 147, long)
+#define OBD_IOC_MOUNTOPT _IOWR('f', 170, long)
+#define ECHO_IOC_GET_STRIPE _IOWR('f', 200, long)
+#define ECHO_IOC_SET_STRIPE _IOWR('f', 201, long)
+#define ECHO_IOC_ENQUEUE _IOWR('f', 202, long)
+#define ECHO_IOC_CANCEL _IOWR('f', 203, long)
+/* XXX _IOWR('f', 250, long) has been defined in
+ * portals/include/linux/kp30.h for debug, don't use it
+ */
+
+/* Until such time as we get_info the per-stripe maximum from the OST,
+ * we define this to be 2T - 4k, which is the ext3 maxbytes. */
+#define LUSTRE_STRIPE_MAXBYTES 0x1fffffff000ULL
+
+#define CHECKSUM_BULK 0
+
+#if CHECKSUM_BULK
+static inline void ost_checksum(__u64 *cksum, void *addr, int len)
+{
+ unsigned char *ptr = (unsigned char *)addr;
+ __u64 sum = 0;
+ /* very stupid, but means I don't have to think about byte order */
+ while (len-- > 0)
+ sum += *ptr++;
-/* XXX this should be one mask-check */
-#define l_killable_pending(task) \
-(sigismember(&(task->pending.signal), SIGKILL) || \
- sigismember(&(task->pending.signal), SIGINT) || \
- sigismember(&(task->pending.signal), SIGTERM))
+ *cksum = (*cksum << 2) + sum;
+}
+#endif
/*
- * Like wait_event_interruptible, but we're only interruptible by KILL, INT, or
- * TERM.
+ * l_wait_event is a flexible sleeping function, permitting simple caller
+ * configuration of interrupt and timeout sensitivity along with actions to
+ * be performed in the event of either exception.
+ *
+ * Common usage looks like this:
+ *
+ * struct l_wait_info lwi = LWI_TIMEOUT_INTR(timeout, timeout_handler,
+ * intr_handler, callback_data);
+ * rc = l_wait_event(waitq, condition, &lwi);
+ *
+ * (LWI_TIMEOUT and LWI_INTR macros are available for timeout- and
+ * interrupt-only variants, respectively.)
+ *
+ * If a timeout is specified, the timeout_handler will be invoked in the event
+ * that the timeout expires before the process is awakened. (Note that any
+ * waking of the process will restart the timeout, even if the condition is
+ * not satisfied and the process immediately returns to sleep. This might be
+ * considered a bug.) If the timeout_handler returns non-zero, l_wait_event
+ * will return -ETIMEDOUT and the caller will continue. If the handler returns
+ * zero instead, the process will go back to sleep until it is awakened by the
+ * waitq or some similar mechanism, or an interrupt occurs (if the caller has
+ * asked for interrupts to be detected). The timeout will only fire once, so
+ * callers should take care that a timeout_handler which returns zero will take
+ * future steps to awaken the process. N.B. that these steps must include
+ * making the provided condition become true.
*
- * XXXshaver These are going away soon, I hope.
+ * If the interrupt flag (lwi_signals) is non-zero, then the process will be
+ * interruptible, and will be awakened by any "killable" signal (SIGTERM,
+ * SIGKILL or SIGINT). If a timeout is also specified, then the process will
+ * only become interruptible _after_ the timeout has expired, though it can be
+ * awakened by a signal that was delivered before the timeout and is still
+ * pending when the timeout expires. If a timeout is not specified, the process
+ * will be interruptible at all times during l_wait_event.
*/
-#define __l_wait_event_killable(wq, condition, ret) \
-do { \
- wait_queue_t __wait; \
- init_waitqueue_entry(&__wait, current); \
- \
- add_wait_queue(&wq, &__wait); \
- for (;;) { \
- set_current_state(TASK_INTERRUPTIBLE); \
- if (condition) \
- break; \
- if (!signal_pending(current) || \
- !l_killable_pending(current)) { \
- schedule(); \
- continue; \
- } \
- ret = -ERESTARTSYS; \
- break; \
- } \
- current->state = TASK_RUNNING; \
- remove_wait_queue(&wq, &__wait); \
-} while(0)
-#define l_wait_event_killable(wq, condition) \
-({ \
- int __ret = 0; \
- if (!(condition)) \
- __l_wait_event_killable(wq, condition, __ret); \
- __ret; \
/* Caller-supplied configuration for l_wait_event(); see the long usage
 * comment above.  Build one with the LWI_* macros below rather than by
 * hand. */
struct l_wait_info {
        long lwi_timeout;               /* ticks before lwi_on_timeout fires; 0 = no timeout */
        int (*lwi_on_timeout)(void *);  /* return non-zero -> wait fails with -ETIMEDOUT */
        long lwi_signals;               /* non-zero -> fatal signals can interrupt the wait */
        void (*lwi_on_signal)(void *);  /* invoked before returning -EINTR on a signal */
        void *lwi_cb_data;              /* opaque argument passed to both callbacks */
};

/* Timeout-only wait: no signal sensitivity.
 * (Old-GCC "label:" designated-initializer syntax, kept to match the file.) */
#define LWI_TIMEOUT(time, cb, data)                     \
((struct l_wait_info) {                                 \
        lwi_timeout:    time,                           \
        lwi_on_timeout: cb,                             \
        lwi_cb_data:    data                            \
})

/* Interrupt-only wait: fatal signals wake us, no timeout. */
#define LWI_INTR(cb, data)                              \
((struct l_wait_info) {                                 \
        lwi_signals:    1,                              \
        lwi_on_signal:  cb,                             \
        lwi_cb_data:    data                            \
})

/* Both: signals are only honoured after the timeout expires
 * (see __l_wait_event below). */
#define LWI_TIMEOUT_INTR(time, time_cb, sig_cb, data)   \
((struct l_wait_info) {                                 \
        lwi_timeout:    time,                           \
        lwi_on_timeout: time_cb,                        \
        lwi_signals:    1,                              \
        lwi_on_signal:  sig_cb,                         \
        lwi_cb_data:    data                            \
})

/* The "killable" signal set: the only signals that can interrupt a wait. */
#define LUSTRE_FATAL_SIGS (sigmask(SIGKILL) | sigmask(SIGINT) |         \
                           sigmask(SIGTERM) | sigmask(SIGQUIT))
+
+#ifdef __KERNEL__
+static inline sigset_t l_w_e_set_sigs(int sigs)
+{
+ sigset_t old;
+ unsigned long irqflags;
+
+ SIGNAL_MASK_LOCK(current, irqflags);
+ old = current->blocked;
+ siginitsetinv(¤t->blocked, sigs);
+ RECALC_SIGPENDING;
+ SIGNAL_MASK_UNLOCK(current, irqflags);
+
+ return old;
+}
+
/* Core sleep loop behind l_wait_event() — see the long usage comment above
 * for the caller-visible semantics.  Key points visible in the code below:
 * sets 'ret' to -EINTR on a pending signal or -ETIMEDOUT when the (single-
 * shot) timeout expires and lwi_on_timeout is absent or returns non-zero;
 * when a timeout is configured, signals are blocked until the timeout has
 * fired; the caller's signal mask is restored on exit. */
#define __l_wait_event(wq, condition, info, ret)                        \
do {                                                                    \
        wait_queue_t __wait;                                            \
        int __timed_out = 0;                                            \
        unsigned long irqflags;                                         \
        sigset_t blocked;                                               \
                                                                        \
        init_waitqueue_entry(&__wait, current);                         \
        add_wait_queue(&wq, &__wait);                                   \
                                                                        \
        /* Block all signals (just the non-fatal ones if no timeout). */ \
        if (info->lwi_signals && !info->lwi_timeout)                    \
                blocked = l_w_e_set_sigs(LUSTRE_FATAL_SIGS);            \
        else                                                            \
                blocked = l_w_e_set_sigs(0);                            \
                                                                        \
        for (;;) {                                                      \
                set_current_state(TASK_INTERRUPTIBLE);                  \
                if (condition)                                          \
                        break;                                          \
                if (signal_pending(current)) {                          \
                        if (info->lwi_on_signal)                        \
                                info->lwi_on_signal(info->lwi_cb_data); \
                        ret = -EINTR;                                   \
                        break;                                          \
                }                                                       \
                if (info->lwi_timeout && !__timed_out) {                \
                        if (schedule_timeout(info->lwi_timeout) == 0) { \
                                __timed_out = 1;                        \
                                if (!info->lwi_on_timeout ||            \
                                    info->lwi_on_timeout(info->lwi_cb_data)) { \
                                        ret = -ETIMEDOUT;               \
                                        break;                          \
                                }                                       \
                                /* We'll take signals after a timeout. */ \
                                if (info->lwi_signals)                  \
                                        (void)l_w_e_set_sigs(LUSTRE_FATAL_SIGS); \
                        }                                               \
                } else {                                                \
                        schedule();                                     \
                }                                                       \
        }                                                               \
                                                                        \
        SIGNAL_MASK_LOCK(current, irqflags);                            \
        current->blocked = blocked;                                     \
        RECALC_SIGPENDING;                                              \
        SIGNAL_MASK_UNLOCK(current, irqflags);                          \
                                                                        \
        current->state = TASK_RUNNING;                                  \
        remove_wait_queue(&wq, &__wait);                                \
} while(0)
-#define l_wait_event_timeout(wq, condition, timeout) \
-({ \
- int __ret = 0; \
- if (!(condition)) \
- __l_wait_event_timeout(wq, condition, timeout, __ret); \
- __ret; \
/* Public entry point — see the long usage comment above.  Evaluates the
 * condition once first and only sleeps (via __l_wait_event) if it is not
 * already satisfied; evaluates to 0 on success, -EINTR or -ETIMEDOUT on
 * failure. */
#define l_wait_event(wq, condition, info)                       \
({                                                              \
        int __ret = 0;                                          \
        struct l_wait_info *__info = (info);                    \
        if (!(condition))                                       \
                __l_wait_event(wq, condition, __info, __ret);   \
        __ret;                                                  \
})
#else
/* Userspace stub: always "succeeds" immediately.  NOTE(review): it does not
 * evaluate the condition (or info) at all — callers must not rely on their
 * side effects in userspace builds. */
#define l_wait_event(wq, condition, info)                       \
({                                                              \
        0;                                                      \
})
+#endif /* __KERNEL__ */
#endif /* _LUSTRE_LIB_H */