From a770aca5f9d157b0dd28dafcfe4f24c750a7de20 Mon Sep 17 00:00:00 2001 From: adilger Date: Sat, 25 Jun 2005 06:39:23 +0000 Subject: [PATCH] Branch b1_4 - merge of b_cray changes - lots of semantically-NULL changes - flock fixes, enabling in liblustre (not llite yet) - lock conversion fixes (unused code in b1_4 at present) - liblustre llog parsing bitops fixes - liblustre/libsysio umask fixes - catamount build fixes/cray portals build support - lconf uses "tune2fs -O dir_index" instead of "debugfs" to enable htree index b=6931, b=6420, b=6927 --- lustre/include/liblustre.h | 99 +++++++++++++++++++++++++--------- lustre/include/linux/lustre_compat25.h | 4 +- lustre/include/linux/lustre_fsfilt.h | 2 +- lustre/include/linux/lustre_lib.h | 49 +++++++++++------ lustre/include/linux/lustre_net.h | 5 +- lustre/ldlm/ldlm_flock.c | 36 ++++++------- lustre/ldlm/ldlm_lib.c | 5 +- lustre/ldlm/ldlm_lock.c | 32 +++++++++-- lustre/ldlm/ldlm_lockd.c | 26 +++++---- lustre/ldlm/ldlm_request.c | 47 ++++++++++++---- lustre/ldlm/ldlm_resource.c | 2 +- lustre/liblustre/lutil.c | 3 -- lustre/liblustre/rw.c | 18 +++---- lustre/liblustre/super.c | 31 ++++------- lustre/llite/file.c | 39 ++++++++------ lustre/llite/llite_internal.h | 1 + lustre/llite/llite_lib.c | 4 +- lustre/llite/llite_mmap.c | 1 - lustre/llite/namei.c | 2 + lustre/llite/rw.c | 41 +++----------- lustre/mdc/mdc_locks.c | 6 +-- lustre/mds/handler.c | 20 +++++-- lustre/mds/mds_fs.c | 17 +++--- lustre/mds/mds_internal.h | 1 - lustre/mds/mds_lib.c | 6 ++- lustre/mds/mds_lov.c | 1 + lustre/mds/mds_unlink_open.c | 13 +++-- lustre/obdecho/echo.c | 16 +++--- lustre/osc/osc_internal.h | 4 ++ lustre/ptlrpc/autoMakefile.am | 3 +- lustre/ptlrpc/events.c | 32 +++++++++-- lustre/ptlrpc/pers.c | 6 +++ lustre/ptlrpc/recover.c | 2 +- lustre/tests/.RC_CURRENT.tag | 2 +- lustre/tests/.cvsignore | 1 + lustre/tests/acceptance-small.sh | 95 +++++++++++++++++++------------- lustre/tests/echo.sh | 7 +++ lustre/tests/llmount.sh | 5 +- 
lustre/tests/mount2fs.sh | 15 +++--- lustre/tests/recovery-small.sh | 2 +- lustre/tests/uml.sh | 9 +++- lustre/utils/lconf | 19 ++++--- lustre/utils/liblustreapi.c | 10 ++++ lustre/utils/llmount.c | 82 ++++++++++++++++++++++++---- lustre/utils/lrun | 4 ++ lustre/utils/obd.c | 16 +++--- 46 files changed, 551 insertions(+), 290 deletions(-) diff --git a/lustre/include/liblustre.h b/lustre/include/liblustre.h index ca328bd..e41e3bc 100644 --- a/lustre/include/liblustre.h +++ b/lustre/include/liblustre.h @@ -58,6 +58,8 @@ #include #include #include +#include +#include #include #include @@ -90,7 +92,7 @@ typedef unsigned short umode_t; /* always adopt 2.5 definitions */ #define KERNEL_VERSION(a,b,c) ((a)*100+(b)*10+c) -#define LINUX_VERSION_CODE (2*200+5*10+0) +#define LINUX_VERSION_CODE KERNEL_VERSION(2,5,0) static inline void inter_module_put(void *a) { @@ -168,35 +170,38 @@ typedef int (write_proc_t)(struct file *file, const char *buffer, #endif /* __LITTLE_ENDIAN */ /* bits ops */ -static __inline__ int set_bit(int nr,long * addr) + +/* a long can be more than 32 bits, so use BITS_PER_LONG + * to allow the compiler to adjust the bit shifting accordingly + */ + +/* set bit nr in bitmap addr; returns previous value of bit nr */ +static __inline__ int set_bit(int nr, long * addr) { - int mask, retval; + long mask; - addr += nr >> 5; - mask = 1 << (nr & 0x1f); - retval = (mask & *addr) != 0; - *addr |= mask; - return retval; + addr += nr / BITS_PER_LONG; + mask = 1UL << (nr & (BITS_PER_LONG - 1)); + nr = (mask & *addr) != 0; + *addr |= mask; + return nr; } +/* clear bit nr in bitmap addr; returns previous value of bit nr */ static __inline__ int clear_bit(int nr, long * addr) { - int mask, retval; + long mask; - addr += nr >> 5; - mask = 1 << (nr & 0x1f); - retval = (mask & *addr) != 0; - *addr &= ~mask; - return retval; + addr += nr / BITS_PER_LONG; + mask = 1UL << (nr & (BITS_PER_LONG - 1)); + nr = (mask & *addr) != 0; + *addr &= ~mask; + return nr; } static 
__inline__ int test_bit(int nr, long * addr) { - int mask; - - addr += nr >> 5; - mask = 1 << (nr & 0x1f); - return ((mask & *addr) != 0); + return ((1UL << (nr & (BITS_PER_LONG - 1))) & ((addr)[nr / BITS_PER_LONG])) != 0; } static __inline__ int ext2_set_bit(int nr, void *addr) @@ -555,12 +560,7 @@ struct signal { int signal; }; -struct fs_struct { - int umask; -}; - struct task_struct { - struct fs_struct *fs; int state; struct signal pending; char comm[32]; @@ -571,8 +571,6 @@ struct task_struct { int ngroups; gid_t *groups; __u32 cap_effective; - - struct fs_struct __fs; }; extern struct task_struct *current; @@ -768,10 +766,59 @@ void *liblustre_register_wait_callback(int (*fn)(void *arg), void *arg); void liblustre_deregister_wait_callback(void *notifier); int liblustre_wait_event(int timeout); +/* flock related */ +struct nfs_lock_info { + __u32 state; + __u32 flags; + void *host; +}; + +struct file_lock { + struct file_lock *fl_next; /* singly linked list for this inode */ + struct list_head fl_link; /* doubly linked list of all locks */ + struct list_head fl_block; /* circular list of blocked processes */ + void *fl_owner; + unsigned int fl_pid; + wait_queue_head_t fl_wait; + struct file *fl_file; + unsigned char fl_flags; + unsigned char fl_type; + loff_t fl_start; + loff_t fl_end; + + void (*fl_notify)(struct file_lock *); /* unblock callback */ + void (*fl_insert)(struct file_lock *); /* lock insertion callback */ + void (*fl_remove)(struct file_lock *); /* lock removal callback */ + + void *fl_fasync; /* for lease break notifications */ + unsigned long fl_break_time; /* for nonblocking lease breaks */ + + union { + struct nfs_lock_info nfs_fl; + } fl_u; +}; + +#ifndef OFFSET_MAX +#define INT_LIMIT(x) (~((x)1 << (sizeof(x)*8 - 1))) +#define OFFSET_MAX INT_LIMIT(loff_t) +#endif + +/* XXX: defined in kernel */ +#define FL_POSIX 1 +#define FL_SLEEP 128 + /* quota */ #define QUOTA_OK 0 #define NO_QUOTA 1 +/* proc */ +#define proc_symlink(...) 
\ +({ \ + void *result = NULL; \ + result; \ +}) + + #include #include #include diff --git a/lustre/include/linux/lustre_compat25.h b/lustre/include/linux/lustre_compat25.h index 8cc15f8..35a7cde 100644 --- a/lustre/include/linux/lustre_compat25.h +++ b/lustre/include/linux/lustre_compat25.h @@ -118,8 +118,8 @@ static inline int cleanup_group_info(void) #else /* 2.4.. */ -#ifdef HAVE_MM_INLINE -#include +#ifdef HAVE_MM_INLINE +#include #endif #define ll_vfs_create(a,b,c,d) vfs_create(a,b,c) diff --git a/lustre/include/linux/lustre_fsfilt.h b/lustre/include/linux/lustre_fsfilt.h index ceb3a41..a90ed40 100644 --- a/lustre/include/linux/lustre_fsfilt.h +++ b/lustre/include/linux/lustre_fsfilt.h @@ -1,7 +1,7 @@ /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: * - * Copyright (C) 2001 Cluster File Systems, Inc. + * Copyright (C) 2001-2004 Cluster File Systems, Inc. * * This file is part of Lustre, http://www.lustre.org. * diff --git a/lustre/include/linux/lustre_lib.h b/lustre/include/linux/lustre_lib.h index 6a961d1..71b9ff4 100644 --- a/lustre/include/linux/lustre_lib.h +++ b/lustre/include/linux/lustre_lib.h @@ -35,8 +35,7 @@ # include # include #endif -#include -#include /* XXX just for LASSERT! 
*/ +#include #include #include @@ -206,59 +205,59 @@ static inline int obd_ioctl_packlen(struct obd_ioctl_data *data) static inline int obd_ioctl_is_invalid(struct obd_ioctl_data *data) { if (data->ioc_len > (1<<30)) { - printk("LustreError: OBD ioctl: ioc_len larger than 1<<30\n"); + CERROR("OBD ioctl: ioc_len larger than 1<<30\n"); return 1; } if (data->ioc_inllen1 > (1<<30)) { - printk("LustreError: OBD ioctl: ioc_inllen1 larger than 1<<30\n"); + CERROR("OBD ioctl: ioc_inllen1 larger than 1<<30\n"); return 1; } if (data->ioc_inllen2 > (1<<30)) { - printk("LustreError: OBD ioctl: ioc_inllen2 larger than 1<<30\n"); + CERROR("OBD ioctl: ioc_inllen2 larger than 1<<30\n"); return 1; } if (data->ioc_inllen3 > (1<<30)) { - printk("LustreError: OBD ioctl: ioc_inllen3 larger than 1<<30\n"); + CERROR("OBD ioctl: ioc_inllen3 larger than 1<<30\n"); return 1; } if (data->ioc_inllen4 > (1<<30)) { - printk("LustreError: OBD ioctl: ioc_inllen4 larger than 1<<30\n"); + CERROR("OBD ioctl: ioc_inllen4 larger than 1<<30\n"); return 1; } if (data->ioc_inlbuf1 && !data->ioc_inllen1) { - printk("LustreError: OBD ioctl: inlbuf1 pointer but 0 length\n"); + CERROR("OBD ioctl: inlbuf1 pointer but 0 length\n"); return 1; } if (data->ioc_inlbuf2 && !data->ioc_inllen2) { - printk("LustreError: OBD ioctl: inlbuf2 pointer but 0 length\n"); + CERROR("OBD ioctl: inlbuf2 pointer but 0 length\n"); return 1; } if (data->ioc_inlbuf3 && !data->ioc_inllen3) { - printk("LustreError: OBD ioctl: inlbuf3 pointer but 0 length\n"); + CERROR("OBD ioctl: inlbuf3 pointer but 0 length\n"); return 1; } if (data->ioc_inlbuf4 && !data->ioc_inllen4) { - printk("LustreError: OBD ioctl: inlbuf4 pointer but 0 length\n"); + CERROR("OBD ioctl: inlbuf4 pointer but 0 length\n"); return 1; } if (data->ioc_pbuf1 && !data->ioc_plen1) { - printk("LustreError: OBD ioctl: pbuf1 pointer but 0 length\n"); + CERROR("OBD ioctl: pbuf1 pointer but 0 length\n"); return 1; } if (data->ioc_pbuf2 && !data->ioc_plen2) { - 
printk("LustreError: OBD ioctl: pbuf2 pointer but 0 length\n"); + CERROR("OBD ioctl: pbuf2 pointer but 0 length\n"); return 1; } if (data->ioc_plen1 && !data->ioc_pbuf1) { - printk("LustreError: OBD ioctl: plen1 set but NULL pointer\n"); + CERROR("OBD ioctl: plen1 set but NULL pointer\n"); return 1; } if (data->ioc_plen2 && !data->ioc_pbuf2) { - printk("LustreError: OBD ioctl: plen2 set but NULL pointer\n"); + CERROR("OBD ioctl: plen2 set but NULL pointer\n"); return 1; } if (obd_ioctl_packlen(data) != data->ioc_len) { - printk("LustreError: OBD ioctl: packlen exceeds ioc_len (%d != %d)\n", + CERROR("OBD ioctl: packlen exceeds ioc_len (%d != %d)\n", obd_ioctl_packlen(data), data->ioc_len); return 1; } @@ -668,13 +667,29 @@ do { \ #else /* !__KERNEL__ */ #define __l_wait_event(wq, condition, info, ret, excl) \ do { \ + long timeout = info->lwi_timeout, elapse, last = 0; \ int __timed_out = 0; \ \ + if (info->lwi_timeout == 0) \ + timeout = 1000000000; \ + else \ + last = time(NULL); \ + \ for (;;) { \ if (condition) \ break; \ - if (liblustre_wait_event(info->lwi_timeout)) \ + if (liblustre_wait_event(timeout)) { \ + if (timeout == 0 || info->lwi_timeout == 0) \ + continue; \ + elapse = time(NULL) - last; \ + if (elapse) { \ + last += elapse; \ + timeout -= elapse; \ + if (timeout < 0) \ + timeout = 0; \ + } \ continue; \ + } \ if (info->lwi_timeout && !__timed_out) { \ __timed_out = 1; \ if (info->lwi_on_timeout == NULL || \ diff --git a/lustre/include/linux/lustre_net.h b/lustre/include/linux/lustre_net.h index 3cac3bd..98ae32e 100644 --- a/lustre/include/linux/lustre_net.h +++ b/lustre/include/linux/lustre_net.h @@ -333,7 +333,10 @@ struct ptlrpc_request { struct timeval rq_arrival_time; /* request arrival time */ struct ptlrpc_reply_state *rq_reply_state; /* separated reply state */ struct ptlrpc_request_buffer_desc *rq_rqbd; /* incoming request buffer*/ - +#if CRAY_PORTALS + ptl_uid_t rq_uid; /* peer uid, used in MDS only */ +#endif + /* client-only incoming 
reply */ ptl_handle_md_t rq_reply_md_h; wait_queue_head_t rq_reply_waitq; diff --git a/lustre/ldlm/ldlm_flock.c b/lustre/ldlm/ldlm_flock.c index f4d660a..b4bae36 100644 --- a/lustre/ldlm/ldlm_flock.c +++ b/lustre/ldlm/ldlm_flock.c @@ -31,6 +31,7 @@ #include #else #include +#include #endif #include "ldlm_internal.h" @@ -362,9 +363,11 @@ ldlm_process_flock_lock(struct ldlm_lock *req, int *flags, int first_enq, break; } + /* At this point we're granting the lock request. */ + req->l_granted_mode = req->l_req_mode; + /* Add req to the granted queue before calling ldlm_reprocess_all(). */ if (!added) { - req->l_granted_mode = req->l_req_mode; list_del_init(&req->l_res_link); /* insert new lock before ownlocks in list. */ ldlm_resource_add_lock(res, ownlocks, req); @@ -431,6 +434,9 @@ ldlm_flock_interrupted_wait(void *data) /* take lock off the deadlock detection waitq. */ list_del_init(&lock->l_flock_waitq); + /* client side - set flag to prevent lock from being put on lru list */ + lock->l_flags |= LDLM_FL_CBPENDING; + ldlm_lock_decref_internal(lock, lock->l_req_mode); ldlm_lock2handle(lock, &lockh); ldlm_cli_cancel(&lockh); @@ -456,11 +462,6 @@ ldlm_flock_completion_ast(struct ldlm_lock *lock, int flags, void *data) LASSERT(flags != LDLM_FL_WAIT_NOREPROC); - if (flags == 0) { - wake_up(&lock->l_waitq); - RETURN(0); - } - if (!(flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED | LDLM_FL_BLOCK_CONV))) goto granted; @@ -468,8 +469,6 @@ ldlm_flock_completion_ast(struct ldlm_lock *lock, int flags, void *data) LDLM_DEBUG(lock, "client-side enqueue returned a blocked lock, " "sleeping"); - ldlm_lock_dump(D_OTHER, lock, 0); - fwd.fwd_lock = lock; obd = class_exp2obd(lock->l_conn_export); @@ -490,17 +489,12 @@ ldlm_flock_completion_ast(struct ldlm_lock *lock, int flags, void *data) ((lock->l_req_mode == lock->l_granted_mode) || lock->l_destroyed), &lwi); - if (rc) { - LDLM_DEBUG(lock, "client-side enqueue waking up: failed (%d)", - rc); - RETURN(rc); - } - - 
LASSERT(!(lock->l_destroyed)); - + LDLM_DEBUG(lock, "client-side enqueue waking up: rc = %d", rc); + RETURN(rc); + granted: - LDLM_DEBUG(lock, "client-side enqueue waking up"); + LDLM_DEBUG(lock, "client-side enqueue granted"); ns = lock->l_resource->lr_namespace; l_lock(&ns->ns_lock); @@ -529,14 +523,18 @@ granted: getlk->fl_start = lock->l_policy_data.l_flock.start; getlk->fl_end = lock->l_policy_data.l_flock.end; } else { + int noreproc = LDLM_FL_WAIT_NOREPROC; + /* We need to reprocess the lock to do merges or splits * with existing locks owned by this process. */ - flags = LDLM_FL_WAIT_NOREPROC; - ldlm_process_flock_lock(lock, &flags, 1, &err); + ldlm_process_flock_lock(lock, &noreproc, 1, &err); + if (flags == 0) + wake_up(&lock->l_waitq); } l_unlock(&ns->ns_lock); RETURN(0); } +EXPORT_SYMBOL(ldlm_flock_completion_ast); int ldlm_flock_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, void *data, int flag) diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 0db65ce..cdc43ab 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -644,8 +644,9 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) if (export == NULL) { if (target->obd_recovering) { - CERROR("denying connection for new client %s: " - "%d clients in recovery for %lds\n", cluuid.uuid, + CERROR("%s: denying connection for new client %s: " + "%d clients in recovery for %lds\n", + target->obd_name, cluuid.uuid, target->obd_recoverable_clients, (target->obd_recovery_timer.expires-jiffies)/HZ); rc = -EBUSY; diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c index 1e7fedf..8f73b2a 100644 --- a/lustre/ldlm/ldlm_lock.c +++ b/lustre/ldlm/ldlm_lock.c @@ -157,6 +157,7 @@ void ldlm_lock_remove_from_lru(struct ldlm_lock *lock) ENTRY; l_lock(&lock->l_resource->lr_namespace->ns_lock); if (!list_empty(&lock->l_lru)) { + LASSERT(lock->l_resource->lr_type != LDLM_FLOCK); list_del_init(&lock->l_lru); 
lock->l_resource->lr_namespace->ns_nr_unused--; LASSERT(lock->l_resource->lr_namespace->ns_nr_unused >= 0); @@ -1093,15 +1094,24 @@ struct ldlm_resource *ldlm_lock_convert(struct ldlm_lock *lock, int new_mode, struct ldlm_resource *res; struct ldlm_namespace *ns; int granted = 0; + int old_mode, rc; + ldlm_error_t err; ENTRY; - LBUG(); + if (new_mode == lock->l_granted_mode) { // No changes? Just return. + *flags |= LDLM_FL_BLOCK_GRANTED; + RETURN(lock->l_resource); + } + + LASSERTF(new_mode == LCK_PW && lock->l_granted_mode == LCK_PR, + "new_mode %u, granted %u\n", new_mode, lock->l_granted_mode); res = lock->l_resource; ns = res->lr_namespace; l_lock(&ns->ns_lock); + old_mode = lock->l_req_mode; lock->l_req_mode = new_mode; ldlm_resource_unlink_lock(lock); @@ -1112,6 +1122,8 @@ struct ldlm_resource *ldlm_lock_convert(struct ldlm_lock *lock, int new_mode, } else { /* This should never happen, because of the way the * server handles conversions. */ + LDLM_ERROR(lock, "Erroneous flags %d on local lock\n", + *flags); LBUG(); res->lr_tmp = &rpc_list; @@ -1123,10 +1135,20 @@ struct ldlm_resource *ldlm_lock_convert(struct ldlm_lock *lock, int new_mode, lock->l_completion_ast(lock, 0, NULL); } } else { - /* FIXME: We should try the conversion right away and possibly - * return success without the need for an extra AST */ - ldlm_resource_add_lock(res, &res->lr_converting, lock); - *flags |= LDLM_FL_BLOCK_CONV; + int pflags = 0; + ldlm_processing_policy policy; + policy = ldlm_processing_policy_table[res->lr_type]; + res->lr_tmp = &rpc_list; + rc = policy(lock, &pflags, 0, &err); + res->lr_tmp = NULL; + if (rc == LDLM_ITER_STOP) { + lock->l_req_mode = old_mode; + ldlm_resource_add_lock(res, &res->lr_granted, lock); + res = NULL; + } else { + *flags |= LDLM_FL_BLOCK_GRANTED; + granted = 1; + } } l_unlock(&ns->ns_lock); diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index 0533a10..0738c73 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c 
@@ -839,14 +839,23 @@ int ldlm_handle_convert(struct ptlrpc_request *req) if (!lock) { req->rq_status = EINVAL; } else { + void *res; l_lock(&lock->l_resource->lr_namespace->ns_lock); LDLM_DEBUG(lock, "server-side convert handler START"); - ldlm_lock_convert(lock, dlm_req->lock_desc.l_req_mode, - &dlm_rep->lock_flags); - if (ldlm_del_waiting_lock(lock)) - CDEBUG(D_DLMTRACE, "converted waiting lock %p\n", lock); l_unlock(&lock->l_resource->lr_namespace->ns_lock); - req->rq_status = 0; + do_gettimeofday(&lock->l_enqueued_time); + res = ldlm_lock_convert(lock, dlm_req->lock_desc.l_req_mode, + &dlm_rep->lock_flags); + if (res) { + l_lock(&lock->l_resource->lr_namespace->ns_lock); + if (ldlm_del_waiting_lock(lock)) + CDEBUG(D_DLMTRACE,"converted waiting lock %p\n", + lock); + l_unlock(&lock->l_resource->lr_namespace->ns_lock); + req->rq_status = 0; + } else { + req->rq_status = EDEADLOCK; + } } if (lock) { @@ -1548,7 +1557,9 @@ static int ldlm_cleanup(int force) wake_up(&expired_lock_thread.elt_waitq); wait_event(expired_lock_thread.elt_waitq, expired_lock_thread.elt_state == ELT_STOPPED); - +#else + ptlrpc_unregister_service(ldlm_state->ldlm_cb_service); + ptlrpc_unregister_service(ldlm_state->ldlm_cancel_service); #endif OBD_FREE(ldlm_state, sizeof(*ldlm_state)); @@ -1588,9 +1599,6 @@ void __exit ldlm_exit(void) "couldn't free ldlm lock slab\n"); } -/* ldlm_flock.c */ -EXPORT_SYMBOL(ldlm_flock_completion_ast); - /* ldlm_extent.c */ EXPORT_SYMBOL(ldlm_extent_shift_kms); diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c index 83445af..83055cb 100644 --- a/lustre/ldlm/ldlm_request.c +++ b/lustre/ldlm/ldlm_request.c @@ -210,6 +210,12 @@ static void failed_lock_cleanup(struct ldlm_namespace *ns, l_unlock(&ns->ns_lock); ldlm_lock_decref_and_cancel(lockh, mode); + + /* XXX - HACK because we shouldn't call ldlm_lock_destroy() + * from llite/file.c/ll_file_flock(). 
*/ + if (lock->l_resource->lr_type == LDLM_FLOCK) { + ldlm_lock_destroy(lock); + } } int ldlm_cli_enqueue(struct obd_export *exp, @@ -433,6 +439,8 @@ cleanup: static int ldlm_cli_convert_local(struct ldlm_lock *lock, int new_mode, int *flags) { + struct ldlm_resource *res; + int rc; ENTRY; if (lock->l_resource->lr_namespace->ns_client) { CERROR("Trying to cancel local lock\n"); @@ -440,16 +448,22 @@ static int ldlm_cli_convert_local(struct ldlm_lock *lock, int new_mode, } LDLM_DEBUG(lock, "client-side local convert"); - ldlm_lock_convert(lock, new_mode, flags); - ldlm_reprocess_all(lock->l_resource); - + res = ldlm_lock_convert(lock, new_mode, flags); + if (res) { + ldlm_reprocess_all(res); + rc = 0; + } else { + rc = EDEADLOCK; + } LDLM_DEBUG(lock, "client-side local convert handler END"); LDLM_LOCK_PUT(lock); - RETURN(0); + RETURN(rc); } /* FIXME: one of ldlm_cli_convert or the server side should reject attempted * conversion of locks which are on the waiting or converting queue */ +/* Caller of this code is supposed to take care of lock readers/writers + accounting */ int ldlm_cli_convert(struct lustre_handle *lockh, int new_mode, int *flags) { struct ldlm_request *body; @@ -498,13 +512,23 @@ int ldlm_cli_convert(struct lustre_handle *lockh, int new_mode, int *flags) GOTO (out, rc = -EPROTO); } + if (req->rq_status) + GOTO(out, rc = req->rq_status); + res = ldlm_lock_convert(lock, new_mode, &reply->lock_flags); - if (res != NULL) + if (res != NULL) { ldlm_reprocess_all(res); - /* Go to sleep until the lock is granted. */ - /* FIXME: or cancelled. */ - if (lock->l_completion_ast) - lock->l_completion_ast(lock, LDLM_FL_WAIT_NOREPROC, NULL); + /* Go to sleep until the lock is granted. */ + /* FIXME: or cancelled. 
*/ + if (lock->l_completion_ast) { + rc = lock->l_completion_ast(lock, LDLM_FL_WAIT_NOREPROC, + NULL); + if (rc) + GOTO(out, rc); + } + } else { + rc = EDEADLOCK; + } EXIT; out: LDLM_LOCK_PUT(lock); @@ -613,6 +637,10 @@ int ldlm_cancel_lru(struct ldlm_namespace *ns, ldlm_sync_t sync) LIST_HEAD(cblist); ENTRY; +#ifndef __KERNEL__ + sync = LDLM_SYNC; /* force to be sync in user space */ +#endif + l_lock(&ns->ns_lock); count = ns->ns_nr_unused - ns->ns_max_unused; @@ -644,6 +672,7 @@ int ldlm_cancel_lru(struct ldlm_namespace *ns, ldlm_sync_t sync) "lock %p next %p prev %p\n", lock, &lock->l_export_chain.next, &lock->l_export_chain.prev); + __LDLM_DEBUG(D_INFO, lock, "adding to LRU clear list"); list_add(&lock->l_export_chain, &cblist); } diff --git a/lustre/ldlm/ldlm_resource.c b/lustre/ldlm/ldlm_resource.c index e8d43a0..2738587 100644 --- a/lustre/ldlm/ldlm_resource.c +++ b/lustre/ldlm/ldlm_resource.c @@ -356,7 +356,7 @@ int ldlm_namespace_cleanup(struct ldlm_namespace *ns, int flags) if (!ldlm_resource_putref(res)) { CERROR("Namespace %s resource refcount %d " - "after lock cleanup\n", + "after lock cleanup; forcing cleanup.\n", ns->ns_name, atomic_read(&res->lr_refcount)); } diff --git a/lustre/liblustre/lutil.c b/lustre/liblustre/lutil.c index bc4f64e..64212fa 100644 --- a/lustre/liblustre/lutil.c +++ b/lustre/liblustre/lutil.c @@ -277,9 +277,6 @@ int liblustre_init_current(char *comm) CERROR("Not enough memory\n"); return -ENOMEM; } - current->fs = ¤t->__fs; - current->fs->umask = umask(0777); - umask(current->fs->umask); strncpy(current->comm, comm, sizeof(current->comm)); current->pid = getpid(); diff --git a/lustre/liblustre/rw.c b/lustre/liblustre/rw.c index 81e9c1a..a20e18c 100644 --- a/lustre/liblustre/rw.c +++ b/lustre/liblustre/rw.c @@ -356,20 +356,20 @@ static void llu_ap_completion(void *data, int cmd, struct obdo *oa, int rc) EXIT; } -static void llu_ap_get_ucred(void *data, struct lvfs_ucred *ouc) +static void llu_ap_get_ucred(void *data, struct 
lvfs_ucred *luc) { struct ll_async_page *llap; struct ll_uctxt ctxt; - ENTRY; - + ENTRY; + llap = LLAP_FROM_COOKIE(data); - - ouc->luc_fsuid = current->fsuid; - ouc->luc_fsgid = current->fsgid; - ouc->luc_cap = current->cap_effective; + + luc->luc_fsuid = current->fsuid; + luc->luc_fsgid = current->fsgid; + luc->luc_cap = current->cap_effective; ll_i2uctxt(&ctxt, llap->llap_inode, NULL); - ouc->luc_suppgid1 = ctxt.gid1; - + luc->luc_suppgid1 = ctxt.gid1; + EXIT; } diff --git a/lustre/liblustre/super.c b/lustre/liblustre/super.c index 486b915..51a35f2 100644 --- a/lustre/liblustre/super.c +++ b/lustre/liblustre/super.c @@ -942,8 +942,6 @@ static int llu_iop_mknod_raw(struct pnode *pno, if (llu_i2stat(dir)->st_nlink >= EXT2_LINK_MAX) RETURN(err); - mode &= ~current->fs->umask; - switch (mode & S_IFMT) { case 0: case S_IFREG: @@ -1178,7 +1176,6 @@ static int llu_iop_mkdir_raw(struct pnode *pno, mode_t mode) if (st->st_nlink >= EXT2_LINK_MAX) RETURN(err); - mode = (mode & (S_IRWXUGO|S_ISVTX) & ~current->fs->umask) | S_IFDIR; llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0); err = mdc_create(llu_i2sbi(dir)->ll_mdc_exp, &op_data, NULL, 0, mode, current->fsuid, current->fsgid, 0, &request); @@ -1213,7 +1210,6 @@ static int llu_iop_rmdir_raw(struct pnode *pno) #endif #define FCNTL_FLMASK_INVALID (O_NONBLOCK|O_ASYNC) -#if 0 /* refer to ll_file_flock() for details */ static int llu_file_flock(struct inode *ino, int cmd, @@ -1284,7 +1280,7 @@ static int llu_file_flock(struct inode *ino, LBUG(); } - CDEBUG(D_DLMTRACE, "inode="LPU64", pid="LPU64", flags=%#x, mode=%u, " + CDEBUG(D_DLMTRACE, "inode="LPU64", pid=%u, flags=%#x, mode=%u, " "start="LPU64", end="LPU64"\n", st->st_ino, flock.l_flock.pid, flags, mode, flock.l_flock.start, flock.l_flock.end); @@ -1409,12 +1405,13 @@ static int llu_fcntl_setlk(struct inode *ino, int cmd, struct flock *flock) out: return error; } -#endif static int llu_iop_fcntl(struct inode *ino, int cmd, va_list ap, int *rtn) { struct 
llu_inode_info *lli = llu_i2info(ino); long flags; + struct flock *flock; + long err; switch (cmd) { case F_GETFL: @@ -1433,24 +1430,18 @@ static int llu_iop_fcntl(struct inode *ino, int cmd, va_list ap, int *rtn) (lli->lli_open_flags & ~FCNTL_FLMASK); *rtn = 0; return 0; -#if 0 - case F_GETLK: { - struct flock *flock = va_arg(ap, struct flock *); - int err = llu_fcntl_getlk(ino, flock); - *rtn = err ? -1: 0; - + case F_GETLK: + flock = va_arg(ap, struct flock *); + err = llu_fcntl_getlk(ino, flock); + *rtn = err? -1: 0; return err; - } case F_SETLK: - case F_SETLKW: { - struct flock *flock = va_arg(ap, struct flock *); - int err = llu_fcntl_setlk(ino, cmd, flock); - *rtn = err ? -1: 0; - + case F_SETLKW: + flock = va_arg(ap, struct flock *); + err = llu_fcntl_setlk(ino, cmd, flock); + *rtn = err? -1: 0; return err; } -#endif - } CERROR("unsupported fcntl cmd %x\n", cmd); *rtn = -ENOSYS; diff --git a/lustre/llite/file.c b/lustre/llite/file.c index cab93f1..0e72f5a 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -748,10 +748,10 @@ int ll_extent_unlock(struct ll_file_data *fd, struct inode *inode, RETURN(rc); } -static ssize_t ll_file_read(struct file *filp, char *buf, size_t count, +static ssize_t ll_file_read(struct file *file, char *buf, size_t count, loff_t *ppos) { - struct inode *inode = filp->f_dentry->d_inode; + struct inode *inode = file->f_dentry->d_inode; struct ll_inode_info *lli = ll_i2info(inode); struct lov_stripe_md *lsm = lli->lli_smd; struct ll_lock_tree tree; @@ -760,8 +760,8 @@ static ssize_t ll_file_read(struct file *filp, char *buf, size_t count, ssize_t retval; __u64 kms; ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),size=%lu,offset=%Ld=%#Lx\n", - inode->i_ino, inode->i_generation,inode,(long)count,*ppos,*ppos); + CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),size="LPSZ",offset=%Ld\n", + inode->i_ino, inode->i_generation, inode, count, *ppos); /* "If nbyte is 0, read() will return 0 and have no other results." 
* -- Single Unix Spec */ @@ -798,9 +798,9 @@ static ssize_t ll_file_read(struct file *filp, char *buf, size_t count, } node = ll_node_from_inode(inode, *ppos, *ppos + count - 1, LCK_PR); - tree.lt_fd = LUSTRE_FPRIVATE(filp); + tree.lt_fd = LUSTRE_FPRIVATE(file); rc = ll_tree_lock(&tree, node, buf, count, - filp->f_flags & O_NONBLOCK ? LDLM_FL_BLOCK_NOWAIT :0); + file->f_flags & O_NONBLOCK ? LDLM_FL_BLOCK_NOWAIT :0); if (rc != 0) RETURN(rc); @@ -823,11 +823,11 @@ static ssize_t ll_file_read(struct file *filp, char *buf, size_t count, /* turn off the kernel's read-ahead */ #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - filp->f_ramax = 0; + file->f_ramax = 0; #else - filp->f_ra.ra_pages = 0; + file->f_ra.ra_pages = 0; #endif - retval = generic_file_read(filp, buf, count, ppos); + retval = generic_file_read(file, buf, count, ppos); out: ll_tree_unlock(&tree); @@ -841,15 +841,14 @@ static ssize_t ll_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos) { struct inode *inode = file->f_dentry->d_inode; - struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd; struct ll_lock_tree tree; struct ll_lock_tree_node *node; loff_t maxbytes = ll_file_maxbytes(inode); ssize_t retval; int rc; ENTRY; - CDEBUG(D_VFSTRACE,"VFS Op:inode=%lu/%u(%p),size=%lu,offset=%Ld=%#Lx\n", - inode->i_ino, inode->i_generation,inode,(long)count,*ppos,*ppos); + CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),size="LPSZ",offset=%Ld\n", + inode->i_ino, inode->i_generation, inode, count, *ppos); SIGNAL_MASK_ASSERT(); /* XXX BUG 1511 */ @@ -859,16 +858,18 @@ static ssize_t ll_file_write(struct file *file, const char *buf, size_t count, /* If file was opened for LL_IOC_LOV_SETSTRIPE but the ioctl wasn't * called on the file, don't fail the below assertion (bug 2388). 
*/ - if (file->f_flags & O_LOV_DELAY_CREATE && lsm == NULL) + if (file->f_flags & O_LOV_DELAY_CREATE && + ll_i2info(inode)->lli_smd == NULL) RETURN(-EBADF); - LASSERT(lsm != NULL); + LASSERT(ll_i2info(inode)->lli_smd != NULL); if (file->f_flags & O_APPEND) node = ll_node_from_inode(inode, 0, OBD_OBJECT_EOF, LCK_PW); else - node = ll_node_from_inode(inode, *ppos, *ppos + count - 1, + node = ll_node_from_inode(inode, *ppos, *ppos + count - 1, LCK_PW); + if (IS_ERR(node)) RETURN(PTR_ERR(node)); @@ -1067,6 +1068,11 @@ static int ll_lov_setstripe(struct inode *inode, struct file *file, RETURN(-EFAULT); rc = ll_lov_setstripe_ea_info(inode, file, flags, &lum, sizeof(lum)); + if (rc == 0) { + put_user(0, &lump->lmm_stripe_count); + rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2obdexp(inode), + 0, ll_i2info(inode)->lli_smd, lump); + } RETURN(rc); } @@ -1322,8 +1328,7 @@ int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock) flags, mode, flock.l_flock.start, flock.l_flock.end); obddev = sbi->ll_mdc_exp->exp_obd; - rc = ldlm_cli_enqueue(obddev->obd_self_export, NULL, - obddev->obd_namespace, + rc = ldlm_cli_enqueue(sbi->ll_mdc_exp, NULL, obddev->obd_namespace, res_id, LDLM_FLOCK, &flock, mode, &flags, NULL, ldlm_flock_completion_ast, NULL, file_lock, NULL, 0, NULL, &lockh); diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index 7ab6585..829bd02 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -49,6 +49,7 @@ extern struct file_operations ll_pgcache_seq_fops; #define LLI_INODE_DEAD 0xdeadd00d #define LLI_F_HAVE_OST_SIZE_LOCK 0 #define LLI_F_HAVE_MDS_SIZE_LOCK 1 + struct ll_inode_info { int lli_inode_magic; int lli_size_pid; diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index 86f9506..c0127f5 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -1430,7 +1430,8 @@ void ll_umount_begin(struct super_block *sb) struct obd_device *obd; struct obd_ioctl_data ioc_data = 
{ 0 }; ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:\n"); + CDEBUG(D_VFSTRACE, "VFS Op: superblock %p count %d active %d\n", sb, + sb->s_count, atomic_read(&sb->s_active)); obd = class_exp2obd(sbi->ll_mdc_exp); if (obd == NULL) { @@ -1540,4 +1541,3 @@ struct ll_async_page *llite_pglist_next_llap(struct ll_sb_info *sbi, LBUG(); return NULL; } - diff --git a/lustre/llite/llite_mmap.c b/lustre/llite/llite_mmap.c index 1e2b1d0..4be05bf 100644 --- a/lustre/llite/llite_mmap.c +++ b/lustre/llite/llite_mmap.c @@ -42,7 +42,6 @@ #include #endif - #define DEBUG_SUBSYSTEM S_LLITE #include diff --git a/lustre/llite/namei.c b/lustre/llite/namei.c index 226c3ed4..48a166b 100644 --- a/lustre/llite/namei.c +++ b/lustre/llite/namei.c @@ -373,6 +373,8 @@ static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry, ll_inode2fid(&pfid, parent); ll_i2uctxt(&ctxt, parent, NULL); + it->it_create_mode &= ~current->fs->umask; + rc = mdc_intent_lock(ll_i2mdcexp(parent), &ctxt, &pfid, dentry->d_name.name, dentry->d_name.len, NULL, 0, NULL, it, lookup_flags, &req, ll_mdc_blocking_ast); diff --git a/lustre/llite/rw.c b/lustre/llite/rw.c index 10c5f28..f912850 100644 --- a/lustre/llite/rw.c +++ b/lustre/llite/rw.c @@ -101,8 +101,6 @@ static int ll_brw(int cmd, struct inode *inode, struct obdo *oa, RETURN(rc); } -__u64 lov_merge_size(struct lov_stripe_md *lsm, int kms); - /* this isn't where truncate starts. roughly: * sys_truncate->ll_setattr_raw->vmtruncate->ll_truncate * we grab the lock back in setattr_raw to avoid races. 
@@ -185,7 +183,6 @@ void ll_truncate(struct inode *inode) up(&lli->lli_size_sem); } /* ll_truncate */ -__u64 lov_merge_size(struct lov_stripe_md *lsm, int kms); int ll_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) { @@ -260,24 +257,13 @@ int ll_prepare_write(struct file *file, struct page *page, unsigned from, return rc; } -struct ll_async_page *llap_from_cookie(void *cookie) -{ - struct ll_async_page *llap = cookie; - if (llap->llap_magic != LLAP_MAGIC) - return ERR_PTR(-EINVAL); - return llap; -}; - static int ll_ap_make_ready(void *data, int cmd) { struct ll_async_page *llap; struct page *page; ENTRY; - llap = llap_from_cookie(data); - if (IS_ERR(llap)) - RETURN(-EINVAL); - + llap = LLAP_FROM_COOKIE(data); page = llap->llap_page; LASSERT(cmd != OBD_BRW_READ); @@ -327,10 +313,7 @@ static int ll_ap_refresh_count(void *data, int cmd) /* readpage queues with _COUNT_STABLE, shouldn't get here. */ LASSERT(cmd != OBD_BRW_READ); - llap = llap_from_cookie(data); - if (IS_ERR(llap)) - RETURN(PTR_ERR(llap)); - + llap = LLAP_FROM_COOKIE(data); page = llap->llap_page; lli = ll_i2info(page->mapping->host); lsm = lli->lli_smd; @@ -379,12 +362,7 @@ static void ll_ap_fill_obdo(void *data, int cmd, struct obdo *oa) struct ll_async_page *llap; ENTRY; - llap = llap_from_cookie(data); - if (IS_ERR(llap)) { - EXIT; - return; - } - + llap = LLAP_FROM_COOKIE(data); ll_inode_fill_obdo(llap->llap_page->mapping->host, cmd, oa); EXIT; } @@ -393,7 +371,7 @@ static void ll_ap_get_ucred(void *data, struct lvfs_ucred *ouc) { struct ll_async_page *llap; - llap = llap_from_cookie(data); + llap = LLAP_FROM_COOKIE(data); if (IS_ERR(llap)) { EXIT; return; @@ -801,12 +779,7 @@ void ll_ap_completion(void *data, int cmd, struct obdo *oa, int rc) struct page *page; ENTRY; - llap = llap_from_cookie(data); - if (IS_ERR(llap)) { - EXIT; - return; - } - + llap = LLAP_FROM_COOKIE(data); page = llap->llap_page; LASSERT(PageLocked(page)); @@ -1222,8 +1195,8 @@ int 
ll_writepage(struct page *page) llap->llap_cookie, ASYNC_READY | ASYNC_URGENT); } else { - rc = queue_or_sync_write(exp, inode, llap, - PAGE_SIZE, ASYNC_READY | ASYNC_URGENT); + rc = queue_or_sync_write(exp, inode, llap, PAGE_SIZE, + ASYNC_READY | ASYNC_URGENT); } if (rc) page_cache_release(page); diff --git a/lustre/mdc/mdc_locks.c b/lustre/mdc/mdc_locks.c index bb3e974..c525a2a 100644 --- a/lustre/mdc/mdc_locks.c +++ b/lustre/mdc/mdc_locks.c @@ -277,7 +277,6 @@ int mdc_enqueue(struct obd_export *exp, if (it->it_op & IT_OPEN) { it->it_create_mode |= S_IFREG; - it->it_create_mode &= ~current->fs->umask; size[2] = sizeof(struct mds_rec_create); size[3] = data->namelen + 1; @@ -431,9 +430,8 @@ int mdc_enqueue(struct obd_export *exp, } if ((body->valid & OBD_MD_FLEASIZE) != 0) { - /* The eadata is opaque; just check that it is - * there. Eventually, obd_unpackmd() will check - * the contents */ + /* The eadata is opaque; just check that it is there. + * Eventually, obd_unpackmd() will check the contents */ eadata = lustre_swab_repbuf(req, 2, body->eadatasize, NULL); if (eadata == NULL) { diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index bf19fdf..512c9af 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -700,7 +700,11 @@ static int mds_getattr_name(int offset, struct ptlrpc_request *req, offset = 1; } +#if CRAY_PORTALS + uc.luc_fsuid = req->rq_uid; +#else uc.luc_fsuid = body->fsuid; +#endif uc.luc_fsgid = body->fsgid; uc.luc_cap = body->capability; uc.luc_suppgid1 = body->suppgid; @@ -820,7 +824,11 @@ static int mds_getattr(int offset, struct ptlrpc_request *req) RETURN(-EFAULT); } +#if CRAY_PORTALS + uc.luc_fsuid = req->rq_uid; +#else uc.luc_fsuid = body->fsuid; +#endif uc.luc_fsgid = body->fsgid; uc.luc_cap = body->capability; push_ctxt(&saved, &obd->obd_lvfs_ctxt, &uc); @@ -966,7 +974,11 @@ static int mds_readpage(struct ptlrpc_request *req) if (body == NULL) GOTO (out, rc = -EFAULT); +#if CRAY_PORTALS + uc.luc_fsuid = req->rq_uid; +#else 
uc.luc_fsuid = body->fsuid; +#endif uc.luc_fsgid = body->fsgid; uc.luc_cap = body->capability; push_ctxt(&saved, &obd->obd_lvfs_ctxt, &uc); @@ -1703,7 +1715,8 @@ static int mds_setup(struct obd_device *obd, obd_count len, void *buf) rc = mds_fs_setup(obd, mnt); if (rc) { - CERROR("MDS filesystem method init failed: rc = %d\n", rc); + CERROR("%s: MDS filesystem method init failed: rc = %d\n", + obd->obd_name, rc); GOTO(err_ns, rc); } @@ -1896,7 +1909,7 @@ int mds_postrecov(struct obd_device *obd) } out: - RETURN(rc < 0 ? rc: item); + RETURN(rc < 0 ? rc : item); err_llog: /* cleanup all llogging subsystems */ @@ -2057,7 +2070,8 @@ static void fixup_handle_for_resent_req(struct ptlrpc_request *req, /* If the xid matches, then we know this is a resent request, * and allow it. (It's probably an OPEN, for which we don't * send a lock */ - if (req->rq_xid == exp->exp_mds_data.med_mcd->mcd_last_xid) + if (req->rq_xid == + le64_to_cpu(exp->exp_mds_data.med_mcd->mcd_last_xid)) return; /* This remote handle isn't enqueued, so we never received or diff --git a/lustre/mds/mds_fs.c b/lustre/mds/mds_fs.c index d6be9cd..9266426 100644 --- a/lustre/mds/mds_fs.c +++ b/lustre/mds/mds_fs.c @@ -47,7 +47,6 @@ /* This limit is arbitrary, but for now we fit it in 1 page (32k clients) */ #define MDS_MAX_CLIENTS (PAGE_SIZE * 8) -#define MDS_MAX_CLIENT_WORDS (MDS_MAX_CLIENTS / sizeof(unsigned long)) #define LAST_RCVD "last_rcvd" #define LOV_OBJID "lov_objid" @@ -195,8 +194,7 @@ free: static int mds_server_free_data(struct mds_obd *mds) { - OBD_FREE(mds->mds_client_bitmap, - MDS_MAX_CLIENT_WORDS * sizeof(unsigned long)); + OBD_FREE(mds->mds_client_bitmap, MDS_MAX_CLIENTS / 8); OBD_FREE(mds->mds_server_data, sizeof(*mds->mds_server_data)); mds->mds_server_data = NULL; @@ -224,8 +222,7 @@ static int mds_init_server_data(struct obd_device *obd, struct file *file) if (!msd) RETURN(-ENOMEM); - OBD_ALLOC_WAIT(mds->mds_client_bitmap, - MDS_MAX_CLIENT_WORDS * sizeof(unsigned long)); + 
OBD_ALLOC_WAIT(mds->mds_client_bitmap, MDS_MAX_CLIENTS / 8); if (!mds->mds_client_bitmap) { OBD_FREE(msd, sizeof(*msd)); RETURN(-ENOMEM); @@ -705,8 +702,8 @@ int mds_obd_destroy(struct obd_export *exp, struct obdo *oa, GOTO(out_dput, rc); } if (de->d_inode == NULL) { - CERROR("destroying non-existent object "LPU64" %s\n", - oa->o_id, fidname); + CERROR("destroying non-existent object "LPU64" %s: rc %d\n", + oa->o_id, fidname, rc); GOTO(out_dput, rc = -ENOENT); } @@ -714,10 +711,10 @@ int mds_obd_destroy(struct obd_export *exp, struct obdo *oa, that is unlinked, not spanned across multiple OSTs */ handle = fsfilt_start_log(obd, mds->mds_objects_dir->d_inode, FSFILT_OP_UNLINK, oti, 1); - if (IS_ERR(handle)) { - GOTO(out_dput, rc = PTR_ERR(handle)); - } + if (IS_ERR(handle)) + GOTO(out_dput, rc = PTR_ERR(handle)); + rc = vfs_unlink(mds->mds_objects_dir->d_inode, de); if (rc) CERROR("error destroying object "LPU64":%u: rc %d\n", diff --git a/lustre/mds/mds_internal.h b/lustre/mds/mds_internal.h index c6540e2..b33d53e 100644 --- a/lustre/mds/mds_internal.h +++ b/lustre/mds/mds_internal.h @@ -147,7 +147,6 @@ int mds_lov_disconnect(struct obd_device *obd); void mds_lov_set_cleanup_flags(struct obd_device *); int mds_lov_write_objids(struct obd_device *obd); void mds_lov_update_objids(struct obd_device *obd, obd_id *ids); -int mds_lov_set_growth(struct mds_obd *mds, int count); int mds_lov_set_nextid(struct obd_device *obd); int mds_lov_clearorphans(struct mds_obd *mds, struct obd_uuid *ost_uuid); int mds_post_mds_lovconf(struct obd_device *obd); diff --git a/lustre/mds/mds_lib.c b/lustre/mds/mds_lib.c index b4ea941..57dfb38 100644 --- a/lustre/mds/mds_lib.c +++ b/lustre/mds/mds_lib.c @@ -325,8 +325,7 @@ static update_unpacker mds_unpackers[REINT_MAX] = { int mds_update_unpack(struct ptlrpc_request *req, int offset, struct mds_update_record *rec) { - __u32 *opcodep; - __u32 opcode; + mds_reint_t opcode, *opcodep; int rc; ENTRY; @@ -348,5 +347,8 @@ int 
mds_update_unpack(struct ptlrpc_request *req, int offset, rec->ur_opcode = opcode; rc = mds_unpackers[opcode](req, offset, rec); +#if CRAY_PORTALS + rec->ur_fsuid = req->rq_uid; +#endif RETURN(rc); } diff --git a/lustre/mds/mds_lov.c b/lustre/mds/mds_lov.c index 06b9d1c..9a2f080 100644 --- a/lustre/mds/mds_lov.c +++ b/lustre/mds/mds_lov.c @@ -476,6 +476,7 @@ int mds_iocontrol(unsigned int cmd, struct obd_export *exp, int len, RETURN(0); default: + CDEBUG(D_INFO, "unknown command %x\n", cmd); RETURN(-EINVAL); } RETURN(0); diff --git a/lustre/mds/mds_unlink_open.c b/lustre/mds/mds_unlink_open.c index 3aec0e9..6298eb4 100644 --- a/lustre/mds/mds_unlink_open.c +++ b/lustre/mds/mds_unlink_open.c @@ -129,10 +129,10 @@ static int mds_unlink_orphan(struct obd_device *obd, struct dentry *dchild, } rc = vfs_unlink(pending_dir, dchild); - if (rc) + if (rc) { CERROR("error %d unlinking orphan %.*s from PENDING\n", rc, dchild->d_name.len, dchild->d_name.name); - else if (lmm_size) { + } else if (lmm_size) { OBD_ALLOC(logcookies, mds->mds_max_cookiesize); if (logcookies == NULL) rc = -ENOMEM; @@ -174,8 +174,6 @@ int mds_cleanup_orphans(struct obd_device *obd) ENTRY; push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); - /* dentry and mnt ref dropped in dentry_open() on error, or - * in filp_close() if dentry_open() succeeds */ dentry = dget(mds->mds_pending_dir); if (IS_ERR(dentry)) GOTO(err_pop, rc = PTR_ERR(dentry)); @@ -221,6 +219,13 @@ int mds_cleanup_orphans(struct obd_device *obd) GOTO(next, rc = 0); } + if (is_bad_inode(dchild->d_inode)) { + CERROR("bad orphan inode found %lu/%u\n", + dchild->d_inode->i_ino, + dchild->d_inode->i_generation); + GOTO(next, rc = -ENOENT); + } + child_inode = dchild->d_inode; MDS_DOWN_READ_ORPHAN_SEM(child_inode); if (mds_inode_is_orphan(child_inode) && diff --git a/lustre/obdecho/echo.c b/lustre/obdecho/echo.c index 9eb289a6..a84f321 100644 --- a/lustre/obdecho/echo.c +++ b/lustre/obdecho/echo.c @@ -94,7 +94,7 @@ static int echo_disconnect(struct 
obd_export *exp) static int echo_destroy_export(struct obd_export *exp) { ENTRY; - + target_destroy_export(exp); RETURN(0); @@ -117,7 +117,7 @@ int echo_create(struct obd_export *exp, struct obdo *oa, struct obd_device *obd = class_exp2obd(exp); if (!obd) { - CERROR("invalid client cookie "LPX64"\n", + CERROR("invalid client cookie "LPX64"\n", exp->exp_handle.h_cookie); return -EINVAL; } @@ -169,7 +169,7 @@ static int echo_getattr(struct obd_export *exp, struct obdo *oa, obd_id id = oa->o_id; if (!obd) { - CERROR("invalid client cookie "LPX64"\n", + CERROR("invalid client cookie "LPX64"\n", exp->exp_handle.h_cookie); RETURN(-EINVAL); } @@ -191,7 +191,7 @@ static int echo_setattr(struct obd_export *exp, struct obdo *oa, struct obd_device *obd = class_exp2obd(exp); if (!obd) { - CERROR("invalid client cookie "LPX64"\n", + CERROR("invalid client cookie "LPX64"\n", exp->exp_handle.h_cookie); RETURN(-EINVAL); } @@ -231,7 +231,7 @@ echo_page_debug_setup(struct page *page, int rw, obd_id id, block_debug_setup(addr, OBD_ECHO_BLOCK_SIZE, 0xecc0ecc0ecc0ecc0ULL, 0xecc0ecc0ecc0ecc0ULL); - + addr += OBD_ECHO_BLOCK_SIZE; offset += OBD_ECHO_BLOCK_SIZE; len -= OBD_ECHO_BLOCK_SIZE; @@ -258,7 +258,7 @@ echo_page_debug_check(struct page *page, obd_id id, if (rc2 != 0 && rc == 0) rc = rc2; - + addr += OBD_ECHO_BLOCK_SIZE; offset += OBD_ECHO_BLOCK_SIZE; len -= OBD_ECHO_BLOCK_SIZE; @@ -429,7 +429,7 @@ int echo_commitrw(int cmd, struct obd_export *export, struct obdo *oa, r->page, addr, r->offset); if (verify) { - vrc = echo_page_debug_check(page, obj->ioo_id, + vrc = echo_page_debug_check(page, obj->ioo_id, r->offset, r->len); /* check all the pages always */ if (vrc != 0 && rc == 0) @@ -513,7 +513,7 @@ static int echo_cleanup(struct obd_device *obd) * happened before calling ldlm_namespace_free() */ set_current_state (TASK_UNINTERRUPTIBLE); schedule_timeout (HZ); - + ldlm_namespace_free(obd->obd_namespace, obd->obd_force); leaked = atomic_read(&obd->u.echo.eo_prep); diff --git 
a/lustre/osc/osc_internal.h b/lustre/osc/osc_internal.h index ac8d500..95fe8ee 100644 --- a/lustre/osc/osc_internal.h +++ b/lustre/osc/osc_internal.h @@ -32,6 +32,10 @@ struct osc_async_page { void *oap_caller_data; }; +#define OAP_FROM_COOKIE(c) \ + (LASSERT(((struct osc_async_page *)(c))->oap_magic == OAP_MAGIC), \ + (struct osc_async_page *)(c)) + struct osc_cache_waiter { struct list_head ocw_entry; wait_queue_head_t ocw_waitq; diff --git a/lustre/ptlrpc/autoMakefile.am b/lustre/ptlrpc/autoMakefile.am index 2262e48..c77dcfb 100644 --- a/lustre/ptlrpc/autoMakefile.am +++ b/lustre/ptlrpc/autoMakefile.am @@ -11,7 +11,8 @@ LDLM_COMM_SOURCES= $(top_srcdir)/lustre/ldlm/l_lock.c \ $(top_srcdir)/lustre/ldlm/ldlm_extent.c \ $(top_srcdir)/lustre/ldlm/ldlm_request.c \ $(top_srcdir)/lustre/ldlm/ldlm_lockd.c \ - $(top_srcdir)/lustre/ldlm/ldlm_internal.h + $(top_srcdir)/lustre/ldlm/ldlm_internal.h \ + $(top_srcdir)/lustre/ldlm/ldlm_flock.c COMMON_SOURCES = client.c recover.c connection.c niobuf.c pack_generic.c \ events.c ptlrpc_module.c service.c pinger.c recov_thread.c llog_net.c \ diff --git a/lustre/ptlrpc/events.c b/lustre/ptlrpc/events.c index 735fc20..fd4260b 100644 --- a/lustre/ptlrpc/events.c +++ b/lustre/ptlrpc/events.c @@ -217,6 +217,9 @@ void request_in_callback(ptl_event_t *ev) ptlrpc_id2str(&req->rq_peer, req->rq_peerstr); req->rq_rqbd = rqbd; req->rq_phase = RQ_PHASE_NEW; +#if CRAY_PORTALS + req->rq_uid = ev->uid; +#endif spin_lock_irqsave (&service->srv_lock, flags); @@ -371,7 +374,13 @@ int ptlrpc_uuid_to_peer (struct obd_uuid *uuid, struct ptlrpc_peer *peer) for (i = 0; i < ptlrpc_ninterfaces; i++) { pni = &ptlrpc_interfaces[i]; +#if !CRAY_PORTALS if (pni->pni_number == peer_nal) { +#else + /* compatible nals but may be from different bridges */ + if (NALID_FROM_IFACE(pni->pni_number) == + NALID_FROM_IFACE(peer_nal)) { +#endif peer->peer_id.nid = peer_nid; peer->peer_id.pid = LUSTRE_SRV_PTL_PID; peer->peer_ni = pni; @@ -427,21 +436,27 @@ ptl_pid_t 
ptl_get_pid(void) #ifndef __KERNEL__ pid = getpid(); +# if CRAY_PORTALS + /* hack to keep pid in range accepted by ernal */ + pid &= 0xFF; + if (pid == LUSTRE_SRV_PTL_PID) + pid++; +# endif #else pid = LUSTRE_SRV_PTL_PID; #endif return pid; } - + int ptlrpc_ni_init(int number, char *name, struct ptlrpc_ni *pni) { int rc; char str[20]; ptl_handle_ni_t nih; ptl_pid_t pid; - + pid = ptl_get_pid(); - + /* We're not passing any limits yet... */ rc = PtlNIInit(number, pid, NULL, NULL, &nih); if (rc != PTL_OK && rc != PTL_IFACE_DUP) { @@ -451,7 +466,7 @@ int ptlrpc_ni_init(int number, char *name, struct ptlrpc_ni *pni) } CDEBUG(D_NET, "My pid is: %x\n", ptl_get_pid()); - + PtlSnprintHandle(str, sizeof(str), nih); CDEBUG (D_NET, "init %x %s: %s\n", number, name, str); @@ -467,6 +482,11 @@ int ptlrpc_ni_init(int number, char *name, struct ptlrpc_ni *pni) /* kernel portals calls our master callback when events are added to * the event queue. In fact lustre never pulls events off this queue, * so it's only sized for some debug history. 
*/ +# if CRAY_PORTALS + rc = PtlNIDebug(pni->pni_ni_h, 0xffffffff); + if (rc != PTL_OK) + CDEBUG(D_ERROR, "Can't enable Cray Portals Debug: rc %d\n", rc); +# endif rc = PtlEQAlloc(pni->pni_ni_h, 1024, ptlrpc_master_callback, &pni->pni_eq_h); #else @@ -637,7 +657,9 @@ int ptlrpc_init_portals(void) {LONAL, "lonal"}, {RANAL, "ranal"}, #else - {CRAY_KB_ERNAL, "cray_kb_ernal"}, + {CRAY_KERN_NAL, "cray_kern_nal"}, + {CRAY_QK_NAL, "cray_qk_nal"}, + {CRAY_USER_NAL, "cray_user_nal"}, #endif }; int rc; diff --git a/lustre/ptlrpc/pers.c b/lustre/ptlrpc/pers.c index 6f5d086..2bd04f2 100644 --- a/lustre/ptlrpc/pers.c +++ b/lustre/ptlrpc/pers.c @@ -93,7 +93,13 @@ void ptlrpc_fill_bulk_md(ptl_md_t *md, struct ptlrpc_bulk_desc *desc) { #if CRAY_PORTALS LASSERT (!(md->options & (PTL_MD_IOVEC | PTL_MD_PHYS))); +#if defined(REDSTORM) && (NALID_FROM_IFACE(CRAY_QK_NAL) == PTL_IFACE_SS_ACCEL) + /* Enforce iov_count == 1 constraint only for SeaStar accel mode on + * compute nodes (ie, REDSTORM) + * + * iov_count of > 1 is supported via PTL_MD_IOVEC in other contexts */ LASSERT (desc->bd_iov_count == 1); +#endif #else LASSERT (!(md->options & (PTL_MD_IOVEC | PTL_MD_KIOV | PTL_MD_PHYS))); #endif diff --git a/lustre/ptlrpc/recover.c b/lustre/ptlrpc/recover.c index d1c33c8..e79e567 100644 --- a/lustre/ptlrpc/recover.c +++ b/lustre/ptlrpc/recover.c @@ -71,7 +71,7 @@ void ptlrpc_run_recovery_over_upcall(struct obd_device *obd) } else { CWARN("Invoked upcall %s %s %s\n", - argv[0], argv[1], argv[2]); + argv[0], argv[1], argv[2]); } } diff --git a/lustre/tests/.RC_CURRENT.tag b/lustre/tests/.RC_CURRENT.tag index 7382334..1b1bf07 100644 --- a/lustre/tests/.RC_CURRENT.tag +++ b/lustre/tests/.RC_CURRENT.tag @@ -1 +1 @@ -RC_1_3_0_19 +RC_1_3_0_30 diff --git a/lustre/tests/.cvsignore b/lustre/tests/.cvsignore index c39784f..0b2290a 100644 --- a/lustre/tests/.cvsignore +++ b/lustre/tests/.cvsignore @@ -68,4 +68,5 @@ rename_many memhog mmap_sanity rmdirmany +flock_test writemany diff --git 
a/lustre/tests/acceptance-small.sh b/lustre/tests/acceptance-small.sh index e381da4..280f81d 100755 --- a/lustre/tests/acceptance-small.sh +++ b/lustre/tests/acceptance-small.sh @@ -24,6 +24,8 @@ fi [ "$DEBUG_OFF" ] || DEBUG_OFF="eval echo $DEBUG_LVL > /proc/sys/portals/debug" [ "$DEBUG_ON" ] || DEBUG_ON="eval echo -1 > /proc/sys/portals/debug" +LIBLUSTRETESTS=${LIBLUSTRETESTS:-../liblustre/tests} + for NAME in $CONFIGS; do export NAME MOUNT START CLEAN [ -e $NAME.sh ] && sh $NAME.sh @@ -40,7 +42,7 @@ for NAME in $CONFIGS; do if [ "$DBENCH" != "no" ]; then mount | grep $MOUNT || sh llmount.sh - SPACE=`df $MOUNT | tail -n 1 | awk '{ print $4 }'` + SPACE=`df -P $MOUNT | tail -n 1 | awk '{ print $4 }'` DB_THREADS=`expr $SPACE / 50000` [ $THREADS -lt $DB_THREADS ] && DB_THREADS=$THREADS @@ -69,68 +71,72 @@ for NAME in $CONFIGS; do fi IOZONE_OPTS="-i 0 -i 1 -i 2 -+d -r $RSIZE -s $SIZE" if [ "$O_DIRECT" -a "$O_DIRECT" != "no" ]; then - IOZONE_OPTS="-I $IOZONE_OPTS" + IOZONE_OPTS="-I $IOZONE_OPTS" fi - IOZONE_FILE="-f $MOUNT/iozone" + IOZFILE="-f $MOUNT/iozone" if [ "$IOZONE" != "no" ]; then mount | grep $MOUNT || sh llmount.sh $DEBUG_OFF - iozone $IOZONE_OPTS $IOZONE_FILE + iozone $IOZONE_OPTS $IOZFILE $DEBUG_ON sh llmountcleanup.sh sh llrmount.sh - fi - if [ "$IOZONE_DIR" != "no" ]; then - mount | grep $MOUNT || sh llmount.sh - SPACE=`df $MOUNT | tail -n 1 | awk '{ print $4 }'` - IOZ_THREADS=`expr $SPACE / \( $SIZE + $SIZE / 512 \)` - [ $THREADS -lt $IOZ_THREADS ] && IOZ_THREADS=$THREADS - $DEBUG_OFF - iozone $IOZONE_OPTS $IOZONE_FILE.odir - IOZVER=`iozone -v | awk '/Revision:/ { print $3 }' | tr -d '.'` - $DEBUG_ON - sh llmountcleanup.sh - sh llrmount.sh - if [ "$IOZ_THREADS" -gt 1 -a "$IOZVER" -ge 3145 ]; then + if [ "$IOZONE_DIR" != "no" ]; then + mount | grep $MOUNT || sh llmount.sh + SPACE=`df -P $MOUNT | tail -n 1 | awk '{ print $4 }'` + IOZ_THREADS=`expr $SPACE / \( $SIZE + $SIZE / 512 \)` + [ $THREADS -lt $IOZ_THREADS ] && IOZ_THREADS=$THREADS + $DEBUG_OFF - 
THREAD=1 - IOZONE_FILE="-F " - while [ $THREAD -le $IOZ_THREADS ]; do - IOZONE_FILE="$IOZONE_FILE $MOUNT/iozone.$THREAD" - THREAD=`expr $THREAD + 1` - done - iozone $IOZONE_OPTS -t $IOZ_THREADS $IOZONE_FILE + iozone $IOZONE_OPTS $IOZFILE.odir + IOZVER=`iozone -v|awk '/Revision:/ {print $3}'|tr -d .` $DEBUG_ON sh llmountcleanup.sh sh llrmount.sh - elif [ $IOZVER -lt 3145 ]; then - VER=`iozone -v | awk '/Revision:/ { print $3 }'` - echo "iozone $VER too old for multi-threaded tests" + if [ "$IOZ_THREADS" -gt 1 -a "$IOZVER" -ge 3145 ]; then + $DEBUG_OFF + THREAD=1 + IOZFILE="-F " + while [ $THREAD -le $IOZ_THREADS ]; do + IOZFILE="$IOZFILE $MOUNT/iozone.$THREAD" + THREAD=`expr $THREAD + 1` + done + iozone $IOZONE_OPTS -t $IOZ_THREADS $IOZFILE + $DEBUG_ON + sh llmountcleanup.sh + sh llrmount.sh + elif [ $IOZVER -lt 3145 ]; then + VER=`iozone -v | awk '/Revision:/ { print $3 }'` + echo "iozone $VER too old for multi-thread test" + fi fi fi if [ "$FSX" != "no" ]; then mount | grep $MOUNT || sh llmount.sh $DEBUG_OFF - ./fsx -c 50 -p 1000 -P $TMP -l $(($SIZE * 100)) \ + ./fsx -c 50 -p 1000 -P $TMP -l $SIZE \ -N $(($COUNT * 100)) $MOUNT/fsxfile $DEBUG_ON sh llmountcleanup.sh sh llrmount.sh fi + + mkdir -p $MOUNT2 + case $NAME in + local|lov) + MDSNODE=`hostname` + MDSNAME=mds1 + CLIENT=client + ;; + *) # we could extract this from $NAME.xml somehow + ;; + esac + if [ "$SANITYN" != "no" ]; then mount | grep $MOUNT || sh llmount.sh + $DEBUG_OFF - mkdir -p $MOUNT2 - case $NAME in - local|lov) - MDSNODE=`hostname` - MDSNAME=mds1 - CLIENT=client - ;; - *) # we could extract this from $NAME.xml somehow - ;; - esac if [ "$MDSNODE" -a "$MDSNAME" -a "$CLIENT" ]; then llmount $MDSNODE:/$MDSNAME/$CLIENT $MOUNT2 SANITYLOG=$TMP/sanity.log START=: CLEAN=: sh sanityN.sh @@ -140,6 +146,19 @@ for NAME in $CONFIGS; do echo "can't mount2 for '$NAME', skipping sanityN.sh" fi + $DEBUG_ON + sh llmountcleanup.sh + sh llrmount.sh + fi + + if [ "$LIBLUSTRE" != "no" ]; then + mount | grep $MOUNT || 
sh llmount.sh + IPADDR=`ping -c 1 $MDSNODE|head -n 1|sed -e "s/[^(]*(//" -e "s/).*//"` + export ENV_LUSTRE_MNTPNT=$MOUNT2 + export ENV_LUSTRE_MNTTGT=$IPADDR:/$MDSNAME/$CLIENT + if [ -x $LIBLUSTRETESTS/sanity ]; then + $LIBLUSTRETESTS/sanity --target=$ENV_LUSTRE_MNTTGT + fi sh llmountcleanup.sh #sh llrmount.sh fi diff --git a/lustre/tests/echo.sh b/lustre/tests/echo.sh index b937c17..1d90308 100755 --- a/lustre/tests/echo.sh +++ b/lustre/tests/echo.sh @@ -31,6 +31,13 @@ h2gm () { h2elan () { echo $1 | sed 's/[^0-9]*//g' } + +h2iib () { + case $1 in + client) echo '\*' ;; + *) echo $1 | sed "s/[^0-9]*//" ;; + esac +} # FIXME: make LMC not require MDS for obdecho LOV MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`} diff --git a/lustre/tests/llmount.sh b/lustre/tests/llmount.sh index 49a962b..90ef09d 100755 --- a/lustre/tests/llmount.sh +++ b/lustre/tests/llmount.sh @@ -26,12 +26,11 @@ else fi [ "$NODE" ] && node_opt="--node $NODE" +[ "$DEBUG" ] && debug_opt="--ptldebug=$DEBUG" -${LCONF} $NOMOD $portals_opt $lustre_opt $node_opt ${REFORMAT:---reformat} $@ \ +${LCONF} $NOMOD $portals_opt $lustre_opt $debug_opt $node_opt ${REFORMAT:---reformat} $@ \ $conf_opt || exit 2 -[ $DEBUG ] && sysctl -w portals.debug=$DEBUG - if [ "$MOUNT2" ]; then $LLMOUNT -v `hostname`:/mds1/client $MOUNT2 || exit 3 fi diff --git a/lustre/tests/mount2fs.sh b/lustre/tests/mount2fs.sh index cd51424..949f447 100644 --- a/lustre/tests/mount2fs.sh +++ b/lustre/tests/mount2fs.sh @@ -15,21 +15,22 @@ MOUNT2=${MOUNT2:-${MOUNT}2} MDSSIZE=50000 FSTYPE=${FSTYPE:-ext3} +STRIPE_BYTES=${STRIPE_BYTES:-1048576} OSTDEV1=${OSTDEV1:-$TMP/ost1-`hostname`} OSTDEV2=${OSTDEV2:-$TMP/ost2-`hostname`} OSTSIZE=100000 MDSNODE=${MDSNODE:-uml1} -OSTNODE=${OSTNODE:-uml1} -CLIENT=${CLIENT:-uml2} -CLIENT2=${CLIENT2:-uml2} +OSTNODE=${OSTNODE:-uml2} +CLIENT=${CLIENT:-client1} +CLIENT2=${CLIENT2:-client2} # create nodes ${LMC} -o $config --add net --node $MDSNODE --nid $MDSNODE --nettype tcp || exit 1 ${LMC} -m $config --add net --node 
$OSTNODE --nid $OSTNODE --nettype tcp || exit 2 -${LMC} -m $config --add net --node $CLIENT --nid $CLIENT --nettype tcp || exit 3 +${LMC} -m $config --add net --node $CLIENT --nid '*' --nettype tcp || exit 3 if [ "$CLIENT" != "$CLIENT2" ]; then - ${LMC} -m $config --add net --node $CLIENT2 --nid $CLIENT --nettype tcp || exit 3 + ${LMC} -m $config --add net --node $CLIENT2 --nid '*' --nettype tcp || exit 3 fi # configure mds server @@ -37,8 +38,8 @@ ${LMC} -m $config --add mds --node $MDSNODE --mds mds1 --group fs1 --fstype $FST ${LMC} -m $config --add mds --node $MDSNODE --mds mds2 --group fs2 --fstype $FSTYPE --dev $MDSDEV2 --size $MDSSIZE ||exit 10 # configure ost -${LMC} -m $config --add lov --lov lov1 --mds mds1 --stripe_sz 65536 --stripe_cnt 0 --stripe_pattern 0 || exit 20 -${LMC} -m $config --add lov --lov lov2 --mds mds2 --stripe_sz 65536 --stripe_cnt 0 --stripe_pattern 0 || exit 20 +${LMC} -m $config --add lov --lov lov1 --mds mds1 --stripe_sz $STRIPE_BYTES --stripe_cnt 0 --stripe_pattern 0 || exit 20 +${LMC} -m $config --add lov --lov lov2 --mds mds2 --stripe_sz $STRIPE_BYTES --stripe_cnt 0 --stripe_pattern 0 || exit 20 ${LMC} -m $config --add ost --node $OSTNODE --group fs1 --lov lov1 --fstype $FSTYPE --dev $OSTDEV1 --size $OSTSIZE || exit 21 ${LMC} -m $config --add ost --node $OSTNODE --group fs2 --lov lov2 --fstype $FSTYPE --dev $OSTDEV2 --size $OSTSIZE || exit 22 diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh index b56cdbf..e0d4b3c 100755 --- a/lustre/tests/recovery-small.sh +++ b/lustre/tests/recovery-small.sh @@ -393,7 +393,7 @@ test_24() { # bug 2248 - eviction fails writeback but app doesn't see it client_reconnect [ $rc -eq 0 ] && error "multiop didn't fail fsync: rc $rc" || true } -run_test 24 "fsync error (should return error)" +run_test 24 "fsync error (should return error)" test_26() { # bug 5921 - evict dead exports # this test can only run from a client on a separate node. 
diff --git a/lustre/tests/uml.sh b/lustre/tests/uml.sh index 7b72020..a9480a8 100644 --- a/lustre/tests/uml.sh +++ b/lustre/tests/uml.sh @@ -15,7 +15,7 @@ OSTDEVBASE=$TMP/ost #etc OSTSIZE=${OSTSIZE:-100000} STRIPECNT=${STRIPECNT:-1} -STRIPE_BYTES=${STRIPE_BYTES:-$((1024 * 1024))} +STRIPE_BYTES=${STRIPE_BYTES:-1048576} OSDTYPE=${OSDTYPE:-obdfilter} OSTFAILOVER=${OSTFAILOVER:-} @@ -77,6 +77,13 @@ h2gm () { echo `gmnalnid -n$1` } +h2iib () { + case $1 in + client) echo '\*' ;; + *) echo $1 | sed "s/[^0-9]*//" ;; + esac +} + # create nodes echo -n "adding NET for:" for NODE in `echo $MDSNODE $OSTNODES $CLIENTS | tr -s " " "\n" | sort -u`; do diff --git a/lustre/utils/lconf b/lustre/utils/lconf index 6b0a399..edf1c3c 100755 --- a/lustre/utils/lconf +++ b/lustre/utils/lconf @@ -45,6 +45,11 @@ def development_mode(): return 1 return 0 +if development_mode(): + sys.path.append('../utils') +else: + sys.path.extend(PYMOD_DIR) + import Lustre # Global parameters @@ -148,8 +153,10 @@ def logall(msgs): print string.strip(s) def debug(*args): + # apparently, (non)execution of the following line affects mds device + # startup order (e.g. two mds's using loopback devices), so always do it. + msg = string.join(map(str,args)) if config.verbose: - msg = string.join(map(str,args)) print msg # ack, python's builtin int() does not support '0x123' syntax. 
@@ -469,7 +476,7 @@ class LCTLInterface: self.run(cmds) def add_peer(self, net_type, nid, hostaddr, port): - if net_type in ('tcp','openib','ra') and not config.lctl_dump: + if net_type in ('tcp','openib','ra','cray_kern_nal') and not config.lctl_dump: cmds = """ network %s add_peer %s %s %d @@ -938,7 +945,7 @@ def mkfs(dev, devsize, fstype, jsize, isize, mkfsoptions, isblock=1): panic("Unable to build fs:", dev, string.join(out)) # enable hash tree indexing on fsswe if fstype in ('ext3', 'extN', 'ldiskfs'): - htree = 'echo "feature FEATURE_C5" | debugfs -w' + htree = 'tune2fs -O dir_index' (ret, out) = run (htree, dev) if ret: panic("Unable to enable htree:", dev) @@ -953,7 +960,7 @@ def loop_base(): loop='/dev/loop' return loop -# find loop device assigned to thefile +# find loop device assigned to the file def find_loop(file): loop = loop_base() for n in xrange(0, MAX_LOOP_DEVICES): @@ -1065,7 +1072,7 @@ def sys_get_local_nid(net_type, wildcard, cluster_id): # don't need a real nid for config log - client will replace (bug5619) if config.record: local = "54321" - elif net_type in ('tcp','openib','iib','vib','ra'): + elif net_type in ('tcp','openib','iib','vib','ra','cray_kern_nal'): if ':' in wildcard: iface, star = string.split(wildcard, ':') local = if2addr(iface) @@ -2079,7 +2086,7 @@ class Client(Module): self.target_name = tgtdb.getName() self.target_uuid = tgtdb.getUUID() self.db = tgtdb - self.backup_targets = [] + self.backup_targets = [] self.tgt_dev_uuid = get_active_target(tgtdb) if not self.tgt_dev_uuid: diff --git a/lustre/utils/liblustreapi.c b/lustre/utils/liblustreapi.c index 12782df..8dffe33 100644 --- a/lustre/utils/liblustreapi.c +++ b/lustre/utils/liblustreapi.c @@ -40,6 +40,16 @@ #include #include #include +#ifdef HAVE_LINUX_TYPES_H +#include +#else +#include "types.h" +#endif +#ifdef HAVE_LINUX_UNISTD_H +#include +#else +#include +#endif #include diff --git a/lustre/utils/llmount.c b/lustre/utils/llmount.c index 12c8962..131b480 100644 --- 
a/lustre/utils/llmount.c +++ b/lustre/utils/llmount.c @@ -26,11 +26,13 @@ #include #include #include +#include #include #include #include #include #include +#include #include "obdctl.h" #include @@ -147,15 +149,26 @@ init_options(struct lustre_mount_data *lmd) int print_options(struct lustre_mount_data *lmd) { +#if CRAY_PORTALS + const int cond_print = (lmd->lmd_nal != CRAY_KB_SSNAL); +#else + const int cond_print = 1; +#endif int i; printf("mds: %s\n", lmd->lmd_mds); printf("profile: %s\n", lmd->lmd_profile); printf("server_nid: "LPX64"\n", lmd->lmd_server_nid); - printf("local_nid: "LPX64"\n", lmd->lmd_local_nid); + + if (cond_print) + printf("local_nid: "LPX64"\n", lmd->lmd_local_nid); + printf("nal: %x\n", lmd->lmd_nal); - printf("server_ipaddr: 0x%x\n", lmd->lmd_server_ipaddr); - printf("port: %d\n", lmd->lmd_port); + + if (cond_print) { + printf("server_ipaddr: 0x%x\n", lmd->lmd_server_ipaddr); + printf("port: %d\n", lmd->lmd_port); + } for (i = 0; i < route_index; i++) printf("route: "LPX64" : "LPX64" - "LPX64"\n", @@ -384,6 +397,12 @@ set_local(struct lustre_mount_data *lmd) progname, lmd->lmd_nal); return 1; +#if CRAY_PORTALS + case CRAY_KB_SSNAL: + return 0; + + case CRAY_KB_ERNAL: +#else case SOCKNAL: /* We need to do this before the mount is started if routing */ system("/sbin/modprobe ksocknal"); @@ -392,14 +411,49 @@ set_local(struct lustre_mount_data *lmd) case IIBNAL: case VIBNAL: case RANAL: +#endif + { + struct utsname uts; + rc = gethostname(buf, sizeof(buf) - 1); if (rc) { - fprintf (stderr, "%s: can't get local buf: %d\n", - progname, rc); + fprintf(stderr, "%s: can't get hostname: %s\n", + progname, strerror(rc)); return rc; } + + rc = uname(&uts); + /* for 2.6 kernels, reserve at least 8MB free, or we will + * go OOM during heavy read load */ + if (rc == 0 && strncmp(uts.release, "2.6", 3) == 0) { + int f, minfree = 32768; + char name[40], val[40]; + FILE *meminfo; + + meminfo = fopen("/proc/meminfo", "r"); + if (meminfo != NULL) { + while 
(fscanf(meminfo, "%s %s %*s\n", name, val) != EOF) { + if (strcmp(name, "MemTotal:") == 0) { + f = strtol(val, NULL, 0); + if (f > 0 && f < 8 * minfree) + minfree = f / 16; + break; + } + } + fclose(meminfo); + } + f = open("/proc/sys/vm/min_free_kbytes", O_WRONLY); + if (f >= 0) { + sprintf(val, "%d", minfree); + write(f, val, strlen(val)); + close(f); + } + } break; - case QSWNAL: { + } +#if !CRAY_PORTALS + case QSWNAL: + { char *pfiles[] = {"/proc/qsnet/elan3/device0/position", "/proc/qsnet/elan4/device0/position", "/proc/elan/device0/position", @@ -429,6 +483,7 @@ set_local(struct lustre_mount_data *lmd) } break; } +#endif } if (ptl_parse_nid (&nid, ptr) != 0) { @@ -452,6 +507,13 @@ set_peer(char *hostname, struct lustre_mount_data *lmd) progname, lmd->lmd_nal); return 1; +#if CRAY_PORTALS + case CRAY_KB_SSNAL: + lmd->lmd_server_nid = strtoll(hostname,0,0); + return 0; + + case CRAY_KB_ERNAL: +#else case IIBNAL: if (lmd->lmd_server_nid != PTL_NID_ANY) break; @@ -468,6 +530,7 @@ set_peer(char *hostname, struct lustre_mount_data *lmd) case OPENIBNAL: case VIBNAL: case RANAL: +#endif if (lmd->lmd_server_nid == PTL_NID_ANY) { if (ptl_parse_nid (&nid, hostname) != 0) { fprintf (stderr, "%s: can't parse NID %s\n", @@ -483,6 +546,7 @@ set_peer(char *hostname, struct lustre_mount_data *lmd) return (1); } break; +#if !CRAY_PORTALS case QSWNAL: { char buf[64]; @@ -504,8 +568,8 @@ set_peer(char *hostname, struct lustre_mount_data *lmd) break; } +#endif } - return 0; } @@ -518,12 +582,12 @@ build_data(char *source, char *options, struct lustre_mount_data *lmd, int rc; if (lmd_bad_magic(lmd)) - return -EINVAL; + return 4; if (strlen(source) >= sizeof(buf)) { fprintf(stderr, "%s: host:/mds/profile argument too long\n", progname); - return -EINVAL; + return 1; } strcpy(buf, source); if ((s = strchr(buf, ':'))) { diff --git a/lustre/utils/lrun b/lustre/utils/lrun index 56d3d04..e832dc3 100755 --- a/lustre/utils/lrun +++ b/lustre/utils/lrun @@ -3,11 +3,15 @@ 
LIBLUSTRE_MOUNT_POINT=${LIBLUSTRE_MOUNT_POINT:-"/mnt/lustre"} LIBLUSTRE_MOUNT_TARGET=${LIBLUSTRE_MOUNT_TARGET:-"TARGET_NOT_SET"} LIBLUSTRE_DUMPFILE=${LIBLUSTRE_DUMPFILE:-"/tmp/DUMP_FILE"} +LIBLUSTRE_DEBUG_MASK=${LIBLUSTRE_DEBUG_MASK:-"0"} +LIBLUSTRE_DEBUG_SUBSYS=${LIBLUSTRE_DEBUG_SUBSYS:-"0"} LD_PRELOAD=${LD_PRELOAD:-"/usr/lib/liblustre.so"} export LIBLUSTRE_MOUNT_POINT export LIBLUSTRE_MOUNT_TARGET export LIBLUSTRE_DUMPFILE +export LIBLUSTRE_DEBUG_MASK +export LIBLUSTRE_DEBUG_SUBSYS export LD_PRELOAD exec $@ diff --git a/lustre/utils/obd.c b/lustre/utils/obd.c index eab833c..4dff394 100644 --- a/lustre/utils/obd.c +++ b/lustre/utils/obd.c @@ -48,7 +48,9 @@ #include #include +#ifdef HAVE_ASM_PAGE_H #include /* needed for PAGE_SIZE - rread */ +#endif #include #include @@ -1397,11 +1399,11 @@ int jt_obd_test_brw(int argc, char **argv) case 'r': repeat_offset = 1; break; - + case 'x': verify = 0; break; - + default: fprintf (stderr, "Can't parse cmd '%s'\n", argv[2]); @@ -1490,7 +1492,7 @@ int jt_obd_test_brw(int argc, char **argv) len = pages * getpagesize(); thr_offset = offset_pages * getpagesize(); stride = len; - + if (thread) { pthread_mutex_lock (&shared_data->mutex); if (nthr_per_obj != 0) { @@ -2019,14 +2021,14 @@ int jt_llog_catlist(int argc, char **argv) IOC_INIT(data); data.ioc_inllen1 = max - size_round(sizeof(data)); IOC_PACK(argv[0], data); - + rc = l_ioctl(OBD_DEV_ID, OBD_IOC_CATLOGLIST, buf); - if (rc == 0) + if (rc == 0) fprintf(stdout, "%s", ((struct obd_ioctl_data*)buf)->ioc_bulk); else - fprintf(stderr, "OBD_IOC_CATLOGLIST failed: %s\n", + fprintf(stderr, "OBD_IOC_CATLOGLIST failed: %s\n", strerror(errno)); - + return rc; } -- 1.8.3.1