*
* Lustre Light common routines
*
- * Copyright (c) 2002, 2003 Cluster File Systems, Inc.
+ * Copyright (c) 2002-2004 Cluster File Systems, Inc.
*
* This file is part of Lustre, http://www.lustre.org.
*
#include <signal.h>
#include <sys/types.h>
#include <sys/queue.h>
-#include <sys/capability.h>
-
-#include <netinet/in.h>
-#include <sys/socket.h>
-#include <arpa/inet.h>
+#ifdef HAVE_XTIO_H
+#include <xtio.h>
+#endif
#include <sysio.h>
#include <fs.h>
#include <mount.h>
#include <inode.h>
+#ifdef HAVE_FILE_H
#include <file.h>
+#endif
+
+/* env variables */
+#define ENV_LUSTRE_MNTPNT "LIBLUSTRE_MOUNT_POINT"
+#define ENV_LUSTRE_MNTTGT "LIBLUSTRE_MOUNT_TARGET"
+#define ENV_LUSTRE_TIMEOUT "LIBLUSTRE_TIMEOUT"
+#define ENV_LUSTRE_DUMPFILE "LIBLUSTRE_DUMPFILE"
+#define ENV_LUSTRE_DEBUG_MASK "LIBLUSTRE_DEBUG_MASK"
+#define ENV_LUSTRE_DEBUG_SUBSYS "LIBLUSTRE_DEBUG_SUBSYS"
+#define ENV_LUSTRE_NAL_NAME "LIBLUSTRE_NAL_NAME"
+
+#ifdef REDSTORM
+#define CSTART_INIT
+#endif
/* both sys/queue.h (libsysio require it) and portals/lists.h have definition
* of 'LIST_HEAD'. undef it to suppress warnings
*/
#undef LIST_HEAD
+#include <portals/ptlctl.h>
-#include <portals/ptlctl.h> /* needed for parse_dump */
-#include <procbridge.h>
-
+#include "lutil.h"
#include "llite_lib.h"
-unsigned int portal_subsystem_debug = ~0 - (S_PORTALS | S_QSWNAL | S_SOCKNAL |
- S_GMNAL | S_IBNAL);
-
-ptl_handle_ni_t tcpnal_ni;
-struct task_struct *current;
-
-/* portals interfaces */
-
-struct ldlm_namespace;
-struct ldlm_res_id;
-struct obd_import;
-
-void *inter_module_get(char *arg)
+static int lllib_init(void)
{
- if (!strcmp(arg, "tcpnal_ni"))
- return &tcpnal_ni;
- else if (!strcmp(arg, "ldlm_cli_cancel_unused"))
- return ldlm_cli_cancel_unused;
- else if (!strcmp(arg, "ldlm_namespace_cleanup"))
- return ldlm_namespace_cleanup;
- else if (!strcmp(arg, "ldlm_replay_locks"))
- return ldlm_replay_locks;
- else
- return NULL;
-}
-
-/* XXX move to proper place */
-char *portals_nid2str(int nal, ptl_nid_t nid, char *str)
-{
- switch(nal){
- case TCPNAL:
- /* userspace NAL */
- case SOCKNAL:
- snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u.%u.%u.%u",
- (__u32)(nid >> 32), HIPQUAD(nid));
- break;
- case QSWNAL:
- case GMNAL:
- case IBNAL:
- snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u",
- (__u32)(nid >> 32), (__u32)nid);
- break;
- default:
- snprintf(str, PTL_NALFMT_SIZE - 1, "?%d? %llx",
- nal, (long long)nid);
- break;
- }
- return str;
-}
-
-int in_group_p(gid_t gid)
-{
- int i;
-
- if (gid == current->fsgid)
- return 1;
-
- for (i = 0; i < current->ngroups; i++) {
- if (gid == current->groups[i])
- return 1;
- }
-
- return 0;
-}
-
-static void init_capability(int *res)
-{
- cap_value_t cap_types[] = {
- CAP_CHOWN,
- CAP_DAC_OVERRIDE,
- CAP_DAC_READ_SEARCH,
- CAP_FOWNER,
- CAP_FSETID,
- CAP_KILL,
- CAP_SETGID,
- CAP_SETUID,
- /* following are linux specific, we could simply
- * remove them I think */
- CAP_SETPCAP,
- CAP_LINUX_IMMUTABLE,
- CAP_NET_BIND_SERVICE,
- CAP_NET_BROADCAST,
- CAP_NET_ADMIN,
- CAP_NET_RAW,
- CAP_IPC_LOCK,
- CAP_IPC_OWNER,
- CAP_SYS_MODULE,
- CAP_SYS_RAWIO,
- CAP_SYS_CHROOT,
- CAP_SYS_PTRACE,
- CAP_SYS_PACCT,
- CAP_SYS_ADMIN,
- CAP_SYS_BOOT,
- CAP_SYS_NICE,
- CAP_SYS_RESOURCE,
- CAP_SYS_TIME,
- CAP_SYS_TTY_CONFIG,
- CAP_MKNOD,
- CAP_LEASE,
- };
- cap_t syscap;
- cap_flag_value_t capval;
- int i;
-
- *res = 0;
-
- syscap = cap_get_proc();
- if (!syscap) {
- printf("Liblustre: Warning: failed to get system capability, "
- "set to minimal\n");
- return;
- }
-
- for (i = 0; i < sizeof(cap_types)/sizeof(cap_t); i++) {
- LASSERT(cap_types[i] < 32);
- if (!cap_get_flag(syscap, cap_types[i],
- CAP_EFFECTIVE, &capval)) {
- if (capval == CAP_SET) {
- *res |= 1 << cap_types[i];
- }
- }
- }
-}
-
-static int init_current(char *comm)
-{
- current = malloc(sizeof(*current));
- if (!current) {
- CERROR("Not enough memory\n");
- return -ENOMEM;
- }
- current->fs = &current->__fs;
- current->fs->umask = umask(0777);
- umask(current->fs->umask);
-
- strncpy(current->comm, comm, sizeof(current->comm));
- current->pid = getpid();
- current->fsuid = geteuid();
- current->fsgid = getegid();
- memset(&current->pending, 0, sizeof(current->pending));
-
- current->max_groups = sysconf(_SC_NGROUPS_MAX);
- current->groups = malloc(sizeof(gid_t) * current->max_groups);
- if (!current->groups) {
- CERROR("Not enough memory\n");
- return -ENOMEM;
- }
- current->ngroups = getgroups(current->max_groups, current->groups);
- if (current->ngroups < 0) {
- perror("Error getgroups");
- return -EINVAL;
- }
-
- init_capability(&current->cap_effective);
-
- return 0;
-}
-
-/* FIXME */
-void generate_random_uuid(unsigned char uuid_out[16])
-{
- int *arr = (int*)uuid_out;
- int i;
-
- for (i = 0; i < sizeof(uuid_out)/sizeof(int); i++)
- arr[i] = rand();
-}
-
-ptl_nid_t tcpnal_mynid;
-
-int init_lib_portals()
-{
- int max_interfaces;
- int rc;
- ENTRY;
-
- rc = PtlInit(&max_interfaces);
- if (rc != PTL_OK) {
- CERROR("PtlInit failed: %d\n", rc);
- RETURN (-ENXIO);
- }
- RETURN(0);
-}
-
-int
-libcfs_nal_cmd(struct portals_cfg *pcfg)
-{
- /* handle portals command if we want */
- return 0;
-}
-
-extern int class_handle_ioctl(unsigned int cmd, unsigned long arg);
-
-int lib_ioctl_nalcmd(int dev_id, unsigned int opc, void * ptr)
-{
- struct portal_ioctl_data *ptldata;
-
- if (opc == IOC_PORTAL_NAL_CMD) {
- ptldata = (struct portal_ioctl_data *) ptr;
-
- if (ptldata->ioc_nal_cmd == NAL_CMD_REGISTER_MYNID) {
- tcpnal_mynid = ptldata->ioc_nid;
- printf("mynid: %u.%u.%u.%u\n",
- (unsigned)(tcpnal_mynid>>24) & 0xFF,
- (unsigned)(tcpnal_mynid>>16) & 0xFF,
- (unsigned)(tcpnal_mynid>>8) & 0xFF,
- (unsigned)(tcpnal_mynid) & 0xFF);
- }
- }
-
- return (0);
-}
-
-int lib_ioctl(int dev_id, unsigned int opc, void * ptr)
-{
- int rc;
-
- if (dev_id == OBD_DEV_ID) {
- struct obd_ioctl_data *ioc = ptr;
-
- //XXX hack!!!
- ioc->ioc_plen1 = ioc->ioc_inllen1;
- ioc->ioc_pbuf1 = ioc->ioc_bulk;
- //XXX
-
- rc = class_handle_ioctl(opc, (unsigned long)ptr);
+ liblustre_set_nal_nid();
- printf ("proccssing ioctl cmd: %x, rc %d\n", opc, rc);
-
- if (rc)
- return rc;
- }
- return (0);
-}
-
-int lllib_init(char *dumpfile)
-{
- if (!g_zconf) {
- /* this parse only get my nid from config file
- * before initialize portals
- */
- if (parse_dump(dumpfile, lib_ioctl_nalcmd))
- return -1;
- } else {
- /* XXX need setup mynid before tcpnal initialize */
- tcpnal_mynid = ((uint64_t)getpid() << 32) | time(0);
- printf("LibLustre: TCPNAL NID: %016llx\n", tcpnal_mynid);
- }
-
- if (init_current("dummy") ||
+ if (liblustre_init_current("dummy") ||
init_obdclass() ||
init_lib_portals() ||
ptlrpc_init() ||
osc_init())
return -1;
- if (!g_zconf && parse_dump(dumpfile, lib_ioctl))
- return -1;
-
return _sysio_fssw_register("llite", &llu_fssw_ops);
}
-#if 0
-static void llu_check_request()
-{
- liblustre_wait_event(0);
-}
+#ifndef CRAY_PORTALS
+#define LIBLUSTRE_NAL_NAME "tcp"
+#elif defined REDSTORM
+#define LIBLUSTRE_NAL_NAME "cray_qk_ernal"
+#else
+#define LIBLUSTRE_NAL_NAME "cray_pb_ernal"
#endif
int liblustre_process_log(struct config_llog_instance *cfg, int allow_recov)
{
- struct lustre_cfg lcfg;
+ struct lustre_cfg_bufs bufs;
+ struct lustre_cfg *lcfg;
+
char *peer = "MDS_PEER_UUID";
struct obd_device *obd;
struct lustre_handle mdc_conn = {0, };
struct llog_ctxt *ctxt;
ptl_nid_t nid = 0;
int nal, err, rc = 0;
+ char *nal_name;
ENTRY;
generate_random_uuid(uuid);
RETURN(-EINVAL);
}
- nal = ptl_name2nal("tcp");
+ nal_name = getenv(ENV_LUSTRE_NAL_NAME);
+ if (!nal_name)
+ nal_name = "tcp";
+ nal = ptl_name2nal(nal_name);
if (nal <= 0) {
- CERROR("Can't parse NAL tcp\n");
+ CERROR("Can't parse NAL %s\n", nal_name);
RETURN(-EINVAL);
}
- LCFG_INIT(lcfg, LCFG_ADD_UUID, NULL);
- lcfg.lcfg_nid = nid;
- lcfg.lcfg_inllen1 = strlen(peer) + 1;
- lcfg.lcfg_inlbuf1 = peer;
- lcfg.lcfg_nal = nal;
- err = class_process_config(&lcfg);
+ lustre_cfg_bufs_reset(&bufs, NULL);
+ lustre_cfg_bufs_set_string(&bufs, 1, peer);
+ lcfg = lustre_cfg_new(LCFG_ADD_UUID, &bufs);
+ lcfg->lcfg_nid = nid;
+ lcfg->lcfg_nal = nal;
+ err = class_process_config(lcfg);
+ lustre_cfg_free(lcfg);
if (err < 0)
GOTO(out, err);
- LCFG_INIT(lcfg, LCFG_ATTACH, name);
- lcfg.lcfg_inlbuf1 = "mdc";
- lcfg.lcfg_inllen1 = strlen(lcfg.lcfg_inlbuf1) + 1;
- lcfg.lcfg_inlbuf2 = mdc_uuid.uuid;
- lcfg.lcfg_inllen2 = strlen(lcfg.lcfg_inlbuf2) + 1;
- err = class_process_config(&lcfg);
+ lustre_cfg_bufs_reset(&bufs, name);
+ lustre_cfg_bufs_set_string(&bufs, 1, OBD_MDC_DEVICENAME);
+ lustre_cfg_bufs_set_string(&bufs, 2, mdc_uuid.uuid);
+ lcfg = lustre_cfg_new(LCFG_ATTACH, &bufs);
+ err = class_process_config(lcfg);
+ lustre_cfg_free(lcfg);
if (err < 0)
GOTO(out_del_uuid, err);
- LCFG_INIT(lcfg, LCFG_SETUP, name);
- lcfg.lcfg_inlbuf1 = g_zconf_mdsname;
- lcfg.lcfg_inllen1 = strlen(lcfg.lcfg_inlbuf1) + 1;
- lcfg.lcfg_inlbuf2 = peer;
- lcfg.lcfg_inllen2 = strlen(lcfg.lcfg_inlbuf2) + 1;
- err = class_process_config(&lcfg);
+ lustre_cfg_bufs_reset(&bufs, name);
+ lustre_cfg_bufs_set_string(&bufs, 1, g_zconf_mdsname);
+ lustre_cfg_bufs_set_string(&bufs, 2, peer);
+ lcfg = lustre_cfg_new(LCFG_SETUP, &bufs);
+ err = class_process_config(lcfg);
+ lustre_cfg_free(lcfg);
if (err < 0)
GOTO(out_detach, err);
-
+
obd = class_name2obd(name);
if (obd == NULL)
GOTO(out_cleanup, err = -EINVAL);
strlen("initial_recov"), "initial_recov",
sizeof(allow_recov), &allow_recov);
- err = obd_connect(&mdc_conn, obd, &mdc_uuid);
+ err = obd_connect(&mdc_conn, obd, &mdc_uuid, NULL, 0);
if (err) {
CERROR("cannot connect to %s: rc = %d\n",
g_zconf_mdsname, err);
GOTO(out_cleanup, err);
}
-
+
exp = class_conn2export(&mdc_conn);
-
+
ctxt = exp->exp_obd->obd_llog_ctxt[LLOG_CONFIG_REPL_CTXT];
- rc = class_config_parse_llog(ctxt, g_zconf_profile, cfg);
- if (rc) {
- CERROR("class_config_parse_llog failed: rc = %d\n", rc);
- }
+ rc = class_config_process_llog(ctxt, g_zconf_profile, cfg);
+ if (rc)
+ CERROR("class_config_process_llog failed: rc = %d\n", rc);
err = obd_disconnect(exp, 0);
out_cleanup:
- LCFG_INIT(lcfg, LCFG_CLEANUP, name);
- err = class_process_config(&lcfg);
+ lustre_cfg_bufs_reset(&bufs, name);
+ lcfg = lustre_cfg_new(LCFG_CLEANUP, &bufs);
+ err = class_process_config(lcfg);
+ lustre_cfg_free(lcfg);
if (err < 0)
GOTO(out, err);
out_detach:
- LCFG_INIT(lcfg, LCFG_DETACH, name);
- err = class_process_config(&lcfg);
+ lustre_cfg_bufs_reset(&bufs, name);
+ lcfg = lustre_cfg_new(LCFG_DETACH, &bufs);
+ err = class_process_config(lcfg);
+ lustre_cfg_free(lcfg);
if (err < 0)
GOTO(out, err);
out_del_uuid:
- LCFG_INIT(lcfg, LCFG_DEL_UUID, name);
- lcfg.lcfg_inllen1 = strlen(peer) + 1;
- lcfg.lcfg_inlbuf1 = peer;
- err = class_process_config(&lcfg);
-
+ lustre_cfg_bufs_reset(&bufs, name);
+ lustre_cfg_bufs_set_string(&bufs, 1, peer);
+ lcfg = lustre_cfg_new(LCFG_DEL_UUID, &bufs);
+ err = class_process_config(lcfg);
+ lustre_cfg_free(lcfg);
out:
if (rc == 0)
rc = err;
-
+
RETURN(rc);
}
if ((s = strchr(buf, ':'))) {
*mdsnid = buf;
*s = '\0';
-
+
while (*++s == '/')
;
*mdsname = s;
return -1;
}
-/* env variables */
-#define ENV_LUSTRE_MNTPNT "LIBLUSTRE_MOUNT_POINT"
-#define ENV_LUSTRE_MNTTGT "LIBLUSTRE_MOUNT_TARGET"
-#define ENV_LUSTRE_TIMEOUT "LIBLUSTRE_TIMEOUT"
-#define ENV_LUSTRE_DUMPFILE "LIBLUSTRE_DUMPFILE"
+/*
+ * Early liblustre init.
+ * Called from C startup in catamount apps, before main().
+ *
+ * The following is a skeleton sysio startup sequence,
+ * as implemented in C startup (skipping error handling).
+ * In this framework none of these calls need be made here
+ * or in the apps themselves. The NAMESPACE_STRING specifying
+ * the initial set of fs ops (creates, mounts, etc.) is passed
+ * as an environment variable.
+ *
+ * _sysio_init();
+ * _sysio_incore_init();
+ * _sysio_native_init();
+ * _sysio_lustre_init();
+ * _sysio_boot(NAMESPACE_STRING);
+ *
+ * The name _sysio_lustre_init() follows the naming convention
+ * established in other fs drivers from libsysio:
+ * _sysio_incore_init(), _sysio_native_init()
+ *
+ * _sysio_lustre_init() must be called before _sysio_boot()
+ * to enable libsysio's processing of namespace init strings containing
+ * lustre filesystem operations.
+ *
+ * What this function does: it calls liblustre_init_random() and then
+ * lllib_init(). If lllib_init() returns nonzero, the failure is
+ * reported with perror("init llite driver") and that same value is
+ * returned to the caller; otherwise 0 is returned. The function never
+ * exits the process itself. The "#if 0" section is a disabled debugging
+ * aid that would set every bit in both portals debug masks.
+ */
+int _sysio_lustre_init(void)
+{
+ int err;
-extern int _sysio_native_init();
+#if 0
+ portal_debug = -1;
+ portal_subsystem_debug = -1;
+#endif
+ liblustre_init_random();
+
+ err = lllib_init();
+ if (err) {
+ perror("init llite driver");
+ }
+ return err;
+}
+
+extern int _sysio_native_init();
extern unsigned int obd_timeout;
+char *lustre_path = NULL;
+
/* global variables */
-int g_zconf = 0; /* zeroconf or dumpfile */
char *g_zconf_mdsname = NULL; /* mdsname, for zeroconf */
char *g_zconf_mdsnid = NULL; /* mdsnid, for zeroconf */
char *g_zconf_profile = NULL; /* profile, for zeroconf */
void __liblustre_setup_(void)
{
- char *lustre_path = NULL;
char *target = NULL;
char *timeout = NULL;
- char *dumpfile = NULL;
+ char *debug_mask = NULL;
+ char *debug_subsys = NULL;
char *root_driver = "native";
char *lustre_driver = "llite";
char *root_path = "/";
unsigned mntflgs = 0;
-
int err;
- /* consider tha case of starting multiple liblustre instances
- * at a same time on single node.
- */
- srand(time(NULL) + getpid());
-
lustre_path = getenv(ENV_LUSTRE_MNTPNT);
if (!lustre_path) {
lustre_path = "/mnt/lustre";
}
+ /* mount target: mandatory, taken from the LIBLUSTRE_MOUNT_TARGET
+ * environment variable and split by ll_parse_mount_target() into
+ * MDS nid, MDS name and profile; a missing, malformed or
+ * incomplete target terminates the process with exit(1).
+ */
target = getenv(ENV_LUSTRE_MNTTGT);
if (!target) {
- dumpfile = getenv(ENV_LUSTRE_DUMPFILE);
- if (!dumpfile) {
- CERROR("Neither mount target, nor dumpfile\n");
- exit(1);
- }
- g_zconf = 0;
- printf("LibLustre: mount point %s, dumpfile %s\n",
- lustre_path, dumpfile);
- } else {
- if (ll_parse_mount_target(target,
- &g_zconf_mdsnid,
- &g_zconf_mdsname,
- &g_zconf_profile)) {
- CERROR("mal-formed target %s \n", target);
- exit(1);
- }
- g_zconf = 1;
- printf("LibLustre: mount point %s, target %s\n",
- lustre_path, target);
+ printf("LibLustre: no mount target specified\n");
+ exit(1);
+ }
+ if (ll_parse_mount_target(target,
+ &g_zconf_mdsnid,
+ &g_zconf_mdsname,
+ &g_zconf_profile)) {
+ CERROR("mal-formed target %s \n", target);
+ exit(1);
}
+ if (!g_zconf_mdsnid || !g_zconf_mdsname || !g_zconf_profile) {
+ printf("Liblustre: invalid target %s\n", target);
+ exit(1);
+ }
+ printf("LibLustre: mount point %s, target %s\n",
+ lustre_path, target);
timeout = getenv(ENV_LUSTRE_TIMEOUT);
if (timeout) {
- obd_timeout = (unsigned int) atoi(timeout);
+ obd_timeout = (unsigned int) strtol(timeout, NULL, 0);
printf("LibLustre: set obd timeout as %u seconds\n",
obd_timeout);
}
- if (_sysio_init() != 0) {
- perror("init sysio");
- exit(1);
- }
+ /* debug masks: optional overrides for portal_debug and
+ * portal_subsystem_debug; strtol() is called with base 0, so
+ * decimal, octal (0...) and hex (0x...) values are accepted.
+ * No validation of the parsed value is done.
+ */
+ debug_mask = getenv(ENV_LUSTRE_DEBUG_MASK);
+ if (debug_mask)
+ portal_debug = (unsigned int) strtol(debug_mask, NULL, 0);
+
+ debug_subsys = getenv(ENV_LUSTRE_DEBUG_SUBSYS);
+ if (debug_subsys)
+ portal_subsystem_debug =
+ (unsigned int) strtol(debug_subsys, NULL, 0);
+
- /* cygwin don't need native driver */
-#ifndef __CYGWIN__
+#ifdef INIT_SYSIO
+ /* initialize libsysio & mount rootfs (only when INIT_SYSIO is
+ * defined): failures of _sysio_init(), _sysio_mount_root() and
+ * _sysio_lustre_init() exit the process; the return value of
+ * _sysio_native_init() is not checked.
+ */
+ if (_sysio_init()) {
+ perror("init sysio");
+ exit(1);
+ }
_sysio_native_init();
-#endif
- err = _sysio_mount_root(root_path, root_driver, mntflgs, NULL);
- if (err) {
- perror(root_driver);
- exit(1);
- }
+ err = _sysio_mount_root(root_path, root_driver, mntflgs, NULL);
+ if (err) {
+ perror(root_driver);
+ exit(1);
+ }
-#if 1
- portal_debug = 0;
- portal_subsystem_debug = 0;
-#endif
- err = lllib_init(dumpfile);
- if (err) {
- perror("init llite driver");
- exit(1);
- }
+ if (_sysio_lustre_init())
+ exit(1);
+#endif /* INIT_SYSIO */
err = mount("/", lustre_path, lustre_driver, mntflgs, NULL);
- if (err) {
- errno = -err;
- perror(lustre_driver);
- exit(1);
- }
-
-#if 0
- __sysio_hook_sys_enter = llu_check_request;
- __sysio_hook_sys_leave = NULL;
-#endif
+ if (err) {
+ errno = -err;
+ perror(lustre_driver);
+ exit(1);
+ }
}
void __liblustre_cleanup_(void)
{
- _sysio_shutdown();
+ /* Cleanup order: chdir to "/", then (only when INIT_SYSIO is
+ * defined) shut down libsysio and portals.
+ *
+ * The user app might have chdir()'d into a lustre directory, which
+ * would leave a busy pnode during the final libsysio cleanup, so we
+ * chdir back to "/" here. This can't fix the situation where
+ * liblustre itself is mounted at "/".
+ */
+ chdir("/");
+#if 0
+ umount(lustre_path);
+#endif
+ /* We can't call umount here (hence the disabled call above),
+ * because libsysio will not clean up open files for us.
+ * _sysio_shutdown() cleans up fds first, but that also closes the
+ * sockets we need to umount liblustre. This dilemma led to another
+ * hack in libsysio/src/file_hack.c. FIXME
+ */
+#ifdef INIT_SYSIO
+ _sysio_shutdown();
+ cleanup_lib_portals();
PtlFini();
+#else
+ /*
+ * Don't do any libsysio or low-level portals cleanup here;
+ * the platform framework does it. Only cleanup_lib_portals()
+ * is called.
+ */
+ cleanup_lib_portals();
+#endif
}