Whamcloud - gitweb
Land b_smallfix onto HEAD (20040512_1806)
[fs/lustre-release.git] / lustre / liblustre / llite_lib.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * Lustre Light common routines
5  *
6  *  Copyright (c) 2002, 2003 Cluster File Systems, Inc.
7  *
8  *   This file is part of Lustre, http://www.lustre.org.
9  *
10  *   Lustre is free software; you can redistribute it and/or
11  *   modify it under the terms of version 2 of the GNU General Public
12  *   License as published by the Free Software Foundation.
13  *
14  *   Lustre is distributed in the hope that it will be useful,
15  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
16  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  *   GNU General Public License for more details.
18  *
19  *   You should have received a copy of the GNU General Public License
20  *   along with Lustre; if not, write to the Free Software
21  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22  */
23
24 #include <stdlib.h>
25 #include <string.h>
26 #include <assert.h>
27 #include <signal.h>
28 #include <fcntl.h>
29 #include <netdb.h>
30 #include <syscall.h>
31 #include <sys/utsname.h>
32 #include <sys/types.h>
33 #include <sys/queue.h>
34
35 #include <netinet/in.h>
36 #include <sys/socket.h>
37 #include <arpa/inet.h>
38
39 #include <sysio.h>
40 #include <fs.h>
41 #include <mount.h>
42 #include <inode.h>
43 #include <file.h>
44
45 /* both sys/queue.h (libsysio require it) and portals/lists.h have definition
46  * of 'LIST_HEAD'. undef it to suppress warnings
47  */
48 #undef LIST_HEAD
49
50 #include <portals/ptlctl.h>     /* needed for parse_dump */
51 #include <procbridge.h>
52
53 #include "llite_lib.h"
54
55 unsigned int portal_subsystem_debug = ~0 - (S_PORTALS | S_QSWNAL | S_SOCKNAL |
56                                             S_GMNAL | S_IBNAL);
57
58 ptl_handle_ni_t         tcpnal_ni;
59 struct task_struct     *current;
60
61 /* portals interfaces */
62
63 struct ldlm_namespace;
64 struct ldlm_res_id;
65 struct obd_import;
66
67 void *inter_module_get(char *arg)
68 {
69         if (!strcmp(arg, "tcpnal_ni"))
70                 return &tcpnal_ni;
71         else if (!strcmp(arg, "ldlm_cli_cancel_unused"))
72                 return ldlm_cli_cancel_unused;
73         else if (!strcmp(arg, "ldlm_namespace_cleanup"))
74                 return ldlm_namespace_cleanup;
75         else if (!strcmp(arg, "ldlm_replay_locks"))
76                 return ldlm_replay_locks;
77         else
78                 return NULL;
79 }
80
81 /* XXX move to proper place */
82 char *portals_nid2str(int nal, ptl_nid_t nid, char *str)
83 {
84         switch(nal){
85         case TCPNAL:
86                 /* userspace NAL */
87         case SOCKNAL:
88                 snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u.%u.%u.%u",
89                          (__u32)(nid >> 32), HIPQUAD(nid));
90                 break;
91         case QSWNAL:
92         case GMNAL:
93         case IBNAL:
94                 snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u",
95                          (__u32)(nid >> 32), (__u32)nid);
96                 break;
97         default:
98                 snprintf(str, PTL_NALFMT_SIZE - 1, "?%d? %llx",
99                          nal, (long long)nid);
100                 break;
101         }
102         return str;
103 }
104
/*
 * random number generator stuff
 */

/* Descriptor of the opened /dev/urandom, or -1 while it is not open. */
static int _rand_dev_fd = -1;

/*
 * Resolve the local node name (from uname()) and return its first IPv4
 * address in host byte order.
 *
 * Returns 0 when uname() fails, or when the name does not resolve to an
 * AF_INET address (a warning is printed in the latter case).
 */
static int get_ipv4_addr(void)
{
        struct utsname myname;
        struct hostent *hptr;
        uint32_t addr;

        if (uname(&myname) < 0)
                return 0;

        hptr = gethostbyname(myname.nodename);
        if (hptr == NULL ||
            hptr->h_addrtype != AF_INET ||
            hptr->h_addr_list == NULL ||
            *hptr->h_addr_list == NULL) {
                printf("LibLustre: Warning: fail to get local IPv4 address\n");
                return 0;
        }

        /*
         * h_addr_list entries are char buffers with no alignment guarantee;
         * copy the bytes out instead of dereferencing through an int pointer.
         */
        memcpy(&addr, hptr->h_addr_list[0], sizeof(addr));

        return (int)ntohl(addr);
}
131
132 static void init_random()
133 {
134         int seed;
135         struct timeval tv;
136
137         _rand_dev_fd = syscall(SYS_open, "/dev/urandom", O_RDONLY);
138         if (_rand_dev_fd >= 0) {
139                 if (syscall(SYS_read, _rand_dev_fd, &seed, sizeof(int)) ==
140                     sizeof(int)) {
141                         srand(seed);
142                         return;
143                 }
144                 syscall(SYS_close, _rand_dev_fd);
145                 _rand_dev_fd = -1;
146         }
147
148         gettimeofday(&tv, NULL);
149         srand(tv.tv_sec + tv.tv_usec + getpid() + __swab32(get_ipv4_addr()));
150 }
151
152 void get_random_bytes(void *buf, int size)
153 {
154         char *p = buf;
155
156         if (size < 1)
157                 return;
158
159         if (_rand_dev_fd >= 0) {
160                 if (syscall(SYS_read, _rand_dev_fd, buf, size) == size)
161                         return;
162                 syscall(SYS_close, _rand_dev_fd);
163                 _rand_dev_fd = -1;
164         }
165
166         while (size--) 
167                 *p++ = rand();
168 }
169
170 int in_group_p(gid_t gid)
171 {
172         int i;
173
174         if (gid == current->fsgid)
175                 return 1;
176
177         for (i = 0; i < current->ngroups; i++) {
178                 if (gid == current->groups[i])
179                         return 1;
180         }
181
182         return 0;
183 }
184
185 static void init_capability(int *res)
186 {
187         cap_t syscap;
188         cap_flag_value_t capval;
189         int i;
190
191         *res = 0;
192
193         syscap = cap_get_proc();
194         if (!syscap) {
195                 printf("Liblustre: Warning: failed to get system capability, "
196                        "set to minimal\n");
197                 return;
198         }
199
200         for (i = 0; i < sizeof(cap_value_t) * 8; i++) {
201                 if (!cap_get_flag(syscap, i, CAP_EFFECTIVE, &capval)) {
202                         if (capval == CAP_SET) {
203                                 *res |= 1 << i;
204                         }
205                 }
206         }
207 }
208
209 static int init_current(char *comm)
210 {
211         current = malloc(sizeof(*current));
212         if (!current) {
213                 CERROR("Not enough memory\n");
214                 return -ENOMEM;
215         }
216         current->fs = &current->__fs;
217         current->fs->umask = umask(0777);
218         umask(current->fs->umask);
219
220         strncpy(current->comm, comm, sizeof(current->comm));
221         current->pid = getpid();
222         current->fsuid = geteuid();
223         current->fsgid = getegid();
224         memset(&current->pending, 0, sizeof(current->pending));
225
226         current->max_groups = sysconf(_SC_NGROUPS_MAX);
227         current->groups = malloc(sizeof(gid_t) * current->max_groups);
228         if (!current->groups) {
229                 CERROR("Not enough memory\n");
230                 return -ENOMEM;
231         }
232         current->ngroups = getgroups(current->max_groups, current->groups);
233         if (current->ngroups < 0) {
234                 perror("Error getgroups");
235                 return -EINVAL;
236         }
237
238         init_capability(&current->cap_effective);
239
240         return 0;
241 }
242
/*
 * Fill a 16-byte UUID buffer with random bytes from get_random_bytes().
 *
 * uuid_out: destination, 16 bytes.
 */
void generate_random_uuid(unsigned char uuid_out[16])
{
        /*
         * An array parameter is really a pointer, so sizeof(uuid_out) would
         * be the pointer size and leave part of the UUID unset.  Pass the
         * real length explicitly.
         */
        get_random_bytes(uuid_out, 16);
}
247
248 ptl_nid_t tcpnal_mynid;
249
250 int init_lib_portals()
251 {
252         int max_interfaces;
253         int rc;
254         ENTRY;
255
256         rc = PtlInit(&max_interfaces);
257         if (rc != PTL_OK) {
258                 CERROR("PtlInit failed: %d\n", rc);
259                 RETURN (-ENXIO);
260         }
261         RETURN(0);
262 }
263
/*
 * Placeholder for portals NAL command handling: the command is not
 * examined and success (0) is always reported.
 */
int libcfs_nal_cmd(struct portals_cfg *pcfg)
{
        /* handle portals command if we want */
        (void)pcfg;

        return 0;
}
270
271 extern int class_handle_ioctl(unsigned int cmd, unsigned long arg);
272
273 int lib_ioctl_nalcmd(int dev_id, unsigned int opc, void * ptr)
274 {
275         struct portal_ioctl_data *ptldata;
276
277         if (opc == IOC_PORTAL_NAL_CMD) {
278                 ptldata = (struct portal_ioctl_data *) ptr;
279
280                 if (ptldata->ioc_nal_cmd == NAL_CMD_REGISTER_MYNID) {
281                         tcpnal_mynid = ptldata->ioc_nid;
282                         printf("mynid: %u.%u.%u.%u\n",
283                                 (unsigned)(tcpnal_mynid>>24) & 0xFF,
284                                 (unsigned)(tcpnal_mynid>>16) & 0xFF,
285                                 (unsigned)(tcpnal_mynid>>8) & 0xFF,
286                                 (unsigned)(tcpnal_mynid) & 0xFF);
287                 }
288         }
289
290         return (0);
291 }
292
293 int lib_ioctl(int dev_id, unsigned int opc, void * ptr)
294 {
295         int rc;
296
297         if (dev_id == OBD_DEV_ID) {
298                 struct obd_ioctl_data *ioc = ptr;
299
300                 //XXX hack!!!
301                 ioc->ioc_plen1 = ioc->ioc_inllen1;
302                 ioc->ioc_pbuf1 = ioc->ioc_bulk;
303                 //XXX
304
305                 rc = class_handle_ioctl(opc, (unsigned long)ptr);
306
307                 printf ("proccssing ioctl cmd: %x, rc %d\n", opc,  rc);
308
309                 if (rc)
310                         return rc;
311         }
312         return (0);
313 }
314
315 int lllib_init(char *dumpfile)
316 {
317         pid_t pid;
318         uint32_t ip;
319         struct in_addr in;
320
321         if (!g_zconf) {
322                 /* this parse only get my nid from config file
323                  * before initialize portals
324                  */
325                 if (parse_dump(dumpfile, lib_ioctl_nalcmd))
326                         return -1;
327         } else {
328                 /* need to setup mynid before tcpnal initialization */
329                 /* a meaningful nid could help debugging */
330                 ip = get_ipv4_addr();
331                 if (ip == 0)
332                         get_random_bytes(&ip, sizeof(ip));
333                 pid = getpid() & 0xffffffff;
334                 tcpnal_mynid = ((uint64_t)ip << 32) | pid;
335
336                 in.s_addr = htonl(ip);
337                 printf("LibLustre: TCPNAL NID: %016llx (%s:%u)\n", 
338                        tcpnal_mynid, inet_ntoa(in), pid);
339         }
340
341         if (init_current("dummy") ||
342             init_obdclass() ||
343             init_lib_portals() ||
344             ptlrpc_init() ||
345             mdc_init() ||
346             lov_init() ||
347             osc_init())
348                 return -1;
349
350         if (!g_zconf && parse_dump(dumpfile, lib_ioctl))
351                 return -1;
352
353         return _sysio_fssw_register("llite", &llu_fssw_ops);
354 }
355  
#if 0
/*
 * Compiled out.  Candidate syscall-entry hook (see the matching disabled
 * assignment in __liblustre_setup_()); it only calls liblustre_wait_event(0).
 */
static void llu_check_request()
{
        liblustre_wait_event(0);
}
#endif
362
363 int liblustre_process_log(struct config_llog_instance *cfg, int allow_recov)
364 {
365         struct lustre_cfg lcfg;
366         char  *peer = "MDS_PEER_UUID";
367         struct obd_device *obd;
368         struct lustre_handle mdc_conn = {0, };
369         struct obd_export *exp;
370         char  *name = "mdc_dev";
371         class_uuid_t uuid;
372         struct obd_uuid mdc_uuid;
373         struct llog_ctxt *ctxt;
374         ptl_nid_t nid = 0;
375         int nal, err, rc = 0;
376         ENTRY;
377
378         generate_random_uuid(uuid);
379         class_uuid_unparse(uuid, &mdc_uuid);
380
381         if (ptl_parse_nid(&nid, g_zconf_mdsnid)) {
382                 CERROR("Can't parse NID %s\n", g_zconf_mdsnid);
383                 RETURN(-EINVAL);
384         }
385
386         nal = ptl_name2nal("tcp");
387         if (nal <= 0) {
388                 CERROR("Can't parse NAL tcp\n");
389                 RETURN(-EINVAL);
390         }
391         LCFG_INIT(lcfg, LCFG_ADD_UUID, NULL);
392         lcfg.lcfg_nid = nid;
393         lcfg.lcfg_inllen1 = strlen(peer) + 1;
394         lcfg.lcfg_inlbuf1 = peer;
395         lcfg.lcfg_nal = nal;
396         err = class_process_config(&lcfg);
397         if (err < 0)
398                 GOTO(out, err);
399
400         LCFG_INIT(lcfg, LCFG_ATTACH, name);
401         lcfg.lcfg_inlbuf1 = "mdc";
402         lcfg.lcfg_inllen1 = strlen(lcfg.lcfg_inlbuf1) + 1;
403         lcfg.lcfg_inlbuf2 = mdc_uuid.uuid;
404         lcfg.lcfg_inllen2 = strlen(lcfg.lcfg_inlbuf2) + 1;
405         err = class_process_config(&lcfg);
406         if (err < 0)
407                 GOTO(out_del_uuid, err);
408
409         LCFG_INIT(lcfg, LCFG_SETUP, name);
410         lcfg.lcfg_inlbuf1 = g_zconf_mdsname;
411         lcfg.lcfg_inllen1 = strlen(lcfg.lcfg_inlbuf1) + 1;
412         lcfg.lcfg_inlbuf2 = peer;
413         lcfg.lcfg_inllen2 = strlen(lcfg.lcfg_inlbuf2) + 1;
414         err = class_process_config(&lcfg);
415         if (err < 0)
416                 GOTO(out_detach, err);
417         
418         obd = class_name2obd(name);
419         if (obd == NULL)
420                 GOTO(out_cleanup, err = -EINVAL);
421
422         /* Disable initial recovery on this import */
423         err = obd_set_info(obd->obd_self_export,
424                            strlen("initial_recov"), "initial_recov",
425                            sizeof(allow_recov), &allow_recov);
426
427         err = obd_connect(&mdc_conn, obd, &mdc_uuid);
428         if (err) {
429                 CERROR("cannot connect to %s: rc = %d\n",
430                         g_zconf_mdsname, err);
431                 GOTO(out_cleanup, err);
432         }
433         
434         exp = class_conn2export(&mdc_conn);
435         
436         ctxt = exp->exp_obd->obd_llog_ctxt[LLOG_CONFIG_REPL_CTXT];
437         rc = class_config_parse_llog(ctxt, g_zconf_profile, cfg);
438         if (rc) {
439                 CERROR("class_config_parse_llog failed: rc = %d\n", rc);
440         }
441
442         err = obd_disconnect(exp, 0);
443
444 out_cleanup:
445         LCFG_INIT(lcfg, LCFG_CLEANUP, name);
446         err = class_process_config(&lcfg);
447         if (err < 0)
448                 GOTO(out, err);
449
450 out_detach:
451         LCFG_INIT(lcfg, LCFG_DETACH, name);
452         err = class_process_config(&lcfg);
453         if (err < 0)
454                 GOTO(out, err);
455
456 out_del_uuid:
457         LCFG_INIT(lcfg, LCFG_DEL_UUID, name);
458         lcfg.lcfg_inllen1 = strlen(peer) + 1;
459         lcfg.lcfg_inlbuf1 = peer;
460         err = class_process_config(&lcfg);
461
462 out:
463         if (rc == 0)
464                 rc = err;
465         
466         RETURN(rc);
467 }
468
/*
 * Parse a "host:/mdsname/profile" mount target.
 *
 * target:  string to parse; any number of '/' (including none) may follow
 *          the ':'.
 * mdsnid:  out, the text before the ':'.
 * mdsname: out, the text between the ':' (plus slashes) and the next '/'.
 * profile: out, everything after that '/'.
 *
 * Returns 0 on success, -1 when target is NULL, does not fit the internal
 * buffer (255 characters), or lacks a non-empty host, mdsname or profile.
 * The output pointers are written only on success.
 *
 * The returned strings live in a static buffer inside this function: they
 * must not be freed, and any later call (successful or not, once the length
 * check has passed) overwrites them.
 */
int ll_parse_mount_target(const char *target, char **mdsnid,
                          char **mdsname, char **profile)
{
        static char buf[256];
        char *name;
        char *prof;
        char *s;
        size_t len;

        if (target == NULL)
                return -1;

        /* refuse over-long input instead of parsing a truncated copy */
        len = strlen(target);
        if (len >= sizeof(buf))
                return -1;
        memcpy(buf, target, len + 1);

        s = strchr(buf, ':');
        if (s == NULL || s == buf)
                return -1;
        *s = '\0';

        /* step over the ':' and every '/' that directly follows it */
        while (*++s == '/')
                ;
        name = s;

        /* name[0] is never '/', so a match here implies a non-empty name */
        s = strchr(name, '/');
        if (s == NULL)
                return -1;
        *s = '\0';

        prof = s + 1;
        if (*prof == '\0')
                return -1;

        *mdsnid = buf;
        *mdsname = name;
        *profile = prof;
        return 0;
}
495
/* Names of the environment variables read by __liblustre_setup_(). */
#define ENV_LUSTRE_MNTPNT               "LIBLUSTRE_MOUNT_POINT"
#define ENV_LUSTRE_MNTTGT               "LIBLUSTRE_MOUNT_TARGET"
#define ENV_LUSTRE_TIMEOUT              "LIBLUSTRE_TIMEOUT"
#define ENV_LUSTRE_DUMPFILE             "LIBLUSTRE_DUMPFILE"

/* Defined outside this file. */
extern int _sysio_native_init();

extern unsigned int obd_timeout;

/*
 * Configuration mode and zeroconf parameters.  The three strings are filled
 * in by ll_parse_mount_target() when a mount target is given.
 */
int     g_zconf = 0;            /* 1: zeroconf (mount target), 0: dumpfile */
char   *g_zconf_mdsname = NULL; /* mdsname, for zeroconf */
char   *g_zconf_mdsnid = NULL;  /* mdsnid, for zeroconf */
char   *g_zconf_profile = NULL; /* profile, for zeroconf */
511
512
513 void __liblustre_setup_(void)
514 {
515         char *lustre_path = NULL;
516         char *target = NULL;
517         char *timeout = NULL;
518         char *dumpfile = NULL;
519         char *root_driver = "native";
520         char *lustre_driver = "llite";
521         char *root_path = "/";
522         unsigned mntflgs = 0;
523         int err;
524
525         lustre_path = getenv(ENV_LUSTRE_MNTPNT);
526         if (!lustre_path) {
527                 lustre_path = "/mnt/lustre";
528         }
529
530         target = getenv(ENV_LUSTRE_MNTTGT);
531         if (!target) {
532                 dumpfile = getenv(ENV_LUSTRE_DUMPFILE);
533                 if (!dumpfile) {
534                         CERROR("Neither mount target, nor dumpfile\n");
535                         exit(1);
536                 }
537                 g_zconf = 0;
538                 printf("LibLustre: mount point %s, dumpfile %s\n",
539                         lustre_path, dumpfile);
540         } else {
541                 if (ll_parse_mount_target(target,
542                                           &g_zconf_mdsnid,
543                                           &g_zconf_mdsname,
544                                           &g_zconf_profile)) {
545                         CERROR("mal-formed target %s \n", target);
546                         exit(1);
547                 }
548                 g_zconf = 1;
549                 printf("LibLustre: mount point %s, target %s\n",
550                         lustre_path, target);
551         }
552
553         timeout = getenv(ENV_LUSTRE_TIMEOUT);
554         if (timeout) {
555                 obd_timeout = (unsigned int) atoi(timeout);
556                 printf("LibLustre: set obd timeout as %u seconds\n",
557                         obd_timeout);
558         }
559
560         if (_sysio_init() != 0) {
561                 perror("init sysio");
562                 exit(1);
563         }
564
565         /* cygwin don't need native driver */
566 #ifndef __CYGWIN__
567         _sysio_native_init();
568 #endif
569
570         err = _sysio_mount_root(root_path, root_driver, mntflgs, NULL);
571         if (err) {
572                 perror(root_driver);
573                 exit(1);
574         }
575
576 #if 1
577         portal_debug = 0;
578         portal_subsystem_debug = 0;
579 #endif
580         init_random();
581
582         err = lllib_init(dumpfile);
583         if (err) {
584                 perror("init llite driver");
585                 exit(1);
586         }       
587
588         err = mount("/", lustre_path, lustre_driver, mntflgs, NULL);
589         if (err) {
590                 errno = -err;
591                 perror(lustre_driver);
592                 exit(1);
593         }
594
595 #if 0
596         __sysio_hook_sys_enter = llu_check_request;
597         __sysio_hook_sys_leave = NULL;
598 #endif
599 }
600
/*
 * Library shutdown: shut libsysio down first, then finalize portals.
 */
void __liblustre_cleanup_(void)
{
        _sysio_shutdown();

        PtlFini();
}