Whamcloud - gitweb
Land from b_hd_pid to HEAD
[fs/lustre-release.git] / lustre / liblustre / llite_lib.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * Lustre Light common routines
5  *
6  *  Copyright (c) 2002, 2003 Cluster File Systems, Inc.
7  *
8  *   This file is part of Lustre, http://www.lustre.org.
9  *
10  *   Lustre is free software; you can redistribute it and/or
11  *   modify it under the terms of version 2 of the GNU General Public
12  *   License as published by the Free Software Foundation.
13  *
14  *   Lustre is distributed in the hope that it will be useful,
15  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
16  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  *   GNU General Public License for more details.
18  *
19  *   You should have received a copy of the GNU General Public License
20  *   along with Lustre; if not, write to the Free Software
21  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22  */
23
24 #include <stdlib.h>
25 #include <string.h>
26 #include <assert.h>
27 #include <signal.h>
28 #include <fcntl.h>
29 #include <netdb.h>
30 #include <syscall.h>
31 #include <sys/utsname.h>
32 #include <sys/types.h>
33 #include <sys/queue.h>
34
35 #include <netinet/in.h>
36 #include <sys/socket.h>
37 #include <arpa/inet.h>
38
39 #include <sysio.h>
40 #include <fs.h>
41 #include <mount.h>
42 #include <inode.h>
43 #include <file.h>
44
45 /* Both sys/queue.h (required by libsysio) and portals/lists.h define
46  * 'LIST_HEAD'; undefine it here to suppress the resulting warnings.
47  */
48 #undef LIST_HEAD
49
50 #include <portals/ptlctl.h>     /* needed for parse_dump */
51 #include <procbridge.h>
52
53 #include "llite_lib.h"
54
55 unsigned int portal_subsystem_debug = ~0 - (S_PORTALS | S_QSWNAL | S_SOCKNAL |
56                                             S_GMNAL | S_IBNAL);
57
58 ptl_handle_ni_t         tcpnal_ni;
59 struct task_struct     *current;
60
61 /* portals interfaces */
62
63 struct ldlm_namespace;
64 struct ldlm_res_id;
65 struct obd_import;
66
67 void *inter_module_get(char *arg)
68 {
69         if (!strcmp(arg, "tcpnal_ni"))
70                 return &tcpnal_ni;
71         else if (!strcmp(arg, "ldlm_cli_cancel_unused"))
72                 return ldlm_cli_cancel_unused;
73         else if (!strcmp(arg, "ldlm_namespace_cleanup"))
74                 return ldlm_namespace_cleanup;
75         else if (!strcmp(arg, "ldlm_replay_locks"))
76                 return ldlm_replay_locks;
77         else
78                 return NULL;
79 }
80
81 /* XXX move to proper place */
82 char *portals_nid2str(int nal, ptl_nid_t nid, char *str)
83 {
84         switch(nal){
85         case TCPNAL:
86                 /* userspace NAL */
87         case SOCKNAL:
88                 snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u.%u.%u.%u",
89                          (__u32)(nid >> 32), HIPQUAD(nid));
90                 break;
91         case QSWNAL:
92         case GMNAL:
93         case IBNAL:
94                 snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u",
95                          (__u32)(nid >> 32), (__u32)nid);
96                 break;
97         default:
98                 snprintf(str, PTL_NALFMT_SIZE - 1, "?%d? %llx",
99                          nal, (long long)nid);
100                 break;
101         }
102         return str;
103 }
104 /*      bug #4615       */
105 char *portals_id2str(int nal, ptl_process_id_t id, char *str)
106 {
107         switch(nal){
108         case TCPNAL:
109                 /* userspace NAL */
110         case SOCKNAL:
111                 snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u.%u.%u.%u,%u",
112                          (__u32)(id.nid >> 32), HIPQUAD((id.nid)) , id.pid);
113                 break;
114         case QSWNAL:
115         case GMNAL:
116         case IBNAL:
117                 snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u,%u",
118                          (__u32)(id.nid >> 32), (__u32)id.nid, id.pid);
119                 break;
120         default:
121                 snprintf(str, PTL_NALFMT_SIZE - 1, "?%d? %llx,%lx",
122                          nal, (long long)id.nid, (long)id.pid );
123                 break;
124         }
125         return str;
126 }
/*
 * Random number generator support
 */
/* Descriptor for /dev/urandom as opened by init_random(); -1 whenever the
 * device is not open, in which case rand() is used instead. */
static int _rand_dev_fd = -1;
131
/*
 * Best-effort lookup of the local IPv4 address.
 *
 * Resolves the node name reported by uname() through gethostbyname() and
 * returns the first address in host byte order.  Returns 0 when uname()
 * fails, or (after printing a warning) when the lookup does not produce a
 * usable IPv4 address.
 */
static int get_ipv4_addr()
{
        struct utsname myname;
        struct hostent *hptr;
        uint32_t addr;

        if (uname(&myname) < 0)
                return 0;

        hptr = gethostbyname(myname.nodename);
        if (hptr == NULL ||
            hptr->h_addrtype != AF_INET ||
            hptr->h_length != (int)sizeof(addr) ||
            *hptr->h_addr_list == NULL) {
                printf("LibLustre: Warning: fail to get local IPv4 address\n");
                return 0;
        }

        /* h_addr_list entries are plain byte buffers: copy the address out
         * instead of dereferencing the buffer through an int pointer, which
         * assumes alignment and breaks aliasing rules. */
        memcpy(&addr, *hptr->h_addr_list, sizeof(addr));

        return ntohl(addr);
}
153
154 static void init_random()
155 {
156         int seed;
157         struct timeval tv;
158
159         _rand_dev_fd = syscall(SYS_open, "/dev/urandom", O_RDONLY);
160         if (_rand_dev_fd >= 0) {
161                 if (syscall(SYS_read, _rand_dev_fd, &seed, sizeof(int)) ==
162                     sizeof(int)) {
163                         srand(seed);
164                         return;
165                 }
166                 syscall(SYS_close, _rand_dev_fd);
167                 _rand_dev_fd = -1;
168         }
169
170         gettimeofday(&tv, NULL);
171         srand(tv.tv_sec + tv.tv_usec + getpid() + __swab32(get_ipv4_addr()));
172 }
173
174 void get_random_bytes(void *buf, int size)
175 {
176         char *p = buf;
177
178         if (size < 1)
179                 return;
180
181         if (_rand_dev_fd >= 0) {
182                 if (syscall(SYS_read, _rand_dev_fd, buf, size) == size)
183                         return;
184                 syscall(SYS_close, _rand_dev_fd);
185                 _rand_dev_fd = -1;
186         }
187
188         while (size--) 
189                 *p++ = rand();
190 }
191
192 int in_group_p(gid_t gid)
193 {
194         int i;
195
196         if (gid == current->fsgid)
197                 return 1;
198
199         for (i = 0; i < current->ngroups; i++) {
200                 if (gid == current->groups[i])
201                         return 1;
202         }
203
204         return 0;
205 }
206
207 static void init_capability(int *res)
208 {
209         cap_t syscap;
210         cap_flag_value_t capval;
211         int i;
212
213         *res = 0;
214
215         syscap = cap_get_proc();
216         if (!syscap) {
217                 printf("Liblustre: Warning: failed to get system capability, "
218                        "set to minimal\n");
219                 return;
220         }
221
222         for (i = 0; i < sizeof(cap_value_t) * 8; i++) {
223                 if (!cap_get_flag(syscap, i, CAP_EFFECTIVE, &capval)) {
224                         if (capval == CAP_SET) {
225                                 *res |= 1 << i;
226                         }
227                 }
228         }
229 }
230
231 static int init_current(char *comm)
232 {
233         current = malloc(sizeof(*current));
234         if (!current) {
235                 CERROR("Not enough memory\n");
236                 return -ENOMEM;
237         }
238         current->fs = &current->__fs;
239         current->fs->umask = umask(0777);
240         umask(current->fs->umask);
241
242         strncpy(current->comm, comm, sizeof(current->comm));
243         current->pid = getpid();
244         current->fsuid = geteuid();
245         current->fsgid = getegid();
246         memset(&current->pending, 0, sizeof(current->pending));
247
248         current->max_groups = sysconf(_SC_NGROUPS_MAX);
249         current->groups = malloc(sizeof(gid_t) * current->max_groups);
250         if (!current->groups) {
251                 CERROR("Not enough memory\n");
252                 return -ENOMEM;
253         }
254         current->ngroups = getgroups(current->max_groups, current->groups);
255         if (current->ngroups < 0) {
256                 perror("Error getgroups");
257                 return -EINVAL;
258         }
259
260         init_capability(&current->cap_effective);
261
262         return 0;
263 }
264
/*
 * Fill the 16-byte buffer @uuid_out with random bytes.
 */
void generate_random_uuid(unsigned char uuid_out[16])
{
        /* An array parameter is really a pointer, so sizeof(uuid_out) would
         * be the pointer size (4 or 8) and leave part of the UUID unset.
         * Pass the real length instead. */
        get_random_bytes(uuid_out, 16);
}
269
270 ptl_nid_t tcpnal_mynid;
271
272 int init_lib_portals()
273 {
274         int max_interfaces;
275         int rc;
276         ENTRY;
277
278         rc = PtlInit(&max_interfaces);
279         if (rc != PTL_OK) {
280                 CERROR("PtlInit failed: %d\n", rc);
281                 RETURN (-ENXIO);
282         }
283         RETURN(0);
284 }
285
/*
 * Portals NAL command hook.  Currently a stub: @pcfg is ignored and 0 is
 * always returned.
 */
int
libcfs_nal_cmd(struct portals_cfg *pcfg)
{
        /* handle portals command if we want */
        return 0;
}
292
/* Defined outside this file; lib_ioctl() forwards obd ioctls to it. */
extern int class_handle_ioctl(unsigned int cmd, unsigned long arg);
294
295 int lib_ioctl_nalcmd(int dev_id, unsigned int opc, void * ptr)
296 {
297         struct portal_ioctl_data *ptldata;
298
299         if (opc == IOC_PORTAL_NAL_CMD) {
300                 ptldata = (struct portal_ioctl_data *) ptr;
301
302                 if (ptldata->ioc_nal_cmd == NAL_CMD_REGISTER_MYNID) {
303                         tcpnal_mynid = ptldata->ioc_nid;
304                         printf("mynid: %u.%u.%u.%u\n",
305                                 (unsigned)(tcpnal_mynid>>24) & 0xFF,
306                                 (unsigned)(tcpnal_mynid>>16) & 0xFF,
307                                 (unsigned)(tcpnal_mynid>>8) & 0xFF,
308                                 (unsigned)(tcpnal_mynid) & 0xFF);
309                 }
310         }
311
312         return (0);
313 }
314
315 int lib_ioctl(int dev_id, unsigned int opc, void * ptr)
316 {
317         int rc;
318
319         if (dev_id == OBD_DEV_ID) {
320                 struct obd_ioctl_data *ioc = ptr;
321
322                 //XXX hack!!!
323                 ioc->ioc_plen1 = ioc->ioc_inllen1;
324                 ioc->ioc_pbuf1 = ioc->ioc_bulk;
325                 //XXX
326
327                 rc = class_handle_ioctl(opc, (unsigned long)ptr);
328
329                 printf ("proccssing ioctl cmd: %x, rc %d\n", opc,  rc);
330
331                 if (rc)
332                         return rc;
333         }
334         return (0);
335 }
336
337 int lllib_init(char *dumpfile)
338 {
339         pid_t pid;
340         uint32_t ip;
341         struct in_addr in;
342
343         if (!g_zconf) {
344                 /* this parse only get my nid from config file
345                  * before initialize portals
346                  */
347                 if (parse_dump(dumpfile, lib_ioctl_nalcmd))
348                         return -1;
349         } else {
350                 /* need to setup mynid before tcpnal initialization */
351                 /* a meaningful nid could help debugging */
352                 ip = get_ipv4_addr();
353                 if (ip == 0)
354                         get_random_bytes(&ip, sizeof(ip));
355                 pid = getpid() & 0xffffffff;
356                 tcpnal_mynid = ((uint64_t)ip << 32) | pid;
357
358                 in.s_addr = htonl(ip);
359                 printf("LibLustre: TCPNAL NID: %016llx (%s:%u)\n", 
360                        tcpnal_mynid, inet_ntoa(in), pid);
361         }
362
363         if (init_current("dummy") ||
364             init_obdclass() ||
365             init_lib_portals() ||
366             ptlrpc_init() ||
367             mdc_init() ||
368             lov_init() ||
369             osc_init())
370                 return -1;
371
372         if (!g_zconf && parse_dump(dumpfile, lib_ioctl))
373                 return -1;
374
375         return _sysio_fssw_register("llite", &llu_fssw_ops);
376 }
377  
#if 0
/* Disabled: syscall-entry hook that polls for pending events without
 * blocking.  See the matching disabled hook setup in __liblustre_setup_(). */
static void llu_check_request()
{
        liblustre_wait_event(0);
}
#endif
384
385 int liblustre_process_log(struct config_llog_instance *cfg, int allow_recov)
386 {
387         struct lustre_cfg lcfg;
388         char  *peer = "MDS_PEER_UUID";
389         struct obd_device *obd;
390         struct lustre_handle mdc_conn = {0, };
391         struct obd_export *exp;
392         char  *name = "mdc_dev";
393         class_uuid_t uuid;
394         struct obd_uuid mdc_uuid;
395         struct llog_ctxt *ctxt;
396         ptl_nid_t nid = 0;
397         int nal, err, rc = 0;
398         ENTRY;
399
400         generate_random_uuid(uuid);
401         class_uuid_unparse(uuid, &mdc_uuid);
402
403         if (ptl_parse_nid(&nid, g_zconf_mdsnid)) {
404                 CERROR("Can't parse NID %s\n", g_zconf_mdsnid);
405                 RETURN(-EINVAL);
406         }
407
408         nal = ptl_name2nal("tcp");
409         if (nal <= 0) {
410                 CERROR("Can't parse NAL tcp\n");
411                 RETURN(-EINVAL);
412         }
413         LCFG_INIT(lcfg, LCFG_ADD_UUID, NULL);
414         lcfg.lcfg_nid = nid;
415         lcfg.lcfg_inllen1 = strlen(peer) + 1;
416         lcfg.lcfg_inlbuf1 = peer;
417         lcfg.lcfg_nal = nal;
418         err = class_process_config(&lcfg);
419         if (err < 0)
420                 GOTO(out, err);
421
422         LCFG_INIT(lcfg, LCFG_ATTACH, name);
423         lcfg.lcfg_inlbuf1 = "mdc";
424         lcfg.lcfg_inllen1 = strlen(lcfg.lcfg_inlbuf1) + 1;
425         lcfg.lcfg_inlbuf2 = mdc_uuid.uuid;
426         lcfg.lcfg_inllen2 = strlen(lcfg.lcfg_inlbuf2) + 1;
427         err = class_process_config(&lcfg);
428         if (err < 0)
429                 GOTO(out_del_uuid, err);
430
431         LCFG_INIT(lcfg, LCFG_SETUP, name);
432         lcfg.lcfg_inlbuf1 = g_zconf_mdsname;
433         lcfg.lcfg_inllen1 = strlen(lcfg.lcfg_inlbuf1) + 1;
434         lcfg.lcfg_inlbuf2 = peer;
435         lcfg.lcfg_inllen2 = strlen(lcfg.lcfg_inlbuf2) + 1;
436         err = class_process_config(&lcfg);
437         if (err < 0)
438                 GOTO(out_detach, err);
439         
440         obd = class_name2obd(name);
441         if (obd == NULL)
442                 GOTO(out_cleanup, err = -EINVAL);
443
444         /* Disable initial recovery on this import */
445         err = obd_set_info(obd->obd_self_export,
446                            strlen("initial_recov"), "initial_recov",
447                            sizeof(allow_recov), &allow_recov);
448
449         err = obd_connect(&mdc_conn, obd, &mdc_uuid);
450         if (err) {
451                 CERROR("cannot connect to %s: rc = %d\n",
452                         g_zconf_mdsname, err);
453                 GOTO(out_cleanup, err);
454         }
455         
456         exp = class_conn2export(&mdc_conn);
457         
458         ctxt = exp->exp_obd->obd_llog_ctxt[LLOG_CONFIG_REPL_CTXT];
459         rc = class_config_process_llog(ctxt, g_zconf_profile, &lcfg);
460         if (rc)
461                 CERROR("class_config_process_llog failed: rc = %d\n", rc);
462
463         err = obd_disconnect(exp, 0);
464
465 out_cleanup:
466         LCFG_INIT(lcfg, LCFG_CLEANUP, name);
467         err = class_process_config(&lcfg);
468         if (err < 0)
469                 GOTO(out, err);
470
471 out_detach:
472         LCFG_INIT(lcfg, LCFG_DETACH, name);
473         err = class_process_config(&lcfg);
474         if (err < 0)
475                 GOTO(out, err);
476
477 out_del_uuid:
478         LCFG_INIT(lcfg, LCFG_DEL_UUID, name);
479         lcfg.lcfg_inllen1 = strlen(peer) + 1;
480         lcfg.lcfg_inlbuf1 = peer;
481         err = class_process_config(&lcfg);
482
483 out:
484         if (rc == 0)
485                 rc = err;
486         
487         RETURN(rc);
488 }
489
/*
 * Parse a mount target of the form "host:/mdsname/profile".
 *
 * @target   string to parse; any number of '/' may follow the ':'.
 * @mdsnid   receives the text before the first ':'.
 * @mdsname  receives the text between the ':' (plus slashes) and the next '/'.
 * @profile  receives everything after that '/'.
 *
 * The three results point into a static buffer that is overwritten by the
 * next successful call, so the function is not reentrant and callers must
 * copy the strings if they need them to survive another call.
 *
 * Returns 0 on success.  Returns -1 when @target is NULL, does not fit the
 * buffer (it is rejected rather than silently truncated), or lacks the ':'
 * or the '/' separator; in that case neither the outputs nor the static
 * buffer are modified.
 */
int ll_parse_mount_target(const char *target, char **mdsnid,
                          char **mdsname, char **profile)
{
        static char buf[256];
        const char *colon;
        const char *name;
        const char *slash;
        size_t len;

        if (target == NULL)
                return -1;

        len = strlen(target);
        if (len >= sizeof(buf))
                return -1;

        /* validate on the caller's string first so that a malformed target
         * cannot clobber results handed out by an earlier call */
        colon = strchr(target, ':');
        if (colon == NULL)
                return -1;

        name = colon + 1;
        while (*name == '/')
                name++;

        slash = strchr(name, '/');
        if (slash == NULL)
                return -1;

        /* commit: copy, then cut the copy at the two separators */
        memcpy(buf, target, len + 1);
        buf[colon - target] = '\0';
        buf[slash - target] = '\0';

        *mdsnid = buf;
        *mdsname = buf + (name - target);
        *profile = buf + (slash - target) + 1;

        return 0;
}
516
/* Environment variables read by __liblustre_setup_() */
#define ENV_LUSTRE_MNTPNT               "LIBLUSTRE_MOUNT_POINT"
#define ENV_LUSTRE_MNTTGT               "LIBLUSTRE_MOUNT_TARGET"
#define ENV_LUSTRE_TIMEOUT              "LIBLUSTRE_TIMEOUT"
#define ENV_LUSTRE_DUMPFILE             "LIBLUSTRE_DUMPFILE"

/* Provided elsewhere; called from __liblustre_setup_(). */
extern int _sysio_native_init();

/* Provided elsewhere; overridden from LIBLUSTRE_TIMEOUT when that is set. */
extern unsigned int obd_timeout;

/* Global configuration state.  The three strings are filled in by
 * ll_parse_mount_target() and point into its static buffer. */
int     g_zconf = 0;            /* 1: zeroconf (mount target), 0: dumpfile */
char   *g_zconf_mdsname = NULL; /* mdsname, for zeroconf */
char   *g_zconf_mdsnid = NULL;  /* mdsnid, for zeroconf */
char   *g_zconf_profile = NULL; /* profile, for zeroconf */
532
533
534 void __liblustre_setup_(void)
535 {
536         char *lustre_path = NULL;
537         char *target = NULL;
538         char *timeout = NULL;
539         char *dumpfile = NULL;
540         char *root_driver = "native";
541         char *lustre_driver = "llite";
542         char *root_path = "/";
543         unsigned mntflgs = 0;
544         int err;
545
546         lustre_path = getenv(ENV_LUSTRE_MNTPNT);
547         if (!lustre_path) {
548                 lustre_path = "/mnt/lustre";
549         }
550
551         target = getenv(ENV_LUSTRE_MNTTGT);
552         if (!target) {
553                 dumpfile = getenv(ENV_LUSTRE_DUMPFILE);
554                 if (!dumpfile) {
555                         CERROR("Neither mount target, nor dumpfile\n");
556                         exit(1);
557                 }
558                 g_zconf = 0;
559                 printf("LibLustre: mount point %s, dumpfile %s\n",
560                         lustre_path, dumpfile);
561         } else {
562                 if (ll_parse_mount_target(target,
563                                           &g_zconf_mdsnid,
564                                           &g_zconf_mdsname,
565                                           &g_zconf_profile)) {
566                         CERROR("mal-formed target %s \n", target);
567                         exit(1);
568                 }
569                 g_zconf = 1;
570                 printf("LibLustre: mount point %s, target %s\n",
571                         lustre_path, target);
572         }
573
574         timeout = getenv(ENV_LUSTRE_TIMEOUT);
575         if (timeout) {
576                 obd_timeout = (unsigned int) atoi(timeout);
577                 printf("LibLustre: set obd timeout as %u seconds\n",
578                         obd_timeout);
579         }
580
581         if (_sysio_init() != 0) {
582                 perror("init sysio");
583                 exit(1);
584         }
585
586         /* cygwin don't need native driver */
587 #ifndef __CYGWIN__
588         _sysio_native_init();
589 #endif
590
591         err = _sysio_mount_root(root_path, root_driver, mntflgs, NULL);
592         if (err) {
593                 perror(root_driver);
594                 exit(1);
595         }
596
597 #if 1
598         portal_debug = 0;
599         portal_subsystem_debug = 0;
600 #endif
601         init_random();
602
603         err = lllib_init(dumpfile);
604         if (err) {
605                 perror("init llite driver");
606                 exit(1);
607         }       
608
609         err = mount("/", lustre_path, lustre_driver, mntflgs, NULL);
610         if (err) {
611                 errno = -err;
612                 perror(lustre_driver);
613                 exit(1);
614         }
615
616 #if 0
617         __sysio_hook_sys_enter = llu_check_request;
618         __sysio_hook_sys_leave = NULL;
619 #endif
620 }
621
/*
 * Library shutdown: stop libsysio first, then finalise portals.
 */
void __liblustre_cleanup_(void)
{
        _sysio_shutdown();
        PtlFini();
}