Whamcloud - gitweb
LU-8066 obdclass : Add infrastructure for procfs to sysfs migration
[fs/lustre-release.git] / lustre / obdecho / echo.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2010, 2016, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  * Lustre is a trademark of Sun Microsystems, Inc.
31  *
32  * lustre/obdecho/echo.c
33  *
34  * Author: Peter Braam <braam@clusterfs.com>
35  * Author: Andreas Dilger <adilger@clusterfs.com>
36  */
37
38 #define DEBUG_SUBSYSTEM S_ECHO
39
40 #include <obd_support.h>
41 #include <obd_class.h>
42 #include <lustre_debug.h>
43 #include <lustre_dlm.h>
44 #include <lprocfs_status.h>
45
46 #include "echo_internal.h"
47
/* The echo objid needs to be below 2^32, because regular FID numbers are
 * limited to 2^32 objects in f_oid for the FID_SEQ_ECHO range. b=23335
 *
 * ECHO_INIT_OID is the starting value of eo_lastino (see echo_setup()), so
 * ids handed out by echo_next_id() are strictly greater than it. */
#define ECHO_INIT_OID        0x10000000ULL
#define ECHO_HANDLE_MAGIC    0xabcd0123fedc9876ULL

/* Number of pages backing the persistent echo object
 * (ECHO_PERSISTENT_OBJID).  The page pointers are filled in by
 * echo_persistent_pages_init() and dropped by echo_persistent_pages_fini(). */
#define ECHO_PERSISTENT_PAGES (ECHO_PERSISTENT_SIZE >> PAGE_SHIFT)
static struct page *echo_persistent_pages[ECHO_PERSISTENT_PAGES];
55
/* Indices of the byte counters this file registers in obd->obd_stats;
 * LPROC_ECHO_LAST is the count passed to lprocfs_alloc_obd_stats(). */
enum {
        LPROC_ECHO_READ_BYTES  = 1,
        LPROC_ECHO_WRITE_BYTES = 2,
        LPROC_ECHO_LAST        = LPROC_ECHO_WRITE_BYTES + 1
};
61
62 static int echo_connect(const struct lu_env *env,
63                         struct obd_export **exp, struct obd_device *obd,
64                         struct obd_uuid *cluuid, struct obd_connect_data *data,
65                         void *localdata)
66 {
67         struct lustre_handle conn = { 0 };
68         int rc;
69
70         data->ocd_connect_flags &= ECHO_CONNECT_SUPPORTED;
71
72         if (data->ocd_connect_flags & OBD_CONNECT_FLAGS2)
73                 data->ocd_connect_flags2 &= ECHO_CONNECT_SUPPORTED2;
74
75         rc = class_connect(&conn, obd, cluuid);
76         if (rc) {
77                 CERROR("can't connect %d\n", rc);
78                 return rc;
79         }
80         *exp = class_conn2export(&conn);
81
82         return 0;
83 }
84
85 static int echo_disconnect(struct obd_export *exp)
86 {
87         LASSERT (exp != NULL);
88
89         return server_disconnect_export(exp);
90 }
91
/*
 * Export initialisation hook: forwards to ldlm_init_export() and returns
 * its result.
 */
static int echo_init_export(struct obd_export *exp)
{
        int rc = ldlm_init_export(exp);

        return rc;
}
96
97 static int echo_destroy_export(struct obd_export *exp)
98 {
99         ENTRY;
100
101         target_destroy_export(exp);
102         ldlm_destroy_export(exp);
103
104         RETURN(0);
105 }
106
107 static u64 echo_next_id(struct obd_device *obddev)
108 {
109         u64 id;
110
111         spin_lock(&obddev->u.echo.eo_lock);
112         id = ++obddev->u.echo.eo_lastino;
113         spin_unlock(&obddev->u.echo.eo_lock);
114
115         return id;
116 }
117
118 static int echo_create(const struct lu_env *env, struct obd_export *exp,
119                        struct obdo *oa)
120 {
121         struct obd_device *obd = class_exp2obd(exp);
122
123         if (!obd) {
124                 CERROR("invalid client cookie %#llx\n",
125                        exp->exp_handle.h_cookie);
126                 return -EINVAL;
127         }
128
129         if (!(oa->o_mode & S_IFMT)) {
130                 CERROR("echo obd: no type!\n");
131                 return -ENOENT;
132         }
133
134         if (!(oa->o_valid & OBD_MD_FLTYPE)) {
135                 CERROR("invalid o_valid %#llx\n", oa->o_valid);
136                 return -EINVAL;
137         }
138
139         ostid_set_seq_echo(&oa->o_oi);
140         if (ostid_set_id(&oa->o_oi, echo_next_id(obd))) {
141                 CERROR("Bad %llu to set " DOSTID "\n",
142                        echo_next_id(obd), POSTID(&oa->o_oi));
143                 return -EINVAL;
144         }
145         oa->o_valid = OBD_MD_FLID;
146
147         return 0;
148 }
149
150 static int echo_destroy(const struct lu_env *env, struct obd_export *exp,
151                         struct obdo *oa)
152 {
153         struct obd_device *obd = class_exp2obd(exp);
154
155         ENTRY;
156         if (!obd) {
157                 CERROR("invalid client cookie %#llx\n",
158                        exp->exp_handle.h_cookie);
159                 RETURN(-EINVAL);
160         }
161
162         if (!(oa->o_valid & OBD_MD_FLID)) {
163                 CERROR("obdo missing FLID valid flag: %#llx\n", oa->o_valid);
164                 RETURN(-EINVAL);
165         }
166
167         if (ostid_id(&oa->o_oi) > obd->u.echo.eo_lastino ||
168             ostid_id(&oa->o_oi) < ECHO_INIT_OID) {
169                 CERROR("bad destroy objid: "DOSTID"\n", POSTID(&oa->o_oi));
170                 RETURN(-EINVAL);
171         }
172
173         RETURN(0);
174 }
175
176 static int echo_getattr(const struct lu_env *env, struct obd_export *exp,
177                         struct obdo *oa)
178 {
179         struct obd_device *obd = class_exp2obd(exp);
180         u64 id = ostid_id(&oa->o_oi);
181
182         ENTRY;
183         if (!obd) {
184                 CERROR("invalid client cookie %#llx\n",
185                        exp->exp_handle.h_cookie);
186                 RETURN(-EINVAL);
187         }
188
189         if (!(oa->o_valid & OBD_MD_FLID)) {
190                 CERROR("obdo missing FLID valid flag: %#llx\n", oa->o_valid);
191                 RETURN(-EINVAL);
192         }
193
194         obdo_cpy_md(oa, &obd->u.echo.eo_oa, oa->o_valid);
195         ostid_set_seq_echo(&oa->o_oi);
196         if (ostid_set_id(&oa->o_oi, id)) {
197                 CERROR("Bad %llu to set " DOSTID "\n",
198                        id, POSTID(&oa->o_oi));
199                 RETURN(-EINVAL);
200         }
201
202         RETURN(0);
203 }
204
205 static int echo_setattr(const struct lu_env *env, struct obd_export *exp,
206                         struct obdo *oa)
207 {
208         struct obd_device *obd = class_exp2obd(exp);
209
210         ENTRY;
211         if (!obd) {
212                 CERROR("invalid client cookie %#llx\n",
213                        exp->exp_handle.h_cookie);
214                 RETURN(-EINVAL);
215         }
216
217         if (!(oa->o_valid & OBD_MD_FLID)) {
218                 CERROR("obdo missing FLID valid flag: %#llx\n", oa->o_valid);
219                 RETURN(-EINVAL);
220         }
221
222         obd->u.echo.eo_oa = *oa;
223
224         RETURN(0);
225 }
226
227 static void
228 echo_page_debug_setup(struct page *page, int rw, u64 id,
229                       __u64 offset, int len)
230 {
231         int   page_offset = offset & ~PAGE_MASK;
232         char *addr        = ((char *)kmap(page)) + page_offset;
233
234         if (len % OBD_ECHO_BLOCK_SIZE != 0)
235                 CERROR("Unexpected block size %d\n", len);
236
237         while (len > 0) {
238                 if (rw & OBD_BRW_READ)
239                         block_debug_setup(addr, OBD_ECHO_BLOCK_SIZE,
240                                           offset, id);
241                 else
242                         block_debug_setup(addr, OBD_ECHO_BLOCK_SIZE,
243                                           0xecc0ecc0ecc0ecc0ULL,
244                                           0xecc0ecc0ecc0ecc0ULL);
245
246                 addr   += OBD_ECHO_BLOCK_SIZE;
247                 offset += OBD_ECHO_BLOCK_SIZE;
248                 len    -= OBD_ECHO_BLOCK_SIZE;
249         }
250
251         kunmap(page);
252 }
253
254 static int
255 echo_page_debug_check(struct page *page, u64 id,
256                       __u64 offset, int len)
257 {
258         int   page_offset = offset & ~PAGE_MASK;
259         char *addr        = ((char *)kmap(page)) + page_offset;
260         int   rc          = 0;
261         int   rc2;
262
263         if (len % OBD_ECHO_BLOCK_SIZE != 0)
264                 CERROR("Unexpected block size %d\n", len);
265
266         while (len > 0) {
267                 rc2 = block_debug_check("echo", addr, OBD_ECHO_BLOCK_SIZE,
268                                         offset, id);
269
270                 if (rc2 != 0 && rc == 0)
271                         rc = rc2;
272
273                 addr   += OBD_ECHO_BLOCK_SIZE;
274                 offset += OBD_ECHO_BLOCK_SIZE;
275                 len    -= OBD_ECHO_BLOCK_SIZE;
276         }
277
278         kunmap(page);
279
280         return rc;
281 }
282
283 static int echo_map_nb_to_lb(struct obdo *oa, struct obd_ioobj *obj,
284                              struct niobuf_remote *nb, int *pages,
285                              struct niobuf_local *lb, int cmd, int *left)
286 {
287         gfp_t gfp_mask = (ostid_id(&obj->ioo_oid) & 1) ?
288                         GFP_HIGHUSER : GFP_KERNEL;
289         int ispersistent = ostid_id(&obj->ioo_oid) == ECHO_PERSISTENT_OBJID;
290         int debug_setup = (!ispersistent &&
291                            (oa->o_valid & OBD_MD_FLFLAGS) != 0 &&
292                            (oa->o_flags & OBD_FL_DEBUG_CHECK) != 0);
293         struct niobuf_local *res = lb;
294         u64 offset = nb->rnb_offset;
295         int len = nb->rnb_len;
296
297         while (len > 0) {
298                 int plen = PAGE_SIZE - (offset & (PAGE_SIZE-1));
299                 if (len < plen)
300                         plen = len;
301
302                 /* check for local buf overflow */
303                 if (*left == 0)
304                         return -EINVAL;
305
306                 res->lnb_file_offset = offset;
307                 res->lnb_len = plen;
308                 LASSERT((res->lnb_file_offset & ~PAGE_MASK) +
309                         res->lnb_len <= PAGE_SIZE);
310
311                 if (ispersistent &&
312                     ((res->lnb_file_offset >> PAGE_SHIFT) <
313                       ECHO_PERSISTENT_PAGES)) {
314                         res->lnb_page =
315                                 echo_persistent_pages[res->lnb_file_offset >>
316                                                       PAGE_SHIFT];
317                         /* Take extra ref so __free_pages() can be called OK */
318                         get_page(res->lnb_page);
319                 } else {
320                         res->lnb_page = alloc_page(gfp_mask);
321                         if (res->lnb_page == NULL) {
322                                 CERROR("can't get page for id " DOSTID"\n",
323                                        POSTID(&obj->ioo_oid));
324                                 return -ENOMEM;
325                         }
326                 }
327
328                 CDEBUG(D_PAGE, "$$$$ get page %p @ %llu for %d\n",
329                        res->lnb_page, res->lnb_file_offset, res->lnb_len);
330
331                 if (cmd & OBD_BRW_READ)
332                         res->lnb_rc = res->lnb_len;
333
334                 if (debug_setup)
335                         echo_page_debug_setup(res->lnb_page, cmd,
336                                               ostid_id(&obj->ioo_oid),
337                                               res->lnb_file_offset,
338                                               res->lnb_len);
339
340                 offset += plen;
341                 len -= plen;
342                 res++;
343
344                 (*left)--;
345                 (*pages)++;
346         }
347
348         return 0;
349 }
350
351 static int echo_finalize_lb(struct obdo *oa, struct obd_ioobj *obj,
352                             struct niobuf_remote *rb, int *pgs,
353                             struct niobuf_local *lb, int verify)
354 {
355         struct niobuf_local *res = lb;
356         u64 start = rb->rnb_offset >> PAGE_SHIFT;
357         u64 end   = (rb->rnb_offset + rb->rnb_len + PAGE_SIZE - 1) >>
358                     PAGE_SHIFT;
359         int     count  = (int)(end - start);
360         int     rc     = 0;
361         int     i;
362
363         for (i = 0; i < count; i++, (*pgs) ++, res++) {
364                 struct page *page = res->lnb_page;
365                 void       *addr;
366
367                 if (page == NULL) {
368                         CERROR("null page objid %llu:%p, buf %d/%d\n",
369                                ostid_id(&obj->ioo_oid), page, i,
370                                obj->ioo_bufcnt);
371                         return -EFAULT;
372                 }
373
374                 addr = kmap(page);
375
376                 CDEBUG(D_PAGE, "$$$$ use page %p, addr %p@%llu\n",
377                        res->lnb_page, addr, res->lnb_file_offset);
378
379                 if (verify) {
380                         int vrc = echo_page_debug_check(page,
381                                                         ostid_id(&obj->ioo_oid),
382                                                         res->lnb_file_offset,
383                                                         res->lnb_len);
384                         /* check all the pages always */
385                         if (vrc != 0 && rc == 0)
386                                 rc = vrc;
387                 }
388
389                 kunmap(page);
390                 /* NB see comment above regarding persistent pages */
391                 __free_page(page);
392         }
393
394         return rc;
395 }
396
397 static int echo_preprw(const struct lu_env *env, int cmd,
398                        struct obd_export *export, struct obdo *oa,
399                        int objcount, struct obd_ioobj *obj,
400                        struct niobuf_remote *nb, int *pages,
401                        struct niobuf_local *res)
402 {
403         struct obd_device *obd;
404         int tot_bytes = 0;
405         int rc = 0;
406         int i, left;
407         ENTRY;
408
409         obd = export->exp_obd;
410         if (obd == NULL)
411                 RETURN(-EINVAL);
412
413         /* Temp fix to stop falling foul of osc_announce_cached() */
414         oa->o_valid &= ~(OBD_MD_FLBLOCKS | OBD_MD_FLGRANT);
415
416         memset(res, 0, sizeof(*res) * *pages);
417
418         CDEBUG(D_PAGE, "%s %d obdos with %d IOs\n",
419                cmd == OBD_BRW_READ ? "reading" : "writing", objcount, *pages);
420
421         left = *pages;
422         *pages = 0;
423
424         for (i = 0; i < objcount; i++, obj++) {
425                 int j;
426
427                 for (j = 0 ; j < obj->ioo_bufcnt ; j++, nb++) {
428
429                         rc = echo_map_nb_to_lb(oa, obj, nb, pages,
430                                                res + *pages, cmd, &left);
431                         if (rc)
432                                 GOTO(preprw_cleanup, rc);
433
434                         tot_bytes += nb->rnb_len;
435                 }
436         }
437
438         atomic_add(*pages, &obd->u.echo.eo_prep);
439
440         if (cmd & OBD_BRW_READ)
441                 lprocfs_counter_add(obd->obd_stats, LPROC_ECHO_READ_BYTES,
442                                     tot_bytes);
443         else
444                 lprocfs_counter_add(obd->obd_stats, LPROC_ECHO_WRITE_BYTES,
445                                     tot_bytes);
446
447         CDEBUG(D_PAGE, "%d pages allocated after prep\n",
448                atomic_read(&obd->u.echo.eo_prep));
449
450         RETURN(0);
451
452 preprw_cleanup:
453         /* It is possible that we would rather handle errors by  allow
454          * any already-set-up pages to complete, rather than tearing them
455          * all down again.  I believe that this is what the in-kernel
456          * prep/commit operations do.
457          */
458         CERROR("cleaning up %u pages (%d obdos)\n", *pages, objcount);
459         for (i = 0; i < *pages; i++) {
460                 kunmap(res[i].lnb_page);
461                 /* NB if this is a persistent page, __free_page() will just
462                  * lose the extra ref gained above */
463                 __free_page(res[i].lnb_page);
464                 res[i].lnb_page = NULL;
465                 atomic_dec(&obd->u.echo.eo_prep);
466         }
467
468         return rc;
469 }
470
471 static int echo_commitrw(const struct lu_env *env, int cmd,
472                          struct obd_export *export, struct obdo *oa,
473                          int objcount, struct obd_ioobj *obj,
474                          struct niobuf_remote *rb, int niocount,
475                          struct niobuf_local *res, int rc)
476 {
477         struct obd_device *obd;
478         int pgs = 0;
479         int i;
480         ENTRY;
481
482         obd = export->exp_obd;
483         if (obd == NULL)
484                 RETURN(-EINVAL);
485
486         if (rc)
487                 GOTO(commitrw_cleanup, rc);
488
489         if ((cmd & OBD_BRW_RWMASK) == OBD_BRW_READ) {
490                 CDEBUG(D_PAGE, "reading %d obdos with %d IOs\n",
491                        objcount, niocount);
492         } else {
493                 CDEBUG(D_PAGE, "writing %d obdos with %d IOs\n",
494                        objcount, niocount);
495         }
496
497         if (niocount && res == NULL) {
498                 CERROR("NULL res niobuf with niocount %d\n", niocount);
499                 RETURN(-EINVAL);
500         }
501
502         for (i = 0; i < objcount; i++, obj++) {
503                 int verify = (rc == 0 &&
504                              ostid_id(&obj->ioo_oid) != ECHO_PERSISTENT_OBJID &&
505                               (oa->o_valid & OBD_MD_FLFLAGS) != 0 &&
506                               (oa->o_flags & OBD_FL_DEBUG_CHECK) != 0);
507                 int j;
508
509                 for (j = 0 ; j < obj->ioo_bufcnt ; j++, rb++) {
510                         int vrc = echo_finalize_lb(oa, obj, rb, &pgs, &res[pgs],
511                                                    verify);
512                         if (vrc == 0)
513                                 continue;
514
515                         if (vrc == -EFAULT)
516                                 GOTO(commitrw_cleanup, rc = vrc);
517
518                         if (rc == 0)
519                                 rc = vrc;
520                 }
521
522         }
523
524         atomic_sub(pgs, &obd->u.echo.eo_prep);
525
526         CDEBUG(D_PAGE, "%d pages remain after commit\n",
527                atomic_read(&obd->u.echo.eo_prep));
528         RETURN(rc);
529
530 commitrw_cleanup:
531         atomic_sub(pgs, &obd->u.echo.eo_prep);
532
533         CERROR("cleaning up %d pages (%d obdos)\n",
534                niocount - pgs - 1, objcount);
535
536         while (pgs < niocount) {
537                 struct page *page = res[pgs++].lnb_page;
538
539                 if (page == NULL)
540                         continue;
541
542                 /* NB see comment above regarding persistent pages */
543                 __free_page(page);
544                 atomic_dec(&obd->u.echo.eo_prep);
545         }
546         return rc;
547 }
548
549 static int echo_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
550 {
551         int                     rc;
552         __u64                   lock_flags = 0;
553         struct ldlm_res_id      res_id = {.name = {1}};
554         char                    ns_name[48];
555         ENTRY;
556
557         obd->u.echo.eo_obt.obt_magic = OBT_MAGIC;
558         spin_lock_init(&obd->u.echo.eo_lock);
559         obd->u.echo.eo_lastino = ECHO_INIT_OID;
560
561         sprintf(ns_name, "echotgt-%s", obd->obd_uuid.uuid);
562         obd->obd_namespace = ldlm_namespace_new(obd, ns_name,
563                                                 LDLM_NAMESPACE_SERVER,
564                                                 LDLM_NAMESPACE_MODEST,
565                                                 LDLM_NS_TYPE_OST);
566         if (obd->obd_namespace == NULL) {
567                 LBUG();
568                 RETURN(-ENOMEM);
569         }
570
571         rc = ldlm_cli_enqueue_local(obd->obd_namespace, &res_id, LDLM_PLAIN,
572                                     NULL, LCK_NL, &lock_flags, NULL,
573                                     ldlm_completion_ast, NULL, NULL, 0,
574                                     LVB_T_NONE, NULL, &obd->u.echo.eo_nl_lock);
575         LASSERT (rc == ELDLM_OK);
576
577         if (!lprocfs_obd_setup(obd, true) &&
578             lprocfs_alloc_obd_stats(obd, LPROC_ECHO_LAST) == 0) {
579                 lprocfs_counter_init(obd->obd_stats, LPROC_ECHO_READ_BYTES,
580                                      LPROCFS_CNTR_AVGMINMAX,
581                                      "read_bytes", "bytes");
582                 lprocfs_counter_init(obd->obd_stats, LPROC_ECHO_WRITE_BYTES,
583                                      LPROCFS_CNTR_AVGMINMAX,
584                                      "write_bytes", "bytes");
585         }
586
587         ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL,
588                            "echo_ldlm_cb_client", &obd->obd_ldlm_client);
589         RETURN(0);
590 }
591
592 static int echo_cleanup(struct obd_device *obd)
593 {
594         int leaked;
595         ENTRY;
596
597         lprocfs_obd_cleanup(obd);
598         lprocfs_free_obd_stats(obd);
599
600         ldlm_lock_decref(&obd->u.echo.eo_nl_lock, LCK_NL);
601
602         /* XXX Bug 3413; wait for a bit to ensure the BL callback has
603          * happened before calling ldlm_namespace_free() */
604         set_current_state(TASK_UNINTERRUPTIBLE);
605         schedule_timeout(cfs_time_seconds(1));
606
607         ldlm_namespace_free(obd->obd_namespace, NULL, obd->obd_force);
608         obd->obd_namespace = NULL;
609
610         leaked = atomic_read(&obd->u.echo.eo_prep);
611         if (leaked != 0)
612                 CERROR("%d prep/commitrw pages leaked\n", leaked);
613
614         RETURN(0);
615 }
616
617 struct obd_ops echo_obd_ops = {
618         .o_owner           = THIS_MODULE,
619         .o_connect         = echo_connect,
620         .o_disconnect      = echo_disconnect,
621         .o_init_export     = echo_init_export,
622         .o_destroy_export  = echo_destroy_export,
623         .o_create          = echo_create,
624         .o_destroy         = echo_destroy,
625         .o_getattr         = echo_getattr,
626         .o_setattr         = echo_setattr,
627         .o_preprw          = echo_preprw,
628         .o_commitrw        = echo_commitrw,
629         .o_setup           = echo_setup,
630         .o_cleanup         = echo_cleanup
631 };
632
633 void echo_persistent_pages_fini(void)
634 {
635         int i;
636
637         for (i = 0; i < ECHO_PERSISTENT_PAGES; i++)
638                 if (echo_persistent_pages[i] != NULL) {
639                         __free_page(echo_persistent_pages[i]);
640                         echo_persistent_pages[i] = NULL;
641                 }
642 }
643
644 int echo_persistent_pages_init(void)
645 {
646         struct page *pg;
647         int          i;
648
649         for (i = 0; i < ECHO_PERSISTENT_PAGES; i++) {
650                 gfp_t gfp_mask = (i < ECHO_PERSISTENT_PAGES/2) ?
651                         GFP_KERNEL : GFP_HIGHUSER;
652
653                 pg = alloc_page(gfp_mask);
654                 if (pg == NULL) {
655                         echo_persistent_pages_fini();
656                         return -ENOMEM;
657                 }
658
659                 memset(kmap(pg), 0, PAGE_SIZE);
660                 kunmap(pg);
661
662                 echo_persistent_pages[i] = pg;
663         }
664
665         return 0;
666 }