Whamcloud - gitweb
LU-6158 mdt: always shrink_capsule in getxattr_all
[fs/lustre-release.git] / lustre / obdecho / echo.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  *
30  * Copyright (c) 2010, 2014, Intel Corporation.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/obdecho/echo.c
37  *
38  * Author: Peter Braam <braam@clusterfs.com>
39  * Author: Andreas Dilger <adilger@clusterfs.com>
40  */
41
42 #define DEBUG_SUBSYSTEM S_ECHO
43
44 #include <obd_support.h>
45 #include <obd_class.h>
46 #include <lustre_debug.h>
47 #include <lustre_dlm.h>
48 #include <lprocfs_status.h>
49
50 #include "echo_internal.h"
51
/* The echo objid needs to be below 2^32, because regular FID numbers are
 * limited to 2^32 objects in f_oid for the FID_SEQ_ECHO range. b=23335 */
/* Initial value of eo_lastino (see echo_setup()); echo_destroy() rejects
 * object ids below it. */
#define ECHO_INIT_OID        0x10000000ULL
/* NOTE(review): not referenced anywhere in the visible part of this file. */
#define ECHO_HANDLE_MAGIC    0xabcd0123fedc9876ULL

/* Number of entries in the persistent page pool below. */
#define ECHO_PERSISTENT_PAGES (ECHO_PERSISTENT_SIZE >> PAGE_CACHE_SHIFT)
/* Pool populated by echo_persistent_pages_init() and released by
 * echo_persistent_pages_fini(); echo_map_nb_to_lb() hands these pages out
 * (with an extra reference) for I/O on ECHO_PERSISTENT_OBJID. */
static struct page *echo_persistent_pages[ECHO_PERSISTENT_PAGES];
59
/* Indices of the byte counters registered in the device's lprocfs stats. */
enum {
        LPROC_ECHO_READ_BYTES  = 1,
        LPROC_ECHO_WRITE_BYTES = 2,
        /* one past the highest counter index; used as the stats array size */
        LPROC_ECHO_LAST
};
65
66 static int echo_connect(const struct lu_env *env,
67                         struct obd_export **exp, struct obd_device *obd,
68                         struct obd_uuid *cluuid, struct obd_connect_data *data,
69                         void *localdata)
70 {
71         struct lustre_handle conn = { 0 };
72         int rc;
73
74         data->ocd_connect_flags &= ECHO_CONNECT_SUPPORTED;
75         rc = class_connect(&conn, obd, cluuid);
76         if (rc) {
77                 CERROR("can't connect %d\n", rc);
78                 return rc;
79         }
80         *exp = class_conn2export(&conn);
81
82         return 0;
83 }
84
85 static int echo_disconnect(struct obd_export *exp)
86 {
87         LASSERT (exp != NULL);
88
89         return server_disconnect_export(exp);
90 }
91
/* Export initialisation hook: forwards to ldlm_init_export(). */
static int echo_init_export(struct obd_export *exp)
{
        int rc = ldlm_init_export(exp);

        return rc;
}
96
97 static int echo_destroy_export(struct obd_export *exp)
98 {
99         ENTRY;
100
101         target_destroy_export(exp);
102         ldlm_destroy_export(exp);
103
104         RETURN(0);
105 }
106
107 static u64 echo_next_id(struct obd_device *obddev)
108 {
109         u64 id;
110
111         spin_lock(&obddev->u.echo.eo_lock);
112         id = ++obddev->u.echo.eo_lastino;
113         spin_unlock(&obddev->u.echo.eo_lock);
114
115         return id;
116 }
117
118 static int echo_create(const struct lu_env *env, struct obd_export *exp,
119                        struct obdo *oa)
120 {
121         struct obd_device *obd = class_exp2obd(exp);
122
123         if (!obd) {
124                 CERROR("invalid client cookie "LPX64"\n",
125                        exp->exp_handle.h_cookie);
126                 return -EINVAL;
127         }
128
129         if (!(oa->o_mode & S_IFMT)) {
130                 CERROR("echo obd: no type!\n");
131                 return -ENOENT;
132         }
133
134         if (!(oa->o_valid & OBD_MD_FLTYPE)) {
135                 CERROR("invalid o_valid "LPX64"\n", oa->o_valid);
136                 return -EINVAL;
137         }
138
139         ostid_set_seq_echo(&oa->o_oi);
140         ostid_set_id(&oa->o_oi, echo_next_id(obd));
141         oa->o_valid = OBD_MD_FLID;
142
143         return 0;
144 }
145
146 static int echo_destroy(const struct lu_env *env, struct obd_export *exp,
147                         struct obdo *oa)
148 {
149         struct obd_device *obd = class_exp2obd(exp);
150
151         ENTRY;
152         if (!obd) {
153                 CERROR("invalid client cookie "LPX64"\n",
154                        exp->exp_handle.h_cookie);
155                 RETURN(-EINVAL);
156         }
157
158         if (!(oa->o_valid & OBD_MD_FLID)) {
159                 CERROR("obdo missing FLID valid flag: "LPX64"\n", oa->o_valid);
160                 RETURN(-EINVAL);
161         }
162
163         if (ostid_id(&oa->o_oi) > obd->u.echo.eo_lastino ||
164             ostid_id(&oa->o_oi) < ECHO_INIT_OID) {
165                 CERROR("bad destroy objid: "DOSTID"\n", POSTID(&oa->o_oi));
166                 RETURN(-EINVAL);
167         }
168
169         RETURN(0);
170 }
171
172 static int echo_getattr(const struct lu_env *env, struct obd_export *exp,
173                         struct obdo *oa)
174 {
175         struct obd_device *obd = class_exp2obd(exp);
176         u64 id = ostid_id(&oa->o_oi);
177
178         ENTRY;
179         if (!obd) {
180                 CERROR("invalid client cookie "LPX64"\n",
181                        exp->exp_handle.h_cookie);
182                 RETURN(-EINVAL);
183         }
184
185         if (!(oa->o_valid & OBD_MD_FLID)) {
186                 CERROR("obdo missing FLID valid flag: "LPX64"\n", oa->o_valid);
187                 RETURN(-EINVAL);
188         }
189
190         obdo_cpy_md(oa, &obd->u.echo.eo_oa, oa->o_valid);
191         ostid_set_seq_echo(&oa->o_oi);
192         ostid_set_id(&oa->o_oi, id);
193
194         RETURN(0);
195 }
196
197 static int echo_setattr(const struct lu_env *env, struct obd_export *exp,
198                         struct obdo *oa)
199 {
200         struct obd_device *obd = class_exp2obd(exp);
201
202         ENTRY;
203         if (!obd) {
204                 CERROR("invalid client cookie "LPX64"\n",
205                        exp->exp_handle.h_cookie);
206                 RETURN(-EINVAL);
207         }
208
209         if (!(oa->o_valid & OBD_MD_FLID)) {
210                 CERROR("obdo missing FLID valid flag: "LPX64"\n", oa->o_valid);
211                 RETURN(-EINVAL);
212         }
213
214         obd->u.echo.eo_oa = *oa;
215
216         RETURN(0);
217 }
218
219 static void
220 echo_page_debug_setup(struct page *page, int rw, u64 id,
221                       __u64 offset, int len)
222 {
223         int   page_offset = offset & ~PAGE_MASK;
224         char *addr        = ((char *)kmap(page)) + page_offset;
225
226         if (len % OBD_ECHO_BLOCK_SIZE != 0)
227                 CERROR("Unexpected block size %d\n", len);
228
229         while (len > 0) {
230                 if (rw & OBD_BRW_READ)
231                         block_debug_setup(addr, OBD_ECHO_BLOCK_SIZE,
232                                           offset, id);
233                 else
234                         block_debug_setup(addr, OBD_ECHO_BLOCK_SIZE,
235                                           0xecc0ecc0ecc0ecc0ULL,
236                                           0xecc0ecc0ecc0ecc0ULL);
237
238                 addr   += OBD_ECHO_BLOCK_SIZE;
239                 offset += OBD_ECHO_BLOCK_SIZE;
240                 len    -= OBD_ECHO_BLOCK_SIZE;
241         }
242
243         kunmap(page);
244 }
245
246 static int
247 echo_page_debug_check(struct page *page, u64 id,
248                       __u64 offset, int len)
249 {
250         int   page_offset = offset & ~PAGE_MASK;
251         char *addr        = ((char *)kmap(page)) + page_offset;
252         int   rc          = 0;
253         int   rc2;
254
255         if (len % OBD_ECHO_BLOCK_SIZE != 0)
256                 CERROR("Unexpected block size %d\n", len);
257
258         while (len > 0) {
259                 rc2 = block_debug_check("echo", addr, OBD_ECHO_BLOCK_SIZE,
260                                         offset, id);
261
262                 if (rc2 != 0 && rc == 0)
263                         rc = rc2;
264
265                 addr   += OBD_ECHO_BLOCK_SIZE;
266                 offset += OBD_ECHO_BLOCK_SIZE;
267                 len    -= OBD_ECHO_BLOCK_SIZE;
268         }
269
270         kunmap(page);
271
272         return rc;
273 }
274
275 static int echo_map_nb_to_lb(struct obdo *oa, struct obd_ioobj *obj,
276                              struct niobuf_remote *nb, int *pages,
277                              struct niobuf_local *lb, int cmd, int *left)
278 {
279         gfp_t gfp_mask = (ostid_id(&obj->ioo_oid) & 1) ?
280                         GFP_HIGHUSER : GFP_IOFS;
281         int ispersistent = ostid_id(&obj->ioo_oid) == ECHO_PERSISTENT_OBJID;
282         int debug_setup = (!ispersistent &&
283                            (oa->o_valid & OBD_MD_FLFLAGS) != 0 &&
284                            (oa->o_flags & OBD_FL_DEBUG_CHECK) != 0);
285         struct niobuf_local *res = lb;
286         u64 offset = nb->rnb_offset;
287         int len = nb->rnb_len;
288
289         while (len > 0) {
290                 int plen = PAGE_CACHE_SIZE - (offset & (PAGE_CACHE_SIZE-1));
291                 if (len < plen)
292                         plen = len;
293
294                 /* check for local buf overflow */
295                 if (*left == 0)
296                         return -EINVAL;
297
298                 res->lnb_file_offset = offset;
299                 res->lnb_len = plen;
300                 LASSERT((res->lnb_file_offset & ~PAGE_MASK) +
301                         res->lnb_len <= PAGE_CACHE_SIZE);
302
303                 if (ispersistent &&
304                     ((res->lnb_file_offset >> PAGE_CACHE_SHIFT) <
305                       ECHO_PERSISTENT_PAGES)) {
306                         res->lnb_page =
307                                 echo_persistent_pages[res->lnb_file_offset >>
308                                                       PAGE_CACHE_SHIFT];
309                         /* Take extra ref so __free_pages() can be called OK */
310                         get_page(res->lnb_page);
311                 } else {
312                         res->lnb_page = alloc_page(gfp_mask);
313                         if (res->lnb_page == NULL) {
314                                 CERROR("can't get page for id " DOSTID"\n",
315                                        POSTID(&obj->ioo_oid));
316                                 return -ENOMEM;
317                         }
318                 }
319
320                 CDEBUG(D_PAGE, "$$$$ get page %p @ "LPU64" for %d\n",
321                        res->lnb_page, res->lnb_file_offset, res->lnb_len);
322
323                 if (cmd & OBD_BRW_READ)
324                         res->lnb_rc = res->lnb_len;
325
326                 if (debug_setup)
327                         echo_page_debug_setup(res->lnb_page, cmd,
328                                               ostid_id(&obj->ioo_oid),
329                                               res->lnb_file_offset,
330                                               res->lnb_len);
331
332                 offset += plen;
333                 len -= plen;
334                 res++;
335
336                 (*left)--;
337                 (*pages)++;
338         }
339
340         return 0;
341 }
342
343 static int echo_finalize_lb(struct obdo *oa, struct obd_ioobj *obj,
344                             struct niobuf_remote *rb, int *pgs,
345                             struct niobuf_local *lb, int verify)
346 {
347         struct niobuf_local *res = lb;
348         u64 start = rb->rnb_offset >> PAGE_CACHE_SHIFT;
349         u64 end   = (rb->rnb_offset + rb->rnb_len + PAGE_CACHE_SIZE - 1) >>
350                     PAGE_CACHE_SHIFT;
351         int     count  = (int)(end - start);
352         int     rc     = 0;
353         int     i;
354
355         for (i = 0; i < count; i++, (*pgs) ++, res++) {
356                 struct page *page = res->lnb_page;
357                 void       *addr;
358
359                 if (page == NULL) {
360                         CERROR("null page objid "LPU64":%p, buf %d/%d\n",
361                                ostid_id(&obj->ioo_oid), page, i,
362                                obj->ioo_bufcnt);
363                         return -EFAULT;
364                 }
365
366                 addr = kmap(page);
367
368                 CDEBUG(D_PAGE, "$$$$ use page %p, addr %p@"LPU64"\n",
369                        res->lnb_page, addr, res->lnb_file_offset);
370
371                 if (verify) {
372                         int vrc = echo_page_debug_check(page,
373                                                         ostid_id(&obj->ioo_oid),
374                                                         res->lnb_file_offset,
375                                                         res->lnb_len);
376                         /* check all the pages always */
377                         if (vrc != 0 && rc == 0)
378                                 rc = vrc;
379                 }
380
381                 kunmap(page);
382                 /* NB see comment above regarding persistent pages */
383                 __free_page(page);
384         }
385
386         return rc;
387 }
388
389 static int echo_preprw(const struct lu_env *env, int cmd,
390                        struct obd_export *export, struct obdo *oa,
391                        int objcount, struct obd_ioobj *obj,
392                        struct niobuf_remote *nb, int *pages,
393                        struct niobuf_local *res)
394 {
395         struct obd_device *obd;
396         int tot_bytes = 0;
397         int rc = 0;
398         int i, left;
399         ENTRY;
400
401         obd = export->exp_obd;
402         if (obd == NULL)
403                 RETURN(-EINVAL);
404
405         /* Temp fix to stop falling foul of osc_announce_cached() */
406         oa->o_valid &= ~(OBD_MD_FLBLOCKS | OBD_MD_FLGRANT);
407
408         memset(res, 0, sizeof(*res) * *pages);
409
410         CDEBUG(D_PAGE, "%s %d obdos with %d IOs\n",
411                cmd == OBD_BRW_READ ? "reading" : "writing", objcount, *pages);
412
413         left = *pages;
414         *pages = 0;
415
416         for (i = 0; i < objcount; i++, obj++) {
417                 int j;
418
419                 for (j = 0 ; j < obj->ioo_bufcnt ; j++, nb++) {
420
421                         rc = echo_map_nb_to_lb(oa, obj, nb, pages,
422                                                res + *pages, cmd, &left);
423                         if (rc)
424                                 GOTO(preprw_cleanup, rc);
425
426                         tot_bytes += nb->rnb_len;
427                 }
428         }
429
430         atomic_add(*pages, &obd->u.echo.eo_prep);
431
432         if (cmd & OBD_BRW_READ)
433                 lprocfs_counter_add(obd->obd_stats, LPROC_ECHO_READ_BYTES,
434                                     tot_bytes);
435         else
436                 lprocfs_counter_add(obd->obd_stats, LPROC_ECHO_WRITE_BYTES,
437                                     tot_bytes);
438
439         CDEBUG(D_PAGE, "%d pages allocated after prep\n",
440                atomic_read(&obd->u.echo.eo_prep));
441
442         RETURN(0);
443
444 preprw_cleanup:
445         /* It is possible that we would rather handle errors by  allow
446          * any already-set-up pages to complete, rather than tearing them
447          * all down again.  I believe that this is what the in-kernel
448          * prep/commit operations do.
449          */
450         CERROR("cleaning up %u pages (%d obdos)\n", *pages, objcount);
451         for (i = 0; i < *pages; i++) {
452                 kunmap(res[i].lnb_page);
453                 /* NB if this is a persistent page, __free_page() will just
454                  * lose the extra ref gained above */
455                 __free_page(res[i].lnb_page);
456                 res[i].lnb_page = NULL;
457                 atomic_dec(&obd->u.echo.eo_prep);
458         }
459
460         return rc;
461 }
462
463 static int echo_commitrw(const struct lu_env *env, int cmd,
464                          struct obd_export *export, struct obdo *oa,
465                          int objcount, struct obd_ioobj *obj,
466                          struct niobuf_remote *rb, int niocount,
467                          struct niobuf_local *res, int rc)
468 {
469         struct obd_device *obd;
470         int pgs = 0;
471         int i;
472         ENTRY;
473
474         obd = export->exp_obd;
475         if (obd == NULL)
476                 RETURN(-EINVAL);
477
478         if (rc)
479                 GOTO(commitrw_cleanup, rc);
480
481         if ((cmd & OBD_BRW_RWMASK) == OBD_BRW_READ) {
482                 CDEBUG(D_PAGE, "reading %d obdos with %d IOs\n",
483                        objcount, niocount);
484         } else {
485                 CDEBUG(D_PAGE, "writing %d obdos with %d IOs\n",
486                        objcount, niocount);
487         }
488
489         if (niocount && res == NULL) {
490                 CERROR("NULL res niobuf with niocount %d\n", niocount);
491                 RETURN(-EINVAL);
492         }
493
494         for (i = 0; i < objcount; i++, obj++) {
495                 int verify = (rc == 0 &&
496                              ostid_id(&obj->ioo_oid) != ECHO_PERSISTENT_OBJID &&
497                               (oa->o_valid & OBD_MD_FLFLAGS) != 0 &&
498                               (oa->o_flags & OBD_FL_DEBUG_CHECK) != 0);
499                 int j;
500
501                 for (j = 0 ; j < obj->ioo_bufcnt ; j++, rb++) {
502                         int vrc = echo_finalize_lb(oa, obj, rb, &pgs, &res[pgs],
503                                                    verify);
504                         if (vrc == 0)
505                                 continue;
506
507                         if (vrc == -EFAULT)
508                                 GOTO(commitrw_cleanup, rc = vrc);
509
510                         if (rc == 0)
511                                 rc = vrc;
512                 }
513
514         }
515
516         atomic_sub(pgs, &obd->u.echo.eo_prep);
517
518         CDEBUG(D_PAGE, "%d pages remain after commit\n",
519                atomic_read(&obd->u.echo.eo_prep));
520         RETURN(rc);
521
522 commitrw_cleanup:
523         atomic_sub(pgs, &obd->u.echo.eo_prep);
524
525         CERROR("cleaning up %d pages (%d obdos)\n",
526                niocount - pgs - 1, objcount);
527
528         while (pgs < niocount) {
529                 struct page *page = res[pgs++].lnb_page;
530
531                 if (page == NULL)
532                         continue;
533
534                 /* NB see comment above regarding persistent pages */
535                 __free_page(page);
536                 atomic_dec(&obd->u.echo.eo_prep);
537         }
538         return rc;
539 }
540
541 LPROC_SEQ_FOPS_RO_TYPE(echo, uuid);
542 static struct lprocfs_vars lprocfs_echo_obd_vars[] = {
543         { .name =       "uuid",
544           .fops =       &echo_uuid_fops         },
545         { NULL }
546 };
547
548 static int echo_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
549 {
550         int                     rc;
551         __u64                   lock_flags = 0;
552         struct ldlm_res_id      res_id = {.name = {1}};
553         char                    ns_name[48];
554         ENTRY;
555
556         obd->u.echo.eo_obt.obt_magic = OBT_MAGIC;
557         spin_lock_init(&obd->u.echo.eo_lock);
558         obd->u.echo.eo_lastino = ECHO_INIT_OID;
559
560         sprintf(ns_name, "echotgt-%s", obd->obd_uuid.uuid);
561         obd->obd_namespace = ldlm_namespace_new(obd, ns_name,
562                                                 LDLM_NAMESPACE_SERVER,
563                                                 LDLM_NAMESPACE_MODEST,
564                                                 LDLM_NS_TYPE_OST);
565         if (obd->obd_namespace == NULL) {
566                 LBUG();
567                 RETURN(-ENOMEM);
568         }
569
570         rc = ldlm_cli_enqueue_local(obd->obd_namespace, &res_id, LDLM_PLAIN,
571                                     NULL, LCK_NL, &lock_flags, NULL,
572                                     ldlm_completion_ast, NULL, NULL, 0,
573                                     LVB_T_NONE, NULL, &obd->u.echo.eo_nl_lock);
574         LASSERT (rc == ELDLM_OK);
575
576         obd->obd_vars = lprocfs_echo_obd_vars;
577         if (lprocfs_obd_setup(obd) == 0 &&
578             lprocfs_alloc_obd_stats(obd, LPROC_ECHO_LAST) == 0) {
579                 lprocfs_counter_init(obd->obd_stats, LPROC_ECHO_READ_BYTES,
580                                      LPROCFS_CNTR_AVGMINMAX,
581                                      "read_bytes", "bytes");
582                 lprocfs_counter_init(obd->obd_stats, LPROC_ECHO_WRITE_BYTES,
583                                      LPROCFS_CNTR_AVGMINMAX,
584                                      "write_bytes", "bytes");
585         }
586
587         ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL,
588                            "echo_ldlm_cb_client", &obd->obd_ldlm_client);
589         RETURN(0);
590 }
591
592 static int echo_cleanup(struct obd_device *obd)
593 {
594         int leaked;
595         ENTRY;
596
597         lprocfs_obd_cleanup(obd);
598         lprocfs_free_obd_stats(obd);
599
600         ldlm_lock_decref(&obd->u.echo.eo_nl_lock, LCK_NL);
601
602         /* XXX Bug 3413; wait for a bit to ensure the BL callback has
603          * happened before calling ldlm_namespace_free() */
604         set_current_state(TASK_UNINTERRUPTIBLE);
605         schedule_timeout(cfs_time_seconds(1));
606
607         ldlm_namespace_free(obd->obd_namespace, NULL, obd->obd_force);
608         obd->obd_namespace = NULL;
609
610         leaked = atomic_read(&obd->u.echo.eo_prep);
611         if (leaked != 0)
612                 CERROR("%d prep/commitrw pages leaked\n", leaked);
613
614         RETURN(0);
615 }
616
617 struct obd_ops echo_obd_ops = {
618         .o_owner           = THIS_MODULE,
619         .o_connect         = echo_connect,
620         .o_disconnect      = echo_disconnect,
621         .o_init_export     = echo_init_export,
622         .o_destroy_export  = echo_destroy_export,
623         .o_create          = echo_create,
624         .o_destroy         = echo_destroy,
625         .o_getattr         = echo_getattr,
626         .o_setattr         = echo_setattr,
627         .o_preprw          = echo_preprw,
628         .o_commitrw        = echo_commitrw,
629         .o_setup           = echo_setup,
630         .o_cleanup         = echo_cleanup
631 };
632
633 void echo_persistent_pages_fini(void)
634 {
635         int i;
636
637         for (i = 0; i < ECHO_PERSISTENT_PAGES; i++)
638                 if (echo_persistent_pages[i] != NULL) {
639                         __free_page(echo_persistent_pages[i]);
640                         echo_persistent_pages[i] = NULL;
641                 }
642 }
643
644 int echo_persistent_pages_init(void)
645 {
646         struct page *pg;
647         int          i;
648
649         for (i = 0; i < ECHO_PERSISTENT_PAGES; i++) {
650                 gfp_t gfp_mask = (i < ECHO_PERSISTENT_PAGES/2) ?
651                         GFP_IOFS : GFP_HIGHUSER;
652
653                 pg = alloc_page(gfp_mask);
654                 if (pg == NULL) {
655                         echo_persistent_pages_fini();
656                         return -ENOMEM;
657                 }
658
659                 memset (kmap (pg), 0, PAGE_CACHE_SIZE);
660                 kunmap (pg);
661
662                 echo_persistent_pages[i] = pg;
663         }
664
665         return 0;
666 }