Whamcloud - gitweb
7329a26061a23ce565977c69d809ba9425a6ff7e
[fs/lustre-release.git] / lustre / lmv / lmv_intent.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2011, 2016, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  */
31
32 #define DEBUG_SUBSYSTEM S_LMV
33 #include <linux/slab.h>
34 #include <linux/module.h>
35 #include <linux/init.h>
36 #include <linux/slab.h>
37 #include <linux/pagemap.h>
38 #include <linux/math64.h>
39 #include <linux/seq_file.h>
40 #include <linux/namei.h>
41 #include <lustre_intent.h>
42
43 #include <obd_support.h>
44 #include <lustre_lib.h>
45 #include <lustre_net.h>
46 #include <lustre_dlm.h>
47 #include <lustre_mdc.h>
48 #include <obd_class.h>
49 #include <lprocfs_status.h>
50 #include "lmv_internal.h"
51
52 static int lmv_intent_remote(struct obd_export *exp, struct lookup_intent *it,
53                              const struct lu_fid *parent_fid,
54                              struct ptlrpc_request **reqp,
55                              ldlm_blocking_callback cb_blocking,
56                              __u64 extra_lock_flags,
57                              const char *secctx_name, __u32 secctx_name_size)
58 {
59         struct obd_device       *obd = exp->exp_obd;
60         struct lmv_obd          *lmv = &obd->u.lmv;
61         struct ptlrpc_request   *req = NULL;
62         struct lustre_handle    plock;
63         struct md_op_data       *op_data;
64         struct lmv_tgt_desc     *tgt;
65         struct mdt_body         *body;
66         int                     pmode;
67         int                     rc = 0;
68         ENTRY;
69
70         body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY);
71         if (body == NULL)
72                 RETURN(-EPROTO);
73
74         LASSERT((body->mbo_valid & OBD_MD_MDS));
75
76         /*
77          * We got LOOKUP lock, but we really need attrs.
78          */
79         pmode = it->it_lock_mode;
80         if (pmode) {
81                 plock.cookie = it->it_lock_handle;
82                 it->it_lock_mode = 0;
83                 it->it_request = NULL;
84         }
85
86         LASSERT(fid_is_sane(&body->mbo_fid1));
87
88         tgt = lmv_fid2tgt(lmv, &body->mbo_fid1);
89         if (IS_ERR(tgt))
90                 GOTO(out, rc = PTR_ERR(tgt));
91
92         OBD_ALLOC_PTR(op_data);
93         if (op_data == NULL)
94                 GOTO(out, rc = -ENOMEM);
95
96         op_data->op_fid1 = body->mbo_fid1;
97         /* Sent the parent FID to the remote MDT */
98         if (parent_fid != NULL) {
99                 /* The parent fid is only for remote open to
100                  * check whether the open is from OBF,
101                  * see mdt_cross_open */
102                 LASSERT(it->it_op & IT_OPEN);
103                 op_data->op_fid2 = *parent_fid;
104         }
105
106         op_data->op_bias = MDS_CROSS_REF;
107         op_data->op_cli_flags = CLI_NO_SLOT;
108         CDEBUG(D_INODE, "REMOTE_INTENT with fid="DFID" -> mds #%u\n",
109                PFID(&body->mbo_fid1), tgt->ltd_index);
110
111         /* ask for security context upon intent */
112         if (it->it_op & (IT_LOOKUP | IT_GETATTR | IT_OPEN) &&
113             secctx_name_size != 0 && secctx_name != NULL) {
114                 op_data->op_file_secctx_name = secctx_name;
115                 op_data->op_file_secctx_name_size = secctx_name_size;
116                 CDEBUG(D_SEC, "'%.*s' is security xattr to fetch for "
117                        DFID"\n",
118                        secctx_name_size, secctx_name, PFID(&body->mbo_fid1));
119         }
120
121         rc = md_intent_lock(tgt->ltd_exp, op_data, it, &req, cb_blocking,
122                             extra_lock_flags);
123         if (rc)
124                 GOTO(out_free_op_data, rc);
125
126         /*
127          * LLite needs LOOKUP lock to track dentry revocation in order to
128          * maintain dcache consistency. Thus drop UPDATE|PERM lock here
129          * and put LOOKUP in request.
130          */
131         if (it->it_lock_mode != 0) {
132                 it->it_remote_lock_handle =
133                                         it->it_lock_handle;
134                 it->it_remote_lock_mode = it->it_lock_mode;
135         }
136
137         if (pmode) {
138                 it->it_lock_handle = plock.cookie;
139                 it->it_lock_mode = pmode;
140         }
141
142         EXIT;
143 out_free_op_data:
144         OBD_FREE_PTR(op_data);
145 out:
146         if (rc && pmode)
147                 ldlm_lock_decref(&plock, pmode);
148
149         ptlrpc_req_finished(*reqp);
150         *reqp = req;
151         return rc;
152 }
153
154 int lmv_revalidate_slaves(struct obd_export *exp,
155                           const struct lmv_stripe_md *lsm,
156                           ldlm_blocking_callback cb_blocking,
157                           int extra_lock_flags)
158 {
159         struct obd_device *obd = exp->exp_obd;
160         struct lmv_obd *lmv = &obd->u.lmv;
161         struct ptlrpc_request *req = NULL;
162         struct mdt_body *body;
163         struct md_op_data *op_data;
164         int i;
165         int valid_stripe_count = 0;
166         int rc = 0;
167
168         ENTRY;
169
170         /**
171          * revalidate slaves has some problems, temporarily return,
172          * we may not need that
173          */
174         OBD_ALLOC_PTR(op_data);
175         if (op_data == NULL)
176                 RETURN(-ENOMEM);
177
178         /**
179          * Loop over the stripe information, check validity and update them
180          * from MDS if needed.
181          */
182         for (i = 0; i < lsm->lsm_md_stripe_count; i++) {
183                 struct lu_fid           fid;
184                 struct lookup_intent    it = { .it_op = IT_GETATTR };
185                 struct lustre_handle    *lockh = NULL;
186                 struct lmv_tgt_desc     *tgt = NULL;
187                 struct inode            *inode;
188
189                 fid = lsm->lsm_md_oinfo[i].lmo_fid;
190                 inode = lsm->lsm_md_oinfo[i].lmo_root;
191
192                 if (!inode)
193                         continue;
194
195                 /*
196                  * Prepare op_data for revalidating. Note that @fid2 shluld be
197                  * defined otherwise it will go to server and take new lock
198                  * which is not needed here.
199                  */
200                 memset(op_data, 0, sizeof(*op_data));
201                 op_data->op_fid1 = fid;
202                 op_data->op_fid2 = fid;
203                 /* shard revalidate only needs to fetch attributes and UPDATE
204                  * lock, which is similar to the bottom half of remote object
205                  * getattr, set this flag so that MDT skips checking whether
206                  * it's remote object.
207                  */
208                 op_data->op_bias = MDS_CROSS_REF;
209                 op_data->op_cli_flags = CLI_NO_SLOT;
210
211                 tgt = lmv_tgt_retry(lmv, lsm->lsm_md_oinfo[i].lmo_mds);
212                 if (!tgt)
213                         GOTO(cleanup, rc = -ENODEV);
214
215                 CDEBUG(D_INODE, "Revalidate slave "DFID" -> mds #%u\n",
216                        PFID(&fid), tgt->ltd_index);
217
218                 if (req != NULL) {
219                         ptlrpc_req_finished(req);
220                         req = NULL;
221                 }
222
223                 rc = md_intent_lock(tgt->ltd_exp, op_data, &it, &req,
224                                     cb_blocking, extra_lock_flags);
225                 if (rc == -ENOENT || rc == -ESHUTDOWN) {
226                         /* skip stripe that doesn't exist or is inaccessible */
227                         rc = 0;
228                         continue;
229                 }
230
231                 if (rc < 0)
232                         GOTO(cleanup, rc);
233
234                 lockh = (struct lustre_handle *)&it.it_lock_handle;
235                 if (rc > 0 && req == NULL) {
236                         /* slave inode is still valid */
237                         CDEBUG(D_INODE, "slave "DFID" is still valid.\n",
238                                PFID(&fid));
239                         rc = 0;
240                 } else {
241                         /* refresh slave from server */
242                         body = req_capsule_server_get(&req->rq_pill,
243                                                       &RMF_MDT_BODY);
244                         if (body == NULL) {
245                                 if (it.it_lock_mode && lockh) {
246                                         ldlm_lock_decref(lockh,
247                                                  it.it_lock_mode);
248                                         it.it_lock_mode = 0;
249                                 }
250                                 GOTO(cleanup, rc = -ENOENT);
251                         }
252
253                         i_size_write(inode, body->mbo_size);
254                         inode->i_blocks = body->mbo_blocks;
255                         spin_lock(&inode->i_lock);
256                         set_nlink(inode, body->mbo_nlink);
257                         spin_unlock(&inode->i_lock);
258                         inode->i_atime.tv_sec = body->mbo_atime;
259                         inode->i_ctime.tv_sec = body->mbo_ctime;
260                         inode->i_mtime.tv_sec = body->mbo_mtime;
261                 }
262
263                 md_set_lock_data(tgt->ltd_exp, lockh, inode, NULL);
264                 if (it.it_lock_mode != 0 && lockh != NULL) {
265                         ldlm_lock_decref(lockh, it.it_lock_mode);
266                         it.it_lock_mode = 0;
267                 }
268
269                 valid_stripe_count++;
270         }
271
272 cleanup:
273         if (req != NULL)
274                 ptlrpc_req_finished(req);
275
276         /* if all stripes are invalid, return -ENOENT to notify user */
277         if (!rc && !valid_stripe_count)
278                 rc = -ENOENT;
279
280         OBD_FREE_PTR(op_data);
281         RETURN(rc);
282 }
283
284 /*
285  * IT_OPEN is intended to open (and create, possible) an object. Parent (pid)
286  * may be split dir.
287  */
288 static int lmv_intent_open(struct obd_export *exp, struct md_op_data *op_data,
289                            struct lookup_intent *it,
290                            struct ptlrpc_request **reqp,
291                            ldlm_blocking_callback cb_blocking,
292                            __u64 extra_lock_flags)
293 {
294         struct obd_device *obd = exp->exp_obd;
295         struct lmv_obd *lmv = &obd->u.lmv;
296         struct lmv_tgt_desc *tgt;
297         struct mdt_body *body;
298         __u64 flags = it->it_flags;
299         int rc;
300
301         ENTRY;
302
303         /* do not allow file creation in foreign dir */
304         if ((it->it_op & IT_CREAT) && lmv_dir_foreign(op_data->op_lso1))
305                 RETURN(-ENODATA);
306
307         if ((it->it_op & IT_CREAT) && !(flags & MDS_OPEN_BY_FID)) {
308                 /* don't allow create under dir with bad hash */
309                 if (lmv_dir_bad_hash(op_data->op_lso1))
310                         RETURN(-EBADF);
311
312                 if (lmv_dir_layout_changing(op_data->op_lso1)) {
313                         if (flags & O_EXCL) {
314                                 /*
315                                  * open(O_CREAT | O_EXCL) needs to check
316                                  * existing name, which should be done on both
317                                  * old and new layout, check old layout on
318                                  * client side.
319                                  */
320                                 rc = lmv_old_layout_lookup(lmv, op_data);
321                                 if (rc != -ENOENT)
322                                         RETURN(rc);
323
324                                 op_data->op_new_layout = true;
325                         } else {
326                                 /*
327                                  * open(O_CREAT) will be sent to MDT in old
328                                  * layout first, to avoid creating new file
329                                  * under old layout, clear O_CREAT.
330                                  */
331                                 it->it_flags &= ~O_CREAT;
332                         }
333                 }
334         }
335
336 retry:
337         if (it->it_flags & MDS_OPEN_BY_FID) {
338                 LASSERT(fid_is_sane(&op_data->op_fid2));
339
340                 /* for striped directory, we can't know parent stripe fid
341                  * without name, but we can set it to child fid, and MDT
342                  * will obtain it from linkea in open in such case. */
343                 if (lmv_dir_striped(op_data->op_lso1))
344                         op_data->op_fid1 = op_data->op_fid2;
345
346                 tgt = lmv_fid2tgt(lmv, &op_data->op_fid2);
347                 if (IS_ERR(tgt))
348                         RETURN(PTR_ERR(tgt));
349
350                 op_data->op_mds = tgt->ltd_index;
351         } else {
352                 LASSERT(fid_is_sane(&op_data->op_fid1));
353                 LASSERT(fid_is_zero(&op_data->op_fid2));
354                 LASSERT(op_data->op_name != NULL);
355
356                 tgt = lmv_locate_tgt(lmv, op_data);
357                 if (IS_ERR(tgt))
358                         RETURN(PTR_ERR(tgt));
359         }
360
361         /* If it is ready to open the file by FID, do not need
362          * allocate FID at all, otherwise it will confuse MDT */
363         if ((it->it_op & IT_CREAT) && !(it->it_flags & MDS_OPEN_BY_FID)) {
364                 /*
365                  * For lookup(IT_CREATE) cases allocate new fid and setup FLD
366                  * for it.
367                  */
368                 rc = lmv_fid_alloc(NULL, exp, &op_data->op_fid2, op_data);
369                 if (rc != 0)
370                         RETURN(rc);
371         }
372
373         CDEBUG(D_INODE, "OPEN_INTENT with fid1="DFID", fid2="DFID","
374                " name='%s' -> mds #%u\n", PFID(&op_data->op_fid1),
375                PFID(&op_data->op_fid2), op_data->op_name, tgt->ltd_index);
376
377         rc = md_intent_lock(tgt->ltd_exp, op_data, it, reqp, cb_blocking,
378                             extra_lock_flags);
379         if (rc != 0)
380                 RETURN(rc);
381         /*
382          * Nothing is found, do not access body->fid1 as it is zero and thus
383          * pointless.
384          */
385         if ((it->it_disposition & DISP_LOOKUP_NEG) &&
386             !(it->it_disposition & DISP_OPEN_CREATE) &&
387             !(it->it_disposition & DISP_OPEN_OPEN)) {
388                 if (!(it->it_flags & MDS_OPEN_BY_FID) &&
389                     lmv_dir_retry_check_update(op_data)) {
390                         ptlrpc_req_finished(*reqp);
391                         it->it_request = NULL;
392                         it->it_disposition = 0;
393                         *reqp = NULL;
394
395                         it->it_flags = flags;
396                         fid_zero(&op_data->op_fid2);
397                         goto retry;
398                 }
399
400                 RETURN(rc);
401         }
402
403         body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY);
404         if (body == NULL)
405                 RETURN(-EPROTO);
406
407         /* Not cross-ref case, just get out of here. */
408         if (unlikely((body->mbo_valid & OBD_MD_MDS))) {
409                 rc = lmv_intent_remote(exp, it, &op_data->op_fid1, reqp,
410                                        cb_blocking, extra_lock_flags,
411                                        op_data->op_file_secctx_name,
412                                        op_data->op_file_secctx_name_size);
413                 if (rc != 0)
414                         RETURN(rc);
415
416                 body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY);
417                 if (body == NULL)
418                         RETURN(-EPROTO);
419         }
420
421         RETURN(rc);
422 }
423
424 /*
425  * Handler for: getattr, lookup and revalidate cases.
426  */
427 static int
428 lmv_intent_lookup(struct obd_export *exp, struct md_op_data *op_data,
429                   struct lookup_intent *it, struct ptlrpc_request **reqp,
430                   ldlm_blocking_callback cb_blocking,
431                   __u64 extra_lock_flags)
432 {
433         struct obd_device *obd = exp->exp_obd;
434         struct lmv_obd *lmv = &obd->u.lmv;
435         struct lmv_tgt_desc *tgt = NULL;
436         struct mdt_body *body;
437         int rc;
438         ENTRY;
439
440         /* foreign dir is not striped */
441         if (lmv_dir_foreign(op_data->op_lso1)) {
442                 /* only allow getattr/lookup for itself */
443                 if (op_data->op_name != NULL)
444                         RETURN(-ENODATA);
445                 RETURN(0);
446         }
447
448 retry:
449         if (op_data->op_flags & MF_GETATTR_BY_FID) {
450                 /* getattr by FID, replace fid1 with stripe FID,
451                  * NB, don't replace if name is "/", because it may be a subtree
452                  * mount, and if it's a striped directory, fid1 will be replaced
453                  * to stripe FID by hash, while fid2 is master object FID, which
454                  * will be treated as a remote object if the two FIDs are
455                  * located on different MDTs, and LOOKUP lock can't be fetched.
456                  */
457                 LASSERT(op_data->op_name);
458                 if (op_data->op_namelen != 1 ||
459                     strncmp(op_data->op_name, "/", 1) != 0) {
460                         tgt = lmv_locate_tgt(lmv, op_data);
461                         if (IS_ERR(tgt))
462                                 RETURN(PTR_ERR(tgt));
463                 }
464
465                 /* name is used to locate stripe target, clear it here
466                  * to avoid packing name in request, so that MDS knows
467                  * it's getattr by FID.
468                  */
469                 op_data->op_name = NULL;
470                 op_data->op_namelen = 0;
471
472                 /* getattr request is sent to MDT where fid2 inode is */
473                 tgt = lmv_fid2tgt(lmv, &op_data->op_fid2);
474         } else if (op_data->op_name) {
475                 /* getattr by name */
476                 tgt = lmv_locate_tgt(lmv, op_data);
477                 if (!fid_is_sane(&op_data->op_fid2))
478                         fid_zero(&op_data->op_fid2);
479         } else {
480                 /* old way to getattr by FID, parent FID not packed */
481                 tgt = lmv_fid2tgt(lmv, &op_data->op_fid1);
482         }
483         if (IS_ERR(tgt))
484                 RETURN(PTR_ERR(tgt));
485
486         CDEBUG(D_INODE, "LOOKUP_INTENT with fid1="DFID", fid2="DFID
487                ", name='%s' -> mds #%u\n",
488                PFID(&op_data->op_fid1), PFID(&op_data->op_fid2),
489                op_data->op_name ? op_data->op_name : "<NULL>",
490                tgt->ltd_index);
491
492         op_data->op_bias &= ~MDS_CROSS_REF;
493
494         rc = md_intent_lock(tgt->ltd_exp, op_data, it, reqp, cb_blocking,
495                             extra_lock_flags);
496         if (rc < 0)
497                 RETURN(rc);
498
499         if (*reqp == NULL) {
500                 /* If RPC happens, lsm information will be revalidated
501                  * during update_inode process (see ll_update_lsm_md) */
502                 if (lmv_dir_striped(op_data->op_lso2)) {
503                         rc = lmv_revalidate_slaves(exp,
504                                                    &op_data->op_lso2->lso_lsm,
505                                                    cb_blocking,
506                                                    extra_lock_flags);
507                         if (rc != 0)
508                                 RETURN(rc);
509                 }
510                 RETURN(rc);
511         } else if (it_disposition(it, DISP_LOOKUP_NEG) &&
512                    lmv_dir_retry_check_update(op_data)) {
513                 ptlrpc_req_finished(*reqp);
514                 it->it_request = NULL;
515                 it->it_disposition = 0;
516                 *reqp = NULL;
517
518                 goto retry;
519         }
520
521         if (!it_has_reply_body(it))
522                 RETURN(0);
523
524         /*
525          * MDS has returned success. Probably name has been resolved in
526          * remote inode. Let's check this.
527          */
528         body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY);
529         if (body == NULL)
530                 RETURN(-EPROTO);
531
532         /* Not cross-ref case, just get out of here. */
533         if (unlikely((body->mbo_valid & OBD_MD_MDS))) {
534                 rc = lmv_intent_remote(exp, it, NULL, reqp, cb_blocking,
535                                        extra_lock_flags,
536                                        op_data->op_file_secctx_name,
537                                        op_data->op_file_secctx_name_size);
538                 if (rc != 0)
539                         RETURN(rc);
540                 body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY);
541                 if (body == NULL)
542                         RETURN(-EPROTO);
543         }
544
545         RETURN(rc);
546 }
547
548 int lmv_intent_lock(struct obd_export *exp, struct md_op_data *op_data,
549                     struct lookup_intent *it, struct ptlrpc_request **reqp,
550                     ldlm_blocking_callback cb_blocking,
551                     __u64 extra_lock_flags)
552 {
553         int rc;
554         ENTRY;
555
556         LASSERT(it != NULL);
557         LASSERT(fid_is_sane(&op_data->op_fid1));
558
559         CDEBUG(D_INODE, "INTENT LOCK '%s' for "DFID" '%.*s' on "DFID"\n",
560                 LL_IT2STR(it), PFID(&op_data->op_fid2),
561                 (int)op_data->op_namelen, op_data->op_name,
562                 PFID(&op_data->op_fid1));
563
564         if (it->it_op & (IT_LOOKUP | IT_GETATTR | IT_LAYOUT | IT_GETXATTR))
565                 rc = lmv_intent_lookup(exp, op_data, it, reqp, cb_blocking,
566                                        extra_lock_flags);
567         else if (it->it_op & IT_OPEN)
568                 rc = lmv_intent_open(exp, op_data, it, reqp, cb_blocking,
569                                      extra_lock_flags);
570         else
571                 LBUG();
572
573         if (rc < 0) {
574                 struct lustre_handle lock_handle;
575
576                 if (it->it_lock_mode != 0) {
577                         lock_handle.cookie = it->it_lock_handle;
578                         ldlm_lock_decref_and_cancel(&lock_handle,
579                                                     it->it_lock_mode);
580                 }
581
582                 it->it_lock_handle = 0;
583                 it->it_lock_mode = 0;
584
585                 if (it->it_remote_lock_mode != 0) {
586                         lock_handle.cookie = it->it_remote_lock_handle;
587                         ldlm_lock_decref_and_cancel(&lock_handle,
588                                                     it->it_remote_lock_mode);
589                 }
590
591                 it->it_remote_lock_handle = 0;
592                 it->it_remote_lock_mode = 0;
593         }
594
595         RETURN(rc);
596 }