Whamcloud - gitweb
- Bug 296: remove LDLM_MDSINTENT to fix mismatches with LDLM_PLAIN locks.
[fs/lustre-release.git] / lustre / mdc / mdc_request.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
5  *
6  *   This file is part of Lustre, http://www.sf.net/projects/lustre/
7  *
8  *   Lustre is free software; you can redistribute it and/or
9  *   modify it under the terms of version 2 of the GNU General Public
10  *   License as published by the Free Software Foundation.
11  *
12  *   Lustre is distributed in the hope that it will be useful,
13  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *   GNU General Public License for more details.
16  *
17  *   You should have received a copy of the GNU General Public License
18  *   along with Lustre; if not, write to the Free Software
19  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20  *
21  */
22
23 #define EXPORT_SYMTAB
24 #define DEBUG_SUBSYSTEM S_MDC
25
26 #include <linux/module.h>
27 #include <linux/miscdevice.h>
28 #include <linux/lustre_mds.h>
29 #include <linux/lustre_lite.h>
30 #include <linux/lustre_dlm.h>
31 #include <linux/init.h>
32 #include <linux/obd_lov.h>
33
34 #define REQUEST_MINOR 244
35
36 extern int mds_queue_req(struct ptlrpc_request *);
37
38 int mdc_getstatus(struct lustre_handle *conn, struct ll_fid *rootfid,
39                   __u64 *last_committed, __u64 *last_xid,
40                   struct ptlrpc_request **request)
41 {
42         struct ptlrpc_request *req;
43         struct mds_body *body;
44         int rc, size = sizeof(*body);
45         ENTRY;
46
47         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_GETSTATUS, 1, &size,
48                               NULL);
49         if (!req)
50                 GOTO(out, rc = -ENOMEM);
51
52         body = lustre_msg_buf(req->rq_reqmsg, 0);
53         req->rq_level = LUSTRE_CONN_CON;
54         req->rq_replen = lustre_msg_size(1, &size);
55
56         mds_pack_req_body(req);
57         rc = ptlrpc_queue_wait(req);
58         rc = ptlrpc_check_status(req, rc);
59
60         if (!rc) {
61                 body = lustre_msg_buf(req->rq_repmsg, 0);
62                 mds_unpack_body(body);
63                 memcpy(rootfid, &body->fid1, sizeof(*rootfid));
64                 *last_committed = req->rq_repmsg->last_committed;
65                 *last_xid = req->rq_repmsg->last_xid;
66
67                 CDEBUG(D_NET,"root ino=%ld, last_committed=%Lu, last_xid=%Ld\n",
68                        (unsigned long)rootfid->id,
69                        (unsigned long long)*last_committed,
70                        (unsigned long long)*last_xid);
71         }
72
73         EXIT;
74  out:
75         ptlrpc_req_finished(req);
76         return rc;
77 }
78
79 int mdc_getlovinfo(struct obd_device *obd, struct lustre_handle *mdc_connh,
80                    struct ptlrpc_request **request)
81 {
82         struct ptlrpc_request *req;
83         struct mds_status_req *streq;
84         int rc, size[2] = {sizeof(*streq)};
85         ENTRY;
86
87         req = ptlrpc_prep_req(class_conn2cliimp(mdc_connh), MDS_GETLOVINFO, 1,
88                               size, NULL);
89         if (!req)
90                 GOTO(out, rc = -ENOMEM);
91
92         *request = req;
93         streq = lustre_msg_buf(req->rq_reqmsg, 0);
94         streq->flags = HTON__u32(MDS_STATUS_LOV);
95         streq->repbuf = HTON__u32(8192);
96
97         /* prepare for reply */
98         req->rq_level = LUSTRE_CONN_CON;
99         size[0] = 512;
100         size[1] = 8192;
101         req->rq_replen = lustre_msg_size(2, size);
102
103         rc = ptlrpc_queue_wait(req);
104         rc = ptlrpc_check_status(req, rc);
105
106  out:
107         RETURN(rc);
108 }
109
110
111 int mdc_getattr(struct lustre_handle *conn,
112                 obd_id ino, int type, unsigned long valid, size_t ea_size,
113                 struct ptlrpc_request **request)
114 {
115         struct ptlrpc_request *req;
116         struct mds_body *body;
117         int rc, size[2] = {sizeof(*body), 0}, bufcount = 1;
118         ENTRY;
119
120         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_GETATTR, 1, size,
121                               NULL);
122         if (!req)
123                 GOTO(out, rc = -ENOMEM);
124
125         body = lustre_msg_buf(req->rq_reqmsg, 0);
126         ll_ino2fid(&body->fid1, ino, 0, type);
127         body->valid = valid;
128
129         if (S_ISREG(type)) {
130                 struct client_obd *mdc = &class_conn2obd(conn)->u.cli;
131                 bufcount = 2;
132                 size[1] = mdc->cl_max_mds_easize;
133         } else if (valid & OBD_MD_LINKNAME) {
134                 bufcount = 2;
135                 size[1] = ea_size;
136                 body->size = ea_size;
137                 CDEBUG(D_INODE, "allocating %d bytes for symlink in packet\n",
138                        ea_size);
139         }
140         req->rq_replen = lustre_msg_size(bufcount, size);
141         mds_pack_req_body(req);
142
143         rc = ptlrpc_queue_wait(req);
144         rc = ptlrpc_check_status(req, rc);
145
146         if (!rc) {
147                 body = lustre_msg_buf(req->rq_repmsg, 0);
148                 mds_unpack_body(body);
149                 CDEBUG(D_NET, "mode: %o\n", body->mode);
150         }
151
152         EXIT;
153  out:
154         *request = req;
155         return rc;
156 }
157
158 static void d_delete_aliases(struct inode *inode)
159 {
160         struct dentry *dentry = NULL;
161         struct list_head *tmp;
162         int dentry_count = 0;
163         ENTRY;
164
165         spin_lock(&dcache_lock);
166         list_for_each(tmp, &inode->i_dentry) {
167                 dentry = list_entry(tmp, struct dentry, d_alias);
168                 dentry_count++;
169         }
170
171         /* XXX FIXME tell phil/peter that you see this -- unless you're playing
172          * with hard links, in which case, stop. */
173         LASSERT(dentry_count <= 1);
174
175         if (dentry_count == 0) {
176                 spin_unlock(&dcache_lock);
177                 EXIT;
178                 return;
179         }
180
181         CDEBUG(D_INODE, "d_deleting dentry %p\n", dentry);
182         dget_locked(dentry);
183         spin_unlock(&dcache_lock);
184         d_delete(dentry);
185         dput(dentry);
186         EXIT;
187 }
188
189 static int mdc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
190                             void *data, __u32 data_len, int flag)
191 {
192         int rc;
193         struct lustre_handle lockh;
194         ENTRY;
195
196         switch (flag) {
197         case LDLM_CB_BLOCKING:
198                 ldlm_lock2handle(lock, &lockh);
199                 rc = ldlm_cli_cancel(&lockh);
200                 if (rc < 0) {
201                         CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
202                         RETURN(rc);
203                 }
204                 break;
205         case LDLM_CB_CANCELING: {
206                 /* Invalidate all dentries associated with this inode */
207                 struct inode *inode = data;
208
209                 LASSERT(inode != NULL);
210                 LASSERT(data_len == sizeof(*inode));
211
212                 if (S_ISDIR(inode->i_mode)) {
213                         CDEBUG(D_INODE, "invalidating inode %ld\n",
214                                inode->i_ino);
215                         ll_invalidate_inode_pages(inode);
216                 }
217
218                 LASSERT(igrab(inode) == inode);
219                 d_delete_aliases(inode);
220                 iput(inode);
221                 break;
222         }
223         default:
224                 LBUG();
225         }
226
227         RETURN(0);
228 }
229
230 struct create_replay_data {
231         struct super_block *sb;
232         u32                 generation;
233 };
234
235 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
236 static int create_replay_find_inode(struct inode *inode, unsigned long ino,
237                                     void *opaque)
238 #else
239 static int create_replay_find_inode(struct inode *inode, void *opaque)
240 #endif
241 {
242         struct ptlrpc_request *req = opaque;
243         struct create_replay_data *saved;
244         struct mds_body *body;
245         
246         saved = lustre_msg_buf(req->rq_reqmsg, 5); /* lock with intent */
247         
248         if (saved->generation != inode->i_generation) {
249                 CDEBUG(D_HA,
250                        "generation mismatch for ino %u: saved %u != inode %u\n",
251                        inode->i_ino, saved->generation, inode->i_generation);
252                 return 0;
253         }
254
255         body = lustre_msg_buf(req->rq_repmsg, 1);
256
257         /* XXX do I need more out of ll_update_inode? */
258         CDEBUG(D_HA, "updating inode %u generation %u to %u\n",
259                inode->i_ino, inode->i_generation, body->generation);
260
261         inode->i_generation = body->generation;
262
263         return 1;
264 }
265
266 static void fixup_req_for_recreate(struct ptlrpc_request *fixreq,
267                                    struct ptlrpc_request *req,
268                                    struct inode *inode)
269 {
270         struct ldlm_request *lockreq; 
271         struct mds_rec_link *rec; /* representative, two-fid op structure */
272         int opc;
273
274         if (fixreq->rq_import != req->rq_import) {
275                 DEBUG_REQ(D_HA, fixreq, "import mismatch, skipping");
276                 return;
277         }
278
279         DEBUG_REQ(D_HA, fixreq, "fixing");
280         
281         /* XXX check replay_state to see if we'll actually replay. */
282
283         /* We only care about LDLM_ENQUEUE and MDS_REINT requests. */
284         if (fixreq->rq_reqmsg->opc == LDLM_ENQUEUE) {
285                 lockreq = lustre_msg_buf(fixreq->rq_reqmsg, 0);
286
287                 if (lockreq->lock_desc.l_resource.lr_type != LDLM_PLAIN &&
288                     !(lockreq->lock_flags & LDLM_FL_HAS_INTENT)) {
289                         DEBUG_REQ(D_HA, fixreq, "non-intent lock, skipping");
290                         return;
291                 }
292
293                 if (fixreq->rq_reqmsg->bufcount < 2) {
294                         DEBUG_REQ(D_HA, fixreq,
295                                   "short intent (probably readdir), skipping");
296                         return;
297                 }
298
299                 /* XXX endianness is probably very very wrong here. Very. */
300                 rec = lustre_msg_buf(fixreq->rq_reqmsg, 2);
301         } else if (fixreq->rq_reqmsg->opc == MDS_REINT) {
302                 rec = lustre_msg_buf(fixreq->rq_reqmsg, 0);
303         } else if (fixreq->rq_reqmsg->opc == MDS_OPEN) {
304                 struct mds_body *body = lustre_msg_buf(fixreq->rq_reqmsg, 0);
305                 DEBUG_REQ(D_HA, fixreq, "fixing fid1: %u -> %u",
306                           body->fid1.generation, inode->i_generation);
307                 body->fid1.generation = inode->i_generation;
308                 return;
309         } else {
310                 DEBUG_REQ(D_HA, fixreq, "not a replayable request, skipping");
311                 return;
312         }
313         
314         if (rec->lk_fid1.id == inode->i_ino) {
315                 DEBUG_REQ(D_HA, fixreq, "fixing fid1: %u -> %u",
316                           rec->lk_fid1.generation, inode->i_generation);
317                 rec->lk_fid1.generation = inode->i_generation;
318         }
319         
320         /* Some ops have two FIDs. ZZZ We rely on the identical
321          * placement of that second FID in all such ops' messages.
322          */
323         opc = rec->lk_opcode & REINT_OPCODE_MASK;
324         if ((opc == REINT_LINK || opc == REINT_UNLINK ||
325              opc == REINT_RENAME) &&
326             rec->lk_fid2.id == inode->i_ino) {
327                 DEBUG_REQ(D_HA, fixreq, "fixing fid2: %u -> %u",
328                           rec->lk_fid2.generation, inode->i_generation);
329                 rec->lk_fid2.generation = inode->i_generation;
330         }
331 }
332
333 static void mdc_replay_create(struct ptlrpc_request *req)
334 {
335         struct create_replay_data *saved;
336         struct mds_body *body;
337         struct inode *inode;
338         struct list_head *tmp;
339
340         if (req->rq_reqmsg->opc == MDS_REINT)
341                 LBUG(); /* XXX don't handle the non-intent case yet */
342
343         body = lustre_msg_buf(req->rq_repmsg, 1);
344         saved = lustre_msg_buf(req->rq_reqmsg, 5); /* lock with intent */
345
346         CDEBUG(D_HA, "create of inode %d replayed; gen %u -> %u\n",
347                body->fid1.id, saved->generation, body->generation);
348         /* XXX cargo-culted right out of ll_iget */
349 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
350         inode = iget4(saved->sb, body->fid1.id, create_replay_find_inode, req);
351 #endif
352 #if 0
353         {
354                 extern int ll_read_inode2(struct inode *inode, void *opaque);
355                 inode = iget5_locked(saved->sb, body->fid1.id,
356                                      create_replay_find_inode, 
357                                      ll_read_inode2, req);
358
359                 if (!inode)
360                         LBUG(); /* XXX ick */
361                 
362                 if (inode->i_state & I_NEW)
363                         unlock_new_inode(inode);
364         }
365 #endif
366
367         /* Now that we've updated the generation, we need to go and find all
368          * the other requests that refer to this file and will be replayed,
369          * and teach them about our new generation.
370          */
371         list_for_each(tmp, &req->rq_connection->c_sending_head) {
372                 struct ptlrpc_request *fixreq =
373                         list_entry(tmp, struct ptlrpc_request, rq_list);
374
375                 fixup_req_for_recreate(fixreq, req, inode);
376         }
377
378         list_for_each(tmp, &req->rq_connection->c_delayed_head) {
379                 struct ptlrpc_request *fixreq =
380                         list_entry(tmp, struct ptlrpc_request, rq_list);
381
382                 fixup_req_for_recreate(fixreq, req, inode);
383         }
384 }
385
386 void mdc_store_create_replay_data(struct ptlrpc_request *req,
387                                   struct super_block *sb)
388 {
389         struct create_replay_data *saved = 
390                 lustre_msg_buf(req->rq_reqmsg, 5);
391         struct mds_body *body = lustre_msg_buf(req->rq_repmsg, 1);
392
393
394         if (req->rq_reqmsg->opc == MDS_REINT)
395                 LBUG(); /* XXX don't handle the non-intent case yet */
396
397         saved->generation = body->generation;
398         saved->sb = sb; /* XXX is this safe? */
399
400         req->rq_replay_cb = mdc_replay_create;
401 }
402
403 int mdc_enqueue(struct lustre_handle *conn, int lock_type,
404                 struct lookup_intent *it, int lock_mode, struct inode *dir,
405                 struct dentry *de, struct lustre_handle *lockh,
406                 char *tgt, int tgtlen, void *data, int datalen)
407 {
408         struct ptlrpc_request *req;
409         struct obd_device *obddev = class_conn2obd(conn);
410         __u64 res_id[RES_NAME_SIZE] = {dir->i_ino, (__u64)dir->i_generation};
411         int size[6] = {sizeof(struct ldlm_request), sizeof(struct ldlm_intent)};
412         int rc, flags = LDLM_FL_HAS_INTENT;
413         int repsize[3] = {sizeof(struct ldlm_reply),
414                           sizeof(struct mds_body),
415                           obddev->u.cli.cl_max_mds_easize};
416         struct ldlm_reply *dlm_rep;
417         struct ldlm_intent *lit;
418         struct ldlm_request *lockreq;
419         ENTRY;
420
421         LDLM_DEBUG_NOLOCK("mdsintent %s dir %ld", ldlm_it2str(it->it_op),
422                           dir->i_ino);
423
424         if (it->it_op & (IT_MKDIR | IT_CREAT | IT_SYMLINK | IT_MKNOD)) {
425                 switch (it->it_op) {
426                 case IT_MKDIR:
427                         it->it_mode |= S_IFDIR;
428                         break;
429                 case (IT_CREAT|IT_OPEN):
430                 case IT_CREAT:
431                         it->it_mode |= S_IFREG;
432                         break;
433                 case IT_SYMLINK:
434                         it->it_mode |= S_IFLNK;
435                         break;
436                 }
437                 it->it_mode &= ~current->fs->umask;
438
439                 size[2] = sizeof(struct mds_rec_create);
440                 size[3] = de->d_name.len + 1;
441                 size[4] = tgtlen + 1;
442                 size[5] = sizeof(struct create_replay_data);
443                 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 6,
444                                       size, NULL);
445                 if (!req)
446                         RETURN(-ENOMEM);
447
448                 /* pack the intent */
449                 lit = lustre_msg_buf(req->rq_reqmsg, 1);
450                 lit->opc = NTOH__u64((__u64)it->it_op);
451
452                 /* pack the intended request */
453                 mds_create_pack(req, 2, dir, it->it_mode, 0, current->fsuid,
454                                 current->fsgid, CURRENT_TIME, de->d_name.name,
455                                 de->d_name.len, tgt, tgtlen);
456                 req->rq_replen = lustre_msg_size(3, repsize);
457         } else if (it->it_op == IT_RENAME2) {
458                 struct dentry *old_de = it->it_data;
459
460                 size[2] = sizeof(struct mds_rec_rename);
461                 size[3] = old_de->d_name.len + 1;
462                 size[4] = de->d_name.len + 1;
463                 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 5,
464                                       size, NULL);
465                 if (!req)
466                         RETURN(-ENOMEM);
467
468                 /* pack the intent */
469                 lit = lustre_msg_buf(req->rq_reqmsg, 1);
470                 lit->opc = NTOH__u64((__u64)it->it_op);
471
472                 /* pack the intended request */
473                 mds_rename_pack(req, 2, old_de->d_parent->d_inode, dir,
474                                 old_de->d_name.name, old_de->d_name.len,
475                                 de->d_name.name, de->d_name.len);
476                 req->rq_replen = lustre_msg_size(3, repsize);
477         } else if (it->it_op == IT_LINK2) {
478                 struct dentry *old_de = it->it_data;
479
480                 size[2] = sizeof(struct mds_rec_link);
481                 size[3] = de->d_name.len + 1;
482                 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 4,
483                                       size, NULL);
484                 if (!req)
485                         RETURN(-ENOMEM);
486
487                 /* pack the intent */
488                 lit = lustre_msg_buf(req->rq_reqmsg, 1);
489                 lit->opc = NTOH__u64((__u64)it->it_op);
490
491                 /* pack the intended request */
492                 mds_link_pack(req, 2, old_de->d_inode, dir,
493                               de->d_name.name, de->d_name.len);
494                 req->rq_replen = lustre_msg_size(3, repsize);
495         } else if (it->it_op == IT_UNLINK || it->it_op == IT_RMDIR) {
496                 size[2] = sizeof(struct mds_rec_unlink);
497                 size[3] = de->d_name.len + 1;
498                 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 4,
499                                       size, NULL);
500                 if (!req)
501                         RETURN(-ENOMEM);
502
503                 /* pack the intent */
504                 lit = lustre_msg_buf(req->rq_reqmsg, 1);
505                 lit->opc = NTOH__u64((__u64)it->it_op);
506
507                 /* pack the intended request */
508                 mds_unlink_pack(req, 2, dir, NULL,
509                                 it->it_op == IT_UNLINK ? S_IFREG : S_IFDIR,
510                                 de->d_name.name, de->d_name.len);
511
512                 req->rq_replen = lustre_msg_size(3, repsize);
513         } else if (it->it_op  & (IT_GETATTR | IT_RENAME | IT_LINK | 
514                    IT_OPEN |  IT_SETATTR | IT_LOOKUP | IT_READLINK)) {
515                 size[2] = sizeof(struct mds_body);
516                 size[3] = de->d_name.len + 1;
517
518                 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 4,
519                                       size, NULL);
520                 if (!req)
521                         RETURN(-ENOMEM);
522
523                 /* pack the intent */
524                 lit = lustre_msg_buf(req->rq_reqmsg, 1);
525                 lit->opc = NTOH__u64((__u64)it->it_op);
526
527                 /* pack the intended request */
528                 mds_getattr_pack(req, 2, dir, de->d_name.name, de->d_name.len);
529
530                 /* we need to replay opens */
531                 if (it->it_op == IT_OPEN)
532                         req->rq_flags |= PTL_RPC_FL_REPLAY;
533
534                 /* get ready for the reply */
535                 req->rq_replen = lustre_msg_size(3, repsize);
536         } else if (it->it_op == IT_READDIR) {
537                 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 1,
538                                       size, NULL);
539                 if (!req)
540                         RETURN(-ENOMEM);
541
542                 /* get ready for the reply */
543                 req->rq_replen = lustre_msg_size(1, repsize);
544         } else {
545                 LBUG();
546                 RETURN(-EINVAL);
547         }
548 #warning FIXME: the data here needs to be different if a lock was granted for a different inode
549         rc = ldlm_cli_enqueue(conn, req, obddev->obd_namespace, NULL, res_id,
550                               lock_type, NULL, 0, lock_mode, &flags,
551                               ldlm_completion_ast, mdc_blocking_ast, data,
552                               datalen, lockh);
553         if (rc == -ENOENT) {
554                 /* This can go when we're sure that this can never happen */
555                 LBUG();
556         }
557         if (rc == ELDLM_LOCK_ABORTED) {
558                 lock_mode = 0;
559                 memset(lockh, 0, sizeof(*lockh));
560                 /* rc = 0 */
561         } else if (rc != 0) {
562                 CERROR("ldlm_cli_enqueue: %d\n", rc);
563                 RETURN(rc);
564         }
565
566         /* On replay, we don't want the lock granted. */
567         lockreq = lustre_msg_buf(req->rq_reqmsg, 0);
568         lockreq->lock_flags |= LDLM_FL_INTENT_ONLY;
569
570         dlm_rep = lustre_msg_buf(req->rq_repmsg, 0);
571         it->it_disposition = (int) dlm_rep->lock_policy_res1;
572         it->it_status = (int) dlm_rep->lock_policy_res2;
573         it->it_lock_mode = lock_mode;
574         it->it_data = req;
575
576         RETURN(0);
577 }
578
579 int mdc_cancel_unused(struct lustre_handle *conn, struct inode *inode,
580                       int flags)
581 {
582         __u64 res_id[RES_NAME_SIZE] = {inode->i_ino, inode->i_generation};
583         struct obd_device *obddev = class_conn2obd(conn);
584         ENTRY;
585         RETURN(ldlm_cli_cancel_unused(obddev->obd_namespace, res_id, flags));
586 }
587
/*
 * Client-side state stored in the last buffer of an MDS_OPEN request (see
 * mdc_open()) so that mdc_replay_open() can refresh the caller's handle.
 */
struct replay_open_data {
        /* the caller's file handle, overwritten when the open is replayed */
        struct lustre_handle *fh;
};
591
592 static void mdc_replay_open(struct ptlrpc_request *req)
593 {
594         int offset;
595         struct replay_open_data *saved;
596         struct mds_body *body = lustre_msg_buf(req->rq_repmsg, 0);
597
598         if (lustre_msg_get_op_flags(req->rq_reqmsg) & MDS_OPEN_HAS_EA)
599                 offset = 2;
600         else
601                 offset = 1;
602
603         saved = lustre_msg_buf(req->rq_reqmsg, offset);
604         mds_unpack_body(body);
605         CDEBUG(D_HA, "updating from "LPD64"/"LPD64" to "LPD64"/"LPD64"\n",
606                saved->fh->addr, saved->fh->cookie,
607                body->handle.addr, body->handle.cookie);
608         memcpy(saved->fh, &body->handle, sizeof(body->handle));
609 }
610
611 int mdc_open(struct lustre_handle *conn, obd_id ino, int type, int flags,
612              struct lov_stripe_md *lsm, struct lustre_handle *fh,
613              struct ptlrpc_request **request)
614 {
615         struct mds_body *body;
616         struct replay_open_data *replay_data;
617         int rc, size[3] = {sizeof(*body), sizeof(*replay_data)}, bufcount = 2;
618         struct ptlrpc_request *req;
619         ENTRY;
620
621         if (lsm) {
622                 bufcount = 3;
623                 size[2] = size[1]; /* shuffle the spare data along */
624
625                 size[1] = lsm->lsm_mds_easize;
626         }
627
628         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_OPEN, bufcount, size,
629                               NULL);
630         if (!req)
631                 GOTO(out, rc = -ENOMEM);
632
633         if (lsm)
634                 lustre_msg_set_op_flags(req->rq_reqmsg, MDS_OPEN_HAS_EA);
635
636
637         req->rq_flags |= PTL_RPC_FL_REPLAY;
638         body = lustre_msg_buf(req->rq_reqmsg, 0);
639
640         ll_ino2fid(&body->fid1, ino, 0, type);
641         body->flags = HTON__u32(flags);
642         memcpy(&body->handle, fh, sizeof(body->handle));
643
644         if (lsm)
645                 lov_packmd(lustre_msg_buf(req->rq_reqmsg, 1), lsm);
646
647         req->rq_replen = lustre_msg_size(1, size);
648
649         rc = ptlrpc_queue_wait(req);
650         rc = ptlrpc_check_status(req, rc);
651         if (!rc) {
652                 body = lustre_msg_buf(req->rq_repmsg, 0);
653                 mds_unpack_body(body);
654                 memcpy(fh, &body->handle, sizeof(*fh));
655         }
656
657         /* If open is replayed, we need to fix up the fh. */
658         req->rq_replay_cb = mdc_replay_open;
659         replay_data = lustre_msg_buf(req->rq_reqmsg, lsm ? 2 : 1);
660         replay_data->fh = fh;
661         
662         EXIT;
663  out:
664         *request = req;
665         return rc;
666 }
667
668 int mdc_close(struct lustre_handle *conn, obd_id ino, int type,
669               struct lustre_handle *fh, struct ptlrpc_request **request)
670 {
671         struct mds_body *body;
672         int rc, size = sizeof(*body);
673         struct ptlrpc_request *req;
674
675         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_CLOSE, 1, &size,
676                               NULL);
677         if (!req)
678                 GOTO(out, rc = -ENOMEM);
679
680         body = lustre_msg_buf(req->rq_reqmsg, 0);
681         ll_ino2fid(&body->fid1, ino, 0, type);
682         memcpy(&body->handle, fh, sizeof(body->handle));
683
684         req->rq_replen = lustre_msg_size(0, NULL);
685
686         rc = ptlrpc_queue_wait(req);
687         rc = ptlrpc_check_status(req, rc);
688
689         EXIT;
690  out:
691         *request = req;
692         return rc;
693 }
694
695 int mdc_readpage(struct lustre_handle *conn, obd_id ino, int type, __u64 offset,
696                  char *addr, struct ptlrpc_request **request)
697 {
698         struct ptlrpc_connection *connection = 
699                 client_conn2cli(conn)->cl_import.imp_connection;
700         struct ptlrpc_request *req = NULL;
701         struct ptlrpc_bulk_desc *desc = NULL;
702         struct ptlrpc_bulk_page *bulk = NULL;
703         struct mds_body *body;
704         int rc, size = sizeof(*body);
705         ENTRY;
706
707         CDEBUG(D_INODE, "inode: %ld\n", (long)ino);
708
709         desc = ptlrpc_prep_bulk(connection);
710         if (desc == NULL)
711                 GOTO(out, rc = -ENOMEM);
712
713         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_READPAGE, 1, &size,
714                               NULL);
715         if (!req)
716                 GOTO(out2, rc = -ENOMEM);
717
718         bulk = ptlrpc_prep_bulk_page(desc);
719         bulk->bp_buflen = PAGE_SIZE;
720         bulk->bp_buf = addr;
721         bulk->bp_xid = req->rq_xid;
722         desc->bd_portal = MDS_BULK_PORTAL;
723
724         rc = ptlrpc_register_bulk(desc);
725         if (rc) {
726                 CERROR("couldn't setup bulk sink: error %d.\n", rc);
727                 GOTO(out2, rc);
728         }
729
730         body = lustre_msg_buf(req->rq_reqmsg, 0);
731         body->fid1.id = ino;
732         body->fid1.f_type = type;
733         body->size = offset;
734
735         req->rq_replen = lustre_msg_size(1, &size);
736         rc = ptlrpc_queue_wait(req);
737         rc = ptlrpc_check_status(req, rc);
738         if (rc) {
739                 ptlrpc_abort_bulk(desc);
740                 GOTO(out2, rc);
741         } else {
742                 body = lustre_msg_buf(req->rq_repmsg, 0);
743                 mds_unpack_body(body);
744         }
745
746         EXIT;
747  out2:
748         ptlrpc_free_bulk(desc);
749  out:
750         *request = req;
751         return rc;
752 }
753
754 int mdc_statfs(struct lustre_handle *conn, struct obd_statfs *osfs,
755                struct ptlrpc_request **request)
756 {
757         struct ptlrpc_request *req;
758         int rc, size = sizeof(*osfs);
759         ENTRY;
760
761         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_STATFS, 0, NULL,
762                               NULL);
763         if (!req)
764                 GOTO(out, rc = -ENOMEM);
765         req->rq_replen = lustre_msg_size(1, &size);
766
767         rc = ptlrpc_queue_wait(req);
768         rc = ptlrpc_check_status(req, rc);
769
770         if (rc)
771                 GOTO(out, rc);
772
773         obd_statfs_unpack(osfs, lustre_msg_buf(req->rq_repmsg, 0));
774
775         EXIT;
776 out:
777         *request = req;
778
779         return rc;
780 }
781
782 struct obd_ops mdc_obd_ops = {
783         o_setup:   client_obd_setup,
784         o_cleanup: client_obd_cleanup,
785         o_connect: client_obd_connect,
786         o_disconnect: client_obd_disconnect,
787 };
788
789 static int __init ptlrpc_request_init(void)
790 {
791         return class_register_type(&mdc_obd_ops, LUSTRE_MDC_NAME);
792 }
793
794 static void __exit ptlrpc_request_exit(void)
795 {
796         class_unregister_type(LUSTRE_MDC_NAME);
797 }
798
799 MODULE_AUTHOR("Cluster File Systems <info@clusterfs.com>");
800 MODULE_DESCRIPTION("Lustre Metadata Client v1.0");
801 MODULE_LICENSE("GPL");
802
803 EXPORT_SYMBOL(mdc_getstatus);
804 EXPORT_SYMBOL(mdc_getlovinfo);
805 EXPORT_SYMBOL(mdc_enqueue);
806 EXPORT_SYMBOL(mdc_cancel_unused);
807 EXPORT_SYMBOL(mdc_getattr);
808 EXPORT_SYMBOL(mdc_statfs);
809 EXPORT_SYMBOL(mdc_create);
810 EXPORT_SYMBOL(mdc_unlink);
811 EXPORT_SYMBOL(mdc_rename);
812 EXPORT_SYMBOL(mdc_link);
813 EXPORT_SYMBOL(mdc_readpage);
814 EXPORT_SYMBOL(mdc_setattr);
815 EXPORT_SYMBOL(mdc_close);
816 EXPORT_SYMBOL(mdc_open);
817
818 EXPORT_SYMBOL(mdc_store_create_replay_data);
819
820 module_init(ptlrpc_request_init);
821 module_exit(ptlrpc_request_exit);