1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Directory code for lustre client.
42 #include <linux/pagemap.h>
44 #include <linux/version.h>
45 #include <linux/smp_lock.h>
46 #include <asm/uaccess.h>
47 #include <linux/buffer_head.h> // for wait_on_buffer
49 #define DEBUG_SUBSYSTEM S_LLITE
51 #include <obd_support.h>
52 #include <obd_class.h>
53 #include <lustre_lib.h>
54 #include <lustre/lustre_idl.h>
55 #include <lustre_lite.h>
56 #include <lustre_dlm.h>
57 #include "llite_internal.h"
59 #ifndef HAVE_PAGE_CHECKED
60 #ifdef HAVE_PG_FS_MISC
61 #define PageChecked(page) test_bit(PG_fs_misc, &(page)->flags)
62 #define SetPageChecked(page) set_bit(PG_fs_misc, &(page)->flags)
64 #error PageChecked or PageFsMisc not defined in kernel
/*
 * Read one directory page from the MDS (Lustre 1.8 wire protocol:
 * struct ll_fid / struct mds_body).  Used as the ->readpage method of
 * ll_dir_aops.  On a successful reply the cached directory i_size is
 * refreshed from the server-supplied body->size and the page is marked
 * up to date.
 *
 * NOTE(review): this view of the file is a lossy extraction -- braces,
 * the declarations of `offset'/`rc', and the error path are not visible
 * here.  Confirm against the complete source before modifying.
 */
68 /* returns the page unlocked, but with a reference */
69 static int ll_dir_readpage(struct file *file, struct page *page)
71 struct inode *inode = page->mapping->host;
72 struct ll_fid mdc_fid;
74 struct ptlrpc_request *request;
75 struct mds_body *body;
/* byte offset of this page within the directory */
79 offset = (__u64)page->index << CFS_PAGE_SHIFT;
80 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) off "LPU64"\n",
81 inode->i_ino, inode->i_generation, inode, offset);
/* build the 1.8-style fid (ino/generation) identifying this directory */
83 ll_pack_fid(&mdc_fid, inode->i_ino, inode->i_generation, S_IFDIR);
/* synchronous RPC: fetch the page contents from the MDS */
85 rc = mdc_readpage(ll_i2sbi(inode)->ll_mdc_exp, &mdc_fid,
86 offset, page, &request);
88 body = lustre_msg_buf(request->rq_repmsg, REPLY_REC_OFF,
90 LASSERT(body != NULL); /* checked by mdc_readpage() */
91 /* swabbed by mdc_readpage() */
92 LASSERT(lustre_rep_swabbed(request, REPLY_REC_OFF));
/* server size is authoritative; update the cached inode size if stale */
94 if (body->size != i_size_read(inode)) {
95 ll_inode_size_lock(inode, 0);
96 i_size_write(inode, body->size);
97 ll_inode_size_unlock(inode, 0);
100 SetPageUptodate(page);
102 ptlrpc_req_finished(request);
/*
 * Address-space operations for directory inodes: only ->readpage is
 * provided (ll_dir_readpage).  The MS_HAS_NEW_AOPS branch wraps the same
 * method in the extended aops structure used by newer kernels.
 */
109 #ifndef MS_HAS_NEW_AOPS
110 struct address_space_operations ll_dir_aops = {
111 .readpage = ll_dir_readpage,
114 struct address_space_operations_ext ll_dir_aops = {
115 .orig_aops.readpage = ll_dir_readpage,
/*
 * Mask selecting the filesystem-block-aligned part of an in-page offset:
 * ~(blocksize - 1).  Used to detect entries crossing a block boundary.
 */
119 static inline unsigned ll_dir_page_mask(struct inode *inode)
121 return ~(inode->i_sb->s_blocksize - 1);
/*
 * Validate one on-page directory entry (1.8 format).  Sets `msg' to a
 * description of the first problem found; the CERROR below is the
 * error-reporting path.  Checks, in order: minimal record length,
 * 4-byte alignment, rec_len large enough for the name, and that the
 * record does not straddle a filesystem block boundary.
 */
125 * Check consistency of a single entry.
127 static int ll_dir_check_entry(struct inode *dir, struct ll_dir_entry *ent,
128 unsigned offset, unsigned rec_len, pgoff_t index)
133 * Consider adding more checks.
136 if (unlikely(rec_len < ll_dir_rec_len(1)))
137 msg = "entry is too short";
138 else if (unlikely(rec_len & 3))
139 msg = "wrong alignment";
140 else if (unlikely(rec_len < ll_dir_rec_len(ent->lde_name_len)))
141 msg = "rec_len doesn't match name_len";
/* (offset ^ (offset + rec_len - 1)) differing in block-mask bits means
 * first and last byte of the record live in different blocks */
142 else if (unlikely(((offset + rec_len - 1) ^ offset) &
143 ll_dir_page_mask(dir)))
144 msg = "directory entry across blocks";
/* reached with msg set: report the corrupt entry with full context */
147 CERROR("%s: bad entry in directory %lu/%u: %s - "
148 "offset=%lu+%u, inode=%lu, rec_len=%d,"
149 " name_len=%d\n", ll_i2mdcexp(dir)->exp_obd->obd_name,
150 dir->i_ino, dir->i_generation, msg,
151 index << CFS_PAGE_SHIFT,
152 offset, (unsigned long)le32_to_cpu(ent->lde_inode),
153 rec_len, ent->lde_name_len);
/*
 * Validate all entries on a freshly read directory page and mark it
 * PageChecked on completion.  For the last (partial) page of the
 * directory, pads the tail with dummy forwarding entries (rec_len ==
 * blocksize, name_len == 0) so that iteration code can assume every
 * block is fully populated.
 */
157 static void ll_dir_check_page(struct inode *dir, struct page *page)
160 unsigned size = dir->i_sb->s_blocksize;
161 char *addr = page_address(page);
166 struct ll_dir_entry *ent;
/* is this the last page of the directory? */
169 if ((i_size_read(dir) >> CFS_PAGE_SHIFT) == (__u64)page->index) {
173 limit = i_size_read(dir) & ~CFS_PAGE_MASK;
/* directory size must be a whole number of blocks */
174 if (limit & (size - 1)) {
175 CERROR("%s: dir %lu/%u size %llu doesn't match %u\n",
176 ll_i2mdcexp(dir)->exp_obd->obd_name, dir->i_ino,
177 dir->i_generation, i_size_read(dir), size);
181 * Place dummy forwarding entries to streamline
/* pad each remaining block with an empty whole-block entry */
184 for (off = limit; off < CFS_PAGE_SIZE; off += size) {
185 ent = ll_entry_at(addr, off);
186 ent->lde_rec_len = cpu_to_le16(size);
187 ent->lde_name_len = 0;
192 limit = CFS_PAGE_SIZE;
/* walk every record up to `limit', stopping at the first bad one */
195 !err && off <= limit - ll_dir_rec_len(1); off += reclen) {
196 ent = ll_entry_at(addr, off);
197 reclen = le16_to_cpu(ent->lde_rec_len);
198 err = ll_dir_check_entry(dir, ent, off, reclen, page->index);
/* clean walk must land exactly on `limit'; otherwise the final
 * record spans the page boundary */
201 if (!err && off != limit) {
202 ent = ll_entry_at(addr, off);
203 CERROR("%s: entry in directory %lu/%u spans the page boundary "
204 "offset="LPU64"+%u, inode=%lu\n",
205 ll_i2mdcexp(dir)->exp_obd->obd_name,
206 dir->i_ino, dir->i_generation,
207 (__u64)page->index << CFS_PAGE_SHIFT,
208 off, (unsigned long)le32_to_cpu(ent->lde_inode));
213 SetPageChecked(page);
/*
 * Return directory page `n' of `dir' (1.8 path), read through the page
 * cache under DLM protection.  First tries to match an existing CR
 * UPDATE inodebits lock; on a miss, enqueues one via IT_READDIR intent.
 * The page is validated by ll_dir_check_page() before being returned.
 * On I/O failure the out-of-line path returns ERR_PTR(-EIO); the lock
 * reference is dropped at out_unlock.
 */
216 struct page *ll_get_dir_page(struct inode *dir, unsigned long n)
218 struct ldlm_res_id res_id;
219 struct lustre_handle lockh;
220 struct obd_device *obddev = class_exp2obd(ll_i2sbi(dir)->ll_mdc_exp);
221 struct address_space *mapping = dir->i_mapping;
223 ldlm_policy_data_t policy = {.l_inodebits = {MDS_INODELOCK_UPDATE} };
/* look for an already-granted CR lock on this directory's resource */
226 fid_build_reg_res_name(ll_inode_lu_fid(dir), &res_id);
227 rc = ldlm_lock_match(obddev->obd_namespace, LDLM_FL_BLOCK_GRANTED,
228 &res_id, LDLM_IBITS, &policy, LCK_CR, &lockh);
/* no cached lock: enqueue a fresh one with a readdir intent */
230 struct lookup_intent it = { .it_op = IT_READDIR };
231 struct ldlm_enqueue_info einfo = { LDLM_IBITS, LCK_CR,
232 ll_mdc_blocking_ast, ldlm_completion_ast, NULL, dir };
233 struct ptlrpc_request *request;
234 struct mdc_op_data data = { { 0 } };
236 ll_prepare_mdc_op_data(&data, dir, NULL, NULL, 0, 0, NULL);
238 rc = mdc_enqueue(ll_i2sbi(dir)->ll_mdc_exp, &einfo, &it,
239 &data, &lockh, NULL, 0, 0);
241 request = (struct ptlrpc_request *)it.d.lustre.it_data;
243 ptlrpc_req_finished(request);
245 CERROR("lock enqueue: rc: %d\n", rc);
249 ldlm_lock_dump_handle(D_OTHER, &lockh);
/* read (or find cached) page `n'; filler is ll_dir_readpage */
251 page = read_cache_page(mapping, n,
252 (filler_t*)mapping->a_ops->readpage, NULL);
254 GOTO(out_unlock, page);
258 if (!PageUptodate(page))
/* run consistency checks once per page life */
260 if (!PageChecked(page))
261 ll_dir_check_page(dir, page);
266 ldlm_lock_decref(&lockh, LCK_CR);
271 page = ERR_PTR(-EIO);
/*
 * Re-align a possibly mid-entry offset (e.g. after lseek) to an actual
 * entry boundary: walk entries from the start of the enclosing chunk
 * (offset & mask) until reaching or passing `offset', and return the
 * byte offset of that entry.
 */
275 static inline unsigned ll_dir_validate_entry(char *base, unsigned offset,
278 struct ll_dir_entry *de = ll_entry_at(base, offset);
279 struct ll_dir_entry *p = ll_entry_at(base, offset & mask);
280 while (p < de && p->lde_rec_len > 0)
281 p = ll_dir_next_entry(p);
282 return (char *)p - base;
/*
 * Map on-disk LL_DIR_FT_* file-type codes (ext2-compatible) to the
 * kernel DT_* constants expected by filldir callbacks.
 */
286 * File type constants. The same as in ext2 for compatibility.
301 static unsigned char ll_dir_filetype_table[LL_DIR_FT_MAX] = {
302 [LL_DIR_FT_UNKNOWN] = DT_UNKNOWN,
303 [LL_DIR_FT_REG_FILE] = DT_REG,
304 [LL_DIR_FT_DIR] = DT_DIR,
305 [LL_DIR_FT_CHRDEV] = DT_CHR,
306 [LL_DIR_FT_BLKDEV] = DT_BLK,
307 [LL_DIR_FT_FIFO] = DT_FIFO,
308 [LL_DIR_FT_SOCK] = DT_SOCK,
309 [LL_DIR_FT_SYMLINK] = DT_LNK,
/*
 * Feed the live entries of one directory page to `filldir', starting at
 * *offset.  `base' is the page's byte offset in the directory; the
 * f_pos-style cookie passed to filldir is (base | in-page offset).
 * Entries with lde_inode == 0 are skipped (deleted/padding).  *offset is
 * updated to the entry currently being submitted.
 */
313 * Process one page. Returns:
315 * -ve: filldir commands readdir to stop.
316 * +ve: number of entries submitted to filldir.
317 * 0: no live entries on this page.
320 static int ll_readdir_page(char *addr, __u64 base, unsigned *offset,
321 filldir_t filldir, void *cookie)
323 struct ll_dir_entry *de;
327 de = ll_entry_at(addr, *offset);
/* last position a minimal record can start at on this page */
328 end = addr + CFS_PAGE_SIZE - ll_dir_rec_len(1);
329 for (nr = 0 ;(char*)de <= end; de = ll_dir_next_entry(de)) {
330 if (de->lde_inode != 0) {
332 *offset = (char *)de - addr;
/* file type is masked into table range before lookup */
333 if (filldir(cookie, de->lde_name, de->lde_name_len,
334 base | *offset, le32_to_cpu(de->lde_inode),
335 ll_dir_filetype_table[de->lde_file_type &
336 (LL_DIR_FT_MAX - 1)]))
/*
 * readdir implementation for servers speaking the 1.8 protocol
 * (offset-based directory positions).  Iterates pages from f_pos to
 * i_size, re-validating the in-page offset after an lseek()
 * (f_version mismatch), and records the final position in f_pos /
 * f_version before touching atime.
 */
343 static int ll_readdir_18(struct file *filp, void *dirent, filldir_t filldir)
345 struct inode *inode = filp->f_dentry->d_inode;
346 loff_t pos = filp->f_pos;
347 unsigned offset = pos & ~CFS_PAGE_MASK;
348 pgoff_t idx = pos >> CFS_PAGE_SHIFT;
349 pgoff_t npages = dir_pages(inode);
350 unsigned chunk_mask = ll_dir_page_mask(inode);
/* f_pos may have been moved by lseek() since the last readdir */
351 int need_revalidate = (filp->f_version != inode->i_version);
353 int done; /* when this becomes negative --- stop iterating */
357 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) pos %llu/%llu\n",
358 inode->i_ino, inode->i_generation, inode,
359 pos, i_size_read(inode));
362 * Checking ->i_size without the lock. Should be harmless, as server
/* position already past the last possible entry: nothing to do */
365 if (pos > i_size_read(inode) - ll_dir_rec_len(1))
368 for (done = 0; idx < npages; idx++, offset = 0) {
370 * We can assume that all blocks on this page are filled with
371 * entries, because ll_dir_check_page() placed special dummy
378 CDEBUG(D_EXT2,"read %lu of dir %lu/%u page %lu/%lu "
380 CFS_PAGE_SIZE, inode->i_ino, inode->i_generation,
381 idx, npages, i_size_read(inode));
382 page = ll_get_dir_page(inode, idx);
384 /* size might have been updated by mdc_readpage */
385 npages = dir_pages(inode);
389 CERROR("error reading dir %lu/%u page %lu: rc %d\n",
390 inode->i_ino, inode->i_generation, idx, rc);
394 kaddr = page_address(page);
395 if (need_revalidate) {
397 * File offset was changed by lseek() and possibly
398 * points in the middle of an entry. Re-scan from the
399 * beginning of the chunk.
401 offset = ll_dir_validate_entry(kaddr, offset,
405 done = ll_readdir_page(kaddr, idx << CFS_PAGE_SHIFT,
406 &offset, filldir, dirent);
410 * Some entries were sent to the user space, return
416 * filldir is satisfied.
/* persist position and version for the next readdir call */
421 filp->f_pos = (idx << CFS_PAGE_SHIFT) | offset;
422 filp->f_version = inode->i_version;
423 touch_atime(filp->f_vfsmnt, filp->f_dentry);
/*
 * Placeholder for tracking a chain of hash-overflow pages (entries whose
 * hashes collide across a page boundary).  init/fini are currently
 * no-ops.  hash_x_index() converts a 64-bit directory hash into a page
 * cache index; on 32-bit kernels with 64-bit hashes the index is
 * derived differently (body partly outside this view).
 */
429 * Chain of hash overflow pages.
431 struct ll_dir_chain {
432 /* XXX something. Later */
435 static inline void ll_dir_chain_init(struct ll_dir_chain *chain)
439 static inline void ll_dir_chain_fini(struct ll_dir_chain *chain)
443 static inline unsigned long hash_x_index(__u64 hash, int hash64)
446 if (BITS_PER_LONG == 32 && hash64)
/*
 * Wire format of 2.0-protocol readdir pages: struct lu_dirent (one
 * entry, hash-keyed, with optional trailing attributes such as a fid or
 * a file type) and struct lu_dirpage (page header with hash range and
 * flags, followed by packed entries).  All fields are little-endian on
 * the wire.
 */
453 * Layout of readdir pages, as transmitted on wire.
456 /** valid if LUDA_FID is set. */
457 struct lu_fid lde_fid;
458 /** a unique entry identifier: a hash or an offset. */
460 /** total record length, including all attributes. */
464 /** optional variable size attributes following this entry.
465 * taken from enum lu_dirent_attrs.
468 /** name is followed by the attributes indicated in ->ldp_attrs, in
469 * their natural order. After the last attribute, padding bytes are
470 * added to make ->lde_reclen a multiple of 8.
476 __u64 ldp_hash_start;
481 struct lu_dirent ldp_entries[0];
485 * Definitions of optional directory entry attributes formats.
487 * Individual attributes do not have their length encoded in a generic way. It
488 * is assumed that consumer of an attribute knows its format. This means that
489 * it is impossible to skip over an unknown attribute, except by skipping over all
490 * remaining attributes (by using ->lde_reclen), which is not too
491 * constraining, because new server versions will append new attributes at
492 * the end of an entry.
496 * Fid directory attribute: a fid of an object referenced by the entry. This
497 * will be almost always requested by the client and supplied by the server.
499 * Aligned to 8 bytes.
501 /* To have compatibility with 1.8, lets have fid in lu_dirent struct. */
506 * Aligned to 2 bytes.
512 enum lu_dirpage_flags {
/*
 * Compute the on-wire size of a lu_dirent with an n-byte name and the
 * given attribute mask.  A LUDA_TYPE attribute is aligned to
 * sizeof(struct luda_type); the total record is rounded up to a
 * multiple of 8 bytes.
 */
516 static inline int lu_dirent_calc_size(int namelen, __u16 attr)
520 if (attr & LUDA_TYPE) {
521 const unsigned align = sizeof(struct luda_type) - 1;
522 size = (sizeof(struct lu_dirent) + namelen + align) & ~align;
523 size += sizeof(struct luda_type);
525 size = sizeof(struct lu_dirent) + namelen;
527 return (size + 7) & ~7;
/*
 * Extract the file type of a wire-format entry: when LUDA_TYPE is
 * present, the luda_type attribute sits after the (alignment-padded)
 * name; its lt_type is converted via CFS_IFTODT.
 */
531 * return IF_* type for given lu_dirent entry.
532 * IF_* flag should be converted to particular OS file type in
533 * platform llite module.
535 __u16 ll_dirent_type_get(struct lu_dirent *ent)
538 struct luda_type *lt;
541 if (le32_to_cpu(ent->lde_attrs) & LUDA_TYPE) {
542 const unsigned align = sizeof(struct luda_type) - 1;
/* round the name length up to luda_type alignment to locate the attr */
544 len = le16_to_cpu(ent->lde_namelen);
545 len = (len + align) & ~align;
546 lt = (void *) ent->lde_name + len;
547 type = CFS_IFTODT(le16_to_cpu(lt->lt_type));
/*
 * First entry of a dirpage, or NULL (via the non-visible branch) when
 * the page is flagged LDF_EMPTY.
 */
552 static inline struct lu_dirent *lu_dirent_start(struct lu_dirpage *dp)
554 if (le16_to_cpu(dp->ldp_flags) & LDF_EMPTY)
557 return dp->ldp_entries;
/*
 * Advance to the next wire-format entry; lde_reclen == 0 marks the last
 * entry on the page (the reclen-0 case is handled outside this view).
 */
560 static inline struct lu_dirent *lu_dirent_next(struct lu_dirent *ent)
562 struct lu_dirent *next;
564 if (le16_to_cpu(ent->lde_reclen) != 0)
565 next = ((void *)ent) + le16_to_cpu(ent->lde_reclen);
/*
 * Size of one wire-format entry.  The last entry of a page has
 * lde_reclen == 0, so its size must be recomputed from name length and
 * attribute mask.
 */
572 static inline int lu_dirent_size(struct lu_dirent *ent)
574 if (le16_to_cpu(ent->lde_reclen) == 0) {
575 return lu_dirent_calc_size(le16_to_cpu(ent->lde_namelen),
576 le32_to_cpu(ent->lde_attrs));
578 return le16_to_cpu(ent->lde_reclen);
/*
 * Kernel-version compatibility: older kernels protect the page-cache
 * radix tree with an rwlock (read_lock_irq), newer ones with a spinlock.
 */
581 #ifdef HAVE_RW_TREE_LOCK
582 #define TREE_READ_LOCK_IRQ(mapping) read_lock_irq(&(mapping)->tree_lock)
583 #define TREE_READ_UNLOCK_IRQ(mapping) read_unlock_irq(&(mapping)->tree_lock)
585 #define TREE_READ_LOCK_IRQ(mapping) spin_lock_irq(&(mapping)->tree_lock)
586 #define TREE_READ_UNLOCK_IRQ(mapping) spin_unlock_irq(&(mapping)->tree_lock)
/*
 * Read one directory page from the MDS in 2.0 interoperability mode
 * (hash-keyed pages, struct mdt_body).  The hash to read from is taken
 * from the per-file-descriptor cursor fd_dir.lfd_next, which is why
 * `file' must not be NULL here (statahead is forced off in interop
 * mode).  Refreshes cached i_size when the reply carries OBD_MD_FLSIZE.
 */
589 /* returns the page unlocked, but with a reference */
590 static int ll_dir_readpage_20(struct file *file, struct page *page)
592 struct inode *inode = page->mapping->host;
593 struct ptlrpc_request *request;
594 struct mdt_body *body;
600 /*XXX: statahead is disabled by force under interoperability mode.
601 * So file must not be NULL here. Fix me when enable statahead
602 * under interoperability mode. */
603 LASSERT(file != NULL);
/* starting hash for this page comes from the fd's readdir cursor */
604 hash = ((struct ll_file_data *)LUSTRE_FPRIVATE(file))->fd_dir.lfd_next;
605 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) off %lu\n",
606 inode->i_ino, inode->i_generation, inode, (unsigned long)hash);
608 ll_inode2fid(&fid, inode);
609 rc = mdc_readpage(ll_i2sbi(inode)->ll_mdc_exp, &fid,
610 hash, page, &request);
612 body = lustre_msg_buf(request->rq_repmsg, REPLY_REC_OFF,
614 /* Checked by mdc_readpage() */
615 LASSERT(body != NULL);
/* only trust the size when the server says it is valid */
617 if (body->valid & OBD_MD_FLSIZE) {
618 ll_inode_size_lock(inode, 0);
619 i_size_write(inode, body->size);
620 ll_inode_size_unlock(inode, 0);
622 SetPageUptodate(page);
624 ptlrpc_req_finished(request);
/*
 * 2.0-format counterpart of ll_dir_check_page(): currently performs no
 * validation and only marks the page as checked.
 */
632 static void ll_check_page(struct inode *dir, struct page *page)
634 /* XXX: check page format later */
635 SetPageChecked(page);
/*
 * Locate a cached directory page whose hash range [ldp_hash_start,
 * ldp_hash_end] covers *hash.  The radix tree is keyed by the
 * complement of the hash, so radix_tree_gang_lookup() finds the page
 * with the nearest *smaller* starting hash.  On success *start/*end are
 * filled from the page header (shifted down on 32-bit/64-bit-hash
 * kernels); a page whose range no longer covers the hash is truncated
 * and dropped, and a non-uptodate page yields ERR_PTR(-EIO).
 */
640 * Find, kmap and return page that contains given hash.
642 static struct page *ll_dir_page_locate(struct inode *dir, __u64 *hash,
643 __u64 *start, __u64 *end)
645 int hash64 = ll_i2sbi(dir)->ll_flags & LL_SBI_64BIT_HASH;
646 struct address_space *mapping = dir->i_mapping;
648 * Complement of hash is used as an index so that
649 * radix_tree_gang_lookup() can be used to find a page with starting
650 * hash _smaller_ than one we are looking for.
652 unsigned long offset = hash_x_index(*hash, hash64);
657 TREE_READ_LOCK_IRQ(mapping);
658 found = radix_tree_gang_lookup(&mapping->page_tree,
659 (void **)&page, offset, 1);
661 struct lu_dirpage *dp;
/* pin the page before dropping the tree lock */
663 page_cache_get(page);
664 TREE_READ_UNLOCK_IRQ(mapping);
666 * In contrast to find_lock_page() we are sure that directory
667 * page cannot be truncated (while DLM lock is held) and,
668 * hence, can avoid restart.
670 * In fact, page cannot be locked here at all, because
671 * ll_dir_readpage() does synchronous io.
674 if (PageUptodate(page)) {
/* 32-bit API over 64-bit hashes: ranges are stored shifted */
676 if (BITS_PER_LONG == 32 && hash64) {
677 *start = le64_to_cpu(dp->ldp_hash_start) >> 32;
678 *end = le64_to_cpu(dp->ldp_hash_end) >> 32;
681 *start = le64_to_cpu(dp->ldp_hash_start);
682 *end = le64_to_cpu(dp->ldp_hash_end);
684 LASSERTF(*start <= *hash, "start = "LPX64",end = "
685 LPX64",hash = "LPX64"\n", *start, *end, *hash);
/* stale page: hash outside range (or at an exclusive end) */
686 if (*hash > *end || (*end != *start && *hash == *end)) {
689 truncate_complete_page(page->mapping, page);
691 page_cache_release(page);
695 page_cache_release(page);
696 page = ERR_PTR(-EIO);
700 TREE_READ_UNLOCK_IRQ(mapping);
/*
 * 2.0-protocol page fetch: return the directory page covering `hash',
 * under a CR UPDATE inodebits DLM lock (matched or enqueued via
 * IT_READDIR intent).  First probes the page cache with
 * ll_dir_page_locate(); with `exact' set, a cached page whose start
 * hash differs from the requested one is considered stale, truncated,
 * and re-read.  Handles page-wide hash collisions by warning and (per
 * the comment) fetching the overflow chain.  The lock is released at
 * out_unlock; a bad page yields ERR_PTR(-EIO).
 */
706 static struct page *ll_get_dir_page_20(struct file *filp, struct inode *dir,
707 __u64 hash, int exact,
708 struct ll_dir_chain *chain)
710 struct ldlm_res_id res_id;
711 struct lustre_handle lockh;
712 struct obd_device *obddev = class_exp2obd(ll_i2sbi(dir)->ll_mdc_exp);
713 struct address_space *mapping = dir->i_mapping;
714 struct lu_dirpage *dp;
716 ldlm_policy_data_t policy = {.l_inodebits = {MDS_INODELOCK_UPDATE} };
722 int hash64 = ll_i2sbi(dir)->ll_flags & LL_SBI_64BIT_HASH;
725 fid_build_reg_res_name(ll_inode_lu_fid(dir), &res_id);
727 rc = ldlm_lock_match(obddev->obd_namespace, LDLM_FL_BLOCK_GRANTED,
728 &res_id, LDLM_IBITS, &policy, mode, &lockh);
/* no granted lock cached: enqueue with a readdir intent */
730 struct lookup_intent it = { .it_op = IT_READDIR };
731 struct ldlm_enqueue_info einfo = { LDLM_IBITS, mode,
732 ll_mdc_blocking_ast, ldlm_completion_ast, NULL, dir };
733 struct ptlrpc_request *request;
734 struct mdc_op_data op_data = { { 0 } };
736 ll_prepare_mdc_op_data(&op_data, dir, NULL, NULL, 0, 0, NULL);
738 rc = mdc_enqueue(ll_i2sbi(dir)->ll_mdc_exp, &einfo, &it,
739 &op_data, &lockh, NULL, 0, 0);
741 request = (struct ptlrpc_request *)it.d.lustre.it_data;
743 ptlrpc_req_finished(request);
745 CERROR("lock enqueue: rc: %d\n", rc);
749 ldlm_lock_dump_handle(D_OTHER, &lockh);
/* fast path: page already cached and covering the hash */
751 page = ll_dir_page_locate(dir, &lhash, &start, &end);
753 GOTO(out_unlock, page);
757 * XXX nikita: not entirely correct handling of a corner case:
758 * suppose hash chain of entries with hash value HASH crosses
759 * border between pages P0 and P1. First both P0 and P1 are
760 * cached, seekdir() is called for some entry from the P0 part
761 * of the chain. Later P0 goes out of cache. telldir(HASH)
762 * happens and finds P1, as it starts with matching hash
763 * value. Remaining entries from P0 part of the chain are
764 * skipped. (Is that really a bug?)
766 * Possible solutions: 0. don't cache P1 is such case, handle
767 * it as an "overflow" page. 1. invalidate all pages at
768 * once. 2. use HASH|1 as an index for P1.
770 if (exact && hash != start) {
772 * readdir asked for a page starting _exactly_ from
773 * given hash, but cache contains stale page, with
774 * entries with smaller hash values. Stale page should
775 * be invalidated, and new one fetched.
777 CDEBUG(D_INFO, "Stale readpage page %p: %#lx != %#lx\n",
778 page, (unsigned long)lhash, (unsigned long)start);
780 truncate_complete_page(page->mapping, page);
782 page_cache_release(page);
784 GOTO(hash_collision, page);
/* cache miss: read the page via ll_dir_readpage_20 */
788 page = read_cache_page(mapping, hash_x_index(hash, hash64),
789 (filler_t*)ll_dir_readpage_20, filp);
791 GOTO(out_unlock, page);
795 if (!PageUptodate(page))
797 if (!PageChecked(page))
798 ll_check_page(dir, page);
802 dp = page_address(page);
804 if (BITS_PER_LONG == 32 && hash64) {
805 start = le64_to_cpu(dp->ldp_hash_start) >> 32;
806 end = le64_to_cpu(dp->ldp_hash_end) >> 32;
809 start = le64_to_cpu(dp->ldp_hash_start);
810 end = le64_to_cpu(dp->ldp_hash_end);
/* entire page shares one hash value: a page-wide collision */
814 LASSERT(start == lhash);
815 CWARN("Page-wide hash collision: "LPU64"\n", end);
816 if (BITS_PER_LONG == 32 && hash64)
817 CWARN("Real page-wide hash collision at ["LPU64" "LPU64
818 "] with hash "LPU64"\n",
819 le64_to_cpu(dp->ldp_hash_start),
820 le64_to_cpu(dp->ldp_hash_end), hash);
822 * Fetch whole overflow chain...
829 ldlm_lock_decref(&lockh, mode);
834 page = ERR_PTR(-EIO);
/*
 * readdir implementation for 2.0-protocol servers: positions are
 * 64-bit hashes, kept in the per-fd cursor fd_dir.lfd_pos/lfd_next
 * rather than directly in f_pos.  Iterates hash-keyed pages until
 * MDS_DIR_END_OFF, calling filldir for each entry past the target
 * hash; inode numbers are built from fids, file types via
 * ll_dirent_type_get().  On completion the hash position is folded
 * into f_pos (truncated/shifted for the 32-bit API).
 */
838 static int ll_readdir_20(struct file *filp, void *cookie, filldir_t filldir)
840 struct inode *inode = filp->f_dentry->d_inode;
841 struct ll_sb_info *sbi = ll_i2sbi(inode);
842 struct ll_file_data *fd = LUSTRE_FPRIVATE(filp);
843 __u64 pos = fd->fd_dir.lfd_pos;
844 int api32 = ll_need_32bit_api(sbi);
845 int hash64= sbi->ll_flags & LL_SBI_64BIT_HASH;
847 struct ll_dir_chain chain;
853 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) pos %lu/%llu 32bit_api %d\n",
854 inode->i_ino, inode->i_generation, inode,
855 (unsigned long)pos, i_size_read(inode), api32);
/* cursor already at end-of-directory sentinel: nothing to emit */
857 if (pos == MDS_DIR_END_OFF)
866 ll_dir_chain_init(&chain);
868 fd->fd_dir.lfd_next = pos;
869 page = ll_get_dir_page_20(filp, inode, pos, 0, &chain);
872 while (rc == 0 && !done) {
873 struct lu_dirpage *dp;
874 struct lu_dirent *ent;
878 * If page is empty (end of directoryis reached),
881 __u64 hash = MDS_DIR_END_OFF;
884 dp = page_address(page);
885 for (ent = lu_dirent_start(dp); ent != NULL && !done;
886 ent = lu_dirent_next(ent)) {
893 hash = le64_to_cpu(ent->lde_hash);
896 * Skip until we find target hash
901 namelen = le16_to_cpu(ent->lde_namelen);
/* inode number is synthesized from the entry's fid */
908 fid_le_to_cpu(&fid, &ent->lde_fid);
909 ino = ll_fid_build_ino((struct ll_fid *)&fid,
915 type = ll_dirent_type_get(ent);
916 done = filldir(cookie, ent->lde_name, namelen,
/* next page starts where this page's hash range ends */
919 next = le64_to_cpu(dp->ldp_hash_end);
923 if (pos == MDS_DIR_END_OFF) {
925 * End of directory reached.
928 } else if (1 /* chain is exhausted*/) {
930 * Normal case: continue to the next
933 fd->fd_dir.lfd_next = pos;
934 page = ll_get_dir_page_20(filp, inode,
939 * go into overflow page.
947 CERROR("error reading dir "DFID" at %lu: rc %d\n",
948 PFID(ll_inode_lu_fid(inode)),
949 (unsigned long)pos, rc);
/* save the hash cursor; f_pos gets the API-appropriate encoding */
953 fd->fd_dir.lfd_pos = pos;
954 if (pos == MDS_DIR_END_OFF) {
956 filp->f_pos = LL_DIR_END_OFF_32BIT;
958 filp->f_pos = LL_DIR_END_OFF;
961 filp->f_pos = pos >> 32;
965 filp->f_version = inode->i_version;
966 touch_atime(filp->f_vfsmnt, filp->f_dentry);
968 ll_dir_chain_fini(&chain);
/*
 * Top-level readdir dispatcher: route to the 2.0 (fid/hash) path when
 * the MDC export negotiated OBD_CONNECT_FID, otherwise to the legacy
 * 1.8 (offset) path.
 */
973 static int ll_readdir(struct file *filp, void *cookie, filldir_t filldir)
975 struct inode *inode = filp->f_dentry->d_inode;
976 struct ll_sb_info *sbi = ll_i2sbi(inode);
978 if (sbi->ll_mdc_exp->exp_connect_flags & OBD_CONNECT_FID) {
979 return ll_readdir_20(filp, cookie, filldir);
981 return ll_readdir_18(filp, cookie, filldir);
/* Field-by-field copy of a quota-control structure (obd_quotactl). */
985 #define QCTL_COPY(out, in) \
987 Q_COPY(out, in, qc_cmd); \
988 Q_COPY(out, in, qc_type); \
989 Q_COPY(out, in, qc_id); \
990 Q_COPY(out, in, qc_stat); \
991 Q_COPY(out, in, qc_dqinfo); \
992 Q_COPY(out, in, qc_dqblk); \
/*
 * Send one "key=value" configuration string to the MGS through the MGC
 * export via obd_set_info_async(KEY_SET_INFO).
 *
 * NOTE(review): strncpy() does not guarantee NUL termination when
 * strlen(string) >= MGS_PARAM_MAXLEN -- confirm callers bound the
 * parameter length (or that msp is zero-filled on allocation, which is
 * outside this view).
 */
995 static int ll_send_mgc_param(struct obd_export *mgc, char *string)
997 struct mgs_send_param *msp;
1004 strncpy(msp->mgs_param, string, MGS_PARAM_MAXLEN);
1005 rc = obd_set_info_async(mgc, sizeof(KEY_SET_INFO), KEY_SET_INFO,
1006 sizeof(struct mgs_send_param), msp, NULL);
1008 CERROR("Failed to set parameter: %d\n", rc);
/*
 * Return a freshly OBD_ALLOC'd copy of the filesystem name, derived
 * from the mount profile with any trailing "-client" suffix stripped
 * (the len adjustment for that case is outside this view).  Caller
 * frees with OBD_FREE(fsname, MGS_PARAM_MAXLEN).
 *
 * NOTE(review): NUL termination of fsname relies on the allocation
 * being zero-filled (OBD_ALLOC convention) since strncpy copies exactly
 * `len' bytes -- confirm against the full source.
 */
1014 static char *ll_get_fsname(struct inode *inode)
1016 struct lustre_sb_info *lsi = s2lsi(inode->i_sb);
1020 OBD_ALLOC(fsname, MGS_PARAM_MAXLEN);
1021 len = strlen(lsi->lsi_lmd->lmd_profile);
1022 ptr = strrchr(lsi->lsi_lmd->lmd_profile, '-');
1023 if (ptr && (strcmp(ptr, "-client") == 0))
1025 strncpy(fsname, lsi->lsi_lmd->lmd_profile, len);
/*
 * Set the default LOV striping of a directory via mdc_setattr().
 * `lump' may be a v1 or v3 lov_user_md (NULL removes the LOV EA); it is
 * swabbed to little-endian before sending if needed.  When
 * `set_default' is set on the filesystem root, the stripe size, count
 * and offset are also pushed to the MGS as persistent
 * "<fsname>-MDT0000.lov.*" parameters.
 */
1031 int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump,
1034 struct ll_sb_info *sbi = ll_i2sbi(inode);
1035 struct mdc_op_data data = { { 0 } };
1036 struct ptlrpc_request *req = NULL;
1037 struct lustre_sb_info *lsi = s2lsi(inode->i_sb);
1038 struct obd_device *mgc = lsi->lsi_mgc;
1039 char *fsname = NULL, *param = NULL;
1040 struct iattr attr = { 0 };
1041 int lum_size = 0, rc = 0;
1044 if (lump->lmm_magic == LOV_USER_MAGIC_V3)
1045 lum_size = sizeof(struct lov_user_md_v3);
1047 lum_size = sizeof(struct lov_user_md_v1);
1049 * This is coming from userspace, so should be in
1050 * local endian. But the MDS would like it in little
1051 * endian, so we swab it before we send it.
/* magic not already little-endian: swab the whole structure */
1053 if ((lump->lmm_magic != cpu_to_le32(LOV_USER_MAGIC_V1)) &&
1054 (lump->lmm_magic != cpu_to_le32(LOV_USER_MAGIC_V3))) {
1055 rc = lustre_swab_lov_user_md(lump);
1059 } else { /* NULL value means remove LOV EA */
1060 lum_size = sizeof(struct lov_user_md_v1);
1063 ll_prepare_mdc_op_data(&data, inode, NULL, NULL, 0, 0, NULL);
1065 /* swabbing is done in lov_setstripe() on server side */
1066 rc = mdc_setattr(sbi->ll_mdc_exp, &data,
1067 &attr, lump, lum_size, NULL, 0, &req);
1069 ptlrpc_req_finished(req);
/* EPERM/EACCES are expected (permission) failures; stay quiet */
1070 if (rc != -EPERM && rc != -EACCES)
1071 CERROR("mdc_setattr fails: rc = %d\n", rc);
1074 ptlrpc_req_finished(req);
1076 /* In the following we use the fact that LOV_USER_MAGIC_V1 and
1077 LOV_USER_MAGIC_V3 have the same initial fields so we do not
1078 need to make the distinction between the 2 versions */
1079 if (set_default && mgc->u.cli.cl_mgc_mgsexp) {
1080 OBD_ALLOC(param, MGS_PARAM_MAXLEN);
1082 /* Get fsname and assume devname to be -MDT0000. */
1083 fsname = ll_get_fsname(inode);
1084 /* Set root stripesize */
1085 sprintf(param, "%s-MDT0000.lov.stripesize=%u", fsname,
1086 lump ? le32_to_cpu(lump->lmm_stripe_size) : 0);
1087 rc = ll_send_mgc_param(mgc->u.cli.cl_mgc_mgsexp, param);
1091 /* Set root stripecount */
1092 sprintf(param, "%s-MDT0000.lov.stripecount=%u", fsname,
1093 lump ? le16_to_cpu(lump->lmm_stripe_count) : 0);
1094 rc = ll_send_mgc_param(mgc->u.cli.cl_mgc_mgsexp, param);
1098 /* Set root stripeoffset */
1099 sprintf(param, "%s-MDT0000.lov.stripeoffset=%u", fsname,
1100 lump ? le16_to_cpu(lump->lmm_stripe_offset) :
1101 (typeof(lump->lmm_stripe_offset))(-1));
1102 rc = ll_send_mgc_param(mgc->u.cli.cl_mgc_mgsexp, param);
1107 OBD_FREE(fsname, MGS_PARAM_MAXLEN);
1109 OBD_FREE(param, MGS_PARAM_MAXLEN);
/*
 * Fetch the directory's default LOV EA from the MDS via mdc_getattr().
 * On success *lmmp points into the reply buffer (the request is handed
 * back through *request so the caller controls its lifetime) and
 * *lmm_size is the EA size.  Returns -ENODATA when the directory has
 * no striping EA.  The EA is converted to host endianness before being
 * exposed (object array is not swabbed for directories).
 */
1114 int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmmp,
1115 int *lmm_size, struct ptlrpc_request **request)
1117 struct ll_sb_info *sbi = ll_i2sbi(inode);
1119 struct mds_body *body;
1120 struct lov_mds_md *lmm = NULL;
1121 struct ptlrpc_request *req = NULL;
1124 ll_inode2fid(&fid, inode);
/* size the getattr buffer for the largest possible EA */
1126 rc = ll_get_max_mdsize(sbi, &lmmsize);
1130 rc = mdc_getattr(sbi->ll_mdc_exp, &fid,
1131 OBD_MD_FLEASIZE|OBD_MD_FLDIREA,
1134 CDEBUG(D_INFO, "mdc_getattr failed on inode "
1135 "%lu/%u: rc %d\n", inode->i_ino,
1136 inode->i_generation, rc);
1139 body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF,
1141 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1142 /* swabbed by mdc_getattr_name */
1143 LASSERT(lustre_rep_swabbed(req, REPLY_REC_OFF));
1145 lmmsize = body->eadatasize;
/* no EA bits valid or zero-sized EA: directory has no striping */
1147 if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1149 GOTO(out, rc = -ENODATA);
1152 lmm = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF + 1, lmmsize);
1153 LASSERT(lmm != NULL);
1154 LASSERT(lustre_rep_swabbed(req, REPLY_REC_OFF + 1));
1157 * This is coming from the MDS, so is probably in
1158 * little endian. We convert it to host endian before
1159 * passing it to userspace.
1161 /* We don't swab objects for directories */
1162 if (((le32_to_cpu(lmm->lmm_magic) == LOV_MAGIC_V1) ||
1163 (le32_to_cpu(lmm->lmm_magic) == LOV_MAGIC_V3)) &&
1164 (LOV_MAGIC != cpu_to_le32(LOV_MAGIC))) {
1165 rc = lustre_swab_lov_user_md((struct lov_user_md*)lmm);
1172 *lmm_size = lmmsize;
1177 static int ll_dir_ioctl(struct inode *inode, struct file *file,
1178 unsigned int cmd, unsigned long arg)
1180 struct ll_sb_info *sbi = ll_i2sbi(inode);
1181 struct obd_ioctl_data *data;
1184 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), cmd=%#x\n",
1185 inode->i_ino, inode->i_generation, inode, cmd);
1187 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
1188 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
1191 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
1193 case FSFILT_IOC_GETFLAGS:
1194 case FSFILT_IOC_SETFLAGS:
1195 RETURN(ll_iocontrol(inode, file, cmd, arg));
1196 case FSFILT_IOC_GETVERSION_OLD:
1197 case FSFILT_IOC_GETVERSION:
1198 RETURN(put_user(inode->i_generation, (int *)arg));
1199 /* We need to special case any other ioctls we want to handle,
1200 * to send them to the MDS/OST as appropriate and to properly
1201 * network encode the arg field.
1202 case EXT3_IOC_SETVERSION_OLD:
1203 case EXT3_IOC_SETVERSION:
1205 case IOC_MDC_LOOKUP: {
1206 struct ptlrpc_request *request = NULL;
1210 int namelen, rc, len = 0;
1212 rc = obd_ioctl_getdata(&buf, &len, (void *)arg);
1217 filename = data->ioc_inlbuf1;
1218 namelen = data->ioc_inllen1;
1221 CDEBUG(D_INFO, "IOC_MDC_LOOKUP missing filename\n");
1222 GOTO(out, rc = -EINVAL);
1225 ll_inode2fid(&fid, inode);
1226 rc = mdc_getattr_name(sbi->ll_mdc_exp, &fid, filename, namelen,
1227 OBD_MD_FLID, 0, &request);
1229 CDEBUG(D_INFO, "mdc_getattr_name: %d\n", rc);
1233 ptlrpc_req_finished(request);
1237 obd_ioctl_freedata(buf, len);
1240 case LL_IOC_LOV_SETSTRIPE: {
1241 struct lov_user_md_v3 lumv3;
1242 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
1243 struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
1244 struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
1247 int set_default = 0;
1249 LASSERT(sizeof(lumv3) == sizeof(*lumv3p));
1250 LASSERT(sizeof(lumv3.lmm_objects[0]) ==
1251 sizeof(lumv3p->lmm_objects[0]));
1253 /* first try with v1 which is smaller than v3 */
1254 if (copy_from_user(lumv1, lumv1p, sizeof(*lumv1)))
1257 if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
1258 if (copy_from_user(&lumv3, lumv3p, sizeof(lumv3)))
1262 if (inode->i_sb->s_root == file->f_dentry)
1265 /* in v1 and v3 cases lumv1 points to data */
1266 rc = ll_dir_setstripe(inode, lumv1, set_default);
1270 case LL_IOC_OBD_STATFS:
1271 RETURN(ll_obd_statfs(inode, (void *)arg));
1272 case LL_IOC_LOV_GETSTRIPE:
1273 case LL_IOC_MDC_GETINFO:
1274 case IOC_MDC_GETFILEINFO:
1275 case IOC_MDC_GETFILESTRIPE: {
1276 struct ptlrpc_request *request = NULL;
1277 struct mds_body *body;
1278 struct lov_user_md *lump;
1279 struct lov_mds_md *lmm = NULL;
1280 char *filename = NULL;
1283 if (cmd == IOC_MDC_GETFILEINFO ||
1284 cmd == IOC_MDC_GETFILESTRIPE) {
1285 filename = getname((const char *)arg);
1286 if (IS_ERR(filename))
1287 RETURN(PTR_ERR(filename));
1289 rc = ll_lov_getstripe_ea_info(inode, filename, &lmm,
1290 &lmmsize, &request);
1292 rc = ll_dir_getstripe(inode, &lmm, &lmmsize, &request);
1296 body = lustre_msg_buf(request->rq_repmsg, REPLY_REC_OFF,
1298 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1299 /* swabbed by mdc_getattr_name */
1300 LASSERT(lustre_rep_swabbed(request, REPLY_REC_OFF));
1306 if (rc == -ENODATA && (cmd == IOC_MDC_GETFILEINFO ||
1307 cmd == LL_IOC_MDC_GETINFO))
1308 GOTO(skip_lmm, rc = 0);
1313 if (cmd == IOC_MDC_GETFILESTRIPE ||
1314 cmd == LL_IOC_LOV_GETSTRIPE) {
1315 lump = (struct lov_user_md *)arg;
1317 struct lov_user_mds_data *lmdp;
1318 lmdp = (struct lov_user_mds_data *)arg;
1319 lump = &lmdp->lmd_lmm;
1321 if (copy_to_user(lump, lmm, lmmsize) != 0) {
1322 if (copy_to_user(lump, lmm, sizeof(*lump)) != 0)
1323 GOTO(out_lmm, rc = -EFAULT);
1327 if (cmd == IOC_MDC_GETFILEINFO || cmd == LL_IOC_MDC_GETINFO) {
1328 struct lov_user_mds_data *lmdp;
1331 st.st_dev = inode->i_sb->s_dev;
1332 st.st_mode = body->mode;
1333 st.st_nlink = body->nlink;
1334 st.st_uid = body->uid;
1335 st.st_gid = body->gid;
1336 st.st_rdev = body->rdev;
1337 st.st_size = body->size;
1338 st.st_blksize = CFS_PAGE_SIZE;
1339 st.st_blocks = body->blocks;
1340 st.st_atime = body->atime;
1341 st.st_mtime = body->mtime;
1342 st.st_ctime = body->ctime;
1343 st.st_ino = body->ino;
1345 lmdp = (struct lov_user_mds_data *)arg;
1346 if (copy_to_user(&lmdp->lmd_st, &st, sizeof(st)))
1347 GOTO(out_lmm, rc = -EFAULT);
1352 if (lmm && lmm->lmm_magic == LOV_MAGIC_JOIN)
1353 OBD_FREE(lmm, lmmsize);
1355 ptlrpc_req_finished(request);
1360 case IOC_LOV_GETINFO: {
1361 struct lov_user_mds_data *lumd;
1362 struct lov_stripe_md *lsm;
1363 struct lov_user_md *lum;
1364 struct lov_mds_md *lmm;
1369 lumd = (struct lov_user_mds_data *)arg;
1370 lum = &lumd->lmd_lmm;
1372 rc = ll_get_max_mdsize(sbi, &lmmsize);
1376 OBD_ALLOC(lmm, lmmsize);
1377 if (copy_from_user(lmm, lum, lmmsize))
1378 GOTO(free_lmm, rc = -EFAULT);
1380 if (LOV_USER_MAGIC != cpu_to_le32(LOV_USER_MAGIC)) {
1381 rc = lustre_swab_lov_user_md(
1382 (struct lov_user_md_v1 *)lmm);
1385 rc = lustre_swab_lov_user_md_objects(
1386 (struct lov_user_md*)lmm);
1391 rc = obd_unpackmd(sbi->ll_osc_exp, &lsm, lmm, lmmsize);
1393 GOTO(free_lmm, rc = -ENOMEM);
1395 rc = obd_checkmd(sbi->ll_osc_exp, sbi->ll_mdc_exp, lsm);
1399 /* Perform glimpse_size operation. */
1400 memset(&st, 0, sizeof(st));
1402 rc = ll_glimpse_ioctl(sbi, lsm, &st);
1406 if (copy_to_user(&lumd->lmd_st, &st, sizeof(st)))
1407 GOTO(free_lsm, rc = -EFAULT);
1411 obd_free_memmd(sbi->ll_osc_exp, &lsm);
1413 OBD_FREE(lmm, lmmsize);
1416 case OBD_IOC_LLOG_CATINFO: {
1417 struct ptlrpc_request *req = NULL;
1420 char *bufs[3] = { NULL }, *str;
1421 int lens[3] = { sizeof(struct ptlrpc_body) };
1422 int size[2] = { sizeof(struct ptlrpc_body) };
1424 rc = obd_ioctl_getdata(&buf, &len, (void *)arg);
1429 if (!data->ioc_inlbuf1) {
1430 obd_ioctl_freedata(buf, len);
1434 lens[REQ_REC_OFF] = data->ioc_inllen1;
1435 bufs[REQ_REC_OFF] = data->ioc_inlbuf1;
1436 if (data->ioc_inllen2) {
1437 lens[REQ_REC_OFF + 1] = data->ioc_inllen2;
1438 bufs[REQ_REC_OFF + 1] = data->ioc_inlbuf2;
1440 lens[REQ_REC_OFF + 1] = 0;
1441 bufs[REQ_REC_OFF + 1] = NULL;
1444 req = ptlrpc_prep_req(sbi2mdc(sbi)->cl_import,
1445 LUSTRE_LOG_VERSION, LLOG_CATINFO, 3, lens,
1448 GOTO(out_catinfo, rc = -ENOMEM);
1450 size[REPLY_REC_OFF] = data->ioc_plen1;
1451 ptlrpc_req_set_repsize(req, 2, size);
1453 rc = ptlrpc_queue_wait(req);
1454 str = lustre_msg_string(req->rq_repmsg, REPLY_REC_OFF,
1457 if (copy_to_user(data->ioc_pbuf1, str,data->ioc_plen1))
1459 ptlrpc_req_finished(req);
1461 obd_ioctl_freedata(buf, len);
1464 case OBD_IOC_QUOTACHECK: {
1465 struct obd_quotactl *oqctl;
1468 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1471 OBD_ALLOC_PTR(oqctl);
1474 oqctl->qc_type = arg;
1475 rc = obd_quotacheck(sbi->ll_mdc_exp, oqctl);
1477 CDEBUG(D_INFO, "mdc_quotacheck failed: rc %d\n", rc);
1481 rc = obd_quotacheck(sbi->ll_osc_exp, oqctl);
1483 CDEBUG(D_INFO, "osc_quotacheck failed: rc %d\n", rc);
1485 OBD_FREE_PTR(oqctl);
1488 case OBD_IOC_POLL_QUOTACHECK: {
1489 struct if_quotacheck *check;
1492 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1495 OBD_ALLOC_PTR(check);
1499 rc = obd_iocontrol(cmd, sbi->ll_mdc_exp, 0, (void *)check,
1502 CDEBUG(D_QUOTA, "mdc ioctl %d failed: %d\n", cmd, rc);
1503 if (copy_to_user((void *)arg, check, sizeof(*check)))
1504 CDEBUG(D_QUOTA, "copy_to_user failed\n");
1508 rc = obd_iocontrol(cmd, sbi->ll_osc_exp, 0, (void *)check,
1511 CDEBUG(D_QUOTA, "osc ioctl %d failed: %d\n", cmd, rc);
1512 if (copy_to_user((void *)arg, check, sizeof(*check)))
1513 CDEBUG(D_QUOTA, "copy_to_user failed\n");
1517 OBD_FREE_PTR(check);
1520 case OBD_IOC_QUOTACTL: {
1521 struct if_quotactl *qctl;
1522 struct obd_quotactl *oqctl;
1524 int cmd, type, id, rc = 0;
1526 OBD_ALLOC_PTR(qctl);
1530 OBD_ALLOC_PTR(oqctl);
1535 if (copy_from_user(qctl, (void *)arg, sizeof(*qctl)))
1536 GOTO(out_quotactl, rc = -EFAULT);
1539 type = qctl->qc_type;
1542 case LUSTRE_Q_INVALIDATE:
1543 case LUSTRE_Q_FINVALIDATE:
1548 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1549 GOTO(out_quotactl, rc = -EPERM);
1552 if (((type == USRQUOTA && cfs_curproc_euid() != id) ||
1553 (type == GRPQUOTA && !in_egroup_p(id))) &&
1554 !cfs_capable(CFS_CAP_SYS_ADMIN))
1555 GOTO(out_quotactl, rc = -EPERM);
1557 /* XXX: dqb_valid is borrowed as a flag to mark that
1558 * only mds quota is wanted */
1559 if (qctl->qc_dqblk.dqb_valid) {
1560 qctl->obd_uuid = sbi->ll_mdc_exp->exp_obd->
1561 u.cli.cl_target_uuid;
1562 qctl->qc_dqblk.dqb_valid = 0;
1569 CERROR("unsupported quotactl op: %#x\n", cmd);
1570 GOTO(out_quotactl, -ENOTTY);
1573 QCTL_COPY(oqctl, qctl);
1575 if (qctl->obd_uuid.uuid[0]) {
1576 struct obd_device *obd;
1577 struct obd_uuid *uuid = &qctl->obd_uuid;
1579 obd = class_find_client_notype(uuid,
1580 &sbi->ll_osc_exp->exp_obd->obd_uuid);
1582 GOTO(out_quotactl, rc = -ENOENT);
1584 if (cmd == Q_GETINFO)
1585 oqctl->qc_cmd = Q_GETOINFO;
1586 else if (cmd == Q_GETQUOTA)
1587 oqctl->qc_cmd = Q_GETOQUOTA;
1589 GOTO(out_quotactl, rc = -EINVAL);
1591 if (sbi->ll_mdc_exp->exp_obd == obd) {
1592 rc = obd_quotactl(sbi->ll_mdc_exp, oqctl);
1595 struct obd_export *exp;
1596 struct lov_obd *lov = &sbi->ll_osc_exp->
1599 for (i = 0; i < lov->desc.ld_tgt_count; i++) {
1600 if (!lov->lov_tgts[i] ||
1601 !lov->lov_tgts[i]->ltd_active)
1603 exp = lov->lov_tgts[i]->ltd_exp;
1604 if (exp->exp_obd == obd) {
1605 rc = obd_quotactl(exp, oqctl);
1611 oqctl->qc_cmd = cmd;
1612 QCTL_COPY(qctl, oqctl);
1614 if (copy_to_user((void *)arg, qctl, sizeof(*qctl)))
1617 GOTO(out_quotactl, rc);
1620 rc = obd_quotactl(sbi->ll_mdc_exp, oqctl);
1621 if (rc && rc != -EBUSY && cmd == Q_QUOTAON) {
1622 oqctl->qc_cmd = Q_QUOTAOFF;
1623 obd_quotactl(sbi->ll_mdc_exp, oqctl);
1626 /* If QIF_SPACE is not set, client should collect the
1627 * space usage from OSSs by itself */
1628 if (cmd == Q_GETQUOTA &&
1629 !(oqctl->qc_dqblk.dqb_valid & QIF_SPACE) &&
1630 !oqctl->qc_dqblk.dqb_curspace) {
1631 struct obd_quotactl *oqctl_tmp;
1633 OBD_ALLOC_PTR(oqctl_tmp);
1634 if (oqctl_tmp == NULL)
1635 GOTO(out_quotactl, rc = -ENOMEM);
1637 oqctl_tmp->qc_cmd = Q_GETOQUOTA;
1638 oqctl_tmp->qc_id = oqctl->qc_id;
1639 oqctl_tmp->qc_type = oqctl->qc_type;
1641 /* collect space usage from OSTs */
1642 oqctl_tmp->qc_dqblk.dqb_curspace = 0;
1643 rc = obd_quotactl(sbi->ll_osc_exp, oqctl_tmp);
1644 if (!rc || rc == -EREMOTEIO) {
1645 oqctl->qc_dqblk.dqb_curspace =
1646 oqctl_tmp->qc_dqblk.dqb_curspace;
1647 oqctl->qc_dqblk.dqb_valid |= QIF_SPACE;
1650 /* collect space & inode usage from MDTs */
1651 oqctl_tmp->qc_dqblk.dqb_curspace = 0;
1652 oqctl_tmp->qc_dqblk.dqb_curinodes = 0;
1653 rc = obd_quotactl(sbi->ll_mdc_exp, oqctl_tmp);
1654 if (!rc || rc == -EREMOTEIO) {
1655 oqctl->qc_dqblk.dqb_curspace +=
1656 oqctl_tmp->qc_dqblk.dqb_curspace;
1657 oqctl->qc_dqblk.dqb_curinodes =
1658 oqctl_tmp->qc_dqblk.dqb_curinodes;
1659 oqctl->qc_dqblk.dqb_valid |= QIF_INODES;
1661 oqctl->qc_dqblk.dqb_valid &= ~QIF_SPACE;
1664 OBD_FREE_PTR(oqctl_tmp);
1667 QCTL_COPY(qctl, oqctl);
1669 if (copy_to_user((void *)arg, qctl, sizeof(*qctl)))
1673 OBD_FREE_PTR(oqctl);
1676 case OBD_IOC_GETNAME_OLD:
1677 case OBD_IOC_GETNAME: {
1678 struct obd_device *obd = class_exp2obd(sbi->ll_osc_exp);
1681 if (copy_to_user((void *)arg, obd->obd_name,
1682 strlen(obd->obd_name) + 1))
1686 case LL_IOC_PATH2FID: {
1687 if (copy_to_user((void *)arg, ll_inode_lu_fid(inode),
1688 sizeof(struct lu_fid)))
1693 case LL_IOC_GET_CONNECT_FLAGS: {
1694 if (copy_to_user((void *)arg,
1695 &sbi->ll_mdc_exp->exp_connect_flags,
1701 RETURN(obd_iocontrol(cmd, sbi->ll_osc_exp,0,NULL,(void *)arg));
/*
 * ll_dir_seek(): llseek implementation for Lustre directory files.
 *
 * NOTE(review): this view of the file is a sampled extract -- the
 * function's opening/closing braces and several body lines (including
 * the 'origin' switch labels and the final return) are missing here,
 * so the comments below describe only what the visible lines show.
 */
1705 static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin)
1707 struct inode *inode = file->f_mapping->host;
1708 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1709 struct ll_sb_info *sbi = ll_i2sbi(inode);
/* non-zero when the caller needs 32-bit directory offsets (compat API) */
1710 int api32 = ll_need_32bit_api(sbi);
1711 loff_t ret = -EINVAL;
/* Without OBD_CONNECT_FID from the MDC, fall back to the generic
 * VFS llseek -- presumably dir hashes aren't usable as stable
 * offsets in that case (TODO confirm against server docs). */
1714 if (!(sbi->ll_mdc_exp->exp_connect_flags & OBD_CONNECT_FID))
1715 return default_llseek(file, offset, origin);
/* i_mutex serializes the f_pos / lfd_pos updates below with readdir */
1717 mutex_lock(&inode->i_mutex);
/* (inside the origin switch, not fully visible) SEEK_CUR case:
 * make the offset relative to the current position */
1722 offset += file->f_pos;
/* SEEK_END-style cases: bias by the directory end-of-file sentinel,
 * 32-bit or 64-bit form depending on the caller's API width */
1728 offset += LL_DIR_END_OFF_32BIT;
1730 offset += LL_DIR_END_OFF;
/* Only accept offsets within the valid range for this API width
 * (the condition's first operand is on a line missing from this view) */
1737 ((api32 && offset <= LL_DIR_END_OFF_32BIT) ||
1738 (!api32 && offset <= LL_DIR_END_OFF))) {
1739 if (offset != file->f_pos) {
/* Map the user-visible end offset to the MDS end-of-dir cookie */
1740 if ((api32 && offset == LL_DIR_END_OFF_32BIT) ||
1741 (!api32 && offset == LL_DIR_END_OFF))
1742 fd->fd_dir.lfd_pos = MDS_DIR_END_OFF;
/* 32-bit API against a 64-bit-hash server: widen the 32-bit
 * offset into the high half of the 64-bit hash position */
1743 else if (api32 && sbi->ll_flags & LL_SBI_64BIT_HASH)
1744 fd->fd_dir.lfd_pos = offset << 32;
1746 fd->fd_dir.lfd_pos = offset;
1747 file->f_pos = offset;
/* invalidate any cached readdir state tied to the old position */
1748 file->f_version = 0;
1755 mutex_unlock(&inode->i_mutex);
1759 struct file_operations ll_dir_operations = {
1760 .open = ll_file_open,
1761 .llseek = ll_dir_seek,
1762 .release = ll_file_release,
1763 .read = generic_read_dir,
1764 .readdir = ll_readdir,
1765 .ioctl = ll_dir_ioctl,