/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2011, 2014, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * cl code shared between vvp and liblustre (and other Lustre clients in the
 * future).
 *
 *   Author: Nikita Danilov <nikita.danilov@sun.com>
 */
#define DEBUG_SUBSYSTEM S_LLITE

#include <libcfs/libcfs.h>
#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/quotaops.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/rbtree.h>

#include <obd.h>
#include <obd_support.h>
#include <lustre_fid.h>
#include <lustre_dlm.h>
#include <lustre_ver.h>
#include <lustre_mdc.h>
#include <cl_object.h>

#include "llite_internal.h"
static const struct cl_req_operations ccc_req_ops;

/*
 * ccc_ prefix stands for "Common Client Code".
 */

static struct kmem_cache *ccc_thread_kmem;
static struct kmem_cache *ccc_req_kmem;

static struct lu_kmem_descr ccc_caches[] = {
        {
                .ckd_cache = &ccc_thread_kmem,
                .ckd_name  = "ccc_thread_kmem",
                .ckd_size  = sizeof (struct ccc_thread_info),
        },
        {
                .ckd_cache = &ccc_req_kmem,
                .ckd_name  = "ccc_req_kmem",
                .ckd_size  = sizeof (struct ccc_req)
        },
        {
                .ckd_cache = NULL
        }
};
/*****************************************************************************
 *
 * Vvp device and device type functions.
 *
 */
void *ccc_key_init(const struct lu_context *ctx, struct lu_context_key *key)
{
        struct ccc_thread_info *info;

        OBD_SLAB_ALLOC_PTR_GFP(info, ccc_thread_kmem, GFP_NOFS);
        if (info == NULL)
                info = ERR_PTR(-ENOMEM);
        return info;
}
void ccc_key_fini(const struct lu_context *ctx,
                  struct lu_context_key *key, void *data)
{
        struct ccc_thread_info *info = data;

        OBD_SLAB_FREE_PTR(info, ccc_thread_kmem);
}
struct lu_context_key ccc_key = {
        .lct_tags = LCT_CL_THREAD,
        .lct_init = ccc_key_init,
        .lct_fini = ccc_key_fini
};
int ccc_req_init(const struct lu_env *env, struct cl_device *dev,
                 struct cl_req *req)
{
        struct ccc_req *vrq;
        int result;

        OBD_SLAB_ALLOC_PTR_GFP(vrq, ccc_req_kmem, GFP_NOFS);
        if (vrq != NULL) {
                cl_req_slice_add(req, &vrq->crq_cl, dev, &ccc_req_ops);
                result = 0;
        } else {
                result = -ENOMEM;
        }
        return result;
}
/**
 * An `emergency' environment used by ccc_inode_fini() when cl_env_get()
 * fails. Access to this environment is serialized by the
 * ccc_inode_fini_guard mutex.
 */
static struct lu_env *ccc_inode_fini_env = NULL;

/**
 * A mutex serializing calls to slp_inode_fini() under extreme memory
 * pressure, when environments cannot be allocated.
 */
static DEFINE_MUTEX(ccc_inode_fini_guard);
static int dummy_refcheck;
int ccc_global_init(struct lu_device_type *device_type)
{
        int result;

        result = lu_kmem_init(ccc_caches);
        if (result)
                return result;

        result = lu_device_type_init(device_type);
        if (result)
                goto out_kmem;

        ccc_inode_fini_env = cl_env_alloc(&dummy_refcheck,
                                          LCT_REMEMBER | LCT_NOREF);
        if (IS_ERR(ccc_inode_fini_env)) {
                result = PTR_ERR(ccc_inode_fini_env);
                goto out_device;
        }
        ccc_inode_fini_env->le_ctx.lc_cookie = 0x4;
        return 0;
out_device:
        lu_device_type_fini(device_type);
out_kmem:
        lu_kmem_fini(ccc_caches);
        return result;
}
void ccc_global_fini(struct lu_device_type *device_type)
{
        if (ccc_inode_fini_env != NULL) {
                cl_env_put(ccc_inode_fini_env, &dummy_refcheck);
                ccc_inode_fini_env = NULL;
        }
        lu_device_type_fini(device_type);
        lu_kmem_fini(ccc_caches);
}
static void vvp_object_size_lock(struct cl_object *obj)
{
        struct inode *inode = vvp_object_inode(obj);

        ll_inode_size_lock(inode);
        cl_object_attr_lock(obj);
}

static void vvp_object_size_unlock(struct cl_object *obj)
{
        struct inode *inode = vvp_object_inode(obj);

        cl_object_attr_unlock(obj);
        ll_inode_size_unlock(inode);
}
/*****************************************************************************
 *
 * io operations.
 *
 */
int vvp_io_one_lock_index(const struct lu_env *env, struct cl_io *io,
                          __u32 enqflags, enum cl_lock_mode mode,
                          pgoff_t start, pgoff_t end)
{
        struct vvp_io *cio = vvp_env_io(env);
        struct cl_lock_descr *descr = &cio->cui_link.cill_descr;
        struct cl_object *obj = io->ci_obj;

        CLOBINVRNT(env, obj, vvp_object_invariant(obj));

        CDEBUG(D_VFSTRACE, "lock: %d [%lu, %lu]\n", mode, start, end);

        memset(&cio->cui_link, 0, sizeof cio->cui_link);

        if (cio->cui_fd && (cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
                descr->cld_mode = CLM_GROUP;
                descr->cld_gid  = cio->cui_fd->fd_grouplock.cg_gid;
        } else {
                descr->cld_mode = mode;
        }

        descr->cld_obj   = obj;
        descr->cld_start = start;
        descr->cld_end   = end;
        descr->cld_enq_flags = enqflags;

        cl_io_lock_add(env, io, &cio->cui_link);
        return 0;
}
void vvp_io_update_iov(const struct lu_env *env,
                       struct vvp_io *cio, struct cl_io *io)
{
        unsigned long i;
        size_t size = io->u.ci_rw.crw_count;

        cio->cui_iov_olen = 0;
        if (!cl_is_normalio(env, io) || cio->cui_tot_nrsegs == 0)
                return;

        for (i = 0; i < cio->cui_tot_nrsegs; i++) {
                struct iovec *iv = &cio->cui_iov[i];

                if (iv->iov_len < size) {
                        size -= iv->iov_len;
                } else {
                        if (iv->iov_len > size) {
                                /* Remember the original length of the split
                                 * segment and shrink it to fit this IO. */
                                cio->cui_iov_olen = iv->iov_len;
                                iv->iov_len = size;
                        }
                        break;
                }
        }

        cio->cui_nrsegs = i + 1;
        LASSERTF(cio->cui_tot_nrsegs >= cio->cui_nrsegs,
                 "tot_nrsegs: %lu, nrsegs: %lu\n",
                 cio->cui_tot_nrsegs, cio->cui_nrsegs);
}
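/*
 * Worked example (illustrative, not part of the original file): with two
 * 4096-byte iovecs and io->u.ci_rw.crw_count == 6144, the loop consumes the
 * first segment (size drops to 2048), then finds the second segment longer
 * than the remainder, so it records cui_iov_olen = 4096, shrinks its
 * iov_len to 2048, and sets cui_nrsegs = 2. vvp_io_advance() below uses
 * cui_iov_olen to restore the split segment.
 */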
int vvp_io_one_lock(const struct lu_env *env, struct cl_io *io,
                    __u32 enqflags, enum cl_lock_mode mode,
                    loff_t start, loff_t end)
{
        struct cl_object *obj = io->ci_obj;

        return vvp_io_one_lock_index(env, io, enqflags, mode,
                                     cl_index(obj, start),
                                     cl_index(obj, end));
}
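/*
 * Example (a sketch, not called anywhere in this file): a whole-file read
 * lock request would look like
 *
 *        result = vvp_io_one_lock(env, io, 0, CLM_READ, 0, OBD_OBJECT_EOF);
 *
 * cl_index() converts the byte offsets to page indices before the lock is
 * queued by vvp_io_one_lock_index(); note that a group-locked file
 * descriptor overrides the requested mode with CLM_GROUP.
 */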
void vvp_io_end(const struct lu_env *env, const struct cl_io_slice *ios)
{
        CLOBINVRNT(env, ios->cis_io->ci_obj,
                   vvp_object_invariant(ios->cis_io->ci_obj));
}
void vvp_io_advance(const struct lu_env *env,
                    const struct cl_io_slice *ios,
                    size_t nob)
{
        struct vvp_io *cio = cl2vvp_io(env, ios);
        struct cl_io *io = ios->cis_io;
        struct cl_object *obj = ios->cis_io->ci_obj;

        CLOBINVRNT(env, obj, vvp_object_invariant(obj));

        if (!cl_is_normalio(env, io))
                return;

        LASSERT(cio->cui_tot_nrsegs >= cio->cui_nrsegs);
        LASSERT(cio->cui_tot_count >= nob);

        /* Skip the segments consumed by the chunk of IO just finished. */
        cio->cui_iov        += cio->cui_nrsegs;
        cio->cui_tot_nrsegs -= cio->cui_nrsegs;
        cio->cui_tot_count  -= nob;

        /* The last segment may have been split; give back its unused tail. */
        if (cio->cui_iov_olen > 0) {
                struct iovec *iv;

                cio->cui_iov--;
                cio->cui_tot_nrsegs++;
                iv = &cio->cui_iov[0];
                if (io->ci_continue) {
                        iv->iov_base += iv->iov_len;
                        LASSERT(cio->cui_iov_olen > iv->iov_len);
                        iv->iov_len = cio->cui_iov_olen - iv->iov_len;
                } else {
                        /* restore the iov_len, in case of restart io. */
                        iv->iov_len = cio->cui_iov_olen;
                }
                cio->cui_iov_olen = 0;
        }
}
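/*
 * Continuing the example above (illustrative): after a chunk of IO moves
 * nob == 6144 bytes over those two segments, cui_iov is advanced past both,
 * then backed up by one because cui_iov_olen != 0; with ci_continue set,
 * the split segment is rebased to its unconsumed tail (iov_base += 2048,
 * iov_len = 4096 - 2048), ready for the next chunk of the same IO.
 */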
/**
 * Helper function that adjusts the file size (inode->i_size), if necessary,
 * when the position at offset \a pos is accessed. The file size can be
 * arbitrarily stale on a Lustre client, but the client at least knows the
 * KMS (known minimum size). If the accessed area is inside [0, KMS], set
 * the file size to KMS, otherwise glimpse the file size.
 *
 * Locking: cl_isize_lock is used to serialize changes to the inode size and
 * to protect consistency between the inode size and the cl_object
 * attributes. cl_object_size_lock() protects consistency between cl_attr's
 * of the top-object and sub-objects.
 */
int ccc_prep_size(const struct lu_env *env, struct cl_object *obj,
                  struct cl_io *io, loff_t start, size_t count, int *exceed)
{
        struct cl_attr *attr = ccc_env_thread_attr(env);
        struct inode *inode = vvp_object_inode(obj);
        loff_t pos = start + count - 1;
        loff_t kms;
        int result;

        /*
         * Consistency guarantees: the following possibilities exist for the
         * relation between the region being accessed and the real file size
         * at this moment:
         *
         *  (A): the region is completely inside of the file;
         *
         *  (B-x): x bytes of the region are inside of the file, the rest is
         *  outside;
         *
         *  (C): the region is completely outside of the file.
         *
         * This classification is stable under a DLM lock already acquired
         * by the caller, because to change the class, another client has to
         * take a DLM lock conflicting with our lock. Also, any updates to
         * ->i_size by other threads on this client are serialized by
         * ll_inode_size_lock(). This guarantees that short reads are
         * handled correctly in the face of concurrent writes and truncates.
         */
        vvp_object_size_lock(obj);
        result = cl_object_attr_get(env, obj, attr);
        if (result == 0) {
                kms = attr->cat_kms;
                if (pos > kms) {
                        /*
                         * A glimpse is necessary to determine whether we
                         * return a short read (B) or some zeroes at the end
                         * of the buffer (C).
                         */
                        vvp_object_size_unlock(obj);
                        result = cl_glimpse_lock(env, io, inode, obj, 0);
                        if (result == 0 && exceed != NULL) {
                                /* If the object's page index exceeds the
                                 * end-of-file page index, return directly.
                                 * Do not expect the kernel to check such a
                                 * case correctly; linux-2.6.18-128.1.1
                                 * fails to do so. */
                                loff_t size = i_size_read(inode);
                                unsigned long cur_index = start >>
                                                          PAGE_CACHE_SHIFT;

                                if ((size == 0 && cur_index != 0) ||
                                    (((size - 1) >> PAGE_CACHE_SHIFT) <
                                     cur_index))
                                        *exceed = 1;
                        }
                        return result;
                } else {
                        /*
                         * The region is within kms and, hence, within the
                         * real file size (A). We need to increase i_size to
                         * cover the read region so that generic_file_read()
                         * will do its job, but that doesn't mean the kms
                         * size is _correct_, it is only the _minimum_ size.
                         * If someone does a stat they will get the correct
                         * size which will always be >= the kms value here.
                         */
                        if (i_size_read(inode) < kms) {
                                i_size_write(inode, kms);
                                CDEBUG(D_VFSTRACE,
                                       DFID" updating i_size "LPU64"\n",
                                       PFID(lu_object_fid(&obj->co_lu)),
                                       (__u64)i_size_read(inode));
                        }
                }
        }

        vvp_object_size_unlock(obj);

        return result;
}
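/*
 * Example (hypothetical numbers): with kms == 1 MiB and a 4 KiB read at
 * offset 512 KiB, pos <= kms, so case (A) applies and i_size is bumped to
 * 1 MiB locally. The same read at offset 2 MiB makes pos > kms, so the
 * client glimpses the size from the OSTs and, if even the glimpsed size
 * falls below the first accessed page, reports *exceed = 1 so the caller
 * can return 0 bytes directly.
 */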
/*****************************************************************************
 *
 * Transfer operations.
 *
 */
void ccc_req_completion(const struct lu_env *env,
                        const struct cl_req_slice *slice, int ioret)
{
        struct ccc_req *vrq;

        if (ioret > 0)
                cl_stats_tally(slice->crs_dev, slice->crs_req->crq_type,
                               ioret);

        vrq = cl2ccc_req(slice);
        OBD_SLAB_FREE_PTR(vrq, ccc_req_kmem);
}
/**
 * Implementation of struct cl_req_operations::cro_attr_set() for the ccc
 * layer. ccc is responsible for the inode-derived attributes: type, times,
 * ownership, I/O epoch, parent FID, job ID, and the OSS capability.
 */
void ccc_req_attr_set(const struct lu_env *env,
                      const struct cl_req_slice *slice,
                      const struct cl_object *obj,
                      struct cl_req_attr *attr, u64 flags)
{
        struct inode *inode;
        struct obdo *oa;
        u32 valid_flags;

        oa = attr->cra_oa;
        inode = vvp_object_inode(obj);
        valid_flags = OBD_MD_FLTYPE;

        if ((flags & OBD_MD_FLOSSCAPA) != 0) {
                LASSERT(attr->cra_capa == NULL);
                attr->cra_capa = cl_capa_lookup(inode,
                                                slice->crs_req->crq_type);
        }

        if (slice->crs_req->crq_type == CRT_WRITE) {
                if (flags & OBD_MD_FLEPOCH) {
                        oa->o_valid |= OBD_MD_FLEPOCH;
                        oa->o_ioepoch = ll_i2info(inode)->lli_ioepoch;
                        valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME |
                                       OBD_MD_FLUID | OBD_MD_FLGID;
                }
        }
        obdo_from_inode(oa, inode, valid_flags & flags);
        obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid);
        if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_INVALID_PFID))
                /* Fault injection: corrupt the parent FID for LFSCK tests. */
                oa->o_parent_oid++;
        memcpy(attr->cra_jobid, ll_i2info(inode)->lli_jobid,
               LUSTRE_JOBID_SIZE);
}
static const struct cl_req_operations ccc_req_ops = {
        .cro_attr_set   = ccc_req_attr_set,
        .cro_completion = ccc_req_completion
};
int cl_setattr_ost(struct inode *inode, const struct iattr *attr,
                   struct obd_capa *capa)
{
        struct lu_env *env;
        struct cl_io *io;
        int result;
        int refcheck;

        env = cl_env_get(&refcheck);
        if (IS_ERR(env))
                RETURN(PTR_ERR(env));

        io = ccc_env_thread_io(env);
        io->ci_obj = ll_i2info(inode)->lli_clob;

        io->u.ci_setattr.sa_attr.lvb_atime = LTIME_S(attr->ia_atime);
        io->u.ci_setattr.sa_attr.lvb_mtime = LTIME_S(attr->ia_mtime);
        io->u.ci_setattr.sa_attr.lvb_ctime = LTIME_S(attr->ia_ctime);
        io->u.ci_setattr.sa_attr.lvb_size = attr->ia_size;
        io->u.ci_setattr.sa_valid = attr->ia_valid;
        io->u.ci_setattr.sa_capa = capa;

again:
        if (cl_io_init(env, io, CIT_SETATTR, io->ci_obj) == 0) {
                struct vvp_io *cio = vvp_env_io(env);

                if (attr->ia_valid & ATTR_FILE)
                        /* populate the file descriptor for ftruncate to
                         * honor the group lock - see LU-787 */
                        cio->cui_fd = LUSTRE_FPRIVATE(attr->ia_file);

                result = cl_io_loop(env, io);
        } else {
                result = io->ci_result;
        }
        cl_io_fini(env, io);
        if (unlikely(io->ci_need_restart))
                goto again;
        /* HSM import case: the file is released, so it cannot be restored;
         * no need to fail unless restore registration itself failed with
         * -ENODATA. */
        if (result == -ENODATA && io->ci_restore_needed &&
            io->ci_result != -ENODATA)
                result = 0;
        cl_env_put(env, &refcheck);
        RETURN(result);
}
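/*
 * Typical use (a sketch; the actual caller lives in llite's setattr path):
 * an ftruncate() reaches this function roughly as
 *
 *        rc = cl_setattr_ost(inode, attr, capa);
 *
 * with attr->ia_valid containing ATTR_SIZE | ATTR_FILE, so the group-lock
 * file descriptor is propagated and the IO is transparently restarted when
 * a layout change is detected (io->ci_need_restart).
 */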
/*****************************************************************************
 *
 * Misc helpers.
 *
 */
struct vvp_io *cl2vvp_io(const struct lu_env *env,
                         const struct cl_io_slice *slice)
{
        struct vvp_io *cio;

        cio = container_of(slice, struct vvp_io, cui_cl);
        LASSERT(cio == vvp_env_io(env));

        return cio;
}
struct ccc_req *cl2ccc_req(const struct cl_req_slice *slice)
{
        return container_of0(slice, struct ccc_req, crq_cl);
}
/**
 * Initialize or update CLIO structures for regular files when new
 * meta-data arrives from the server.
 *
 * \param inode regular file inode
 * \param md    new file metadata from MDS
 * - allocates cl_object if necessary,
 * - updates the layout, if the object was already there.
 */
int cl_file_inode_init(struct inode *inode, struct lustre_md *md)
{
        struct lu_env *env;
        struct ll_inode_info *lli;
        struct cl_object *clob;
        struct lu_site *site;
        struct lu_fid *fid;
        struct cl_object_conf conf = {
                .coc_inode = inode,
                .u = {
                        .coc_md = md
                }
        };
        int result = 0;
        int refcheck;

        LASSERT(md->body->mbo_valid & OBD_MD_FLID);
        LASSERT(S_ISREG(inode->i_mode));

        env = cl_env_get(&refcheck);
        if (IS_ERR(env))
                return PTR_ERR(env);

        site = ll_i2sbi(inode)->ll_site;
        lli = ll_i2info(inode);
        fid = &lli->lli_fid;
        LASSERT(fid_is_sane(fid));

        if (lli->lli_clob == NULL) {
                /* clob is a slave of the inode: an empty lli_clob means that
                 * for this new inode there is no clob in cache with the
                 * given fid, so it is unnecessary to perform
                 * lookup-alloc-lookup-insert, just alloc and insert
                 * directly. */
                LASSERT(inode->i_state & I_NEW);
                conf.coc_lu.loc_flags = LOC_F_NEW;
                clob = cl_object_find(env, lu2cl_dev(site->ls_top_dev),
                                      fid, &conf);
                if (!IS_ERR(clob)) {
                        /*
                         * No locking is necessary, as the new inode is
                         * locked by the I_NEW bit.
                         */
                        lli->lli_clob = clob;
                        lli->lli_has_smd = lsm_has_objects(md->lsm);
                        lu_object_ref_add(&clob->co_lu, "inode", inode);
                } else {
                        result = PTR_ERR(clob);
                }
        } else {
                result = cl_conf_set(env, lli->lli_clob, &conf);
        }

        cl_env_put(env, &refcheck);

        if (result != 0)
                CERROR("Failure to initialize cl object "DFID": %d\n",
                       PFID(fid), result);
        return result;
}
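/*
 * Caller context (for orientation; an assumption about the surrounding
 * tree, not stated in this file): this runs once per regular-file inode
 * when MDS metadata arrives, e.g. from ll_update_inode(), either creating
 * the cl_object for an I_NEW inode or pushing the new layout into the
 * existing object via cl_conf_set().
 */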
/**
 * Wait for others to drop their references to the object first, then drop
 * the last one ourselves, which causes the object to be destroyed
 * immediately. Must be called after cl_object_kill() against this object.
 *
 * The reason we want to do this is: destroying the top object will wait
 * for sub-objects to be destroyed first, so we can't let the bottom layer
 * (e.g. from ASTs) initiate destruction of the top object, which may
 * deadlock. See bz22520.
 */
static void cl_object_put_last(struct lu_env *env, struct cl_object *obj)
{
        struct lu_object_header *header = obj->co_lu.lo_header;
        wait_queue_t waiter;

        if (unlikely(atomic_read(&header->loh_ref) != 1)) {
                struct lu_site *site = obj->co_lu.lo_dev->ld_site;
                struct lu_site_bkt_data *bkt;

                bkt = lu_site_bkt_from_fid(site, &header->loh_fid);

                init_waitqueue_entry_current(&waiter);
                add_wait_queue(&bkt->lsb_marche_funebre, &waiter);

                while (1) {
                        set_current_state(TASK_UNINTERRUPTIBLE);
                        if (atomic_read(&header->loh_ref) == 1)
                                break;
                        waitq_wait(&waiter, TASK_UNINTERRUPTIBLE);
                }

                set_current_state(TASK_RUNNING);
                remove_wait_queue(&bkt->lsb_marche_funebre, &waiter);
        }

        cl_object_put(env, obj);
}
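/*
 * The waiting protocol above in a nutshell (descriptive note): releasing a
 * reference wakes up the site bucket's lsb_marche_funebre queue, so the
 * loop re-checks loh_ref after every wakeup and only proceeds once we hold
 * the single remaining reference, which cl_object_put() then releases.
 */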
void cl_inode_fini(struct inode *inode)
{
        struct lu_env *env;
        struct ll_inode_info *lli = ll_i2info(inode);
        struct cl_object *clob = lli->lli_clob;
        int refcheck;
        int emergency;

        if (clob != NULL) {
                void *cookie;

                cookie = cl_env_reenter();
                env = cl_env_get(&refcheck);
                emergency = IS_ERR(env);
                if (emergency) {
                        mutex_lock(&ccc_inode_fini_guard);
                        LASSERT(ccc_inode_fini_env != NULL);
                        cl_env_implant(ccc_inode_fini_env, &refcheck);
                        env = ccc_inode_fini_env;
                }
                /*
                 * The cl_object cache is a slave to the inode cache (which,
                 * in turn, is a slave to the dentry cache); don't keep a
                 * cl_object in memory when its master is evicted.
                 */
                cl_object_kill(env, clob);
                lu_object_ref_del(&clob->co_lu, "inode", inode);
                cl_object_put_last(env, clob);
                lli->lli_clob = NULL;
                if (emergency) {
                        cl_env_unplant(ccc_inode_fini_env, &refcheck);
                        mutex_unlock(&ccc_inode_fini_guard);
                } else {
                        cl_env_put(env, &refcheck);
                }
                cl_env_reexit(cookie);
        }
}
/**
 * Return the IF_* type for a given lu_dirent entry.
 * The IF_* flag should be converted to the particular OS file type in the
 * platform llite module.
 */
__u16 ll_dirent_type_get(struct lu_dirent *ent)
{
        __u16 type = 0;
        struct luda_type *lt;
        int len = 0;

        if (le32_to_cpu(ent->lde_attrs) & LUDA_TYPE) {
                const unsigned align = sizeof(struct luda_type) - 1;

                len = le16_to_cpu(ent->lde_namelen);
                len = (len + align) & ~align;
                lt = (void *)ent->lde_name + len;
                type = IFTODT(le16_to_cpu(lt->lt_type));
        }
        return type;
}
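/*
 * Worked example: sizeof(struct luda_type) == 2, so align == 1; a 5-byte
 * name gives len = (5 + 1) & ~1 == 6, placing the luda_type record on the
 * 2-byte boundary right after the padded name. A regular file's lt_type
 * (S_IFREG mode bits) then maps to DT_REG via IFTODT().
 */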
/**
 * Build the inode number from the passed \a fid.
 */
__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32)
{
        if (BITS_PER_LONG == 32 || api32)
                RETURN(fid_flatten32(fid));

        RETURN(fid_flatten(fid));
}
/**
 * Build the inode generation from the passed \a fid. If our FID overflows
 * the 32-bit inode number, then return a non-zero generation to
 * distinguish them.
 */
__u32 cl_fid_build_gen(const struct lu_fid *fid)
{
        __u32 gen;

        if (fid_is_igif(fid)) {
                gen = lu_igif_gen(fid);
                RETURN(gen);
        }

        gen = (fid_flatten(fid) >> 32);
        RETURN(gen);
}
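/*
 * Illustrative mapping (not a worked value, since flattening depends on the
 * FID layout): fid_flatten() packs a FID into a unique 64-bit inode number,
 * while fid_flatten32() squeezes the same FID into 32 bits for 32-bit hosts
 * or 32-bit API users. The high 32 bits of the flattened FID come back as
 * the generation from cl_fid_build_gen(), so the (ino, gen) pair still
 * identifies the file even when the 32-bit ino alone cannot.
 */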
/* The lsm is unreliable after the hsm implementation, as the layout can be
 * changed at any time. This is only to support old, non-clio-ized
 * interfaces. It will cause a deadlock if clio operations are called with
 * this extra layout refcount, because in case the layout changes during
 * the IO, ll_layout_refresh() will have to wait for the refcount to become
 * zero to destroy the older layout.
 *
 * Notice that the lsm returned by this function may not be valid unless
 * called inside the layout lock - MDS_INODELOCK_LAYOUT. */
struct lov_stripe_md *ccc_inode_lsm_get(struct inode *inode)
{
        return lov_lsm_get(ll_i2info(inode)->lli_clob);
}

inline void ccc_inode_lsm_put(struct inode *inode, struct lov_stripe_md *lsm)
{
        lov_lsm_put(ll_i2info(inode)->lli_clob, lsm);
}
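/*
 * Usage sketch (illustrative; per the comment above, callers must hold the
 * layout lock, MDS_INODELOCK_LAYOUT):
 *
 *        struct lov_stripe_md *lsm = ccc_inode_lsm_get(inode);
 *
 *        if (lsm != NULL) {
 *                ... inspect striping ...
 *                ccc_inode_lsm_put(inode, lsm);
 *        }
 */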