lustre/ldlm/ldlm_flock.c

   1 /*
   2  * GPL HEADER START
   3  *
   4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5  *
   6  * This program is free software; you can redistribute it and/or modify
   7  * it under the terms of the GNU General Public License version 2 only,
   8  * as published by the Free Software Foundation.
   9  *
  10  * This program is distributed in the hope that it will be useful, but
  11  * WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  * General Public License version 2 for more details (a copy is included
  14  * in the LICENSE file that accompanied this code).
  15  *
  16  * You should have received a copy of the GNU General Public License
  17  * version 2 along with this program; If not, see
  18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
  19  *
  20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  21  * CA 95054 USA or visit www.sun.com if you need additional information or
  22  * have any questions.
  23  *
  24  * GPL HEADER END
  25  */
  26 /*
  27  * Copyright (c) 2003 Hewlett-Packard Development Company LP.
  28  * Developed under the sponsorship of the US Government under
  29  * Subcontract No. B514193
  30  *
  31  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  32  * Use is subject to license terms.
  33  *
  34  * Copyright (c) 2010, 2012, Intel Corporation.
  35  */
  36 /*
  37  * This file is part of Lustre, http://www.lustre.org/
  38  * Lustre is a trademark of Sun Microsystems, Inc.
  39  */
  40
  41 /**
  42  * This file implements POSIX lock type for Lustre.
  43  * Its policy properties are start and end of extent and PID.
  44  *
  45  * These locks are only done through MDS due to POSIX semantics requiring
  46  * e.g. that locks could be only partially released and as such split into
  47  * two parts, and also that two adjacent locks from the same process may be
  48  * merged into a single wider lock.
  49  *
  50  * Lock modes are mapped like this:
  51  * PR and PW for READ and WRITE locks
  52  * NL to request a releasing of a portion of the lock
  53  *
  54  * These flock locks never time out.
  55  */
  56
  57 #define DEBUG_SUBSYSTEM S_LDLM
  58
  59 #ifdef __KERNEL__
  60 #include <lustre_dlm.h>
  61 #include <obd_support.h>
  62 #include <obd_class.h>
  63 #include <lustre_lib.h>
  64 #include <libcfs/list.h>
  65 #else
  66 #include <liblustre.h>
  67 #include <obd_class.h>
  68 #endif
  69
  70 #include "ldlm_internal.h"
  71
  /*
   * Forward declaration so that ldlm_process_flock_lock() can install this
   * handler as l_blocking_ast on requests whose namespace is not a client
   * namespace.
   */
  72 int ldlm_flock_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
  73                             void *data, int flag);
  74
  /**
   * list_for_remaining_safe - iterate over the remaining entries in a list
   *              and safeguard against removal of a list entry.
   * \param pos   the &struct list_head to use as a loop counter. pos MUST
   *              have been initialized prior to using it in this macro, and
   *              it must be a modifiable lvalue because the macro assigns
   *              to it on every step.
   * \param n     another &struct list_head to use as temporary storage. It
   *              always holds the successor of \a pos, which is what allows
   *              the loop body to unlink \a pos.
   * \param head  the head for your list; iteration ends once \a pos
   *              reaches it.
   *
   * Every argument is parenthesized in the expansion so that an expression
   * such as `*pp` can be passed without operator-precedence surprises.
   */
  #define list_for_remaining_safe(pos, n, head) \
          for ((n) = (pos)->next; (pos) != (head); \
               (pos) = (n), (n) = (pos)->next)
  85
  86 static inline int
  87 ldlm_same_flock_owner(struct ldlm_lock *lock, struct ldlm_lock *new)
  88 {
  89         return((new->l_policy_data.l_flock.owner ==
  90                 lock->l_policy_data.l_flock.owner) &&
  91                (new->l_export == lock->l_export));
  92 }
  93
  94 static inline int
  95 ldlm_flocks_overlap(struct ldlm_lock *lock, struct ldlm_lock *new)
  96 {
  97         return((new->l_policy_data.l_flock.start <=
  98                 lock->l_policy_data.l_flock.end) &&
  99                (new->l_policy_data.l_flock.end >=
 100                 lock->l_policy_data.l_flock.start));
 101 }
 102
 103 static inline void ldlm_flock_blocking_link(struct ldlm_lock *req,
 104                                             struct ldlm_lock *lock)
 105 {
 106         /* For server only */
 107         if (req->l_export == NULL)
 108                 return;
 109
 110         LASSERT(cfs_hlist_unhashed(&req->l_exp_flock_hash));
 111
 112         req->l_policy_data.l_flock.blocking_owner =
 113                 lock->l_policy_data.l_flock.owner;
 114         req->l_policy_data.l_flock.blocking_export =
 115                 lock->l_export;
 116         req->l_policy_data.l_flock.blocking_refs = 0;
 117
 118         cfs_hash_add(req->l_export->exp_flock_hash,
 119                      &req->l_policy_data.l_flock.owner,
 120                      &req->l_exp_flock_hash);
 121 }
 122
 123 static inline void ldlm_flock_blocking_unlink(struct ldlm_lock *req)
 124 {
 125         /* For server only */
 126         if (req->l_export == NULL)
 127                 return;
 128
 129         check_res_locked(req->l_resource);
 130         if (req->l_export->exp_flock_hash != NULL &&
 131             !cfs_hlist_unhashed(&req->l_exp_flock_hash))
 132                 cfs_hash_del(req->l_export->exp_flock_hash,
 133                              &req->l_policy_data.l_flock.owner,
 134                              &req->l_exp_flock_hash);
 135 }
 136
 137 static inline void
 138 ldlm_flock_destroy(struct ldlm_lock *lock, ldlm_mode_t mode, __u64 flags)
 139 {
 140         ENTRY;
 141
 142         LDLM_DEBUG(lock, "ldlm_flock_destroy(mode: %d, flags: 0x%llx)",
 143                    mode, flags);
 144
 145         /* Safe to not lock here, since it should be empty anyway */
 146         LASSERT(cfs_hlist_unhashed(&lock->l_exp_flock_hash));
 147
 148         cfs_list_del_init(&lock->l_res_link);
 149         if (flags == LDLM_FL_WAIT_NOREPROC &&
 150             !(lock->l_flags & LDLM_FL_FAILED)) {
 151                 /* client side - set a flag to prevent sending a CANCEL */
 152                 lock->l_flags |= LDLM_FL_LOCAL_ONLY | LDLM_FL_CBPENDING;
 153
 154                 /* when reaching here, it is under lock_res_and_lock(). Thus,
 155                    need call the nolock version of ldlm_lock_decref_internal*/
 156                 ldlm_lock_decref_internal_nolock(lock, mode);
 157         }
 158
 159         ldlm_lock_destroy_nolock(lock);
 160         EXIT;
 161 }
 162
 163 /**
 164  * POSIX locks deadlock detection code.
 165  *
 166  * Given a new lock \a req and an existing lock \a bl_lock it conflicts
 167  * with, we need to iterate through all blocked POSIX locks for this
 168  * export and see if there is a deadlock condition arising. (i.e. when
 169  * one client holds a lock on something and want a lock on something
 170  * else and at the same time another client has the opposite situation).
 171  */
 172 static int
 173 ldlm_flock_deadlock(struct ldlm_lock *req, struct ldlm_lock *bl_lock)
 174 {
 175         struct obd_export *req_exp = req->l_export;
 176         struct obd_export *bl_exp = bl_lock->l_export;
 177         __u64 req_owner = req->l_policy_data.l_flock.owner;
 178         __u64 bl_owner = bl_lock->l_policy_data.l_flock.owner;
 179
 180         /* For server only */
 181         if (req_exp == NULL)
 182                 return 0;
 183
 184         class_export_get(bl_exp);
 185         while (1) {
 186                 struct obd_export *bl_exp_new;
 187                 struct ldlm_lock *lock = NULL;
 188                 struct ldlm_flock *flock;
 189
 190                 if (bl_exp->exp_flock_hash != NULL)
 191                         lock = cfs_hash_lookup(bl_exp->exp_flock_hash,
 192                                                &bl_owner);
 193                 if (lock == NULL)
 194                         break;
 195
 196                 LASSERT(req != lock);
 197                 flock = &lock->l_policy_data.l_flock;
 198                 LASSERT(flock->owner == bl_owner);
 199                 bl_owner = flock->blocking_owner;
 200                 bl_exp_new = class_export_get(flock->blocking_export);
 201                 class_export_put(bl_exp);
 202
 203                 cfs_hash_put(bl_exp->exp_flock_hash, &lock->l_exp_flock_hash);
 204                 bl_exp = bl_exp_new;
 205
 206                 if (bl_exp->exp_failed)
 207                         break;
 208
 209                 if (bl_owner == req_owner && bl_exp == req_exp) {
 210                         class_export_put(bl_exp);
 211                         return 1;
 212                 }
 213         }
 214         class_export_put(bl_exp);
 215
 216         return 0;
 217 }
 218
 219 static void ldlm_flock_cancel_on_deadlock(struct ldlm_lock *lock,
 220                                                 cfs_list_t *work_list)
 221 {
 222         CDEBUG(D_INFO, "reprocess deadlock req=%p\n", lock);
 223
 224         if ((exp_connect_flags(lock->l_export) &
 225                                 OBD_CONNECT_FLOCK_DEAD) == 0) {
 226                 CERROR("deadlock found, but client doesn't "
 227                                 "support flock canceliation\n");
 228         } else {
 229                 LASSERT(lock->l_completion_ast);
 230                 LASSERT((lock->l_flags & LDLM_FL_AST_SENT) == 0);
 231                 lock->l_flags |= LDLM_FL_AST_SENT | LDLM_FL_CANCEL_ON_BLOCK |
 232                         LDLM_FL_FLOCK_DEADLOCK;
 233                 ldlm_flock_blocking_unlink(lock);
 234                 ldlm_resource_unlink_lock(lock);
 235                 ldlm_add_ast_work_item(lock, NULL, work_list);
 236         }
 237 }
 238
 239 /**
 240  * Process a granting attempt for flock lock.
 241  * Must be called under ns lock held.
 242  *
 243  * This function looks for any conflicts for \a lock in the granted or
 244  * waiting queues. The lock is granted if no conflicts are found in
 245  * either queue.
 246  *
 247  * It is also responsible for splitting a lock if a portion of the lock
 248  * is released.
 249  *
 250  * If \a first_enq is 0 (ie, called from ldlm_reprocess_queue):
 251  *   - blocking ASTs have already been sent
 252  *
 253  * If \a first_enq is 1 (ie, called from ldlm_lock_enqueue):
 254  *   - blocking ASTs have not been sent yet, so list of conflicting locks
 255  *     would be collected and ASTs sent.
      *
      * \param req        lock request to process; several paths below destroy
      *                   it through ldlm_flock_destroy(req, ...)
      * \param flags      [in,out] enqueue flags. When the value is exactly
      *                   LDLM_FL_WAIT_NOREPROC, or the requested mode is
      *                   LCK_NL, conflict detection is skipped and only the
      *                   owner's granted locks are merged/split.
      *                   LDLM_FL_BLOCK_NOWAIT and LDLM_FL_TEST_LOCK are
      *                   honoured; LDLM_FL_BLOCK_GRANTED or
      *                   LDLM_FL_LOCK_CHANGED may be set before returning
      * \param first_enq  see above
      * \param err        [out] ELDLM_OK, or -EAGAIN, -EDEADLK, -ENOLCK
      * \param work_list  list that receives AST work items
      *
      * \retval LDLM_ITER_STOP      the request was not granted here: refused,
      *                             answered as a test lock, queued on
      *                             lr_waiting, or failed
      * \retval LDLM_ITER_CONTINUE  the request was granted/merged, or (with
      *                             \a first_enq == 0) it still conflicts or
      *                             was cancelled because of a deadlock
 256  */
 257 int
 258 ldlm_process_flock_lock(struct ldlm_lock *req, __u64 *flags, int first_enq,
 259                         ldlm_error_t *err, cfs_list_t *work_list)
 260 {
 261         struct ldlm_resource *res = req->l_resource;
 262         struct ldlm_namespace *ns = ldlm_res_to_ns(res);
 263         cfs_list_t *tmp;
 264         cfs_list_t *ownlocks = NULL;
 265         struct ldlm_lock *lock = NULL;
 266         struct ldlm_lock *new = req;
 267         struct ldlm_lock *new2 = NULL;
 268         ldlm_mode_t mode = req->l_req_mode;
 269         int local = ns_is_client(ns);
 270         int added = (mode == LCK_NL);
 271         int overlaps = 0;
 272         int splitted = 0;
 273         const struct ldlm_callback_suite null_cbs = { NULL };
 274         ENTRY;
 275
 276         CDEBUG(D_DLMTRACE, "flags %#llx owner "LPU64" pid %u mode %u start "
 277                LPU64" end "LPU64"\n", *flags,
 278                new->l_policy_data.l_flock.owner,
 279                new->l_policy_data.l_flock.pid, mode,
 280                req->l_policy_data.l_flock.start,
 281                req->l_policy_data.l_flock.end);
 282
 283         *err = ELDLM_OK;
 284
 285         if (local) {
 286                 /* No blocking ASTs are sent to the clients for
 287                  * Posix file & record locks */
 288                 req->l_blocking_ast = NULL;
 289         } else {
 290                 /* Called on the server for lock cancels. */
 291                 req->l_blocking_ast = ldlm_flock_blocking_ast;
 292         }
 293
             /* Restart point used after the resource lock was dropped to
              * allocate new2 in the split path below.
              * NOTE(review): ownlocks, new, added and overlaps are not reset
              * here and keep whatever the previous pass left in them -
              * confirm that this is intended. */
 294 reprocess:
 295         if ((*flags == LDLM_FL_WAIT_NOREPROC) || (mode == LCK_NL)) {
 296                 /* This loop determines where this processes locks start
 297                  * in the resource lr_granted list. */
 298                 cfs_list_for_each(tmp, &res->lr_granted) {
 299                         lock = cfs_list_entry(tmp, struct ldlm_lock,
 300                                               l_res_link);
 301                         if (ldlm_same_flock_owner(lock, req)) {
 302                                 ownlocks = tmp;
 303                                 break;
 304                         }
 305                 }
 306         } else {
 307                 int reprocess_failed = 0;
 308                 lockmode_verify(mode);
 309
 310                 /* This loop determines if there are existing locks
 311                  * that conflict with the new lock request. */
 312                 cfs_list_for_each(tmp, &res->lr_granted) {
 313                         lock = cfs_list_entry(tmp, struct ldlm_lock,
 314                                               l_res_link);
 315
 316                         if (ldlm_same_flock_owner(lock, req)) {
 317                                 if (!ownlocks)
 318                                         ownlocks = tmp;
 319                                 continue;
 320                         }
 321
 322                         /* locks are compatible, overlap doesn't matter */
 323                         if (lockmode_compat(lock->l_granted_mode, mode))
 324                                 continue;
 325
 326                         if (!ldlm_flocks_overlap(lock, req))
 327                                 continue;
 328
                             /* From here on \a lock is a real conflict. */
 329                         if (!first_enq) {
 330                                 reprocess_failed = 1;
 331                                 if (ldlm_flock_deadlock(req, lock)) {
 332                                         ldlm_flock_cancel_on_deadlock(req,
 333                                                         work_list);
 334                                         RETURN(LDLM_ITER_CONTINUE);
 335                                 }
 336                                 continue;
 337                         }
 338
 339                         if (*flags & LDLM_FL_BLOCK_NOWAIT) {
 340                                 ldlm_flock_destroy(req, mode, *flags);
 341                                 *err = -EAGAIN;
 342                                 RETURN(LDLM_ITER_STOP);
 343                         }
 344
 345                         if (*flags & LDLM_FL_TEST_LOCK) {
                                     /* Report the conflicting lock back
                                      * through the request's own fields. */
 346                                 ldlm_flock_destroy(req, mode, *flags);
 347                                 req->l_req_mode = lock->l_granted_mode;
 348                                 req->l_policy_data.l_flock.pid =
 349                                         lock->l_policy_data.l_flock.pid;
 350                                 req->l_policy_data.l_flock.start =
 351                                         lock->l_policy_data.l_flock.start;
 352                                 req->l_policy_data.l_flock.end =
 353                                         lock->l_policy_data.l_flock.end;
 354                                 *flags |= LDLM_FL_LOCK_CHANGED;
 355                                 RETURN(LDLM_ITER_STOP);
 356                         }
 357
 358                         /* add lock to blocking list before deadlock
 359                          * check to prevent race */
 360                         ldlm_flock_blocking_link(req, lock);
 361
 362                         if (ldlm_flock_deadlock(req, lock)) {
 363                                 ldlm_flock_blocking_unlink(req);
 364                                 ldlm_flock_destroy(req, mode, *flags);
 365                                 *err = -EDEADLK;
 366                                 RETURN(LDLM_ITER_STOP);
 367                         }
 368
 369                         ldlm_resource_add_lock(res, &res->lr_waiting, req);
 370                         *flags |= LDLM_FL_BLOCK_GRANTED;
 371                         RETURN(LDLM_ITER_STOP);
 372                 }
 373                 if (reprocess_failed)
 374                         RETURN(LDLM_ITER_CONTINUE);
 375         }
 376
             /* No conflict: a test lock is answered with mode LCK_NL. */
 377         if (*flags & LDLM_FL_TEST_LOCK) {
 378                 ldlm_flock_destroy(req, mode, *flags);
 379                 req->l_req_mode = LCK_NL;
 380                 *flags |= LDLM_FL_LOCK_CHANGED;
 381                 RETURN(LDLM_ITER_STOP);
 382         }
 383
 384         /* In case we had slept on this lock request take it off of the
 385          * deadlock detection hash list. */
 386         ldlm_flock_blocking_unlink(req);
 387
 388         /* Scan the locks owned by this process that overlap this request.
 389          * We may have to merge or split existing locks. */
 390
 391         if (!ownlocks)
 392                 ownlocks = &res->lr_granted;
 393
             /* ownlocks doubles as the loop cursor: the macro advances it, and
              * tmp caches its successor so that the current lock can be
              * destroyed inside the body. */
 394         list_for_remaining_safe(ownlocks, tmp, &res->lr_granted) {
 395                 lock = cfs_list_entry(ownlocks, struct ldlm_lock, l_res_link);
 396
 397                 if (!ldlm_same_flock_owner(lock, new))
 398                         break;
 399
 400                 if (lock->l_granted_mode == mode) {
 401                         /* If the modes are the same then we need to process
 402                          * locks that overlap OR adjoin the new lock. The extra
 403                          * logic condition is necessary to deal with arithmetic
 404                          * overflow and underflow. */
 405                         if ((new->l_policy_data.l_flock.start >
 406                              (lock->l_policy_data.l_flock.end + 1))
 407                             && (lock->l_policy_data.l_flock.end !=
 408                                 OBD_OBJECT_EOF))
 409                                 continue;
 410
 411                         if ((new->l_policy_data.l_flock.end <
 412                              (lock->l_policy_data.l_flock.start - 1))
 413                             && (lock->l_policy_data.l_flock.start != 0))
 414                                 break;
 415
                             /* Widen both locks to the union of their ranges. */
 416                         if (new->l_policy_data.l_flock.start <
 417                             lock->l_policy_data.l_flock.start) {
 418                                 lock->l_policy_data.l_flock.start =
 419                                         new->l_policy_data.l_flock.start;
 420                         } else {
 421                                 new->l_policy_data.l_flock.start =
 422                                         lock->l_policy_data.l_flock.start;
 423                         }
 424
 425                         if (new->l_policy_data.l_flock.end >
 426                             lock->l_policy_data.l_flock.end) {
 427                                 lock->l_policy_data.l_flock.end =
 428                                         new->l_policy_data.l_flock.end;
 429                         } else {
 430                                 new->l_policy_data.l_flock.end =
 431                                         lock->l_policy_data.l_flock.end;
 432                         }
 433
                             /* The first merged lock takes over the role of
                              * \a new; later ones are redundant and destroyed. */
 434                         if (added) {
 435                                 ldlm_flock_destroy(lock, mode, *flags);
 436                         } else {
 437                                 new = lock;
 438                                 added = 1;
 439                         }
 440                         continue;
 441                 }
 442
                     /* Modes differ from here on: only a true overlap matters,
                      * adjoining ranges are left alone. */
 443                 if (new->l_policy_data.l_flock.start >
 444                     lock->l_policy_data.l_flock.end)
 445                         continue;
 446
 447                 if (new->l_policy_data.l_flock.end <
 448                     lock->l_policy_data.l_flock.start)
 449                         break;
 450
 451                 ++overlaps;
 452
                     /* \a new covers the start of \a lock: trim the head of
                      * \a lock, or destroy it when it is covered entirely. */
 453                 if (new->l_policy_data.l_flock.start <=
 454                     lock->l_policy_data.l_flock.start) {
 455                         if (new->l_policy_data.l_flock.end <
 456                             lock->l_policy_data.l_flock.end) {
 457                                 lock->l_policy_data.l_flock.start =
 458                                         new->l_policy_data.l_flock.end + 1;
 459                                 break;
 460                         }
 461                         ldlm_flock_destroy(lock, lock->l_req_mode, *flags);
 462                         continue;
 463                 }
                     /* \a new covers the tail of \a lock: trim its end. */
 464                 if (new->l_policy_data.l_flock.end >=
 465                     lock->l_policy_data.l_flock.end) {
 466                         lock->l_policy_data.l_flock.end =
 467                                 new->l_policy_data.l_flock.start - 1;
 468                         continue;
 469                 }
 470
 471                 /* split the existing lock into two locks */
 472
 473                 /* if this is an F_UNLCK operation then we could avoid
 474                  * allocating a new lock and use the req lock passed in
 475                  * with the request but this would complicate the reply
 476                  * processing since updates to req get reflected in the
 477                  * reply. The client side replays the lock request so
 478                  * it must see the original lock data in the reply. */
 479
 480                 /* XXX - if ldlm_lock_new() can sleep we should
 481                  * release the lr_lock, allocate the new lock,
 482                  * and restart processing this lock. */
 483                 if (!new2) {
 484                         unlock_res_and_lock(req);
 485                         new2 = ldlm_lock_create(ns, &res->lr_name, LDLM_FLOCK,
 486                                                 lock->l_granted_mode, &null_cbs,
 487                                                 NULL, 0, LVB_T_NONE);
 488                         lock_res_and_lock(req);
 489                         if (!new2) {
                                     /* NOTE(review): \a lock was looked up
                                      * before the resource lock was dropped
                                      * above; this read assumes it is still
                                      * valid - confirm. */
 490                                 ldlm_flock_destroy(req, lock->l_granted_mode,
 491                                                    *flags);
 492                                 *err = -ENOLCK;
 493                                 RETURN(LDLM_ITER_STOP);
 494                         }
 495                         goto reprocess;
 496                 }
 497
 498                 splitted = 1;
 499
                     /* \a new lies strictly inside \a lock: new2 takes the part
                      * in front of \a new, \a lock keeps the part behind it. */
 500                 new2->l_granted_mode = lock->l_granted_mode;
 501                 new2->l_policy_data.l_flock.pid =
 502                         new->l_policy_data.l_flock.pid;
 503                 new2->l_policy_data.l_flock.owner =
 504                         new->l_policy_data.l_flock.owner;
 505                 new2->l_policy_data.l_flock.start =
 506                         lock->l_policy_data.l_flock.start;
 507                 new2->l_policy_data.l_flock.end =
 508                         new->l_policy_data.l_flock.start - 1;
 509                 lock->l_policy_data.l_flock.start =
 510                         new->l_policy_data.l_flock.end + 1;
 511                 new2->l_conn_export = lock->l_conn_export;
 512                 if (lock->l_export != NULL) {
 513                         new2->l_export = class_export_lock_get(lock->l_export, new2);
 514                         if (new2->l_export->exp_lock_hash &&
 515                             cfs_hlist_unhashed(&new2->l_exp_hash))
 516                                 cfs_hash_add(new2->l_export->exp_lock_hash,
 517                                              &new2->l_remote_handle,
 518                                              &new2->l_exp_hash);
 519                 }
 520                 if (*flags == LDLM_FL_WAIT_NOREPROC)
 521                         ldlm_lock_addref_internal_nolock(new2,
 522                                                          lock->l_granted_mode);
 523
 524                 /* insert new2 at lock */
 525                 ldlm_resource_add_lock(res, ownlocks, new2);
 526                 LDLM_LOCK_RELEASE(new2);
 527                 break;
 528         }
 529
 530         /* if new2 is created but never used, destroy it*/
 531         if (splitted == 0 && new2 != NULL)
 532                 ldlm_lock_destroy_nolock(new2);
 533
 534         /* At this point we're granting the lock request. */
 535         req->l_granted_mode = req->l_req_mode;
 536
 537         /* Add req to the granted queue before calling ldlm_reprocess_all(). */
 538         if (!added) {
 539                 cfs_list_del_init(&req->l_res_link);
 540                 /* insert new lock before ownlocks in list. */
 541                 ldlm_resource_add_lock(res, ownlocks, req);
 542         }
 543
 544         if (*flags != LDLM_FL_WAIT_NOREPROC) {
 545 #ifdef HAVE_SERVER_SUPPORT
 546                 if (first_enq) {
 547                         /* If this is an unlock, reprocess the waitq and
 548                          * send completions ASTs for locks that can now be
 549                          * granted. The only problem with doing this
 550                          * reprocessing here is that the completion ASTs for
 551                          * newly granted locks will be sent before the unlock
 552                          * completion is sent. It shouldn't be an issue. Also
 553                          * note that ldlm_process_flock_lock() will recurse,
 554                          * but only once because first_enq will be false from
 555                          * ldlm_reprocess_queue. */
 556                         if ((mode == LCK_NL) && overlaps) {
 557                                 CFS_LIST_HEAD(rpc_list);
 558                                 int rc;
 559 restart:
 560                                 ldlm_reprocess_queue(res, &res->lr_waiting,
 561                                                      &rpc_list);
 562
                                     /* The ASTs are run with the resource
                                      * lock released; -ERESTART repeats the
                                      * reprocessing. */
 563                                 unlock_res_and_lock(req);
 564                                 rc = ldlm_run_ast_work(ns, &rpc_list,
 565                                                        LDLM_WORK_CP_AST);
 566                                 lock_res_and_lock(req);
 567                                 if (rc == -ERESTART)
 568                                         GOTO(restart, -ERESTART);
 569                        }
 570                 } else {
 571                         LASSERT(req->l_completion_ast);
 572                         ldlm_add_ast_work_item(req, NULL, work_list);
 573                 }
 574 #else /* !HAVE_SERVER_SUPPORT */
 575                 /* The only one possible case for client-side calls flock
 576                  * policy function is ldlm_flock_completion_ast inside which
 577                  * carries LDLM_FL_WAIT_NOREPROC flag. */
 578                 CERROR("Illegal parameter for client-side-only module.\n");
 579                 LBUG();
 580 #endif /* HAVE_SERVER_SUPPORT */
 581         }
 582
 583         /* In case we're reprocessing the requested lock we can't destroy
 584          * it until after calling ldlm_add_ast_work_item() above so that laawi()
 585          * can bump the reference count on \a req. Otherwise \a req
 586          * could be freed before the completion AST can be sent.  */
 587         if (added)
 588                 ldlm_flock_destroy(req, mode, *flags);
 589
 590         ldlm_resource_dump(D_INFO, res);
 591         RETURN(LDLM_ITER_CONTINUE);
 592 }
 593
 594 struct ldlm_flock_wait_data {
             /* Context that ldlm_flock_completion_ast() passes, through its
              * l_wait_info, to ldlm_flock_interrupted_wait() while sleeping
              * on a blocked flock. */
 595         struct ldlm_lock *fwd_lock;       /* the lock being waited on */
 596         int               fwd_generation; /* imp_generation sampled before sleeping; only set when an import exists */
 597 };
 598
 599 static void
 600 ldlm_flock_interrupted_wait(void *data)
 601 {
 602         struct ldlm_lock *lock;
 603         ENTRY;
 604
 605         lock = ((struct ldlm_flock_wait_data *)data)->fwd_lock;
 606
 607         /* take lock off the deadlock detection hash list. */
 608         lock_res_and_lock(lock);
 609         ldlm_flock_blocking_unlink(lock);
 610
 611         /* client side - set flag to prevent lock from being put on LRU list */
 612         lock->l_flags |= LDLM_FL_CBPENDING;
 613         unlock_res_and_lock(lock);
 614
 615         EXIT;
 616 }
 617
 618 /**
 619  * Flock completion callback function.
 620  *
 621  * \param lock [in,out]: A lock to be handled
 622  * \param flags    [in]: flags
 623  * \param *data    [in]: ldlm_work_cp_ast_lock() will use ldlm_cb_set_arg
 624  *
 625  * \retval 0    : success
 626  * \retval <0   : failure
 627  */
 628 int
 629 ldlm_flock_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data)
 630 {
 631         struct file_lock                *getlk = lock->l_ast_data;
 632         struct obd_device              *obd;
 633         struct obd_import              *imp = NULL;
 634         struct ldlm_flock_wait_data     fwd;
 635         struct l_wait_info              lwi;
 636         ldlm_error_t                    err;
 637         int                             rc = 0;
 638         ENTRY;
 639
 640         CDEBUG(D_DLMTRACE, "flags: 0x%llx data: %p getlk: %p\n",
 641                flags, data, getlk);
 642
 643         /* Import invalidation. We need to actually release the lock
 644          * references being held, so that it can go away. No point in
 645          * holding the lock even if app still believes it has it, since
 646          * server already dropped it anyway. Only for granted locks too. */
 647         if ((lock->l_flags & (LDLM_FL_FAILED|LDLM_FL_LOCAL_ONLY)) ==
 648             (LDLM_FL_FAILED|LDLM_FL_LOCAL_ONLY)) {
 649                 if (lock->l_req_mode == lock->l_granted_mode &&
 650                     lock->l_granted_mode != LCK_NL &&
 651                     NULL == data)
 652                         ldlm_lock_decref_internal(lock, lock->l_req_mode);
 653
 654                 /* Need to wake up the waiter if we were evicted */
 655                 wake_up(&lock->l_waitq);
 656                 RETURN(0);
 657         }
 658
 659         LASSERT(flags != LDLM_FL_WAIT_NOREPROC);
 660
 661         if (!(flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED |
 662                        LDLM_FL_BLOCK_CONV))) {
 663                 if (NULL == data)
 664                         /* mds granted the lock in the reply */
 665                         goto granted;
 666                 /* CP AST RPC: lock get granted, wake it up */
 667                 wake_up(&lock->l_waitq);
 668                 RETURN(0);
 669         }
 670
 671         LDLM_DEBUG(lock, "client-side enqueue returned a blocked lock, "
 672                    "sleeping");
 673         fwd.fwd_lock = lock;
 674         obd = class_exp2obd(lock->l_conn_export);
 675
 676         /* if this is a local lock, there is no import */
 677         if (NULL != obd)
 678                 imp = obd->u.cli.cl_import;
 679
 680         if (NULL != imp) {
 681                 spin_lock(&imp->imp_lock);
 682                 fwd.fwd_generation = imp->imp_generation;
 683                 spin_unlock(&imp->imp_lock);
 684         }
 685
 686         lwi = LWI_TIMEOUT_INTR(0, NULL, ldlm_flock_interrupted_wait, &fwd);
 687
 688         /* Go to sleep until the lock is granted. */
 689         rc = l_wait_event(lock->l_waitq, is_granted_or_cancelled(lock), &lwi);
 690
 691         if (rc) {
 692                 LDLM_DEBUG(lock, "client-side enqueue waking up: failed (%d)",
 693                            rc);
 694                 RETURN(rc);
 695         }
 696
 697 granted:
 698         OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CP_CB_WAIT, 10);
 699
 700         if (lock->l_flags & LDLM_FL_FAILED) {
 701                 LDLM_DEBUG(lock, "client-side enqueue waking up: failed");
 702                 RETURN(-EIO);
 703         }
 704
 705         LDLM_DEBUG(lock, "client-side enqueue granted");
 706
 707         lock_res_and_lock(lock);
 708
 709
 710         /* Protect against race where lock could have been just destroyed
 711          * due to overlap in ldlm_process_flock_lock().
 712          */
 713         if (lock->l_flags & LDLM_FL_DESTROYED) {
 714                 unlock_res_and_lock(lock);
 715                 LDLM_DEBUG(lock, "client-side enqueue waking up: destroyed");
 716                 RETURN(0);
 717         }
 718
 719         /* take lock off the deadlock detection hash list. */
 720         ldlm_flock_blocking_unlink(lock);
 721
 722         /* ldlm_lock_enqueue() has already placed lock on the granted list. */
 723         cfs_list_del_init(&lock->l_res_link);
 724
 725         if (lock->l_flags & LDLM_FL_FLOCK_DEADLOCK) {
 726                 LDLM_DEBUG(lock, "client-side enqueue deadlock received");
 727                 rc = -EDEADLK;
 728         } else if (flags & LDLM_FL_TEST_LOCK) {
 729                 /* fcntl(F_GETLK) request */
 730                 /* The old mode was saved in getlk->fl_type so that if the mode
 731                  * in the lock changes we can decref the appropriate refcount.*/
 732                 ldlm_flock_destroy(lock, flock_type(getlk),
 733                                    LDLM_FL_WAIT_NOREPROC);
 734                 switch (lock->l_granted_mode) {
 735                 case LCK_PR:
 736                         flock_set_type(getlk, F_RDLCK);
 737                         break;
 738                 case LCK_PW:
 739                         flock_set_type(getlk, F_WRLCK);
 740                         break;
 741                 default:
 742                         flock_set_type(getlk, F_UNLCK);
 743                 }
 744                 flock_set_pid(getlk, (pid_t)lock->l_policy_data.l_flock.pid);
 745                 flock_set_start(getlk,
 746                                 (loff_t)lock->l_policy_data.l_flock.start);
 747                 flock_set_end(getlk,
 748                               (loff_t)lock->l_policy_data.l_flock.end);
 749         } else {
 750                 __u64 noreproc = LDLM_FL_WAIT_NOREPROC;
 751
 752                 /* We need to reprocess the lock to do merges or splits
 753                  * with existing locks owned by this process. */
 754                 ldlm_process_flock_lock(lock, &noreproc, 1, &err, NULL);
 755         }
 756         unlock_res_and_lock(lock);
 757         RETURN(rc);
 758 }
 759 EXPORT_SYMBOL(ldlm_flock_completion_ast);
 760
 761 int ldlm_flock_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
 762                             void *data, int flag)
 763 {
 764         ENTRY;
 765
 766         LASSERT(lock);
 767         LASSERT(flag == LDLM_CB_CANCELING);
 768
 769         /* take lock off the deadlock detection hash list. */
 770         lock_res_and_lock(lock);
 771         ldlm_flock_blocking_unlink(lock);
 772         unlock_res_and_lock(lock);
 773         RETURN(0);
 774 }
 775
 776 void ldlm_flock_policy_wire18_to_local(const ldlm_wire_policy_data_t *wpolicy,
 777                                        ldlm_policy_data_t *lpolicy)
 778 {
 779         memset(lpolicy, 0, sizeof(*lpolicy));
 780         lpolicy->l_flock.start = wpolicy->l_flock.lfw_start;
 781         lpolicy->l_flock.end = wpolicy->l_flock.lfw_end;
 782         lpolicy->l_flock.pid = wpolicy->l_flock.lfw_pid;
 783         /* Compat code, old clients had no idea about owner field and
 784          * relied solely on pid for ownership. Introduced in LU-104, 2.1,
 785          * April 2011 */
 786         lpolicy->l_flock.owner = wpolicy->l_flock.lfw_pid;
 787 }
 788
 789
 790 void ldlm_flock_policy_wire21_to_local(const ldlm_wire_policy_data_t *wpolicy,
 791                                        ldlm_policy_data_t *lpolicy)
 792 {
 793         memset(lpolicy, 0, sizeof(*lpolicy));
 794         lpolicy->l_flock.start = wpolicy->l_flock.lfw_start;
 795         lpolicy->l_flock.end = wpolicy->l_flock.lfw_end;
 796         lpolicy->l_flock.pid = wpolicy->l_flock.lfw_pid;
 797         lpolicy->l_flock.owner = wpolicy->l_flock.lfw_owner;
 798 }
 799
 800 void ldlm_flock_policy_local_to_wire(const ldlm_policy_data_t *lpolicy,
 801                                      ldlm_wire_policy_data_t *wpolicy)
 802 {
 803         memset(wpolicy, 0, sizeof(*wpolicy));
 804         wpolicy->l_flock.lfw_start = lpolicy->l_flock.start;
 805         wpolicy->l_flock.lfw_end = lpolicy->l_flock.end;
 806         wpolicy->l_flock.lfw_pid = lpolicy->l_flock.pid;
 807         wpolicy->l_flock.lfw_owner = lpolicy->l_flock.owner;
 808 }
 809
 810 /*
 811  * Export handle<->flock hash operations.
 812  */
 813 static unsigned
 814 ldlm_export_flock_hash(cfs_hash_t *hs, const void *key, unsigned mask)
 815 {
 816         return cfs_hash_u64_hash(*(__u64 *)key, mask);
 817 }
 818
 819 static void *
 820 ldlm_export_flock_key(cfs_hlist_node_t *hnode)
 821 {
 822         struct ldlm_lock *lock;
 823
 824         lock = cfs_hlist_entry(hnode, struct ldlm_lock, l_exp_flock_hash);
 825         return &lock->l_policy_data.l_flock.owner;
 826 }
 827
 828 static int
 829 ldlm_export_flock_keycmp(const void *key, cfs_hlist_node_t *hnode)
 830 {
 831         return !memcmp(ldlm_export_flock_key(hnode), key, sizeof(__u64));
 832 }
 833
 834 static void *
 835 ldlm_export_flock_object(cfs_hlist_node_t *hnode)
 836 {
 837         return cfs_hlist_entry(hnode, struct ldlm_lock, l_exp_flock_hash);
 838 }
 839
 840 static void
 841 ldlm_export_flock_get(cfs_hash_t *hs, cfs_hlist_node_t *hnode)
 842 {
 843         struct ldlm_lock *lock;
 844         struct ldlm_flock *flock;
 845
 846         lock = cfs_hlist_entry(hnode, struct ldlm_lock, l_exp_flock_hash);
 847         LDLM_LOCK_GET(lock);
 848
 849         flock = &lock->l_policy_data.l_flock;
 850         LASSERT(flock->blocking_export != NULL);
 851         class_export_get(flock->blocking_export);
 852         flock->blocking_refs++;
 853 }
 854
 855 static void
 856 ldlm_export_flock_put(cfs_hash_t *hs, cfs_hlist_node_t *hnode)
 857 {
 858         struct ldlm_lock *lock;
 859         struct ldlm_flock *flock;
 860
 861         lock = cfs_hlist_entry(hnode, struct ldlm_lock, l_exp_flock_hash);
 862         LDLM_LOCK_RELEASE(lock);
 863
 864         flock = &lock->l_policy_data.l_flock;
 865         LASSERT(flock->blocking_export != NULL);
 866         class_export_put(flock->blocking_export);
 867         if (--flock->blocking_refs == 0) {
 868                 flock->blocking_owner = 0;
 869                 flock->blocking_export = NULL;
 870         }
 871 }
 872
 873 static cfs_hash_ops_t ldlm_export_flock_ops = {
 874         .hs_hash        = ldlm_export_flock_hash,
 875         .hs_key         = ldlm_export_flock_key,
 876         .hs_keycmp      = ldlm_export_flock_keycmp,
 877         .hs_object      = ldlm_export_flock_object,
 878         .hs_get         = ldlm_export_flock_get,
 879         .hs_put         = ldlm_export_flock_put,
 880         .hs_put_locked  = ldlm_export_flock_put,
 881 };
 882
 883 int ldlm_init_flock_export(struct obd_export *exp)
 884 {
 885         if( strcmp(exp->exp_obd->obd_type->typ_name, LUSTRE_MDT_NAME) != 0)
 886                 RETURN(0);
 887
 888         exp->exp_flock_hash =
 889                 cfs_hash_create(obd_uuid2str(&exp->exp_client_uuid),
 890                                 HASH_EXP_LOCK_CUR_BITS,
 891                                 HASH_EXP_LOCK_MAX_BITS,
 892                                 HASH_EXP_LOCK_BKT_BITS, 0,
 893                                 CFS_HASH_MIN_THETA, CFS_HASH_MAX_THETA,
 894                                 &ldlm_export_flock_ops,
 895                                 CFS_HASH_DEFAULT | CFS_HASH_NBLK_CHANGE);
 896         if (!exp->exp_flock_hash)
 897                 RETURN(-ENOMEM);
 898
 899         RETURN(0);
 900 }
 901 EXPORT_SYMBOL(ldlm_init_flock_export);
 902
 903 void ldlm_destroy_flock_export(struct obd_export *exp)
 904 {
 905         ENTRY;
 906         if (exp->exp_flock_hash) {
 907                 cfs_hash_putref(exp->exp_flock_hash);
 908                 exp->exp_flock_hash = NULL;
 909         }
 910         EXIT;
 911 }
 912 EXPORT_SYMBOL(ldlm_destroy_flock_export);